amgcl-1.4.4 (git commit 42ee9da2057153b1aef734e8e462c846a33aa805)

==> amgcl-1.4.4/.appveyor.yml <==
os:
  - Visual Studio 2015

environment:
  BOOST_ROOT: C:\Libraries\boost_1_67_0

clone_folder: C:\amgcl

platform:
  - x86

configuration:
  - Release

build_script:
  - git submodule update --init --recursive
  - cmake . -DAMGCL_BUILD_TESTS=ON -Bbuild
  - cmake --build build --config Release

test_script:
  - cd build
  - ctest --build-config Release --output-on-failure

==> amgcl-1.4.4/.gitignore <==
build*
.*.swp
RELEASE-VERSION
dist
tmp
pyamgcl.egg-info
__pycache__
/debug/
.project
.cproject
.pydevproject
.sync.ffs_db
*.bundle

==> amgcl-1.4.4/.gitmodules <==
[submodule "pyamgcl/pybind11"]
	path = pyamgcl/pybind11
	url = https://github.com/pybind/pybind11

==> amgcl-1.4.4/.travis.yml <==
language: cpp

env:
  global:
    - OMP_NUM_THREADS=4
    - BOOST_BASENAME=boost_1_62_0

matrix:
  include:
    - compiler: gcc
      env:
        - MATRIX_EVAL="CC=gcc-5 && CXX=g++-5"
        - BUILD_EXAMPLES=ON
        - TEST_COVERAGE=ON
        - TEST_PYTHON=ON
      addons:
        apt:
          sources:
            - ubuntu-toolchain-r-test
          packages:
            - gcc-5
            - g++-5
            - lcov
            - openmpi-bin
            - libopenmpi-dev
            - python3-scipy
    - compiler: clang
      env:
        - BUILD_EXAMPLES=OFF
        - TEST_COVERAGE=OFF
        - TEST_PYTHON=OFF
    - os: osx
      osx_image: xcode9.1
      env:
        - BUILD_EXAMPLES=OFF
        - TEST_COVERAGE=OFF
        - TEST_PYTHON=OFF

cache:
  directories:
    - ${HOME}/${BOOST_BASENAME}/boost
    - ${HOME}/${BOOST_BASENAME}/stage/lib

before_install:
  - eval "${MATRIX_EVAL}"
  - |
    if [ "$TRAVIS_OS_NAME" = "linux" ]; then
        source .travis/install_boost.sh
    fi

script:
  - mkdir -p build && cd build
  - |
    if [ "$CC" = "clang" ]; then
        export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$(dirname $(which clang++))/../lib
    fi
  - |
    cmake -DAMGCL_TEST_COVERAGE=${TEST_COVERAGE} \
          -DAMGCL_BUILD_TESTS=ON \
          -DAMGCL_BUILD_EXAMPLES=${BUILD_EXAMPLES} \
          -DAMGCL_HAVE_PYTHON=${TEST_PYTHON} \
          -DPYTHON_EXECUTABLE:FILEPATH=/usr/bin/python3 ..
  - make
  - ctest --output-on-failure

after_success:
  - |
    if [ "$TEST_COVERAGE" = "ON" ]; then
        lcov --directory tests --base-directory ../amgcl --capture --output-file coverage.info
        lcov --remove coverage.info '/usr*' -o coverage.info
        bash <(curl -s https://codecov.io/bash)
    fi

==> amgcl-1.4.4/.travis/install_boost.sh <==
#!/bin/bash

export BOOST_ROOT=${HOME}/${BOOST_BASENAME}

if [ ! -e ${BOOST_ROOT}/boost/config.hpp ]; then
    pushd ${HOME}
    wget https://downloads.sourceforge.net/project/boost/boost/1.62.0/${BOOST_BASENAME}.tar.bz2
    rm -rf $BOOST_BASENAME
    tar xf ${BOOST_BASENAME}.tar.bz2
    (cd $BOOST_BASENAME && ./bootstrap.sh && ./b2)
    popd
fi
==> amgcl-1.4.4/CMakeLists.txt <==
cmake_minimum_required(VERSION 3.1)

if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Build type")
    message(STATUS "No build type selected, default to ${CMAKE_BUILD_TYPE}")
endif()

project(AMGCL)

if (NOT (CMAKE_VERSION LESS 3.3))
    cmake_policy(SET CMP0058 OLD)
endif()

set(AMGCL_MASTER_PROJECT OFF)
if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
    set(AMGCL_MASTER_PROJECT ON)
endif()

set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
if (CMAKE_VERSION VERSION_LESS "3.1.0")
    set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/opencl)
endif()

#----------------------------------------------------------------------------
# Find Boost
#----------------------------------------------------------------------------
option(Boost_USE_STATIC_LIBS "Use static versions of Boost libraries" OFF)
if (WIN32)
    set(Boost_USE_STATIC_LIBS ON)
endif ()

find_package(Boost COMPONENTS
    program_options
    serialization
    unit_test_framework
    )

#----------------------------------------------------------------------------
# Builtin backend
#----------------------------------------------------------------------------
add_library(amgcl INTERFACE)
add_library(amgcl::amgcl ALIAS amgcl)

target_compile_features(amgcl INTERFACE
    cxx_auto_type
    cxx_range_for
    cxx_rvalue_references
    cxx_right_angle_brackets
    cxx_static_assert
    )

if (Boost_FOUND)
    target_include_directories(amgcl SYSTEM INTERFACE ${Boost_INCLUDE_DIRS})
else()
    target_compile_definitions(amgcl INTERFACE AMGCL_NO_BOOST)
endif()

target_include_directories(amgcl INTERFACE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
    $<INSTALL_INTERFACE:include>
    )

find_package(OpenMP)
if (OPENMP_FOUND)
    target_compile_options(amgcl INTERFACE ${OpenMP_CXX_FLAGS})
    target_link_libraries(amgcl INTERFACE
        $<$<CXX_COMPILER_ID:GNU>:${OpenMP_CXX_FLAGS}>
        $<$<CXX_COMPILER_ID:Clang>:${OpenMP_CXX_FLAGS}>
        $<$<CXX_COMPILER_ID:AppleClang>:${OpenMP_CXX_FLAGS}>
        )
endif ()

#----------------------------------------------------------------------------
# Common compile options and definitions
#----------------------------------------------------------------------------
target_compile_options(amgcl INTERFACE
    # Compiler is GNU (g++):
    $<$:$>
    # Compiler is Clang:
    $<$:$>
    # Compiler is MSVC:
    $<$<CXX_COMPILER_ID:MSVC>:/bigobj>
    $<$<CXX_COMPILER_ID:MSVC>:/wd4715>
    )

target_compile_definitions(amgcl INTERFACE
    # Compiler is MSVC:
    $<$<CXX_COMPILER_ID:MSVC>:NOMINMAX>
    $<$<CXX_COMPILER_ID:MSVC>:_USE_MATH_DEFINES>
    $<$<CXX_COMPILER_ID:MSVC>:_VARIADIC_MAX=10>
    $<$<CXX_COMPILER_ID:MSVC>:_SCL_SECURE_NO_WARNINGS>
    )

#----------------------------------------------------------------------------
# Eigen backend
#----------------------------------------------------------------------------
find_path(EIGEN_INCLUDE Eigen/SparseCore PATH_SUFFIXES eigen3)
if (EIGEN_INCLUDE)
    add_library(eigen_target INTERFACE)
    target_include_directories(eigen_target INTERFACE ${EIGEN_INCLUDE})
    target_compile_options(eigen_target INTERFACE
        $<$<CXX_COMPILER_ID:GNU>:-Wno-int-in-bool-context>
        $<$<CXX_COMPILER_ID:GNU>:-Wno-maybe-uninitialized>
        $<$<CXX_COMPILER_ID:GNU>:-Wno-deprecated-copy>
        $<$<CXX_COMPILER_ID:Clang>:-Wno-deprecated-copy>
        $<$<CXX_COMPILER_ID:Clang>:-Wno-c++11-long-long>
        )
    target_compile_definitions(eigen_target INTERFACE AMGCL_HAVE_EIGEN)
endif()

#----------------------------------------------------------------------------
# Find Blaze
#----------------------------------------------------------------------------
find_package(blaze QUIET)
if (blaze_FOUND)
    add_library(blaze_target INTERFACE)
    target_link_libraries(blaze_target INTERFACE blaze::blaze)
endif()

#----------------------------------------------------------------------------
# Find VexCL
#----------------------------------------------------------------------------
if (Boost_FOUND)
    find_package(VexCL QUIET)
endif()

#----------------------------------------------------------------------------
# Find ViennaCL
#----------------------------------------------------------------------------
find_path(VIENNACL_INCLUDE viennacl/forwards.h)
if (VIENNACL_INCLUDE)
    add_library(viennacl_target INTERFACE)
    target_link_libraries(viennacl_target INTERFACE amgcl)
    target_include_directories(viennacl_target INTERFACE ${VIENNACL_INCLUDE})
    target_compile_definitions(viennacl_target INTERFACE SOLVER_BACKEND_VIENNACL)

    find_package(OpenCL)
    if (OpenCL_FOUND)
        target_include_directories(viennacl_target INTERFACE ${OpenCL_INCLUDE_DIRS})
        target_link_libraries(viennacl_target INTERFACE ${OpenCL_LIBRARY})
        target_compile_definitions(viennacl_target INTERFACE VIENNACL_WITH_OPENCL)
        target_compile_options(viennacl_target INTERFACE
            $<$<CXX_COMPILER_ID:GNU>:-Wno-ignored-attributes>
            $<$<CXX_COMPILER_ID:GNU>:-Wno-deprecated-declarations>
            $<$<CXX_COMPILER_ID:GNU>:-Wno-deprecated-copy>
            $<$<CXX_COMPILER_ID:Clang>:-Wno-ignored-attributes>
            $<$<CXX_COMPILER_ID:Clang>:-Wno-deprecated-declarations>
            )
    else()
        target_compile_definitions(viennacl_target INTERFACE VIENNACL_WITH_OPENMP)
    endif()
endif()

#----------------------------------------------------------------------------
# Find CUDA
#----------------------------------------------------------------------------
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
    find_package(CUDA)
    if (CUDA_FOUND)
        set(CUDA_TARGET_ARCH "Kepler Maxwell Pascal" CACHE STRING "Target architecture(s) for CUDA")
        cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS ${CUDA_TARGET_ARCH})

        if (OPENMP_FOUND)
            list(APPEND CUDA_NVCC_FLAGS -Xcompiler ${OpenMP_CXX_FLAGS})
        endif()

        if (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
            list(APPEND CUDA_NVCC_FLAGS ${CUDA_ARCH_FLAGS} -Wno-deprecated-gpu-targets)
            list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC -Xcompiler -Wno-vla)
        endif()

        add_library(cuda_target INTERFACE)
        target_link_libraries(cuda_target INTERFACE ${CUDA_cusparse_LIBRARY})
    endif()
endif()

#----------------------------------------------------------------------------
# Find MPI
#----------------------------------------------------------------------------
find_package(MPI 3.0)
if (MPI_CXX_FOUND)
    # Need this to comply with CMP0004 policy:
    string(STRIP "${MPI_CXX_LINK_FLAGS}" MPI_CXX_LINK_FLAGS)
    string(STRIP "${MPI_CXX_LIBRARIES}" MPI_CXX_LIBRARIES)

    add_library(mpi_target INTERFACE)
    target_link_libraries(mpi_target INTERFACE amgcl ${MPI_CXX_LIBRARIES})
    target_include_directories(mpi_target INTERFACE ${MPI_CXX_INCLUDE_PATH})
    target_compile_options(mpi_target INTERFACE
        $<$<CXX_COMPILER_ID:GNU>:-Wno-long-long>
        $<$<CXX_COMPILER_ID:GNU>:-Wno-literal-suffix>
        $<$<CXX_COMPILER_ID:Clang>:-Wno-long-long>
        )
endif()

#----------------------------------------------------------------------------
# Find Pastix
#----------------------------------------------------------------------------
find_package(Metis  QUIET)
find_package(Scotch QUIET)
find_package(Pastix QUIET)
find_package(BLAS   QUIET)

find_library(PTHREAD_LIBRARY pthread)
find_library(BZ2_LIBRARY bz2)
find_library(HWLOC_LIBRARY hwloc)

if (Scotch_FOUND)
    add_library(scotch_target INTERFACE)
    target_include_directories(scotch_target INTERFACE ${Scotch_INCLUDES})
    target_link_libraries(scotch_target INTERFACE ${Scotch_LIBRARIES})
    target_compile_definitions(scotch_target INTERFACE AMGCL_HAVE_SCOTCH)
endif()

if (Pastix_INCLUDES AND Scotch_FOUND AND BLAS_FOUND AND PTHREAD_LIBRARY AND BZ2_LIBRARY)
    add_library(pastix_target INTERFACE)
    target_include_directories(pastix_target INTERFACE
        ${Pastix_INCLUDES}
        ${Scotch_INCLUDES}
        )
    target_link_libraries(pastix_target INTERFACE
        ${Pastix_LIBRARIES}
        ${Scotch_LIBRARIES}
        ${BLAS_LIBRARIES}
        ${PTHREAD_LIBRARY}
        ${BZ2_LIBRARY}
        ${HWLOC_LIBRARY}
        )
    target_compile_definitions(pastix_target INTERFACE AMGCL_HAVE_PASTIX)
endif()

if (AMGCL_MASTER_PROJECT)
    if (Boost_FOUND)
        option(AMGCL_BUILD_TESTS OFF)
        option(AMGCL_BUILD_EXAMPLES OFF)
    endif()

    option(AMGCL_DISABLE_RARE_COMPONENTS OFF)
    if(AMGCL_DISABLE_RARE_COMPONENTS)
        add_definitions(
            -DAMGCL_RUNTIME_DISABLE_MULTICOLOR_GS
            -DAMGCL_RUNTIME_DISABLE_SPAI1
            -DAMGCL_RUNTIME_DISABLE_CHEBYSHEV
            )
    endif()

    add_subdirectory(docs)

    if (AMGCL_BUILD_TESTS)
        enable_testing()
        add_subdirectory(tests)
    endif()

    if (AMGCL_BUILD_EXAMPLES)
        add_subdirectory(lib)
        add_subdirectory(examples)
        add_subdirectory(tutorial)

        #--------------------------------------------------------------------
        # Build Fortran example
        #--------------------------------------------------------------------
        option(AMGCL_HAVE_FORTRAN "Build Fortran example" OFF)
        if (AMGCL_HAVE_FORTRAN)
            add_subdirectory(fortran)
        endif()

        #--------------------------------------------------------------------
        # Build Python example
        #--------------------------------------------------------------------
        option(AMGCL_HAVE_PYTHON "Build Python example" OFF)
        if (AMGCL_HAVE_PYTHON)
            add_subdirectory(pyamgcl)
        endif()
    endif()

    install(DIRECTORY amgcl DESTINATION include)
    install(TARGETS amgcl EXPORT amgclTargets)

    if(TARGET eigen)
        install(TARGETS eigen EXPORT amgclTargets)
    endif()

    if(TARGET blaze)
        install(TARGETS blaze EXPORT amgclTargets)
    endif()

    if(TARGET viennacl)
        install(TARGETS viennacl EXPORT amgclTargets)
    endif()

    if(TARGET mpi)
        install(TARGETS mpi EXPORT amgclTargets)
    endif()

    configure_file(
        "${CMAKE_CURRENT_SOURCE_DIR}/cmake/amgcl-config.cmake.in"
        "${CMAKE_CURRENT_BINARY_DIR}/cmake/amgcl-config.cmake"
        COPYONLY
        )

    export(EXPORT amgclTargets
        FILE "${CMAKE_CURRENT_BINARY_DIR}/cmake/amgcl-targets.cmake"
        NAMESPACE amgcl::
        )

    export(PACKAGE amgcl)

    install(EXPORT amgclTargets
        FILE amgcl-targets.cmake
        NAMESPACE amgcl::
        DESTINATION share/amgcl/cmake
        )

    install(
        FILES ${CMAKE_CURRENT_BINARY_DIR}/cmake/amgcl-config.cmake
        DESTINATION share/amgcl/cmake
        )
endif()

==> amgcl-1.4.4/LICENSE.md <==
The MIT License
===============

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==> amgcl-1.4.4/MANIFEST.in <==
recursive-include amgcl *.hpp
include RELEASE-VERSION

==> amgcl-1.4.4/README.md <==
# AMGCL

[![Documentation Status](https://readthedocs.org/projects/amgcl/badge/?version=latest)](http://amgcl.readthedocs.io/en/latest/?badge=latest)
[![DOI](https://zenodo.org/badge/6987353.svg)](https://zenodo.org/badge/latestdoi/6987353)
[![Build Status](https://travis-ci.org/ddemidov/amgcl.svg?branch=master)](https://travis-ci.org/ddemidov/amgcl)
[![Build status](https://ci.appveyor.com/api/projects/status/r0s4lbln4qf9r8aq/branch/master?svg=true)](https://ci.appveyor.com/project/ddemidov/amgcl/branch/master)
[![codecov](https://codecov.io/gh/ddemidov/amgcl/branch/master/graph/badge.svg)](https://codecov.io/gh/ddemidov/amgcl)
[![Coverity Scan Build Status](https://scan.coverity.com/projects/5301/badge.svg)](https://scan.coverity.com/projects/5301)

AMGCL is a header-only C++ library for solving large sparse linear systems
with the algebraic multigrid (AMG) method. AMG is one of the most effective
iterative methods for the solution of equation systems arising, for example,
from discretizing PDEs on unstructured grids. The method can be used as a
black-box solver for various computational problems, since it does not
require any information about the underlying geometry. AMG is often used not
as a standalone solver but as a preconditioner within an iterative solver
(e.g. Conjugate Gradients, BiCGStab, or GMRES).

AMGCL builds the AMG hierarchy on a CPU and then transfers it to one of the
provided backends. This allows for transparent acceleration of the solution
phase with the help of OpenCL, CUDA, or OpenMP technologies. Users may
provide their own backends, which enables tight integration between AMGCL and
the user code.

See AMGCL documentation at http://amgcl.readthedocs.io/

## Referencing

Demidov, Denis. AMGCL: An efficient, flexible, and extensible algebraic
multigrid implementation. Lobachevskii Journal of Mathematics,
40(5):535–546, May 2019.
[doi](https://doi.org/10.1134/S1995080219050056)
[pdf](https://rdcu.be/bHFsY)
[bib](https://raw.githubusercontent.com/ddemidov/amgcl/master/docs/demidov19.bib)

Demidov, Denis. AMGCL -- A C++ library for efficient solution of large
sparse linear systems. Software Impacts, 6:100037, November 2020.
[doi](https://doi.org/10.1016/j.simpa.2020.100037)
[bib](https://raw.githubusercontent.com/ddemidov/amgcl/master/docs/demidov20.bib)

Demidov, Denis, Lin Mu, and Bin Wang. Accelerating linear solvers for Stokes
problems with C++ metaprogramming. Journal of Computational Science (2020):
101285.
[doi](https://doi.org/10.1016/j.jocs.2020.101285)
[arxiv](https://arxiv.org/pdf/2006.06052.pdf)
[bib](https://raw.githubusercontent.com/ddemidov/amgcl/master/docs/demidov-mu-wang-20.bib)

## Support

* GitHub issues page: https://github.com/ddemidov/amgcl/issues
* Mailing list: https://groups.google.com/forum/#!forum/amgcl
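The README describes the typical setup (an AMG preconditioner inside an
iterative solver on a chosen backend) but does not include a complete code
sample. The sketch below is an editorial addition, not a file from the
archive; it assembles a 1D Poisson system and solves it with the builtin
(OpenMP) backend, smoothed aggregation, SPAI(0) relaxation, and BiCGStab,
using only components that ship with the library:

```cpp
#include <vector>
#include <tuple>

#include <amgcl/backend/builtin.hpp>
#include <amgcl/adapter/crs_tuple.hpp>
#include <amgcl/make_solver.hpp>
#include <amgcl/amg.hpp>
#include <amgcl/coarsening/smoothed_aggregation.hpp>
#include <amgcl/relaxation/spai0.hpp>
#include <amgcl/solver/bicgstab.hpp>

int main() {
    // Assemble a 1D Poisson problem in CRS format:
    ptrdiff_t n = 1000;
    std::vector<ptrdiff_t> ptr, col;
    std::vector<double> val, rhs(n, 1.0), x(n, 0.0);

    ptr.push_back(0);
    for (ptrdiff_t i = 0; i < n; ++i) {
        if (i > 0)     { col.push_back(i - 1); val.push_back(-1.0); }
        col.push_back(i); val.push_back(2.0);
        if (i + 1 < n) { col.push_back(i + 1); val.push_back(-1.0); }
        ptr.push_back(col.size());
    }

    // AMG-preconditioned BiCGStab on the builtin backend:
    typedef amgcl::backend::builtin<double> Backend;
    typedef amgcl::make_solver<
        amgcl::amg<
            Backend,
            amgcl::coarsening::smoothed_aggregation,
            amgcl::relaxation::spai0
            >,
        amgcl::solver::bicgstab<Backend>
        > Solver;

    // The CRS arrays are adapted with the crs_tuple adapter:
    Solver solve(std::tie(n, ptr, col, val));

    size_t iters;
    double error;
    std::tie(iters, error) = solve(rhs, x);
}
```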
==> amgcl-1.4.4/amgcl/adapter/block_matrix.hpp <==
#ifndef AMGCL_ADAPTER_BLOCK_MATRIX_HPP
#define AMGCL_ADAPTER_BLOCK_MATRIX_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/**
 * \file    amgcl/adapter/block_matrix.hpp
 * \author  Denis Demidov <dennis.demidov@gmail.com>
 * \brief   On-the-fly conversion to block valued matrix.
 * \ingroup adapters
 */

#include <array>
#include <memory>
#include <amgcl/backend/builtin.hpp>
#include <amgcl/util.hpp>

namespace amgcl {
namespace adapter {

template <class Matrix, class BlockType>
struct block_matrix_adapter {
    typedef BlockType value_type;
    static const int BlockSize = math::static_rows<BlockType>::value;

    const Matrix &A;

    block_matrix_adapter(const Matrix &A) : A(A) {
        precondition(
                backend::rows(A) % BlockSize == 0 &&
                backend::cols(A) % BlockSize == 0,
                "Matrix size is not divisible by block size!"
                );
    }

    size_t rows() const { return backend::rows(A) / BlockSize; }
    size_t cols() const { return backend::cols(A) / BlockSize; }

    size_t nonzeros() const {
        // Just an estimate:
        return backend::nonzeros(A) / (BlockSize * BlockSize);
    }

    struct row_iterator {
        typedef typename backend::row_iterator<Matrix>::type Base;
        typedef ptrdiff_t col_type;
        typedef BlockType val_type;

        std::array<char, BlockSize * sizeof(Base)> buf;
        Base *base;
        bool done;
        col_type cur_col;
        val_type cur_val;

        row_iterator(const Matrix &A, col_type row) : done(true) {
            base = reinterpret_cast<Base*>(buf.data());
            for(int i = 0; i < BlockSize; ++i) {
                new (base + i) Base(backend::row_begin(A, row * BlockSize + i));
                if (base[i]) {
                    col_type col = base[i].col() / BlockSize;
                    if (done) {
                        cur_col = col;
                        done = false;
                    } else {
                        cur_col = std::min(cur_col, col);
                    }
                }
            }

            if (done) return;

            // While we are gathering the current value,
            // base iterators are advanced to the next block-column.
            cur_val = math::zero<val_type>();
            col_type end = (cur_col + 1) * BlockSize;
            for(int i = 0; i < BlockSize; ++i) {
                for(; base[i] && static_cast<col_type>(base[i].col()) < end; ++base[i]) {
                    cur_val(i, base[i].col() % BlockSize) = base[i].value();
                }
            }
        }

        ~row_iterator() {
            for(int i = 0; i < BlockSize; ++i) base[i].~Base();
        }

        operator bool() const { return !done; }

        row_iterator& operator++() {
            // Base iterators are already at the next block-column.
            // We just need to gather the current column and value.
            done = true;
            col_type end = (cur_col + 1) * BlockSize;
            for(int i = 0; i < BlockSize; ++i) {
                if (base[i]) {
                    col_type col = base[i].col() / BlockSize;
                    if (done) {
                        cur_col = col;
                        done = false;
                    } else {
                        cur_col = std::min(cur_col, col);
                    }
                }
            }

            if (done) return *this;

            cur_val = math::zero<val_type>();
            end = (cur_col + 1) * BlockSize;
            for(int i = 0; i < BlockSize; ++i) {
                for(; base[i] && static_cast<col_type>(base[i].col()) < end; ++base[i]) {
                    cur_val(i, base[i].col() % BlockSize) = base[i].value();
                }
            }

            return *this;
        }

        col_type col() const { return cur_col; }
        val_type value() const { return cur_val; }
    };

    row_iterator row_begin(size_t i) const {
        return row_iterator(A, i);
    }
};

/// Convert scalar-valued matrix to a block-valued one.
template <class BlockType, class Matrix>
block_matrix_adapter<Matrix, BlockType> block_matrix(const Matrix &A) {
    return block_matrix_adapter<Matrix, BlockType>(A);
}

template <class Matrix>
std::shared_ptr< backend::crs<
    typename math::element_of< typename backend::value_type<Matrix>::type >::type,
    typename backend::col_type<Matrix>::type,
    typename backend::ptr_type<Matrix>::type
    > >
unblock_matrix(const Matrix &B) {
    typedef typename backend::value_type<Matrix>::type Block;
    typedef typename math::element_of<Block>::type     Scalar;
    typedef typename backend::col_type<Matrix>::type   Col;
    typedef typename backend::ptr_type<Matrix>::type   Ptr;

    const int brows = math::static_rows<Block>::value;
    const int bcols = math::static_cols<Block>::value;

    static_assert(brows > 1 || bcols > 1, "Can not unblock scalar matrix!");

    auto A = std::make_shared< backend::crs<Scalar, Col, Ptr> >();
    A->set_size(backend::rows(B) * brows, backend::cols(B) * bcols);
    A->ptr[0] = 0;

    const ptrdiff_t nb = backend::rows(B);

#pragma omp for
    for (ptrdiff_t ib = 0; ib < nb; ++ib) {
        auto w = backend::row_nonzeros(B, ib);
        for (ptrdiff_t i = 0, ia = ib * brows; i < brows; ++i, ++ia) {
            A->ptr[ia + 1] = w * bcols;
        }
    }

    A->scan_row_sizes();
    A->set_nonzeros();

#pragma omp for
    for (ptrdiff_t ib = 0; ib < nb; ++ib) {
        for(auto b = backend::row_begin(B, ib); b; ++b) {
            auto c = b.col();
            auto v = b.value();
            for (ptrdiff_t i = 0, ia = ib * brows; i < brows; ++i, ++ia) {
                auto row_head = A->ptr[ia];
                for(int j = 0; j < bcols; ++j) {
                    A->col[row_head] = c * bcols + j;
                    A->val[row_head] = v(i,j);
                    ++row_head;
                }
                A->ptr[ia] = row_head;
            }
        }
    }

    std::rotate(A->ptr, A->ptr + A->nrows, A->ptr + A->nrows + 1);
    A->ptr[0] = 0;

    return A;
}

} // namespace adapter

namespace backend {
namespace detail {

template <class Matrix, class BlockType>
struct use_builtin_matrix_ops<
    adapter::block_matrix_adapter<Matrix, BlockType>
    > : std::true_type {};

} // namespace detail
} // namespace backend
} // namespace amgcl

#endif
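A usage sketch for the adapter above (an editorial addition, not from the
archive; `use_block_view` is an illustrative name, and the call signature
follows the `block_matrix<BlockType>(A)` helper as reconstructed here). It
wraps a scalar matrix into a 2x2 block view without copying any data:

```cpp
#include <amgcl/adapter/block_matrix.hpp>
#include <amgcl/value_type/static_matrix.hpp>

typedef amgcl::static_matrix<double, 2, 2> block;

template <class Matrix>
void use_block_view(const Matrix &A) {
    // The scalar system size must be divisible by the block size (2 here);
    // the adapter checks this in its constructor.
    auto Ab = amgcl::adapter::block_matrix<block>(A);

    // The adapter models the amgcl matrix concept, so its rows can be
    // iterated like those of any other matrix; each value is a 2x2 block
    // gathered on the fly from the underlying scalar rows:
    for (auto a = Ab.row_begin(0); a; ++a) {
        ptrdiff_t j = a.col();   // block column number
        block     v = a.value(); // 2x2 block value
        (void)j; (void)v;
    }
}
```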
==> amgcl-1.4.4/amgcl/adapter/complex.hpp <==
#ifndef AMGCL_ADAPTER_COMPLEX_HPP
#define AMGCL_ADAPTER_COMPLEX_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/**
 * \file    amgcl/adapter/complex.hpp
 * \author  Denis Demidov <dennis.demidov@gmail.com>
 * \brief   Complex-valued matrix adapter.
 * \ingroup adapters
 */

#include <complex>
#include <boost/range/iterator_range.hpp>
#include <amgcl/backend/interface.hpp>
#include <amgcl/util.hpp>

namespace amgcl {
namespace adapter {

template <class Matrix>
struct complex_adapter {
    static_assert(
            is_complex<typename backend::value_type<Matrix>::type>::value,
            "value type should be complex");

    typedef typename backend::value_type<Matrix>::type::value_type value_type;

    const Matrix &A;

    complex_adapter(const Matrix &A) : A(A) {}

    size_t rows() const { return 2 * backend::rows(A); }
    size_t cols() const { return 2 * backend::cols(A); }
    size_t nonzeros() const { return 4 * backend::nonzeros(A); }

    struct row_iterator {
        typedef typename backend::row_iterator<Matrix>::type Base;
        typedef typename Base::col_type col_type;

        row_iterator(const Base &base, bool row_real)
            : base(base), row_real(row_real), col_real(true) {}

        operator bool() const {
            return static_cast<bool>(base);
        }

        row_iterator& operator++() {
            col_real = !col_real;
            if (col_real) ++base;
            return *this;
        }

        col_type col() const {
            if (col_real)
                return base.col() * 2;
            else
                return base.col() * 2 + 1;
        }

        value_type value() const {
            if (row_real) {
                if (col_real)
                    return std::real(base.value());
                else
                    return -std::imag(base.value());
            } else {
                if (col_real)
                    return std::imag(base.value());
                else
                    return std::real(base.value());
            }
        }

        private:
            Base base;
            bool row_real;
            bool col_real;
    };

    row_iterator row_begin(size_t i) const {
        return row_iterator(backend::row_begin(A, i / 2), i % 2 == 0);
    }
};

template <class Matrix>
complex_adapter<Matrix> complex_matrix(const Matrix &A) {
    return complex_adapter<Matrix>(A);
}

template <class Range>
boost::iterator_range<
    typename std::add_pointer<
        typename std::conditional<
            std::is_const<Range>::value,
            typename std::add_const<
                typename boost::range_value<
                    typename std::decay<Range>::type
                    >::type::value_type
                >::type,
            typename boost::range_value<
                typename std::decay<Range>::type
                >::type::value_type
            >::type
        >::type
    >
complex_range(Range &rng) {
    typedef typename std::add_pointer<
        typename std::conditional<
            std::is_const<Range>::value,
            typename std::add_const<
                typename boost::range_value<
                    typename std::decay<Range>::type
                    >::type::value_type
                >::type,
            typename boost::range_value<
                typename std::decay<Range>::type
                >::type::value_type
            >::type
        >::type pointer_type;

    pointer_type b = reinterpret_cast<pointer_type>(&rng[0]);
    pointer_type e = b + 2 * boost::size(rng);

    return boost::iterator_range<pointer_type>(b, e);
}

} // namespace adapter

namespace backend {
namespace detail {

template <class Matrix>
struct use_builtin_matrix_ops< amgcl::adapter::complex_adapter<Matrix> >
    : std::true_type {};

} // namespace detail
} // namespace backend
} // namespace amgcl

#endif
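A brief sketch of how the two helpers above are meant to be combined (an
editorial addition; `solve_as_real` is an illustrative name). The complex
system is viewed as an equivalent real-valued system of twice the size, so
any real-valued amgcl solver can be applied to it:

```cpp
#include <complex>
#include <vector>
#include <amgcl/adapter/complex.hpp>

template <class Matrix>
void solve_as_real(const Matrix &A,
        std::vector<std::complex<double>> &rhs,
        std::vector<std::complex<double>> &x)
{
    // View the n x n complex matrix as a 2n x 2n real one:
    auto A_real = amgcl::adapter::complex_matrix(A);

    // Reinterpret the complex vectors as ranges of 2n real values:
    auto f_real = amgcl::adapter::complex_range(rhs);
    auto x_real = amgcl::adapter::complex_range(x);

    // A_real, f_real, and x_real can now be handed to a real-valued
    // solver; the solution lands directly in the original complex x.
    (void)A_real; (void)f_real; (void)x_real;
}
```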
==> amgcl-1.4.4/amgcl/adapter/crs_builder.hpp <==
#ifndef AMGCL_ADAPTER_CRS_BUILDER_HPP
#define AMGCL_ADAPTER_CRS_BUILDER_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/**
 * \file    amgcl/adapter/crs_builder.hpp
 * \author  Denis Demidov <dennis.demidov@gmail.com>
 * \brief   Matrix builder that creates matrix rows as needed.
 * \ingroup adapters
 *
 * Example:
 * \code
 * struct poisson_2d {
 *     typedef double val_type;
 *     typedef long   col_type;
 *
 *     poisson_2d(size_t n) : n(n), h2i((n - 1) * (n - 1)) {}
 *
 *     // Number of rows in the constructed matrix:
 *     size_t rows() const { return n * n; }
 *
 *     // Estimated number of nonzeros in the problem:
 *     size_t nonzeros() const { return 5 * rows(); }
 *
 *     // Fills column numbers and values of nonzero elements in the given matrix row.
 *     void operator()(size_t row,
 *             std::vector<col_type> &col,
 *             std::vector<val_type> &val
 *             ) const
 *     {
 *         size_t i = row % n;
 *         size_t j = row / n;
 *
 *         if (j > 0) {
 *             col.push_back(row - n);
 *             val.push_back(-h2i);
 *         }
 *
 *         if (i > 0) {
 *             col.push_back(row - 1);
 *             val.push_back(-h2i);
 *         }
 *
 *         col.push_back(row);
 *         val.push_back(4 * h2i);
 *
 *         if (i + 1 < n) {
 *             col.push_back(row + 1);
 *             val.push_back(-h2i);
 *         }
 *
 *         if (j + 1 < n) {
 *             col.push_back(row + n);
 *             val.push_back(-h2i);
 *         }
 *     }
 *
 *     private:
 *         size_t n;
 *         double h2i;
 * };
 *
 * amgcl::make_solver<
 *     Backend, Coarsening, Relaxation, IterativeSolver
 *     > solve( amgcl::adapter::make_matrix( poisson_2d(m) ) );
 * \endcode
 */

#include <vector>
#include <amgcl/backend/builtin.hpp>

namespace amgcl {

/// Matrix adapters.
namespace adapter {

/// Generates matrix rows as needed with help of user-provided functor.
/**
 * The generated rows are not stored anywhere.
 */
template <class RowBuilder>
struct matrix_builder {
    typedef typename RowBuilder::val_type value_type;
    typedef typename RowBuilder::col_type col_type;

    RowBuilder build_row;

    matrix_builder(const RowBuilder &row_builder) : build_row(row_builder) {}

    size_t rows() const { return build_row.rows(); }
    size_t cols() const { return build_row.rows(); }
    size_t nonzeros() const { return build_row.nonzeros(); }

    struct row_iterator {
        typedef typename RowBuilder::col_type col_type;
        typedef typename RowBuilder::val_type val_type;

        typedef typename std::vector<col_type>::const_iterator col_iterator;
        typedef typename std::vector<val_type>::const_iterator val_iterator;

        row_iterator(const RowBuilder &build_row, size_t i) : ptr(0) {
            build_row(i, m_col, m_val);
        }

        operator bool() const { return m_col.size() - ptr; }

        row_iterator& operator++() { ++ptr; return *this; }

        col_type col() const { return m_col[ptr]; }
        val_type value() const { return m_val[ptr]; }

        private:
            int ptr;
            std::vector<col_type> m_col;
            std::vector<val_type> m_val;
    };

    row_iterator row_begin(size_t i) const {
        return row_iterator(build_row, i);
    }
};

/// Convenience function returning an instance of matrix_builder
template <class RowBuilder>
matrix_builder<RowBuilder> make_matrix(const RowBuilder &row_builder) {
    return matrix_builder<RowBuilder>(row_builder);
}

} // namespace adapter

namespace backend {
namespace detail {

template <class RowBuilder>
struct use_builtin_matrix_ops< amgcl::adapter::matrix_builder<RowBuilder> >
    : std::true_type {};

} // namespace detail
} // namespace backend
} // namespace amgcl

#endif
==> amgcl-1.4.4/amgcl/adapter/crs_tuple.hpp <==
#ifndef AMGCL_ADAPTER_CRS_TUPLE_HPP
#define AMGCL_ADAPTER_CRS_TUPLE_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/**
 * \file    amgcl/adapter/crs_tuple.hpp
 * \author  Denis Demidov <dennis.demidov@gmail.com>
 * \brief   Specify sparse matrix as a tuple of CRS arrays.
 * \ingroup adapters
 *
 * Example:
 * \code
 * // Adapt STL containers:
 * std::vector<int>    ptr;
 * std::vector<int>    col;
 * std::vector<double> val;
 *
 * AMG amg( std::tie(n, ptr, col, val) );
 *
 * // Adapt raw arrays:
 * int    *ptr;
 * int    *col;
 * double *val;
 *
 * AMG amg(std::make_tuple(n,
 *     amgcl::make_iterator_range(ptr, ptr + n + 1),
 *     amgcl::make_iterator_range(col, col + ptr[n]),
 *     amgcl::make_iterator_range(val, val + ptr[n])
 *     ) );
 * \endcode
 */

/**
 * \defgroup adapters Matrix adapters
 * \brief Adapters for various sparse matrix formats.
 */

#include <vector>
#include <numeric>
#include <tuple>
#include <type_traits>
#include <iterator>
#include <amgcl/util.hpp>
#include <amgcl/backend/interface.hpp>

namespace amgcl {
namespace backend {

//---------------------------------------------------------------------------
// Specialization of matrix interface
//---------------------------------------------------------------------------
template < typename N, typename PRng, typename CRng, typename VRng >
struct value_type< std::tuple<N, PRng, CRng, VRng> >
{
    typedef typename std::decay<decltype(std::declval<VRng>()[0])>::type type;
};

template < typename N, typename PRng, typename CRng, typename VRng >
struct rows_impl< std::tuple<N, PRng, CRng, VRng> >
{
    static size_t get(const std::tuple<N, PRng, CRng, VRng> &A) {
        return std::get<0>(A);
    }
};

template < typename N, typename PRng, typename CRng, typename VRng >
struct cols_impl< std::tuple<N, PRng, CRng, VRng> >
{
    static size_t get(const std::tuple<N, PRng, CRng, VRng> &A) {
        return std::get<0>(A);
    }
};

template < typename N, typename PRng, typename CRng, typename VRng >
struct nonzeros_impl< std::tuple<N, PRng, CRng, VRng> >
{
    static size_t get(const std::tuple<N, PRng, CRng, VRng> &A) {
        return std::get<1>(A)[std::get<0>(A)];
    }
};

template < typename N, typename PRng, typename CRng, typename VRng >
struct row_iterator< std::tuple<N, PRng, CRng, VRng> >
{
    class type {
        public:
            typedef typename std::decay<decltype(std::declval<CRng>()[0])>::type col_type;
            typedef typename std::decay<decltype(std::declval<VRng>()[0])>::type val_type;

            type(const std::tuple<N, PRng, CRng, VRng> &A, size_t row)
                : m_col(std::begin(std::get<2>(A)))
                , m_end(std::begin(std::get<2>(A)))
                , m_val(std::begin(std::get<3>(A)))
            {
                typedef typename std::decay<decltype(std::declval<PRng>()[0])>::type ptr_type;

                ptr_type row_begin = std::get<1>(A)[row];
                ptr_type row_end   = std::get<1>(A)[row + 1];

                m_col += row_begin;
                m_end += row_end;
                m_val += row_begin;
            }

            operator bool() const {
                return m_col != m_end;
            }

            type& operator++() {
                ++m_col;
                ++m_val;
                return *this;
            }

            col_type col() const { return *m_col; }
            val_type value() const { return *m_val; }

        private:
            typedef decltype(std::begin(std::declval<const VRng&>())) val_iterator;
            typedef decltype(std::begin(std::declval<const CRng&>())) col_iterator;

            col_iterator m_col;
            col_iterator m_end;
            val_iterator m_val;
    };
};

template < typename N, typename PRng, typename CRng, typename VRng >
struct row_begin_impl< std::tuple<N, PRng, CRng, VRng> >
{
    typedef std::tuple<N, PRng, CRng, VRng> Matrix;
    static typename row_iterator<Matrix>::type
    get(const Matrix &matrix, size_t row) {
        return typename row_iterator<Matrix>::type(matrix, row);
    }
};

template < typename N, typename PRng, typename CRng, typename VRng >
struct row_nonzeros_impl< std::tuple<N, PRng, CRng, VRng> >
{
    typedef std::tuple<N, PRng, CRng, VRng> Matrix;

    static size_t get(const Matrix &A, size_t row) {
        return std::get<1>(A)[row + 1] - std::get<1>(A)[row];
    }
};

template < typename N, typename PRng, typename CRng, typename VRng >
struct ptr_data_impl< std::tuple<N, PRng, CRng, VRng> >
{
    typedef std::tuple<N, PRng, CRng, VRng> Matrix;
    typedef typename std::decay<decltype(std::declval<PRng>()[0])>::type ptr_type;
    typedef const ptr_type* type;
    static type get(const Matrix &A) {
        return &std::get<1>(A)[0];
    }
};

template < typename N, typename PRng, typename CRng, typename VRng >
struct col_data_impl< std::tuple<N, PRng, CRng, VRng> >
{
    typedef std::tuple<N, PRng, CRng, VRng> Matrix;
    typedef typename std::decay<decltype(std::declval<CRng>()[0])>::type col_type;
    typedef const col_type* type;
    static type get(const Matrix &A) {
        return &std::get<2>(A)[0];
    }
};

template < typename N, typename PRng, typename CRng, typename VRng >
struct val_data_impl< std::tuple<N, PRng, CRng, VRng> >
{
    typedef std::tuple<N, PRng, CRng, VRng> Matrix;
    typedef typename std::decay<decltype(std::declval<VRng>()[0])>::type val_type;
    typedef const val_type* type;
    static type get(const Matrix &A) {
        return &std::get<3>(A)[0];
    }
};

namespace detail {

template < typename N, typename PRng, typename CRng, typename VRng >
struct use_builtin_matrix_ops< std::tuple<N, PRng, CRng, VRng> >
    : std::true_type {};

} // namespace detail
} // namespace backend
} // namespace amgcl

#endif
==> amgcl-1.4.4/amgcl/adapter/eigen.hpp <==
#ifndef AMGCL_ADAPTER_EIGEN_HPP
#define AMGCL_ADAPTER_EIGEN_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/**
 * \file    amgcl/adapter/eigen.hpp
 * \author  Denis Demidov <dennis.demidov@gmail.com>
 * \brief   Adapters for Eigen types to be used with builtin backend.
 * \ingroup adapters
 */

#include <type_traits>
#include <Eigen/SparseCore>
#include <amgcl/backend/interface.hpp>
#include <amgcl/util.hpp>

namespace amgcl {
namespace backend {

//---------------------------------------------------------------------------
// Backend interface specialization for Eigen types
//---------------------------------------------------------------------------
template <class T, class Enable = void>
struct is_eigen_sparse_matrix : std::false_type {};

template <class T, class Enable = void>
struct is_eigen_type : std::false_type {};

template <class T, int Options, class StorageIndex>
struct is_eigen_sparse_matrix<
    Eigen::Map< Eigen::SparseMatrix<T, Options, StorageIndex> >
    > : std::true_type {};

template <class T, int Options, class StorageIndex>
struct is_eigen_sparse_matrix<
    Eigen::SparseMatrix<T, Options, StorageIndex>
    > : std::true_type {};

template <class T>
struct is_eigen_type<
    T,
    typename std::enable_if<
        std::is_arithmetic<typename T::Scalar>::value &&
        std::is_base_of<Eigen::EigenBase<T>, T>::value
        >::type
    > : std::true_type {};

template <class T>
struct value_type<
    T,
    typename std::enable_if<is_eigen_type<T>::value>::type
    >
{
    typedef typename T::Scalar type;
};

template <class T>
struct nonzeros_impl<
    T,
    typename std::enable_if<is_eigen_sparse_matrix<T>::value>::type
    >
{
    static size_t get(const T &matrix) {
        return matrix.nonZeros();
    }
};

template <class T>
struct row_iterator<
    T,
    typename std::enable_if<is_eigen_sparse_matrix<T>::value>::type
    >
{
    typedef typename T::InnerIterator type;
};

template <class T>
struct row_begin_impl<
    T,
    typename std::enable_if<is_eigen_sparse_matrix<T>::value>::type
    >
{
    typedef typename row_iterator<T>::type iterator;
    static iterator get(const T &matrix, size_t row) {
        return iterator(matrix, row);
    }
};

} // namespace backend
} // namespace amgcl

#define AMGCL_USE_EIGEN_VECTORS_WITH_BUILTIN_BACKEND()                        \
    namespace amgcl { namespace backend {                                     \
        template <class V>                                                    \
        struct is_builtin_vector< Eigen::Matrix<V, Eigen::Dynamic, 1> >       \
            : std::true_type {};                                              \
    } }

#endif
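A usage sketch for the Eigen adapter (an editorial addition; `sketch` is an
illustrative name, and the vector specialization relies on the
`AMGCL_USE_EIGEN_VECTORS_WITH_BUILTIN_BACKEND` macro as reconstructed above):

```cpp
#include <Eigen/SparseCore>
#include <Eigen/Dense>
#include <amgcl/adapter/eigen.hpp>

// Teach the builtin backend to treat Eigen dense vectors as its own,
// so they can be used directly as the rhs and solution containers:
AMGCL_USE_EIGEN_VECTORS_WITH_BUILTIN_BACKEND()

void sketch(const Eigen::SparseMatrix<double, Eigen::RowMajor> &A) {
    // The specializations above let A model the amgcl matrix concept,
    // so it may be passed to solver constructors directly. Here we only
    // exercise the row iterator (Eigen's InnerIterator under the hood):
    for (auto a = amgcl::backend::row_begin(A, 0); a; ++a) {
        double v = a.value(); // nonzero value in row 0
        (void)v;
    }
}
```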
==> amgcl-1.4.4/amgcl/adapter/epetra.hpp <==
#ifndef AMGCL_ADAPTER_EPETRA_HPP
#define AMGCL_ADAPTER_EPETRA_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
Copyright (c) 2014, Riccardo Rossi, CIMNE (International Center for Numerical
Methods in Engineering)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/**
 * \file    amgcl/adapter/epetra.hpp
 * \author  Denis Demidov <dennis.demidov@gmail.com>
 * \brief   Adapt Epetra_CrsMatrix from Trilinos.
 * \ingroup adapters
 */

#include <vector>
#include <algorithm>
#include <Epetra_CrsMatrix.h>
#include <Epetra_Map.h>
#include <Epetra_IntVector.h>
#include <Epetra_Import.h>
#include <amgcl/detail/sort_row.hpp>

namespace amgcl {
namespace adapter {

/// Adapts Epetra_CrsMatrix
class epetra_map {
    public:
        typedef double value_type;

        epetra_map(const Epetra_CrsMatrix &A)
            : A(A), order(A.ColMap())
        {
            const Epetra_Map& row_map = A.RowMap();
            const Epetra_Map& col_map = A.ColMap();

            int entries_before;
            int local_entries = row_map.NumMyElements();

            A.Comm().ScanSum(&local_entries, &entries_before, 1);
            entries_before -= local_entries;

            Epetra_IntVector perm(row_map);
            for(int i = 0, j = entries_before; i < local_entries; ++i, ++j)
                perm[i] = j;

            Epetra_Import importer = Epetra_Import(col_map, row_map);
            order.Import(perm, importer, Insert);
        }

        size_t rows() const { return A.NumMyRows(); }
        size_t cols() const { return A.NumGlobalCols(); }
        size_t nonzeros() const { return A.NumMyNonzeros(); }

        class row_iterator {
            public:
                typedef int    col_type;
                typedef double val_type;

                row_iterator(
                        const Epetra_CrsMatrix &A,
                        const Epetra_IntVector &order,
                        int row
                        )
                {
                    int nnz;
                    A.ExtractMyRowView(row, nnz, m_val, m_col);
                    m_end = m_col + nnz;

                    col_copy.assign(m_col, m_col + nnz);
                    val_copy.assign(m_val, m_val + nnz);

                    for(auto &c : col_copy) c = order[c];

                    m_col = &col_copy[0];
                    m_end = m_col + nnz;
                    m_val = &val_copy[0];

                    amgcl::detail::sort_row(m_col, m_val, nnz);
                }

                operator bool() const {
                    return m_col != m_end;
                }

                row_iterator& operator++() {
                    ++m_col;
                    ++m_val;
                    return *this;
                }

                col_type col() const { return *m_col; }
                val_type value() const { return *m_val; }

            private:
                col_type *m_col;
                col_type *m_end;
                val_type *m_val;

                std::vector<col_type> col_copy;
                std::vector<val_type> val_copy;
        };

        row_iterator row_begin(int row) const {
            return row_iterator(A, order, row);
        }

    private:
        const Epetra_CrsMatrix &A;
        Epetra_IntVector order;
};

/// Adapts Epetra_CrsMatrix
inline epetra_map map(const Epetra_CrsMatrix &A) {
    return epetra_map(A);
}

} // namespace adapter
} // namespace amgcl

#endif
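A minimal sketch of the Trilinos adapter in use (an editorial addition;
`sketch` is an illustrative name, and only the methods defined in the
adapter above are exercised):

```cpp
#include <Epetra_CrsMatrix.h>
#include <amgcl/adapter/epetra.hpp>

void sketch(const Epetra_CrsMatrix &A) {
    // Wrap the distributed matrix; the adapter's constructor renumbers
    // the local column map into a contiguous global ordering.
    amgcl::adapter::epetra_map M = amgcl::adapter::map(A);

    // M models the amgcl matrix concept over the locally owned rows:
    size_t local_rows = M.rows();
    for (auto a = M.row_begin(0); a; ++a) {
        // a.col() is a renumbered global column, a.value() the coefficient.
    }
    (void)local_rows;
}
```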
==> amgcl-1.4.4/amgcl/adapter/reorder.hpp <==
#ifndef AMGCL_ADAPTER_REORDER_HPP
#define AMGCL_ADAPTER_REORDER_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/**
 * \file    amgcl/adapter/reorder.hpp
 * \author  Denis Demidov <dennis.demidov@gmail.com>
 * \brief   On-the-fly reordering of matrix and vectors.
 * \ingroup adapters
 */

#include <type_traits>
#include <boost/range/iterator_range.hpp>
#include <boost/iterator/permutation_iterator.hpp>
#include <amgcl/backend/builtin.hpp>
#include <amgcl/reorder/cuthill_mckee.hpp>
#include <amgcl/util.hpp>

namespace amgcl {
namespace adapter {

template <class Matrix>
struct reordered_matrix {
    typedef typename backend::value_type<Matrix>::type value_type;
    typedef typename backend::row_iterator<Matrix>::type base_iterator;

    const Matrix &A;
    const ptrdiff_t *perm;
    const ptrdiff_t *iperm;

    reordered_matrix(const Matrix &A, const ptrdiff_t *perm, const ptrdiff_t *iperm)
        : A(A), perm(perm), iperm(iperm) {}

    size_t rows() const { return backend::rows(A); }
    size_t cols() const { return backend::cols(A); }
    size_t nonzeros() const { return backend::nonzeros(A); }

    struct row_iterator {
        base_iterator base;
        const ptrdiff_t *iperm;

        row_iterator(const base_iterator &base, const ptrdiff_t *iperm)
            : base(base), iperm(iperm) {}

        operator bool() const { return base; }

        row_iterator& operator++() { ++base; return *this; }

        ptrdiff_t col() const { return iperm[base.col()]; }

        value_type value() const { return base.value(); }
    };

    row_iterator row_begin(size_t i) const {
        return row_iterator(backend::row_begin(A, perm[i]), iperm);
    }
};

template <class Vector>
struct reordered_vector {
    typedef typename backend::value_type<
        typename std::decay<Vector>::type
        >::type raw_value_type;
    typedef typename std::conditional<
        std::is_const<Vector>::value,
        const raw_value_type, raw_value_type
        >::type value_type;

    Vector &x;
    const ptrdiff_t *perm;

    reordered_vector(Vector &x, const ptrdiff_t *perm) : x(x), perm(perm) {}

    size_t size() const { return boost::size(x); }

    value_type& operator[](size_t i) const { return x[perm[i]]; }

    boost::permutation_iterator<
        typename std::decay<Vector>::type::iterator, const ptrdiff_t*
        >
    begin() {
        return boost::make_permutation_iterator(boost::begin(x), perm);
    }

    boost::permutation_iterator<
        typename std::decay<Vector>::type::const_iterator, const ptrdiff_t*
        >
    begin() const {
        return boost::make_permutation_iterator(boost::begin(x), perm);
    }

    boost::permutation_iterator<
        typename std::decay<Vector>::type::iterator, const ptrdiff_t*
        >
    end() {
        return boost::make_permutation_iterator(boost::end(x), perm + size());
    }

    boost::permutation_iterator<
        typename std::decay<Vector>::type::const_iterator, const ptrdiff_t*
        >
    end() const {
        return boost::make_permutation_iterator(boost::end(x), perm + size());
    }
};

} // namespace adapter

namespace backend {
namespace detail {

template <class Matrix>
struct use_builtin_matrix_ops< adapter::reordered_matrix<Matrix> >
    : std::true_type {};

} // namespace detail

template <class Vector>
struct is_builtin_vector< adapter::reordered_vector<Vector> >
    : is_builtin_vector<typename std::decay<Vector>::type> {};

} // namespace backend

namespace adapter {

template <class ordering = amgcl::reorder::cuthill_mckee<> >
class reorder {
    public:
        template <class Matrix>
        reorder(const Matrix &A) : n(backend::rows(A)), perm(n), iperm(n) {
            ordering::get(A, perm);

#pragma omp parallel for
            for(ptrdiff_t i = 0; i < n; ++i) iperm[perm[i]] = i;
        }

        template <class Matrix>
        typename std::enable_if<
            !backend::is_builtin_vector<Matrix>::value,
            reordered_matrix<Matrix>
            >::type
        operator()(const Matrix &A) const {
            return reordered_matrix<Matrix>(A, perm.data(), iperm.data());
        }

        template <class Vector>
        typename std::enable_if<
            backend::is_builtin_vector<Vector>::value,
            reordered_vector<Vector>
            >::type
        operator()(Vector &x) const {
            return reordered_vector<Vector>(x, perm.data());
        }

        template <class Vector>
        typename std::enable_if<
            backend::is_builtin_vector<Vector>::value,
            reordered_vector<const Vector>
            >::type
        operator()(const Vector &x) const {
            return reordered_vector<const Vector>(x, perm.data());
        }

        template <class Vector1, class Vector2>
        void forward(const Vector1 &x, Vector2 &y) const {
#pragma omp parallel for
            for(ptrdiff_t i = 0; i < n; ++i) y[i] = x[perm[i]];
        }

        template <class Vector1, class Vector2>
        void inverse(const Vector1 &x, Vector2 &y) const {
#pragma omp parallel for
            for(ptrdiff_t i = 0; i < n; ++i) y[perm[i]] = x[i];
        }

    private:
        ptrdiff_t n;
        backend::numa_vector<ptrdiff_t> perm, iperm;
};

} // namespace adapter
} // namespace amgcl

#endif
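A usage sketch for the reordering adapter (an editorial addition; `sketch`
is an illustrative name, and the default ordering parameter is assumed to be
the Cuthill-McKee ordering as reconstructed above):

```cpp
#include <vector>
#include <amgcl/adapter/reorder.hpp>

template <class Matrix>
void sketch(const Matrix &A, const std::vector<double> &f, std::vector<double> &x) {
    // Compute the permutation from the matrix graph:
    amgcl::adapter::reorder<> perm(A);

    // View the matrix and the rhs in the permuted ordering. Nothing is
    // copied; indices are translated on access:
    auto A_ord = perm(A);
    auto f_ord = perm(f);
    (void)A_ord; (void)f_ord;

    // ... solve the permuted system, writing the answer into y ...
    std::vector<double> y(x.size());

    // Bring the solution back to the original ordering:
    perm.inverse(y, x);
}
```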
==> amgcl-1.4.4/amgcl/adapter/scaled_problem.hpp <==
#ifndef AMGCL_ADAPTER_SCALED_PROBLEM_HPP
#define AMGCL_ADAPTER_SCALED_PROBLEM_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/**
 * \file   amgcl/adapter/scaled_problem.hpp
 * \author Denis Demidov <dennis.demidov@gmail.com>
 * \brief  Scale matrix, rhs, and solution.
 *
 * Example:
 * \code
 * auto A = std::tie(rows, ptr, col, val);
 * auto scale = amgcl::adapter::scale_diagonal<Backend>(A, bprm);
 *
 * // Setup solver
 * Solver solve(scale.matrix(A), prm, bprm);
 *
 * // option 1: rhs is untouched
 * solve(*scale.rhs(b), x);
 *
 * // option 2: rhs is prescaled in-place
 * scale(b);
 * solve(b, x);
 *
 * // postprocess the solution:
 * scale(x);
 * \endcode
 */

#include <memory>
#include <cmath>
#include <amgcl/backend/interface.hpp>

namespace amgcl {
namespace adapter {

template <class Matrix, class Scale>
struct scaled_matrix {
    typedef typename backend::value_type<Matrix>::type value_type;
    typedef typename backend::value_type<Scale>::type  scale_type;

    const Matrix &A;
    const Scale  &s;

    scaled_matrix(const Matrix &A, const Scale &s) : A(A), s(s) {}

    size_t rows() const { return backend::rows(A); }
    size_t cols() const { return backend::cols(A); }
    size_t nonzeros() const { return backend::nonzeros(A); }

    struct row_iterator : public backend::row_iterator<Matrix>::type {
        typedef typename backend::row_iterator<Matrix>::type Base;

        scale_type si;
        const Scale &s;

        row_iterator(const Matrix &A, const Scale &s, size_t i)
            : Base(A, i), si(s[i]), s(s) {}

        value_type value() const {
            return si * static_cast<const Base*>(this)->value() * s[this->col()];
        }
    };

    row_iterator row_begin(size_t i) const {
        return row_iterator(A, s, i);
    }
};

template <class Backend, class Scale>
struct scaled_problem {
    typedef typename Backend::params backend_params;

    const std::shared_ptr<Scale> s;
    const backend_params &bprm;

    scaled_problem(std::shared_ptr<Scale> s, const backend_params &bprm = backend_params())
        : s(s), bprm(bprm) {}

    template <class Matrix>
    scaled_matrix<Matrix, Scale> matrix(const Matrix &A) const {
        return scaled_matrix<Matrix, Scale>(A, *s);
    }

    template <class Vector>
    std::shared_ptr<typename Backend::vector> rhs(const Vector &v) const {
        auto t = Backend::copy_vector(v, bprm);
        (*this)(*t);
        return t;
    }

    template <class Vector>
    void operator()(Vector &x) const {
        typedef typename backend::value_type<Vector>::type value_type;
        typedef typename math::scalar_of<value_type>::type scalar_type;

        const auto one  = math::identity<scalar_type>();
        const auto zero = math::zero<scalar_type>();

        if (backend::is_builtin_vector<Vector>::value) {
            backend::vmul(one, *s, x, zero, x);
        } else {
            backend::vmul(one, *Backend::copy_vector(*s, bprm), x, zero, x);
        }
    }
};

template <class Backend, class Matrix>
scaled_problem<
    Backend,
    std::vector<
        typename math::scalar_of<
            typename backend::value_type<Matrix>::type
            >::type>
    >
scale_diagonal(
        const Matrix &A,
        const typename Backend::params &bprm = typename Backend::params()
        )
{
    typedef typename backend::value_type<Matrix>::type value_type;
    typedef typename math::scalar_of<value_type>::type scalar_type;

    ptrdiff_t n = backend::rows(A);

    auto s = std::make_shared< std::vector<scalar_type> >(n);

#pragma omp parallel for
    for(ptrdiff_t i = 0; i < n; ++i) {
        for(auto a = backend::row_begin(A, i); a; ++a) {
            if (a.col() == i) {
                (*s)[i] = math::inverse(sqrt(math::norm(a.value())));
                break;
            }
        }
    }

    return scaled_problem< Backend, std::vector<scalar_type> >(s, bprm);
}

} // namespace adapter
} // namespace amgcl

#endif
==> amgcl-1.4.4/amgcl/adapter/ublas.hpp <==
#ifndef AMGCL_ADAPTER_UBLAS_HPP
#define AMGCL_ADAPTER_UBLAS_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
Copyright (c) 2014, Riccardo Rossi, CIMNE (International Center for Numerical
Methods in Engineering)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/**
 * \file    amgcl/adapter/ublas.hpp
 * \author  Denis Demidov <dennis.demidov@gmail.com>
 * \brief   Adapters for Boost.uBlas matrices and vectors.
 * \ingroup adapters
 */

#include <tuple>
#include <boost/numeric/ublas/matrix_sparse.hpp>
#include <boost/numeric/ublas/vector.hpp>
#include <amgcl/backend/builtin.hpp>

namespace amgcl {
namespace backend {

// Make builtin backend recognize ublas vectors as its own:
template <class V>
struct is_builtin_vector< boost::numeric::ublas::vector<V> >
    : std::true_type {};

/// Adapts Boost.uBlas matrix.
template <typename V, typename L, std::size_t IB, typename IA, typename TA>
std::tuple<
    size_t,
    iterator_range<typename IA::const_iterator>,
    iterator_range<typename IA::const_iterator>,
    iterator_range<typename TA::const_iterator>
    >
map(const boost::numeric::ublas::compressed_matrix<V, L, IB, IA, TA> &A) {
    return std::make_tuple(
            A.size1(),
            make_iterator_range(
                A.index1_data().begin(), A.index1_data().end()
                ),
            make_iterator_range(
                A.index2_data().begin(), A.index2_data().end()
                ),
            make_iterator_range(
                A.value_data().begin(), A.value_data().end()
                )
            );
}

} // namespace backend
} // namespace amgcl

#endif
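A brief sketch of the uBlas adapter in use (an editorial addition; `sketch`
is an illustrative name, and the `map` signature follows the reconstruction
above):

```cpp
#include <boost/numeric/ublas/matrix_sparse.hpp>
#include <boost/numeric/ublas/vector.hpp>
#include <amgcl/adapter/ublas.hpp>

void sketch(const boost::numeric::ublas::compressed_matrix<double> &A) {
    // Expose the uBlas CRS arrays to amgcl without copying:
    auto M = amgcl::backend::map(A);

    // ublas vectors are recognized by the builtin backend directly,
    // so they can serve as the rhs and the solution containers as-is:
    boost::numeric::ublas::vector<double> f(A.size1()), x(A.size1());
    (void)M; (void)f; (void)x;
}
```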
==> amgcl-1.4.4/amgcl/adapter/zero_copy.hpp <==
#ifndef AMGCL_ADAPTER_ZERO_COPY_HPP
#define AMGCL_ADAPTER_ZERO_COPY_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/**
 * \file    amgcl/adapter/zero_copy.hpp
 * \author  Denis Demidov <dennis.demidov@gmail.com>
 * \brief   Zero-copy adapter for input matrix in CRS format.
 * \ingroup adapters
 */

#include <memory>
#include <type_traits>
#include <amgcl/backend/builtin.hpp>

namespace amgcl {
namespace adapter {

template <class Val, class Ptr, class Col>
std::shared_ptr< backend::crs<Val> >
zero_copy(size_t nrows, size_t ncols, const Ptr *ptr, const Col *col, const Val *val) {
    // Check that Ptr and Col types are binary-compatible with ptrdiff_t:
    static_assert(std::is_integral<Ptr>::value, "Unsupported Ptr type");
    static_assert(std::is_integral<Col>::value, "Unsupported Col type");
    static_assert(sizeof(Ptr) == sizeof(ptrdiff_t), "Unsupported Ptr type");
    static_assert(sizeof(Col) == sizeof(ptrdiff_t), "Unsupported Col type");

    auto A = std::make_shared< backend::crs<Val> >();

    A->nrows = nrows;
    A->ncols = ncols;
    A->nnz   = nrows ? ptr[nrows] : 0;

    A->ptr = (ptrdiff_t*)ptr;
    A->col = (ptrdiff_t*)col;
    A->val = (Val*)val;

    A->own_data = false;

    return A;
}

template <class Val, class Ptr, class Col>
std::shared_ptr< backend::crs<Val> >
zero_copy(size_t n, const Ptr *ptr, const Col *col, const Val *val) {
    return zero_copy(n, n, ptr, col, val);
}

template <class Val, class Ptr, class Col>
std::shared_ptr< backend::crs<Val, Col, Ptr> >
zero_copy_direct(size_t nrows, size_t ncols, const Ptr *ptr, const Col *col, const Val *val) {
    auto A = std::make_shared< backend::crs<Val, Col, Ptr> >();

    A->nrows = nrows;
    A->ncols = ncols;
    A->nnz   = nrows ? ptr[nrows] : 0;

    A->ptr = const_cast<Ptr*>(ptr);
    A->col = const_cast<Col*>(col);
    A->val = const_cast<Val*>(val);

    A->own_data = false;

    return A;
}

template <class Val, class Ptr, class Col>
std::shared_ptr< backend::crs<Val, Col, Ptr> >
zero_copy_direct(size_t n, const Ptr *ptr, const Col *col, const Val *val) {
    return zero_copy_direct(n, n, ptr, col, val);
}

} // namespace adapter
} // namespace amgcl

#endif
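A usage sketch for the zero-copy adapter (an editorial addition; `sketch`
is an illustrative name). The key design point: `zero_copy` reinterprets the
index arrays as `ptrdiff_t*`, which is why the static assertions above
require `Ptr` and `Col` to be binary-compatible integral types, while
`zero_copy_direct` keeps the index types as-is:

```cpp
#include <amgcl/adapter/zero_copy.hpp>

void sketch(size_t n, const ptrdiff_t *ptr, const ptrdiff_t *col, const double *val) {
    // Wrap the existing CRS arrays without copying them. The arrays must
    // outlive the returned matrix, since it does not own its data
    // (own_data == false):
    auto A = amgcl::adapter::zero_copy(n, ptr, col, val);

    // *A is a backend::crs<double> view and can be passed to solver
    // constructors the same way as a regular builtin matrix.
    (void)A;
}
```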
*/ template < class Backend, template class Coarsening, template class Relax > class amg { public: typedef Backend backend_type; typedef typename Backend::value_type value_type; typedef typename Backend::col_type col_type; typedef typename Backend::ptr_type ptr_type; typedef typename Backend::matrix matrix; typedef typename Backend::vector vector; typedef Coarsening coarsening_type; typedef Relax relax_type; typedef typename backend::builtin::matrix build_matrix; typedef typename math::scalar_of::type scalar_type; /// Backend parameters. typedef typename Backend::params backend_params; /// Parameters of the method. /** * The amgcl::amg::params struct includes parameters for each * component of the method as well as some universal parameters. */ struct params { typedef typename coarsening_type::params coarsening_params; typedef typename relax_type::params relax_params; coarsening_params coarsening; ///< Coarsening parameters. relax_params relax; ///< Relaxation parameters. /// Specifies when level is coarse enough to be solved directly. /** * If number of variables at a next level in the hierarchy becomes * lower than this threshold, then the hierarchy construction is * stopped and the linear system is solved directly at this level. */ unsigned coarse_enough; /// Use direct solver at the coarsest level. /** * When set, the coarsest level is solved with a direct solver. * Otherwise a smoother is used as a solver. */ bool direct_coarse; /// Maximum number of levels. /** If this number is reached while the size of the last level is * greater that `coarse_enough`, then the coarsest level will not * be solved exactly, but will use a smoother. */ unsigned max_levels; /// Number of pre-relaxations. unsigned npre; /// Number of post-relaxations. unsigned npost; /// Number of cycles (1 for V-cycle, 2 for W-cycle, etc.). unsigned ncycle; /// Number of cycles to make as part of preconditioning. 
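/// When pre_cycles is zero, apply() degenerates to a plain copy of the
/// right-hand side into the solution vector.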
unsigned pre_cycles; /// Keep matrices in internal format to allow for quick rebuild of the hierarchy bool allow_rebuild; params() : coarse_enough( Backend::direct_solver::coarse_enough() ), direct_coarse(true), max_levels( std::numeric_limits::max() ), npre(1), npost(1), ncycle(1), pre_cycles(1), allow_rebuild(std::is_same::value) {} #ifndef AMGCL_NO_BOOST params(const boost::property_tree::ptree &p) : AMGCL_PARAMS_IMPORT_CHILD(p, coarsening), AMGCL_PARAMS_IMPORT_CHILD(p, relax), AMGCL_PARAMS_IMPORT_VALUE(p, coarse_enough), AMGCL_PARAMS_IMPORT_VALUE(p, direct_coarse), AMGCL_PARAMS_IMPORT_VALUE(p, max_levels), AMGCL_PARAMS_IMPORT_VALUE(p, npre), AMGCL_PARAMS_IMPORT_VALUE(p, npost), AMGCL_PARAMS_IMPORT_VALUE(p, ncycle), AMGCL_PARAMS_IMPORT_VALUE(p, pre_cycles), AMGCL_PARAMS_IMPORT_VALUE(p, allow_rebuild) { check_params(p, {"coarsening", "relax", "coarse_enough", "direct_coarse", "max_levels", "npre", "npost", "ncycle", "pre_cycles", "allow_rebuild"}); precondition(max_levels > 0, "max_levels should be positive"); } void get( boost::property_tree::ptree &p, const std::string &path = "" ) const { AMGCL_PARAMS_EXPORT_CHILD(p, path, coarsening); AMGCL_PARAMS_EXPORT_CHILD(p, path, relax); AMGCL_PARAMS_EXPORT_VALUE(p, path, coarse_enough); AMGCL_PARAMS_EXPORT_VALUE(p, path, direct_coarse); AMGCL_PARAMS_EXPORT_VALUE(p, path, max_levels); AMGCL_PARAMS_EXPORT_VALUE(p, path, npre); AMGCL_PARAMS_EXPORT_VALUE(p, path, npost); AMGCL_PARAMS_EXPORT_VALUE(p, path, ncycle); AMGCL_PARAMS_EXPORT_VALUE(p, path, pre_cycles); AMGCL_PARAMS_EXPORT_VALUE(p, path, allow_rebuild); } #endif } prm; /// Builds the AMG hierarchy for the system matrix. /** * The input matrix is copied here and is safe to delete afterwards. * * \param A The system matrix. Should be convertible to * amgcl::backend::crs<>. * \param p AMG parameters. * * \sa amgcl/adapter/crs_tuple.hpp */ template amg( const Matrix &M, const params &p = params(), const backend_params &bprm = backend_params() ) : prm(p) { auto A = std::make_shared(M); sort_rows(*A); do_init(A, bprm); } /// Builds the AMG hierarchy for the system matrix. /** * The shared pointer to the input matrix is passed here. The matrix * will not be copied and should out-live the amg instance. * The matrix should be either in amgcl::backend::crs format, or * inherit from the class and override its ptr(), col(), and val() * virtual functions. * * \param A The system matrix. * \param p AMG parameters. * * \sa amgcl/adapter/crs_tuple.hpp */ amg( std::shared_ptr A, const params &p = params(), const backend_params &bprm = backend_params() ) : prm(p) { do_init(A, bprm); } /// Rebuild the hierarchy using the new system matrix. /** * This requires for prm.allow_rebuild to be set. The transfer * operators created during the initial setup are reused. */ template void rebuild( const Matrix &M, const backend_params &bprm = backend_params() ) { auto A = std::make_shared(M); sort_rows(*A); rebuild(A, bprm); } /// Rebuild the hierarchy using the new system matrix. /** * This requires for prm.allow_rebuild to be set. The transfer * operators created during the initial setup are reused. */ void rebuild( std::shared_ptr A, const backend_params &bprm = backend_params() ) { precondition(prm.allow_rebuild, "allow_rebuild is not set!"); precondition( backend::rows(*A) == backend::rows(system_matrix()) && backend::cols(*A) == backend::rows(*A), "Matrix dimensions differ from the original ones!" 
); AMGCL_TIC("rebuild"); coarsening_type C(prm.coarsening); for(auto &level : levels) { A = level.rebuild(A, C, prm, bprm); } AMGCL_TOC("rebuild"); } /// Performs single V-cycle for the given right-hand side and solution. /** * \param rhs Right-hand side vector. * \param x Solution vector. */ template void cycle(const Vec1 &rhs, Vec2 &&x) const { cycle(levels.begin(), rhs, x); } /// Performs single V-cycle after clearing x. /** * This is intended for use as a preconditioning procedure. * * \param rhs Right-hand side vector. * \param x Solution vector. */ template void apply(const Vec1 &rhs, Vec2 &&x) const { if (prm.pre_cycles) { backend::clear(x); for(unsigned i = 0; i < prm.pre_cycles; ++i) cycle(rhs, x); } else { backend::copy(rhs, x); } } /// Returns the system matrix from the finest level. std::shared_ptr system_matrix_ptr() const { return levels.front().A; } const matrix& system_matrix() const { return *system_matrix_ptr(); } size_t bytes() const { size_t b = 0; for(const auto &lvl : levels) b += lvl.bytes(); return b; } private: struct level { size_t m_rows, m_nonzeros; std::shared_ptr f; std::shared_ptr u; std::shared_ptr t; std::shared_ptr A; std::shared_ptr P; std::shared_ptr R; std::shared_ptr bP; std::shared_ptr bR; std::shared_ptr< typename Backend::direct_solver > solve; std::shared_ptr relax; size_t bytes() const { size_t b = 0; if (f) b += backend::bytes(*f); if (u) b += backend::bytes(*u); if (t) b += backend::bytes(*t); if (A) b += backend::bytes(*A); if (P) b += backend::bytes(*P); if (R) b += backend::bytes(*R); if (solve) b += backend::bytes(*solve); if (relax) b += backend::bytes(*relax); return b; } level() : m_rows(0), m_nonzeros(0) {} level(std::shared_ptr A, params &prm, const backend_params &bprm) : m_rows(backend::rows(*A)), m_nonzeros(backend::nonzeros(*A)) { AMGCL_TIC("move to backend"); f = Backend::create_vector(m_rows, bprm); u = Backend::create_vector(m_rows, bprm); t = Backend::create_vector(m_rows, bprm); this->A = Backend::copy_matrix(A, bprm); AMGCL_TOC("move to backend"); AMGCL_TIC("relaxation"); relax = std::make_shared(*A, prm.relax, bprm); AMGCL_TOC("relaxation"); } std::shared_ptr step_down( std::shared_ptr A, coarsening_type &C, const backend_params &bprm, bool allow_rebuild) { AMGCL_TIC("transfer operators"); std::shared_ptr P, R; try { std::tie(P, R) = C.transfer_operators(*A); } catch(error::empty_level) { AMGCL_TOC("transfer operators"); return std::shared_ptr(); } sort_rows(*P); sort_rows(*R); if (allow_rebuild) { bP = P; bR = R; } AMGCL_TOC("transfer operators"); AMGCL_TIC("move to backend"); this->P = Backend::copy_matrix(P, bprm); this->R = Backend::copy_matrix(R, bprm); AMGCL_TOC("move to backend"); AMGCL_TIC("coarse operator"); A = C.coarse_operator(*A, *P, *R); sort_rows(*A); AMGCL_TOC("coarse operator"); return A; } void create_coarse( std::shared_ptr A, const backend_params &bprm, bool single_level) { m_rows = backend::rows(*A); m_nonzeros = backend::nonzeros(*A); u = Backend::create_vector(m_rows, bprm); f = Backend::create_vector(m_rows, bprm); solve = Backend::create_solver(A, bprm); if (single_level) this->A = Backend::copy_matrix(A, bprm); } std::shared_ptr rebuild( std::shared_ptr A, const coarsening_type &C, const params &prm, const backend_params &bprm ) { if (this->A) { AMGCL_TIC("move to backend"); this->A = Backend::copy_matrix(A, bprm); AMGCL_TOC("move to backend"); } if(relax) { AMGCL_TIC("relaxation"); relax = std::make_shared(*A, prm.relax, bprm); AMGCL_TOC("relaxation"); } if (solve) { AMGCL_TIC("coarsest level"); 
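// The direct solver exists only on the coarsest level;
// factorize it anew with the updated coarse matrix: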
solve = Backend::create_solver(A, bprm); AMGCL_TOC("coarsest level"); } if (bP && bR) { AMGCL_TIC("coarse operator"); A = C.coarse_operator(*A, *bP, *bR); sort_rows(*A); AMGCL_TOC("coarse operator"); } return A; } size_t rows() const { return m_rows; } size_t nonzeros() const { return m_nonzeros; } }; typedef typename std::list::const_iterator level_iterator; std::list levels; void do_init( std::shared_ptr A, const backend_params &bprm = backend_params() ) { precondition( backend::rows(*A) == backend::cols(*A), "Matrix should be square!" ); bool direct_coarse_solve = true; coarsening_type C(prm.coarsening); while( backend::rows(*A) > prm.coarse_enough) { levels.push_back( level(A, prm, bprm) ); if (levels.size() >= prm.max_levels) break; A = levels.back().step_down(A, C, bprm, prm.allow_rebuild); if (!A) { // Zero-sized coarse level. Probably the system matrix on // this level is diagonal, should be easily solvable with a // couple of smoother iterations. direct_coarse_solve = false; break; } } if (!A || backend::rows(*A) > prm.coarse_enough) { // The coarse matrix is still too big to be solved directly. direct_coarse_solve = false; } if (direct_coarse_solve) { AMGCL_TIC("coarsest level"); if (prm.direct_coarse) { level l; l.create_coarse(A, bprm, levels.empty()); levels.push_back(l); } else { levels.push_back( level(A, prm, bprm) ); } AMGCL_TOC("coarsest level"); } } template void cycle(level_iterator lvl, const Vec1 &rhs, Vec2 &x) const { level_iterator nxt = lvl, end = levels.end(); ++nxt; if (nxt == end) { if (lvl->solve) { AMGCL_TIC("coarse"); (*lvl->solve)(rhs, x); AMGCL_TOC("coarse"); } else { AMGCL_TIC("relax"); for(size_t i = 0; i < prm.npre; ++i) lvl->relax->apply_pre(*lvl->A, rhs, x, *lvl->t); for(size_t i = 0; i < prm.npost; ++i) lvl->relax->apply_post(*lvl->A, rhs, x, *lvl->t); AMGCL_TOC("relax"); } } else { for (size_t j = 0; j < prm.ncycle; ++j) { AMGCL_TIC("relax"); for(size_t i = 0; i < prm.npre; ++i) lvl->relax->apply_pre(*lvl->A, rhs, x, *lvl->t); AMGCL_TOC("relax"); backend::residual(rhs, *lvl->A, x, *lvl->t); backend::spmv(math::identity(), *lvl->R, *lvl->t, math::zero(), *nxt->f); backend::clear(*nxt->u); cycle(nxt, *nxt->f, *nxt->u); backend::spmv(math::identity(), *lvl->P, *nxt->u, math::identity(), x); AMGCL_TIC("relax"); for(size_t i = 0; i < prm.npost; ++i) lvl->relax->apply_post(*lvl->A, rhs, x, *lvl->t); AMGCL_TOC("relax"); } } } template class C, template class R> friend std::ostream& operator<<(std::ostream &os, const amg &a); }; /// Sends information about the AMG hierarchy to output stream. 
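///
/// A typical report has the following shape (the numbers below are
/// illustrative only and depend on the problem and the chosen components):
///
///     Number of levels:    3
///     Operator complexity: 1.35
///     Grid complexity:     1.19
///     Memory footprint:    11.12 M
///
///     level     unknowns       nonzeros      memory
///     ---------------------------------------------
///         0        10000          49600     8.13 M (74.29%)
///         1         1936          17160     2.79 M (25.70%)
///         2            3              5     1.00 K ( 0.01%)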
template class C, template class R> std::ostream& operator<<(std::ostream &os, const amg &a) { typedef typename amg::level level; ios_saver ss(os); size_t sum_dof = 0; size_t sum_nnz = 0; size_t sum_mem = 0; for(const level &lvl : a.levels) { sum_dof += lvl.rows(); sum_nnz += lvl.nonzeros(); sum_mem += lvl.bytes(); } os << "Number of levels: " << a.levels.size() << "\nOperator complexity: " << std::fixed << std::setprecision(2) << 1.0 * sum_nnz / a.levels.front().nonzeros() << "\nGrid complexity: " << std::fixed << std::setprecision(2) << 1.0 * sum_dof / a.levels.front().rows() << "\nMemory footprint: " << human_readable_memory(sum_mem) << "\n\n" "level unknowns nonzeros memory\n" "---------------------------------------------\n"; size_t depth = 0; for(const level &lvl : a.levels) { os << std::setw(5) << depth++ << std::setw(13) << lvl.rows() << std::setw(15) << lvl.nonzeros() << std::setw(12) << human_readable_memory(lvl.bytes()) << " (" << std::setw(5) << std::fixed << std::setprecision(2) << 100.0 * lvl.nonzeros() / sum_nnz << "%)" << std::endl; } return os; } } // namespace amgcl #endif amgcl-1.4.4/amgcl/backend/000077500000000000000000000000001451676433700152755ustar00rootroot00000000000000amgcl-1.4.4/amgcl/backend/blaze.hpp000066400000000000000000000205651451676433700171130ustar00rootroot00000000000000#ifndef AMGCL_BACKEND_BLAZE_HPP #define AMGCL_BACKEND_BLAZE_HPP /* The MIT License Copyright (c) 2012-2022 Denis Demidov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * \file amgcl/backend/viennacl.hpp * \author Denis Demidov * \brief Blaze backend. * * Uses Blaze (https://code.google.com/p/blaze-lib) types and operations. */ #include #include #include namespace amgcl { namespace backend { /// Blaze backend /** * This is a backend that uses types defined in the Blaze library * (https://bitbucket.org/blaze-lib/blaze/src). * * \param real Value type. * \ingroup backends */ template struct blaze { typedef real value_type; typedef ptrdiff_t index_type; typedef ptrdiff_t col_type; typedef ptrdiff_t ptr_type; struct provides_row_iterator : std::true_type {}; typedef ::blaze::CompressedMatrix matrix; typedef ::blaze::DynamicVector vector; typedef ::blaze::DynamicVector matrix_diagonal; typedef solver::skyline_lu direct_solver; /// Backend parameters. typedef amgcl::detail::empty_params params; static std::string name() { return "blaze"; } /// Copy matrix from builtin backend. 
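/// The rows are filled with CompressedMatrix::append() and committed with
/// finalize(), blaze's low-level interface for building a compressed
/// matrix row by row.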
static std::shared_ptr copy_matrix( std::shared_ptr< typename builtin::matrix > A, const params& ) { typedef typename row_iterator::matrix>::type row_iterator; const size_t n = rows(*A); const size_t m = cols(*A); auto B = std::make_shared(n, m); B->reserve(nonzeros(*A)); for(size_t i = 0; i < n; ++i) { for(row_iterator a = A->row_begin(i); a; ++a) { B->append(i, a.col(), a.value()); } B->finalize(i); } return B; } /// Copy vector from builtin backend. static std::shared_ptr copy_vector(typename builtin::vector const &x, const params&) { auto v = std::make_shared(x.size(), &x[0]); return v; } /// Copy vector from builtin backend. static std::shared_ptr copy_vector( std::shared_ptr< typename builtin::vector > x, const params &prm ) { return copy_vector(*x, prm); } /// Create vector of the specified size. static std::shared_ptr create_vector(size_t size, const params&) { return std::make_shared(size); } /// Create direct solver for coarse level static std::shared_ptr create_solver(std::shared_ptr< typename builtin::matrix > A, const params&) { return std::make_shared(*A); } }; //--------------------------------------------------------------------------- // Backend interface implementation //--------------------------------------------------------------------------- template < typename V, bool O > struct value_type < ::blaze::CompressedMatrix > { typedef V type; }; template < typename V > struct value_type < ::blaze::DynamicVector > { typedef V type; }; template < typename V, bool O > struct cols_impl< ::blaze::CompressedMatrix > { typedef ::blaze::CompressedMatrix matrix; static size_t get(const matrix &A) { return A.columns(); } }; template < typename V, bool O > struct nonzeros_impl< ::blaze::CompressedMatrix > { typedef ::blaze::CompressedMatrix matrix; static size_t get(const matrix &A) { return A.nonZeros(); } }; template < typename V, bool O > struct row_iterator< ::blaze::CompressedMatrix > { struct type { typedef typename ::blaze::CompressedMatrix::ConstIterator Base; Base base; Base end; operator bool() const { return base != end; } type operator++() { ++base; return *this; } size_t col() const { return base->index(); } V value() const { return base->value(); } }; }; template < typename V, bool O > struct row_begin_impl< ::blaze::CompressedMatrix > { typedef typename row_iterator< ::blaze::CompressedMatrix >::type iterator; static iterator get(const ::blaze::CompressedMatrix &A, size_t row) { return iterator{A.cbegin(row), A.cend(row)}; } }; template < class A, class B, typename V, bool O > struct spmv_impl< A, ::blaze::CompressedMatrix, ::blaze::DynamicVector, B, ::blaze::DynamicVector > { typedef ::blaze::CompressedMatrix matrix; typedef ::blaze::DynamicVector vector; static void apply(A alpha, const matrix &K, const vector &x, B beta, vector &y) { if (!math::is_zero(beta)) y = alpha * (K * x) + beta * y; else y = alpha * (K * x); } }; template < typename V, bool O > struct residual_impl< ::blaze::CompressedMatrix, ::blaze::DynamicVector, ::blaze::DynamicVector, ::blaze::DynamicVector > { typedef ::blaze::CompressedMatrix matrix; typedef ::blaze::DynamicVector vector; static void apply(const vector &rhs, const matrix &A, const vector &x, vector &r) { r = rhs - A * x; } }; template < typename V > struct clear_impl< ::blaze::DynamicVector > { typedef ::blaze::DynamicVector vector; static void apply(vector &x) { x = 0; } }; template < typename V > struct copy_impl< ::blaze::DynamicVector, ::blaze::DynamicVector > { typedef ::blaze::DynamicVector vector; static void apply(const vector 
&x, vector &y) { y = x; } }; template < typename V > struct inner_product_impl< ::blaze::DynamicVector, ::blaze::DynamicVector > { typedef ::blaze::DynamicVector vector; static V get(const vector &x, const vector &y) { return (x, y); } }; template < typename A, typename B, typename V > struct axpby_impl< A, ::blaze::DynamicVector, B, ::blaze::DynamicVector > { typedef ::blaze::DynamicVector vector; static void apply(A a, const vector &x, B b, vector &y) { if (!math::is_zero(b)) y = a * x + b * y; else y = a * x; } }; template < typename A, typename B, typename C, typename V > struct axpbypcz_impl< A, ::blaze::DynamicVector, B, ::blaze::DynamicVector, C, ::blaze::DynamicVector > { typedef ::blaze::DynamicVector vector; static void apply( V a, const vector &x, V b, const vector &y, V c, vector &z ) { if (!math::is_zero(c)) z = a * x + b * y + c * z; else z = a * x + b * y; } }; template < typename A, typename B, typename V > struct vmul_impl< A, ::blaze::DynamicVector, ::blaze::DynamicVector, B, ::blaze::DynamicVector > { typedef ::blaze::DynamicVector vector; static void apply(A a, const vector &x, const vector &y, B b, vector &z) { if (!math::is_zero(b)) z = a * x * y + b * z; else z = a * x * y; } }; } // namespace backend } // namespace amgcl #endif amgcl-1.4.4/amgcl/backend/block_crs.hpp000066400000000000000000000232541451676433700177550ustar00rootroot00000000000000#ifndef AMGCL_BACKEND_BLOCK_CRS_HPP #define AMGCL_BACKEND_BLOCK_CRS_HPP /* The MIT License Copyright (c) 2012-2022 Denis Demidov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * \file amgcl/backend/block_crs.hpp * \author Denis Demidov * \brief Sparse matrix in block-CRS format. */ #include #include #include #include #include #include namespace amgcl { namespace backend { /// Sparse matrix in Block CRS format. /** * \param V Value type. * \param C Column number type. * \param P Index type. */ template < typename V, typename C, typename P > struct bcrs { typedef V value_type; typedef V val_type; typedef C col_type; typedef P ptr_type; size_t block_size; size_t nrows, ncols; size_t brows, bcols; std::vector ptr; std::vector col; std::vector val; /// Converts matrix in CRS format to Block CRS format. /** * \param A Input matrix. * \param block_size Block size. * * \note Input matrix dimensions are *not* required to be divisible by * block_size. 
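     * Blocks that extend past the right or bottom border of the matrix
     * are padded with explicit zeros in the value array.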
*/ template < class Matrix > bcrs(const Matrix &A, size_t block_size) : block_size(block_size), nrows( rows(A) ), ncols( cols(A) ), brows((nrows + block_size - 1) / block_size), bcols((ncols + block_size - 1) / block_size), ptr(brows + 1, 0) { #pragma omp parallel { std::vector marker(bcols, -1); // Count number of nonzeros in block matrix. #pragma omp for for(ptr_type ib = 0; ib < static_cast(brows); ++ib) { ptr_type ia = ib * block_size; for(size_t k = 0; k < block_size && ia < static_cast(nrows); ++k, ++ia) { for(auto a = backend::row_begin(A, ia); a; ++a) { col_type cb = a.col() / block_size; if (marker[cb] != static_cast(ib)) { marker[cb] = static_cast(ib); ++ptr[ib + 1]; } } } } #pragma omp single { std::partial_sum(ptr.begin(), ptr.end(), ptr.begin()); col.resize(ptr.back()); val.resize(ptr.back() * block_size * block_size, 0); } std::fill(marker.begin(), marker.end(), -1); // Fill the block matrix. #pragma omp for for(ptr_type ib = 0; ib < static_cast(brows); ++ib) { ptr_type ia = ib * block_size; ptr_type row_beg = ptr[ib]; ptr_type row_end = row_beg; for(size_t k = 0; k < block_size && ia < static_cast(nrows); ++k, ++ia) { for(auto a = backend::row_begin(A, ia); a; ++a) { col_type cb = a.col() / block_size; col_type cc = a.col() % block_size; val_type va = a.value(); if (marker[cb] < row_beg) { marker[cb] = row_end; col[row_end] = cb; val[block_size * (block_size * row_end + k) + cc] = va; ++row_end; } else { val[block_size * (block_size * marker[cb] + k) + cc] = va; } } } } } } }; /// block_crs backend definition. /** * \param real Value type. * \ingroup backends */ template struct block_crs { typedef real value_type; typedef ptrdiff_t index_type; typedef ptrdiff_t col_type; typedef ptrdiff_t ptr_type; typedef bcrs matrix; typedef typename builtin::vector vector; typedef typename builtin::vector matrix_diagonal; typedef solver::skyline_lu direct_solver; struct provides_row_iterator : std::false_type {}; /// Backend parameters. struct params { /// Block size to use with the created matrices. size_t block_size; params(size_t block_size = 4) : block_size(block_size) {} #ifndef AMGCL_NO_BOOST params(const boost::property_tree::ptree &p) : AMGCL_PARAMS_IMPORT_VALUE(p, block_size) { check_params(p, {"block_size"}); } void get(boost::property_tree::ptree &p, const std::string &path) const { AMGCL_PARAMS_EXPORT_VALUE(p, path, block_size); } #endif }; static std::string name() { return "block_crs"; } /// Copy matrix from builtin backend. static std::shared_ptr copy_matrix(std::shared_ptr< typename backend::builtin::matrix > A, const params &prm) { return std::make_shared(*A, prm.block_size); } /// Copy vector from builtin backend. static std::shared_ptr copy_vector(const vector &x, const params&) { return std::make_shared(x); } static std::shared_ptr< vector > copy_vector(const std::vector &x, const params&) { return std::make_shared(x); } /// Copy vector from builtin backend. static std::shared_ptr copy_vector(std::shared_ptr< vector > x, const params&) { return x; } /// Create vector of the specified size. 
static std::shared_ptr create_vector(size_t size, const params&) { return std::make_shared(size); } static std::shared_ptr create_solver( std::shared_ptr< typename backend::builtin::matrix > A, const params&) { return std::make_shared(*A); } }; //--------------------------------------------------------------------------- // Specialization of backend interface //--------------------------------------------------------------------------- template < typename V, typename C, typename P > struct rows_impl< bcrs > { static size_t get(const bcrs &A) { return A.nrows; } }; template < typename V, typename C, typename P > struct cols_impl< bcrs > { static size_t get(const bcrs &A) { return A.ncols; } }; template < typename V, typename C, typename P > struct nonzeros_impl< bcrs > { static size_t get(const bcrs &A) { return A.ptr.back() * A.block_size * A.block_size; } }; template < typename Alpha, typename Beta, typename V, typename C, typename P, class Vec1, class Vec2 > struct spmv_impl< Alpha, bcrs, Vec1, Beta, Vec2 > { typedef bcrs matrix; static void apply(Alpha alpha, const matrix &A, const Vec1 &x, Beta beta, Vec2 &y) { const size_t nb = A.brows; const size_t na = A.nrows; const size_t ma = A.ncols; const size_t b1 = A.block_size; const size_t b2 = b1 * b1; if (!math::is_zero(beta)) { if (beta != 1) { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(na); ++i) { y[i] *= beta; } } } else { backend::clear(y); } #pragma omp parallel for for(ptrdiff_t ib = 0; ib < static_cast(nb); ++ib) { for(P jb = A.ptr[ib], eb = A.ptr[ib + 1]; jb < eb; ++jb) { size_t x0 = A.col[jb] * b1; size_t y0 = ib * b1; block_prod(b1, std::min(b1, ma - x0), std::min(b1, na - y0), alpha, &A.val[jb * b2], &x[x0], &y[y0] ); } } } static void block_prod(size_t dim, size_t nx, size_t ny, Alpha alpha, const V *A, const V *x, V *y) { for(size_t i = 0; i < ny; ++i, ++y) { const V * xx = x; V sum = 0; for(size_t j = 0; j < dim; ++j, ++A, ++xx) if (j < nx) sum += (*A) * (*xx); *y += alpha * sum; } } }; template < typename V, typename C, typename P, class Vec1, class Vec2, class Vec3 > struct residual_impl< bcrs, Vec1, Vec2, Vec3 > { typedef bcrs matrix; static void apply(const Vec1 &rhs, const matrix &A, const Vec2 &x, Vec3 &r) { typedef typename math::scalar_of::type S; const auto one = math::identity(); backend::copy(rhs, r); backend::spmv(-one, A, x, one, r); } }; } // namespace backend } // namespace amgcl #endif amgcl-1.4.4/amgcl/backend/builtin.hpp000066400000000000000000001143721451676433700174640ustar00rootroot00000000000000#ifndef AMGCL_BACKEND_BUILTIN_HPP #define AMGCL_BACKEND_BUILTIN_HPP /* The MIT License Copyright (c) 2012-2022 Denis Demidov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * \file amgcl/backend/builtin.hpp * \author Denis Demidov * \brief Builtin backend. */ #include #include #include #include #include #ifdef _OPENMP # include #endif #include #include #include #include #include #include #include namespace amgcl { namespace backend { /// Sparse matrix stored in CRS format. template < typename val_t = double, typename col_t = ptrdiff_t, typename ptr_t = col_t > struct crs { typedef val_t value_type; typedef val_t val_type; typedef col_t col_type; typedef ptr_t ptr_type; size_t nrows, ncols, nnz; ptr_type * ptr; col_type * col; val_type * val; bool own_data; crs() : nrows(0), ncols(0), nnz(0), ptr(0), col(0), val(0), own_data(true) {} template < class PtrRange, class ColRange, class ValRange > crs(size_t nrows, size_t ncols, const PtrRange &ptr_range, const ColRange &col_range, const ValRange &val_range ) : nrows(nrows), ncols(ncols), nnz(0), ptr(0), col(0), val(0), own_data(true) { AMGCL_TIC("CSR copy"); precondition(static_cast(nrows + 1) == std::distance( std::begin(ptr_range), std::end(ptr_range)), "ptr_range has wrong size in crs constructor"); nnz = ptr_range[nrows]; precondition(static_cast(nnz) == std::distance( std::begin(col_range), std::end(col_range)), "col_range has wrong size in crs constructor"); precondition(static_cast(nnz) == std::distance( std::begin(val_range), std::end(val_range)), "val_range has wrong size in crs constructor"); ptr = new ptr_type[nrows + 1]; col = new col_type[nnz]; val = new val_type[nnz]; ptr[0] = ptr_range[0]; #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) { ptr[i+1] = ptr_range[i+1]; for(auto j = ptr_range[i]; j < ptr_range[i+1]; ++j) { col[j] = col_range[j]; val[j] = val_range[j]; } } AMGCL_TOC("CSR copy"); } template crs(const Matrix &A) : nrows(backend::rows(A)), ncols(backend::cols(A)), nnz(0), ptr(0), col(0), val(0), own_data(true) { AMGCL_TIC("CSR copy"); ptr = new ptr_type[nrows + 1]; ptr[0] = 0; #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) { int row_width = 0; for(auto a = backend::row_begin(A, i); a; ++a) ++row_width; ptr[i+1] = row_width; } nnz = scan_row_sizes(); col = new col_type[nnz]; val = new val_type[nnz]; #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) { ptr_type row_head = ptr[i]; for(auto a = backend::row_begin(A, i); a; ++a) { col[row_head] = a.col(); val[row_head] = a.value(); ++row_head; } } AMGCL_TOC("CSR copy"); } crs(const crs &other) : nrows(other.nrows), ncols(other.ncols), nnz(other.nnz), ptr(0), col(0), val(0), own_data(true) { if (other.ptr && other.col && other.val) { ptr = new ptr_type[nrows + 1]; col = new col_type[nnz]; val = new val_type[nnz]; ptr[0] = other.ptr[0]; #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) { ptr[i+1] = other.ptr[i+1]; for(ptr_type j = other.ptr[i]; j < other.ptr[i+1]; ++j) { col[j] = other.col[j]; val[j] = other.val[j]; } } } } crs(crs &&other) : nrows(other.nrows), ncols(other.ncols), nnz(other.nnz), ptr(other.ptr), col(other.col), val(other.val), own_data(other.own_data) { other.nrows = 0; other.ncols = 0; other.nnz = 0; other.ptr = 0; other.col = 0; other.val = 0; } const crs& operator=(const crs &other) { free_data(); nrows = other.nrows; ncols = other.ncols; nnz = other.nnz; if 
(other.ptr && other.col && other.val) { ptr = new ptr_type[nrows + 1]; col = new col_type[nnz]; val = new val_type[nnz]; ptr[0] = other.ptr[0]; #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) { ptr[i+1] = other.ptr[i+1]; for(ptr_type j = other.ptr[i]; j < other.ptr[i+1]; ++j) { col[j] = other.col[j]; val[j] = other.val[j]; } } } return *this; } const crs& operator=(crs &&other) { std::swap(nrows, other.nrows); std::swap(ncols, other.ncols); std::swap(nnz, other.nnz); std::swap(ptr, other.ptr); std::swap(col, other.col); std::swap(val, other.val); std::swap(own_data, other.own_data); return *this; } void free_data() { if (own_data) { delete[] ptr; ptr = 0; delete[] col; col = 0; delete[] val; val = 0; } } void set_size(size_t n, size_t m, bool clean_ptr = false) { precondition(!ptr, "matrix data has already been allocated!"); nrows = n; ncols = m; ptr = new ptr_type[nrows + 1]; if (clean_ptr) { ptr[0] = 0; #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) ptr[i+1] = 0; } } ptr_type scan_row_sizes() { std::partial_sum(ptr, ptr + nrows + 1, ptr); return ptr[nrows]; } void set_nonzeros() { set_nonzeros(ptr[nrows]); #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(nrows); ++i) { ptrdiff_t row_beg = ptr[i]; ptrdiff_t row_end = ptr[i+1]; for(ptrdiff_t j = row_beg; j < row_end; ++j) { col[j] = 0; val[j] = math::zero(); } } } void set_nonzeros(size_t n, bool need_values = true) { precondition(!col && !val, "matrix data has already been allocated!"); nnz = n; col = new col_type[nnz]; if (need_values) val = new val_type[nnz]; } ~crs() { free_data(); } class row_iterator { public: row_iterator( const col_type * col, const col_type * end, const val_type * val ) : m_col(col), m_end(end), m_val(val) {} operator bool() const { return m_col < m_end; } row_iterator& operator++() { ++m_col; ++m_val; return *this; } col_type col() const { return *m_col; } val_type value() const { return *m_val; } private: const col_type * m_col; const col_type * m_end; const val_type * m_val; }; row_iterator row_begin(size_t row) const { ptr_type p = ptr[row]; ptr_type e = ptr[row + 1]; return row_iterator(col + p, col + e, val + p); } size_t bytes() const { if (own_data) { return sizeof(ptr_type) * (nrows + 1) + sizeof(col_type) * nnz + sizeof(val_type) * nnz; } else { return 0; } } }; /// Sort rows of the matrix column-wise. template < typename V, typename C, typename P > void sort_rows(crs &A) { const size_t n = rows(A); #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { P beg = A.ptr[i]; P end = A.ptr[i + 1]; amgcl::detail::sort_row(A.col + beg, A.val + beg, end - beg); } } /// Transpose of a sparse matrix. template < typename V, typename C, typename P > std::shared_ptr< crs > transpose(const crs &A) { const size_t n = rows(A); const size_t m = cols(A); const size_t nnz = nonzeros(A); auto T = std::make_shared< crs >(); T->set_size(m, n, true); for(size_t j = 0; j < nnz; ++j) ++( T->ptr[A.col[j] + 1] ); T->scan_row_sizes(); T->set_nonzeros(); for(size_t i = 0; i < n; i++) { for(P j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) { P head = T->ptr[A.col[j]]++; T->col[head] = static_cast(i); T->val[head] = math::adjoint(A.val[j]); } } std::rotate(T->ptr, T->ptr + m, T->ptr + m + 1); T->ptr[0] = 0; return T; } /// Matrix-matrix product. 
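/// The SpGEMM kernel is chosen at run time: RMerge (spgemm_rmerge) when
/// more than 16 OpenMP threads are available, Saad's algorithm
/// (spgemm_saad) otherwise.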
template std::shared_ptr< crs > product(const crs &A, const crs &B, bool sort = false) { auto C = std::make_shared< crs >(); #ifdef _OPENMP int nt = omp_get_max_threads(); #else int nt = 1; #endif if (nt > 16) { spgemm_rmerge(A, B, *C); } else { spgemm_saad(A, B, *C, sort); } return C; } /// Sum of two matrices template std::shared_ptr< crs > sum(Val alpha, const crs &A, Val beta, const crs &B, bool sort = false) { typedef ptrdiff_t Idx; auto C = std::make_shared< crs >(); precondition(A.nrows == B.nrows && A.ncols == B.ncols , "matrices should have same shape!"); C->set_size(A.nrows, A.ncols); C->ptr[0] = 0; #pragma omp parallel { std::vector marker(C->ncols, -1); #pragma omp for for(Idx i = 0; i < static_cast(C->nrows); ++i) { Idx C_cols = 0; for(Idx j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) { Idx c = A.col[j]; if (marker[c] != i) { marker[c] = i; ++C_cols; } } for(Idx j = B.ptr[i], e = B.ptr[i+1]; j < e; ++j) { Idx c = B.col[j]; if (marker[c] != i) { marker[c] = i; ++C_cols; } } C->ptr[i + 1] = C_cols; } } C->set_nonzeros(C->scan_row_sizes()); #pragma omp parallel { std::vector marker(C->ncols, -1); #pragma omp for for(Idx i = 0; i < static_cast(C->nrows); ++i) { Idx row_beg = C->ptr[i]; Idx row_end = row_beg; for(Idx j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) { Idx c = A.col[j]; Val v = alpha * A.val[j]; if (marker[c] < row_beg) { marker[c] = row_end; C->col[row_end] = c; C->val[row_end] = v; ++row_end; } else { C->val[marker[c]] += v; } } for(Idx j = B.ptr[i], e = B.ptr[i+1]; j < e; ++j) { Idx c = B.col[j]; Val v = beta * B.val[j]; if (marker[c] < row_beg) { marker[c] = row_end; C->col[row_end] = c; C->val[row_end] = v; ++row_end; } else { C->val[marker[c]] += v; } } if (sort) amgcl::detail::sort_row( C->col + row_beg, C->val + row_beg, row_end - row_beg); } } return C; } /// Scale matrix values. template void scale(crs &A, T s) { ptrdiff_t n = backend::rows(A); #pragma omp parallel for for(ptrdiff_t i = 0; i < n; ++i) { for(ptrdiff_t j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) A.val[j] *= s; } } // Reduce matrix to a pointwise one template std::shared_ptr< crs< typename math::scalar_of::type, col_type, ptr_type > > pointwise_matrix(const crs &A, unsigned block_size) { typedef value_type V; typedef typename math::scalar_of::type S; AMGCL_TIC("pointwise_matrix"); const ptrdiff_t n = A.nrows; const ptrdiff_t m = A.ncols; const ptrdiff_t np = n / block_size; const ptrdiff_t mp = m / block_size; precondition(np * block_size == n, "Matrix size should be divisible by block_size"); auto ap = std::make_shared< crs >(); auto &Ap = *ap; Ap.set_size(np, mp, true); #pragma omp parallel { std::vector j(block_size); std::vector e(block_size); // Count number of nonzeros in block matrix. 
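        // Each pointwise row ip covers block_size consecutive scalar rows;
        // their column lists are merged on the fly, with j[k] holding the
        // current position inside scalar row k and e[k] its end.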
#pragma omp for for(ptrdiff_t ip = 0; ip < np; ++ip) { ptrdiff_t ia = ip * block_size; col_type cur_col = 0; bool done = true; for(unsigned k = 0; k < block_size; ++k) { ptr_type beg = j[k] = A.ptr[ia + k]; ptr_type end = e[k] = A.ptr[ia + k + 1]; if (beg == end) continue; col_type c = A.col[beg]; if (done) { done = false; cur_col = c; } else { cur_col = std::min(cur_col, c); } } while(!done) { cur_col /= block_size; ++Ap.ptr[ip + 1]; done = true; col_type col_end = (cur_col + 1) * block_size; for(unsigned k = 0; k < block_size; ++k) { ptr_type beg = j[k]; ptr_type end = e[k]; while(beg < end) { col_type c = A.col[beg++]; if (c >= col_end) { if (done) { done = false; cur_col = c; } else { cur_col = std::min(cur_col, c); } break; } } j[k] = beg; } } } } Ap.set_nonzeros(Ap.scan_row_sizes()); #pragma omp parallel { std::vector j(block_size); std::vector e(block_size); #pragma omp for for(ptrdiff_t ip = 0; ip < np; ++ip) { ptrdiff_t ia = ip * block_size; col_type cur_col = 0; ptr_type head = Ap.ptr[ip]; bool done = true; for(unsigned k = 0; k < block_size; ++k) { ptr_type beg = j[k] = A.ptr[ia + k]; ptr_type end = e[k] = A.ptr[ia + k + 1]; if (beg == end) continue; col_type c = A.col[beg]; if (done) { done = false; cur_col = c; } else { cur_col = std::min(cur_col, c); } } while(!done) { cur_col /= block_size; Ap.col[head] = cur_col; done = true; bool first = true; S cur_val = math::zero(); col_type col_end = (cur_col + 1) * block_size; for(unsigned k = 0; k < block_size; ++k) { ptr_type beg = j[k]; ptr_type end = e[k]; while(beg < end) { col_type c = A.col[beg]; S v = math::norm(A.val[beg]); ++beg; if (c >= col_end) { if (done) { done = false; cur_col = c; } else { cur_col = std::min(cur_col, c); } break; } if (first) { first = false; cur_val = v; } else { cur_val = std::max(cur_val, v); } } j[k] = beg; } Ap.val[head++] = cur_val; } } } AMGCL_TOC("pointwise_matrix"); return ap; } /** NUMA-aware vector container. 
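 *
 * The storage is initialized in an OpenMP-parallel loop, so that with the
 * usual first-touch allocation policy the memory pages are placed on the
 * NUMA nodes of the threads that will later access them.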
*/ template class numa_vector { public: typedef T value_type; numa_vector() : n(0), p(0) {} numa_vector(size_t n, bool init = true) : n(n), p(new T[n]) { if (init) { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) p[i] = math::zero(); } } void resize(size_t size, bool init = true) { delete[] p; p = 0; n = size; p = new T[n]; if (init) { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) p[i] = math::zero(); } } template numa_vector(const Vector &other, typename std::enable_if::value, int>::type = 0 ) : n(other.size()), p(new T[n]) { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) p[i] = other[i]; } template numa_vector(Iterator beg, Iterator end) : n(std::distance(beg, end)), p(new T[n]) { static_assert(std::is_same< std::random_access_iterator_tag, typename std::iterator_traits::iterator_category >::value, "Iterator has to be random access"); #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) p[i] = beg[i]; } ~numa_vector() { delete[] p; p = 0; } inline size_t size() const { return n; } inline const T& operator[](size_t i) const { return p[i]; } inline T& operator[](size_t i) { return p[i]; } inline const T* data() const { return p; } inline T* data() { return p; } void swap(numa_vector &other) { std::swap(n, other.n); std::swap(p, other.p); } private: size_t n; T *p; }; /// Diagonal of a matrix template < typename V, typename C, typename P > std::shared_ptr< numa_vector > diagonal(const crs &A, bool invert = false) { const size_t n = rows(A); auto dia = std::make_shared< numa_vector >(n, false); #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { for(auto a = A.row_begin(i); a; ++a) { if (a.col() == i) { V d = a.value(); if (invert) { d = math::is_zero(d) ? math::identity() : math::inverse(d); } (*dia)[i] = d; break; } } } return dia; } // Estimate spectral radius of the matrix. // Use Gershgorin disk theorem when power_iters == 0, // Use Power method when power_iters > 0. // When scale = true, scale the matrix by its inverse diagonal. template static typename math::scalar_of::type>::type spectral_radius(const Matrix &A, int power_iters = 0) { AMGCL_TIC("spectral radius"); typedef typename backend::value_type::type value_type; typedef typename math::rhs_of::type rhs_type; typedef typename math::scalar_of::type scalar_type; const ptrdiff_t n = backend::rows(A); scalar_type radius; if (power_iters <= 0) { // Use Gershgorin disk theorem. radius = 0; #pragma omp parallel { scalar_type emax = 0; value_type dia = math::identity(); #pragma omp for nowait for(ptrdiff_t i = 0; i < n; ++i) { scalar_type s = 0; for(ptrdiff_t j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) { ptrdiff_t c = A.col[j]; value_type v = A.val[j]; s += math::norm(v); if (scale && c == i) dia = v; } if (scale) s *= math::norm(math::inverse(dia)); emax = std::max(emax, s); } #pragma omp critical radius = std::max(radius, emax); } } else { // Power method. backend::numa_vector b0(n, false), b1(n, false); // Fill the initial vector with random values. // Also extract the inverted matrix diagonal values. 
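    // b0_norm accumulates the squared norm of the random initial vector
    // across threads; it is used below to normalize b0.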
scalar_type b0_norm = 0; #pragma omp parallel { #ifdef _OPENMP int tid = omp_get_thread_num(); #else int tid = 0; #endif std::mt19937 rng(tid); std::uniform_real_distribution rnd(-1, 1); scalar_type loc_norm = 0; #pragma omp for nowait for(ptrdiff_t i = 0; i < n; ++i) { rhs_type v = math::constant(rnd(rng)); b0[i] = v; loc_norm += math::norm(math::inner_product(v,v)); } #pragma omp critical b0_norm += loc_norm; } // Normalize b0 b0_norm = 1 / sqrt(b0_norm); #pragma omp parallel for for(ptrdiff_t i = 0; i < n; ++i) { b0[i] = b0_norm * b0[i]; } for(int iter = 0; iter < power_iters;) { // b1 = scale ? (D^1 * A) * b0 : A * b0 // b1_norm = ||b1|| // radius = scalar_type b1_norm = 0; radius = 0; #pragma omp parallel { scalar_type loc_norm = 0; scalar_type loc_radi = 0; value_type dia = math::identity(); #pragma omp for nowait for(ptrdiff_t i = 0; i < n; ++i) { rhs_type s = math::zero(); for(ptrdiff_t j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) { ptrdiff_t c = A.col[j]; value_type v = A.val[j]; if (scale && c == i) dia = v; s += v * b0[c]; } if (scale) s = math::inverse(dia) * s; loc_norm += math::norm(math::inner_product(s, s)); loc_radi += math::norm(math::inner_product(s, b0[i])); b1[i] = s; } #pragma omp critical { b1_norm += loc_norm; radius += loc_radi; } } if (++iter < power_iters) { // b0 = b1 / b1_norm b1_norm = 1 / sqrt(b1_norm); #pragma omp parallel for for(ptrdiff_t i = 0; i < n; ++i) { b0[i] = b1_norm * b1[i]; } } } } AMGCL_TOC("spectral radius"); return radius < 0 ? static_cast(2) : radius; } /** * The builtin backend does not have any dependencies, and uses OpenMP for * parallelization. Matrices are stored in the CRS format, and vectors are * instances of ``std::vector``. There is no usual overhead of * moving the constructed hierarchy to the builtin backend, since the backend * is used internally during setup. */ template struct builtin { typedef ValueType value_type; typedef ColumnType index_type; typedef ColumnType col_type; typedef PointerType ptr_type; typedef typename math::rhs_of::type rhs_type; struct provides_row_iterator : std::true_type {}; typedef crs matrix; typedef numa_vector vector; typedef numa_vector matrix_diagonal; typedef solver::skyline_lu direct_solver; /// The backend has no parameters. typedef amgcl::detail::empty_params params; static std::string name() { return "builtin"; } // Copy matrix. This is a noop for builtin backend. static std::shared_ptr copy_matrix(std::shared_ptr A, const params&) { return A; } // Copy vector to builtin backend. template static std::shared_ptr< numa_vector > copy_vector(const std::vector &x, const params&) { return std::make_shared< numa_vector >(x); } // Copy vector to builtin backend. This is a noop for builtin backend. template static std::shared_ptr< numa_vector > copy_vector(std::shared_ptr< numa_vector > x, const params&) { return x; } // Create vector of the specified size. 
static std::shared_ptr create_vector(size_t size, const params&) { return std::make_shared(size); } struct gather { std::vector I; gather(size_t /*size*/, const std::vector &I, const params&) : I(I) { } template void operator()(const InVec &vec, OutVec &vals) const { for(size_t i = 0; i < I.size(); ++i) vals[i] = vec[I[i]]; } }; struct scatter { std::vector I; scatter(size_t /*size*/, const std::vector &I, const params&) : I(I) { } template void operator()(const InVec &vals, OutVec &vec) const { for(size_t i = 0; i < I.size(); ++i) vec[I[i]] = vals[i]; } }; // Create direct solver for coarse level static std::shared_ptr create_solver(std::shared_ptr A, const params&) { return std::make_shared(*A); } }; template struct is_builtin_vector : std::false_type {}; template struct is_builtin_vector< std::vector > : std::is_arithmetic {}; template struct is_builtin_vector< numa_vector > : std::true_type {}; //--------------------------------------------------------------------------- // Specialization of backend interface //--------------------------------------------------------------------------- template struct backends_compatible< builtin, builtin > : std::true_type {}; template < typename V, typename C, typename P > struct rows_impl< crs > { static size_t get(const crs &A) { return A.nrows; } }; template < typename V, typename C, typename P > struct cols_impl< crs > { static size_t get(const crs &A) { return A.ncols; } }; template < class Vec > struct bytes_impl< Vec, typename std::enable_if< is_builtin_vector::value >::type > { static size_t get(const Vec &x) { typedef typename backend::value_type::type V; return x.size() * sizeof(V); } }; template < typename V, typename C, typename P > struct ptr_data_impl< crs > { typedef const P* type; static type get(const crs &A) { return &A.ptr[0]; } }; template < typename V, typename C, typename P > struct col_data_impl< crs > { typedef const C* type; static type get(const crs &A) { return &A.col[0]; } }; template < typename V, typename C, typename P > struct val_data_impl< crs > { typedef const V* type; static type get(const crs &A) { return &A.val[0]; } }; template < typename V, typename C, typename P > struct nonzeros_impl< crs > { static size_t get(const crs &A) { return A.nrows == 0 ? 
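        // A default-constructed (empty) matrix has no ptr array to dereference: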
0 : A.ptr[A.nrows]; } }; template < typename V, typename C, typename P > struct row_nonzeros_impl< crs > { static size_t get(const crs &A, size_t row) { return A.ptr[row + 1] - A.ptr[row]; } }; template < class Vec > struct clear_impl< Vec, typename std::enable_if< is_builtin_vector::value >::type > { static void apply(Vec &x) { typedef typename backend::value_type::type V; const size_t n = x.size(); #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { x[i] = math::zero(); } } }; template < class Vec1, class Vec2 > struct inner_product_impl< Vec1, Vec2, typename std::enable_if< is_builtin_vector::value && is_builtin_vector::value >::type > { typedef typename value_type::type V; typedef typename math::inner_product_impl::return_type return_type; static return_type get(const Vec1 &x, const Vec2 &y) { #ifdef _OPENMP if (omp_get_max_threads() > 1) { return parallel(x, y); } else #endif { return serial(x, y); } } static return_type serial(const Vec1 &x, const Vec2 &y) { const size_t n = x.size(); return_type s = math::zero(); return_type c = math::zero(); for(ptrdiff_t i = 0; i < static_cast(n); ++i) { return_type d = math::inner_product(x[i], y[i]) - c; return_type t = s + d; c = (t - s) - d; s = t; } return s; } #ifdef _OPENMP # ifndef AMGCL_MAX_OPENMP_THREADS # define AMGCL_MAX_OPENMP_THREADS 64 # endif static return_type parallel(const Vec1 &x, const Vec2 &y) { const size_t n = x.size(); return_type _sum_stat[AMGCL_MAX_OPENMP_THREADS]; std::vector _sum_dyna; return_type *sum; const int nt = omp_get_max_threads(); if (nt < 64) { sum = _sum_stat; for(int i = 0; i < nt; ++i) { sum[i] = math::zero(); } } else { _sum_dyna.resize(nt, math::zero()); sum = _sum_dyna.data(); } #pragma omp parallel { const int tid = omp_get_thread_num(); return_type s = math::zero(); return_type c = math::zero(); #pragma omp for nowait for(ptrdiff_t i = 0; i < static_cast(n); ++i) { return_type d = math::inner_product(x[i], y[i]) - c; return_type t = s + d; c = (t - s) - d; s = t; } sum[tid] = s; } return std::accumulate(sum, sum + nt, math::zero()); } #endif }; template struct axpby_impl< A, Vec1, B, Vec2, typename std::enable_if< is_builtin_vector::value && is_builtin_vector::value >::type > { static void apply(A a, const Vec1 &x, B b, Vec2 &y) { const size_t n = x.size(); if (!math::is_zero(b)) { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { y[i] = a * x[i] + b * y[i]; } } else { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { y[i] = a * x[i]; } } } }; template < class A, class Vec1, class B, class Vec2, class C, class Vec3 > struct axpbypcz_impl< A, Vec1, B, Vec2, C, Vec3, typename std::enable_if< is_builtin_vector::value && is_builtin_vector::value && is_builtin_vector::value >::type > { static void apply(A a, const Vec1 &x, B b, const Vec2 &y, C c, Vec3 &z) { const size_t n = x.size(); if (!math::is_zero(c)) { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { z[i] = a * x[i] + b * y[i] + c * z[i]; } } else { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { z[i] = a * x[i] + b * y[i]; } } } }; template < class Alpha, class Vec1, class Vec2, class Beta, class Vec3 > struct vmul_impl< Alpha, Vec1, Vec2, Beta, Vec3, typename std::enable_if< is_builtin_vector::value && is_builtin_vector::value && is_builtin_vector::value && math::static_rows::type>::value == math::static_rows::type>::value && math::static_rows::type>::value == math::static_rows::type>::value >::type > { static void apply(Alpha a, 
const Vec1 &x, const Vec2 &y, Beta b, Vec3 &z) { const size_t n = x.size(); if (!math::is_zero(b)) { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { z[i] = a * x[i] * y[i] + b * z[i]; } } else { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { z[i] = a * x[i] * y[i]; } } } }; // Support for mixed scalar/nonscalar types template < class Alpha, class Vec1, class Vec2, class Beta, class Vec3 > struct vmul_impl< Alpha, Vec1, Vec2, Beta, Vec3, typename std::enable_if< is_builtin_vector::value && is_builtin_vector::value && is_builtin_vector::value && ( math::static_rows::type>::value != math::static_rows::type>::value || math::static_rows::type>::value != math::static_rows::type>::value ) >::type > { static void apply(Alpha a, const Vec1 &x, const Vec2 &y, Beta b, Vec3 &z) { typedef typename value_type::type M_type; auto Y = backend::reinterpret_as_rhs(y); auto Z = backend::reinterpret_as_rhs(z); const size_t n = x.size(); if (!math::is_zero(b)) { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { Z[i] = a * x[i] * Y[i] + b * Z[i]; } } else { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { Z[i] = a * x[i] * Y[i]; } } } }; template < class Vec1, class Vec2 > struct copy_impl< Vec1, Vec2, typename std::enable_if< is_builtin_vector::value && is_builtin_vector::value >::type > { static void apply(const Vec1 &x, Vec2 &y) { const size_t n = x.size(); #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { y[i] = x[i]; } } }; template struct reinterpret_as_rhs_impl< MatrixValue, Vector, IsConst, typename std::enable_if::value>::type > { typedef typename backend::value_type::type src_type; typedef typename math::scalar_of::type scalar_type; typedef typename math::rhs_of::type rhs_type; typedef typename math::replace_scalar::type dst_type; typedef typename std::conditional::type ptr_type; typedef iterator_range return_type; template static return_type get(V &&x) { auto ptr = reinterpret_cast(&x[0]); const size_t n = x.size() * sizeof(src_type) / sizeof(dst_type); return make_iterator_range(ptr, ptr + n); } }; namespace detail { template struct use_builtin_matrix_ops< amgcl::backend::crs > : std::true_type {}; } // namespace detail } // namespace backend } // namespace amgcl // Allow to use boost::iterator_range as vector in builtin backend: namespace boost { template class iterator_range; } namespace amgcl { namespace backend { template struct is_builtin_vector< amgcl::iterator_range > : std::true_type {}; template struct is_builtin_vector< boost::iterator_range > : std::true_type {}; } // namespace backend } // namespace amgcl #endif amgcl-1.4.4/amgcl/backend/builtin_hybrid.hpp000066400000000000000000000055411451676433700210220ustar00rootroot00000000000000#ifndef AMGCL_BACKEND_BUILTIN_HYBRID_HPP #define AMGCL_BACKEND_BUILTIN_HYBRID_HPP /* The MIT License Copyright (c) 2012-2022 Denis Demidov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * \file amgcl/backend/builtin_hybrid.hpp * \author Denis Demidov * \brief Builtin backend that uses scalar matrices to build the hierarchy, but stores the computed matrix in block format. */ #include #include #include namespace amgcl { namespace backend { // Hybrid backend uses scalar matrices to build the hierarchy, // but stores the computed matrices in the block format. template struct builtin_hybrid : public builtin::type, ColumnType, PointerType> { typedef typename math::scalar_of::type ScalarType; typedef builtin Base; typedef crs matrix; struct provides_row_iterator : std::false_type {}; static std::shared_ptr copy_matrix(std::shared_ptr As, const typename Base::params&) { return std::make_shared(amgcl::adapter::block_matrix(*As)); } }; template struct backends_compatible< builtin_hybrid, builtin_hybrid > : std::true_type {}; template struct backends_compatible< builtin, builtin_hybrid > : std::true_type {}; template struct backends_compatible< builtin_hybrid, builtin > : std::true_type {}; } // namespace backend } // namespace amgcl #endif amgcl-1.4.4/amgcl/backend/cuda.hpp000066400000000000000000000575541451676433700167420ustar00rootroot00000000000000#ifndef AMGCL_BACKEND_CUDA_HPP #define AMGCL_BACKEND_CUDA_HPP /* The MIT License Copyright (c) 2012-2022 Denis Demidov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * \file amgcl/backend/cuda.hpp * \author Denis Demidov * \brief CUDA backend. */ #include #include #include #include #include #include #include #include #include #include #include #include #include namespace amgcl { namespace solver { /** Wrapper around solver::skyline_lu for use with the CUDA backend. * Copies the rhs to the host memory, solves the problem using the host CPU, * then copies the solution back to the compute device(s). 
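 * The extra host/device transfers are assumed to be negligible here, since
 * the direct solver is only ever applied to the small coarsest-level
 * system of the hierarchy.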
*/ template struct cuda_skyline_lu : solver::skyline_lu { typedef solver::skyline_lu Base; mutable std::vector _rhs, _x; template cuda_skyline_lu(const Matrix &A, const Params&) : Base(*A), _rhs(backend::rows(*A)), _x(backend::rows(*A)) { } template void operator()(const Vec1 &rhs, Vec2 &x) const { thrust::copy(rhs.begin(), rhs.end(), _rhs.begin()); static_cast(this)->operator()(_rhs, _x); thrust::copy(_x.begin(), _x.end(), x.begin()); } size_t bytes() const { return backend::bytes(*static_cast(this)) + backend::bytes(_rhs) + backend::bytes(_x); } }; } namespace backend { namespace detail { inline void cuda_check(cusparseStatus_t rc, const char *file, int line) { if (rc != CUSPARSE_STATUS_SUCCESS) { std::ostringstream msg; msg << "CUDA error " << rc << " at \"" << file << ":" << line; precondition(false, msg.str()); } } inline void cuda_check(cudaError_t rc, const char *file, int line) { if (rc != cudaSuccess) { std::ostringstream msg; msg << "CUDA error " << rc << " at \"" << file << ":" << line; precondition(false, msg.str()); } } #define AMGCL_CALL_CUDA(rc) \ amgcl::backend::detail::cuda_check(rc, __FILE__, __LINE__) struct cuda_deleter { void operator()(cusparseMatDescr_t handle) { AMGCL_CALL_CUDA( cusparseDestroyMatDescr(handle) ); } void operator()(cusparseSpMatDescr_t handle) { AMGCL_CALL_CUDA( cusparseDestroySpMat(handle) ); } void operator()(cusparseDnVecDescr_t handle) { AMGCL_CALL_CUDA( cusparseDestroyDnVec(handle) ); } void operator()(cudaEvent_t handle) { AMGCL_CALL_CUDA( cudaEventDestroy(handle) ); } void operator()(csrilu02Info_t handle) { AMGCL_CALL_CUDA( cusparseDestroyCsrilu02Info(handle) ); } #if CUDART_VERSION >= 11000 void operator()(cusparseSpSVDescr_t handle) { AMGCL_CALL_CUDA( cusparseSpSV_destroyDescr(handle) ); } #else void operator()(cusparseHybMat_t handle) { AMGCL_CALL_CUDA( cusparseDestroyHybMat(handle) ); } void operator()(csrsv2Info_t handle) { AMGCL_CALL_CUDA( cusparseDestroyCsrsv2Info(handle) ); } #endif }; template cudaDataType cuda_datatype() { if (sizeof(real) == sizeof(float)) return CUDA_R_32F; else return CUDA_R_64F; } #if CUDART_VERSION >= 11000 template cusparseDnVecDescr_t cuda_vector_description(thrust::device_vector &x) { cusparseDnVecDescr_t desc; AMGCL_CALL_CUDA( cusparseCreateDnVec( &desc, x.size(), thrust::raw_pointer_cast(&x[0]), cuda_datatype() ) ); return desc; } template cusparseDnVecDescr_t cuda_vector_description(const thrust::device_vector &&x) { cusparseDnVecDescr_t desc; AMGCL_CALL_CUDA( cusparseCreateDnVec( &desc, x.size(), thrust::raw_pointer_cast(&x[0]), cuda_datatype() ) ); return desc; } template cusparseSpMatDescr_t cuda_matrix_description( size_t nrows, size_t ncols, size_t nnz, thrust::device_vector &ptr, thrust::device_vector &col, thrust::device_vector &val ) { cusparseSpMatDescr_t desc; AMGCL_CALL_CUDA( cusparseCreateCsr( &desc, nrows, ncols, nnz, thrust::raw_pointer_cast(&ptr[0]), thrust::raw_pointer_cast(&col[0]), thrust::raw_pointer_cast(&val[0]), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, detail::cuda_datatype() ) ); return desc; } #endif // CUDART_VERSION >= 11000 } // namespace detail #if CUDART_VERSION >= 11000 /// CUSPARSE matrix in CSR format. 
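/// Owns device-side copies of the CSR arrays (ptr/col/val) together with the
/// cusparseSpMatDescr_t used by cusparseSpMV(); spmv() computes
/// y = alpha * A * x + beta * y. A minimal usage sketch (names are
/// illustrative; `handle` must be a valid cusparseHandle_t, and `ptr`,
/// `col`, `val` a host-side CSR representation of an n-by-n matrix):
/// \code
/// amgcl::backend::cuda_matrix<double> A(n, n, ptr, col, val, handle);
/// thrust::device_vector<double> x(n, 1.0), y(n, 0.0);
/// A.spmv(1.0, x, 0.0, y); // y = A * x
/// \endcode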
template class cuda_matrix { public: typedef real value_type; cuda_matrix( size_t n, size_t m, const ptrdiff_t *p_ptr, const ptrdiff_t *p_col, const real *p_val, cusparseHandle_t handle ) : nrows(n), ncols(m), nnz(p_ptr[n]), handle(handle), ptr(p_ptr, p_ptr + n + 1), col(p_col, p_col + nnz), val(p_val, p_val + nnz) { desc.reset( detail::cuda_matrix_description(nrows, ncols, nnz, ptr, col, val), backend::detail::cuda_deleter() ); } void spmv( real alpha, thrust::device_vector const &x, real beta, thrust::device_vector &y ) const { std::shared_ptr::type> xdesc( detail::cuda_vector_description(const_cast&>(x)), backend::detail::cuda_deleter() ); std::shared_ptr::type> ydesc( detail::cuda_vector_description(y), backend::detail::cuda_deleter() ); size_t buf_size; AMGCL_CALL_CUDA( cusparseSpMV_bufferSize( handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, desc.get(), xdesc.get(), &beta, ydesc.get(), detail::cuda_datatype(), CUSPARSE_SPMV_CSR_ALG1, &buf_size ) ); if (buf.size() < buf_size) buf.resize(buf_size); AMGCL_CALL_CUDA( cusparseSpMV( handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, desc.get(), xdesc.get(), &beta, ydesc.get(), detail::cuda_datatype(), CUSPARSE_SPMV_CSR_ALG1, thrust::raw_pointer_cast(&buf[0]) ) ); } size_t rows() const { return nrows; } size_t cols() const { return ncols; } size_t nonzeros() const { return nnz; } size_t bytes() const { return sizeof(int) * (nrows + 1) + sizeof(int) * nnz + sizeof(real) * nnz; } private: size_t nrows, ncols, nnz; cusparseHandle_t handle; std::shared_ptr::type> desc; thrust::device_vector ptr; thrust::device_vector col; thrust::device_vector val; mutable thrust::device_vector buf; }; #else // CUDART_VERSION >= 11000 /// CUSPARSE matrix in Hyb format. template class cuda_matrix { public: typedef real value_type; cuda_matrix( size_t n, size_t m, const ptrdiff_t *ptr, const ptrdiff_t *col, const real *val, cusparseHandle_t handle ) : nrows(n), ncols(m), nnz(ptr[n]), handle( handle ), desc ( create_description(), backend::detail::cuda_deleter() ), mat ( create_matrix(), backend::detail::cuda_deleter() ) { fill_matrix(n, m, ptr, col, val); } void spmv( real alpha, thrust::device_vector const &x, real beta, thrust::device_vector &y ) const { spmv(alpha, x, beta, y, std::integral_constant()); } void spmv( real alpha, thrust::device_vector const &x, real beta, thrust::device_vector &y, std::false_type ) const { AMGCL_CALL_CUDA( cusparseShybmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, desc.get(), mat.get(), thrust::raw_pointer_cast(&x[0]), &beta, thrust::raw_pointer_cast(&y[0]) ) ); } void spmv( real alpha, thrust::device_vector const &x, real beta, thrust::device_vector &y, std::true_type ) const { AMGCL_CALL_CUDA( cusparseDhybmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, desc.get(), mat.get(), thrust::raw_pointer_cast(&x[0]), &beta, thrust::raw_pointer_cast(&y[0]) ) ); } size_t rows() const { return nrows; } size_t cols() const { return ncols; } size_t nonzeros() const { return nnz; } size_t bytes() const { return sizeof(int) * (nrows + 1) + sizeof(int) * nnz + sizeof(real) * nnz; } private: size_t nrows, ncols, nnz; cusparseHandle_t handle; std::shared_ptr::type> desc; std::shared_ptr::type> mat; static cusparseMatDescr_t create_description() { cusparseMatDescr_t desc; AMGCL_CALL_CUDA( cusparseCreateMatDescr(&desc) ); AMGCL_CALL_CUDA( cusparseSetMatType(desc, CUSPARSE_MATRIX_TYPE_GENERAL) ); AMGCL_CALL_CUDA( cusparseSetMatIndexBase(desc, CUSPARSE_INDEX_BASE_ZERO) ); return desc; } static cusparseHybMat_t create_matrix() { 
cusparseHybMat_t mat; AMGCL_CALL_CUDA( cusparseCreateHybMat(&mat) ); return mat; } void fill_matrix(size_t n, size_t m, const ptrdiff_t *ptr, const ptrdiff_t *col, const float *val ) { thrust::device_vector p(ptr, ptr + n + 1); thrust::device_vector c(col, col + ptr[n]); thrust::device_vector v(val, val + ptr[n]); AMGCL_CALL_CUDA( cusparseScsr2hyb(handle, n, m, desc.get(), thrust::raw_pointer_cast(&v[0]), thrust::raw_pointer_cast(&p[0]), thrust::raw_pointer_cast(&c[0]), mat.get(), 0, CUSPARSE_HYB_PARTITION_AUTO ) ); } void fill_matrix(size_t n, size_t m, const ptrdiff_t *ptr, const ptrdiff_t *col, const double *val ) { thrust::device_vector p(ptr, ptr + n + 1); thrust::device_vector c(col, col + ptr[n]); thrust::device_vector v(val, val + ptr[n]); AMGCL_CALL_CUDA( cusparseDcsr2hyb(handle, n, m, desc.get(), thrust::raw_pointer_cast(&v[0]), thrust::raw_pointer_cast(&p[0]), thrust::raw_pointer_cast(&c[0]), mat.get(), 0, CUSPARSE_HYB_PARTITION_AUTO ) ); } }; #endif // CUDART_VERSION >= 11000 /// CUDA backend. /** * Uses CUSPARSE for matrix operations and Thrust for vector operations. * * \param real Value type. * \ingroup backends */ template > struct cuda { static_assert( std::is_same::value || std::is_same::value, "Unsupported value type for cuda backend" ); typedef real value_type; typedef ptrdiff_t col_type; typedef ptrdiff_t ptr_type; typedef cuda_matrix matrix; typedef thrust::device_vector vector; typedef thrust::device_vector matrix_diagonal; typedef DirectSolver direct_solver; struct provides_row_iterator : std::false_type {}; /// Backend parameters. struct params { /// CUSPARSE handle. cusparseHandle_t cusparse_handle; params(cusparseHandle_t handle = 0) : cusparse_handle(handle) {} #ifndef AMGCL_NO_BOOST params(const boost::property_tree::ptree &p) : AMGCL_PARAMS_IMPORT_VALUE(p, cusparse_handle) { check_params(p, {"cusparse_handle"}); } void get(boost::property_tree::ptree &p, const std::string &path) const { AMGCL_PARAMS_EXPORT_VALUE(p, path, cusparse_handle); } #endif }; static std::string name() { return "cuda"; } /// Copy matrix from builtin backend. static std::shared_ptr copy_matrix(std::shared_ptr< typename builtin::matrix > A, const params &prm) { return std::make_shared(rows(*A), cols(*A), A->ptr, A->col, A->val, prm.cusparse_handle ); } /// Copy vector from builtin backend. static std::shared_ptr copy_vector(typename builtin::vector const &x, const params&) { return std::make_shared(x.data(), x.data() + x.size()); } /// Copy vector from builtin backend. static std::shared_ptr copy_vector(std::shared_ptr< typename builtin::vector > x, const params &prm) { return copy_vector(*x, prm); } /// Create vector of the specified size. 
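/// A sketch of typical backend setup (assuming a cusparse handle is created
/// first; vector creation itself does not use the handle):
/// \code
/// typedef amgcl::backend::cuda<double> Backend;
/// Backend::params prm;
/// cusparseCreate(&prm.cusparse_handle);
/// auto v = Backend::create_vector(n, prm); // thrust::device_vector<double> of size n
/// \endcode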
static std::shared_ptr create_vector(size_t size, const params&) { return std::make_shared(size); } /// Create direct solver for coarse level static std::shared_ptr create_solver(std::shared_ptr< typename builtin::matrix > A, const params &prm) { return std::make_shared(A, prm); } struct gather { thrust::device_vector I; mutable thrust::device_vector T; gather(size_t src_size, const std::vector &I, const params&) : I(I), T(I.size()) { } void operator()(const vector &src, vector &dst) const { thrust::gather(I.begin(), I.end(), src.begin(), dst.begin()); } void operator()(const vector &vec, std::vector &vals) const { thrust::gather(I.begin(), I.end(), vec.begin(), T.begin()); thrust::copy(T.begin(), T.end(), vals.begin()); } }; struct scatter { thrust::device_vector I; scatter(size_t size, const std::vector &I, const params &) : I(I) { } void operator()(const vector &src, vector &dst) const { thrust::scatter(src.begin(), src.end(), I.begin(), dst.begin()); } }; }; //--------------------------------------------------------------------------- // Backend interface implementation //--------------------------------------------------------------------------- template < typename V > struct bytes_impl< thrust::device_vector > { static size_t get(const thrust::device_vector &v) { return v.size() * sizeof(V); } }; template < typename Alpha, typename Beta, typename V > struct spmv_impl< Alpha, cuda_matrix, thrust::device_vector, Beta, thrust::device_vector > { typedef cuda_matrix matrix; typedef thrust::device_vector vector; static void apply(Alpha alpha, const matrix &A, const vector &x, Beta beta, vector &y) { A.spmv(alpha, x, beta, y); } }; template < typename V > struct residual_impl< cuda_matrix, thrust::device_vector, thrust::device_vector, thrust::device_vector > { typedef cuda_matrix matrix; typedef thrust::device_vector vector; static void apply(const vector &rhs, const matrix &A, const vector &x, vector &r) { thrust::copy(rhs.begin(), rhs.end(), r.begin()); A.spmv(-1, x, 1, r); } }; template < typename V > struct clear_impl< thrust::device_vector > { typedef thrust::device_vector vector; static void apply(vector &x) { thrust::fill(x.begin(), x.end(), V()); } }; template struct copy_impl > { static void apply(const V &x, thrust::device_vector &y) { thrust::copy(x.begin(), x.end(), y.begin()); } }; template struct copy_impl, V > { static void apply(const thrust::device_vector &x, V &y) { thrust::copy(x.begin(), x.end(), y.begin()); } }; template struct copy_impl, thrust::device_vector > { static void apply(const thrust::device_vector &x, thrust::device_vector &y) { thrust::copy(x.begin(), x.end(), y.begin()); } }; template < typename V > struct inner_product_impl< thrust::device_vector, thrust::device_vector > { typedef thrust::device_vector vector; static V get(const vector &x, const vector &y) { return thrust::inner_product(x.begin(), x.end(), y.begin(), V()); } }; template < typename A, typename B, typename V > struct axpby_impl< A, thrust::device_vector, B, thrust::device_vector > { typedef thrust::device_vector vector; struct functor { A a; B b; functor(A a, B b) : a(a), b(b) {} template __host__ __device__ void operator()( Tuple t ) const { using thrust::get; if (b) get<1>(t) = a * get<0>(t) + b * get<1>(t); else get<1>(t) = a * get<0>(t); } }; static void apply(A a, const vector &x, B b, vector &y) { thrust::for_each( thrust::make_zip_iterator( thrust::make_tuple( x.begin(), y.begin() ) ), thrust::make_zip_iterator( thrust::make_tuple( x.end(), y.end() ) ), functor(a, b) ); } }; template 
< typename A, typename B, typename C, typename V > struct axpbypcz_impl< A, thrust::device_vector, B, thrust::device_vector, C, thrust::device_vector > { typedef thrust::device_vector vector; struct functor { A a; B b; C c; functor(A a, B b, C c) : a(a), b(b), c(c) {} template __host__ __device__ void operator()( Tuple t ) const { using thrust::get; if (c) get<2>(t) = a * get<0>(t) + b * get<1>(t) + c * get<2>(t); else get<2>(t) = a * get<0>(t) + b * get<1>(t); } }; static void apply( A a, const vector &x, B b, const vector &y, C c, vector &z ) { thrust::for_each( thrust::make_zip_iterator( thrust::make_tuple( x.begin(), y.begin(), z.begin() ) ), thrust::make_zip_iterator( thrust::make_tuple( x.end(), y.end(), z.end() ) ), functor(a, b, c) ); } }; template < typename A, typename B, typename V > struct vmul_impl< A, thrust::device_vector, thrust::device_vector, B, thrust::device_vector > { typedef thrust::device_vector vector; struct functor { A a; B b; functor(A a, B b) : a(a), b(b) {} template __host__ __device__ void operator()( Tuple t ) const { using thrust::get; if (b) get<2>(t) = a * get<0>(t) * get<1>(t) + b * get<2>(t); else get<2>(t) = a * get<0>(t) * get<1>(t); } }; static void apply(A a, const vector &x, const vector &y, B b, vector &z) { thrust::for_each( thrust::make_zip_iterator( thrust::make_tuple( x.begin(), y.begin(), z.begin() ) ), thrust::make_zip_iterator( thrust::make_tuple( x.end(), y.end(), z.end() ) ), functor(a, b) ); } }; class cuda_event { public: cuda_event() : e(create_event(), backend::detail::cuda_deleter()) { } float operator-(cuda_event tic) const { float delta; cudaEventSynchronize(e.get()); cudaEventElapsedTime(&delta, tic.e.get(), e.get()); return delta / 1000.0f; } private: std::shared_ptr::type> e; static cudaEvent_t create_event() { cudaEvent_t e; cudaEventCreate(&e); cudaEventRecord(e, 0); return e; } }; struct cuda_clock { typedef cuda_event value_type; static const char* units() { return "s"; } cuda_event current() const { return cuda_event(); } }; } // namespace backend } // namespace amgcl #endif amgcl-1.4.4/amgcl/backend/detail/000077500000000000000000000000001451676433700165375ustar00rootroot00000000000000amgcl-1.4.4/amgcl/backend/detail/default_direct_solver.hpp000066400000000000000000000045451451676433700236300ustar00rootroot00000000000000#ifndef AMGCL_BACKEND_DETAIL_DEFAULT_DIRECT_SOLVER_HPP #define AMGCL_BACKEND_DETAIL_DEFAULT_DIRECT_SOLVER_HPP /* The MIT License Copyright (c) 2012-2022 Denis Demidov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

/**
 * \file   amgcl/backend/detail/default_direct_solver.hpp
 * \author Denis Demidov
 * \brief  Default direct solver for coarse level.
 */

#include
#include

namespace amgcl {
namespace backend {
namespace detail {

template <class Backend>
struct default_direct_solver {
    typedef typename Backend::value_type         real;
    typedef typename math::scalar_of<real>::type scalar;
    typedef typename Backend::matrix             matrix;
    typedef typename builtin<real>::matrix       host_matrix;

    std::shared_ptr<matrix> Ainv;

    default_direct_solver(
            std::shared_ptr<host_matrix> A,
            typename Backend::params const &prm
            )
    {
        // Invert the (small) coarse-level matrix on the host,
        // then move the inverse to the backend:
        auto ainv = std::make_shared<host_matrix>();
        *ainv = inverse(*A);
        Ainv = Backend::copy_matrix(ainv, prm);
    }

    template <class Vec1, class Vec2>
    void operator()(const Vec1 &rhs, Vec2 &x) const {
        // Solving reduces to a single product with the precomputed inverse:
        backend::spmv(math::identity<scalar>(), *Ainv, rhs, math::zero<scalar>(), x);
    }

    static size_t coarse_enough() { return 500; }
};

} // namespace detail
} // namespace backend
} // namespace amgcl

#endif
amgcl-1.4.4/amgcl/backend/detail/matrix_ops.hpp
#ifndef AMGCL_BACKEND_DETAIL_MATRIX_OPS_HPP
#define AMGCL_BACKEND_DETAIL_MATRIX_OPS_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

/**
 * \file   amgcl/backend/detail/matrix_ops.hpp
 * \author Denis Demidov
 * \brief  Sparse matrix operations for matrices that provide row_iterator.
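 *
 * These implementations apply to any matrix type for which
 * detail::use_builtin_matrix_ops is specialized to std::true_type; the only
 * requirement is that the type models the row_begin()/row_iterator concept,
 * i.e. that a matrix row can be traversed as in this sketch:
 *
 * \code
 * for(auto a = amgcl::backend::row_begin(A, i); a; ++a)
 *     sum += a.value() * x[a.col()];
 * \endcode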
*/ #include #include #include namespace amgcl { namespace backend { namespace detail { template struct use_builtin_matrix_ops : std::false_type {}; } // namespace detail template struct spmv_impl< Alpha, Matrix, Vector1, Beta, Vector2, typename std::enable_if< detail::use_builtin_matrix_ops::value && math::static_rows::type>::value == math::static_rows::type>::value && math::static_rows::type>::value == math::static_rows::type>::value >::type > { static void apply( Alpha alpha, const Matrix &A, const Vector1 &x, Beta beta, Vector2 &y ) { typedef typename value_type::type V; const ptrdiff_t n = static_cast( rows(A) ); if (!math::is_zero(beta)) { #pragma omp parallel for for(ptrdiff_t i = 0; i < n; ++i) { V sum = math::zero(); for(typename row_iterator::type a = row_begin(A, i); a; ++a) sum += a.value() * x[ a.col() ]; y[i] = alpha * sum + beta * y[i]; } } else { #pragma omp parallel for for(ptrdiff_t i = 0; i < n; ++i) { V sum = math::zero(); for(typename row_iterator::type a = row_begin(A, i); a; ++a) sum += a.value() * x[ a.col() ]; y[i] = alpha * sum; } } } }; template struct residual_impl< Matrix, Vector1, Vector2, Vector3, typename std::enable_if< detail::use_builtin_matrix_ops::value && math::static_rows::type>::value == math::static_rows::type>::value && math::static_rows::type>::value == math::static_rows::type>::value && math::static_rows::type>::value == math::static_rows::type>::value >::type > { static void apply( Vector1 const &rhs, Matrix const &A, Vector2 const &x, Vector3 &res ) { typedef typename value_type::type V; const ptrdiff_t n = static_cast( rows(A) ); #pragma omp parallel for for(ptrdiff_t i = 0; i < n; ++i) { V sum = math::zero(); for(typename row_iterator::type a = row_begin(A, i); a; ++a) sum += a.value() * x[ a.col() ]; res[i] = rhs[i] - sum; } } }; /* Allows to do matrix-vector products with mixed scalar/nonscalar types. * Reinterprets pointers to the vectors data into appropriate types. 
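 *
 * For instance (an illustrative sketch, not taken from the sources): when a
 * matrix with 2x2 static-block values is multiplied by a plain vector of
 * doubles, the double array is viewed as an array of 2x1 right-hand-side
 * blocks, so that each block-row product is well-typed:
 *
 * \code
 * // x holds 2 * n doubles; X views it as n rhs blocks of static size 2:
 * auto X = amgcl::backend::reinterpret_as_rhs<amgcl::static_matrix<double,2,2>>(x);
 * \endcode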
*/
template <class Alpha, class Matrix, class Vector1, class Beta, class Vector2>
struct spmv_impl<
    Alpha, Matrix, Vector1, Beta, Vector2,
    typename std::enable_if<
        detail::use_builtin_matrix_ops<Matrix>::value &&
        (
            math::static_rows<typename value_type<Matrix>::type>::value !=
            math::static_rows<typename value_type<Vector1>::type>::value ||
            math::static_rows<typename value_type<Matrix>::type>::value !=
            math::static_rows<typename value_type<Vector2>::type>::value
        )
        >::type
    >
{
    static void apply(
            Alpha alpha, const Matrix &A, const Vector1 &x, Beta beta, Vector2 &y
            )
    {
        typedef typename value_type<Matrix>::type V;

        // View the vectors as sequences of blocks compatible with the
        // matrix value type, then fall back to the uniform implementation:
        auto X = backend::reinterpret_as_rhs<V>(x);
        auto Y = backend::reinterpret_as_rhs<V>(y);

        spmv(alpha, A, X, beta, Y);
    }
};

template <class Matrix, class Vector1, class Vector2, class Vector3>
struct residual_impl<
    Matrix, Vector1, Vector2, Vector3,
    typename std::enable_if<
        detail::use_builtin_matrix_ops<Matrix>::value &&
        (
            math::static_rows<typename value_type<Matrix>::type>::value !=
            math::static_rows<typename value_type<Vector1>::type>::value ||
            math::static_rows<typename value_type<Matrix>::type>::value !=
            math::static_rows<typename value_type<Vector2>::type>::value ||
            math::static_rows<typename value_type<Matrix>::type>::value !=
            math::static_rows<typename value_type<Vector3>::type>::value
        )
        >::type
    >
{
    static void apply(
            Vector1 const &f, Matrix const &A, Vector2 const &x, Vector3 &r
            )
    {
        typedef typename value_type<Matrix>::type V;

        auto X = backend::reinterpret_as_rhs<V>(x);
        auto F = backend::reinterpret_as_rhs<V>(f);
        auto R = backend::reinterpret_as_rhs<V>(r);

        residual(F, A, X, R);
    }
};

} // namespace backend
} // namespace amgcl

#endif
amgcl-1.4.4/amgcl/backend/detail/mixing.hpp
#ifndef AMGCL_BACKEND_DETAIL_MIXING_HPP
#define AMGCL_BACKEND_DETAIL_MIXING_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov
Copyright (c) 2016, Riccardo Rossi, CIMNE (International Center for Numerical Methods in Engineering)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

/**
 * \file   amgcl/backend/detail/mixing.hpp
 * \author Denis Demidov
 * \brief  Utilities for mixed-precision or mixed-block backends.
 */

#include
#include
#include

namespace amgcl {
namespace backend {
namespace detail {

// Backend with scalar value_type of highest precision.
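// For example (illustrative): with B1 = builtin<static_matrix<float,2,2>> and
// B2 = builtin<double>, common_scalar_backend<B1, B2>::type is
// builtin<double>, since double is the wider of the two scalar types.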
template struct common_scalar_backend; template struct common_scalar_backend::value == 1 >::type > { typedef B type; }; template struct common_scalar_backend< backend::builtin, backend::builtin, typename std::enable_if< math::static_rows::value != 1 || math::static_rows::value != 1 >::type> { typedef typename math::scalar_of::type S1; typedef typename math::scalar_of::type S2; typedef typename std::conditional< (sizeof(S1) > sizeof(S2)), backend::builtin, backend::builtin >::type type; }; } // namespace detail } // namespace backend } // namespace amgcl #endif amgcl-1.4.4/amgcl/backend/eigen.hpp000066400000000000000000000162101451676433700170750ustar00rootroot00000000000000#ifndef AMGCL_BACKEND_EIGEN_HPP #define AMGCL_BACKEND_EIGEN_HPP /* The MIT License Copyright (c) 2012-2022 Denis Demidov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * \file amgcl/backend/eigen.hpp * \author Denis Demidov * \brief Sparse matrix in CRS format. */ #include #include #include namespace amgcl { namespace backend { /// Eigen backend. /** * This is a backend that uses types defined in the Eigen library * (http://eigen.tuxfamily.org). * * \param real Value type. * \ingroup backends */ template struct eigen { typedef real value_type; typedef ptrdiff_t index_type; typedef ptrdiff_t col_type; typedef ptrdiff_t ptr_type; typedef Eigen::Map> matrix; typedef Eigen::Matrix vector; typedef Eigen::Matrix matrix_diagonal; typedef solver::skyline_lu direct_solver; struct provides_row_iterator : std::true_type {}; /// Backend parameters. typedef amgcl::detail::empty_params params; static std::string name() { return "eigen"; } /// Copy matrix from builtin backend. static std::shared_ptr copy_matrix(std::shared_ptr< typename builtin::matrix > A, const params&) { const typename builtin::matrix &a = *A; return std::shared_ptr( new matrix( rows(*A), cols(*A), nonzeros(*A), const_cast(a.ptr), const_cast(a.col), const_cast(a.val) ), hold_host(A) ); } /// Copy vector from builtin backend. static std::shared_ptr copy_vector(typename builtin::vector const &x, const params&) { return std::make_shared( Eigen::Map(x.data(), x.size()) ); } /// Copy vector from builtin backend. static std::shared_ptr copy_vector(std::shared_ptr< typename builtin::vector > x, const params &prm) { return copy_vector(*x, prm); } /// Create vector of the specified size. 
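/// The vector type is Eigen::Matrix<real, Eigen::Dynamic, 1>; e.g. (a sketch):
/// \code
/// typedef amgcl::backend::eigen<double> Backend;
/// auto v = Backend::create_vector(n, Backend::params());
/// v->setZero();
/// \endcode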
static std::shared_ptr create_vector(size_t size, const params&) { return std::make_shared(size); } /// Create direct solver for coarse level static std::shared_ptr create_solver(std::shared_ptr< typename builtin::matrix > A, const params&) { return std::make_shared(*A); } private: struct hold_host { typedef std::shared_ptr< crs > host_matrix; host_matrix host; hold_host( host_matrix host ) : host(host) {} void operator()(matrix *ptr) const { delete ptr; } }; }; template < class Alpha, class M, class V1, class Beta, class V2 > struct spmv_impl< Alpha, M, V1, Beta, V2, typename std::enable_if< is_eigen_sparse_matrix::value && is_eigen_type::value && is_eigen_type::value >::type > { static void apply(Alpha alpha, const M &A, const V1 &x, Beta beta, V2 &y) { if (!math::is_zero(beta)) y = alpha * A * x + beta * y; else y = alpha * A * x; } }; template < class M, class V1, class V2, class V3 > struct residual_impl< M, V1, V2, V3, typename std::enable_if< is_eigen_sparse_matrix::value && is_eigen_type::value && is_eigen_type::value && is_eigen_type::value >::type > { static void apply(const V1 &rhs, const M &A, const V2 &x, V3 &r) { r = rhs - A * x; } }; template < typename V > struct clear_impl< V, typename std::enable_if::value>::type > { static void apply(V &x) { x.setZero(); } }; template < class V1, class V2 > struct inner_product_impl< V1, V2, typename std::enable_if< is_eigen_type::value && is_eigen_type::value >::type > { typedef typename value_type::type real; static real get(const V1 &x, const V2 &y) { return y.dot(x); } }; template < class A, class V1, class B, class V2 > struct axpby_impl< A, V1, B, V2, typename std::enable_if< is_eigen_type::value && is_eigen_type::value >::type > { static void apply(A a, const V1 &x, B b, V2 &y) { if (!math::is_zero(b)) y = a * x + b * y; else y = a * x; } }; template < class A, class V1, class B, class V2, class C, class V3 > struct axpbypcz_impl< A, V1, B, V2, C, V3, typename std::enable_if< is_eigen_type::value && is_eigen_type::value && is_eigen_type::value >::type > { typedef typename value_type::type real; static void apply( real a, const V1 &x, real b, const V2 &y, real c, V3 &z ) { if (!math::is_zero(c)) z = a * x + b * y + c * z; else z = a * x + b * y; } }; template < class Alpha, class V1, class V2, class Beta, class V3 > struct vmul_impl< Alpha, V1, V2, Beta, V3, typename std::enable_if< is_eigen_type::value && is_eigen_type::value && is_eigen_type::value >::type > { static void apply(Alpha a, const V1 &x, const V2 &y, Beta b, V3 &z) { if (!math::is_zero(b)) z.array() = a * x.array() * y.array() + b * z.array(); else z.array() = a * x.array() * y.array(); } }; template < class V1, class V2 > struct copy_impl< V1, V2, typename std::enable_if< is_eigen_type::value && is_eigen_type::value >::type > { static void apply(const V1 &x, V2 &y) { y = x; } }; } // namespace backend } // namespace amgcl #endif amgcl-1.4.4/amgcl/backend/hpx.hpp000066400000000000000000000744471451676433700166250ustar00rootroot00000000000000#ifndef AMGCL_BACKEND_HPX_HPP #define AMGCL_BACKEND_HPX_HPP /* The MIT License Copyright (c) 2012-2022 Denis Demidov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following 
conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * \file amgcl/backend/hpx.hpp * \author Denis Demidov * \brief HPX backend. */ #include #include #include #include #include #include #include #include #include #include namespace amgcl { namespace backend { /// The matrix is a thin wrapper on top of amgcl::builtin::crs<>. template class hpx_matrix { public: typedef real value_type; typedef ptrdiff_t index_type; typedef crs Base; typedef typename Base::row_iterator row_iterator; // For each of the output segments y[i] in y = A * x it stores a range of // segments in x that y[i] depends on. std::vector> xrange; // And yrange stores the inverted dependencies: for each segment in x // yrange stores range of segments in y that depend on x. This is required // to determine when a segment of x in [y = A * x] is safe to update. std::vector> yrange; // Creates the matrix from builtin datatype, sets up xrange. hpx_matrix(std::shared_ptr A, int grain_size) : base(A) { index_type n = backend::rows(*A); index_type m = backend::cols(*A); index_type nseg = (n + grain_size - 1) / grain_size; index_type mseg = (m + grain_size - 1) / grain_size; xrange.resize(nseg); yrange.resize(mseg, std::make_tuple(m, 0)); auto range = boost::irange(0, nseg); hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [this, grain_size, n, A](index_type seg) { index_type i = seg * grain_size; index_type beg = A->ptr[i]; index_type end = A->ptr[std::min(i + grain_size, n)]; auto mm = std::minmax_element(A->col + beg, A->col + end); index_type xbeg = *std::get<0>(mm) / grain_size; index_type xend = *std::get<1>(mm) / grain_size + 1; xrange[seg] = std::make_tuple(xbeg, xend); for(index_type i = xbeg; i < xend; ++i) { std::get<0>(yrange[i]) = std::min(seg, std::get<0>(yrange[i])); std::get<1>(yrange[i]) = std::max(seg+1, std::get<1>(yrange[i])); } }); } size_t rows() const { return backend::rows(*base); } size_t cols() const { return backend::cols(*base); } size_t nonzeros() const { return backend::nonzeros(*base); } row_iterator row_begin(size_t row) const { return base->row_begin(row); } private: // Base matrix is stored in shared_ptr<> to reduce the overhead // of data transfer from builtin datatypes (used for AMG setup) to the // backend datatypes. std::shared_ptr base; }; /// The vector type to be used with HPX backend. /** * hpx_vector is a thin wrapper on top of std::vector. * The vector consists of continuous segments of fixed size (grain_size) except * may be the last one that is allowed to be shorter. * A vector of shared_futures corresponding to each of the segments is stored * along the data vector to facilitate construction of HPX dependency graph. */ template < typename real > class hpx_vector { public: typedef real value_type; typedef std::vector Base; typedef typename Base::iterator iterator; typedef typename Base::const_iterator const_iterator; int nseg; // Number of segments in the vector int grain_size; // Segment size. 
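// Segment s covers the index range
// [s * grain_size, min((s + 1) * grain_size, size())); for example,
// n = 10000 with grain_size = 4096 yields nseg = 3 segments of sizes
// 4096, 4096, and 1808. The futures below form the dependency graph:
// a task that reads segment s chains on safe_to_read[s], a task that
// writes it chains on safe_to_write[s], and either one publishes the
// future of its own completion in place of the old one.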
// Futures associated with each segment: mutable std::vector> safe_to_read; mutable std::vector> safe_to_write; hpx_vector(size_t n, int grain_size) : nseg( (n + grain_size - 1) / grain_size ), grain_size( grain_size ), buf( std::make_shared(n) ) { precondition(grain_size > 0, "grain size should be positive"); init_futures(); } template hpx_vector(std::shared_ptr o, int grain_size) : nseg( (o->size() + grain_size - 1) / grain_size ), grain_size( grain_size ), buf(std::make_shared(o->data(), o->data() + o->size())) { precondition(grain_size > 0, "grain size should be positive"); init_futures(); } size_t size() const { return buf->size(); } const real & operator[](size_t i) const { return (*buf)[i]; } real & operator[](size_t i) { return (*buf)[i]; } const real* data() const { return buf->data(); } real* data() { return buf->data(); } iterator begin() { return buf->begin(); } iterator end() { return buf->end(); } const_iterator begin() const { return buf->cbegin(); } const_iterator end() const { return buf->cend(); } const_iterator cbegin() const { return buf->cbegin(); } const_iterator cend() const { return buf->cend(); } template boost::iterator_range< typename std::vector< hpx::shared_future >::iterator > safe_range(IdxTuple idx) const { return boost::make_iterator_range( safe_to_read.begin() + std::get<0>(idx), safe_to_read.begin() + std::get<1>(idx) ); } private: // Segments stored in a continuous array. // The base vector is stored with shared_ptr for the same reason as with // hpx_matrix above: to reduce the overhead of data transfer. std::shared_ptr buf; void init_futures() { safe_to_read.reserve(nseg); safe_to_write.reserve(nseg); for(ptrdiff_t i = 0; i < nseg; ++i) { safe_to_read.push_back(hpx::make_ready_future()); safe_to_write.push_back(hpx::make_ready_future()); } } }; /// HPX backend /** * This is a backend that is based on HPX -- a general purpose C++ runtime * system for parallel and distributed applications of any scale * http://stellar-group.org/libraries/hpx. */ template struct HPX { typedef real value_type; typedef ptrdiff_t index_type; typedef ptrdiff_t col_type; typedef ptrdiff_t ptr_type; struct provides_row_iterator : std::false_type {}; struct params { /// Number of vector elements in a single segment. int grain_size; params() : grain_size(4096) {} #ifndef AMGCL_NO_BOOST params(const boost::property_tree::ptree &p) : AMGCL_PARAMS_IMPORT_VALUE(p, grain_size) { check_params(p, {"grain_size"}); } void get(boost::property_tree::ptree &p, const std::string &path) const { AMGCL_PARAMS_EXPORT_VALUE(p, path, grain_size); } #endif }; typedef hpx_matrix matrix; typedef hpx_vector vector; typedef hpx_vector matrix_diagonal; struct direct_solver : public solver::skyline_lu { typedef solver::skyline_lu Base; typedef typename Base::params params; template direct_solver(const Matrix &A, const params &prm = params()) : Base(A, prm) {} struct call_base { const Base *base; const real *fptr; real *xptr; template void operator()(T&&...) const { (*base)(fptr, xptr); } }; void operator()(const vector &rhs, vector &x) const { const real *fptr = &rhs[0]; real *xptr = &x[0]; using hpx::dataflow; hpx::shared_future solve = dataflow( hpx::launch::async, call_base{this, fptr, xptr}, rhs.safe_to_read, x.safe_to_write ); boost::fill(x.safe_to_read, solve); } }; static std::string name() { return "HPX"; } /// Copy matrix. static std::shared_ptr copy_matrix(std::shared_ptr A, const params &p) { return std::make_shared(A, p.grain_size); } /// Copy vector to builtin backend. 
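/// A sketch (`host` being a std::vector<double> already filled in):
/// \code
/// typedef amgcl::backend::HPX<double> Backend;
/// Backend::params prm; // prm.grain_size defaults to 4096
/// auto v = Backend::copy_vector(host, prm);
/// \endcode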
static std::shared_ptr copy_vector(const typename vector::Base &x, const params &p) { return std::make_shared( std::make_shared(x), p.grain_size ); } /// Copy vector to builtin backend. template static std::shared_ptr> copy_vector(std::shared_ptr x, const params &p) { return std::make_shared>(x, p.grain_size); } /// Create vector of the specified size. static std::shared_ptr create_vector(size_t size, const params &p) { return std::make_shared(size, p.grain_size); } /// Create direct solver for coarse level static std::shared_ptr create_solver(std::shared_ptr A, const params&) { return std::make_shared(*A); } }; //--------------------------------------------------------------------------- // Backend interface implementation //--------------------------------------------------------------------------- template < typename Alpha, typename Beta, typename real > struct spmv_impl< Alpha, hpx_matrix, hpx_vector, Beta, hpx_vector > { typedef hpx_matrix matrix; typedef hpx_vector vector; struct process_ab { Alpha alpha; const hpx_matrix &A; const real *xptr; Beta beta; real *yptr; ptrdiff_t beg; ptrdiff_t end; template void operator()(T&&...) const { for(ptrdiff_t i = beg; i < end; ++i) { real sum = 0; for(auto a = A.row_begin(i); a; ++a) sum += a.value() * xptr[a.col()]; yptr[i] = alpha * sum + beta * yptr[i]; } } }; struct process_a { Alpha alpha; const hpx_matrix &A; const real *xptr; real *yptr; ptrdiff_t beg; ptrdiff_t end; template void operator()(T&&...) const { for(ptrdiff_t i = beg; i < end; ++i) { real sum = 0; for(auto a = A.row_begin(i); a; ++a) sum += a.value() * xptr[a.col()]; yptr[i] = alpha * sum; } } }; struct wait_for_it { template void operator()(T&&) const {} }; static void apply(Alpha alpha, const matrix &A, const vector &x, Beta beta, vector &y) { const real *xptr = &x[0]; real *yptr = &y[0]; using hpx::dataflow; auto range = boost::irange(0, y.nseg); if (beta) { // y = alpha * A * x + beta * y hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [alpha, &A, &x, beta, &y, xptr, yptr](ptrdiff_t seg) { ptrdiff_t beg = seg * y.grain_size; ptrdiff_t end = std::min(beg + y.grain_size, y.size()); y.safe_to_read[seg] = dataflow( hpx::launch::async, process_ab{alpha, A, xptr, beta, yptr, beg, end}, y.safe_to_read[seg], y.safe_to_write[seg], x.safe_range(A.xrange[seg]) ); }); } else { // y = alpha * A * x hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [alpha, &A, &x, &y, xptr, yptr](ptrdiff_t seg) { ptrdiff_t beg = seg * y.grain_size; ptrdiff_t end = std::min(beg + y.grain_size, y.size()); y.safe_to_read[seg] = dataflow(hpx::launch::async, process_a{alpha, A, xptr, yptr, beg, end}, y.safe_to_write[seg], x.safe_range(A.xrange[seg]) ); }); } // Do not update x until y is ready. range = boost::irange(0, x.nseg); hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [&A, &x, &y](ptrdiff_t seg) { x.safe_to_write[seg] = dataflow(hpx::launch::async, wait_for_it(), y.safe_range(A.yrange[seg]) ); }); } }; template < typename real > struct residual_impl< hpx_matrix, hpx_vector, hpx_vector, hpx_vector > { typedef hpx_matrix matrix; typedef hpx_vector vector; struct process { const real *fptr; const hpx_matrix &A; const real *xptr; real *rptr; ptrdiff_t beg; ptrdiff_t end; template void operator()(T&&...) 
const { for(ptrdiff_t i = beg; i < end; ++i) { real sum = fptr[i]; for(auto a = A.row_begin(i); a; ++a) sum -= a.value() * xptr[a.col()]; rptr[i] = sum; } } }; struct wait_for_it { template void operator()(T&&) const {} }; static void apply(const vector &f, const matrix &A, const vector &x, vector &r) { const real *xptr = &x[0]; const real *fptr = &f[0]; real *rptr = &r[0]; using hpx::dataflow; auto range = boost::irange(0, f.nseg); hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [&f, &A, &x, &r, xptr, fptr, rptr](ptrdiff_t seg) { ptrdiff_t beg = seg * f.grain_size; ptrdiff_t end = std::min(beg + f.grain_size, f.size()); r.safe_to_read[seg] = dataflow(hpx::launch::async, process{fptr, A, xptr, rptr, beg, end}, f.safe_to_read[seg], r.safe_to_write[seg], x.safe_range(A.xrange[seg]) ); }); // Do not update x until r is ready. range = boost::irange(0, x.nseg); hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [&A, &x, &r](ptrdiff_t seg) { x.safe_to_write[seg] = dataflow(hpx::launch::async, wait_for_it(), r.safe_range(A.yrange[seg]) ); }); } }; template < typename real > struct clear_impl< hpx_vector > { typedef hpx_vector vector; struct process { real *xptr; ptrdiff_t beg; ptrdiff_t end; template void operator()(T&&...) const { for(ptrdiff_t i = beg; i < end; ++i) xptr[i] = 0; } }; static void apply(vector &x) { real *xptr = &x[0]; using hpx::dataflow; auto range = boost::irange(0, x.nseg); hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [&x, xptr](ptrdiff_t seg) { ptrdiff_t beg = seg * x.grain_size; ptrdiff_t end = std::min(beg + x.grain_size, x.size()); x.safe_to_read[seg] = dataflow(hpx::launch::async, process{xptr, beg, end}, x.safe_to_write[seg] ); }); } }; template < typename real > struct copy_impl< hpx_vector, hpx_vector > { typedef hpx_vector vector; struct process { const real *xptr; real *yptr; ptrdiff_t beg; ptrdiff_t end; template void operator()(T&&...) const { for(ptrdiff_t i = beg; i < end; ++i) yptr[i] = xptr[i]; } }; static void apply(const vector &x, vector &y) { const real *xptr = &x[0]; real *yptr = &y[0]; using hpx::dataflow; auto range = boost::irange(0, x.nseg); hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [&x, &y, xptr, yptr](ptrdiff_t seg) { ptrdiff_t beg = seg * x.grain_size; ptrdiff_t end = std::min(beg + x.grain_size, x.size()); y.safe_to_read[seg] = dataflow(hpx::launch::async, process{xptr, yptr, beg, end}, x.safe_to_read[seg], y.safe_to_write[seg] ); }); } }; template < typename real > struct copy< std::vector, hpx_vector > { typedef hpx_vector vector; struct process { const real *xptr; real *yptr; ptrdiff_t beg; ptrdiff_t end; template void operator()(T&&...) 
const { for(ptrdiff_t i = beg; i < end; ++i) yptr[i] = xptr[i]; } }; static void apply(const std::vector &x, vector &y) { const real *xptr = &x[0]; real *yptr = &y[0]; using hpx::dataflow; auto range = boost::irange(0, y.nseg); hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [&y, xptr, yptr](ptrdiff_t seg) { ptrdiff_t beg = seg * y.grain_size; ptrdiff_t end = std::min(beg + y.grain_size, y.size()); y.safe_to_read[seg] = dataflow(hpx::launch::async, process{xptr, yptr, beg, end}, y.safe_to_write[seg] ); }); } }; template < typename real > struct inner_product_impl< hpx_vector, hpx_vector > { typedef hpx_vector vector; struct process { const real *xptr; const real *yptr; ptrdiff_t beg; ptrdiff_t end; template double operator()(T&&...) const { real sum = 0; for(ptrdiff_t i = beg; i < end; ++i) sum += xptr[i] * yptr[i]; return sum; } }; static real get(const vector &x, const vector &y) { const real *xptr = &x[0]; const real *yptr = &y[0]; using hpx::dataflow; auto range = boost::irange(0, x.nseg); return hpx::parallel::transform_reduce( hpx::parallel::par, boost::begin(range), boost::end(range), math::zero(), std::plus(), [&x, &y, xptr, yptr](ptrdiff_t seg) { ptrdiff_t beg = seg * x.grain_size; ptrdiff_t end = std::min(beg + x.grain_size, x.size()); return dataflow(hpx::launch::async, process{xptr, yptr, beg, end}, x.safe_to_read[seg], y.safe_to_read[seg] ).get(); } ); } }; template < typename A, typename B, typename real > struct axpby_impl< A, hpx_vector, B, hpx_vector > { typedef hpx_vector vector; struct process_ab { typedef void result_type; A a; const real *xptr; B b; real *yptr; ptrdiff_t beg; ptrdiff_t end; template void operator()(T&&...) const { for(ptrdiff_t i = beg; i < end; ++i) yptr[i] = a * xptr[i] + b * yptr[i]; } }; struct process_a { typedef void result_type; A a; const real *xptr; real *yptr; ptrdiff_t beg; ptrdiff_t end; template void operator()(T&&...) const { for(ptrdiff_t i = beg; i < end; ++i) yptr[i] = a * xptr[i]; } }; static void apply(A a, const vector &x, B b, vector &y) { const real *xptr = &x[0]; real *yptr = &y[0]; using hpx::dataflow; auto range = boost::irange(0, x.nseg); if (b) { // y = a * x + b * y; hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [a, &x, b, &y, xptr, yptr](ptrdiff_t seg) { ptrdiff_t beg = seg * x.grain_size; ptrdiff_t end = std::min(beg + x.grain_size, x.size()); y.safe_to_read[seg] = dataflow(hpx::launch::async, process_ab{a, xptr, b, yptr, beg, end}, x.safe_to_read[seg], y.safe_to_read[seg], y.safe_to_write[seg] ); }); } else { // y = a * x; hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [a, &x, &y, xptr, yptr](ptrdiff_t seg) { ptrdiff_t beg = seg * x.grain_size; ptrdiff_t end = std::min(beg + x.grain_size, x.size()); y.safe_to_read[seg] = dataflow(hpx::launch::async, process_a{a, xptr, yptr, beg, end}, x.safe_to_read[seg], y.safe_to_write[seg] ); }); } } }; template < typename A, typename B, typename C, typename real > struct axpbypcz_impl< A, hpx_vector, B, hpx_vector, C, hpx_vector > { typedef hpx_vector vector; struct process_abc { A a; const real *xptr; B b; const real *yptr; C c; real *zptr; ptrdiff_t beg; ptrdiff_t end; template void operator()(T&&...) const { for(ptrdiff_t i = beg; i < end; ++i) zptr[i] = a * xptr[i] + b * yptr[i] + c * zptr[i]; } }; struct process_ab { A a; const real *xptr; B b; const real *yptr; real *zptr; ptrdiff_t beg; ptrdiff_t end; template void operator()(T&&...) 
const { for(ptrdiff_t i = beg; i < end; ++i) zptr[i] = a * xptr[i] + b * yptr[i]; } }; static void apply( A a, const vector &x, B b, const vector &y, C c, vector &z ) { const real *xptr = &x[0]; const real *yptr = &y[0]; real *zptr = &z[0]; using hpx::dataflow; auto range = boost::irange(0, x.nseg); if (c) { //z = a * x + b * y + c * z; hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [a, &x, b, &y, c, &z, xptr, yptr, zptr](ptrdiff_t seg) { ptrdiff_t beg = seg * x.grain_size; ptrdiff_t end = std::min(beg + x.grain_size, x.size()); z.safe_to_read[seg] = dataflow(hpx::launch::async, process_abc{a, xptr, b, yptr, c, zptr, beg, end}, x.safe_to_read[seg], y.safe_to_read[seg], z.safe_to_read[seg], z.safe_to_write[seg] ); }); } else { //z = a * x + b * y; hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [a, &x, b, &y, &z, xptr, yptr, zptr](ptrdiff_t seg) { ptrdiff_t beg = seg * x.grain_size; ptrdiff_t end = std::min(beg + x.grain_size, x.size()); z.safe_to_read[seg] = dataflow(hpx::launch::async, process_ab{a, xptr, b, yptr, zptr, beg, end}, x.safe_to_read[seg], y.safe_to_read[seg], z.safe_to_write[seg] ); }); } } }; template < typename A, typename B, typename real > struct vmul_impl< A, hpx_vector, hpx_vector, B, hpx_vector > { typedef hpx_vector vector; struct process_ab { A a; const real *xptr; const real *yptr; B b; real *zptr; ptrdiff_t beg; ptrdiff_t end; template void operator()(T&&...) const { for(ptrdiff_t i = beg; i < end; ++i) zptr[i] = a * xptr[i] * yptr[i] + b * zptr[i]; } }; struct process_a { A a; const real *xptr; const real *yptr; real *zptr; ptrdiff_t beg; ptrdiff_t end; template void operator()(T&&...) const { for(ptrdiff_t i = beg; i < end; ++i) zptr[i] = a * xptr[i] * yptr[i]; } }; static void apply(A a, const vector &x, const vector &y, B b, vector &z) { const real *xptr = &x[0]; const real *yptr = &y[0]; real *zptr = &z[0]; using hpx::dataflow; auto range = boost::irange(0, x.nseg); if (b) { //z = a * x * y + b * z; hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [a, &x, &y, b, &z, xptr, yptr, zptr](ptrdiff_t seg) { ptrdiff_t beg = seg * x.grain_size; ptrdiff_t end = std::min(beg + x.grain_size, x.size()); z.safe_to_read[seg] = dataflow(hpx::launch::async, process_ab{a, xptr, yptr, b, zptr, beg, end}, x.safe_to_read[seg], y.safe_to_read[seg], z.safe_to_read[seg], z.safe_to_write[seg] ); }); } else { //z = a * x * y; hpx::parallel::for_each( hpx::parallel::par, boost::begin(range), boost::end(range), [a, &x, &y, &z, xptr, yptr, zptr](ptrdiff_t seg) { ptrdiff_t beg = seg * x.grain_size; ptrdiff_t end = std::min(beg + x.grain_size, x.size()); z.safe_to_read[seg] = dataflow(hpx::launch::async, process_a{a, xptr, yptr, zptr, beg, end}, x.safe_to_read[seg], y.safe_to_read[seg], z.safe_to_write[seg] ); }); } } }; } // namespace backend } // namespace amgcl #endif amgcl-1.4.4/amgcl/backend/interface.hpp000066400000000000000000000322531451676433700177530ustar00rootroot00000000000000#ifndef AMGCL_BACKEND_INTERFACE_HPP #define AMGCL_BACKEND_INTERFACE_HPP /* The MIT License Copyright (c) 2012-2022 Denis Demidov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * \file amgcl/backend/interface.hpp * \author Denis Demidov * \brief Backend interface required for AMG. */ #include #include #include #include namespace amgcl { /// Provided backends. namespace backend { /** * \defgroup backends Provided backends * \brief Backends implemented in AMGCL. * * A backend in AMGCL is a class that defines matrix and vector types together * with several operations on them, such as creation, matrix-vector products, * vector sums, inner products etc. The AMG hierarchy is moved to the * specified backend upon construction. The solution phase then uses types and * operations defined in the backend. This enables transparent acceleration of * the solution phase with OpenMP, OpenCL, CUDA, or any other technologies. */ /** * \defgroup backend_interface Backend interface * \brief Backend interface specification. * * One has to specify these templates in order to define a new backend. */ /** \addtogroup backend_interface * @{ */ /// Metafunction that checks if two backends are compatible. /** * That is, a solver in SBackend may be used together with a preconditioner in PBackend. */ template struct backends_compatible : std::is_same {}; /// Metafunction that returns value type of a matrix or a vector type. template struct value_type { typedef typename T::value_type type; }; /// Metafunction that returns column type of a matrix. template struct col_type { typedef typename T::col_type type; }; /// Metafunction that returns pointer type of a matrix. template struct ptr_type { typedef typename T::ptr_type type; }; /// Implementation for function returning the number of rows in a matrix. /** \note Used in rows() */ template struct rows_impl { static size_t get(const Matrix &A) { return A.rows(); } }; /// Implementation for function returning the number of columns in a matrix. /** \note Used in cols() */ template struct cols_impl { static size_t get(const Matrix &A) { return A.cols(); } }; /// Implementation for function returning number of bytes allocated for a matrix/vector. /** \note Used in bytes() */ template struct bytes_impl { // Use bytes() method when available. template static auto get_impl(const U &t, int) -> decltype(t.bytes()) { return t.bytes(); } // Fallback to zero. template static size_t get_impl(const U&, ...) { return 0; } static size_t get(const T &t) { return get_impl(t, 0); } }; template struct ptr_data_impl { typedef typename Matrix::PTR_DATA_NOT_IMPLEMENTED type; }; template struct col_data_impl { typedef typename Matrix::COL_DATA_NOT_IMPLEMENTED type; }; template struct val_data_impl { typedef typename Matrix::VAL_DATA_NOT_IMPLEMENTED type; }; /// Implementation for function returning the number of nonzeros in a matrix. 
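/// Backends and adapters hook into this interface by specializing the
/// corresponding *_impl structs for their own types; e.g., a hypothetical
/// matrix class my_matrix exposing a member nnz could be adapted as:
/// \code
/// template <> struct nonzeros_impl<my_matrix> {
///     static size_t get(const my_matrix &A) { return A.nnz; }
/// };
/// \endcode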
/** \note Used in nonzeros() */ template struct nonzeros_impl { static size_t get(const Matrix &A) { return A.nonzeros(); } }; /// Implementation for function returning the number of nonzeros in a matrix row. /** \note Used in row_nonzeros() */ template struct row_nonzeros_impl { typedef typename Matrix::ROW_NONZEROS_NOT_IMPLEMENTED type; }; /// Metafunction returning the row iterator type for a matrix type. /** * \note This only has to be implemented in the backend if support for serial * smoothers (Gauss-Seidel or ILU0) is required. */ template struct row_iterator { typedef typename Matrix::row_iterator type; }; /// Implementation for function returning row iterator for a matrix. /** * \note This only has to be implemented in the backend if support for serial * smoothers (Gauss-Seidel or ILU0) is required. * \note Used in row_begin() */ template struct row_begin_impl { static typename row_iterator::type get(const Matrix &A, size_t row) { return A.row_begin(row); } }; /// Implementation for matrix-vector product. /** \note Used in spmv() */ template struct spmv_impl { typedef typename Matrix::SPMV_NOT_IMPLEMENTED type; }; /// Implementation for residual error compuatation. /** \note Used in residual() */ template struct residual_impl { typedef typename Matrix::RESIDUAL_NOT_IMPLEMENTED type; }; /// Implementation for zeroing out a vector. /** \note Used in clear() */ template struct clear_impl { typedef typename Vector::CLEAR_NOT_IMPLEMENTED type; }; /// Implementation for vector copy. /** \note Used in copy() */ template struct copy_impl { typedef typename Vector1::COPY_NOT_IMPLEMENTED type; }; /// Implementation for inner product. /** \note Used in inner_product() */ template struct inner_product_impl { typedef typename Vector1::INNER_PRODUCT_NOT_IMPLEMENTED type; }; /// Implementation for linear combination of two vectors. /** \note Used in axpby() */ template struct axpby_impl { typedef typename Vector1::AXPBY_NOT_IMPLEMENTED type; }; /// Implementation for linear combination of three vectors. /** \note Used in axpbypcz() */ template struct axpbypcz_impl { typedef typename Vector1::AXPBYPCZ_NOT_IMPLEMENTED type; }; /// Implementation for element-wize vector product. /** \note Used in vmul() */ template struct vmul_impl { typedef typename Vector1::VMUL_NOT_IMPLEMENTED type; }; /// Reinterpret the vector to be compatible with the matrix value type template struct reinterpret_as_rhs_impl { typedef typename MatrixValue::REINTERPRET_AS_RHS_NOT_IMPLEMENTED type; }; /** @} */ /// Returns the number of rows in a matrix. template size_t rows(const Matrix &matrix) { return rows_impl::get(matrix); } /// Returns the number of columns in a matrix. template size_t cols(const Matrix &matrix) { return cols_impl::get(matrix); } /// Returns number of bytes allocated for the container (matrix / vector) template size_t bytes(const T &t) { return bytes_impl::get(t); } template typename ptr_data_impl::type ptr_data(const Matrix &matrix) { return ptr_data_impl::get(matrix); } template typename col_data_impl::type col_data(const Matrix &matrix) { return col_data_impl::get(matrix); } template typename val_data_impl::type val_data(const Matrix &matrix) { return val_data_impl::get(matrix); } /// Returns the number of nonzeros in a matrix. template size_t nonzeros(const Matrix &matrix) { return nonzeros_impl::get(matrix); } /// Returns row iterator for a matrix. 
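/// For example, a dense copy of row i could be extracted as follows
/// (a sketch; `A` is any matrix type providing the iterator):
/// \code
/// std::vector<double> row(amgcl::backend::cols(A), 0.0);
/// for(auto a = amgcl::backend::row_begin(A, i); a; ++a)
///     row[a.col()] = a.value();
/// \endcode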
/// Returns row iterator for a matrix.
template <class Matrix>
typename row_iterator<Matrix>::type
row_begin(const Matrix &matrix, size_t row) {
    return row_begin_impl<Matrix>::get(matrix, row);
}

/// Returns number of nonzeros in a matrix row.
template <class Matrix>
size_t row_nonzeros(const Matrix &A, size_t row) {
    return row_nonzeros_impl<Matrix>::get(A, row);
}

/// Performs matrix-vector product.
/**
 * \f[y = \alpha A x + \beta y.\f]
 */
template <class Alpha, class Matrix, class Vector1, class Beta, class Vector2>
void spmv(Alpha alpha, const Matrix &A, const Vector1 &x, Beta beta, Vector2 &y)
{
    AMGCL_TIC("spmv");
    spmv_impl<Alpha, Matrix, Vector1, Beta, Vector2>::apply(alpha, A, x, beta, y);
    AMGCL_TOC("spmv");
}

/// Computes residual error.
/**
 * \f[r = rhs - Ax.\f]
 */
template <class Matrix, class Vector1, class Vector2, class Vector3>
void residual(const Vector1 &rhs, const Matrix &A, const Vector2 &x, Vector3 &r)
{
    AMGCL_TIC("residual");
    residual_impl<Matrix, Vector1, Vector2, Vector3>::apply(rhs, A, x, r);
    AMGCL_TOC("residual");
}

/// Zeros out a vector.
template <class Vector>
void clear(Vector &x)
{
    AMGCL_TIC("clear");
    clear_impl<Vector>::apply(x);
    AMGCL_TOC("clear");
}

/// Vector copy.
template <class Vector1, class Vector2>
void copy(const Vector1 &x, Vector2 &y)
{
    AMGCL_TIC("copy");
    copy_impl<Vector1, Vector2>::apply(x, y);
    AMGCL_TOC("copy");
}

/// Computes inner product of two vectors.
template <class Vector1, class Vector2>
typename math::inner_product_impl<
    typename value_type<Vector1>::type
    >::return_type
inner_product(const Vector1 &x, const Vector2 &y)
{
    typedef typename math::inner_product_impl<
        typename value_type<Vector1>::type
        >::return_type result_type;

    AMGCL_TIC("inner_product");
    result_type p = inner_product_impl<Vector1, Vector2>::get(x, y);
    AMGCL_TOC("inner_product");

    return p;
}

/// Computes linear combination of two vectors.
/**
 * \f[y = ax + by.\f]
 */
template <class A, class Vector1, class B, class Vector2>
void axpby(A a, Vector1 const &x, B b, Vector2 &y) {
    AMGCL_TIC("axpby");
    axpby_impl<A, Vector1, B, Vector2>::apply(a, x, b, y);
    AMGCL_TOC("axpby");
}

/// Computes linear combination of three vectors.
/**
 * \f[z = ax + by + cz.\f]
 */
template <class A, class Vector1, class B, class Vector2, class C, class Vector3>
void axpbypcz(A a, Vector1 const &x, B b, Vector2 const &y, C c, Vector3 &z) {
    AMGCL_TIC("axpbypcz");
    axpbypcz_impl<A, Vector1, B, Vector2, C, Vector3>::apply(a, x, b, y, c, z);
    AMGCL_TOC("axpbypcz");
}

/// Computes element-wise vector product.
/**
 * \f[z = \alpha xy + \beta z.\f]
 */
template <class Alpha, class Vector1, class Vector2, class Beta, class Vector3>
void vmul(Alpha alpha, const Vector1 &x, const Vector2 &y, Beta beta, Vector3 &z)
{
    AMGCL_TIC("vmul");
    vmul_impl<Alpha, Vector1, Vector2, Beta, Vector3>::apply(alpha, x, y, beta, z);
    AMGCL_TOC("vmul");
}

/// Reinterpret the vector to be compatible with the matrix value type
template <class MatrixValue, class Vector>
typename reinterpret_as_rhs_impl<
    MatrixValue,
    typename std::decay<Vector>::type,
    std::is_const<typename std::remove_reference<Vector>::type>::value
    >::return_type
reinterpret_as_rhs(Vector &&x) {
    return reinterpret_as_rhs_impl<
        MatrixValue,
        typename std::decay<Vector>::type,
        std::is_const<typename std::remove_reference<Vector>::type>::value
        >::get(std::forward<Vector>(x));
}

/// Is the relaxation supported by the backend?
template <class Backend, template <class> class Relaxation, class Enable = void>
struct relaxation_is_supported : std::true_type {};
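/*
 * The free functions above delegate to the corresponding *_impl
 * specializations, so generic solver code can be written against any
 * backend. For example, one Richardson-style iteration step (a sketch;
 * assumes compatible matrix/vector types and a scalar omega):
 *
 * \code
 * // r = f - A x; x = x + omega * r
 * amgcl::backend::residual(f, A, x, r);
 * amgcl::backend::axpby(omega, r, 1.0, x);
 * \endcode
 */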
/// Is the coarsening supported by the backend?
template <class Backend, template <class> class Coarsening, class Enable = void>
struct coarsening_is_supported : std::true_type {};

/// Linear combination of vectors
/**
 * \f[ y = \sum_j c_j v_j + \alpha y \f]
 */
template <class Coefs, class Vecs, class Coef, class Vec>
void lin_comb(size_t n, const Coefs &c, const Vecs &v, const Coef &alpha, Vec &y) {
    axpby(c[0], *v[0], alpha, y);
    size_t i = 1;
    for(; i + 1 < n; i += 2)
        axpbypcz(c[i], *v[i], c[i+1], *v[i+1], math::identity<Coef>(), y);
    for(; i < n; ++i)
        axpby(c[i], *v[i], math::identity<Coef>(), y);
}

} // namespace backend
} // namespace amgcl

#endif
amgcl-1.4.4/amgcl/backend/mkl.hpp000066400000000000000000000164311451676433700165760ustar00rootroot00000000000000
#ifndef AMGCL_BACKEND_MKL_HPP
#define AMGCL_BACKEND_MKL_HPP

/*
The MIT License

Copyright (c) 2012-2014 Denis Demidov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/**
 * \file   amgcl/backend/mkl.hpp
 * \author Denis Demidov
 * \brief  Intel Math Kernel library backend.
 */

#include
#include
#include

namespace amgcl {
namespace backend {

/// Sparse matrix stored in CRS format.
struct mkl_crs : public crs<double, int, int> {
    typedef crs<double, int, int> Base;

    template <class PtrRange, class ColRange, class ValRange>
    mkl_crs(size_t nrows, size_t ncols,
            const PtrRange &ptr_range,
            const ColRange &col_range,
            const ValRange &val_range
           ) : Base(nrows, ncols, ptr_range, col_range, val_range)
    {}

    template <class Matrix>
    mkl_crs(const Matrix &A) : Base(A) {}
};

/// BLAS vector.
class mkl_vec {
    public:
        mkl_vec() {}
        mkl_vec(size_t n) : buf(n) {}

        template <class Iterator>
        mkl_vec(Iterator beg, Iterator end) : buf(beg, end) {}

        size_t size() const { return buf.size(); }

        double  operator[](size_t i) const { return buf[i]; }
        double& operator[](size_t i)       { return buf[i]; }

        const double* data() const { return buf.data(); }
        double*       data()       { return buf.data(); }
    private:
        std::vector<double> buf;
};

/// Intel Math Kernel library backend.
struct mkl {
    typedef double value_type;
    typedef int    index_type;
    typedef int    col_type;
    typedef int    ptr_type;

    typedef mkl_crs matrix;
    typedef mkl_vec vector;
    typedef vector  matrix_diagonal;

    typedef solver::skyline_lu<value_type> direct_solver;

    /// Backend parameters.
    struct params {
        params() {}
        params(const boost::property_tree::ptree&) {}
    };

    /// Copy matrix from builtin backend.
    static std::shared_ptr<matrix>
    copy_matrix(std::shared_ptr< typename builtin<value_type>::matrix > A, const params&)
    {
        return std::make_shared<matrix>(*A);
    }

    /// Copy vector from builtin backend.
    static std::shared_ptr<vector>
    copy_vector(typename builtin<value_type>::vector const &x, const params&)
    {
        return std::make_shared<vector>(x.data(), x.data() + x.size());
    }

    /// Copy vector from builtin backend.
static std::shared_ptr copy_vector( std::shared_ptr< typename builtin::vector > x, const params &prm ) { return copy_vector(*x, prm); } /// Create vector of the specified size. static std::shared_ptr create_vector(size_t size, const params&) { return std::make_shared(size); } /// Create direct solver for coarse level static std::shared_ptr create_solver(std::shared_ptr< typename builtin::matrix > A, const params&) { return std::make_shared(*A); } }; //--------------------------------------------------------------------------- // Backend interface implementation //--------------------------------------------------------------------------- template <> struct value_type < mkl_crs > { typedef double type; }; template <> struct value_type < mkl_vec > { typedef double type; }; template <> struct rows_impl< mkl_crs > { static size_t get(const mkl_crs &A) { return A.nrows; } }; template <> struct cols_impl< mkl_crs > { static size_t get(const mkl_crs &A) { return A.ncols; } }; template <> struct nonzeros_impl< mkl_crs > { static size_t get(const mkl_crs &A) { return A.ptr[A.nrows]; } }; template struct spmv_impl< Alpha, mkl_crs, mkl_vec, Beta, mkl_vec > { static void apply(Alpha alpha, const mkl_crs &A, const mkl_vec &x, Beta beta, mkl_vec &y) { MKL_INT m = A.nrows; MKL_INT k = A.ncols; mkl_dcsrmv("N", &m, &k, &alpha, "G__C", const_cast(&A.val[0]), const_cast(&A.col[0]), const_cast(&A.ptr[0]), const_cast(&A.ptr[1]), const_cast(x.data()), &beta, y.data()); } }; template <> struct residual_impl< mkl_crs, mkl_vec, mkl_vec, mkl_vec > { static void apply(const mkl_vec &rhs, const mkl_crs &A, const mkl_vec &x, mkl_vec &r) { cblas_dcopy(rhs.size(), rhs.data(), 1, r.data(), 1); spmv_impl::apply(-1, A, x, 1, r); } }; template <> struct clear_impl< mkl_vec > { static void apply(mkl_vec &x) { std::fill_n(x.data(), x.size(), 0.0); } }; template <> struct copy_impl< mkl_vec, mkl_vec > { static void apply(const mkl_vec &x, mkl_vec &y) { cblas_dcopy(x.size(), x.data(), 1, y.data(), 1); } }; template <> struct inner_product_impl< mkl_vec, mkl_vec > { static double get(const mkl_vec &x, const mkl_vec &y) { return cblas_ddot(x.size(), x.data(), 1, y.data(), 1); } }; template struct axpby_impl< A, mkl_vec, B, mkl_vec > { static void apply(A a, const mkl_vec &x, B b, mkl_vec &y) { cblas_dscal(y.size(), b, y.data(), 1); cblas_daxpy(y.size(), a, x.data(), 1, y.data(), 1); } }; template struct axpbypcz_impl< A, mkl_vec, B, mkl_vec, C, mkl_vec > { static void apply( A a, const mkl_vec &x, B b, const mkl_vec &y, C c, mkl_vec &z ) { cblas_dscal(z.size(), c, z.data(), 1); cblas_daxpy(z.size(), a, x.data(), 1, z.data(), 1); cblas_daxpy(z.size(), b, y.data(), 1, z.data(), 1); } }; template struct vmul_impl< A, mkl_vec, mkl_vec, B, mkl_vec > { static void apply(A a, const mkl_vec &x, const mkl_vec &y, B b, mkl_vec &z) { cblas_dsbmv(CblasRowMajor, CblasLower, z.size(), 0, a, x.data(), 1, y.data(), 1, b, z.data(), 1); } }; } // namespace backend } // namespace amgcl #endif amgcl-1.4.4/amgcl/backend/vexcl.hpp000066400000000000000000000365701451676433700171420ustar00rootroot00000000000000#ifndef AMGCL_BACKEND_VEXCL_HPP #define AMGCL_BACKEND_VEXCL_HPP /* The MIT License Copyright (c) 2012-2022 Denis Demidov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 
and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * \file amgcl/backend/vexcl.hpp * \author Denis Demidov * \brief VexCL backend. */ #include #include #include #include #include #include #include #include #include #include #include #include namespace amgcl { namespace solver { /** Wrapper around solver::skyline_lu for use with the VexCL backend. * Copies the rhs to the host memory, solves the problem using the host CPU, * then copies the solution back to the compute device(s). */ template struct vexcl_skyline_lu : solver::skyline_lu { typedef solver::skyline_lu Base; typedef typename math::rhs_of::type rhs_type; mutable std::vector _rhs, _x; template vexcl_skyline_lu(const Matrix &A, const Params&) : Base(*A), _rhs(backend::rows(*A)), _x(backend::rows(*A)) { } template void operator()(const Vec1 &rhs, Vec2 &x) const { vex::copy(rhs, _rhs); static_cast(this)->operator()(_rhs, _x); vex::copy(_x, x); } size_t bytes() const { return backend::bytes(*static_cast(this)) + backend::bytes(_rhs) + backend::bytes(_x); } }; } namespace backend { /// The VexCL backend parameters. struct vexcl_params { std::vector< vex::backend::command_queue > q; ///< Command queues that identify compute devices to use with VexCL. /// Do CSR to ELL conversion on the GPU side. /** This will result in faster setup, but will require more GPU memory. */ bool fast_matrix_setup; vexcl_params() : fast_matrix_setup(true) {} #ifndef AMGCL_NO_BOOST vexcl_params(const boost::property_tree::ptree &p) : fast_matrix_setup(p.get("fast_matrix_setup", vexcl_params().fast_matrix_setup)) { std::vector *ptr = 0; ptr = p.get("q", ptr); if (ptr) q = *ptr; check_params(p, {"q", "fast_matrix_setup"}); } void get(boost::property_tree::ptree &p, const std::string &path) const { p.put(path + "q", &q); p.put(path + "fast_matrix_setup", fast_matrix_setup); } #endif const std::vector& context() const { if (q.empty()) return vex::current_context().queue(); else return q; } }; /** * The backend uses the VexCL * library for accelerating solution on the modern GPUs and multicore * processors with the help of OpenCL or CUDA technologies. * The VexCL backend stores the system matrix as ``vex::SpMat`` and * expects the right hand side and the solution vectors to be instances of the * ``vex::vector`` type. */ template > struct vexcl { typedef real value_type; typedef ptrdiff_t index_type; typedef ColumnType col_type; typedef PointerType ptr_type; typedef vex::sparse::distributed< vex::sparse::matrix > matrix; typedef typename math::rhs_of::type rhs_type; typedef vex::vector vector; typedef vex::vector matrix_diagonal; typedef DirectSolver direct_solver; struct provides_row_iterator : std::false_type {}; typedef vexcl_params params; static std::string name() { return "vexcl"; } // Copy matrix from builtin backend. 
static std::shared_ptr copy_matrix(std::shared_ptr< typename builtin::matrix > A, const params &prm) { precondition(!prm.context().empty(), "Empty VexCL context!"); const typename builtin::matrix &a = *A; const size_t n = rows(*A); const size_t m = cols(*A); const size_t nnz = a.ptr[n]; return std::make_shared(prm.context(), n, m, boost::make_iterator_range(a.ptr, a.ptr + n+1), boost::make_iterator_range(a.col, a.col + nnz), boost::make_iterator_range(a.val, a.val + nnz), prm.fast_matrix_setup ); } // Copy vector from builtin backend. template static std::shared_ptr< vex::vector > copy_vector(const std::vector &x, const params &prm) { precondition(!prm.context().empty(), "Empty VexCL context!"); return std::make_shared< vex::vector >(prm.context(), x); } template static std::shared_ptr< vex::vector > copy_vector(const numa_vector &x, const params &prm) { precondition(!prm.context().empty(), "Empty VexCL context!"); return std::make_shared< vex::vector >(prm.context(), x.size(), x.data()); } // Copy vector from builtin backend. template static std::shared_ptr< vex::vector > copy_vector(std::shared_ptr< numa_vector > x, const params &prm) { return copy_vector(*x, prm); } // Create vector of the specified size. static std::shared_ptr create_vector(size_t size, const params &prm) { precondition(!prm.context().empty(), "Empty VexCL context!"); return std::make_shared(prm.context(), size); } struct gather { size_t n; mutable vex::gather G; mutable std::vector buf; gather(size_t src_size, const std::vector &I, const params &prm) : n(I.size()), G(prm.context(), src_size, std::vector(I.begin(), I.end())) { } template void operator()(const vex::vector &src, vex::vector &dst) const { if (buf.size() < sizeof(D) * n) buf.resize(sizeof(D) * n); auto t = reinterpret_cast(buf.data()); G(src, t); vex::copy(t, t + n, dst.begin()); } template void operator()(const vex::vector &vec, std::vector &vals) const { G(vec, vals); } }; struct scatter { size_t n; mutable vex::scatter S; mutable std::vector buf; scatter(size_t size, const std::vector &I, const params &prm) : n(I.size()), S(prm.context(), size, std::vector(I.begin(), I.end())) { } template void operator()(const vex::vector &src, vex::vector &dst) const { if (buf.size() < sizeof(D) * n) buf.resize(sizeof(D) * n); auto t = reinterpret_cast(buf.data()); vex::copy(src.begin(), src.end(), t); S(t, dst); } }; // Create direct solver for coarse level static std::shared_ptr create_solver(std::shared_ptr< typename builtin::matrix > A, const params &prm) { return std::make_shared(A, prm); } }; // Hybrid backend uses scalar matrices to build the hierarchy, // but stores the computed matrices in the block format. 
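/*
 * Typical setup of the scalar VexCL backend looks like this (a sketch;
 * assumes a working VexCL context):
 *
 * \code
 * vex::Context ctx(vex::Filter::Env);
 *
 * typedef amgcl::backend::vexcl<double> Backend;
 * Backend::params bprm;
 * bprm.q = ctx;  // copy the command queues from the context
 * \endcode
 *
 * The hybrid backend below is set up the same way.
 */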
template < typename BlockType, typename ColumnType = ptrdiff_t, typename PointerType = ColumnType, class DirectSolver = solver::vexcl_skyline_lu::type> > struct vexcl_hybrid : public vexcl::type, ColumnType, PointerType, DirectSolver> { typedef typename math::scalar_of::type ScalarType; typedef vexcl Base; typedef vex::sparse::distributed< vex::sparse::matrix< BlockType, typename Base::col_type, typename Base::ptr_type > > matrix; static std::shared_ptr copy_matrix(std::shared_ptr< typename builtin::matrix > As, const typename Base::params &prm) { precondition(!prm.context().empty(), "Empty VexCL context!"); typename builtin::matrix A(amgcl::adapter::block_matrix(*As)); const size_t n = rows(A); const size_t m = cols(A); const size_t nnz = A.ptr[n]; return std::make_shared(prm.context(), n, m, boost::make_iterator_range(A.ptr, A.ptr + n+1), boost::make_iterator_range(A.col, A.col + nnz), boost::make_iterator_range(A.val, A.val + nnz), prm.fast_matrix_setup ); } }; //--------------------------------------------------------------------------- // Backend interface implementation //--------------------------------------------------------------------------- template struct backends_compatible< vexcl, vexcl > : std::true_type {}; template struct backends_compatible< vexcl_hybrid, vexcl_hybrid > : std::true_type {}; template struct backends_compatible< vexcl, vexcl_hybrid > : std::true_type {}; template struct backends_compatible< vexcl_hybrid, vexcl > : std::true_type {}; template < typename V, typename C, typename P > struct bytes_impl< vex::sparse::distributed > > { static size_t get(const vex::sparse::distributed > &A) { return sizeof(P) * (A.rows() + 1) + sizeof(C) * A.nonzeros() + sizeof(V) * A.nonzeros(); } }; template < typename V > struct bytes_impl< vex::vector > { static size_t get(const vex::vector &v) { return v.size() * sizeof(V); } }; template < typename Alpha, typename Beta, typename Va, typename Vx, typename Vy, typename C, typename P > struct spmv_impl< Alpha, vex::sparse::distributed>, vex::vector, Beta, vex::vector, typename std::enable_if< math::static_rows::value == 1 && math::static_rows::value == 1 && math::static_rows::value == 1 >::type > { typedef vex::sparse::distributed> matrix; static void apply(Alpha alpha, const matrix &A, const vex::vector &x, Beta beta, vex::vector &y) { if (beta) y = alpha * (A * x) + beta * y; else y = alpha * (A * x); } }; template < typename Va, typename Vf, typename Vx, typename Vr, typename C, typename P > struct residual_impl< vex::sparse::distributed>, vex::vector, vex::vector, vex::vector, typename std::enable_if< !is_static_matrix::value && !is_static_matrix::value && !is_static_matrix::value && !is_static_matrix::value >::type > { typedef vex::sparse::distributed> matrix; static void apply(const vex::vector &rhs, const matrix &A, const vex::vector &x, vex::vector &r) { r = rhs - A * x; } }; template < typename V > struct clear_impl< vex::vector > { static void apply(vex::vector &x) { x = 0; } }; template < class V, class T > struct copy_impl > { static void apply(const V &x, vex::vector &y) { vex::copy(x, y); } }; template < class T, class V > struct copy_impl, V> { static void apply(const vex::vector &x, V &y) { vex::copy(x, y); } }; template < class T1, class T2 > struct copy_impl, vex::vector> { static void apply(const vex::vector &x, vex::vector &y) { vex::copy(x, y); } }; template < typename V > struct inner_product_impl< vex::vector, vex::vector > { static V get(const vex::vector &x, const vex::vector &y) { vex::Reductor sum( 
x.queue_list() ); return sum(x * y); } }; template < typename A, typename B, typename V1, typename V2 > struct axpby_impl< A, vex::vector, B, vex::vector > { static void apply(A a, const vex::vector &x, B b, vex::vector &y) { if (b) y = a * x + b * y; else y = a * x; } }; template < typename A, typename B, typename C, typename V1, typename V2, typename V3 > struct axpbypcz_impl< A, vex::vector, B, vex::vector, C, vex::vector > { static void apply( A a, const vex::vector &x, B b, const vex::vector &y, C c, vex::vector &z ) { if (c) z = a * x + b * y + c * z; else z = a * x + b * y; } }; template < typename A, typename B, typename Vx, typename Vy, typename Vz > struct vmul_impl< A, vex::vector, vex::vector, B, vex::vector > { static void apply(A a, const vex::vector &x, const vex::vector &y, B b, vex::vector &z) { if (b) z = a * x * y + b * z; else z = a * x * y; } }; template struct reinterpret_as_rhs_impl, IsConst> { typedef typename math::scalar_of::type scalar_type; typedef typename math::rhs_of::type rhs_type; typedef typename math::replace_scalar::type dst_type; typedef vex::vector return_type; static return_type get(const vex::vector &x) { return x.template reinterpret(); } }; } // namespace backend } // namespace amgcl #endif amgcl-1.4.4/amgcl/backend/vexcl_static_matrix.hpp000066400000000000000000001153441451676433700220720ustar00rootroot00000000000000#ifndef AMGCL_BACKEND_VEXCL_STATIC_MATRIX_HPP #define AMGCL_BACKEND_VEXCL_STATIC_MATRIX_HPP /* The MIT License Copyright (c) 2012-2022 Denis Demidov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * \file amgcl/backend/vexcl_static_matrix.hpp * \author Denis Demidov * \brief Static matrix support for the VexCL backend. 
*/ #include #include #include #include namespace vex { template struct is_cl_native< amgcl::static_matrix > : std::true_type {}; template struct type_name_impl< amgcl::static_matrix > { static std::string get() { std::ostringstream s; s << "amgcl_matrix_" << type_name() << "_" << N << "x" << M; return s.str(); } }; template struct cl_scalar_of< amgcl::static_matrix > { typedef T type; }; namespace sparse { template struct rhs_of< amgcl::static_matrix > { typedef amgcl::static_matrix type; }; template struct spmv_ops_impl, amgcl::static_matrix> { typedef amgcl::static_matrix matrix_value; typedef amgcl::static_matrix vector_value; static void decl_accum_var(backend::source_generator &src, const std::string &name) { src.new_line() << type_name>() << " " << name << ";"; for(int i = 0; i < N; ++i) { src.new_line() << name << ".data[" << i << "][0] = 0;"; } } static void append(backend::source_generator &src, const std::string &sum, const std::string &val) { for(int i = 0; i < N; ++i) src.new_line() << sum << ".data[" << i << "][0] += " << val << ".data[" << i << "][0];"; } static void append_product(backend::source_generator &src, const std::string &sum, const std::string &mat_val, const std::string &vec_val) { src.open("{"); src.new_line() << type_name() << " v = " << vec_val << ";"; for(int i = 0; i < N; ++i) { src.new_line() << sum << ".data[" << i << "][0] += "; for(int j = 0; j < N; ++j) { if (j) src << " + "; src << mat_val << ".data[" << i << "][" << j << "] * v.data[" << j << "][0]"; } src << ";"; } src.close("}"); } }; template class ell, Col, Ptr> { public: typedef amgcl::static_matrix Val; typedef Val value_type; typedef Val val_type; typedef Col col_type; typedef Ptr ptr_type; template ell( const std::vector &q, size_t nrows, size_t ncols, const PtrRange &ptr, const ColRange &col, const ValRange &val, bool fast = true ) : q(q[0]), n(nrows), m(ncols), nnz(std::distance(std::begin(val), std::end(val))), ell_pitch(alignup(nrows, 16U)), csr_nnz(0) { precondition(q.size() == 1, "sparse::ell is only supported for single-device contexts"); if (fast) { convert(ptr, col, val); return; } /* 1. Get optimal ELL widths for local and remote parts. */ // Speed of ELL relative to CSR: const double ell_vs_csr = 3.0; // Find maximum widths for local and remote parts: size_t max_width = 0; for(size_t i = 0; i < n; ++i) max_width = std::max(max_width, ptr[i+1] - ptr[i]); // Build width distribution histogram. std::vector hist(max_width + 1, 0); for(size_t i = 0; i < n; ++i) ++hist[ptr[i+1] - ptr[i]]; // Estimate optimal width for ELL part of the matrix. ell_width = max_width; for(size_t i = 0, rows = n; i < max_width; ++i) { rows -= hist[i]; // Number of rows wider than i. if (ell_vs_csr * rows < n) { ell_width = i; break; } } if (ell_width == 0) { assert(csr_nnz == nnz); csr_ptr = backend::device_vector(q[0], n + 1, &ptr[0]); csr_col = backend::device_vector(q[0], csr_nnz, &col[0]); csr_val = create_device_vector (q[0], csr_nnz, &val[0], false); return; } size_t ell_nnz = ell_pitch * ell_width; // Count nonzeros in CSR part of the matrix. for(size_t i = ell_width + 1; i <= max_width; ++i) csr_nnz += hist[i] * (i - ell_width); /* 3. Split the input matrix into ELL and CSR submatrices. 
*/ std::vector _ell_col(ell_nnz, static_cast(-1)); std::vector _ell_val(ell_nnz * N * N); std::vector _csr_ptr; std::vector _csr_col; std::vector _csr_val; if (csr_nnz) { _csr_ptr.resize(n + 1); _csr_col.resize(csr_nnz); _csr_val.resize(csr_nnz * N * N); _csr_ptr[0] = 0; for(size_t i = 0; i < n; ++i) { size_t w = ptr[i+1] - ptr[i]; _csr_ptr[i+1] = _csr_ptr[i] + (w > ell_width ? w - ell_width : 0); } } for(size_t i = 0; i < n; ++i) { size_t w = 0; Ptr csr_head = csr_nnz ? _csr_ptr[i] : 0; for(Ptr j = ptr[i], e = ptr[i+1]; j < e; ++j, ++w) { Col c = col[j]; Val v = val[j]; if (w < ell_width) { _ell_col[i + w * ell_pitch] = c; for(int k = 0, ii = 0; ii < N; ++ii) for(int jj = 0; jj < N; ++jj, ++k) _ell_val[k * ell_nnz + w * ell_pitch + i] = v(ii,jj); } else { _csr_col[csr_head] = c; for(int k = 0, ii = 0; ii < N; ++ii) for(int jj = 0; jj < N; ++jj, ++k) _csr_val[k * csr_nnz + csr_head] = v(ii,jj); ++csr_head; } } } { size_t ell_size = ell_pitch * ell_width; ell_col = backend::device_vector(q[0], ell_size, _ell_col.data()); ell_val = backend::device_vector (q[0], ell_nnz * N * N, _ell_val.data()); } if (csr_nnz) { csr_ptr = backend::device_vector(q[0], n + 1, _csr_ptr.data()); csr_col = backend::device_vector(q[0], csr_nnz, _csr_col.data()); csr_val = backend::device_vector (q[0], csr_nnz * N * N, _csr_val.data()); } } // Dummy matrix; used internally to pass empty parameters to kernels. ell(const backend::command_queue &q) : q(q), n(0), m(0), nnz(0), ell_width(0), ell_pitch(0), csr_nnz(0) {} template friend typename std::enable_if< boost::proto::matches< typename boost::proto::result_of::as_expr::type, vector_expr_grammar >::value, matrix_vector_product >::type operator*(const ell &A, const Expr &x) { return matrix_vector_product(A, x); } template static void terminal_preamble(const Vector &x, backend::source_generator &src, const backend::command_queue &q, const std::string &prm_name, detail::kernel_generator_state_ptr state) { detail::output_terminal_preamble tp(src, q, prm_name + "_x", state); boost::proto::eval(boost::proto::as_child(x), tp); } template static void local_terminal_init(const Vector &x, backend::source_generator &src, const backend::command_queue &q, const std::string &prm_name, detail::kernel_generator_state_ptr state) { typedef typename detail::return_type::type x_type; typedef amgcl::static_matrix vector_value; src.new_line() << type_name() << " " << prm_name << "_sum;"; for(int i = 0; i < N; ++i) { src.new_line() << prm_name << "_sum.data[" << i << "][0] = 0;"; } src.open("{"); // ELL part src.new_line() << type_name() << " " << prm_name << "_ell_size = " << prm_name << "_ell_width * " << prm_name << "_ell_pitch;"; src.new_line() << "for(size_t j = 0; j < " << prm_name << "_ell_width; ++j)"; src.open("{"); src.new_line() << type_name() << " nnz_idx = idx + j * " << prm_name << "_ell_pitch;"; src.new_line() << type_name() << " c = " << prm_name << "_ell_col[nnz_idx];"; src.new_line() << "if (c != (" << type_name() << ")(-1))"; src.open("{"); src.new_line() << type_name() << " idx = c;"; { detail::output_local_preamble init_x(src, q, prm_name + "_x", state); boost::proto::eval(boost::proto::as_child(x), init_x); backend::source_generator vec_value; detail::vector_expr_context expr_x(vec_value, q, prm_name + "_x", state); boost::proto::eval(boost::proto::as_child(x), expr_x); src.open("{"); src.new_line() << type_name() << " v = " << vec_value.str() << ";"; for(int k = 0, j = 0; j < N; ++j) { src.new_line() << prm_name << "_sum.data[" << j << "][0] += "; for(int i = 0; i < 
N; ++i, ++k) { if (i) src << " + "; src << prm_name << "_ell_val[" << k << " * " << prm_name << "_ell_size + nnz_idx] * v.data[" << i << "][0]"; } src << ";"; } src.close("}"); } src.close("} else break;"); src.close("}"); // CSR part src.new_line() << "if (" << prm_name << "_csr_ptr)"; src.open("{"); src.new_line() << type_name() << " " << prm_name << "_csr_size = " << prm_name << "_csr_ptr[n];"; src.new_line() << type_name() << " csr_beg = " << prm_name << "_csr_ptr[idx];"; src.new_line() << type_name() << " csr_end = " << prm_name << "_csr_ptr[idx+1];"; src.new_line() << "for(" << type_name() << " j = csr_beg; j < csr_end; ++j)"; src.open("{"); src.new_line() << type_name() << " idx = " << prm_name << "_csr_col[j];"; { detail::output_local_preamble init_x(src, q, prm_name + "_x", state); boost::proto::eval(boost::proto::as_child(x), init_x); backend::source_generator vec_value; detail::vector_expr_context expr_x(vec_value, q, prm_name + "_x", state); boost::proto::eval(boost::proto::as_child(x), expr_x); src.open("{"); src.new_line() << type_name() << " v = " << vec_value.str() << ";"; for(int k = 0, j = 0; j < N; ++j) { src.new_line() << prm_name << "_sum.data[" << j << "][0] += "; for(int i = 0; i < N; ++i, ++k) { if (i) src << " + "; src << prm_name << "_csr_val[" << k << " * " << prm_name << "_csr_size + j] * v.data[" << i << "][0]"; } src << ";"; } src.close("}"); } src.close("}"); src.close("}"); src.close("}"); } template static void kernel_param_declaration(const Vector &x, backend::source_generator &src, const backend::command_queue &q, const std::string &prm_name, detail::kernel_generator_state_ptr state) { src.parameter< size_t >(prm_name + "_ell_width"); src.parameter< size_t >(prm_name + "_ell_pitch"); src.parameter< global_ptr >(prm_name + "_ell_col"); src.parameter< global_ptr >(prm_name + "_ell_val"); src.parameter< global_ptr >(prm_name + "_csr_ptr"); src.parameter< global_ptr >(prm_name + "_csr_col"); src.parameter< global_ptr >(prm_name + "_csr_val"); detail::declare_expression_parameter decl_x(src, q, prm_name + "_x", state); detail::extract_terminals()(boost::proto::as_child(x), decl_x); } template static void partial_vector_expr(const Vector&, backend::source_generator &src, const backend::command_queue&, const std::string &prm_name, detail::kernel_generator_state_ptr) { src << prm_name << "_sum"; } template void kernel_arg_setter(const Vector &x, backend::kernel &kernel, unsigned part, size_t index_offset, detail::kernel_generator_state_ptr state) const { kernel.push_arg(ell_width); kernel.push_arg(ell_pitch); if (ell_width) { kernel.push_arg(ell_col); kernel.push_arg(ell_val); } else { kernel.push_arg(static_cast(0)); kernel.push_arg(static_cast(0)); } if (csr_nnz) { kernel.push_arg(csr_ptr); kernel.push_arg(csr_col); kernel.push_arg(csr_val); } else { kernel.push_arg(static_cast(0)); kernel.push_arg(static_cast(0)); kernel.push_arg(static_cast(0)); } detail::set_expression_argument x_args(kernel, part, index_offset, state); detail::extract_terminals()( boost::proto::as_child(x), x_args); } template void expression_properties(const Vector&, std::vector &queue_list, std::vector &partition, size_t &size) const { queue_list = std::vector(1, q); partition = std::vector(2, 0); partition.back() = size = n; } size_t rows() const { return n; } size_t cols() const { return m; } size_t nonzeros() const { return nnz; } private: backend::command_queue q; size_t n, m, nnz, ell_width, ell_pitch, csr_nnz; backend::device_vector ell_col; backend::device_vector ell_val; 
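// Note on storage layout: the N x N block values are stored transposed
// ("structure of arrays"): component k = i * N + j of every nonzero block is
// kept contiguously, so the k-th component of block number m lives at
// val[k * nnz + m]. Both the host-side conversion loop above and the
// convert_csr2ell kernel below rely on this layout.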
backend::device_vector csr_ptr; backend::device_vector csr_col; backend::device_vector csr_val; backend::device_vector create_device_vector(const backend::command_queue &q, size_t nnz, const Val *host_data, bool fast = true) { backend::device_vector val(q, nnz * N * N); if (nnz) { if (fast) { backend::device_vector tmp(q, nnz * N * N, reinterpret_cast(host_data)); VEX_FUNCTION(T, transpose, (int,k)(int,m)(int,nnz)(T*, v), int i = k / nnz; int j = k % nnz; return v[j * m + i]; ); vex::vector(q,val) = transpose(vex::element_index(), N*N, nnz, raw_pointer(vex::vector(q, tmp))); } else { auto v = val.map(q); for(int k = 0, i = 0; i < N; ++i) for(int j = 0; j < N; ++j, ++k) for(size_t m = 0; m < nnz; ++m) v[k * nnz + m] = host_data[m](i,j); } } return val; } backend::kernel& csr2ell_kernel() const { using namespace vex::detail; static kernel_cache cache; auto kernel = cache.find(q); if (kernel == cache.end()) { backend::source_generator src(q); src.begin_kernel("convert_csr2ell"); src.begin_kernel_parameters(); src.template parameter("n"); src.template parameter("ell_width"); src.template parameter("ell_pitch"); src.template parameter< global_ptr >("ptr"); src.template parameter< global_ptr >("col"); src.template parameter< global_ptr >("val"); src.template parameter< global_ptr >("ell_col"); src.template parameter< global_ptr >("ell_val"); src.template parameter< global_ptr >("csr_ptr"); src.template parameter< global_ptr >("csr_col"); src.template parameter< global_ptr >("csr_val"); src.end_kernel_parameters(); src.new_line() << type_name() << " nnz = ptr[n];"; src.new_line() << type_name() << " ell_nnz = ell_width * ell_pitch;"; src.new_line() << type_name() << " csr_nnz = csr_ptr ? csr_ptr[n] : 0;"; src.grid_stride_loop().open("{"); src.new_line() << type_name() << " w = 0;"; src.new_line() << type_name() << " csr_head = 0;"; src.new_line() << "if (csr_ptr) csr_head = csr_ptr[idx];"; src.new_line() << "for(" << type_name() << " j = ptr[idx], e = ptr[idx+1]; j < e; ++j, ++w)"; src.open("{"); src.new_line() << type_name() << " c = col[j];"; src.new_line() << "if (w < ell_width) {"; src.new_line() << " ell_col[idx + w * ell_pitch] = c;"; for(int i = 0; i < N * N; ++i) src.new_line() << " ell_val[" << i << " * ell_nnz + w * ell_pitch + idx] = val[" << i << " * nnz + j];"; src.new_line() << "} else {"; src.new_line() << " csr_col[csr_head] = c;"; for(int i = 0; i < N * N; ++i) src.new_line() << " csr_val[" << i << " * csr_nnz + csr_head] = val[" << i << " * nnz + j];"; src.new_line() << " ++csr_head;"; src.new_line() << "}"; src.close("}"); src.close("}"); src.end_kernel(); kernel = cache.insert(q, backend::kernel(q, src.str(), "convert_csr2ell")); } return kernel->second; } template void convert( const PtrRange &host_ptr, const ColRange &host_col, const ValRange &host_val ) { size_t nnz = host_ptr[n]; backend::device_vector Aptr(q, n + 1, &host_ptr[0]); backend::device_vector Acol(q, nnz, nnz ? &host_col[0] : nullptr); backend::device_vector Aval = create_device_vector(q, nnz, nnz ? &host_val[0] : nullptr); /* 1. Get optimal ELL widths for local and remote parts. */ // Speed of ELL relative to CSR: const double ell_vs_csr = 3.0; // Find maximum widths for local and remote parts: std::vector ctx(1, q); Reductor max(ctx); vex::vector ptr(q, Aptr); VEX_FUNCTION(Ptr, row_width, (size_t, i)(const Ptr*, ptr), return ptr[i+1] - ptr[i]; ); int max_width = max(row_width(element_index(0, n), raw_pointer(ptr))); // Build width distribution histogram. 
vex::vector hist(ctx, max_width + 1); hist = 0; eval(atomic_add(&permutation(row_width(element_index(0, n), raw_pointer(ptr)))(hist), 1)); // Estimate optimal width for ELL part of the matrix, // count nonzeros in CSR part of the matrix ell_width = max_width; { auto h = hist.map(0); for(int i = 0, rows = n; i < max_width; ++i) { rows -= h[i]; // Number of rows wider than i. if (ell_vs_csr * rows < n) { ell_width = i; break; } } for(int i = ell_width + 1; i <= max_width; ++i) csr_nnz += h[i] * (i - ell_width); } if (ell_width == 0) { assert(csr_nnz == nnz); csr_ptr = Aptr; csr_col = Acol; csr_val = Aval; return; } if (csr_nnz) { VEX_FUNCTION(int, csr_width, (size_t, ell_width)(size_t, i)(const Ptr*, ptr), if (i == 0) return 0; int w = ptr[i] - ptr[i-1]; return (w > ell_width) ? (w - ell_width) : 0; ); vex::vector csr_w(ctx, n+1); csr_ptr = backend::device_vector(q, n + 1); csr_col = backend::device_vector(q, csr_nnz); csr_val = backend::device_vector (q, csr_nnz * N * N); csr_w = csr_width(ell_width, element_index(), raw_pointer(ptr)); vector csr_p(q, csr_ptr); inclusive_scan(csr_w, csr_p); } /* 3. Split the input matrix into ELL and CSR submatrices. */ ell_col = backend::device_vector(q, ell_pitch * ell_width); ell_val = backend::device_vector (q, ell_pitch * ell_width * N * N); vex::vector(q, ell_col) = -1; auto &convert = csr2ell_kernel(); convert.push_arg(n); convert.push_arg(ell_width); convert.push_arg(ell_pitch); convert.push_arg(Aptr); convert.push_arg(Acol); convert.push_arg(Aval); convert.push_arg(ell_col); convert.push_arg(ell_val); if (csr_nnz) { convert.push_arg(csr_ptr); convert.push_arg(csr_col); convert.push_arg(csr_val); } else { convert.push_arg(static_cast(0)); convert.push_arg(static_cast(0)); convert.push_arg(static_cast(0)); } convert(q); } }; } // namespace sparse } // namespace vex namespace amgcl { namespace backend { template std::string vexcl_static_matrix_declaration() { std::ostringstream s; s << "typedef struct { " << vex::type_name() << " data[" << N << "][" << N << "]; } " "amgcl_matrix_" << vex::type_name() << "_" << N << "x" << N << ";\n"; if (N != 1) s << "typedef struct { " << vex::type_name() << " data[" << N << "][" << 1 << "]; } " "amgcl_matrix_" << vex::type_name() << "_" << N << "x" << 1 << ";\n"; return s.str(); } template struct vex_scale { typedef static_matrix vector; struct apply_type : vex::UserFunction { apply_type() {} static std::string name() { return "scale_" + vex::type_name(); } static void define(vex::backend::source_generator &src, const std::string &fname = name()) { src.begin_function(fname); src.begin_function_parameters(); src.parameter("a"); src.parameter("m"); src.end_function_parameters(); for(int i = 0; i < N; ++i) src.new_line() << "m.data[" << i << "][0] *= a;"; src.new_line() << "return m;"; src.end_function(); } } const apply; }; template struct vex_convert; template struct vex_convert::value>::type> { typedef static_matrix src_vector; typedef static_matrix dst_vector; struct apply_type : vex::UserFunction { apply_type() {} static std::string name() { return "convert_" + vex::type_name() + "_" + vex::type_name(); } static void define(vex::backend::source_generator &src, const std::string &fname = name()) { src.begin_function(fname); src.begin_function_parameters(); src.parameter("s"); src.end_function_parameters(); src.new_line() << vex::type_name() << " d;"; for(int i = 0; i < N; ++i) src.new_line() << "d.data[" << i << "][0] = s.data[" << i << "][0];"; src.new_line() << "return d;"; src.end_function(); } } const apply; }; 
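/*
 * For illustration, with S = float, T = double, N = 2 the define() above
 * generates device code roughly equivalent to the following (the exact
 * mangled name comes from apply_type::name(); shown here schematically):
 *
 * \code
 * amgcl_matrix_double_2x1 convert_amgcl_matrix_float_2x1_amgcl_matrix_double_2x1(
 *         amgcl_matrix_float_2x1 s)
 * {
 *     amgcl_matrix_double_2x1 d;
 *     d.data[0][0] = s.data[0][0];
 *     d.data[1][0] = s.data[1][0];
 *     return d;
 * }
 * \endcode
 */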
template struct vex_convert::value>::type> { template static const X& apply(const X &x) { return x; } }; template struct vex_add { typedef static_matrix vectorA; typedef static_matrix vectorB; struct apply_type : vex::UserFunction { apply_type() {} static std::string name() { return "add_" + vex::type_name() + "_" + vex::type_name(); } static void define(vex::backend::source_generator &src, const std::string &fname = name()) { src.begin_function(fname); src.begin_function_parameters(); src.parameter("a"); src.parameter("b"); src.end_function_parameters(); for(int i = 0; i < N; ++i) src.new_line() << "a.data[" << i << "][0] += " << "b.data[" << i << "][0];"; src.new_line() << "return a;"; src.end_function(); } } const apply; }; template struct vex_sub { typedef static_matrix vectorA; typedef static_matrix vectorB; struct apply_type : vex::UserFunction { apply_type() {} static std::string name() { return "sub_" + vex::type_name() + "_" + vex::type_name(); } static void define(vex::backend::source_generator &src, const std::string &fname = name()) { src.begin_function(fname); src.begin_function_parameters(); src.parameter("a"); src.parameter("b"); src.end_function_parameters(); for(int i = 0; i < N; ++i) src.new_line() << "a.data[" << i << "][0] -= " << "b.data[" << i << "][0];"; src.new_line() << "return a;"; src.end_function(); } } const apply; }; template struct vex_mul { typedef static_matrix matrix; typedef static_matrix vectorX; typedef static_matrix vectorY; struct apply_type : vex::UserFunction { apply_type() {} static std::string name() { return "mul_" + vex::type_name() + "_" + vex::type_name(); } static void define(vex::backend::source_generator &src, const std::string &fname = name()) { src.begin_function(fname); src.begin_function_parameters(); src.parameter("a"); src.parameter("b"); src.end_function_parameters(); src.new_line() << vex::type_name() << " c;"; for(int i = 0; i < N; ++i) { src.new_line() << "c.data[" << i << "][0] = "; for(int j = 0; j < N; ++j) { if (j) src << " + "; src << "a.data[" << i << "][" << j << "] * b.data[" << j << "][0]"; } src << ";"; } src.new_line() << "return c;"; src.end_function(); } } const apply; }; template struct spmv_impl, C, P>>, vex::vector>, Beta, vex::vector>, typename std::enable_if<(B > 1)>::type > { typedef vex::sparse::distributed, C, P>> matrix; typedef vex::vector> vectorX; typedef vex::vector> vectorY; static void apply(Alpha alpha, const matrix &A, const vectorX &x, Beta beta, vectorY &y) { if (beta) y = vex_add().apply(vex_scale().apply(beta, y), vex_scale().apply(alpha, A * x)); else y = vex_convert().apply(vex_scale().apply(alpha, A * x)); } }; template struct spmv_impl>, vex::vector, Beta, vex::vector, typename std::enable_if< (math::static_rows::value == 1) && ( math::static_rows::value != math::static_rows::value || math::static_rows::value != math::static_rows::value ) >::type> { typedef vex::sparse::distributed> matrix; typedef vex::vector vectorx; typedef vex::vector vectory; static void apply(Alpha alpha, const matrix &A, const vectorx &x, Beta beta, vectory &y) { auto _x = x.template reinterpret::type>(); auto _y = y.template reinterpret::type>(); spmv(alpha, A, _x, beta, _y); } }; template struct spmv_impl>, vex::vector, Beta, vex::vector, typename std::enable_if< (math::static_rows::value > 1) && ( math::static_rows::value != math::static_rows::value || math::static_rows::value != math::static_rows::value ) >::type> { typedef vex::sparse::distributed> matrix; typedef vex::vector vectorx; typedef vex::vector vectory; 
static void apply(Alpha alpha, const matrix &A, const vectorx &x, Beta beta, vectory &y) { const int B = math::static_rows::value; auto _x = x.template reinterpret::type, B, 1>>(); auto _y = y.template reinterpret::type, B, 1>>(); spmv(alpha, A, _x, beta, _y); } }; template struct residual_impl< vex::sparse::distributed, C, P>>, vex::vector>, vex::vector>, vex::vector>, typename std::enable_if<(B > 1)>::type > { typedef vex::sparse::distributed, C, P>> matrix; typedef vex::vector> vectorB; typedef vex::vector> vectorX; typedef vex::vector> vectorR; static void apply(const vectorB &rhs, const matrix &A, const vectorX &x, vectorR &r) { r = vex_convert().apply(vex_sub().apply(rhs, A * x)); } }; template struct residual_impl< vex::sparse::distributed, C, P>>, vex::vector, vex::vector, vex::vector, typename std::enable_if< math::static_rows::value == 1 && math::static_rows::value == 1 && math::static_rows::value == 1 && (B > 1) >::type > { typedef vex::sparse::distributed, C, P>> matrix; typedef vex::vector vectorB; typedef vex::vector vectorX; typedef vex::vector vectorR; static void apply(const vectorB &b, const matrix &A, const vectorX &x, vectorR &r) { typedef static_matrix VB; typedef static_matrix VX; typedef static_matrix VR; auto _b = b.template reinterpret(); auto _x = x.template reinterpret(); auto _r = r.template reinterpret(); _r = vex_convert().apply(vex_sub().apply(_b, A * _x)); } }; template < typename Alpha, typename Beta, typename TX, typename TY, typename TZ, int B > struct vmul_impl< Alpha, vex::vector< static_matrix >, vex::vector< static_matrix >, Beta, vex::vector< static_matrix > > { typedef vex::vector< static_matrix > matrix; typedef vex::vector< static_matrix > vectorY; typedef vex::vector< static_matrix > vectorZ; static void apply(Alpha a, const matrix &x, const vectorY &y, Beta b, vectorZ &z) { if (b) z = vex_add().apply(vex_scale().apply(b, z), vex_scale().apply(a, vex_mul().apply(x, y))); else z = vex_convert().apply(vex_scale().apply(a, vex_mul().apply(x, y))); } }; template < typename T, int B > struct clear_impl< vex::vector< static_matrix > > { typedef static_matrix vector_value; typedef vex::vector vector; static void apply(vector &x) { x.template reinterpret() = 0; } }; template < typename T, int B > struct copy_impl< vex::vector< static_matrix >, vex::vector< static_matrix > > { typedef vex::vector< static_matrix > vector; static void apply(const vector &x, vector &y) { auto X = x.template reinterpret(); auto Y = y.template reinterpret(); Y = X; } }; template < typename A, typename B, typename TX, typename TY, int N > struct axpby_impl< A, vex::vector< static_matrix >, B, vex::vector< static_matrix > > { typedef vex::vector< static_matrix > vectorX; typedef vex::vector< static_matrix > vectorY; static void apply(A a, const vectorX &x, B b, vectorY &y) { if (b) y.template reinterpret() = a * x.template reinterpret() + b * y.template reinterpret(); else y.template reinterpret() = a * x.template reinterpret(); } }; template < typename A, typename B, typename C, typename TX, typename TY, typename TZ, int N > struct axpbypcz_impl< A, vex::vector< static_matrix >, B, vex::vector< static_matrix >, C, vex::vector< static_matrix > > { typedef vex::vector< static_matrix > vectorX; typedef vex::vector< static_matrix > vectorY; typedef vex::vector< static_matrix > vectorZ; static void apply(A a, const vectorX &x, B b, const vectorY &y, C c, vectorZ &z) { if (c) z.template reinterpret() = a * x.template reinterpret() + b * y.template reinterpret() + c * z.template 
reinterpret(); else z.template reinterpret() = a * x.template reinterpret() + b * y.template reinterpret(); } }; template < typename T, int B > struct inner_product_impl< vex::vector< static_matrix >, vex::vector< static_matrix > > { typedef T return_type; typedef static_matrix vector_value; typedef vex::vector vector; static return_type get(const vector &x, const vector &y) { vex::Reductor sum( x.queue_list() ); return sum( x.template reinterpret() * y.template reinterpret() ); } }; namespace detail { template struct common_scalar_backend< backend::vexcl, backend::vexcl, typename std::enable_if< math::static_rows::value != 1 || math::static_rows::value != 1 >::type> { typedef typename math::scalar_of::type S1; typedef typename math::scalar_of::type S2; typedef typename std::conditional< (sizeof(S1) > sizeof(S2)), backend::vexcl, backend::vexcl >::type type; }; } // namespace detail } // namespace backend } // namespace amgcl #endif amgcl-1.4.4/amgcl/backend/viennacl.hpp000066400000000000000000000312341451676433700176100ustar00rootroot00000000000000#ifndef AMGCL_BACKEND_VIENNACL_HPP #define AMGCL_BACKEND_VIENNACL_HPP /* The MIT License Copyright (c) 2012-2022 Denis Demidov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /** * \file amgcl/backend/viennacl.hpp * \author Denis Demidov * \brief ViennaCL backend. */ #include #include #include #include #include #include #include #include #include #include #include namespace amgcl { namespace solver { /** Wrapper around solver::skyline_lu for use with the ViennaCL backend. * Copies the rhs to the host memory, solves the problem using the host CPU, * then copies the solution back to the compute device(s). */ template struct viennacl_skyline_lu : solver::skyline_lu { typedef solver::skyline_lu Base; mutable std::vector _rhs, _x; template viennacl_skyline_lu(const Matrix &A, const Params&) : Base(*A), _rhs(backend::rows(*A)), _x(backend::rows(*A)) { } template void operator()(const Vec1 &rhs, Vec2 &x) const { viennacl::fast_copy(rhs, _rhs); static_cast(this)->operator()(_rhs, _x); viennacl::fast_copy(_x, x); } }; } namespace backend { /// ViennaCL backend /** * This is a backend that uses types defined in the ViennaCL library * (http://viennacl.sourceforge.net). * * \param Matrix ViennaCL matrix to use with the backend. Possible choices are * viannacl::compressed_matrix, viennacl::ell_matrix, and * viennacl::hyb_matrix. 
* \ingroup backends */ template < class Matrix, class DirectSolver = solver::viennacl_skyline_lu::type> > struct viennacl { typedef typename backend::value_type::type value_type; typedef ptrdiff_t index_type; typedef ptrdiff_t col_type; typedef ptrdiff_t ptr_type; typedef Matrix matrix; typedef ::viennacl::vector vector; typedef ::viennacl::vector matrix_diagonal; typedef DirectSolver direct_solver; struct provides_row_iterator : std::false_type {}; /// Backend parameters. typedef amgcl::detail::empty_params params; static std::string name() { return "viennacl"; } /// Copy matrix from builtin backend. static std::shared_ptr copy_matrix( std::shared_ptr< typename builtin::matrix > A, const params& ) { auto m = std::make_shared(); ::viennacl::copy(viennacl_matrix_adapter(*A), *m); return m; } /// Copy vector from builtin backend. static std::shared_ptr copy_vector(typename builtin::vector const &x, const params&) { auto v = std::make_shared(x.size()); ::viennacl::fast_copy(x.data(), x.data() + x.size(), v->begin()); return v; } /// Copy vector from builtin backend. static std::shared_ptr copy_vector( std::shared_ptr< typename builtin::vector > x, const params &prm ) { return copy_vector(*x, prm); } /// Create vector of the specified size. static std::shared_ptr create_vector(size_t size, const params&) { return std::make_shared(size); } /// Create direct solver for coarse level static std::shared_ptr create_solver(std::shared_ptr< typename builtin::matrix > A, const params &prm) { return std::make_shared(A, prm); } private: struct viennacl_matrix_adapter { typedef ptrdiff_t index_type; typedef size_t size_type; class const_iterator1; class const_iterator2 { public: bool operator!=(const const_iterator2 &it) const { return pos != it.pos; } const const_iterator2& operator++() { ++pos; return *this; } index_type index1() const { return row; } index_type index2() const { return col[pos]; } value_type operator*() const { return val[pos]; } private: const_iterator2(index_type row, index_type pos, const index_type *col, const value_type *val) : row(row), pos(pos), col(col), val(val) { } index_type row; index_type pos; const index_type *col; const value_type *val; friend class const_iterator1; }; class const_iterator1 { public: bool operator!=(const const_iterator1 &it) const { return pos != it.pos; } const const_iterator1& operator++() { ++pos; return *this; } index_type index1() const { return pos; } const const_iterator2 begin() const { return const_iterator2(pos, row[pos], col, val); } const const_iterator2 end() const { return const_iterator2(pos, row[pos + 1], col, val); } private: const_iterator1(index_type pos, const index_type *row, const index_type *col, const value_type *val ) : pos(pos), row(row), col(col), val(val) { } index_type pos; const index_type *row; const index_type *col; const value_type *val; friend struct viennacl_matrix_adapter; }; viennacl_matrix_adapter( const typename backend::builtin::matrix &A) : rows(A.nrows), cols(A.ncols), row(A.ptr), col(A.col), val(A.val) { } const_iterator1 begin1() const { return const_iterator1(0, row, col, val); } const_iterator1 end1() const { return const_iterator1(rows, row, col, val); } size_t size1() const { return rows; } size_t size2() const { return cols; } private: size_t rows; size_t cols; const index_type *row; const index_type *col; const value_type *val; }; }; template struct is_viennacl_matrix : std::false_type {}; template struct is_viennacl_matrix< ::viennacl::compressed_matrix > : std::true_type {}; template struct 
is_viennacl_matrix< ::viennacl::hyb_matrix > : std::true_type {}; template struct is_viennacl_matrix< ::viennacl::ell_matrix > : std::true_type {}; template struct value_type< M, typename std::enable_if< is_viennacl_matrix::value >::type > { typedef typename M::value_type::value_type type; }; template struct value_type< ::viennacl::vector > { typedef V type; }; template struct rows_impl< M, typename std::enable_if< is_viennacl_matrix::value >::type > { static size_t get(const M &A) { return A.size1(); } }; template struct cols_impl< M, typename std::enable_if< is_viennacl_matrix::value >::type > { static size_t get(const M &A) { return A.size2(); } }; template struct nonzeros_impl< ::viennacl::compressed_matrix > { static size_t get(const ::viennacl::compressed_matrix &A) { return A.nnz(); } }; template struct nonzeros_impl< ::viennacl::ell_matrix > { static size_t get(const ::viennacl::ell_matrix &A) { return A.nnz(); } }; template struct nonzeros_impl< ::viennacl::hyb_matrix > { static size_t get(const ::viennacl::hyb_matrix &A) { return A.ell_nnz() + A.csr_nnz(); } }; template struct spmv_impl< Alpha, Mtx, Vec, Beta, Vec, typename std::enable_if< is_viennacl_matrix::value >::type > { static void apply(Alpha alpha, const Mtx &A, const Vec &x, Beta beta, Vec &y) { if (beta) y = alpha * ::viennacl::linalg::prod(A, x) + beta * y; else y = alpha * ::viennacl::linalg::prod(A, x); } }; template struct residual_impl< Mtx, Vec, Vec, Vec, typename std::enable_if< is_viennacl_matrix::value >::type > { typedef typename value_type::type V; static void apply(const Vec &rhs, const Mtx &A, const Vec &x, Vec &r) { r = ::viennacl::linalg::prod(A, x); r = rhs - r; } }; template < typename V > struct clear_impl< ::viennacl::vector > { static void apply(::viennacl::vector &x) { x.clear(); } }; template < typename V > struct inner_product_impl< ::viennacl::vector, ::viennacl::vector > { static V get(const ::viennacl::vector &x, const ::viennacl::vector &y) { return ::viennacl::linalg::inner_prod(x, y); } }; template < typename A, typename B, typename V > struct axpby_impl< A, ::viennacl::vector, B, ::viennacl::vector > { static void apply( A a, const ::viennacl::vector &x, B b, ::viennacl::vector &y ) { if (b) y = a * x + b * y; else y = a * x; } }; template < typename A, typename B, typename C, typename V > struct axpbypcz_impl< A, ::viennacl::vector, B, ::viennacl::vector, C, ::viennacl::vector > { static void apply( A a, const ::viennacl::vector &x, B b, const ::viennacl::vector &y, C c, ::viennacl::vector &z ) { if (c) z = a * x + b * y + c * z; else z = a * x + b * y; } }; template < typename A, typename B, typename V > struct vmul_impl< A, ::viennacl::vector, ::viennacl::vector, B, ::viennacl::vector > { static void apply( A a, const ::viennacl::vector &x, const ::viennacl::vector &y, B b, ::viennacl::vector &z) { if (b) z = a * ::viennacl::linalg::element_prod(x, y) + b * z; else z = a * ::viennacl::linalg::element_prod(x, y); } }; template < typename V > struct copy_impl< ::viennacl::vector, ::viennacl::vector > { static void apply(const ::viennacl::vector &x, ::viennacl::vector &y) { y = x; } }; } // namespace backend } // namespace amgcl #endif amgcl-1.4.4/amgcl/coarsening/000077500000000000000000000000001451676433700160365ustar00rootroot00000000000000amgcl-1.4.4/amgcl/coarsening/aggregation.hpp000066400000000000000000000130231451676433700210350ustar00rootroot00000000000000#ifndef AMGCL_COARSENING_AGGREGATION_HPP #define AMGCL_COARSENING_AGGREGATION_HPP /* The MIT License Copyright (c) 2012-2022 
amgcl-1.4.4/amgcl/coarsening/000077500000000000000000000000001451676433700160365ustar00rootroot00000000000000amgcl-1.4.4/amgcl/coarsening/aggregation.hpp000066400000000000000000000130231451676433700210350ustar00rootroot00000000000000
#ifndef AMGCL_COARSENING_AGGREGATION_HPP
#define AMGCL_COARSENING_AGGREGATION_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

/**
 * \file   amgcl/coarsening/aggregation.hpp
 * \author Denis Demidov <dennis.demidov@gmail.com>
 * \brief  Non-smoothed aggregation coarsening.
 */

#include <memory>
#include <tuple>

#include <amgcl/backend/builtin.hpp>
#include <amgcl/coarsening/detail/galerkin.hpp>
#include <amgcl/coarsening/pointwise_aggregates.hpp>
#include <amgcl/coarsening/tentative_prolongation.hpp>
#include <amgcl/util.hpp>

namespace amgcl {

/// Coarsening strategies
namespace coarsening {

/**
 * \defgroup coarsening Coarsening strategies
 * \brief Coarsening strategies for AMG hierarchy construction.
 *
 * A coarsener in AMGCL is a class that takes a system matrix and returns
 * three operators:
 *
 * 1. Restriction operator R that downsamples the residual error to a
 *    coarser level in the AMG hierarchy,
 * 2. Prolongation operator P that interpolates a correction computed on a
 *    coarser grid into a finer grid,
 * 3. System matrix \f$A^H\f$ at a coarser level that is usually computed as
 *    a Galerkin operator \f$A^H = R A^h P\f$.
 *
 * The AMG hierarchy is constructed by recursive invocation of the selected
 * coarsener.
 */

/// Non-smoothed aggregation.
/**
 * \ingroup coarsening
 */
template <class Backend>
struct aggregation {
    typedef pointwise_aggregates Aggregates;

    /// Coarsening parameters.
    struct params {
        /// Aggregation parameters.
        Aggregates::params aggr;

        /// Near nullspace parameters.
        nullspace_params nullspace;

        /// Over-interpolation factor \f$\alpha\f$.
        /**
         * In the case of aggregation coarsening, the coarse-grid correction
         * of smooth error, and with it the overall convergence, can often be
         * substantially improved by using "over-interpolation", that is, by
         * multiplying the actual correction (corresponding to piecewise
         * constant interpolation) by some factor \f$\alpha > 1\f$.
         * Equivalently, this means that the coarse-level Galerkin operator
         * is re-scaled by \f$1 / \alpha\f$:
         * \f[I_h^H A_h I_H^h \to \frac{1}{\alpha} I_h^H A_h I_H^h.\f]
         *
         * \sa \cite Stuben1999, Section 9.1 "Re-scaling of the Galerkin operator".
         */
        float over_interp;

        params()
            : over_interp(math::static_rows<typename Backend::value_type>::value == 1 ? 1.5f : 2.0f)
        {}

#ifndef AMGCL_NO_BOOST
        params(const boost::property_tree::ptree &p)
            : AMGCL_PARAMS_IMPORT_CHILD(p, aggr),
              AMGCL_PARAMS_IMPORT_CHILD(p, nullspace),
              AMGCL_PARAMS_IMPORT_VALUE(p, over_interp)
        {
            check_params(p, {"aggr", "nullspace", "over_interp"});
        }

        void get(boost::property_tree::ptree &p, const std::string &path) const {
            AMGCL_PARAMS_EXPORT_CHILD(p, path, aggr);
            AMGCL_PARAMS_EXPORT_CHILD(p, path, nullspace);
            AMGCL_PARAMS_EXPORT_VALUE(p, path, over_interp);
        }
#endif
    } prm;

    aggregation(const params &prm = params()) : prm(prm) {}
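    /* A sketch of configuring these parameters at runtime through the
     * property_tree constructor above. The "precond.coarsening." prefix is
     * an assumption that holds when the preconditioner is wrapped in
     * amgcl::make_solver (the Solver typedef is assumed composed elsewhere);
     * the ptree is implicitly converted to the corresponding params structs.
     *
     * \code
     * boost::property_tree::ptree prm;
     * prm.put("precond.coarsening.over_interp", 1.5f);      // rescale Galerkin operator by 1/1.5
     * prm.put("precond.coarsening.aggr.eps_strong", 0.08f); // strength-of-connection threshold
     * Solver solve(std::tie(n, ptr, col, val), prm);
     * \endcode
     */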
    /// Creates transfer operators for the given system matrix.
    /**
     * \param A The system matrix.
     * \returns A tuple of prolongation and restriction operators.
     */
    template <class Matrix>
    std::tuple< std::shared_ptr<Matrix>, std::shared_ptr<Matrix> >
    transfer_operators(const Matrix &A) {
        const size_t n = rows(A);

        AMGCL_TIC("aggregates");
        Aggregates aggr(A, prm.aggr, prm.nullspace.cols);
        AMGCL_TOC("aggregates");

        AMGCL_TIC("interpolation");
        auto P = tentative_prolongation<Matrix>(
                n, aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size
                );
        AMGCL_TOC("interpolation");

        return std::make_tuple(P, transpose(*P));
    }

    /// Creates system matrix for the coarser level.
    /**
     * \param A The system matrix at the finer level.
     * \param P Prolongation operator returned by transfer_operators().
     * \param R Restriction operator returned by transfer_operators().
     * \returns System matrix for the coarser level.
     */
    template <class Matrix>
    std::shared_ptr<Matrix>
    coarse_operator(const Matrix &A, const Matrix &P, const Matrix &R) const {
        return detail::scaled_galerkin(A, P, R, 1 / prm.over_interp);
    }
};

} // namespace coarsening
} // namespace amgcl

#endif
amgcl-1.4.4/amgcl/coarsening/as_scalar.hpp000066400000000000000000000102511451676433700204760ustar00rootroot00000000000000
#ifndef AMGCL_COARSENING_AS_SCALAR_HPP
#define AMGCL_COARSENING_AS_SCALAR_HPP

/*
The MIT License

Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

/**
 * \file   amgcl/coarsening/as_scalar.hpp
 * \author Denis Demidov <dennis.demidov@gmail.com>
 * \brief  Scalar coarsening for block matrices.
 */

#include <memory>
#include <amgcl/backend/builtin.hpp>
#include <amgcl/adapter/block_matrix.hpp>

namespace amgcl {
namespace coarsening {

// Takes a block matrix as input,
// converts it to the scalar format,
// applies the base coarsening,
// converts the results back to block format.
template <template <class> class Base>
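// A usage sketch (an assumption based on how this wrapper appears in the
// amgcl examples: the base coarsening is supplied as a template template
// argument, and the nested ::type is what amg<> receives):
//
//     typedef amgcl::backend::builtin< amgcl::static_matrix<double, 3, 3> > Backend;
//     typedef amgcl::amg<
//         Backend,
//         amgcl::coarsening::as_scalar< amgcl::coarsening::aggregation >::type,
//         amgcl::relaxation::spai0
//         > Precond;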