pax_global_header00006660000000000000000000000064145770174000014517gustar00rootroot0000000000000052 comment=0a0a08c203b311d10d840444886e61c87e2d20f0 SpFFT-1.1.0/000077500000000000000000000000001457701740000124405ustar00rootroot00000000000000SpFFT-1.1.0/.clang-format000066400000000000000000000000771457701740000150170ustar00rootroot00000000000000BasedOnStyle: Google ColumnLimit: 100 AccessModifierOffset: -2 SpFFT-1.1.0/.github/000077500000000000000000000000001457701740000140005ustar00rootroot00000000000000SpFFT-1.1.0/.github/workflows/000077500000000000000000000000001457701740000160355ustar00rootroot00000000000000SpFFT-1.1.0/.github/workflows/ci.yml000066400000000000000000000103201457701740000171470ustar00rootroot00000000000000name: CI on: [push, pull_request] jobs: ######################### # Build and test with GCC ######################### CPU: # The type of runner that the job will run on runs-on: ubuntu-22.04 strategy: fail-fast: false matrix: build_type: [release, debug] compiler: [g++] use_omp: [true] use_mpi: [true, false] use_float: [true] include: - build_type: debug compiler: clang++ use_omp: false use_mpi: true use_float: false env: USE_OMP: ${{ matrix.use_omp }} USE_MPI: ${{ matrix.use_mpi }} USE_FLOAT: ${{ matrix.use_float }} BUILD_TYPE: ${{ matrix.build_type }} COMPILER: ${{ matrix.compiler }} steps: # Checks-out your repository under $GITHUB_WORKSPACE - uses: actions/checkout@v4 - name: Print build config run: | echo "Compiler: ${COMPILER}, Build type: ${BUILD_TYPE}, OpenMP: ${USE_OMP}, MPI: ${USE_MPI}, FLOAT: ${USE_FLOAT}" - name: Install dependencies run: | sudo apt-get update sudo apt-get install -y libfftw3-dev make g++ clang wget git make cmake - name: Install MPI if: ${{ matrix.use_mpi }} run: | sudo apt-get install -y mpi-default-dev - name: Build and install run: | mkdir -p build cd build mkdir -p install_dir export INSTALL_DIR=$(pwd)/install_dir CXX=${COMPILER} cmake .. 
-DSPFFT_BUILD_TESTS=OFF -DSPFFT_OMP=${USE_OMP} -DSPFFT_MPI=${USE_MPI} -DSPFFT_SINGLE_PRECISION=${USE_FLOAT} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} make -j2 make VERBOSE=1 install test -f ${INSTALL_DIR}/lib/libspfft.so test -f ${INSTALL_DIR}/include/spfft/spfft.hpp test -f ${INSTALL_DIR}/include/spfft/spfft.h - name: Build tests run: | cd ${GITHUB_WORKSPACE} rm -rf build mkdir -p build cd build CXX=${COMPILER} cmake .. -DSPFFT_BUILD_TESTS=ON -DSPFFT_OMP=${USE_OMP} -DSPFFT_MPI=${USE_MPI} -DSPFFT_SINGLE_PRECISION=${USE_FLOAT} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="-Wno-error=maybe-uninitialized" make -j2 - name: Run tests env: OMPI_MCA_btl_vader_single_copy_mechanism: none run: ${GITHUB_WORKSPACE}/build/tests/run_local_tests - name: Run tests with MPI if: ${{ matrix.use_mpi }} env: OMPI_MCA_btl_vader_single_copy_mechanism: none run: mpirun -n 2 ${GITHUB_WORKSPACE}/build/tests/run_mpi_tests ################# # Build with CUDA ################# CUDA: runs-on: ubuntu-22.04 container: nvidia/cuda:11.0.3-devel-ubuntu20.04 steps: # Checks-out your repository under $GITHUB_WORKSPACE - uses: actions/checkout@v4 - name: Install dependencies run: | apt-get update DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y libfftw3-dev make g++ mpi-default-dev wget git make cd ${HOME} && wget https://github.com/Kitware/CMake/releases/download/v3.18.0/cmake-3.18.0-Linux-x86_64.tar.gz && tar -xzvf cmake-3.18.0-Linux-x86_64.tar.gz - name: Build run: | cd ${GITHUB_WORKSPACE} mkdir -p build cd build ${HOME}/cmake-3.18.0-Linux-x86_64/bin/cmake .. 
-DSPFFT_BUILD_TESTS=ON -DSPFFT_GPU_BACKEND=CUDA -DSPFFT_OMP=OFF make -j2 ################# # Build with ROCm ################# ROCM: runs-on: ubuntu-22.04 container: rocm/dev-ubuntu-22.04:5.3-complete steps: # Checks-out your repository under $GITHUB_WORKSPACE - uses: actions/checkout@v4 - name: Install dependencies run: | apt-get update DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get install -y libfftw3-dev make g++ mpi-default-dev wget git make cd ${HOME} && wget https://github.com/Kitware/CMake/releases/download/v3.21.0/cmake-3.21.0-linux-x86_64.tar.gz && tar -xzvf cmake-3.21.0-linux-x86_64.tar.gz - name: Build run: | cd ${GITHUB_WORKSPACE} mkdir -p build cd build ${HOME}/cmake-3.21.0-linux-x86_64/bin/cmake .. -DSPFFT_BUILD_TESTS=ON -DSPFFT_GPU_BACKEND=ROCM -DCMAKE_PREFIX_PATH=/opt/rocm make -j2 SpFFT-1.1.0/.readthedocs.yml000066400000000000000000000004531457701740000155300ustar00rootroot00000000000000# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 sphinx: configuration: docs/source/conf.py formats: [] build: os: ubuntu-22.04 tools: python: "3.11" python: install: - requirements: docs/requirements.txt SpFFT-1.1.0/CMakeLists.txt000066400000000000000000000173421457701740000152070ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.18 FATAL_ERROR) # 3.18 for C++17 project(SpFFT LANGUAGES CXX VERSION 1.1.0) set(SPFFT_SO_VERSION 1) set(SPFFT_VERSION ${PROJECT_VERSION}) # allow {module}_ROOT variables to be set if(POLICY CMP0074) cmake_policy(SET CMP0074 NEW) endif() # Initialize CMAKE_CUDA_ARCHITECTURES through nvcc if possible if(POLICY CMP0104) cmake_policy(SET CMP0104 NEW) endif() # set default build type to RELEASE if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type" FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" ) endif() # set language and standard 
set(CMAKE_CXX_STANDARD 17) set(CMAKE_CUDA_STANDARD 17) set(CMAKE_HIP_STANDARD 17) #add local module path set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/cmake/modules) include(CMakeDependentOption) # Options option(SPFFT_STATIC "Compile as static library" OFF) option(SPFFT_OMP "Compile with OpenMP support" ON) option(SPFFT_MPI "Compile with MPI support" ON) option(SPFFT_GPU_DIRECT "Compile with GPU direct (GPU aware MPI) support." OFF) option(SPFFT_BUILD_TESTS "Build tests" OFF) option(SPFFT_SINGLE_PRECISION "Enable single precision support" OFF) option(SPFFT_INSTALL "Enable CMake install commands" ON) option(SPFFT_FORTRAN "Compile fortran module" OFF) option(SPFFT_BUNDLED_LIBS "Use bundled libraries for building tests" ON) cmake_dependent_option(SPFFT_BUNDLED_GOOGLETEST "Use bundled googletest lib" ON "SPFFT_BUNDLED_LIBS" OFF) cmake_dependent_option(SPFFT_BUNDLED_JSON "Use bundled json lib" ON "SPFFT_BUNDLED_LIBS" OFF) cmake_dependent_option(SPFFT_BUNDLED_CLI11 "Use bundled CLI11 lib" ON "SPFFT_BUNDLED_LIBS" OFF) set(SPFFT_GPU_BACKEND "OFF" CACHE STRING "GPU backend") set_property(CACHE SPFFT_GPU_BACKEND PROPERTY STRINGS "OFF" "CUDA" "ROCM" ) set(SPFFT_FFTW_LIB "AUTO" CACHE STRING "Library providing a FFTW interface") set_property(CACHE SPFFT_FFTW_LIB PROPERTY STRINGS "AUTO" "FFTW" "MKL" "ARMPL" ) # Get GNU standard install prefixes include(GNUInstallDirs) # set preferred library type if (SPFFT_STATIC) # prefer static over dynamic libraries with the find_library() command by changing the order set(CMAKE_FIND_LIBRARY_SUFFIXES_SAVE ${CMAKE_FIND_LIBRARY_SUFFIXES}) if(APPLE) set(CMAKE_FIND_LIBRARY_SUFFIXES .a .tbd .dylib .so) elseif(UNIX) set(CMAKE_FIND_LIBRARY_SUFFIXES .a .so) endif() set(SPFFT_LIBRARY_TYPE STATIC) else() set(SPFFT_LIBRARY_TYPE SHARED) endif() set(SPFFT_EXTERNAL_LIBS) set(SPFFT_INCLUDE_DIRS) set(SPFFT_EXTERNAL_INCLUDE_DIRS) set(SPFFT_EXTERNAL_PKG_PACKAGES) # Options combination check set(SPFFT_CUDA OFF) set(SPFFT_ROCM OFF) 
if(SPFFT_GPU_BACKEND) if(SPFFT_GPU_BACKEND STREQUAL "CUDA") set(SPFFT_CUDA ON) elseif(SPFFT_GPU_BACKEND STREQUAL "ROCM") set(SPFFT_ROCM ON) else() message(FATAL_ERROR "Invalid GPU backend option") endif() endif() mark_as_advanced(SPFFT_CUDA SPFFT_ROCM) # Fortran if(SPFFT_FORTRAN) enable_language(Fortran) endif() # CUDA if(SPFFT_CUDA) enable_language(CUDA) if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.17.0") find_package(CUDAToolkit REQUIRED) else() find_library(CUDA_CUDART_LIBRARY cudart PATHS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) find_library(CUDA_CUFFT_LIBRARY cufft PATHS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) if(NOT TARGET CUDA::cudart) add_library(CUDA::cudart INTERFACE IMPORTED) endif() set_property(TARGET CUDA::cudart PROPERTY INTERFACE_LINK_LIBRARIES ${CUDA_CUDART_LIBRARY}) set_property(TARGET CUDA::cudart PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) if(NOT TARGET CUDA::cufft) add_library(CUDA::cufft INTERFACE IMPORTED) endif() set_property(TARGET CUDA::cufft PROPERTY INTERFACE_LINK_LIBRARIES ${CUDA_CUFFT_LIBRARY}) set_property(TARGET CUDA::cufft PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) endif() list(APPEND SPFFT_EXTERNAL_LIBS CUDA::cudart CUDA::cufft) endif() # ROCM if(SPFFT_ROCM) cmake_minimum_required(VERSION 3.21 FATAL_ERROR) # hip support only added in 3.21 enable_language(HIP) find_package(hip CONFIG REQUIRED) find_package(rocfft CONFIG REQUIRED) find_package(hipfft CONFIG) # hipfft within rocfft is deprecated. Use separate hipfft if available (not required). if(hipfft_FOUND) # Issue with rocm 4.1.0: Symlink to rocfft provided hipfft.h in /opt/rocm/include. 
# Workaround: Only use hipfft include directory with hipfft target and place before other hip targets in lib list if(HIPFFT_INCLUDE_DIRS) set_property(TARGET hip::hipfft PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${HIPFFT_INCLUDE_DIRS}) endif() list(APPEND SPFFT_EXTERNAL_LIBS hip::hipfft) endif() list(APPEND SPFFT_EXTERNAL_LIBS hip::host roc::rocfft) # Previously used option for flags. if(HIP_HCC_FLAGS) message(WARNING "HIP_HCC_FLAGS has no effect. Use CMAKE_HIP_FLAGS for flags and CMAKE_HIP_ARCHITECTURES for arch instead.") endif() endif() if(SPFFT_MPI) find_package(MPI COMPONENTS CXX REQUIRED) list(APPEND SPFFT_EXTERNAL_LIBS MPI::MPI_CXX) endif() if(SPFFT_OMP) find_package(OpenMP COMPONENTS CXX REQUIRED) list(APPEND SPFFT_EXTERNAL_LIBS OpenMP::OpenMP_CXX) endif() if(SPFFT_GPU_DIRECT) message(STATUS "GPU Direct support enabled: Additional environment variables might have to be set before execution. (e.g \"export MPICH_RDMA_ENABLED_CUDA=1\")") endif() # FFTW library must be found if not set to AUTO set(_SPFFT_FIND_FFTW_LIB_OPTION) if(NOT ${SPFFT_FFTW_LIB} STREQUAL "AUTO") set(_SPFFT_FIND_FFTW_LIB_OPTION REQUIRED) endif() set(SPFFT_MKL OFF) set(SPFFT_ARMPL OFF) set(SPFFT_FFTW OFF) # Look for MKL first if(${SPFFT_FFTW_LIB} STREQUAL "AUTO" OR ${SPFFT_FFTW_LIB} STREQUAL "MKL") # Use MKL if available, otherwise require FFTW3 if(UNIX AND NOT APPLE) # prefer static MKL in Linux. Together with "-Wl,--exclude-libs,ALL", # symbols are not visible for linking afterwards and no conflicts with other MKL versions of other libraries should exist. 
set(_TMP_SAVE ${CMAKE_FIND_LIBRARY_SUFFIXES}) set(CMAKE_FIND_LIBRARY_SUFFIXES .a .so) endif() find_package(MKLSequential ${_SPFFT_FIND_FFTW_LIB_OPTION}) if(UNIX AND NOT APPLE) set(CMAKE_FIND_LIBRARY_SUFFIXES ${_TMP_SAVE}) unset(_TMP_SAVE) endif() if(TARGET MKL::Sequential) list(APPEND SPFFT_EXTERNAL_LIBS MKL::Sequential) list(APPEND SPFFT_EXTERNAL_PKG_PACKAGES mkl-dynamic-lp64-seq) set(SPFFT_MKL ON) endif() endif() # Look for ARM PL if(NOT SPFFT_MKL AND ${SPFFT_FFTW_LIB} STREQUAL "AUTO" OR ${SPFFT_FFTW_LIB} STREQUAL "ARMPL") find_package(ARMPL ${_SPFFT_FIND_FFTW_LIB_OPTION}) if(TARGET ARM::pl) list(APPEND SPFFT_EXTERNAL_LIBS ARM::pl) set(SPFFT_ARMPL ON) endif() endif() # Look for FFTW library if required if(NOT SPFFT_MKL AND NOT SPFFT_ARMPL) find_package(FFTW REQUIRED) list(APPEND SPFFT_EXTERNAL_LIBS FFTW::FFTW) if(SPFFT_SINGLE_PRECISION) find_package(FFTWF REQUIRED) list(APPEND SPFFT_EXTERNAL_LIBS FFTWF::FFTWF) endif() list(APPEND SPFFT_EXTERNAL_PKG_PACKAGES fftw3) set(SPFFT_FFTW ON) endif() # generate config.h configure_file(include/spfft/config.h.in ${PROJECT_BINARY_DIR}/spfft/config.h) list(APPEND SPFFT_INCLUDE_DIRS ${PROJECT_SOURCE_DIR}/src) list(APPEND SPFFT_INCLUDE_DIRS ${PROJECT_SOURCE_DIR}/include) list(APPEND SPFFT_INCLUDE_DIRS ${PROJECT_BINARY_DIR}) list(APPEND SPFFT_EXTERNAL_INCLUDE_DIRS ${PROJECT_SOURCE_DIR}/ext) ############################################################################# # All include dirs and definitions must be set before sub-directory is added! 
############################################################################# add_subdirectory(src) # add tests for developement if(SPFFT_BUILD_TESTS) add_subdirectory(tests) endif() # reset cmake library suffixes if(SPFFT_STATIC) set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_SAVE}) endif() SpFFT-1.1.0/LICENSE000066400000000000000000000027151457701740000134520ustar00rootroot00000000000000Copyright (c) 2019 ETH Zurich, Simon Frasch Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
SpFFT-1.1.0/README.md000066400000000000000000000213711457701740000137230ustar00rootroot00000000000000[![CI](https://github.com/eth-cscs/SpFFT/workflows/CI/badge.svg)](https://github.com/eth-cscs/SpFFT/actions?query=workflow%3ACI) [![conda-forge](https://img.shields.io/conda/vn/conda-forge/spfft.svg?style=flat)](https://anaconda.org/conda-forge/spfft) [![Documentation](https://readthedocs.org/projects/spfft/badge/?version=latest)](https://spfft.readthedocs.io/en/latest/?badge=latest) [![License](https://img.shields.io/badge/license-BSD-blue.svg)](https://raw.githubusercontent.com/eth-cscs/SpFFT/master/LICENSE) # SpFFT SpFFT - A 3D FFT library for sparse frequency domain data written in C++ with support for MPI, OpenMP, CUDA and ROCm. Inspired by the need of some computational material science applications with spherical cutoff data in frequency domain, SpFFT provides Fast Fourier Transformations of sparse frequency domain data. For distributed computations with MPI, slab decomposition in space domain and pencil decomposition in frequency domain (sparse data within a pencil / column must be on one rank) is used. ***Fig. 1:*** Illustration of a transform, where data on each MPI rank is identified by color. ### Design Goals - Sparse frequency domain input - Reuse of pre-allocated memory - Support for shifted indexing with centered zero-frequency - Optional parallelization and GPU acceleration - Unified interface for calculations on CPUs and GPUs - Support of Complex-To-Real and Real-To-Complex transforms, where the full hermitian symmetry property is utilized - C++, C and Fortran interfaces ### Interface Design To allow for pre-allocation and reuse of memory, the design is based on two classes: - **Grid**: Provides memory for transforms up to a given size. - **Transform**: Created with information on sparse input data and is associated with a *Grid*. Maximum size is limited by *Grid* dimensions. 
Internal reference counting to *Grid* objects guarantee a valid state until *Transform* object destruction. A transform can be computed in-place and out-of-place. Addtionally, an internally allocated work buffer can optionally be used for input / output of space domain data. ### New Features in v1.0 - Support for externally allocated memory for space domain data including in-place and out-of-place transforms - Optional asynchronous computation when using GPUs - Simplified / direct transform handle creation if no resource reuse through grid handles is required ## Documentation Documentation can be found [here](https://spfft.readthedocs.io/en/latest/). ## Requirements - C++ Compiler with C++17 support. Supported compilers are: - GCC 7 and later - Clang 5 and later - ICC 19.0 and later - CMake 3.18 and later (3.21 for ROCm) - Library providing a FFTW 3.x interface (FFTW3 or Intel MKL) - For multi-threading: OpenMP support by the compiler - For compilation with GPU support: - CUDA 11.0 and later for Nvidia hardware - ROCm 5.0 and later for AMD hardware ## Installation The build system follows the standard CMake workflow. Example: ```console mkdir build cd build cmake .. -DSPFFT_OMP=ON -DSPFFT_MPI=ON -DSPFFT_GPU_BACKEND=CUDA -DSPFFT_SINGLE_PRECISION=OFF -DCMAKE_INSTALL_PREFIX=/usr/local make -j8 install ``` ### CMake options | Option | Default | Description | |------------------------|---------|--------------------------------------------------------------| | SPFFT_MPI | ON | Enable MPI support | | SPFFT_OMP | ON | Enable multi-threading with OpenMP | | SPFFT_GPU_BACKEND | OFF | Select GPU backend. Can be OFF, CUDA or ROCM | | SPFFT_GPU_DIRECT | OFF | Use GPU aware MPI with GPUDirect | | SPFFT_SINGLE_PRECISION | OFF | Enable single precision support | | SPFFT_STATIC | OFF | Build as static library | | SPFFT_FFTW_LIB | AUTO | Library providing a FFTW interface. 
Can be AUTO, MKL or FFTW | | SPFFT_BUILD_TESTS | OFF | Build test executables for developement purposes | | SPFFT_INSTALL | ON | Add library to install target | | SPFFT_FORTRAN | OFF | Build Fortran interface module | | SPFFT_BUNDLED_LIBS | ON | Download required libraries for building tests | **_NOTE:_** When compiling with CUDA or ROCM (HIP), the standard `CMAKE_CUDA_ARCHITECTURES` and `CMAKE_HIP_ARCHITECTURES` options should be defined as well. `HIP_HCC_FLAGS` is no longer in use. ## Examples Further exmples for C++, C and Fortran can be found in the "examples" folder. ```cpp #include #include #include #include "spfft/spfft.hpp" int main(int argc, char** argv) { const int dimX = 2; const int dimY = 2; const int dimZ = 2; std::cout << "Dimensions: x = " << dimX << ", y = " << dimY << ", z = " << dimZ << std::endl << std::endl; // Use default OpenMP value const int numThreads = -1; // Use all elements in this example. const int numFrequencyElements = dimX * dimY * dimZ; // Slice length in space domain. Equivalent to dimZ for non-distributed case. const int localZLength = dimZ; // Interleaved complex numbers std::vector frequencyElements; frequencyElements.reserve(2 * numFrequencyElements); // Indices of frequency elements std::vector indices; indices.reserve(dimX * dimY * dimZ * 3); // Initialize frequency domain values and indices double initValue = 0.0; for (int xIndex = 0; xIndex < dimX; ++xIndex) { for (int yIndex = 0; yIndex < dimY; ++yIndex) { for (int zIndex = 0; zIndex < dimZ; ++zIndex) { // init with interleaved complex numbers frequencyElements.emplace_back(initValue); frequencyElements.emplace_back(-initValue); // add index triplet for value indices.emplace_back(xIndex); indices.emplace_back(yIndex); indices.emplace_back(zIndex); initValue += 1.0; } } } std::cout << "Input:" << std::endl; for (int i = 0; i < numFrequencyElements; ++i) { std::cout << frequencyElements[2 * i] << ", " << frequencyElements[2 * i + 1] << std::endl; } // Create local Grid. 
For distributed computations, a MPI Communicator has to be provided spfft::Grid grid(dimX, dimY, dimZ, dimX * dimY, SPFFT_PU_HOST, numThreads); // Create transform. // Note: A transform handle can be created without a grid if no resource sharing is desired. spfft::Transform transform = grid.create_transform(SPFFT_PU_HOST, SPFFT_TRANS_C2C, dimX, dimY, dimZ, localZLength, numFrequencyElements, SPFFT_INDEX_TRIPLETS, indices.data()); /////////////////////////////////////////////////// // Option A: Reuse internal buffer for space domain /////////////////////////////////////////////////// // Transform backward transform.backward(frequencyElements.data(), SPFFT_PU_HOST); // Get pointer to buffer with space domain data. Is guaranteed to be castable to a valid // std::complex pointer. Using the internal working buffer as input / output can help reduce // memory usage. double* spaceDomainPtr = transform.space_domain_data(SPFFT_PU_HOST); std::cout << std::endl << "After backward transform:" << std::endl; for (int i = 0; i < transform.local_slice_size(); ++i) { std::cout << spaceDomainPtr[2 * i] << ", " << spaceDomainPtr[2 * i + 1] << std::endl; } ///////////////////////////////////////////////// // Option B: Use external buffer for space domain ///////////////////////////////////////////////// std::vector spaceDomainVec(2 * transform.local_slice_size()); // Transform backward transform.backward(frequencyElements.data(), spaceDomainVec.data()); // Transform forward transform.forward(spaceDomainVec.data(), frequencyElements.data(), SPFFT_NO_SCALING); // Note: In-place transforms are also supported by passing the same pointer for input and output. 
std::cout << std::endl << "After forward transform (without normalization):" << std::endl; for (int i = 0; i < numFrequencyElements; ++i) { std::cout << frequencyElements[2 * i] << ", " << frequencyElements[2 * i + 1] << std::endl; } return 0; } ``` ## Acknowledgements This work was supported by: |![ethz](docs/images/logo_ethz.png) | [**Swiss Federal Institute of Technology in Zurich**](https://www.ethz.ch/) | |:----:|:----:| |![cscs](docs/images/logo_cscs.png) | [**Swiss National Supercomputing Centre**](https://www.cscs.ch/) | |![max](docs/images/logo_max.png) | [**MAterials design at the eXascale**](http://www.max-centre.eu)
(Horizon2020, grant agreement MaX CoE, No. 824143) | SpFFT-1.1.0/cmake/000077500000000000000000000000001457701740000135205ustar00rootroot00000000000000SpFFT-1.1.0/cmake/SpFFT.pc.in000066400000000000000000000005361457701740000153770ustar00rootroot00000000000000prefix=@CMAKE_INSTALL_PREFIX@ libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ external_packages= Name: SpFFT Description: Sparse 3D FFT library with MPI, OpenMP, CUDA and ROCm support Version: @PROJECT_VERSION@ Libs: -L${libdir} -lspfft Cflags: -I${includedir} Requires.private: @SPFFT_EXTERNAL_PKG_PACKAGES@ SpFFT-1.1.0/cmake/SpFFTConfig.cmake000066400000000000000000000003131457701740000165670ustar00rootroot00000000000000 if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/SpFFTSharedConfig.cmake") include("${CMAKE_CURRENT_LIST_DIR}/SpFFTSharedConfig.cmake") else() include("${CMAKE_CURRENT_LIST_DIR}/SpFFTStaticConfig.cmake") endif() SpFFT-1.1.0/cmake/SpFFTConfigVersion.cmake000066400000000000000000000003701457701740000201400ustar00rootroot00000000000000 # Prefer shared library if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/SpFFTSharedConfigVersion.cmake") include("${CMAKE_CURRENT_LIST_DIR}/SpFFTSharedConfigVersion.cmake") else() include("${CMAKE_CURRENT_LIST_DIR}/SpFFTStaticConfigVersion.cmake") endif() SpFFT-1.1.0/cmake/SpFFTSharedConfig.cmake000066400000000000000000000027641457701740000177320ustar00rootroot00000000000000include(CMakeFindDependencyMacro) macro(find_dependency_components) if(${ARGV0}_FOUND AND ${CMAKE_VERSION} VERSION_LESS "3.15.0") # find_dependency does not handle new components correctly before 3.15.0 set(${ARGV0}_FOUND FALSE) endif() find_dependency(${ARGV}) endmacro() # options used for building library set(SPFFT_OMP @SPFFT_OMP@) set(SPFFT_MPI @SPFFT_MPI@) set(SPFFT_STATIC @SPFFT_STATIC@) set(SPFFT_GPU_DIRECT @SPFFT_GPU_DIRECT@) set(SPFFT_SINGLE_PRECISION @SPFFT_SINGLE_PRECISION@) set(SPFFT_FFTW_LIB @SPFFT_FFTW_LIB@) set(SPFFT_GPU_BACKEND @SPFFT_GPU_BACKEND@) set(SPFFT_CUDA 
@SPFFT_CUDA@) set(SPFFT_ROCM @SPFFT_ROCM@) set(SPFFT_MKL @SPFFT_MKL@) set(SPFFT_ARMPL @SPFFT_ARMPL@) set(SPFFT_FFTW @SPFFT_FFTW@) # add version of package include("${CMAKE_CURRENT_LIST_DIR}/SpFFTSharedConfigVersion.cmake") # add library target include("${CMAKE_CURRENT_LIST_DIR}/SpFFTSharedTargets.cmake") # SpFFT only has MPI as public dependency, since the mpi header is # part of the public header file if(SPFFT_MPI) # only look for MPI if interface for language may be used get_property(_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) if("CXX" IN_LIST _LANGUAGES) find_dependency_components(MPI COMPONENTS CXX) target_link_libraries(SpFFT::spfft INTERFACE MPI::MPI_CXX) endif() if("C" IN_LIST _LANGUAGES) find_dependency_components(MPI COMPONENTS C) target_link_libraries(SpFFT::spfft INTERFACE MPI::MPI_C) endif() # Fortran interface does not depend on MPI -> no linking for shared library required endif() SpFFT-1.1.0/cmake/SpFFTStaticConfig.cmake000066400000000000000000000061201457701740000177410ustar00rootroot00000000000000include(CMakeFindDependencyMacro) macro(find_dependency_components) if(${ARGV0}_FOUND AND ${CMAKE_VERSION} VERSION_LESS "3.15.0") # find_dependency does not handle new components correctly before 3.15.0 set(${ARGV0}_FOUND FALSE) endif() find_dependency(${ARGV}) endmacro() # options used for building library set(SPFFT_OMP @SPFFT_OMP@) set(SPFFT_MPI @SPFFT_MPI@) set(SPFFT_STATIC @SPFFT_STATIC@) set(SPFFT_GPU_DIRECT @SPFFT_GPU_DIRECT@) set(SPFFT_SINGLE_PRECISION @SPFFT_SINGLE_PRECISION@) set(SPFFT_FFTW_LIB @SPFFT_FFTW_LIB@) set(SPFFT_GPU_BACKEND @SPFFT_GPU_BACKEND@) set(SPFFT_CUDA @SPFFT_CUDA@) set(SPFFT_ROCM @SPFFT_ROCM@) set(SPFFT_MKL @SPFFT_MKL@) set(SPFFT_ARMPL @SPFFT_ARMPL@) set(SPFFT_FFTW @SPFFT_FFTW@) # make sure CXX is enabled get_property(_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) if(SpFFT_FIND_REQUIRED AND NOT "CXX" IN_LIST _LANGUAGES) message(FATAL_ERROR "SpFFT requires CXX language to be enabled for static linking.") endif() # Only look for 
modules we installed and save value set(_CMAKE_MODULE_PATH_SAVE ${CMAKE_MODULE_PATH}) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/modules") if(SPFFT_MKL) find_dependency(MKLSequential) endif() if(SPFFT_ARMPL) find_dependency(ARMPL) endif() if(SPFFT_FFTW) find_dependency(FFTW) endif() if(SPFFT_OMP AND NOT TARGET OpenMP::OpenMP_CXX) find_dependency_components(OpenMP COMPONENTS CXX) endif() if(SPFFT_MPI AND NOT TARGET MPI::MPI_CXX) find_dependency_components(MPI COMPONENTS CXX) endif() if(SPFFT_CUDA) if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.17.0") find_dependency(CUDAToolkit) else() enable_language(CUDA) find_library(CUDA_CUDART_LIBRARY cudart PATHS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) find_library(CUDA_CUFFT_LIBRARY cufft PATHS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) if(NOT TARGET CUDA::cudart) add_library(CUDA::cudart INTERFACE IMPORTED) endif() set_property(TARGET CUDA::cudart PROPERTY INTERFACE_LINK_LIBRARIES ${CUDA_CUDART_LIBRARY}) set_property(TARGET CUDA::cudart PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) if(NOT TARGET CUDA::cufft) add_library(CUDA::cufft INTERFACE IMPORTED) endif() set_property(TARGET CUDA::cufft PROPERTY INTERFACE_LINK_LIBRARIES ${CUDA_CUFFT_LIBRARY}) set_property(TARGET CUDA::cufft PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) endif() endif() if(SPFFT_ROCM) find_dependency(hip CONFIG) find_dependency(rocfft CONFIG) find_dependency(hipfft CONFIG) endif() set(CMAKE_MODULE_PATH ${_CMAKE_MODULE_PATH_SAVE}) # restore module path # add version of package include("${CMAKE_CURRENT_LIST_DIR}/SpFFTStaticConfigVersion.cmake") # add library target include("${CMAKE_CURRENT_LIST_DIR}/SpFFTStaticTargets.cmake") # Make MPI dependency public to compile interface depending on enabled languages if(SPFFT_MPI) if("CXX" IN_LIST _LANGUAGES) target_link_libraries(SpFFT::spfft INTERFACE MPI::MPI_CXX) endif() if("C" IN_LIST _LANGUAGES) if(NOT TARGET MPI::MPI_C) 
find_dependency_components(MPI COMPONENTS C) endif() target_link_libraries(SpFFT::spfft INTERFACE MPI::MPI_C) endif() endif() SpFFT-1.1.0/cmake/SpFFTTargets.cmake000066400000000000000000000003461457701740000170010ustar00rootroot00000000000000 # Prefer shared library if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/SpFFTSharedTargets.cmake") include("${CMAKE_CURRENT_LIST_DIR}/SpFFTSharedTargets.cmake") else() include("${CMAKE_CURRENT_LIST_DIR}/SpFFTStaticTargets.cmake") endif() SpFFT-1.1.0/cmake/modules/000077500000000000000000000000001457701740000151705ustar00rootroot00000000000000SpFFT-1.1.0/cmake/modules/FindARMPL.cmake000066400000000000000000000062221457701740000176500ustar00rootroot00000000000000# Copyright (c) 2019 ETH Zurich, Simon Frasch # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. Neither the name of the copyright holder nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. #.rst: # FindARMPL # ----------- # # This module searches for the sequential 32-bit integer ARM library. # # # The following variables are set # # :: # # ARMPL_FOUND - True if double precision fftw library is found # ARMPL_LIBRARIES - The required libraries # ARMPL_INCLUDE_DIRS - The required include directory # # The following import target is created # # :: # # ARM::pl # set paths to look for ARM set(_ARMPL_PATHS ${ARMPL_ROOT} $ENV{ARMPL_ROOT} $ENV{ARMPL_DIR}) set(_ARMPL_DEFAULT_PATH_SWITCH) if(_ARMPL_PATHS) # do not look at any default paths if a custom path was set set(_ARMPL_DEFAULT_PATH_SWITCH NO_DEFAULT_PATH) else() set(_ARMPL_PATHS /opt/arm) endif() # find all ARM libraries / include directories find_library( ARMPL_LIBRARIES NAMES "armpl_lp64" HINTS ${_ARMPL_PATHS} PATH_SUFFIXES "lib" "lib64" ${_ARMPL_DEFAULT_PATH_SWITCH} ) find_path(ARMPL_INCLUDE_DIRS NAMES "fftw3.h" HINTS ${_ARMPL_PATHS} PATH_SUFFIXES "include" "include/fftw" "fftw" ${_ARMPL_DEFAULT_PATH_SWITCH} ) # check if found include(FindPackageHandleStandardArgs) find_package_handle_standard_args(ARMPL REQUIRED_VARS ARMPL_LIBRARIES ARMPL_INCLUDE_DIRS) # add target to link against if(ARMPL_FOUND) # create interface target if(NOT TARGET ARM::pl) add_library(ARM::pl INTERFACE IMPORTED) endif() set_property(TARGET ARM::pl PROPERTY INTERFACE_LINK_LIBRARIES ${ARMPL_LIBRARIES}) set_property(TARGET ARM::pl PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${ARMPL_INCLUDE_DIRS}) endif() # prevent 
clutter in gui MARK_AS_ADVANCED(ARMPL_LIBRARIES ARMPL_INCLUDE_DIRS) SpFFT-1.1.0/cmake/modules/FindFFTW.cmake000066400000000000000000000064441457701740000175510ustar00rootroot00000000000000# Copyright (c) 2019 ETH Zurich, Simon Frasch # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. Neither the name of the copyright holder nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. #.rst: # FindFFTW # ----------- # # This module looks for the fftw3 library. 
#
# The following variables are set
#
# ::
#
#   FFTW_FOUND          - True if double precision fftw library is found
#   FFTW_LIBRARIES      - The required libraries
#   FFTW_INCLUDE_DIRS   - The required include directory
#
# The following import target is created
#
# ::
#
#   FFTW::FFTW

# set paths to look for library
# (FFTW_ROOT is read both as CMake variable and as environment variable)
set(_FFTW_PATHS ${FFTW_ROOT} $ENV{FFTW_ROOT})
set(_FFTW_INCLUDE_PATHS)
set(_FFTW_DEFAULT_PATH_SWITCH)

if(_FFTW_PATHS)
    # disable default paths if ROOT is set
    set(_FFTW_DEFAULT_PATH_SWITCH NO_DEFAULT_PATH)
else()
    # try to detect location with pkgconfig
    find_package(PkgConfig QUIET)
    if(PKG_CONFIG_FOUND)
      pkg_check_modules(PKG_FFTW QUIET "fftw3")
    endif()
    # use whatever pkg-config reported as search hints
    # (both variables expand to nothing if the lookup did not run or failed)
    set(_FFTW_PATHS ${PKG_FFTW_LIBRARY_DIRS})
    set(_FFTW_INCLUDE_PATHS ${PKG_FFTW_INCLUDE_DIRS})
endif()

# locate the library; the switch variable expands to nothing or to NO_DEFAULT_PATH
find_library(
    FFTW_LIBRARIES
    NAMES "fftw3"
    HINTS ${_FFTW_PATHS}
    PATH_SUFFIXES "lib" "lib64"
    ${_FFTW_DEFAULT_PATH_SWITCH}
)
# locate the header, hinting with the root paths and the pkg-config include directories
find_path(FFTW_INCLUDE_DIRS
    NAMES "fftw3.h"
    HINTS ${_FFTW_PATHS} ${_FFTW_INCLUDE_PATHS}
    PATH_SUFFIXES "include" "include/fftw"
    ${_FFTW_DEFAULT_PATH_SWITCH}
)

# check if found
# (sets FFTW_FOUND only if both the include directory and the library were located)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(FFTW REQUIRED_VARS FFTW_INCLUDE_DIRS FFTW_LIBRARIES )

# add target to link against
if(FFTW_FOUND)
    # guard against re-creating the target when the module is included more than once
    if(NOT TARGET FFTW::FFTW)
        add_library(FFTW::FFTW INTERFACE IMPORTED)
    endif()
    set_property(TARGET FFTW::FFTW PROPERTY INTERFACE_LINK_LIBRARIES ${FFTW_LIBRARIES})
    set_property(TARGET FFTW::FFTW PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${FFTW_INCLUDE_DIRS})
endif()

# prevent clutter in cache
MARK_AS_ADVANCED(FFTW_FOUND FFTW_LIBRARIES FFTW_INCLUDE_DIRS pkgcfg_lib_PKG_FFTW_fftw3)
SpFFT-1.1.0/cmake/modules/FindFFTWF.cmake000066400000000000000000000065571457701740000176640ustar00rootroot00000000000000# Copyright (c) 2019 ETH Zurich, Simon Frasch # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. 
Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. Neither the name of the copyright holder nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. #.rst: # FindFFTWF # ----------- # # This module looks for the fftw3f library. 
#
# The following variables are set
#
# ::
#
#   FFTWF_FOUND          - True if single precision fftw library is found
#   FFTWF_LIBRARIES      - The required libraries
#   FFTWF_INCLUDE_DIRS   - The required include directory
#
# The following import target is created
#
# ::
#
#   FFTWF::FFTWF

# set paths to look for library
# (FFTW_ROOT and FFTWF_ROOT are read both as CMake variables and as environment variables)
set(_FFTWF_PATHS ${FFTW_ROOT} $ENV{FFTW_ROOT} ${FFTWF_ROOT} $ENV{FFTWF_ROOT})
set(_FFTWF_INCLUDE_PATHS)
set(_FFTWF_DEFAULT_PATH_SWITCH)

if(_FFTWF_PATHS)
    # disable default paths if ROOT is set
    set(_FFTWF_DEFAULT_PATH_SWITCH NO_DEFAULT_PATH)
else()
    # try to detect location with pkgconfig
    find_package(PkgConfig QUIET)
    if(PKG_CONFIG_FOUND)
      # query the single precision module "fftw3f", matching the library searched below;
      # "fftw3" is the double precision module used by FindFFTW and may be absent
      # when only the single precision library is installed
      pkg_check_modules(PKG_FFTWF QUIET "fftw3f")
    endif()
    # use whatever pkg-config reported as search hints
    # (both variables expand to nothing if the lookup did not run or failed)
    set(_FFTWF_PATHS ${PKG_FFTWF_LIBRARY_DIRS})
    set(_FFTWF_INCLUDE_PATHS ${PKG_FFTWF_INCLUDE_DIRS})
endif()

# locate the library; the switch variable expands to nothing or to NO_DEFAULT_PATH
find_library(
    FFTWF_LIBRARIES
    NAMES "fftw3f"
    HINTS ${_FFTWF_PATHS}
    PATH_SUFFIXES "lib" "lib64"
    ${_FFTWF_DEFAULT_PATH_SWITCH}
)
# locate the header (single and double precision share fftw3.h)
find_path(FFTWF_INCLUDE_DIRS
    NAMES "fftw3.h"
    HINTS ${_FFTWF_PATHS} ${_FFTWF_INCLUDE_PATHS}
    PATH_SUFFIXES "include" "include/fftw"
    ${_FFTWF_DEFAULT_PATH_SWITCH}
)

# check if found
# (sets FFTWF_FOUND only if both the include directory and the library were located)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(FFTWF REQUIRED_VARS FFTWF_INCLUDE_DIRS FFTWF_LIBRARIES )

# add target to link against
if(FFTWF_FOUND)
    # guard against re-creating the target when the module is included more than once
    if(NOT TARGET FFTWF::FFTWF)
        add_library(FFTWF::FFTWF INTERFACE IMPORTED)
    endif()
    set_property(TARGET FFTWF::FFTWF PROPERTY INTERFACE_LINK_LIBRARIES ${FFTWF_LIBRARIES})
    set_property(TARGET FFTWF::FFTWF PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${FFTWF_INCLUDE_DIRS})
endif()

# prevent clutter in cache
# (the pkgcfg_lib_* entry is named after the library reported by the "fftw3f" module)
MARK_AS_ADVANCED(FFTWF_FOUND FFTWF_LIBRARIES FFTWF_INCLUDE_DIRS pkgcfg_lib_PKG_FFTWF_fftw3f)
SpFFT-1.1.0/cmake/modules/FindMKLSequential.cmake000066400000000000000000000131331457701740000214520ustar00rootroot00000000000000# Copyright (c) 2019 ETH Zurich, Simon Frasch # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. 
Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. Neither the name of the copyright holder nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. #.rst: # FindMKLSequential # ----------- # # This module searches for the sequential 32-bit integer MKL library. # Only looks for static libraries by default. 
#
#
# The following variables are set
#
# ::
#
#   MKLSequential_FOUND             - True if the MKL libraries and include directories are found
#   MKLSequential_LIBRARIES         - The required libraries
#   MKLSequential_INCLUDE_DIRS      - The required include directory
#   MKLSequential_FFTW_INCLUDE_DIRS - The required fftw interface include directory
#
# The following import target is created
#
# ::
#
#   MKL::Sequential

# set paths to look for MKL
# (MKLSequential_ROOT is a CMake variable, MKLROOT is read from the environment)
set(_MKLSequential_PATHS ${MKLSequential_ROOT} $ENV{MKLROOT})
set(_MKLSequential_INCLUDE_PATHS)
set(_MKLSequential_DEFAULT_PATH_SWITCH)

if(_MKLSequential_PATHS)
    # do not look at any default paths if a custom path was set
    set(_MKLSequential_DEFAULT_PATH_SWITCH NO_DEFAULT_PATH)
else()
    # try to detect location with pkgconfig
    if(NOT MKLSequential_ROOT)
        find_package(PkgConfig QUIET)
        if(PKG_CONFIG_FOUND)
            # look for dynamic module, such that a -L flag can be parsed
            pkg_check_modules(PKG_MKL QUIET "mkl-dynamic-lp64-seq")
            set(_MKLSequential_PATHS ${PKG_MKL_LIBRARY_DIRS})
            set(_MKLSequential_INCLUDE_PATHS ${PKG_MKL_INCLUDE_DIRS})
        endif()
    endif()
endif()

# find all MKL libraries / include directories
# (the three libraries are searched with identical hints and path suffixes)
find_library(
    _MKLSequential_INT_LIB
    NAMES "mkl_intel_lp64"
    HINTS ${_MKLSequential_PATHS}
    PATH_SUFFIXES "intel64_lin" "intel64" "lib/intel64_lin" "lib/intel64"
    ${_MKLSequential_DEFAULT_PATH_SWITCH}
)
find_library(
    _MKLSequential_SEQ_LIB
    NAMES "mkl_sequential"
    HINTS ${_MKLSequential_PATHS}
    PATH_SUFFIXES "intel64_lin" "intel64" "lib/intel64_lin" "lib/intel64"
    ${_MKLSequential_DEFAULT_PATH_SWITCH}
)
find_library(
    _MKLSequential_CORE_LIB
    NAMES "mkl_core"
    HINTS ${_MKLSequential_PATHS}
    PATH_SUFFIXES "intel64_lin" "intel64" "lib/intel64_lin" "lib/intel64"
    ${_MKLSequential_DEFAULT_PATH_SWITCH}
)
find_path(MKLSequential_INCLUDE_DIRS
    NAMES "mkl.h"
    HINTS ${_MKLSequential_PATHS} ${_MKLSequential_INCLUDE_PATHS}
    PATH_SUFFIXES "include"
    ${_MKLSequential_DEFAULT_PATH_SWITCH}
)
# the fftw3.h header is searched separately, also in "fftw" sub-directories
find_path(MKLSequential_FFTW_INCLUDE_DIRS
    NAMES "fftw3.h"
    HINTS ${_MKLSequential_PATHS} ${_MKLSequential_INCLUDE_PATHS}
    PATH_SUFFIXES "include" "include/fftw" "fftw"
    ${_MKLSequential_DEFAULT_PATH_SWITCH}
)

# check if found
# (all three libraries and both include directories are required)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(MKLSequential REQUIRED_VARS _MKLSequential_INT_LIB _MKLSequential_SEQ_LIB _MKLSequential_CORE_LIB MKLSequential_INCLUDE_DIRS MKLSequential_FFTW_INCLUDE_DIRS)

# add target to link against
if(MKLSequential_FOUND)
    # libraries have inter-dependencies, therefore use link group on Linux
    if(UNIX AND NOT APPLE)
        set(MKLSequential_LIBRARIES "-Wl,--start-group" ${_MKLSequential_INT_LIB} ${_MKLSequential_SEQ_LIB} ${_MKLSequential_CORE_LIB} "-Wl,--end-group")
    else()
        set(MKLSequential_LIBRARIES ${_MKLSequential_INT_LIB} ${_MKLSequential_SEQ_LIB} ${_MKLSequential_CORE_LIB})
    endif()
    # external libraries required on unix
    if(UNIX)
        list(APPEND MKLSequential_LIBRARIES -lpthread -lm -ldl)
    endif()

    # create interface target
    if(NOT TARGET MKL::Sequential)
        add_library(MKL::Sequential INTERFACE IMPORTED)
    endif()
    set_property(TARGET MKL::Sequential PROPERTY INTERFACE_LINK_LIBRARIES ${MKLSequential_LIBRARIES})
    set_property(TARGET MKL::Sequential PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${MKLSequential_INCLUDE_DIRS} ${MKLSequential_FFTW_INCLUDE_DIRS})
endif()

# prevent clutter in gui
MARK_AS_ADVANCED(MKLSequential_FOUND MKLSequential_LIBRARIES MKLSequential_INCLUDE_DIRS _MKLSequential_INT_LIB _MKLSequential_SEQ_LIB _MKLSequential_CORE_LIB MKLSequential_FFTW_INCLUDE_DIRS _MKLSequential_DEFAULT_PATH_SWITCH _MKLSequential_PATHS)
# presumably the per-library cache entries left behind by the pkg-config lookup above -- verify against FindPkgConfig
MARK_AS_ADVANCED(pkgcfg_lib_PKG_MKL_dl pkgcfg_lib_PKG_MKL_m pkgcfg_lib_PKG_MKL_mkl_core pkgcfg_lib_PKG_MKL_mkl_sequential pkgcfg_lib_PKG_MKL_mkl_intel_lp64 pkgcfg_lib_PKG_MKL_pthread)
SpFFT-1.1.0/docs/000077500000000000000000000000001457701740000133705ustar00rootroot00000000000000SpFFT-1.1.0/docs/Doxyfile000066400000000000000000000007541457701740000151040ustar00rootroot00000000000000PROJECT_NAME = "SpFFT" XML_OUTPUT = xml INPUT = ../include INCLUDE_PATH = ../include ../src GENERATE_LATEX = NO GENERATE_MAN 
= NO GENERATE_RTF = NO CASE_SENSE_NAMES = NO GENERATE_HTML = NO GENERATE_XML = YES RECURSIVE = YES QUIET = YES JAVADOC_AUTOBRIEF = YES WARN_IF_UNDOCUMENTED = NO MACRO_EXPANSION = YES PREDEFINED = "SPFFT_MPI" "SPFFT_SINGLE_PRECISION" "SPFFT_EXPORT" EXTRACT_PRIVATE = NO EXTRACT_ALL = YES SpFFT-1.1.0/docs/Makefile000066400000000000000000000011111457701740000150220ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SOURCEDIR = source BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) SpFFT-1.1.0/docs/images/000077500000000000000000000000001457701740000146355ustar00rootroot00000000000000SpFFT-1.1.0/docs/images/logo_cscs.png000066400000000000000000000211371457701740000173220ustar00rootroot00000000000000PNG  IHDR'?tEXtSoftwareAdobe ImageReadyqe<hiTXtXML:com.adobe.xmp !>IDATx]xUŶ-I`Qµ=Ы*Xgrb@WP@ *E)(Mz$ $of}NN93{쵧f͚5qʐ!Cb 82d()..^%ugr-пe3lpG3JWsCB+'5d)>L00)N3&go%F%0-GǗPIB ]%&vRB]$ũ"~7I_ݖ^0&V xJXݿRhdCy DٖZjD)@`Z20e m)?=ҡgH'!K‡NBڲ:GPxۛj<Pg<$8 HINkC=tCC%N0 Lk$0@uPZ.GQ4!ajѭTj봘d]?/^yF yܻȖw8 Ω^` bEjWGArX*3%TND>ПuE#a@iDJ% 1[0 It=sG~9seggc9rE77ޝQVQ#)p2)SM#L&նMI8,a9A=zO0D|&ZܩgV/<@>LՐxA;%4ү5kO)cgnn#,R=TЮt_9UO)?^.ދ$].mz1 I*1Ӡ`GɦS/꒵oG,g0m4̴.(p"0xNL,`*۵F5U.]p+ܻ:窼0<ōd\I^jKiiB1 ],y7l %D[z2*%Xzݐ)@`:"#N7?yiݭ*&+Yz*P |D:QK%=ܤLKzLFKmWky@BzKBK]ңl(}s K p L®L2J>au˔JG͓+6 >dpZTxĄ&l]ha67}8v?U|>YGsϧ2B۳]1Vp'|ĻDb JYʷA=>ppJ J;쒵sݿwu*GI*?X`mlup;Fqi&a||syAoܯFRY p7ן7a†i8<:ݍhA7ܱnVk%oMCtƃVa(ˁ<a @vTm)=vAqr0=},,.w"l ¾Fo7vsrL,)NByR %:7;ݯ|^ϰW)[yINTt6VJc7*Hz $xI9q:ӀZw6x*t:HJ<> ! 
WmHN8kCO#hJ<=W',qxW@&Y5C}_ ?RS{2A~;n>4Ct~ݘ$d{Kp3N??gﴚ4~ u *'<~W<9c:~/]A4:E8;n!|L78qA?#ؾා̿c9#~C8B{6:7H.KTTaC/pPB[3Nm2PNJ?c? 8B*JQ)j[`R3/ww:NeGb4VƆR|ttޥK ?S6'bT_lWsO*<4NC`ֱ"Ne{kRjw#{M۴r,!ӰKYpwyg0B)|nÎ7%&@ўRr`ZCT+D܇YF6/V}ZDZ:^+ gRBB#ȩ(-&[A7%i4=ʷ w$ojH)D2sԉX3u%{7l{X<'0ٶpGGRV8=Jq@s#$J,.8#%,*L%7f*oݑbtvђK*ZKHqR޵ BpTD6rDg +CQXh` @ Ar6[NpНZ\Nzh` c* .Re;Thϔ 6ʹ'fx%~ 9xtgհM':bUms01CM%$k||C07ѽʻSOe!PkYN u`@tj=UksT:*z?HTV )Kؿ/LP>7OE|q? i@0m8P⸥j!N.US3RѓSs;M3B.+N XXv)Yi,u谯Cꕴݘ/v C,0}Lͩ[֍a{me=[SCXbC{L%l ?8N-X4z]'W-]|o g艿O=T`̣TAu -(bGC"2} L1doN`*‡(C 0A'`[#Sg6 S2d()1/lڌ\UhRi}ѣnylXYyھisu,n~| >,XaHWf4pA 72I+'Ņ%qoUX*VmXƊM_K%\X^»jճ۸:t[++]r1+slXlr:ND +Vg;)Kl,? C0k*1L``f.UX״]8XlKo3*:쒵og02KRC熭 FuoF[CdܰL/{oS`Y!K0fU8ٶH(gn 5{`4|^{4ےP<yLCRRBPk{jmR*O+g& .a+ߣp`r 1{!~HmX.f>ʷ SW ] z]]<4lyh j}b ;Y@Kg}_ AR|ɏ:zDD{,HPɔ $~$yxl!hr072\O>l`?0|bySVѨh0Ƅu=DZ/syf4iBWgkzn`} O PqT7yXaܓ,/s.Ff08 Xcu~|c_ppw6{\HQ)A5'@-A-(8sM &t20a+NЄիpg 4ތ}X|4;bau"06t,K)jqv&ݧٰ lOG#Q|@m˟mز ݴ&_ j@ ]j|SA>@C08vTJ)|Q0E(s^<ƓJĺY;9uT^z3@+UY#80 \H0exdBa0Nr9Q)9{1TuFg; 0}e`RƿE*u Ӆo\U51⭡/G.lDa\*=@ױ1b½9.D'R0OK 0eS`<D@gLj+Pj<T RDoWld~R)M-t7vn+1SRSTuw,^!'ѡ}ajw=NJz7j^;$oT.a;#y3UOi髛e94;sOSIw6=ϳ\^pęk>D{T2\m*ev`w;eJ PWPߐthHdKog*ۻr 1 :Pn/󩽯 ٷtgy e>W]ת'LΦtV߉:J\kzz.V?jt*y̤6)M %.m-%ִW;汮 {(= p$S{O3BĄW ~}\{B_<󨘐yr7 툾`n.FJ|4hS̃!C%:1Z@J#()h2%uT'A!C p2dȐ'C 2dȐ?5jWGFIENDB`SpFFT-1.1.0/docs/images/logo_ethz.png000066400000000000000000000074371457701740000173500ustar00rootroot00000000000000PNG  IHDR% AR IDATx] ]E>v(ʳ">nw}UmhB5,$&`P`j RFP(nԶ[l҇{Sgsf9g=ɟs9?3[ r)SSxS("ּ҄M6f\vYy.By8-@n.WA~\ǫu |Oy4y"yW슂SsM =x8xDtm,ԮqөwBEzRT)S-_q9Aq PRTH>QtUn2#{1#aD˩Yy8>QS>'xF]G~F}i6+Wd^-2VVڱɂp96ǜ@]ie['ZV Ҽ W-8%wiζoi S]yhݵ_nr{ə|2šOh;k\kciA QX, l~gPEuG||e"$oA?Vuqʂ!x$9-n;9 Rܞ N%fg=#[|RSjrKMhX= ~hGW g]}&x F]v'߿Dkm`bm!"ngKϷ5ȝ' 96Ӣ9\Rr0A(\1zUm7m}뎪PKsXWι/΄H`n(|V9 7;%:CZ?yętz8[Kp~1>4.y֩3@hn#_%%hxP1c XeYuy-AP<z:yN#YON{ilFޚ~wۓ250*P] ML<5|!=|VS$22;-eG4TUK;T@BZBa^rŽ'yWR4x9}Su|.kIŔ<+y[ASόxWgRSU`"JM!c[+ywTy *=ǧZ]+$::v{cH;E T!Q%q^լZCG 7j:g.IK!o;M(%(?E߫{#KXwE.Rh%) u?:|@#!-Ŵdr ;h+Ji# }aqK ܃~IsLx&er_ 
_9nG#LuהxB6OE^ţNGQ\S)NfVj޻(Ǽ$]ot>li&gd(_Z :+yڧ%\IәJL3Sq Q]qK1UwE _:ӛZ}1Ş5;REOrS>(恠-$I] 'ȡ>BX_wC-Xv5:O0c5[D[`.6sPIB9Oir/G~㡾ղw(2y;ZmI$FEu%(,C_A[^a<UmPk~{8%B%o!t4Pc*eo9zAHױH'9nd%eG\hY)cBRwzC[(|%[ydNwBP=rcEÜ |(}]z>J{>=;EŁwA5LΏLKy3 aFH:aҞ Kzej ySKiWAg+õ<-:b^9 [ضעނIENDB`SpFFT-1.1.0/docs/images/logo_max.png000066400000000000000000000257271457701740000171650ustar00rootroot00000000000000PNG  IHDR,T8{ m pHYs+ IDATxwxՇߙVcjhq-\MSB)!香N(Pf: -b ھ3wV3+i%K>>;ww{9X`@]` |3h Ϡ7X>>>` |3h Ϡ7X>>>` |3hi00ߐi|^&P:ROaJ;҄ lK_pΪʺa| a XBE~? #aY7Z`Sm0}8}!@3n ӕ`Cח@| J޻X hA- M8a,`3(ք L #^tL6~Q< $;ǀKogW. 9Hcx+p~ YU&0x˧R( V:  B(X4J`A4,qPd +| JSõwX SD2gUEvx-p:p?ˌOhvf" sρ-J"g<7kBlS41 # 4p4h䌧e/ՄcfnMMG4!6:|:dL+!07|~@Wߓ}ci #V:Mwkbc0K'>'(` V*IhĦgRw?&`APBAg}EHdP Ͼ *8?& #S~_?Rns+v) aJ"a^r~qa`>rvt+.)k[㌓JKr,4-eYX$N&r(&s&'(M<8gTЖR7D}KQ*VkC]35Ӽ8Df4JSH`߸K=0$JmM.]sa,x~thӄQrN1! (@w8}<ƭc?HbnOxGRcU4oYQ Dm0P̶VFqgjڑ~bvF" `&D鶽;rXMn!i>XJ)O_W"H`a}Y'@WIt/5'Mtmq6W\V +Aml$vTv$S 9!4!F &r658R7e42c+Pۡ6 ^ MrUl[t>%xtc+$8|#B[ k7j4Fk65V3oa%۩;Og_Vsv2< 3x {png^Φc2qF@"z^30a?y{?.2c=d jmcori^~$\ޫ#w^. 
voKin.I/XP;F@!am`M5 i n= Zb2~'LF.3ǻ#cJс]~O)C~VgYր4yνR?s~d+?@ky4f^|-f)@ qG:yq>n\m{+80nfYd)q3V8q$}iG,~( "q Gc&S{Ήħ^ڸbE]M9vtmcu$i?m: ۫0 SCB/ "s` ܧ `y^ W:`-+TNG&]`1Wl9Mk;ȝ g="զFJ&ѝ3 ~}b~K9Wz;B#j]Z BmHxzrΗ=E.._lFj![.HcQ9H}m>D*D#}R[;iz}Lɱ+#x0K:X|lhZiU7.l.KE%oE=#Ip 2g_A66ɘٺ g; x;M#AӷkGfw> B03ws{;Mn}uZYe4!T/&DrsoV9p/[\{Ul_~1V{>ghԞ~fG敍`Ps4Vdz Ud`@&k놱7V=jK]e(}wJABt.D 7E_J]>ꦝDhMVJ$ vuwU(}wJ}lpg zD8.MJ$%߯S~EjlmZ]LLB5~ݣb3I\iae2Į:uEe|H\~ jݯtg@QUaJS'2bLݙKJSu E鞲5]C1-p#GeޤXI$㳜nr0HTd}w^}ٺȁhzbWMӱr9 ]ݙJ,FϦuO'X djo~W@i82>jW9x_!C|7 "{:d`lm&O4Y3IbWXut̸ܼO][M Įb;]~j?}C\ZS_Zs8- %"2ap^hx꼛=d 5Nl"){B!Heh?eLdD؛;g^$"յ|fzOqbhkodXƓXM7 1,j\Lp*V̪ޫ uHS_[JC=7ߦ;;XצםNIyq.5!.E&BLdp~"dBCʲ,(:n?8 4"گ U-} eOHc >il91p{7\l]uD" Ryj(F?4ӄ2 h2C`&[vZ7AS=l 8WXvoAQ,)}6w9DDڇWKDoJ=0cYUl]Bt4sctHz{ kGu&li%~tAKgԦήtT.pS-ʗ;V7z4μB1a q2S=8­p22ͨ 9qӧ it~_ڗ&zH{4!&j5jB \ %\4`]T5!@*8Flo;n=Y\"9VG0-q<j}SHGw, [R`9X9ͬIZ8s/6Yjc^!yW3$j곗P/nj'Y s7h/F#\fI u:>[psf+82G!g=#VK=wC- I?,Xy)0+G`ثa%TVufe`+SC+gV$`/*@63Y9h;g."J?)dMOaN׏\ -\Bl9Pdȝq(2,K;NtMt;Ж (F`biqnp;k%TC9ԞsJkY1$n[UPJ m>F:o =ep?: ja>20=g,;(Xm턶A̵Ffd^~DΜD~@9䒫8 i8i+oɥ+>2H>ۥĶF$JvmeԙYuN~BЄ8\7=9XF'GnwiBD0r3u iaP~Aw ] ߐ[y-)OR+$`2sV?0ۚ9`^Yu!gϜVYXI0Hl4o3+"}>\0\4!~q.v@\7{pz'iBJ?f⾗IXiƪ-Nd^+X 5-bמ۳`r~.0nݮac iY:b;÷GnV|d &K E,V'2z7EgP}{rsiX e آ|R}w}v+Ttm:gYft+~8Kn7!}Z7=˫O{kA,;<5e;et Mp%:NK G?X\=Zڶ촣1XTE%hB||ttVluPb/d Ǔ7 ]{mt} &0 |niBTBQUWi}6V&dr w^{< cUgꯞ/!qmX4Im(R?s-{&uRD r'KJBS\ Qi·%X9!rxmoP&aׄ˲OY|ާn҄iC~Lr R c& k!T0ޅ_7"c*dd+ر)\%̾!KwRIːs K0\D1;P$cDzeȼ8YG_㲈yJiz»n8ƊZ`-e7lnu d^뙃h#9A⺄( fK35ឫ<}$^U%'_HQVq׺>8D~לDxZ$p4V97J縉 jR3ٹXi5)\XӃWd»mV]^46ȼyN Xnd*!nS0sلwֱIt8Rw?&E^Ĉx^Q"8`HmJ(H첳PnwTEt H']z1j+6iEz@t] l0<ؕ:8DbϯȘW/EqH^4f Ysp ncy-*^ƪIvgSf[n( fk q=[=~%w 1[Zi?u:MϹϴ"cv#zxwoE(BL֓:YDľX$zlŮ2=jΠ],u{(_Ao$oyegD#1U7X cRzdQ+ J& 2,*PEߕea-f0+LƉ]x:߮ErDj\ch&H슳2)\U[Cy?s.Xg »oYDxPWu WUiJQZۈ35Ol?r?, QEˊEm b&%l]Jꡧ;h"Cǒz,A2J';XX; RcɠQ#Ij}Bmh$SDzŮ+WjpN- bt:'!^ŭ}:a8Jir,Ztn]!v LipDn);Aԯ:)xU7yy'3,%%84wފ^:|%RgcvjZVs S.!) 
J8J0v.*Hn)X}W;ۯ|pMTWlqiӄrƐ+T4!6ЄxhOc{UI.vd$J4l!.{p=Lu'ϼ:Rp6Je+4 }IDAT5Qr~A5Ai(Mș9sr尗Ǹ+YcW׹_:8}vSK[S<Ƨ^M믻֐A9V~lg^A$:K|F# RJj  a`80Gc4!LTAX~0uSQQI'&~Ob¦1Eο l^ri 3(R$da2ozMKʞ>U:Ԩ~NEڅN o\OOO¤رr7VK[i{<з&ͨ+ ~湞j9sI[j}&zյi용d+qTu4 EhPjءާAJvՄEL@;ilZnAɺӁ7mb4!nׄ}Q^h)#lqs@9]bxRݚքxJ7yоvslGZBLڊȼj/c6jj6ZGv;M4ڞBM9M?op1%3:PE ߬>{$7>J]+uy@Ga[iؖﮡ$ $V+ԌU-kdFcFȥe݋TXrY: ˢBP 哻KUmCۏpL5:X2oc PcRJU>niv$fB5+=B52NU~*x)-Cv2d@H(VjNDh-]Y NQ ;Ӧc%6ې'cv,0{ZCbe5DbUMm('#}__w=p0V12UMy34!v|%KbY^ݾ8.pH +zȹva)i;/m%->2H\=b.q-d?۫g@#G$.ٳiٓ o9V[, sȝ—qEA:ɷ*%6 r7 ;b|Una|Hu=r6 M.BIҹ^ \ z~g~,fUBQҏ<UH~JW%KҲdߞ9 xb\w[Qzĕ^aJ͜&gv=P@z/x(pnꆱ=A+~V༅MA:GjBhe[ʾ .0DVZ>i_Vt]v՟IWsȘɺd</FuUgEʹ>; c(m-sQ;i?BH+jm =BmAG1㦞Q(,`&oiw9p`T0gA#4!Vu ?MQg/ՄzP =l@:BMHJ@s]EH kZU) :c2)|]zgwym\P/óiy'hCx(rkuණ썬9R𚏌_4"qܩRu~YfrC /!e]4TRo0XgXw%jMg^qBɼjm7- %bzgeyZ*uC@/AÙ+:.KRzo Ɖq )۴3{u2J4{"2oL0wx]\#)Ҁׄx>jmȘ3/&!s]gv|`ki\JmB)-(ppHp"*+_t$ҤZEA8ԕVtcJS7KoaN:{%D#ufUYR~4+}aDN&نc/ 46bx0ք8Y8e2Ž d\]MK0˒?G_3pӹxdEVMG5!!gL+L74!tqdJ8L}4L5 xXpKoYD)`fm  yMȻ8{8o{R~V2EG DX֒|PXGBy?wn۳un-"D :"SXVӢ !G4!D>C 8\${jqx,!v*bd~8n*J)OB~0(=ƘȒKQ,T"aO%\X!;[^i@??eJ~,Tʗ} "gw]L(챹YA9\y]ͮwMq0Մ XAb0Jg_p 7%X]< ZU,32]֩a\WrJ{&mΰ1}%*>#=_#㛼#24 e[*1&6Hcj(tøWb%B7׀씗T :%W"ub{ᨇ|gABjtK3gR>>>` |3h Ϡ7X>>>` |3h?tVTIENDB`SpFFT-1.1.0/docs/images/sparse_to_dense.png000066400000000000000000006733731457701740000205430ustar00rootroot00000000000000PNG  IHDRG?t IDATxiו&v|KUX(X\Rl&mК4#'<8a1?<8h^"O[t4G $R$(D$ Ǒ^f{UB"^e޼y~\@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @0r9.oذa⩩zԏ~}`@ "}Aihs]?!&ܖ?X>lT*( ۵wOMMVV'R#ѱ}M˝"@ h?ݧ "i:4YZ)|=BY2^/ը @07 SO=UZrzm&x * TUZ7#8"CZG}tȑ#k@ =z񅝈wբd7Ŷe|y<H{^{齣3o\T vzzZ"ZwOOOCVKD4 Cpb.@ {pMCeU}# g1',&K" 4Rkמ߽{(7Bnil߾iӦbqR!X.#zCT I":BD={v'<$@ C'jdoNx+K`o@ϦuM:]>7C{nrc=wYfy__Dt".V)Q("^@ďkǗ.]:|_|Q|@ 79y+[uEY6ĺNx4H ~EQ"z7gN+~H& !GBի]'+&MHoeb$Ir4)z}J>vA+5Y߿Mn)!`^#`ٳ{ŊC+zT_HHR*Vl~%V(NJr%@ sv N?=_F/!THBn$Jۆl,8676@51<:?ֱB &,f!G}0 ÅTJF%Dui-seY2QJ]|ϟ pdƍcB@ |>'R+~ Z@#Bl$FlIX !^5J$K .i8 O鱏>* Tm%#yT*V]v j"}D^o?nr$!O$HfqR7/^xfjj8]h_~Y @ 
ɈU"+.z7,,]|2p;~.KQ5*Oj&""iKnRr$;Q5bFh:ZU|p*DLLO|L)#98c=`ʕK}M(6tfKStrLBaPZ8+&ccc"@ qm?^Pz|AwQ8+ 9EpXhk\"o`WY] $88-P%*NfVlj`G ,QtЅ tu]?왒J rD0+E?STCVV[2G~Ǵ*Ī b$V\sjHN:uA@ k;EPxbA-TZą XLRMxe)SWwX p5Eudd2$ۖI)D5h* hpZ{]!CG^ڿRn+ !Gs۷o/l޼ym-_TJ-Znv;;]Cf8쐚vǀ'EANQ(9DD. 7&@ -kwO!?Q@Luy g4 XszM*%\\I݌11pXWSB4ҀtK =Mz/T>~Ge`!Gs;w ׯ_"J{(,)9Hډ WGX6Ɗ"OLLL|hb@ 'OWޅc "3܀VD `$MJF،2d`H F:l3hL⚒%R`Zon/""ҏI~U~o.S"!sSO=UZrZbfG ȀGRYs>:vÒ\ mG{[AEaI2-pv(ܹ$ EH'1ӬfWyNZ@kMJM*U}av Hxun99"\X*mrB%8*'y#>G^Hv2!=|jW>55uw[xqu׮]u!K@ gēnzۓKQN"v>[fIEI\yJI!Xx%We )kc&ٓW5P D"Eo},M{'9" <8ydC=P( [x?$A+6"h2C^\."E":EѩC}o` @ DĄțc~uFہ7=sQcF-sVɖ>ӡ:%S ["w=W.'>\D0! &Ev?xbqRN"Z!|a3#/yDN"vYfR8XQBD'=zZ^Z={D7i@ f_onc"uxDMTogd0$E2^Qsʔ'VVߒ<;ڻp, =I+Z/f(u.#cj?;?<G. C=O0!"&E{ryՒ%K6a"7tf.$WgX5rW>&O d{@ ׈?yꩾJcqHV!B7wZ;?G>`99E8;(d _j3NZė5;T5sGo2FH:×6M֐X>`H9k 7GDӀ |pql7 __0@ALj׍Ri}_Z"n}F'$ǜ; A5**Su]VYy6N,1IXZ'xqWjv7TR"1ß:*!ނ!GW7yGPD&}*_6lT$Pa_bFR; ҏ@ׯ驳ȱĩsX\};%c sDIb V 6oӗ%F ۺR3 7~(%I2QI*p%4qGgp'}$,pV(VxX8LGNH>4Xl?Z]N=1.Ə]&:߮B^z%w/@B Ą#0;/̱e*~KDp4~oEߠ pm/XD 4Od͚5+ Fgд;n!8 P.*.YBZIg~v)BmlT;[;u^W.NZOu"E| `;]Nx>"yR$}P׏{֮][߹s~I>f@ O6ƍՋh0ғtp&ܼ =2Ax󒣢sC0gz^ M m}lcc=oy];0#?f 8ApRJ)`j[D.׈c蝈x8@1o}Ǐ]:rh޽ؘBƈ%/BqK.]"zQq䋳$.AƐ"ب蠫JҢ  [RJ'۞$$ڈN^&Ν*WF1z Tc6 \oGD 7֟DQt^_=pzD X%޽ˆ#_/"5_ 䦋s]h+'Į"H6dΚ9jt477gp+H;'Tn$$*  Fr3F0< DN;B45/Hĩesr* LJ9r{"&E^z+V,_AYE߀00КAL*H-ץE˨8@U;^f?A;C 6Q IE'c6YUU7ŘRJ] VJ:3<<<:444oׅ``Νʱ'nuoOēoDn,/HC0<)xgEu% %n}a"J82HP5C,zXT4n>Z$quu?哀TmO>de欦඀#o^(J/^߿DEZ3>EG,+go q@ T=c u/] E@xIbXd 7ե (_xzU!@MUD5&K֧GFFNLL\￴Orswv,O]\Ժm_]@#t'B#|\}K8΢8P9Wj=gDJ߭jo1+㧣 ~JsWnzoQTJ#b,}Gız~ܹs'b¤… & @p3 ѢO{ {YQ]Џ!ۀb(&l>0\z61QuADn[xra0r2ULNo"9 JRK}R+g[/שD{t[j9> d \I)D!To8N5!ԏTz{/}_V[!-냅Bacww(V""*)C| y@v_1*(*vA A$J[@06Hr^+ \W "/AooV6$"^0==}*3bL*϶?W8kJ!" #$qtuNB-gl}*'ye@:5>r{!žg\ٽo_=g`Bbc={q+ @K$h<'oa.]0Ɉk-~RC,P0&J %Bj]ޯѴ!@G )UN 刽톼8b~tfHk)Z*"^:_R@ f8tKKl.]|lsQ2Zvm$!: 2Yɔ`p9B9;-Cع-a/n " V1P0 rr">e ZI]\ 7 GЂF _X\ޗ,NjolE# *i= лZWGNdCȑ[2i_WK0|!B" f?INЎ@iUV; - ^D _oP,CXꁰ%ŬO~Ú;uQ4>l*6T?]~*g r$%ǧevo)̉ 8޺Z IDATQ jO`_|`>"1'`UN9Ҋ #$Bp8Z^CLz6O`'dD #ˁA@X. ,wєPc6]! 
_Li:69>ՑB"%KpJTLʼnfj75-'xRiTаȕ̳eJKeK&0;(ux80Hzz3>*qȸfUCgK٢cdh>~X}͋{d9 !Gx׭[ݽ'U="8ڕ!ɫl'褼{kot1>*Uqn}LF{+鱓Kï+v힌5OMGz:75}fcuIk}, ѣG[N⋱dv+@ vNyZX  CzQ IɟCTҀ01Nb[ Ȫ/3IHjD* Wh."1LxĊN[ 3%)s lc Ɗ#R2^V(+(S#)VY]t{#`>7}T_'',YR駣!Gcǎpݺu{zzxڵ";:ϕTAҩj$x)߫֡xgr8̈ r$/UK K0V#찚۝-Z"F FD'wW:NT*wygtÆ {F|Elc~KGp/[ ĵ!Ͳy1l{chkc*0bq:&F[u:Qp u2?H+U•oנ bZHL2WWXA(h٘rV;㎶kG3 1ʶR^o7XZAoOnLݛ*!7jv!{ #JQ57rĵ"W0>&T{땃ϼy֭Ӳ g!Gv-^x0^yT*mDJ" gҼv1DMkЩңStv7*R|F/|IJqHX*dF r&ɑȁm@O| ЪkQx\OYЧ_hJD>bTxkD sv .^ŵ@5 |@= WQ f3rS8f9n;`9>HpYk7sYA%6țiZ&I$g^Ec%HaqJ ip ^W-*~%k哮"< u_ e˖M Yr Ν;EZhXKDKqj] Z8yܳo[5Z_wM?I#3Erb}{2q>r৷rN":::Gg֓JzbP87==}?>W*nZ@ wv,L,zuOCEDu y@6㳩LՈK 0۳E3ٹ:T~g'r[؊Ʋ#mcb$ҳ p= s֣0&C)BIxՎ"TlՈy}6Ȏ}Gc(ΥUИt'2/w\;YIwöLZ_}APS]^طo` :ƬX%o߾x9+VؤZCDJ7 SBA/o6{Eqxpy>RFk|X6k'ZjR=h9`rpR' uP(LoٲeLk=VVϖSBإ~{JH@pD]`^M+ ;ក` ,Ϥu2Ǒ6]\#|iЭ%e% 0-՛H5Vl\>DN,W93! `B12kӼp (Ic[>5]ɽC!Sʤq%ƦyY3EKFW =U~q 4JʒfLBgbSAD/` Ǿqj !xW^0(G>cĎ?344tgX[))^o><؎f^HCAlU#na8V 8ڎsX=i~#㧣s |ȹף6JVuRʵK VDJjEZ󓓓bx?D\tV&|kp=3¯4!tD<έFG[%ې`X׮ _Mm凹*īa.FUCD񾹗by?조GYa-\)&#LūqTfκp ̓=*JcY&EҐN`x&^${2iS}M1lW@tQ:yyKJ!go=juRBz5huJӮNtLhm(okT{7&mjVS2>fu*Ńrk#pِc)Y7{>'?A8G;R ;ƗR*秦)K/:@ 73R{Ko"c8 SLFxWˬTá_uș$]Ŵ-qHSUAC҉y7g0"8IbP$$ձ1xTT;?.pOeZqQC邙:'SVD/B`0:f9G<4E3|,&,z#BRDtV2="/ @ȑk׮bl;{4mDI~֗âs8(酏LGmx6/s(/\Ra;pҦ82~zqUȑل<׾SZ|!I#C"K82\%TLKkŏjڙx$uAv.Yz N/_T }o; G[PtU-&HKfR[o-m̜JTR)$Cj%T10#e@Z/]ee}73 jf/m"º `ƾ"d36 `(Sٔ1uh32rC'e2YB.19k=ofx2n&+-DD*h B N?9%Iꇐ#7 q;sYE|jIVlt4֥D/T!(u8WgV$ը:z>rm{;G'x 3>t)퐗Cz#R1E"?$O@ bSO n i 5yO1mlLΑm3{ ?s$>a$Ϳ> G4;![("ҡEK:SGyđ4p1;_1wkC 8s|pFd9Z(~&6ix[940I(>:J1>uxt|,!?8e7KͥĈ#;p#|, u q}ؘصkWgyE]JEsvǵ#1>ȾN|%(GMҽpelƺjF9R6?NF\efbte]:gH'*kֽnOaҎ<7)SE˵Ab+y8_I#DtB)aXo+2 ˴ Zĭ+(D ,weӯ2ڥ^/ʑvH+uH'!VyT!SƆ/&/$t#::IA5"+%Q|}@ ܲW7z睞E=wܫ}JkG9HCrTd1W؆3 ̹uBeG+Kbvs 5SnpEHEy#I.AD1i%1î+$]oF~p8TvyTx?,;ltVxL~t$2rkV aK]2D5Yp!_ I; @ո>u8G1&Jӏ/FV#׈ٱcG۳jժJ(雗HV2dE|;.#GxTb͛0ĵԛ֊wO`j[I<.*NKDmK1#Fr#!Fė6rζOq[{Rym;X'~'SD=S}q1Er.${"^raF}ςlrļ@)~PzW޵[j!G:@;K]]]wu׊u3A Qiw.VÏ:Ąz DMi| ױ޲6 GzbIO9r=$xsPv$X}B&$&BɏEѹ09uK7oUp`n V_jw>(GGq=4Jmׂ[*jd_v@"Erm Ҋuv*Ɯԥ?& 8%M1FdIG3Ԟ; Gr2G56cu b 
G)MTF˥8缫GtR(8/'[5#rՌR9Wg^Ul Q‘$*ꦱҜ'Ka ,޸hѢ8Ju"ss.LGQy6m Tw$-x ,dxt~jzR`ܙ9kTQe7ct~_A0%wk|­J,ZLh'R D7V*zO#{~˩9T@ ؽkW21vmO0 Q*"@I܂B]6HVUL"# !TpuMg  # ] GH%ΤIT-ˡN{D4whMؗ# ڼUn"u0tH0&ǹvזs |2Wf8ČZ]y۬q0 ci lL_X>!O .2: ])szjsTFS߮ϽvUbhZ#%Mp\Cة˗U|LJ?ziJ|;+W,- 8&NV 99J<`[< IDATuz5{lV*1Vc2C#c,7}s=!kNO.솗$,-1n<߳ 9H:ŀ eme uHiV]e@ v?T_Wu@W0(DD:`*ͧ 1K ν_֞ Jұ^ITec6Yi~恭ԖB8 6ȰJvgH^HG`&!,d~V^h͸;R}_C˺Q1QpEd$+ ?=s毲"F뚙ͶB4C|I[!PFkZtZ瘍y;#sΞe˖QJPJm@^"Rqv9n\U<'jqҦDw2?9o)7}7_ -FP8Y~u#Q jMenVc6䧏8+ˏiE%܁n.-uk _WIj*[ Wv ~\ +P}"QȜs#Aj$5MXԌ*`kXP,o,J7;3ɰ{s5B<^ޓ|c['` &ERY2soo.j! 8؊(܋1,c}ʇ\c%!-;*՘XJN`~n. B2BTIw}7|% mOg,0r&LggYu39(tْ`S5P _al+\7cL&ͼF]~ {rWX )<&ă1Ʈ|Lb!9>p1QF9d#M{Y{TYAA@%8H{SKM2&Nz >BT_}s1or$wEfD|FQHj+@mV"NZ#ȋr yu`Rmz~+Q`_HJK WłM G[F9WՊmw-~(JHs PNL v/~Ԋ v?$3`<8A3,"}EKP ~(<ƪ+wᄊ(@ 'ޖ /+O 7`5O9m0;oFݖܡw )s|9_n?Hr& s3'\\})yJTv q̶w줫6dvv$@nC)H; 9yg!O!9.mKiy׀0v!`xvͽ)=K@qXfƛ׊l = sbY#M  ^! ?)Co|&GbBdǎիׅaz5"x<sDH_FTG"}WG׮][߻wo$JYVJ? J}w Qo6&.Iow6;)jNe6[Ƽ)eCcB!m!o'[{/bk*c^ù#g n;uC6f[Z$cc<|>{uˈ A\cN8^/;-ϪoЬ0(MTMմ1#c3U&oyK#3a,]t)lmD$kE ne"RG].M+@X0LXoe>AP !X1Qu;ɑVhS D9;Vy~AqkbU0X~n؜aL&4*f8#P@qţ__ַUe@ '6nX ,+ h` *5Λ[!gaSQ=|[,\$uLfr`CJ3)$aj_ _@Y%4.~ELJPkɱvCmFU~&HUp刡"3;P&f]L?wdTvEcYb[fDɘx8y6.˅l׬61y3o"/ &wO~cl y1Td=plxrͩ6&!TD_D8z 1or$`m۶p+V*Nk",߮hE|>> 1PQ׺ ;Xp9H-7 ϳ`ȄHO?/rdMЎظ^6. bu&廃VʎBcq9 . Ry`y&S@F<t)ȸw2#Mc<"+CBxF_կ^/~!׆X%uΖ-˾lţET#43+j 6㛇Mlbʰge 2|7gɍyFY`򧌬hpxr'aN+0"2!ٝɱqi~ l"ZcMwͶ8oTXIGM%B. 
:{l{>22s~Y \{$4𡶉'Q8@䏀A,6Iht|xζφ?<, >D7myO={t ,^z:D܂+)'yGnwb<_YrV/vNo?"^;^{+cƈfǺ/Rb(4ڣg ^K Y۩=2׫0 AP5 TW"MNԺYm a.Hz>4P\T+Tj!aK.B͆*la5|4բBV#GȊ7`"V"]:s]#$ >/9oo*h23 `JN]9udf=Ƴ"IZS^V(ǨXHsjcyYm< 3ZȒڤvdwB&rclZ 3=zt୷ZsU" y4fI݉|;-÷nŜ_ h6$l.A ,9bk,9\о&yV+ IAaѪba@X\ vGDGFՑQ c:cD[@c#l@u6<V],ݷX*( B( 4M룡by ?@P1GkZ#$v?2񾁇-xhKv^$4'g'ĈM0'Șd6r49.>Ɲ1?ab;]z@R`FyZ?_洵sIc>ҋoNndLP-dVɔ<$C' `=/J;o5"ʓa%0A5^>y>`)6^ 4r96_sh )NZՓ1\}+ @-L91ARD5#j=~e<t¹zm T?F-"4(鄄O?I^_wzj|ua:B* tlCynK۳ɵcbMS5M֭¥] _ēn}'G1s w~m(I2 *J'8,{XsT)LQ2¨djpƒR2oGk_^ت==V7CXx8Hr/w1-Pvsۘ9 CcDW-sBO0rʘLJU X4:F̺]BګeƉAʒCm{2cXd,9yb9D&F@F9% {{Rq!ʯ0{ɹ>{ʞلD?1/ecj uژ?X}QZpq "ށDNU`/Ѫ;vY囍V[>i$I 6#܃KfhV0MʘmژC:n g3wӀ+ b.XS(9T,l.pTt1 & @RPZ4:Puu/Esc::uV PhvKl㳎Z:`ϕZpV*S(B1 ǧy-u`_BҩaXy<=fGTQDZk0Y֭[iϞ=Ao/ .]v3~ W6#<6L'܁pRi̶zHc2͸C| NxhHHO ;#-K͠xbL> Y B$} Mט3R;CL`b0 _6CCk%Lj-ZĮgEKf/M:7D3_kmVYk5t'k9;}A*‰ ' Ϸ"FؘYRg#rIQ_(LjCs_fD ܗb8Į:9ⷲh4HlU;O{Qd&ND6fI4l̙6Yu٘sWZg```,n.i ɪy7>t(sn#4h+|vIbM|۴HJ)֧2f ˖BBj4+%u|Cs3]V1!/kъi.߯TqCr(,n Õ@Hfsj/B_׈*T"}uTN\_@CNTh :y&s 4uDP:LDu0= 0b P͜$1$1J%A1cDEEPTizF]S |"߿7$* K^Ȩwr9dI}2ԽB2 Hϓ~C!8Z1I<8-&՝9P6nY;BcUfPQ|I3lǚcdh,2LŔ@ɸdY\B sz&Fk\Lb˧^ImZP8u3{yeϘ>T>֔F\*K򕽄7~fdNl\œɈgލ3٬ zaw.dG+guoz9R gͳ/]8(3 >}\*c:.I}C#)F_9 $Ka]_(}aXآ4 fIパ4Č;GcmyklS)2760T'`~NG-C͵g $ Tv@1^"fJ)8?=]A zc$ ':-R I>2ǚh 1)f=eyX>tﭡ&eFKZԒePaA$2(C0  l? $L&l mؐ4)Bݶ&ꮪ[u̼yCD|{s2uVfkkE|EO%&Ǧg0E;k (8{X __ܤߒdLQPX4ܸ˓MLyjD$YPȰPrpAOm=q>"hAR'dg =,"HDT-`f,H$ icD@?P? c\d[d:&&V+1crXJ Sanh?3-3pJ9C\_A2eJO}VbxOcڨfI0HKsa c(n7^Io)WRJk\U~䏟ge_u{\;'(:J]紨jPһ/ 2($u4GB͌`|Mq~6&'vW [z;%+;E1flkn;Iq IVCΑ IVu GT vSrTd ۵+fd> ٠K@xa'Dh`r!HMO_.3 !FCCC> 8áH`P, ~zyFy aއ0]𵱑&𪅱X[:c %@mF<`c1;V]o6^o|~Zc:S>#m>xgWDt\2nS(OQٕ Sc>4g3m4uP_STD8"`PfDhOܢxBg6)pRO;|9꧟||ѿW8R='I"Y/Bݞ%bPtly u.y.H]LO-{7OVAm5=^hvA. ]G[I-CXAr[6/׎tqd 2lLyL82\\JT~LG1^ciʓ;fF09TwhhWZO+ /?s{~#PG&I#.1"u63;=F5"=!>c ^19RxAT <$|CxǙ,> C"A wf*m@#s7 Hzo5F޼GOLҜat )b`Ɗ eШj0DSe(o<>ϬLy~05-|"Oߎ6|Fu ႍ9d')6J>#3/}K?%"c"9f8ߞL(S)0dud_/ZޑNe2)A,_7I X[ڒ"fٞs}xkJ@IS`I(dihH|NAisT o :ҋaiP uzي~?'LgQhsM4]كuJ *-U}ʄ %6aep ˶ay|ٚx 93(- }mFoqo_ \B!:LIw[?5ֿpFɌNƬоN ӑu1}W? 
#n#=|bЇ0>`-qn00vS.׌B\.t)'Ѷ%QVd+o]~GabYCn.1Gv'Aq|!b%Ȝ5< 58%[]@Q|pCGZ@o|W"'$sUJ`ݕOd*0|ƋxTud*4cPd:*-IR|mWlc! k Wc 6D<32N屘R(O:\jjZjzYF9qՁ$݉'U娘@->m;+kcw L8KuЊ=j@op< iXTVs |6ͺʇ=J#d/%4'r}J}<ۿ_ $DS"tLaU2b ^eBC%%\zI:b׳[nEcQHi|n7,1B)vKȟ2;I-A*0`up;2SdC.hQի>pI! X?G:pJ vW%L>2&G p&IF82qE'oXݘ-pvDptarc[s{ČxOd u)UrF$ {%0`dIkX/tt;Fl{e!ڛm풯0Pfxm}ނy E o+_1f "w[uTvHN:dʘ8{q).FN/TyJXxImkf GAqTPۑfkM{L\a&\I{ɖ%U3:όUr% QDgU݂!u.K@O_ .)UE|G-AJ:܎A4AA[rP qrͿ; 9s-_W篟?\n?D%՟`/Sş"Eoa A9U &>$ybX8:p۾c2vA2Zr h^㦎|׎RU-Q{:BEO]7Xnڏ0sP?CN2{)nUɒtr4JaxS x)'jr˞.͚qEFF^ tv 'FĴL7h:%/ЩP֥Y8lx`U3tLNz#3e,)^ٲD6(Xɮ2%{Q}`/o|?/'l{ 3H]1rdeqSOuO9XF 9GT5S,C31]VdLPPz0!*̷7q.S̑)&J!U4I ۶p|lց%'yd>>H?-SR+[ͺ ׸IT/;W̑$s>nދ~n56|1i3G6ߴH$zul?eor9/ӟ+|_K,Eno ىOSEhI⎃: llJi@&xOʎj Gl9_fH@?솜3Q^4T#3Gpcx"[ \ ^-2hzk )ϚGdG@Ɵ7< S\{-Dľ0f7й(#R$*pjMr%GT /1wyΖZ){K+ HVm hV8qW1dR EP L8=ûyZD6]݇l]b PX8C]WGb=ݻٳ?X,~^>|"ו]SR2v)=nj"0 nSA(-Avpx;srCd"`H4rԻvQ6`H-Bd4bN70 eIZ,'xN `@L4Cǚ= uطpfmQ( 7l 6>&$&aD ?=A,sz=$؇ɣaGhv`sVVRm<|x`tDr"Addpa R| EP i6m4Y^cڬxO? }#Kt??ED?KDwme=2vCp2d= $e!YoS2KTUumd}]_F/ɩ`1F/uUPJ}+|i߳{H6b1hϬР5mhYoњh[ѫ!T5ljN:{j DGаiYTll"5W E//D{Yb4/DG ׿ջ_ 7?fOrwH[6eJH'י5@ %d<&ӑ,SR@̸dY6k @$PfֱbpĽ@>wb @2|8d^KL% ۞= c zF m+Xm\&4/kY&1PI 5ހ +*g| fso)d`W gXû ^BVS=焠'Y+>ƺ3Sh- i"b#?Sh]^]ss]2Bft#r+:?^{'+UU43w5~ԢS (勲N ]Rb,7ut(+&A X<4爖 ^kve&)!)3mλn. J(M FpѺeloSG 9"uGpz/Jd h| X>;reLKa%RGfG^to1 _K蛡*8g3:gi]3FO`QY( ?dд.s1)̓w9lH EGcvg/9&&EH Q4 H/~V1L. 
00yڲc#&O4FOGNJo=xͻlsu&7 h5f <[1 >c?MDx5T Ae_ή}sog]n "](ɟsLuƊK JF8}/49㳇;u"k%[]ibwO'-w?紩BUUs]ww KJo/YpU-ڻr{K0n`L̋Y} LfGs#8o BIj?>|ρ@O#uУ*6us-׫5l,Gcmp热hďcG?|O3'9N*YS.W$4)~EI(Y2~Ϟ} Po" ,ByAV Ӱbn8=)IHndԷ9R]!\uFA >*bet@5r̩vc^z@@$D$|ONy y_?W33O"L;\O9+ڸ@}, 04.xnCrE3"xþDn#Ks,3p1,p5fWnJΚh@@evU[(' c|Zjo@oGnlcd|:N׬?@WVgB،i(΁r&;`BCӼ.ˠ%:T]E#6$xpC#Lƽ{r0sVOs;R1W1=pBh& }`[x _Aǔ^sEկ~u/f皦S_e:: ٕ?$v,5{ IuFf n.:G^{;/})Y~sJ 1t%\CEbeM6^R%TJUѨPP#` ޴8̬j.ɓ39A>/Vӿ\m6?4Mܳ'm U@ o>/zW"8CP2t 5b]*0ZqG}@$ѫD#AQbYeFIrt1٨v"2 aiHtg(10Csိ,>TZ,LF#EaoA/w%A$>}a xI70蘈M֟a E &4 f;"H`~؆/>w?`\ E1'%w3=u$i ̴ղ wXJ1And"`B;GmWkjJ:u>6T[{" 2Hr(THO)Tyq^bdŐ7U-95oԶ CMW\$Hk@*ya|6ˆ( VP,T1aX}0ur"3]8_ul}!sYw1}U=CZH3#i3n*:~Y5lh o s}7yt׷f_ y>O[#魇Eۘa<.GdM;:'TK 0py%`(=!!TռfĀ@4BP#%$Afչ)ƺcjC_X{J\&%pqy[Uly%9u@z΀U&!}fσLtLC}9GapBTy![^/Or1+ *iڅ%k_|; _|RFV|]}u lʽZiZvTc>l#+ g',2񶩫Gw^c9RUyˠW@Kfݿ"y}x<{) E)[^wuorM$H pduBAzm1NS ].p"QQw.Hw4æ"i͔q5l*Y m׋[12G Us9s>OTuґ`ʛ뚛wBc4"/a_(_I:Bg"JuySPƿᯑ*e7c{<"7??=#9A:aⷈh;,kjEOW+:xc:9:[7+ 7pslnB}ϦbiTy1s^qff3^ܛӋrrlJ)EY!,.^C/0PU 8 d0b:4gm*]|3EsM795Ji;ʕm3hȠV/{$˾c|ۿgK]NNkNS5sw̲~O y^dFL|ED/nMlr|N'∎ U%E0d/`{  Ѐ)0M@rk{֌V0ij!-gz)\ 㒍OF0!6Gq=8^8bS`"lL5q#OvD?W; `bMGƠ-7 q~3ߘ{EfNb g3p.v'xx#p-T9MI 3pzaXb")Dm>&^+4WBy|"諁.~p-knnEǼ8. {a P@yn@]OA$.%%Ŕa5C\ ~Fĥ֠IEPו(d#n (1`Lue}}5^z";NΰIRH7-  MP)8yoRUNLe6A-iJfS)4tbc |KB#X7v)e9}臛 9H'[]fYsźFm6 _]b6(9>Z Pa} w~"{\?C מLO!Ѽje}!ݣQ!ưNn4x)=0 G8h`k.Ja{'0cF bvV ,[F,(,A)+hGs^f  9_TG`l }2\gIs/΃053*d} d??\IP@w}lcG\yvq$A>f݁"M8u zu={ڬ~2 K7V"'e/8Rr;"iT,iCn(ydWL6B%T,{993Vwv9'I ,Iޗ}S= A > JXx1l?vOJH94eО4U째:;* ){$il%y;m qmy.9lأ`$e󨚹A|3֖W~w~gн{r|貹:fPT[dϋ,dG *kj'%=>N:F|6vKcAOL۵4^W冽;zhkHFmܷhaq/*9Po`(TpsJ n# )t`206çR~LWA0bljRh1RL&q\bd 9g}S:&!&0FxsH'Z,OOO7 ojyN}0~)| a~t=ЛGO{{z. 
GLL6EծRSF" $ Qm7dB<##OTf0g)GyVz0Ո"/;ӧ0~+ L3qLxmӅ8Α41鏌cH }$ca[4R֌o; LDx``]ðL:Sp0z_bgFuҹm 8KOQI@Pz8 9 ֡^ͺշ:HPM<\snkF [OLŁv}W|v-՜(̽ .DR m%yf|cv(%^]]]N9A!~Yۜi{ِU =ζ*m9O7dcr5q38bBqP C%i ;ODTXIeuLHPy`< Vq pO.d(2Map5Cqѩ4%!"Ʒ{66RQf`DC\Ww!)=V-ҲE+EPUQ% l?U kcŀ(WF͓fܵy̍-8$S'ДdS'Kd L]QD%XK@ .lrY* }1dcp\v ϳ҂!(0<#l \k+kNj5.9qnʅc]Yf1 2v^\^l>ST˧ψxD9!BJuFl6n[k^[P7G4Ms( ArXfQ(΀ 㥽v&|tuEO:pKr;"8H8{s[߿Rn؅};ժ{?}^ QDAp/=Ć*5yH;6|–.+Ӣ:?wmzsVpGuٷ$]<$pE4lv Q,|_,JL I;w|ϓ:@B2HF<2q]ӺD$3CWP\UPR([<)ؾt>v/KsZWӆ&MGٰ*>jꦹxni=Y}vIȮ}KǍkS3>g}Y|s'SU")ӟBYjlz^~[!zZ#OYV~crܪ,Oho5܁\ֻk$Uݓ.7I"( I5$3̰|'DnL2MŞc cJo#!л S&ᖈ$t@aNQE=61I/dD zSTFW7!mFl2u5~*a|(#jd ivՁq2}_*Vab$~@ 0 s # '-N}GGO,w66/'{t챍"Dl5jXҴu[ކ:s4"c&Ky^ ˽BE(p/>rM_k#ܞV-u\(AEi7YJu<%>n¼hsnUܓǴǸG < DZܮ ߶xMOYQu L*cܰH sY%D_0w褚;mgD[0V UU;ߜZ: L˺~_{DI5>ArV{$Uf3 ATn"h[MMÎ-..S:;:;tzrұIfcRhZHP{~5$hX"9"0S(bnBލw IBZ0j2>TUBNؙ5RhH0`ƎbsؒP` 1d~dD1k-;0 rS((^'O3ԭݩ}Xs>{X 8lB[,<( b41$ )oneN$mթ~*uSw9E6MqP݇Z10w1\xo fU__usH=~&ii}@Iɘ n)riJzQdBO Y!,mW3x1N8ǏyDB}J$ ,(C;BbP*]JZASa"%ެg?>iO|W+4wHx6șJEEU{.Fdy=h`<^83 L7m{/K_whʉG|Յ 5cYIzƿ۬~T4 d?AbboW yX#xT ;[EO@QyZvtX1|}f[F1lv %Cy@ =}C1Ebq?-&eY x FJ쑠P =XGmQe~TN^+6JyT87b>#U #NOA, QK8~yDErqi(a3zcJZR)Yqy'6L`lz&q8q0`T`0}`L5O hlnãn!y+3T #! (| :㱙و,z^1W#~s8E߆ѽzpl̇6J)z5,<&%%0BLC}셍* GxFR)jdm/\&wX!7JTQ.,~!rHS۟ W*i^!د/Ciwoo]yԏW/bV`^]oS6DGD;މcW>q$23H?XֿzÇR/s'": 9ȋ%Mc" `E`C2SA6k:o-t7{f<$ @3kڤtzH|wvyCv1 m֦b#6=fIW|6f ^>&wNiΙVc8%B0f4N }i6)##[(Ab5  >$pi äySꄸF\!Q=mȥw1$۟ƌOĜ]" Ϫ lF,L6xK18[ʯ$G-h)P\?딚}lfu ͮp]L$ٗ|7C NLa5@i=XLM,)IU)K'Dg%7JB u-󦩪'OYY'gDmMcݪͿ0QGb<-Uz9 r1A#@cbWQ~PtM2WKǏ}Vo?Z2}zvUz,z{Zl7Ms_}YFBNTȮ<$SR : '- BnC%QP)U=,0~1[אaMOGu=3F~'o&(d{DT&ɿ#}_ԫRbb=S>qGǎml߻X4C53 M*+>P`GepS /Ŀw}?Hq 9HAwdF<4zv>фKLzfk>4ݍίbΟнzNOi^;0- T*c٘|uԞ!>$ WhGVk45sN%@" B72H`@YEѐ $TI s'N}0&l,>N"HTIO4%x1p/G"{@p؄1 N2IHC#uca8W8rY4' .NyV)/@RR.DJn"dC9ۊC)&1;En޼:>5vQf~X[IZ}ɵޯȣLJS2Nd׵%ɋr3x@Xΰ k)jaW Hhv!z\S u.:2萁 QF$%ϛ&|Ub-TUMj>?j'+֫{"Wd]W4K

H '''-*sv{$$̧SfrWة{@1ɔDVXMN lXu=8De T ~bMw^?ꭼz|3O߼~a .gg!!}UTx7r4W,U6c<,ZO)')0ct]. U*kr_9r,=m|5w Ih7B?ȊruFQ0,夯D#^(m6 |L5ܭKm$)F~d5lL\Hst.62LO=蜆z%|r<*9|n`X3dWEesX 8yl  *`g_sSUHZ]Faj y#P bz#ȨC,PY]?'MJsRHsW.|;KMu$P]pי뚉z>jJ+ ?+ -0Nz i*&/Qb\6\&"OY9A>"|B9t H6PfA'@RFtVC<v}®p4$u"B%/|:=>9) F ;"S}׻Lc#:G0@!F ytӄ7z.PW06F@$%ϕ,RͲ`:HQu:M*z &4G!9bpI:EhUddHH2}:Nj۠Ot.ވ!PL 1"qM qXn% h`N5K*{w#_FfW|mW,'L|?gxW47aN+:6}yj0K޷܄9genr8 IM5R;i2FN-4C> Otɧt%+ lZ^9Yrrg>7~Rz+iJt}4=(ӥ|2^L ?JQPT \',Rw,4SeQ?779A2)9G}40+Goz 3r^cc'.P/aϵˉh]^ )|ޟSNb܋@ǹqk@H=&$,{dyؤD'W`C\"qgVWC:r18^L9TP10 0LWq A:jjVi b5ة"lt4$:'`8=\q0hu 8!w tyifڝ6uw(IsnY#q._/  :4)S)'p̯2Fp SxN?^z_ <ˢyI&_)c Q)4W2kbeF1LTsr+"l҆,c (.ImU͏׎/l[/僫Rޮ(`MsuB*h`7eĈ/.ճ Gf_?^M@H3dVN֎X;A~X%r8(4!cjdxeooeVEݠד咎tΎOdEvӲk}0ЫmF'L3ഝĆxd Q?H82|:> j26-I0i2pLi#"U¿0D̀-!0(!cNT?l3 y r0oq voS|ЗGCD8n ׈͟T<%@mG$%d6Ts/[ W9#b sJA}OXalqmgh_CryDt? _+oϋo3dR )%#E#~Ėh=+-&[Xh~VK:ygWUs2!֙Wa1?Y|f~kwUpszoՃvEn\eRvtLn{V߅f82M"R_Vk3H9W֎X;Aǿ=cjO aƏg0lfXk|:V M85q5Z6Ox>#:;:N9U^$zƋ[#C}d#(h2FG 'h#d;AaXC1+ ~GC.-!o%!"eM \L e0% *ҫUg# q/Vfp0aF 4&]0Tz..,o0CSx 8#W9TomzWʘk!8 ]Xc3b;mH Ą9?)ϥvmѭ u'!rjI_׻[fAq| !!͖gDǟ.UwM5}uXdhslotq|ΪWo=}~27<tŮ;g'@0"*SyDv&ƤtJ w9Tsd)$5sG,.0FKڸ">ηT]2#7( 0*TgvBf度t4|AwN;Vba$1Y4 D0Q&`.{MO8I¹;l<0 IBcÝLW08bDA4 0q.AthBc 6z(?|2m{e\V:2)r] !bR7:Wuh'cP:9؏40.e 3L( n}2j ԟ}m)Ċp40fg;)o8"94XMd/d1sqtt#A"7ESQޕܰwf\/؂2Guȼ)BsٙK_ ="6bی%G0  #g|[}fluwoʶK2dW"]qJ!||$yz]yFh\'i1nQ*T?(7 +iD[`+fJw $ =CfױmL`z?>aU]1]S306MGZӓͺЛNϺDjF IƘXs9C 0q0 BcU&aQ[8D=p`ЏQQh l }vH-R2-F1 4edX&qv#x3:@?t@@$(g|Lx`b)MB+ h 2z0-^ ['9!>mM4#Z10tK{u*F,ESRn$ _>PJ~K79yn\"GPz?gc3{d , Rl(&`.ѽ/}N\j R h3#_)pQ L\y7RWu3ˌgřqO=!8)ɛ]ezl3&]/~/ſ~̦ Ͽ̅SpGT+ [:\zCWfuRxl1`NQFbжi5ݱ.c;wm~h_4f@ҵ9AXMʍ/a9Ի+xdcHZ[na`10"d+n@s$njhК GՖ bSe'% `/)Mef;7NY. [x0ߪ'(7)~6ҧg^9$@/'Bi)݈ŃC# yCF2L*Mu 3+ji2e{cP6[{.DmJs!l>vH]sr )ug=Oǩa*%և걯,M\09AH:҃{Im/] IDAT zDaO:W`Bf#H#~eI Iӂ(҃-iED?axRғ-%OWtZv!7N{6I&`j_?  
j`Nڲ0铂C@0ÜIH0JA sk/6U-fxIk#vY,z9!IYC㮜כ j= |n/II,ފ 4 cD *`2F4 PPt2x`L"oi0(zN@R+}AkЅs)1 80*Hb-V[kV޹mFHUUO|6+,&i M=f @26K,]yMŐݙVrC~gs::;'ttzb}b #nBisOL*`oFۅ3|cTbذ`M)Fe#Y}/:aaS_QHwlDR ?lPZ}߂zM< 9AtXm1="WzĸrOn`Rv5"6cC u_ϕ֊`=(GZ RPfM͚}rNg-^sKAx#ptDk`8"*Gau=0XA?Gϻ^.A%x =!V,u A tkгMCC:0 (ѢE`+:9qn@e]rq\Kư9d` K[,3|"5YIenS}[״^#I tx3 _wwRHΰ2D ]K: L sѹ0&5-!,u#ߧ[#oSH!jfIhG~,S^|T*yd ke#GEպs.)ܻCGGGa"q2< 9zCx@66 $SHi2K ]Z3v}ÒF3jN4 b;KϝzT="@]2G+?8AzEczGEqVħKTS 8nZxG2y Ւ.WKz^9;Wܥ㣣~7 E# td@΢@y `/ѱ,%)/,tj t)cp80t&e} ~φU )Sc2̳~ ѣƌA<>0L"b  hK kiBr 7$lA8x,vc>#Ҋ'=s+Q,Yj лb4 /0=t@G=pxyLZơy>{`Bߒ?Џ֖FH#htU֒FA O΄.#J[ۮI2HRJr]v*Du+>5 GY[}|Kgt|vBl ŗEs`j(qS7wYRz؜J]L}q)bT>j WwEϻh}}rb_T͔)oX,Ar2o6BvmLwbdV g{v6jDk|)TDKdI\%T}XjPFл^>p;.հ_W wqANN!7|6 >iATcDP"j!Htv@tpYȆy8123!DH< FA :dmb} @D/LƓ2A 1RBgZ7DEpaT\̕GXW(kI!Sy uM$DƢt+"y|K|'Ь}U,S#@Gy;ȥ E 9o4lw{Vu Zc (/:;FooGSȀ`0jTjw:߻S].F}3>#9A%7a,v"M;(H׽@51Cl*dc8c:ahIUr^=ѫあO`5mׇO/鍷ߦ>+Z'+#L@9mreV/egfXXwA]N q@UƎl>@cs7gj kpՇ lTdkCrշy<>r6*kzK4*72' `Nclp &\=sZcZhNjXZ `ޥafOLS2gB>&(gXM7 ùmr1UqSs$?4u_`UjiUUխ?ŕj6h6cQ z؃hQJ{S2Vr/o3\f"eC jb=ۘ-+vitEC;WZh w:]c9zUd`TQ=BƋ1lf$|5\FUJDb6 !+2l()2! x-q+Xi $;4,TAVDp:2$ul`x($l?HM>'7kmI})vD;O9UU֭VK B-Q zh /ЃnDuMԤ̮̼әO{Zr3;v9'n={/w>3Ѩh4=[WOtwa [02J8. xd&N43AA 5dX09 o΄>yO`XXvLLmzhoŢuota,e( r =yerŀcQK޴$B4)Yg6inK)*C> "u0W=xtA~rn(a_P5 %+f,Kr5 `f0Y5(XguU{rep$9Gms}l H#&$I82o|ʝ*8]X!&b>P^KBw,}ZlA~X5,_^RBusEfݔ}@ȶ9GY' kȫ([/g[aM&J&6`U~Ez';+&ѹzhX&5{adnhm~WExz~oh!5$'LUkbQ6T'a7@|j]\h$ Mö`T1 .gplThN 1qEG2粁חU():N{H0 W{~22m; (ixgb`o!n O)`F .kq_h&f1L Nv Z*SU)'4L6@b,S8}S`%MÎ/l B:؝[$yt4mj7X$)^fYq֗)Q*6n?}pWGRB\M˳xo|jtaOa8*MH)C}>rN1ƽb@HJ{)~ԄTE[vN } J$҇aDYBA`:tR=_$*š༘iYuF=g{fsM28ZNa!{-ȋ0ήb`$MҔ'&D6 b:Xix8.J@B_!{,2ssWpyLHk[bm>2m 0Ā8kG\hU@D=ݤ $4[.|>:`H{ݜNiosܠ`3/BdrE$DeIkEHHY%q:H3Jj˕{]_n=ݿ,˶CIvp 'ZGYwms%⩞MGѝ$epo8Wlށ &bbCf#4tƅ;܇: )D2J@&/371GsJ|[x!tu6@a#˺y]O{Z~i>zl^h6AVڣC9h I6(9r+ )y|6m舀(" ݤ`')ԓ_j[@5 |̌?vW< iXN8<@DohJ! 
^k|$@@0H2b( ^Z1+ vy5  T361Ez repq4>/̉uu26eL$'Nk@6*ؕDQE#W*NUovv'˵˅ ?ͳ\ bbW[|'?<ұI6}MXFK*s{gX/y~}u-?g0Y7֢Mz^09Kd';`]<GvC.dƞ2ET7Qع Q@좚M>1rcz6ؖ v`88i&I׬I h:HsOt6ktn7lA/I [>* y6mgr2flLעzZ6|:nDQ敀Zӆ5e,L @8&0b }t1"8Gah!܌%2jrQcۤ"q+Wl 8<KmG͗ )A pa;(xJ( %vefD`sN:QcYm eY .^ ۍV.2UQ5Զ9w26U)\ՀaIvm%f&_L* }FXy_e&I h5ӏVUǿW쇎h&T1c~ul j=!{*f(*j eM ue@ ȅM`[\N]cU%"\:c[T3^uux9}rZ,;N?ӿwy@j* p:37( s(sӮ}/:!R 7g[p#9 ;V0F #@kLWo~"m_6`o>BêlG>g''MbI"i@ cc]N +l %a@j;1-%yw,йs(p2Qɿ̛\l I?Ϳ 8YFo2_T<|ϝp'؝sȹInA'c#NOE0EIFËy^)jzlMo n _9fr]av{7wk*u1R{ϳ)RS_QNm mt rxYNvrtʹHS `3(&lg%/ CC& {clCHA +v~9Ġ4Mw 6ʠQSNZ!Bǧ'Wҏ>>{.s` dGlB+‰.?yࢂ(ǡϜؙ 7wYƋ3:!%68(;0ؤp\N8rώo̼7@HĊ&DW j`899j pXHF $Lb|- Q$v CT0bWQro3 *>•xbGG(0AԈ /k6R-X?2NFW1jRשV8umԪt|ixmcorv]83`nR &p-ϡ5U L|ޝݯI5]^HoVͯ X8rmIV)飸qc}wu׺yP2OVJb ]#a IDAT>z𛩊ڛwǧ"t~Ϟ,ֵe=w=W )%'-NvFN_9yr굅("jN Dvf{MexD/i ij(jzbDJx6@H.;!|.{Ax6X$?!b<8>{S}p&šupi%J;|}M|qhk<-8gBlhkbV68CLEt0LBFD;+-f0C yL*/𦕉j $,'4v64 0q/q$ _=ZiܮC%bTgP<]lQ<Arub|"s9G |i EqC'Is'ͣĐ\MZA,VRvF˗N8BxG^}p\f!бmc(^Y׎>OIY,\ 6&U EʇT vͦZ@F0ɚ ,^paÌ,ź\9zzՔ?Ǿ7ۄX[wwt[Iȝik GFyV0c:t7ޟV|'?{0?[=<[PC **\H9wvzz塔d';y%$8OMI+Ԅ{$P1! @زQ#,Az]e/3(sA2_:S*De) {(HFC"*-i )n'3zvqN1ߧ6k*&a3@)&T{\"H'Q: D054VnRF H`Oᾰ i:Ԡg fb",TMfؗϙer~veB_. qUO'{h(ܮqFc@mʴxd:a\Vrl2.4;6]46+trI`Nc\}%6 YJIt$nD@b`1"i>ΞWn(>/D]Po)\.^İr_G7ލ"j0;7Gۇ_λO-?Ѫ",.-p q\w@r^~eK!1OsrnN1>a3@0 F5_'Gփ‰QȋDD05gZX&b%FB;ji3 0rzF`1lF'Gt8k$шFari!8K|ܨg?QI6h)L*in{@? &{.J>>h!7`*8mlfm1(HZ<73!p e9H%fY/)`hyTtZմXoӱ bHVdey "w&4||uT&U"y]lg]8Ȧ0TT1H*R~G\ o[<=Ĺjl^I9\o6 _fPC@V<~g){S8 D.rİҶw"}47pc5]ve :eڿu~mjoL=0: =i<Ǔ[o~?NW>]>Yt6pq:WeSH}̓M}_7_TO~G;NvIFT=X)5nb!{~[1m.rZYĕR~h.ŨG B{WnS-"pN0DBw)ON͒nI!P]dZ䄞5 M'czE8reb#CgL?z?8czB%F rmѠe$+c>Af!X!3t;Sf;v[>M96%kZ+Z$r0~~p A倵Mii7BCx:b#1]0Hhas"{2rM. 
#ś i~΢!GΎS9ëwˎ x.sJyXg(Daq|8~ 37?/}Bny|R@X5lDt%UFBb.[:|(mxeԽ\T"۵eg|˜ ۼ?C~{7ޜ|gWUy?&@좎_x&Tпype<>8^=||\?::]>yxR/VQ: E>R.; ?z՗Y=x4,E[Ŝ#j# 8X\gCoNno1w_Dt/# -kˮsE!I*  ;dHz 㤷[#%p]t6jaL()=>gָU[N9!Fb1b갩h[r2\nliT ꤐ0άD8 P3i_X%Z9 Tv@ٔ]xe+Zj D `Z /Z{y Y-cGX"m6nf$~8J\ϊIU8dX!tD7bi3;):8#GJ *Wa\ߖwm:οM=O_HM&6?7ގ[Hnx~2jr';R},̎g~~\=%%_N|$E[ZC *\!ŒwEPDzG$ Y͂1k]x:f%UC}؋' 1Fzm{w9<+lұadN;/{ڄ/޻h~ddgOκ5 EW5Bm_'p+Дd'_22SWdcM~0~DH"gv #CZbBQQ0  5 L7/4 H>vK1APF1u5gttvN&7dB̨ N> 5>+td b`30bQ!7 |(d T̀1~IxaN `L1pkq8ܯݓaF͠PpTd2; d1DSU^ԭ~gV Qɼ(yTsCs݁!f6JEpvK(En^;q(冢&&]e)J4hdXGjG8rvvL^>t*8VM>aN^fLƦ链K5>7a[qڷm@4fQvTE78$x@7z|+<}vV?9Y~OŦ\ %c*aC~{d';g?w_$UV*%::eCƗƂV8y*tWSuS;5c:6ob;P~0"'Q4pcqu^:F>QM ɢ^|NONNi:7lSD8\2ɩIE<9HL?F8nx)*0*%M`]OWɋ O!\iqt)KjvY'qMJP1@=ζ&\6 [5INH{\=Ro@/@sv3 Tvʚ/f<!P&,D^*Q;r5i&'wҮ/2RrbZJ4@n#W $8o<ſXjo<te^u@M_X&@6}F4qka_ᷢLy >/ʸhDax&Zq͟8^|) RjDMflb\"I\+E#,Јjr1P-Y=z$T!ر򌔟)*g[{@þk]g>g;?wG[Dq ڠd֝ !xY 8?*GC?GzNfW?>&y"{ʶn';++Kvȃ}zF:Nag _ ӉdK i#d[-mS8 f@p稢l&7tzNM&ASb Ck6&/bJ{8~o_&?e5/IE]r(h0`|[hyŧOj^m yU$)pŹC+{b7neAa%fH&\|(V>) (}y߿oO+;}o;FitC^-F\]wj`I I`8ȿ~z=t kK$+Ўd'_2999q#O1Qs#oܧѤgA KǒDzmeCTC >>tX 52K;@ϐ!ΖnL!vYe7(M2π,eW1a@ T6DOO)ts:m*ޤߛǂQLt*f+F7  YBc0qN(M}:(LbwTHqgڠOa>FAwQ qI@`|ޠ{=D @F$|4iu^*hMMre #tĨ8a5ɢGΉ:oeJ܊c8,c2<2q~ޏ[d߮{2z:zIr-p䣏>xg'y]hͦch (uutuLMc57qk[*KԠh4m l^ݻa|ßUG?'sn2>WIIb`H).P(bHH@M7z 3YBf.$љ8}56ult۰611l0ɇ1Md7 !QUsP`L6@ {U p~N zBⲎWRNv+a]N*D#r#E@'iE ϕ_ đ2xpR@ICZEM$k@ʳ+oP)&>e81 +c.hpnѝCڟNi$EUܓ/dJH}˜#hH}U^$Mn!CF#;dGC(PyE"u \0`0T>'2%O(O}X.-Ч!n&:XO.GaC#Iz 8AX{<(a X&gIJ:}Drw r A<' =쒑\9?K(Us~h2hCE>&ƺl{yMa>m;:ߩ^eLqзO { $:W [qgV=0XI^uc%ϋc.%R6DP@*T:5`cՃ?b.f SSh 0M1)Dh?|3kqNb+269XE .8Xޫ7὏gGV~DKK?T-ײOW(i:5^e2%p2q}Ab 6I\&Tc/",})c]|oy{l gkB+9>э8s9n#^f"f.3~[ωx0X/Bu\nӯTUu;pd';ɕ%?'ܐ+?ڒ-eeY)B!&9U6!8Ѹ6Cq. 
v3uBp Ä=FNdIC0q'gP#A }c7hw#eGCP&],B3%E]`4V,1fez EH+,t/ڸM( v8f1[r!s 7tJ|ǧyӂ)0؀ܶ eH~l7'zpٝ\/F8ooRޑ1]bKE+_ 9}otk-?:|GG1xuEn2@d[`cc4"ׁdݱ۰baߩo|{n|T =.b8`VԥGSNb\ms>:7$$%wX~aReHts~hD@ x괇I0H6Qeb9.-UMUޛ^dąDEMѳ9 %#L4w (s:wrC3bXQWM|5 y=sMcIwd+IiǠf` ~S }wfRV|ĈN 4qQàJ 7 6-WuF@azb"jWA%Y.}4(Bi &Xy Kt1(\O>>ߟyQrΏhpg|kۣ︪$i*K~`='|Z}k8]aKHH&QRt4<3=CV}J/Q VyX2'? F荽wFߨF9CY՝/_=Z?]ޟ=3:]2ԛۄlaQΛIց IQaG8j8j4dB~0j0En*.*Q^=4hmK%ݪJpM\DW_XU[TA3JZO;@,Q C7Kh†J*.ekO2K-Bp-P^y1'GsN?鈦{Wɂ%2vɉ-E :q -q.%QE|';W_~tr|+7&#ٽT9^ e"hX- 9o_Ǡ/܄gZYDH*mJ7fn}u9oFv\L4π'+5e#Sb6Ԍ,HkC|7_ =9=xD=:d4nV˾׆=[n:Zԑ(@W9nk23s5`:|\_5 Z &W=`8Sv=HhveF!T /#`4X  *$Rŀ?1«n:5uzxw cDƜ |6tD$^}*0x}Cj<ťt_\wr\]f~Gսɽ[w!r}/$`:z7|{twzZ_,/??_/ twY)~g()IE3sZ^T-82`Gj`cY0̢]4_%X.h xK.bEٷ%B}~aI-ƅ]&*\@&ԓܗoH I˩oޯMRE.]SU9&/AL|vӋ M*ڛ `joEZ (a9nlvH1%>~K*?~b,qY{NbiwnFE!B{~ņU6Fg^EX6M-a~Fa@%"HU_A 1</#3~#% MnNf3zrzBǓM23<D3`mH_Fq:OC@ή805yyz=gkRBǿ-koѼQּ?qMsJhV5}.pJGEqd"4Lޭ|.!EloLb3jk&D}7}Ú[@nj+rpG;2BtV/L d9iX 1e(dwV{onMrŖ2eMrn-77w[g'?[=?'EBXJ) fu?R>|% +ZWT/.hY%FɈ$`4nRxZЙN&iـ۱7m2r KF4pT >DWGL#O^`RPY,<7@mWlblwt vU)Mh9+:يNϖx4Ѩ逦!M(J5qĺG:S!aj1G/^d';K#"e&f(R !aXók.NT s}@5`0M Ida{H6fEmd$-,!5ӄJǐ$qs۴N0USǰbPtRHc}r|/ ڟdB{IxQ`怀o8&0=4z:!6^_0Cg:UDZ&[biCNh[2Cl\ (Αm@AMp?zxg0nj JϽEU]0 /W0& G Oxexc4~; rwB|yDg *_>D:NK ݾCwxxx}I idu^ BPw0ȴEHn&~2xs<63_?=?]y }u (1?% J؉tZC H[/iY/i5?Q 4MW1oäd6_=צ\%ܜDL$#Pv]rg { S::P1C%qMh|^MrkcT>(z;hjkٻ GǧMpM (ҍq*vBS+`'aJo23|LJz~vvҲd';zH Q!r3L+n#TtuFeӸw"r(|<|!#:M>`)2|͡(`"8V y42F;T`&gle%Iˏ'V"1TY2gV:{uVB/9ełNfd0[S@Ѱ S- \$xuH(֏19je3:jBz^'HNFlղ7i:vBȀOfa{k.İ+a3E~\^!n!\|$!P)C@5>/ ip(mE{h#;!PS,8{"`n&r\fיT,jB-Hmѹhǻ.jMsOyK4cιsnD՝77alϢ<&f@&袱WŁэtx8~{,~z?W紆R)Y$ >c3]o^XbSL'ģ P&ܒǴ8!?pr@ýYZXY+C"PQ;qP.RD $2/RP9K @Ks5-(աBж+6n_A=H׽}MZI{ƂYrJ2x"~.cBV拚Ot9 ݸޗ&볌! 
4V2̾FEjD>2;NK3O.H@ &TOiA{s4Ƅ=UqJ{QC'0R<&$TgXXd~Kk:j49\F &Ol"!"\,S?aH>| v[ h#>"<4o=c)a IZ,SŒ/QM!?Z(HR$c@0[+Bh cOqq@lhd6gOcؓ`5W t}jL= h( 8ͽ$łP)5F6 ~g^P)X2 -@9χ8.J\qP̣o8t9QrXfvȾSӻ)ȗ@n8mc\ DL Mςs9Dy#q>Ϛ;:;`x{joo2+"Td݅ <,ov~0tU?c*y_NrY8¦mʥe-]ZՋO`OCL.>qFeA2 hSΕXdXP򢒸خnP&䢼 ,q-oęS6@5%m`@H7ׁw}icbi+ik|Jl,-p ?+pHG.ݷ&tH]r^KUU_NO3"O? ;E~t2D1Ⱦ)( F8 XM9KMQ-8b@\2} Љ۔(38P16%1+P` VṌo@=Rj҅W]CO G 4y k}f ztrL $?4z[W-%C3 , LP3Ʒ"(^lt)Y1/nXEx f=?d$VjUS `t4oi-t]v0I*@ I>dqD('ܑf>5́hunGwQlJb`gHΣә+4?,G>U^j2|k,"},vsmդL_J! Ro3agI_Xκs61ǥܵHGX8옆{7i|nYrmX@e?i!sSOZJm2R)# yYIb68p y e"K΀sװS 1 s|ynXg<n)F77v^gCw.* H8.egQ[OŪv_;Hx<jv$ ":%e8WQop#6q6X0)Aj@1s^t{B]K.F>TeVoOs{ǃ0bYэhm+`\6h{s. m#'^gצoց0$p[7 dBMA^N?w8ޒGأC#]\>[M26nZ;q,L%9qĨ.UL V)ʐ^ޣs|0 aX&0$ݐtzNJۜ·:#\#HW]RBO?SvPSw@B`lx?}~N'=?3j_R):կFnhd>*-!Bhz`s};?}ujݜXOЃOD;y`J {j@`L6ZŹԟc(awaQ^^xu;Hc$A3z//yM t2tPҵClwQQMHEidl$\Us=B N=AuCQbl 7/E=>6~.D|[:P e*4ۊq]Ո‰ݺ;J_`!LmG=(He u>E+,>Nv"䀈~3$I% %?:rr=xvsC95Y!G鑷4zMx^sehn1b%e44r}0:Jƣk|xvw9)% 4`CBiub8'3c8e2J9(K L҉r6Q*+ /6CZ?>j$q:8l$P~̕+df=}`k)8@@ )8>Ī{K&z ^!p}q?!.$/JVh3mI_r&f!D~q~tt5 qh[TP'| 9h|MF&}ea uwJ6 |X c/"I&RCA&1ŽtP_1Q@Bpa}P/}~qqAٌήR{<ĦnD 78yׄOPEDPUxg)e8R_BM}bC\H_ .u P6)qR Y~Iųl2:ȡh,&I%'glA0S1%v?a.!<9LƒxFgUxne$EeJ tubN]ML^2C"2Sʯ+htLW :5#92I]T֎t<#)Q3$tG&8JgzQJ ɣc]P M ]lbT4d7# [Fbz@>ˤ()jR^/3$ e4(]I_"./6eIa\Jc$\(j*S!32POc,\:%c|>iJZ2onMRu ʭ kJYHq2{1}8u7}া̢bާ$`yh!N*ۓN?-s,֚"'Iz$/_4LfrKG_j¨WQsaZ%;IMD96= ? 
="~U!阿G4:&UA z=#d}Xp ^cAփaiDA.RwE5ហh 1h:0>Zl:6odžL#Y0i5\&u yӏ>lZ]A>]"J&utRZSh*"2 "j\¾Ms>*\A3[^5H3O`!2 g IDATInO%fn{XC El bI$Q4g%s~5#%m:h3S͛[Dߐ<.4i@׆ MooK Y,óz=>+:vJG?OE (ča^dM~JU} N EPak&D# /, %H9faluy.&(a3` Woc#"l%6ܔOKc@dĭ"4y%pE"R irv3c*Yn 慱&uvbOƸZT_-YV\'Nv$RWDS"Dx/Zጢ{.${)_x72FJ6c0.l&p\'l*|,|c: aޏ9XWirVDaY}=Pak!l 8Jbr dxڰz QS32Kۑݚq]Cʚܣ9Ev.6(m?4Ԅ״ ρhztU=xpϵq.3GsNyHgsZb8{cGԼjф/AGA ri#z\4sh}$8HAmŔmH_N.PW{)[Ǫ냫"D]¦c_97IK~$R c x6.`h{MtcǔH@'Y&y*>D#!UtNR%|_\qZ* _2س{bޛ'POtW_d~xxx c';:JJ#c"$ɗOc:idNI"]8r 4@ <ʆP6'LJg-I,Ao 09MhFо@%p&6kB:|щ!Ja8y.:)!{y.N^ne`{W ΅HC&@E|EtsBш{t7шX, w̤Hh?vLU!Ŧ o5@ݜjKH9" 0Q!k4U1@+`NG ӱ1BFGlo Ekcpmɍ4݂T2y # / ߰@R[͖`Pn圣&#cL=\JxFv=82/]uTjtψjzɌ!퍨Q4IeOu$ڿsRpHQz'PQKbyր)g KXa/f*|@.Zi߬&y湶4sJ70dx7 ӣϺӻ8ttE(J 1{9puxW*I&IeEb]YU(G:'oiuq{qx>0\$Ux{PX7&AbE$9rmnbWE(U}织SЀ$>xYLıR5@ 檳)udbua ]4߂+B0`Ou#;"E^Uu:$f&ACژJNfxxB{Lp27!mQ?0H1Ka1 2:P A&Iz8 hL MzՔs\d-Xah!d|8ȷ}ɜ.؅sӪL 5DP)| dhiRgkN̼Lo 'Y֐)].qmɕU u:޼m'awJ/Y 9<<\Tѕo\wӚ V4xLɨ|hVLI27dWX +q+a|@6m>+ T>a;[*"_syc-ځFmXYŔХ!e^Ɛ9kby a9`@Wn&9UFu#T*ȉ2̏αBWEb1dv''_@r^I:&“Xy 0M'8UOD)y9+5d#Q{u$JKăc ~X'!\πN{MX 쪀1= IKp5%0Ғ$gP 'kV>Z((#qG_ +$8ogGv/B/KD%,7ȧH:4iO>_٣֢a(qķMa7ԄݴlaO"qT|*qfc:x$pA0AV#~]1S5o MS[#.ߖ4&>GpH N "|hPIsN`'m]Γ։p,t|~N) |8٣>GC) }ЊBC%惼1XS;Qd;D)Ig CK8}!)jPNN<`B"k<TҼ>`Y`w(p"pXQk@!%y~4<{*[2fz *))1ሀİ0L)IZ^_熾~ anɁۼ8gYJ1ϗ-=x0vnXŅHu^]|N~X&F7@Vh e?*7HFe/.2d%Qח$/rJ\|Eu *Ȥ: : u)8Y!-~Sug6uV-BcSh6yаUgLJtiACrE*E9DMDWFg{׃YˏZ~juué)XC)͙ě X6#fK~-lJxR4bN^% ]M_%(/ԘAwl矿CD AP~nţ;ps%:L Q#D6?IHIDSLI$ @~c 21ꮆM^IKd0>QF+9uxA^F %υ60 b@w7ogG_q#pu1 ]0u'̅ 7 z2!P5kZk-쌪'Onܠ^>(R7)eEbp9$$$19CKmzƆs\~h"FĆLص@Ȟ[JKT@ %!ɻ/&s9$l\ZR;B\3r;e!4CiY{dQ/hV$A, s3jd"VT7X:> ()$. 
3E)`p{߻Y ѻ8pb,[$@{Py:nZ kCX1<GR'&HBbQ{<^5`I\ΕY xdDFA rRt7RY zHNם|8&@U`]f1WOmGc>8 ڿO]^)(k&rr<8Ur/΄ Ǘ~ TЛSapo aԆu>&9)+N*^Ts*7|Zo}}[~ &K)¥:[M~_mD[g\$i$Bl10G0 J*b7!5<ex?h8~诏 s*SḪ|u`KU|R'v]l~au QRM_de2(_z2L)-7`*KZdž Exԫ uWsl2'??I7,y)t_;'߼s*ymuJa{`->c"wHrN }X;h0ネ; kуAב=ؖA™ {r2KKv$M+ߟBwksuBCH[ٶ%Irfq;TYUU5h 4 @.p_WBza"(RHCUuuUVwJ TMewiHmj% j;w]G&N m*ź ӏ1Dݧ~֋0Hx.!?Ile !7Ŀ'p'w#u1'|0֑.{) M"!ַ{wЌFg40>,45B|~?n_Ÿچ"˸%W?1 (5Yi̐(w6)2?Y,nUi_q68?::ځ#+sH .H EtllJg'  <6$Y5$!9dsέ= {9Rx49|7:?;TN͗~A̖*ZF6^nSk6ph0A!0!NBhTձ3f16֞w&0˧YGÃ`9E֝%#I }Hփ;6yӸu#]Vt }qnǷ؝]{C!a'?%D䀊!oɾ\,}!pm}:~ݕAfN1BUIZt@M+cvdsxoG&H7Wɗ09֩ϝ bF3l|@Cb4G` ;_X#;ĸDV8Iw| $\@LڬG"3< zHP JH?[qI0hxMB sls^xMCgV!UІo5ˎ{t 8_PolST&0Kds4:$L oVZZQ#i<, giyI7fvF' Wbmi]mEۦ5Z[M" Mo3U"z`0[?'?]-~IRx0ufFnDj8$γ؆@ʆIbϢ(k{?m-C" "ZVI'OO^qWveW^$͑`5GR.ĝ X÷qqF{7LvfvSD( ܚE.ōt`>^ ZR:Aj*v PJG9P×{-rmt ?{kcf_tL>KV4v;n2xR4C :. k >=-L5xڑrL+Z} t;ܫggnӝ#ڛvOQzB=lcۨ*$VY3˜d $€N(cHK}]& 1W-ˀ膢t9|Dc+v(lt9:J(g(HJSmuۯSJq(L4}z_|p$*<-{[_p+$Ti֝u\?*ph(tr~U̦69ߛB4mML>`G噲^z8qkcżlbjlRN\pYs]mo}S \n?I#"-5V4jF.IJ,Tq>dR=rگ7g4=ܜMw.ͧϛ\0  [dBnp20&6ѵV9')i}Ⱦ'3jޝLoL&<~.~yXwJ 4 *i~j5 @ֵ8MbP&rZִ IDAT|ouv ҎF??}{߯wsgWvD?Aܐ8Q$6 Ps~V[jMjB #ITv!$V_6|ò@H'9qj㾄νeV* 9دlxS$X= 鞌a>ԮV ݸOp$'B H"7D }? rTd!By1o(,*˺N,?9!<dvJTH-9t|N?{Jwf\Vϥ=_H7r4<Hc1:eniF?L}i>q8[@yq㨀u4=y̽;`C bEF oDekQGkŧ~`M]ٕ])Cj|1r5mެRzL wp0'(d^$qO<`R9flV%g!^M1T3H$'1:k5OԱo2BCcv!  A$%neaH@{Z;bh!3hJ[ 9h[;0R|2`øtryI9MNN`2}:ܛl2h()HvTցai& mﻞUۂ추(oq M^'}LDV;10[.۬ x,ڨd J :kuMrw8RK 1;^}^j_,4ź@cxB>^훑? 
uؘ_3J8$ԞnVCbON A'^Mqw DuDzIȵMU!5+bEW1 jk вMHU>)C1B^h +E*aEn.*7e/Fo81Vr,o.Z%Puih7Ʈ{ƣf%"{1CȠۘ~ J̄i[t$in̂ʵ=@JΫ2 yH;ۨ@Z9שCm <50ʗ" smU*W*\2k{S)ѥ.\Y%L@Vtj/v 㕗%],}ZI3xLt+:s] 5*,$`$v=A6>4\yU=@u5$KbgSELCĪr-̻ C~idx :5J+ ^'qaL9tFnT9Q)'LR;$C;tH/%oypaƜ /Ӵkt׎'l%Wa6`&]rյWϛu"mRXiP{ ټZJWj wưB]`Ʀ{nmWUلRR~.:x&v3y/e0HRG h0A]A\ )=wQ,ɶnjOYOǷ?i?mO>D柟J7^/Q͆~aI)ˮ9Ft8hg tq"m @ #5 <$\ԙ K;$FH.hOǺ*6^t1Jfʄ93/b{܀nAxp;uh4]ٕ]J `ȿEQ}%٘ʓ۷.`tٰ t-GO~N_yOQ8 brj.3DU,?CE>"N~O% _zQԇ(m:KRabο9A^'M0d"ll%-^1l䝡A|djȎ"Jd}":ŸY A{v..WK:_xo/ zZ`?˾_oKاO˛[d81Z0ԀY00s<*0M40\&؝|>S)60hCy1`]e$pDr;Y esW )뎭䭡> ^5g<Ƈ8( Pհ:!=t!KGT, !lc3аh"+ݺE&4af:3nse0<6$5$]ݼ cəE |U)OJ{0?,=897y *z/vU_6Jw=6˞; zMj#_vPF,}8Ed Wښ0{z紝?i?[]t/ `IMuIjEUt(ͨC6R[@@Fa)6&`zWWG b-YTg$ 1ꄖgyrbWVzr䷧;N0:i?\.[E ׉ $kcu#?!I??XDH.̨p))({lO!i!rsJ]D)tAEp`~:K=H`5~Og69|,gwXJDTX_FY]Ԑ청 1\TR@s8(LB k5˜H16S>79@m-.]RSzEmw-Ɛ6U.˝%WKz|vJG%6I4Yr+;KqIj9DXvm夏5 `- pφBa~3}y-S shm)ءhsX=(UDPb^<#Yv6*F-IhZɿpDH䤗MCWhΠ Y ^^b,z^*9͐kFNLAcyu  Ml+d)EvoN]G݆tq1G|GwCM15t2W׶PM nuacMvs:؄ҭnM>Jф_m1rpq(s^1W2K=d "i8pwLJomFwϗ'>'cD9!4IoD<[,rIz6dLј=1q`Ccۿ3PO֞U0F|?(6Op ,vAACj` sJ;"l=^QNiIT=kwOO&4閉Mw'~{l~rт`H-ׅV쵽ۮ?DTӿ?NdaƬ"N!ԣI N"OY$jc| "3!Z=;Ͱ`b`9U  97 BL Dy*one =YZ-hz>!\ll?rr541ê/QFCwPsȞ1/2 ubMSd<{"8Bg{"=;?痗Ǐ>9O'cKo5N%R\gP`CX_| ɩm]/ 2EFw>fޘ)*kJU0`S'PΟ>IHt? 
I"utW,z `6-IF`g+$?&!;pk[pfƿ<j fJo4mo8ʄu4Gt'gǥ~LiC^w&h4l miQΑOf Yn͜Y@y@{(Li&9Z,rg qZtE nUu߽5זG/#r?}7e3 pST8E]/E#${ľelHUEO=0{qr'-ˋgL FT >;-,ܚL{%1$6PH-ITXka/;zӳŌr°)of&2<3Jd5LZ.-mZh)ίa3/lg(`4~gtvOOWY/)0c~+WvqI@?Ҏ$F9 ħA`+Z!U: NOIŝ(&Ǒh?>#OhM#d4l8a5dԁb{:%iNO?g+BKmC /: wK΢!KQ5A*rl9P8zKѶT{+5VQ$+4%r C9z@$>dQw<Ηg&wxDwio7vF,\5=$8[&bXeAkޅ\,@b.t v_VCMZj+H:%R ZF=Ѽh~h`(;Ok.lDl fJFuhL R$+6<99}J_wy%ȸ|f17!hnJc.ڦ #:{DL$eZIW\nⱍM`&g @Ϗf|o 䈴8T4QN栓RDhm+(gsZX/IjMB궫?- U` ~ךBer<ߛ{{d/쯳GlvU9Oָ =Xf݇iHg ^b4tԭnkv/$^QybN=dBq9dF@Lyh/C(/nhvmdړ Kzrsq%m F;{tӦy᫲h䃽佷fӋv~ZiK@b??_veW;HgKCCb(JgxS;18$x؅Gz ~'ŒlY/@@H$6/ )_8Y1aSw tu95 >mgrԉE Q*6Z`q(C*8=v,Z_JB`ai,;OL;Hs;{p eRR쳲@ۤF2?6 p'`cmT0yD\/>/N}w|h4j 0}&Ѵm{xzmNRiK 3m c\`!qP D59nEn{Ņ0Ϝ1{7Lr?R<(&DBv 7 YkR0{=ih4H'G:,% - 棓1GYDBv5dڗ8BTmAXX9G뗮b=Z=\fBBr *xMe6чw7޾Gv1ǟ<~qO%R l|huL0èg5ʾzeAЌDY"B\^0Z~vJ8;{?M3vϠWQXHiw՞fXKEӅInt__!ĔF]eQ~"Q#: xY{޵:zGIr./ YuZі'U6=.5]4luunNFfA!mJRMBfgoߛL~7p@H68[zf IDAT9ܺ5:O~j񏖗?q>KȎlNLxʮxgW2n?tB0#{a7+8z&R)| %5 %Uh4i?iS_ eX89eSatL*;I&vAc\A7_ӏ)9AxU&;e t>\"E5*(@kߢv} EPN~}a@9-oN~%}?ϖ&‡F<))4jC^+ObN)oZŸE|zhG:*Zˍ(<3Ɉ#n;{o_y.)}SΗ)_$j>( Sdb%E<_z젻ЌC-\W*ݒMU̶М;$J^ROiwDc)4A°.yu@7tB47Q3] 讞P2 z)G "\Ziۜ0Wc?QNhNB( 0QG6]тf"z]15H1uc,Udœ*ʋצTfkɃg4oZ>JH-S*a>{Y6xD|0j[oO'OO>^->+O?yuWveWv+DˎV,Ms`?k.FyMqH ?W M=UEf2d> p N_-8^ovpsm=ShnKLi?#}C+,RMfCSd^{(_G fsks"Pl-=l,ykY"B.@@@ú0I1 GE&O4fVz6sR k)֘l+QE?rQ?1n8 "m!nn"HGL]1ba \᫡fW1ԝp+͒l$xς",)0CDiWeЃMZ$Flof1|ltՖ)F js"V# ^3>z}~Z~yY.!֮ʮ+*NwqǷnSn2)r!5a# X<8RVvz /WY$.)~8<ʢ]bZ3w!q~ (-g1l@Wp;CTU탇t2֗\]$DEdvI ls+BHԩoU!0NN m Gyh*͕ܵڟ>l@TQn)6NhbXJ֚[:`mH+cF//'Q3hTDeM9MY['RؿƧyPks`^] j{e:Όo}޽wLM2#lkr̯>Mcz[t$_z$Oma j`‡jT R2|xÿ}Z(zOA 5+%ʍK2A1wc0(ۡ 1,\[ _ixJޡ 5C %͞.w}Y#:r׮ 2 '߼3,6M[n%훝Xe5jw]S Fz04LFT}5Eq{5 RaMZY7\?WM3#\Wm;fBu1"XP9K_2ujK״VNJ:pv Fl83@!Q xzGH?˚Xź.y8Lf%EC(ƣbA=Q(Hp tMG4:_S^l7M RKX3g]>.E _:F 7GlJnؚpPyr9'3ztGϦh> zT&(dNdY 0e Ix6:mtDndlFsU֠;fNvyYPOh4٣!{41]cMⲝ[[t]O?Q~lR䅴5k` 8(B,jIO#Xm85HsG'+i 7AYvKp^|PʋӄC7ߟl4z6t:B*smcqɩQ[}MOz6mHhٕ]ٕljo8As'cN0o0nTǾFClp' r nIXtC(}ÅͲ@jbS!2mXC`F[Ľ [! 
/=-\C)5ТBb@c6Ԧ(6 3N-95g@#38'} @4vSU{C'QΡzpfvq}x-[f bp񳲂DE FPhq4̟OQP,ZiSzޔ|&,\$< b 8/#k zZ?$)DWt]wc\)Rs~J?>ʔޣӪ2vFrDwV??}=E\J_x#2 d$<8o z'ܟf$uf]߷ހth`ނ:Чӳ1-bI\ ^9Ä K Dop%7()l.W4^]^I"f*F/.i53JI~{4?59m^g]UrL'=i-ǁ.X"K9!&Ĕ` qc_aܔLvobp JYA5?k&^?Q>F7{+IjMュ.^Ʉx]g UQB}bʹ"?ʮT:G|֯@]h) @X ާ #H3N; x;A 8LBPar!L]m7mmӋO$IzW\T.#mԮW 2/oݥߺ;@D6^ *9Kl(*D4E7-]һwF>zrD?9ӢCWm?%J3 |_M S?i%n+B\wuٔwwK۫F<#@8KvHkuOw,4c,xʝ= u jOH$O&I`  >8q‡Q9QTc0ϐ88TU]2 6MhXWNg[ 9|H~~r}Qd f",hpɧv_x5?P_kfŴӞDbf>I3lM`+̄Yl\Vg]S橎aT|@8ser]:OZ"|;1E%*djk4} kz挮y>mLg2%JEAANw.Z+z-啁# 7D߻-9/#x'mRQ3 ݪ{=Ǘ?vpxuejy4mLzxmy}=~I&,/D*6'F22&B3Y7<{'t{_*56`J!zQ3u9oK{U ݫMwm lTgor6\qv8]>gݺK56o}MV$׻CZ̙ f%?!} 9d*V 4ꌾ-!z 3$"WAZ3|K䚕1K|Ƈ%c'iZ_zh%=YDzXЬix2۳F:N#F J\ U `D 9.}e׮ʮʕy${=nYҙǽ8{:QԴ-0PirA"r#GD"]Ɋln P֋JfO6p*(Bl |sX\+F]dۛʬjyG @{dgq0!*ǃɀlKo#*q@Ƭy`e%d֋B8Ćp1Tř}=j :{D4VdH5I]؏u 1P FWɀ'RV*j"$Üzm3}njW!sr(܊wa^D} {B^Ppb$g5 `fM},b&cV$+ d"̐}cFK(Y|,J\:&!Z̗˳w;ccJ͑38b !.B* = '#ޗAOe{1J?i;8[<0MUsK&7GQg½*X0~ͮ+C展CSK{M]`}kbK\s}p&ito\u]@euU*mm`z"DfƋT;pҢA$$ $E{4=8S'YU"uԭm;Z14[%,nlbPŮQzFW-Pm;# hLxj~E%4I| $P5ns˵Y%W"h[}9$)N45fb_ o4jE _jlʮʮ\W|1 pa H}))uZPB豮eQ,vA؈^1,RnAXnr i&M{ؽo6?՗ttFd.:pn8_XPEK<r-8:F c &jA@ Lu&/TNJA & ]ЃV<F=ݺGC銴֎@K IDAT~y*0_*7B1=,L7MfRZ\Jag!k>sNӿL4Y{i> '.VqMah( el%nMt{G.1KIcdMz??/ʮʮ\Wڸ7@V[CIv/vxkd5T1xRJzx7aT럜R43"NgD@mM_C V "S.0\M3N ܓTN>]^kIєyϼ]Hc8#C $b9h:>,%. 
\8{]Qma6Y4(2 tFYPum`u±8۔c!O ]i2 Z´}b` Wq` `;,98⡼^]R%}hؔh"PT8:H{L|ގDppnօ;:Y~5<7эFC\/%9ɯ>!m6l_%DgX\n젣3_y.~ȶpmIjGy.:QAH[lyc tmG]m`V"EzEq@)70i7\6mM«&e^$opjAtkU" ./hy?WCJQڪ p (k^YH{h?ESQv%(Zm35Yw2 ȦFv7{ƆVO^WIjmWwJ&[a_KV"h>?>Gg4_uyxXL.]r`Djto~6^ʮʫ-{]\H LjShf~CX\3_%׬}xvj=٫MHt,oIOM\D*7 W@ԱmVd/$Σ)3#Tg:XcU_4ItD 2`3 DL>\LWAD76J_oKCTDjY ےc@s |=52)GcaN?2=`Wm~Jc d{-Iѩ63 @GqfD_a~ 20O0l 6!"sY X}{[IS`yY ɠIO0 # 7fM6 3{_n~TV-.b/'~ZmW<"U&:&1xe5亐MžNŗtv.AČC;ν0b2mTNfS%;C$^$m.*)/r/HVdBr T2[(3*`$M_J+o`{u@b.dc7hN %6X Sycq==uJQ&ұHw 8gYNlzFYOOGϞ'']VԦ:ԃ#!@7!%^=++/Y?ՓǬnW-\:g=a|r6l9d=b(2X;n3ɛ[4d5[~SNCH0 /$ߨƚoj 4X)aءv5ԭ9ĝ4!&@IMuB`'CLW|(" )9i}v:j)W꙰BCbt{;Bk@c}s93d- B5 00U @>!<(bU[2:2ɑ) A6:Q= piK_&'K3" B<?)':p[꺡dԥz6dM i:5:Фfz\)zlҴ$\I}zͦ#::lDQS"b3O\\jـDV $]tCu!H-Cp4&mmE 'W}~սӒ9_UO '-ExU,4OǾmf]6Y,oR+;Zc͖W=͐|!DHa,dm4;U疼@F[ O  L0~;)TC/]JW|"ײԀHݧy?EB#3ֿC93]Ha[9=Yi4?dB1M!Mtqj gWveWvfHPof:պ@DgTE@)/["@UFYN*ݿ4 Nzzh4c|$Bbz)(as}?u;211k8I @ lزFnEl,`JEM b7,rdUn; 1PUG]ffJvLikmגl*iCF٫vRJ8\n'#?$JKgfg䗂aO_ T/"=mԕoV- ]! 1YH~'ش7d?'V^%U:64gS;0NԐ&8Hdc~V @76aB0*$kWU|'-k1_)8̗1\*\qbe}]$:lbtlԃ$IC{*6`ZG菱pUVf'^kO(E(SX6btFI X Va='}MD @Z#/"Mu 麫Qn^|Ư[uucMߺ\=kX3K[9%I҆pgUo3VS.qD[IjE>l>Q>$*j_&|p*<_8OCJ'!Z du(37:kfZ?Zw=]i֌`<73 </JSNY1GveWvk-M#JkC.++;\d'%COOAgEv4=\h z[0SwldⓄ 鑤4LwNhi~2)DZ7*IΘ\bxw)Cho6&XYruOis9^ . SR8:x2W"ۧcZ)쫎2@QVJ4}οZXemq,RCI4X06f]}ַYGXTkȳ㐃c`cf[4) &`Ԃ?%aB>R@@N=$K u`pm"(%v]wB/1sDD3s+/});EO7eyݥ_VFKMڛh6kh:QXq11Zh R-T]$p$U)EâZ쐫8Z9q&L6ץ?)(vS 8\qi׍\3aWXl亁klG94 Q(ғ)S7dOyVt}QV ؀ciH=?mNL-[=RM֌c \*n:3B`g+5c_b\tXoP9c*%V3hˮE!jS/={B,g++_cYb kXf'KU[ݹǕMۯ4ȔmZ7u78(ƥ{ꋕc<C!Dk;2'}$I`I\8a}3P(ڙb# E =%e4NgAȴN0WȴVp6 C*d&y11mm~An !>&e8k{M&r 퀫> WaG% wOS+e5GVMO{`ZmW t]w.")X¥Тg:o)(#2tj[FЃ${#Ka7Y= E9.rPؾ$jd46`<(}uOPpuȰ. 8l Nu|0,l6tCHJFOid x7my-H*1<%|_4"{"o;A NCu820dv?~Yvݏ>]by%܌z?j*F]:ТMO{oҵWkz/5 @4 BSyéޚTr5$?'Xvn~@7UvSL+++򊊴G"| ßK6^ެ^[_LQ[s\zD^w}Y}FcdFcĉC/$"%JV<+Hb a@iq4Q^V, BT ;S*LC G8UB!3Ii9l @6c% o֮cȝ @խS7c`^1l=۴ϵZl4./T)+Y5צ1?]#SCawSM.Sb}-? 
݁-Bu0?.灿\e=-\>Q-0}>k+^ኹb3"}-^ڕi5[dz1_)8rΝy<6*UaXŋ!/,ٔZDV: /ww-]O2::&u#DPl@"ԄP+H^ P ]S /!@^u^Vw[H̦uԟ?_/" ppThIЏJ*9xt?|h;7gK@-!}:c9:}#aW I.Άn-Ү/W1!E&GQ Rf"x.ۉAe1V[Ft}t=\c7ZY$Rk&c8u2KgKVDf |iu/,/++r2nf'$>aL6 . όŚf u3Fn[X`!C v]a~}zP# +"6_̰OَX7L_:Z j_H|}lp.UxiԾ 6>G w ,&gAl@iXLIJPI;(kD3]d AwpOB 0FP !}@ }򵹹Y 0P5G @)Z7ǹ# *.,GA[vO >T٘WςE ~YrG?;MebyژNrytGZa谩\%zĦ"Z6 ߖ$1G+b}{q弥K::㐙" |14ʍUHv4 -ԯ2*̡uM/b U5un&yd@jm:&~W~[:)kͯƋq%A8F`i(S/ 4P WibwG҉7eQdDwޝ 7r/n+.R~ڶg}!@>5SGgZƜ Z`Y+vE) '܀m>}9~pB]_h*fd՗E`4"ciԸ0ߑh}?k`f89C!x 8zە]ٕ]%3|_`l@ԕG> Oaf۩,u̮C-ӭ ~C@t0L"!/z)ˬYx`x<TlhcL,0 )2b϶#T [KE Ц~a1Z(ϞMLGq9{N'h /R-mϴ kc)~oxAF2Fm ViH=*dF Wi?80jk9(RHz3`RE5-8Ey)v->TݶA$0q7>{q$lDwbⰏ2V>Ob2  \=IP)SFcL}ߍz]=z~p~tx.byrژ:n+ם䳫*.5LM߭-0)*8sMDE򒞟.M&j02'j".3&w|IXP06YM GmqbPhS[U)zo>H0.^ 7oG݆Xz_(PO>; V\N:tݪ/bl~~9EϏF@3cN$>:+a0졛 рFTc[tG726y:SN\񐦫Fx&\MeR'M']KzJoaV0΄hWcʮZ>\1mHяs0z/`?נwl=t6R }=ïڠ î:1﨧c99u0J>T0IqM!$Ijvi=TƋ!K]ߏq3Wl1u^=%!KL?w s^mK+0TF+>dQbalmBЕP-+Ӥ2 +A^, ;4`G)*Djla .amV;TzTA% XGc,쪿{oEvZ6 ֧pDD#aY^|_ܔRːlL60+oEٿvO3ʟ @G 𽽝 }qt>'4jcR}{1pʐGWmǶ(7t-zlsH‰FY-Duхhv/|L0g YNi"mC*P* /f24{.us>{K$MLD{xDntVUtu`s!8wx#Zsj @ 0誙Zs5b*|GfxDfDg'͓wF=<}؇t(pFGwK꿳[ͮ(p4 ׍b=0ۋW>$|te[e[^b‹o2-+MJuf22a7`/zCj0)hU d2`ffˆfr1#(+O{s16Ք*˘.Z0g*ΣO!抂22 k,^yjnFdQr;s\JBF~1鳦\;}'d+D63x,>K7(sBlM6mQ@fwt5T}Xg5Y<4D**5J }=8`+_-x&_}FNxg "y1\eskV*hZcw*jLR,uU^_|Ag3 ˘/aKFKM]g,^OB6ŲH;EÂOBQG*0qp`BÄJ=`UC)KD!;T#Z[:nEC|j($T );*b_U)Ƽ <}gw_On9No(eD|j#( 66S"n$Bq[6(ʛ12 gawni1|W?Ӄ:w*F\rv by . 
꾒8Vϛ6KFU.% :m˶l˶"'S)ځݭdNe0s ,7Nb_ ,Ի^#C&13~F,S!CZoW2YUnEzYd`yEuB!]"IϹsx] a-,+ɉ+Ð*;*R **"w08:X0 |*[ V=GL qH,e@z'm,,( >\hH1gaNB1#ڼ0@byhIĺe[3xkF}pͳblmS;B~̎ra߰0㛃0 pDO%:|#)sS"4R$Qp|"}7x"IjQ>4~H$#d]'h ׍}>Q+s x&DR4W?!-#%@jNS'JqoUN,j?׾(f>sP,e2]7.C Jݭf6iv?ۿ}Ԅ} nL СR1v=&4+xTԯG}~_7&\OqۗRoKgt7\Nxo*?=n&ѥdʘ_]I "2%GdAD4xh0%H|]V9# Du9h]F/L(=LP6*v,JDbN; VEə} , {T xVVІ-s?3bQe ̿gT*S%OYlEih 0V} 5J{e̗hwuCd CŦ"C!ezS&bwRr0o#tjҰң )y1Lߧ)>C;h@  ӡb}\ǥ|keKI3{kvxgo{[t61M F; e "wT^l7ђNszr&E9X][ǝp!ޤ94h.6R~((8dSjJOGttF .w'$о0Jɺפ:̆01A&ȹ b"k ܶl˶l,ӝգ(*M&`TEÅ_l3q%W[gy iPf> c/b%cR`>}r)GOIL1},藁\HXbz6y8L5 be2gP2?#dQ#Nԃi <.ϥtvi@`Q*C& 7Q1O/xy`V.YA , RH\盳tQ6/wBf7Y@&U>6u<;LbN3D`{ |rW&J'{1*㨕20ޫ,3zxp>?1'ogae̗˝=Rw1w#AYjg |#Mu`6{y$&֏G|٧*&7ķuD'!j ךͳ ؔrwSf\~pM ϛY(@"G p F?釲&6cȞ)~|"'g He0VאyUW#nV$SΑٟݥC6'\ДPy6=dE:[}:]~܆wH۶wNtnv-Ytrl5 k^R#rl˶lM0==:ӽє%<=0o@//~f?榀 |nUWs rgQ"ZF +)F;/t凜;PK86Kdqb̪R1|50JV,(thDBVݑ,O)˚FyUÖYG2uxN4Z3fXљB&Fڣ sp(X6@G2DHHPzr Y{rG(0 *A|bEL7 jx>wm{ްDeV̼.}yxӊdT4ӓ[e̗fyUd\ s*^ \0vmVR mQ "}$ӝ4Cqy(tsļe}ȶl˶7-uw8V:i-Eq&Emr+j(tw2LsTQ@UY; Tʦqʞ07+ϠJYT6ry&ߑ=Kuѣ"NNSrb 8gQ dsՙfGj|"ܘ Ul:ʲ!)S$C8RӘ#O\@m(YiE׋>FuGWwo?ga MP^ [B~PlU9SI->'/q e[e[ٓݷϿ4RucA_Y0Cp]-\#l? 
jr@gAݧdP30gLGSgi èvE&qXiܪB4>&CC̉ވ'u7áKVG emC0h*} r:|CFV_lɧcsjC3G i~=cU8#,ks*/$F)X;zBnlcxM%XVR/9>5ug:oy`?xxy~1ɶl˶l ſ՗ݩڒǏ*&Z)Wȸb9s%20N@ F&1t=RgFjfu㨂PyDK\fՄ䎐LNS@(&On7u̜6jIh/?O^#+VrUα2V&dS]wP`9%lH,fe]!(ÁjP.ӧ!%#nJ\]_sS2-1$HŒ.Nh3ݣ==ܧl..IygD]÷?d|3{9"z}n kJ IUBs)V>{:Q"}p]`?6Ɍ\l&:X!8(|O+eD M37͵U1"]JX ;c^)["kc?i$o 6tZӭ`/k{w˟}_c|y::p۲-۲-/$06,4¡ؿ`C,,Ь ACkyL-&olU2&(Vd7A|<<Dee;W*_Cd؋DGDt([` Z[]8v$ Д<SZ,*@.zYZ`"‰,Q{S._VD֤yKn>B uQ1_2΅VȳH7.LTrB`\w ~[*@)޹H5u@Qlꈃ/O0,ފM;9 l;mS+|1yQ3bs0 aPa`gXYq90WBVNu/'SzxxLEWھAfȽ=Er#H۶g!ȉ>y@5`1c]E:U+^1NNR%8Qb&8GHLK7P&Ml&Adb$X)" !L=u^ä.UK9=xBtpf;'ZMn}7%:).-D-٘ϙkZ<]?[dq&csr .z#53jcx2HpI `I]vj߱OD䋧_N&݉[1x&n Ws ~AOs h XbхT_?m61bۖmٖmyE"?LsǕ50SK7nq^g^Q9=AK~!wK*Uq6ZP1A>"'/kX  ufE\!&a:a*^NIRqN .Ć!>+U)t $p xBFV%=_4IF8Xp^Mk4A&ٻ4pۋ[þOH]ZVҧ3l|ļI;ň*g93 GZ Wa8a.F+[q'M"y3R뱱 ec1V2xz]k:4(qQV\H'4{idԏ ~鳽Zy G[u^7]-m$[Tp$4j (2bc,UU W)C.3!P8䛰j5h@VuPD{n@£9xb$f4(EtO>[k[DH|*:yQ->|NgO)>.5M&6LO?c j ](+@em/zq߅Ү5Xac@u~s!J 1{h#:oߡwTDj) UZWv);Y1} w#9~<m7 @I@6Gm_.~y"q^6l˶I>a`7RݻA͔zzJ?g[^p:?&O˲](]䮯#c`TzT0p}г1cˮ GC$TH.db2=g1A7 .gx#|X%b!#Or>g`Lֶ!&|Eq8ט?cz AC5[ ,9C?TR$}k= byMb;uV.Wb S@ ^єz~\00RkۨS*isE]^Mr?k,Xyfsk$PƋI?wcߋtoAcsFkɎȧzlgڧe I6@6LkH}uCm)A l@=7 9|)sUAFn_b P"[{VG2YqIҮ>#{d|Jc/B8=gQDoIz;PG?J}7M-N'-W?jFW{{{4۲-JK02})FV^(\(OTH?<ǩTP3m 2CC_Uƴ{θp`wTQ-xHՅ01D1BdT1 8Zl0=Ri :MqŘe{hrA{IE <ɿʎRd9йQ)ѻr!Q\1k/n5m6(粃g$A ktXX:Z`P8w]i{>1ڿÈT6s%DE$ךּlsg&uI-OI>S}C $+fE\/*kӦqp l@0@ ^Ŗ ˱{X%JLYwDT&`ظ|)QWŜ :{tBiRɤɛ\}zMdEvVty:_6tERvk#lCiX>կz`^ErHR=x1@edݾ&XQ-l1cO@ )VKfbı1 +'>GK[1}H]4"/3Mdžxٶ?hd~Z !\yӶl˶lM?e[SisWP1 n-k׮A2P|uy ,@rW˘Lq \qJE06 5!(Hb7DKgS,]D;<|=+@ }c| lD::s ]cpB@W4k`YkI#H ;d@%'ЛZ"|F@$3b DPoiJW.N@R}K_^aйDօ* t3S1{we7 ftb[CZrr)N1=eQ>׉WuƎϫU3&m!FJe!wn=y-Gvvvn2\[k!}fx@¡-E"lߴ`aq!UT]l~3 M w1 aэh")5| W6ZM;agO9vhvv4pXaCw#ŻNބܧ?i2~>Dz|il3X禺Ƙ,+c2Ba*jEZ<7qB^/;|Cg3ԟ5X2g̟x|Έ`(}m?h0B>o~3۲-۲-7TN޺_-۲-x˓'?/Ѿv8QXkD0@]ȀRPk(0Q2( fF1|!6Ҭdߠv*xߺػúAZM!9@XLg_|۟ &wMlUkqs"+mRCH١qU6t6хR {]lz_쳫޿36Xœ2$^.BB$opx]^h۪gH>!2,YtؿEOwwO1 ( F/u^D/fˍ#)o1N uH=u{>g4MUER #by;VBAB@bDP =PiFT"H*K-Eנ~ถ?l7CRiy|:,!G}u\` 
]羫ڐߊ'3*&9we|\Ky:5OaF$}fu_7Z&aSJ3+w3"1M*w 86AM-뱬VDǛ= DVe]`C=LSl:wYi@tt@F{zM&#O81fS-5@e"s6%sIW8cا22O8LڂEG\u]6`H)7n-Ɯ+=QKoscsUaZuYQA> (d+ro6.tgJ6vxuby32ޫ$d"$!6H-1x}aE^̩Z!<䪶Ml/l˶l˶KwV.& =ݝ.4] l.}F N P#Y#րwиe&+@u(ܤMbL\ y$gԚF xX@HZ2J JN`u/āOLk1wD zu@S#wGO46IPP>eȔ: 2gV.ϷNWO D8gar{4 ;<# O1d pM2Yes9LPCa%G=Gн2Ke#9OD}BL-D}􄊝Xdཱུ| KD9 xB5jH|7<Mh zj`e.Cϩ|m1͡Q iTH7X85&/F)EgR6ẇΏ:뷤pd"M79G70Qa^s}cךLwyes{8Q3Pqg+C65?tyD]Eqbk:gRBm!:}4 }c,-|qΝQ7mٖmٖ,r4^L!==أUڦ1^(^8!J>!ܟ-Z[ *[]1j -#"9J1 9;K\u`Ĕ5ss 0^YW$+6DHjIj-|eQVeuidcPz>g8,D )֝Eƒw#?jl4MO*+fvW>_5Mvy A2flsw)$%> ]\f3F]i5cxιq=?ukL Wcn!204(~Fܤc|2eR3NƘ)R3L kIRDZ;<ĬIOupLoy}RU6|h\A"Za0G wĿ/ۤHo3H^@Z;>?%_(@0W)CeL{kd#&M_] [C,[\C)|sK"^IXwyޡbl쫖m._Z۲-۲-/PTyb7 ]ӝݹ\&Z1Y|Jga٬ 0 WF[؇qWL *ː1R]ފ..i.n=2b)pS iP<|xYPvGR~;. $a b$E l 276C\eF&bDͦ4ҨXL.G :(+ؠ@ {1h2EyQ@h}bHmhqeݠRnOjđ}7]= HU~.$ǒCP AA'%!?ÌksVg!`} Et6ZLf7鋝.MW aeu~Yz{֍j%~fQpimG4䨯ri?s rtz3ȼlt4Q46KrBƑ@),j>Ӵ" `ƌ!P?mKG-6B Y[䠟)Fn9O+kc kWd!'52Ta}jfG C05:>󟊵V:Gµ}A(ǘq5,0jg蚡k7ӫio--۲-r~v~zp4: }|1ݙ/bnO;^ >T9 x0~g  )hAK%U zp7Z]gP@9i{="p 5+T+ҩ^uq@ IDATtPv3ǔ( A DLt!ı:H·ʙ=-)2cέe,%1uRYWtmqBDk~~@;"ʑ5xAtښQp| hr;mӡ@t4_? er }u+5hy *5]$kUƉΓtl>}1)SPpxۑ޳ars%pȠX]z=(rz3Xx/ܘ[OefP/jb244\cSܑ!&.6%yHqG`eb:39`VB+&_p_j)I!m靆vm|2GXO&~G et8M bDb *]8|Sܠ _C9R XPeu] 6.uk@7u?y#sc+{ `V둀" +DUUq9_m{-8-۲-]H]6b¤^4>ߥo}J1J Dc=5w}tm|s7L8#>}y\28vT 㤌|;c-d1pgʈ`N!{1;+Y`2a>`<*\:Q\)}f@ ]Q[Mݣ,ה)оsSVS/U 程RݼkGѲMAOb!%K!!Z)5 0G#P1tP27"}PYt[y[K\ Sac37y6T[e[qnazuBc>7t\L&t`>9 $bxt Z>K}fr dN[U|2 D1 (_;uWLB'.+P`@JnCKLbu,Uږc[9mV2H\m\2ZG8ٱkI,j $@paQKXN4>[s!̮fWA;)Nhb|}*ڨaH~\]ݖg`10 8 1ǫq+d0+9J3 4cmcoD&$>NyML6h:6Zf_Ԏb&n: np,|ɿ_͜^SQk?3?P*j.zFkjPFxM;UaNh?nf>דX,n*S]HE.Yi:R1.^_e2TxMGni{E/1F,B0t6nRϞjq˂XJt<6T|,5cdlxzޚr`rrA+MTTXV ́/4]B߻S .#~g!۲-0un'NfS:/ NrT;vZkHqP)h 2U1+Șo4E dZtu^((gTx XʂAT'=  vwJfiFTf%^ &>Ev(J-/>eLЖWlPѿbYsL- LeUF֏fܑL̰VĶy+`>yZu[b㳣는-!z3ϜI5;5;8e5Q6"KBO6zHT\H!°F۫@?6g7y L]wxLwviifKb R<{sߓ׏D;y|NGuwrHYtA/ڐAfS! 
d̅;ur& yap N4+<pyh>POH@P("YZ+e3q3 !p!oXcٸrd}ʰ1!&6"*0 Kxuj~.̑otovR1K b 1Aj&IgCԥ.Ǚ'%XIt4U+6_gCq0k\d~&5%顯-ڶl˶l {ɳߔg] =ۙrE%HH@6C@|>DPQX#Ĥ8)GEJͪDdLrȪVeYף  T' $X]Q$51S+bw:')3!:A[Y`N$K6&+PP `sg.CAɍ&T2fn`ձp \^.&THЀ:'`lL@H;x.W~_.\~B 9\ &4Zr'^e_,R ?BG0ămzO_ޢdz< 6'H>vڔ3tKKdZ쓵 _a8InC].jcS-CYiD0R[;x'/"ɚQ46%$o ^p硂>%Ӷ 0: Tu3:uCpo*GBgWҗ`L=VDfJa2mrފnL2^ö7EczF91VHrhAVJ!<3VwfIIZŚ;*føJӶmca.A+Ui+"Zw,%]剤 |`#2lZFQ+(ΪpChI4ÐJ? mi_a1`yשj`yi":㈶?/VVkt(qg2c߽F咙n5۲-ZK\MbgоgێrӼ 4Φ H.4[(4opyWJ5ˇ~kFRH*2&H:msXAdk[ [ _ U248*(/}w!h+v) Q<$K2nY6? +}jas)\Iuhbέ J0 +ꊒ2ŧ!nan+㓦'zR6 ,=$Ły3vaO3rtlX׹aSȇ˞SZY:ßatP5hZyT>]DXIkp.}yp"qly>A^0`Ky1Ri[GfYR ҿ2 M@X:.>VJzW,er~xxx-۲-ۂe;.)M2Q\X@&Eӝ)-Wt|Ų<[ An#w `hÁב1K8U; U#$C=u{#b@0@QElAB`YsJx9f19t )4L3SX3L I*&1I Cng.?]Hr<%✈-F44N(@I ͥ]! XWnWU"-D% $9N 7w.4]n.=E>ɵ+_dΚ(c@IAs5`Zj@MR<3EzMr7tۏlNRN f~*611p/:@QfGL7}ZppyUP<ΠE$ ObZ p7yn3w4nr+W/RZعD=})ʝ׭-5sCyUrk )w̑(lɨ( D5V<&hKJ˼ \Ks˱D|`Fg`TFDU(̃Dߜ IJ ZB<]1%61( Y.Q(Tkzi;-6vP^<+aW?dZ9˦&~ɴ-۲-rooV$^gv#u3{|H_|617*v< BSUS0S{ 2& QϿ@dq &UUxY<>ewK1Vo`g`0Y}nVlBc)CLIbHt[(N6E3}KOg`D "c̬٦Fqc5"[1]fܓ1&Z1#rVG\([ =CYD}N983Aw X!bZe2'z}(>Yl1a9ZUUԮi8Dp&|p!ǯv;oч4+LDV=D綁'}[Qn6}1N.}T(?ǯ5MgiV;^'HvCC7z]kY6'*j hƸ`29$ b!+~ H)U{!p?M/?w2M=)7ZuZBxg6$hxzr["޶ikkCE f "uCajB]ƀ\OكaTy4Y^On?'jϯ t|,mE2Ge[嵖w"H޺npA[T A20]6=rEo\ibhHAwүRH_G~0`ܨ# ɛw ۤ[n1oۜYe\\KiseI1Wӯ1⚲M2 6gX ]>0DѠ~cLVdKazqA hޠkX#lU1H`.sFS2qc(؅c|#io.%By'w.9QPPȡ la/#lل74dDV@g1vMܟ1&a-hV_})}r6= @b$\uh|2u-vzϲ `6 $A\T$Rr3{X+63يE!z3"ZI9`_nc:]6hk"XH LݺC%/?|ש)[*]1nY{l\h`$('0k֠h睤wXɫb5kd1M2Ԯ!ewX>'qr̾'`Ӛ`Y;kXS;clWO˿m#+7z~³oMe[kǘ/n1-* z. V ,wozf,,tbN;&6vVB3ȘCF\5@FA@vR R(bA)eCPe0"ڻ"V˜~[Q:"kl_`\ϡSY`W`$=逑b1#]`!gD1t4r)~'F?fX*0D9p|S:9T˳x$<7*|ͨ F.A uSO=iՙ[{X3]BC67 amH QtK5j_k=xTcI˵Eˍ#?={a(&φ@Iu\'Ej_DUf!B!O<.(62=Su[]4|\l9J@ obǎ*3J,00B]̬/,5 ͔ޢR3!ݘۋƝ,XKe_ԁzeZXѠcC^k_zjz6N"2 Ycn\'>H @r$FX$K! 
Ojqt{uEX f7¯ߞ۶_{&~ 'Cxv/N>O3F6WyoHI`ɷ۲-W["it!KSUhY#n(q"ہ$G.4[RL3d t~aL-vB}USy2(`tƗsO(u ÐA*Tk T>0(ipZDYr:buY۔.P(P.7uU.C ĝqH5E=3FЭ|M?_u1%Cr$Dk >JJ ,QE`B?* :hiTzv]%dw:R ]dJ簺Ń,pޑ#^O+msO|C˽Zc.Yv`09B8vL9mz z݋juIDX 2e(a$gmο_>ٟ.,o4fMW9w˭¾=f$ gdfA?@zЛL|hHM"  BY-"egq7B" V{cppwDrM2ѤI&vb#0 oqv|j;C3>+5h_qKa2 J2Wϲs?M6 ' vr-4:$A2 PBh!PTC %aX4E:>oqhrI;Yzj>-/wmmuQ]%@|} NdIaレ>93]m_|5fgDRŠbi>O=0.)| 6KaZ8NȊf>'.J= 3 QN0NZ%972]ǎ"Хĉ$4GKA`L&vIin{t6&53!捄pPa*kK4&M[Oa)Āq jA`0*os0&dݳ@Y`jYwKLwsH2Zh8Z0sgXa%}\k$  wD4;=q՞%39R&Q}XOF1hu:7F4 䓄uT(N@Atsf-G8vZvqV5WlooOy׾PuYFβ4YoVZs4G>OǦ/Ջ9WDT]hȡRoWp^m8]Ʒ*j Û5f{!oAYf|ycSg ELdg~M强&Z]t†9Fٝ rS8] F꺆r eY>:Feǥ :vǘ YvU3xʠx-Ѣj0_nduykg68Ჭ\6 _A (HF=W>zP{X:3(lzxx46e<̊G ^/LzsPJ;;*C_|i  d\W P2*Ky]'g@w/"h$%p5vHaŎpBzGD|RϭɃ;v QK: ЕvI,j j 0 )>/ }A=M+^R]%%vlLeY,Ce'b5 C 3: /a&ͫf@=p@5n$ ޢ8 BUfr{zA?%9;Do^'Ȍ)q%4|G-Y _*MCLۀ:)}C_VPNܘ[^8fڄfO?tyгVSF@b<* Nu}wXW&yuk^M} nSՅTNNA&mgU Oe?>ᢶU{`V^l7-z7q,>z^~)]H*滊QO?S# .ƒ6E$Fiyf `kE ׅe%HXggsVMeo\E&E:9WsD6W^r'So} U Duʥ}kOSml|Úы|TMUaY-?Gog]}^Wo9Hroo{llclNk^P-ĕ/[i֎ʭ82G&%l-{^nmAܫO5&.DߨϘfzK%iʩv\_"z Cla1I$QX63*I'LucvB6&wkرSmROA^cJv 7&U˿Gc 9Tba5k'(n:hEPXt8)`F4H#u :-8H|i6^.P jXfUyʢmx0w=[]SU6dX!(c6ZJ6xw0d_988Yi` qhjg\-,fс? y8@:uTV=><޿spne00?2>ڣW0%0ͳ1r<7l~7fqqLUa.Z|f%5RK rk$蒴lѠsjxC IQ P⟁%?V}9y-+n5nf^G|M99^DME͵"ˮAJYWםs>\?X+^6UR޾m6]9[uvofp z3r?=7ny2]?rY mmgP@i1vx4_9T' ENjOy$ /:VY$+6vdȗ"''rDҰT%rYcƞX]Cm E0XSP,J>{hr޴3Cvm UHa!dA5éh#qz+;Fhh /5 N|؍'Y;;NSgf2v腵BHFXVEh&R xv/XmkV0 LOV! 
ME~֤с]@QݒuW"& )u҇` Femc4Ԙ>چ'!SEcFPe`m!e:AN,1kyb$!i/e=ԝvvjveF;krk_AtX5 l{;CSi 6$.~g^]cU[gw ]l_n&8;Fg}&/Gj>ڙ_[,4:iV¶Pjg z\d#&SnS ґp@1 'XeW>ՅI#)ʗ/0#޶ |%pd:48)$ԤBue44h+&:ugUU!jm q`},umW\᫈8tey>źu `4{:3icC6ghA ׹J7!#g ՕO0J@upNe[A Cലl7m,Cp+ms`A7&p /(O[IЎH?6Q0c!a^`ALB1{SU00l_5CFDdW1wq0K-5߂ :.f};'^ptP wvh NaN A=ƛ݇Qa!9a1'vvPu|sgSF?,[:BD9+y>?˞<-kX2{WwY: $3LYpe E[Ol2 =Sw>X]`?cyp\}>+0h[V}¬>0mmsu:K0"tsiutqN1 h'/j?Z0 qܼz;>M!@^GUu)TĚݛ`7GO>vStcQ}FcoŹUC6ldXJ9!;BvpQp&T6M$YxlHQ/'#M2Q%ea>hqјe@ɡ~6N]+j: *^"q tPeD1U1V(^W4 aN"ÒMu좵pB& *ݱKjK89j+m ޻e=?8 k GYu]/T~*0NR[$ⳃwePShٷWF}t|o~q<< "}nY<˚V.=0iԗw]>NfZxK;Y Ʌ0:O$|PûN6p8lHB$'0.p8+x Q@.5r:@FYl7XSBaU;}}!C}ɬktɓ{[WߊC6 ݾ\|$,$fZdfA\.cף,r}ZKT`P᠀%LfQ⭰gg`SwU 9ѢN9J)Ԏ|p$ KD;H\`\9ht-qJ1"C9$[vkb[P(k{k%K4a6dZS s Px`g^~aleBJZ O]b_Ewe`a/:p{uSE,&tF:p`C~XH[v?0tx="WWb ~a9g;W`| :lwMAN0DmfD`dmU8BOC%n'áYH))LW'?q?O@>!Bd@c8g絺,`ٕaNhP5<9~}9L*ꐍ0-/Vi?aZPdh0Y6OQ{Z% (tɴ4u~<@uI {dUXM޷[/Y::gh &1_8shlSw2HvM\o_My^wlzo 0sBG8d0dc߳_<5>;4w祭jχu@!0XBHV\o"T$.$YAuJk+WF66~>Lo]zZbY~ @&Z͉X<#S(K!Po]r,p<`EL~2B:yڐo-@_6`6vɬ.a{nJF~vP 2,NN"FBF>zuwVBrXponYFRE DQB-N(Ө4+J.Zg hv|Ē%JG-"6'swF('a +!q-Dgi`Aɲo|;V)w^'Z!v`S xbO\ٶ3F?JmS6LY T—-5 PȺ0tس& bq ͰaɌM$ݩ' L3ZI[GS-Q6 F}ֻNx{'[pgk䀌cܩ#8.]T'b&q1gk HQ SZwl.}kGւ 2ٜǮ Yk0{%>Y_*`d@uVuI߹aDEMkC*d; Tn V(O_*b #|s5g,IS{=&?5@VV=k;E㉬O_ IDATҳi0ERZ!W `("Kr0Zt*FbsHss$J8Hۀ!%6N@{*D9) OЛg'GG"mPq]v_ʹHwx=H3Irn] mz QH9@lF, NQ)}k= DdUkD7Q^[ r058<"#$ ƈZ (u >LrAH֩&M`9tĂ!7Ǧp x-Ez{qB7 :;ޛ6m eXTbԶRюC<s-*(<*7$m"lSuSb jh QqFuf$ G28lg;p U!4f1um2dMK :~#:T84U@/bުtjXp B#|^U?cZWL/r&V3LF}a1\&rN^ȈLu&^>%jC]v ˣW:[#d ?#c˵϶*q_O$cӪiU0[v1|EfZD^Xˢ.?Ԥӌ&^kim쿯-Go{Z_8YAmιx<^ C6_OuGSwx Ȯ*sJ7$A9Zj~C͑roq jc`X e + y]Ѵv Ϛuge C-j:3:)6mٗUeY}FE*#J K_,-0Ҷz_aҨЎlzf3"Tf\huQv !bAXuCnN Ou zJKH7$&AU-&ݰgܪG:Wq3d5 0a>Tɍ5q\dDK~ J fZ /10ƈ>R?ނ-6t%V *Z.h1G&sMQP~{:mGpp8ۍ ;$&Иv'e3v!'|2;ꫜ>4Sӈ> ಱhUHYO, ב5#ZNag`y|bij@sNPNGQ&䀏^Kog1ix3Hc+f>yϜEVw62k(P$dzz=>$$|0IvA^ML'ޱ|+W`kD|&G JjaP8GC (#S U-MJ]̀rws( ZyJ}|^\ 7`2g6 9C/CvĹ6݊xɨ\mH;%]/’9@tg'pG>=# Qb&BNg7U.4qkU@\~}׷ GԷn0HyUԪUa"}_*6Jc!P6Bw @2a4GZs[ԧנ.6W# `x&Ln~^sϱ|xzfM:Јۮ(d e%4&&7% 
L,I }O_聑x%S6U3Y#}DĬs{=o} oҽh^%Y;š T ڕoP x~_;w@H0gȘ90M2YPNwq$k0D^t}_z(͂ISҲ3AFtU ^ځֺf.Za"I]J< њ3T8Za r29~E Q=8. cߢnY1ÖMba s9:̔V֤AŁ^z ˓Way|r䕖] oLqŌýW}(?3'gၱe ke`a2%C $eRKuS|ɖ(ćV5-XBV΋-@Woy1P@F:5>kʱC (`#o/;W-k(ٲRm3IL%1z~)q^L7Ӕ0 ZIXLa4l:P ǶlI[P5kfGV}荿n8J+& ک1jK2'. ezɪedjAKHL+u6@] SE~gQRN ?0l4Ɠ}Cg}B;rNy;ȉ{ K;&5;UẎu Nk0? ӇW];pI/懈VwгUϲ qL }j3uaNg#bMdhp`싐yl\A.4M(n 8L'lR,WsEeG3~޾ھq?6pN%?vyש$bI\6 nQVQ@esxՆ-5w#Uvh:Yen%#{#$.0H6̑mf 4HCh"ͷ94#}HCZQ^ fEIߓ-xid4kIAҔ:AuNYP*EPɳQL~6@@L0{|]4g!*G Zmyg=#0HҁZubRM.'a; Y!Y9߯fU@ 5v^m(MцC9-)]Tw,wn0 A2N' @@[Q$mw3;ijfbQJ`:VZywC&)#Ԩ\Nt*?jʘ#"Qe5F{EZar3CO'ym9ٓ0 k0\pwuO4GltEՋUYcΛu& !,2ݤ,5!'._1}[.wa9tc9ig= ç_W!V[[/^9ujBQE5( ~(`<]3ڳýf駰|<@ISQ}FHH0#Cp0 -֯ќc.s@עlkywvh 10sl =Wډ_Qb;ҤUr\+N*l̋Z\ࠊ>0;GWԖp.yz88_8`䫷Uh`yusҲ7$g>ͦnn-|7.Mؼ ƽ { tv !$Ƌ; cdP^{dn>2߸ӏWi 6lÞA=ޡ5ݭ F6vA* N˛RSKl7kzԞ7λ}̧͛9sZl ЖȉL^ 8%$694b&̾dޤ2A:`4t{ .3CQ[H&w"6N:B;aLD:9iOg}xG%hz:^5/EJi3pN](6 LGYOJ?lEGmK~Suf5iI= m |mM`4?i= ?ڃ#O몼4V3 UUuTh p=~[ ӀG_Z4Ԧݱ.s*Kfc(1{ ]<6yr'`&ˢ<Y,aˠޮ7;$1G1<;tx O[2jr(4hS%t+9xV!PƶallcϹyv?֍7o>nP~픓6Fk0D;Ns 65j` ~8׎NO9i"$Asx=Y6`69rQ@:E`f$VqƌNx:ig>EVECk"@wAǡRa8u{\T&+n0Cf#uPN2YC[^3=: nJ>&I|wIߍM.g1BjXTT9}Z\GP4aN"\wk LCq0^'S$u 2[Ԣ9ʈ\2*gnX~yv=Bi:ܭOkQO"c &zrGahl9Ny)˜#ȇ \>[BsalR\svI}@I/`d̉}O̯?(բE@E6f9y淁9rmmՏd>+!>;ռ.1]uIqI'tHQz[we"QT:N_AyH\eh;Yh0 u =eDWg7`D],}ٸ#Lڗ1 yGi&lh!BߩRys 10DH~)yV_k`D@)#Jƥ!-p[{q]d %T!: ?Aԇ6-p"!TBE4`DG&0DNGkGT3"G[;jgU_?I&TvTch"ʣ¨~hI!g+@J.54:K˓]> cqwzٷڪ>@\K KZ2BC$N5 9wښlabxtГU;H0E ClG[+5N6jvh^쥻Z};i w3QOi: gc_">O,.UU5wqTE vb2&k`$wNz(W1Ascd6`N &"ֆ9f ѻMv NFتj0& 0գcRw?qnyQ Vrlx1A%^t D[Pi#W0 PY IDATk@|P/JF KVDoCmk*:T)w+kW@}t_i0CL_ +P_)Ђ,Kj? 
\hS L&`PK.$fg}Ӱ YW/bY.SX@>`{dy.K =Yǥj0M wg8[rU}xyhr۟#dg)3IΡV ^ SӚ-zN9 lmÓb1{k'e1I3&[K866Nͺ tOkP)ɘ>dt7a4G7`y2/Gu4% G|gYEd wKvV?\ޛ|oh35&U!2g1V@$n_]iA3\zw,%$n_b&#zBӴ9@ӗUUFѿљh^$̺ɼ<)MX7ⅧzV~$dBsfa<@F`"eYοI5,ƾy󦭜=,Zјx9ȱZ*.M EEtw!9[Yi1Ezn=/zW*S4`XޢE YUuHlPPuʩ 8L'9ډҮ{nͬQLR\qf,}~`H$>A*$FLz'!#o&Jp)tN@!5ײKPGɶ8श b4'_rqaAa]llUPN`S⫺DZ4zѸ]6,cs>,Ba⵭{CB6@Wz1G{Q4~'kf87 ^9N@ Kn%a\8?^Zk_:[k?a Y>,n wNS}6lHny?uL_ ޾c@\V^V*[%'5QoCgH_29;;a['@a}p1|`IpT`ٚG7K6~))ތZ29`G $s}Ybvz#.z OoG9N~I@H³tTz~kIMEqKu^,p8Ύbm6K@Gl DqBY#MGjGJPrFPbIEj6P6P%&mCiӴ7 3~MJLꇮᗄʼnfV'T 1"q/黩P(1l fC3; 7#彅PJqf^@2> gp8ނ;-8 `g = +зAG8< f -^&\HFx*޿մ}Ąt#v}MvaG^yE׋~ YS\,\8~o8sfv sY'>u}%Z,$Mk&ź )S>6f<*4P'5chfhEW7ZIIUDF z85b+nYkg# 1 N%grOǠ=pWe, rq6].ŋ,6v{ɟտ $ ĩC-ͻU:Ռ ")0F=h˔6Rxv)CS1={!p /A6}!mSaVp8NYl8ɚʙwrʡdrcJri^#Dϻ)4FEB"ZrYJKYkE{!MH돷A{$a>6) ۄwEv]T $J#ߛj#JΣ6b"S P)[ ANo .7ω;\.|>@=2KzS}.I p% \CcW ̴c|an)@k0}p ꙑ~4, gtf/;oЧ>9f`+w[8En 2{_/>9]yg_a:/{ZlU ĿwԬm^<هbkMɌЋ%FR 4?Ѕ1pLbvnK!i!|L:t˲g-])S>C$cΗk86e  PB`ƾ:mlcS8`anʐmMF@'iS}!+1JwzTW(h@A^ݹ10ZYtò"WUʝhD ga!5v$~(FǂwIkkȚF &@J†<7!L&BtPjHezAZh@8Jk-32qM\6PWXX7%6# :k5PB*Y[U^ m:Q}Rf^ ZqP0ښ3G`FنD\X@(<bW $cY暱^xV2P.'Z/m\(Ռ+M>M8{ llzQѸDyyp\*I G1>lon5~aCk g2);׆׳*zeU'銬K1?_wlz>=ֺriRHk$kg ) H2ܻhm@M`!x[.4$*UՋ4fv:cCߏ'ghݮ钉*hB]\UX|!G<f(ɆIՙ1x崱mRb!fbLX%}fo|WM [DM֣PsYF;z@ME$d(CEs#;Ref“%PUz6a5ѺDФ OX!S"qNQ^t *A֖aB˙aЕ4&ƈ$?Mpt/ X8⠖-8[kMqBfB+{v1{IBd*5t9a`QTrUy'`>]x0t 51t Մ%`*,q.13F-Y.Q:Q!42z'/.JsF̡0\9~aˢ7 ujvÙnWF`p-M-`\nŕ;_.pQd9J9UH z" @Ǫ2"93m %1u-H,4 . w@>@f2?r٤XLcg{Y%=5lg?'[ƃ$dmtZС&[xdz8p$%άG~mlc_e>O)HXfRL(gK``w˃c u Ơ.}Ouy5źrU,>guWh0 ,y+:_B5I\t<Em8zm7t;'h$f,L/Iml{eGR 6Tl kPl&piY"ž a$[P+J"%z`ä ^kPsMvQ>Cۆ$N qMx$K@UW#8R֓0j$mqj)%4P ZRv1a9FtaР*1^C\UN`XTa?8QT G|z̵G~9lO^ޘ[0p\ID1y!oEvkο}g/qS 3Mik8O8Kz{@w:˪[GE-fhpAz4N}֙>,O@>m[톂W:$)b%5BEggiQubhY~'*: I}j Ad2O%S>y&nx6 ƾyd4mj6]js iDZjݩQtvE>cg;;Ҵ`H!.vMD(ǂrS@ Τ H)?8~FMjB aDPFRRfepb [M *SH}v-Ҡ!#$Kco$cMrz!\3}M:YB%@JBaJ H"NzHߴܯ4Gq lQ/- p:i@l!] 
$U-rDnOV)GH3Hx449<ށF&Mfčtu'ְm: !Aϴ}PHQl~{߁fw$2D,V͘g1aRZ}YcuOzatyl h%l/JU5r(0 uJ<ѐY".r c( 2aT⟤JPkH,VPl0 ;:^69FOB"`' _#B¬ LEx ;Aoz{U t;huǟ#BziX50 6<ԛ ' =VքWQ%Lp euHvAa>YVY$yw@׭u/"XC9݃<Zsac`$D g֭[ˇe G_^[kgYEv66W×o,b23X< hT w+;Ogo^ /9K"=/J l_κϳ5+8Fx?FIw-jv%#Y%lj\9nKVWa:/l;X,_+A]_A8d'ضJG1|eϪY2t;E2;=˲t$%3DeY4?Ϟ"olcKgCY vw5S Nz"I'Dv6c7n8<-@%X%v35 h B^ aF&``gIYAV[TW@7t"P}lRRJ'gSǠr0kig>KWzAgCy|L@'/ XN)] Q޹r؅ѐڋw*Y.sf%u5@ Mp9l5l/K,K(ZBn %gտpc'*n {OιbCNOt20/J"ā^Fa%5S2ptղ1w:HJħ @J>9-@k%uwvSI')Y P9'p4,1T{th^ QEE`$qLf(ZϨе0xOr [m .0ۿ C:W*hyU>zwOS,vٿ/rڟ;xi7`W@$!ha@CQKQޡgh$;e~M;_+F:)伂RƮA4 -64*wR#t:Cե`C*=7t(oj֘zi"ˎf|Zܭ`~b7G53 g2KW7w&^o,ܣIݣ( S,٤7]̇B^Ʋmlcۘ՟>d69Z^6zY՜9:/]#8XC3AdE &&qAߩb@HՆ[bFA 8BeN v K9+-\f!*G5V#S齔GgT!CՃR46 IDAT `qH2֗cy1@QNwd1Xх4Z(H=0VEh%ê4-AϻF/nm.ЗPlwPmiJT 8q7ƞsJfiMZl XLU2܊ 6W*tm](8ɨxcgџbf83F0;T)#4EjYP?YJs:}k i9C{4Y.g}YEYȔ+ѮdNE 2Ӯ DBo4 ⅊Jm]hU8{+`5ϳ 3̟Udxkv]|+!:"e9渽3Ɨn﹃ʇ5LJ6l1ܘ˕~c;oطo)Ư4!D{훨][r"1M,QF=m.2's^-hAxb(Egt6>{ " Y(ab2hT sxoϷ!(rT{-{_`-+`#ZP"<"̇hPTn++a'SZ8D3E7VǀJYTɦj4x)cFuKřٵZ؁郛P2*N;VY[#㞙?BoL&KJ]H&y b$SvE \aFx2 ߯8P&GzZY}풧 z2/5)uYC^:r^WBk|mrVZ>ݗak 7%EW [ݬb2_AgYY4kF1mU?\7 Ikι&S6tr2{$\{Tp$(Hk19>Q9%U'cBHTG,]S};=$ A B83JoZOskdDk/gEpe Ƌ Ta h(WHUt>'}CA[Dm a EUu2t*U.ll#5i,980hG>ekvjI[q8Lvچ@9VΓ6K{YCej+9(j3SZ>Sj TGNva{ }cA0ƠkcK ޿ ^uH1s.:B n_6bhsm]88AvP͡.`&71ⅠH;1.&b8PHںaH 2cU3>}3='Vˬc% {>+hz{l$tY;Y(D`YA|vr%Imj~vmE^za]GpF `who1k;Ľ}Jxf~C'(|&mIVVmlcY~`樬 ߞ`L1f@OH<@ЎPa!N@݊9ӌvF,LyI7'An&;jZC6.]皨'7>cONץZ}3'vF5\/`(!o>*U&Q (>ڥO$E@[y$b`p0-teugmd1tY^5: d(RBlH`+J6`A6HJYW}.y/:Zũsth KSmɄl<L(J| 4ig"4b6# #ZwZf3P 'W-ːOgkziG*͗*ȹ)겭1/1Pύ#g\]Wu^͠-H3‘(]\7?{dq܋eUw @I뫠"LO#O`C7dt-+S@Hx/ٝǙ9~T92+b;9OwuuUuU/\m q+;T^Ah(ҟlI(Y)M= yt۶,6]GXocWql,9yOoZ_!4X'Z6Yd3$جekC'Ֆx>y`8䅛uv]ȵ.Ai @S7 ~}zAE$iocb4}iTK/_r)`Յɢ8=@C4]1DUƲ t DkfF,FxS"[Eֆ #$b=8ɶG bKv>Z4tileDi0+M$Q1Lm`< E!c]xDx3wo`0+ ͙w6̕H{W@⪆ #Z9U7؞!Ӡ<:Ґd:S#p'F . 
> ^'bX<K)sK7d`(xWP8q𥲖Tݥ}`Wxۅ`/fB#G XrDPFVGO > !)LJnxOK Gփh^XX{yIRM0*&"!"pEd ZG*--E6"<֥$w!Ã+Vkt)fm EoĆ 9.ׄ_QB de3f|*9'M.ЎS^1Oo8WjWgXC.=،F#l6.ovuzzG^ eS4)Iw _aw T1kf1иʒ*dD<k+Ǟۓ]ߟ[ҁ);tbYJ{΀`{VO_+(6Fڦ 3Z<+$ \IST1I<{H`VpQ~W=:DH;CH”uq)*@&LIxю-h5s)#\f!ٺLX(NEe9Mwp#,~ӻոߩl >k4)qĉx}.<hĆ=encރ76 E8APX<]Ʊ<(`m)RF:ǥxdRK>GzL#;N2n XBN+d0ԛtH,! %]/#O⪇zxblSC@"1lM$,LEG,6ydH:4@FM_hLSr1rd[F#kmn]rVPtuzlb_qE}7vؿU ۂ)L)U,ʎlb^F}tH#}/ &+ $)'E rq4`0^z剐BRjUAUmH_ǰA>B{ r.uof*r2),_a; FeT 2 {70z c( Xj a-,$?I2]axv25~?=Xp 6^ } 廢;ozV蠍XUIxAY FL5b@_, .h٬{nIr9|ej_84nZ|23Dor˘,T@cz&v16Fmiľun(@a2 B~.@;M`qiؔzq' .]F!E)J~yZۓQ*MoDȺoiS6 80+5k1/G܃KG0ˍaR#˛7ojz饗'BYya>($g'#WOA<]C3h:hY,CDlIK˨}tiRI{jվ)~C pjldx{x6d'|@Ubl70Ns!ōIEq7>8T4,.(kXC%`;)N2ƒ-Z{ Ż̸`WK֖&0,ЭS=Ÿ/¯ b1D8IJYǐȴ sn"wv;뇇e~N2 bY%I͆ߜ/ꨞ+J6p8dT.j"׃#!sy|1Sꥃx ⚎@DQJ}/?kFik1UP݂W#H"m -lu=6 SVeA<鐰MZNRJW۳ ٙxiȠմn@|o %>@EBFvUIM`Ex]ܻt b0.xWF&5s@l_ܒ180mUt_e H Ɲc{GoE_dwvdr\[?wcÄ}Ϥ`}rc(j#_899n jgc؜^ŝ =ӶZqRTy\^fuOkmeڍ3( Vq3?4mNV:<)OWu Ri1\x_"Ln)J1ďMڄ)6.]8UmqE.({RR`SkYWGrhp&$\8Lg~mek\m9( le=/>cݧߍc \(MqӨ1#Hvò&|g%P)`)cUm$)r I6b^zIQRnʥEkd7aң+`qea©u3&0&AyP_<_(` GaVOe,w5Bc6ג8s3hQmq? _$3 NF$P(U"۬mw ÎRٞ'#Hhkn=)8y{s/Q0 > y` % ց%,w@>3bc`3{f@Ep4­]?B& 0`xOVU&Mٷ/^[AA/,:OgCzSC1y߀xW nIqGӷ@AA1G?{(WmnWmYr9 C>,~we/Ie[l.Mc ! 8@y^TF7VhS !P2ͪ%I:KE^zeX.xpaSdw`҉2eU*ɾԆAM`6U hʫ#SP[wŞC^O٧W p#燭Ít{BNҷ!*@w_vWByA)l)o)6̽rLgu=Hgl2b(?Xs٬pdvW:|dZD H\J{GD̠}"xVPcX<{!_%=Bfr|N~vむ"ؚuDnIox<`UJ@>~ JY iv,#rì8(^ /_@ږQ{Khc sl/mM&0g|ޱJc}Fvxmm_Ma t$ظ#[_Ki4`ZYjBWPU`L1vmS欔]*zFQimL`ҒctD 4-"1-V68+I|r,5i|?ZK/D` RO.%PPAq0ϩRlan5t3|1dgl:I"0>c0#д3ao蠽*-R媡pRAދ3QT L%}踿>5X䥏q&p2H`R%٤0NS 2e{c}7b x FU shA9 x$6h *j\cv˂r:ֺ^[tf5kT vax'cX$Pd9.a)5J2>`)X4 lAE~D2Cs ;VA ,o@z4[xɺHG p\׬<5mÈt_(TqDʨS#8-,n P^F"M(e}'Z?)Z=nB-GpIҦU,<`Yc<B|9*Q;rcϯB7'~yMO17|-,ߎW,.Gds! 
IDAT]To+i UZo-T@a4G6Ce?z2@lqVwGm nXBA^K`D7$e A84'N1ƚ/ߞup?6:*$c 6}ʄ YSgܿ%DqX7W~.,ބdp]K]?&8&lY[G]T3ޟꪣt1_% Fũp_U.GJ؜xe 3WS)户@H ̶2|̓G!6X?{+FQ@|V*?e M N0>C(N?daꇶrYV>yMװ3{#cZ,2>JZEaC]ܔ2 *L y$]60WJeq7@^z饗K-*u?RhssiqZ(}91 O`MNg\׍ ຮצsE)C7jcW}2C@lb)Sm!T2|a7  qFk']~ a1а# pmu 0ъFI p ԋ~ýwJHeƸ`ø߬ӅQ?ǮA&l>5;a)dPVŬ/!u@w$_lzzC$pwwFC(DL:sGzF,@v;k1a^SXl RCi6y 5XܾYXxI<”CԕxR]|\* =,Q_P 8ӵ5pberU/ ʗG'z DL퓥G;*f!R>ڧ햮 (]c oui▄mn{.Pu]Jb9Oj\U1~ـNX8/"eb[O.6|\xu&3q˨ L jMdqmfa~VLItǴIJC#%byɘQl*$AQY r1fs)^z饗.8x4#tFaLd8E$?CCKG&5HlUpJ)%{Dr0@ϋ 6xd6CT5i8(,ŵd4#nud1wmQH_Tr)$ |:ہᰊ-ϳFQא]3k XQ\oĘ @bpž(O f+"X< {{%יRl.JQ3>0o{d]Ṛb,/J 8s}XY3(Dt]  p`O1>N@8LH0h?WJ1)8.P#Cfu_ #t:+g8CeҬh}k 3]+zC2 y/,9,_7X=:Hmm/ng6< oXeŨa9c(&7 v1\28F*v?Lu˲|_V Џ$rS˫Wz^KYEuXn5%vdQ4( ~4’> VP-X#(Nǐ`7ht|QxjN zO,hTu3[#t_euehsQ- 7\rHnHY0(roL*&R=xs+`*,iv]M\x_c2t:t)z饗^z~] (Q 'qA҅P0 mE w?TTA~<'Oj+`6Sg૕rf[ 3$_ gK@qi3P.m ?JPQ~ѼtL)9Ąl U,6{2.|;S7ɸ:f9S j8vKc b V)ߞ=6¼B8i|l x_Dx>X1!ܹp#9s2C>Gq ıF|;%+150;~'|&%7MD۬Ll(zxz2;X6Vc@ZxC4!am01UVkcScлQtg(1Ӥo;ىf|eX|,9qrZ VE;]mG &DDOX2:Zg&-->EmbxiĢH,X Ub.cx0>ؽjrV#Mg1zyvӃ725ND],.mR]H{M=U^[#}Zkſ')0FpuoXo "o6+dybaK'.r[K?GLm?ֳNo޼ك#'*u9?{S;wQxc@1 4aG301{'LAL՚>u1No(A(FUB[|:[0|?)W{x9~NE׫Z2 (׋XA :)eBAGvF*tBR&JBl"ا rK>n5@0UPֺ%%4~ߛiL#\JmxX}k3A5G]\k8t :ƚ5o2 *P" wϡ9#B &ܐ:6[qoj}W>G꥗^z|d0Z3$^G4vRymʻ/VC0! 
)ģI^{i$ eNxpYT:yOG}{L+gB̛]#BH3H+nQFsET1G L7 &e"X-n=a__h b1d4a|~HDib@SIah9½X P# bL^'ز IÃ|5#KʹZkRP qW*oB:]_d]h/>j 0ͳ˟@1s{kZ/G%5=6phĈvZB?G3 Xz(䟮~e3V)&OMS7̷sI WFȣv<~y:x0/n_OOefRP6Ct0fhQKTn *]Nk2\M+r<ͷ ;+MirVgamʈL\=rusWyx)`1U2hN `J#޴>_m$M \4Y/kmvzzT!u^z P\*_ps<᭷4:(uU`_V ˶d\2I [uȖ}A!IC<Asz,b&`r,YͣW^JCUOӀ=r"c,(cYC{`lPR@+%6{Oo_o^2A50}晫]~߻{?]& 8E?ճIjQ* )6eG4d{BsybXLwU}DF]f hUhPgza|p̒rZ %LEQ%@R%I֦Q=\K/8>(;POSzv#aw>bk iIʥ-אb:B>ZC<[B<>PP@y!AUzU5<3֬  WrH X_UVpDZyYh[U6pigLcjPUa\aq8ya`e0 ),.W'r)O @v sqۙDޖ= H -V6TD+?d_p&S8$^m~_ڷZ{+gd}4|\(a1&o+@6PC݀.6PxXY,h094.Vt&KxAq T8śC`j 5ebej8֋ 19!k#TdQ_n>bc3Gs(;={>wsxוQ`Ax|]o|e޺}~q{Ƙ.ŷ+}0q}."gٗ]YeЋ=yctet1xm>7MqAfomFlPpʯ'pM<V::-k[ps \$&aP^.<`/$Fsc] eIk՚41X 1½S |)b61l b t ѢTwyVr%ލmY5J±!W]$bk 2dFX% icˈX+L 21M"j}+YoSW֑M4x,aN!)cM2Du%)ij0w&dž“9P\4<7u+\꣨1]||)"e1| _y?0Gư#0Ǔt)]0戥6 6tKWJy6Q ݒ * : H'UF`F7G%E20vL6"hDY t5xl!> *uOhPsfy|-J\S0Sq(:0rэx:x1%']>Gs{Z̳0䳀"icc}s^yF T\E^2VpU\ٗwF\?9o~2/hU-pKE[ ;.ǤدNWnm|yG2~*j!4|x“IB~Q oy67ja[6isicl7(EQz饗^42|jdMɂdWэW>#-?U)*X+5$`e4s̖0;=Xz%k\tofl¯@Pƌk41Eh_ !#[ˌ+cx%g,/ j.-HDE ;rY$" N`p8& 2fPL&e:/O US[ *. X"~CbkU,iب<+ ^WWSF*6Z+VD\sz*^3O1! 
tjcgCKF~v ƐK  :,\bT$`hoײ|{I2EbԌ2EƘAqU3y;oվ | ]PkuEiЮ6 IDATPMbapQ` 3uHJBrct>)JJxe+iOS8Bd`%h؜܀'+09Q)EycSNq<2=S_tC+lK2 O sH' =OMvSZ B0ʸ_xչʨlRM?\,yY4mZԕjwF_|j~;`/Eh4w)dv%޽2e/>RA9AG6IWՐEp^=#W[_9ng>Op׮%],m ozO7v/0ZeźRS[ i( yTE fS#OK^zr5Q;k&Wy& \kTE7NV e7+SBdQM "XMEB~:d#q,^y#$R/of +?A`X\N\ڐk~h<2KBicC>O# $EļFLߑkFh1ι5t#Y "| rqOYv;&d3eفѰJ.|8cР}/uo@8Kٳlpq Lgt jֲ`wcX݄ݽ*Ux34bwMnWS\|+C ]HBX>i咅 2:).-;#p.16K} fp\ & VGઈڅ%e@!0|vYduS-W7翽3Q/ D՘ wT\W4[.cMo?s o.h+"1~^u(mל%MKt dUgԎ2n cI6+ σ'&q,\pT~#e-nRo M ` >Qn޼y)]z饗^#}&$̘"%W T!QyUc5Uh eCe_l>'Ǡx3>-*zfX,~Rw߯sE =CP€BGen8 $:xu(m=d +@@-ˁ/ab[&60ۤ$8x-;6Y[, (W @ Q&/]P Ra RrLi )یpg6a_Xq?kkWtM9 d@ 95s%{K06M6m\T$>{Wع}` uQj Rǃq{r/ #u۹sI:9T1NXd_hQǪZk6}{l'l:Β"6ұ3-wO|/@,./|g3 HJr-Їlo1,u۳P9Ƿ1:7y"ۘ#]myWƙ#hCˬ.Pjn1 ]嗡Yrm *3'դ L&0*۸W{^K/O( wPXu霭LKt@%S:%B k,dnfJRn]߹ _ ʆydT =o0:5Gnt $fiKHI$ZP!h`-ψ#Km{A$Qn\BM8o$3UDytEl\ف$z 8^A%#R+Mb[ Kb)!|=c*CZb4[{{?:ކ^m4@^4R @Bw܊4Pʖz1D=w$& z9у*E?1DX(*$όn$Q*Sw.2GRex2بu߆=x^I~ buMiMme(0KZfc7m?ˍf"_#؛?ek6/)`1꒜TCyNyo(–E%Dkz꥗^zy$#N*A鲩h=IP7SbIL]=^Vk2a[Q0X!YA3AȺ2D6`0@nUeAe9r'L `riuڥ{[cH =C7`o LA쬵udT NImܓIPcJ#@*Y0@ƭe'>h ,B׵`38( 3ѠVxT| >_i*CeٛVd)PI=ar ۅEkm}c(1oBɵ.gp*JNB}xWD*!@4N[뗦0?ۿ^9&EQ,$)#9ߞ85?W3A1y5OcbK.nnBMma<ia IIUyA r# NZ}a-{i+MYD7k]6.&a61̇I3nRDf{ق"~RVfg6$,5lkq-<&(L>Op#IfZ2q,A vZC̀~}-sk q&Օ@ x i'7nǐbbxʌ= {O}N|&yS h]XcA!ތsO?\Zy^\.W]p IԿ-̃}{b֜1lТ~J6Mra<`:4Z ɇQ {)הn4EF#h*YZUn gy(A.]yh ƺ-Yu%|).,Vb3 P9s9г]QJwx \c+FcyowOt|^z/$[c+wXs_A(~Az1c{3 ]oQ(% I#HGta%J L^EoS `y:70ԩ1 {ҷ" ܀]me~,qx&זw_Rez|0I4`0,XVAh!q050sx1@ lқ`jL:H/hSF$`2VZ>x, e#A`@p݉$ Xc1=ʂ]pIM1%/|`g6WadQrۋWҳ0z+G-%-ljM֕P'?]b`78|'׎u5%h*N2Dwoct)mVF[NM9.I!3JIfCM{~1H 0zq*qE |)dP|!a]$Y6P*쯮(t\$V)tJz)Г%J`LO総74J`s'?[our K/[͝|=V"[X=M CSA_,2 $tA<]T=XE;`xp0km`:\7a;/m]G5x]x~e: Sޖg p >Bc~~1@euyffv¯m2Vn'Ia2=RnȯI3uqw^}ɆWXkw}wy K/H7ߴR)`HgR, 2Sx܉x:orpW&U:!QϵrK2,A5 }[DC19;de {q%m⣴,[anY)Zm>}m@Ie$AyW]Kn|t`(oB x\rbs{VQ5FlGdp`u=QC傿yLgO>?~q k{sNY!r)Hvd1؂@"HxDK:ͮ( KPem÷!gIo^ۿxҗP+sjee},mE 2ٖVQzo ",+2$G% |K1.qc=AH̤ۤci^?b[Vd\,zyxٖY/ g1DplC~ǃČ=1[SޜDbȞ=eܮM zoÉF֩Ã02 MSa~'}>/e^z\TVq {e8c&єbFj%\bh%d 
c,a弎eܚUiUOnsTKzb%[ÿ{}3De 1 l;" F^]@Kn/FqV)-@I" GV,Ґp\%2[a:& 0eY-\_(Fp@IV[qck LFt pq}r|qDzfP>; (x Pьb>[h.Y @==XܹjPXD @H]as}ZKۑA@.j HelsZ~`}孿[o}欱_cZONj|p{+;^JE8Ehᦅ\UqTupZ>x)_ ?}K/YĂ}CB\h Jx޺W(35Rfd[eUBw h2b@A@ЀidaZbJ }_#Rfpg%Q`- p! yv|]< J:U  0Obfpeažpf.AcXYwpRpgu6bYS$hkD0*BO)xȅ$(DQn ;4ZXJӰ9˻סX'47ԯ@ kcy5Qo!FgtO~w GX!*_eOofPoTs(*: '[F)_ѧK|K/\e2 u2R#CM} 0@ +JזeTQȂT !:f}]{VWD}4E):R(Ȏǐ/M0;hrNչ7/̍E3T^T̀`6p%GK IDATA%6d+ ]o+ k!$02]^ZkX 8N"rT[ѵ M%ƃ2\ ʹ*ȃdgx@MQz xec65u19{AQQ]5 5.A{#Tq FyT.UozzwA43s ]i+Vg>:}ƺpl\Y>&R%3_24-}+1,ӟd#p89L>{uqR:V8 MWr襭X!e),%HRlQO5Wew9 h+}9+NxOځ;g9G66X=]W)3 leOUq UAԪ|YD0EaǛPqwww?yWzW^zK/w,614@^# $ )cE} S` a902`I)rΏP,F *T{R@6T h`5G P/[hFY25[%fFlWY&m.3Mp%n1ˌ/r3aƔ{M4I I ׃g?5@1PK8>QR0O*`dE4L4l+ۥERb"zԩ G1͑Y*T0VVVs,k'M9oSvEu$9jcclkËEW,zlvmG<~mޚFsC.AkHBz[TMGJ s޿~饗^~]t+3=sTR9YIyi1T&F X>' ,Ԗo,Hj{ Q)}#BVt hySǠLXݑAn,P+φ!!K>/,v %B哻x;C2)dP,1 lb`kg]_Z *}` 汱/o4gCV7l֋rWYVE$'m̃6yX$((mJYCH@ .TNJMu#,jͅn5mRyuu=gvmܥBE@ɳ.0%8ߌ`0JuBgc @FiQ2YW?9U͞P)#>gg_G쥗^zib ioS͝& FʀCh>t@q>RBV.iIr 4g @ iWʭfef $;'6,<ꅩw!!rU3>Px\ :>D>8x ̲C ^5u1UBDkb()nYv  :|84yr q`8fw͕kwE &tJ`Di @  `#zZj3XmKW8\Q7pN+u Ų1Uu`}tVwl|R{[Q:ȐW!%?ׇ&dCi3m9A'͏!|I$z܏^zGQM@*| o; #k$o A'c_HGuoa:R_8*ܮ\ ,wsu%C?ekP!O(mk7އw.tQyC(w%BK m%{N& Et0zQ>(q9 g8J7}{ynZ輫'Q\<%뿾տV˯fA<)DcphM3WAm*p|x;%4c7 0ޅe|,|=(U<ҧHSpɘ JM3{`l6 Žpe8A3 m(nEg ex,C`(9Bp(,YLxoe@e2I #4KPx 5Fpg6 bب-]Xl06< 'ơr?+yU:xzsjth  8H۷{9 P{9Q(E-= "Z,;D #7uF=7Q$n`xAۓv{E076cc:lzSl Ft@ , J>f۱d(Vhq}ȣ ڃ !tסsaRq4S!XM$4uWܽ' FI|Аl.+sF1`tw=HWRZ,4EQVf!F b,#\E|keIfҒ@^4"F^,XQ ڥt%Dhl+%V佊d|7 tHब2ן04]kk} ċX5NqE.#g/o۷oX%TR{j7Md-bDܠU”s$3zd<:bA,>f8戋`>:%`hS MrOvm5/ ^Cܙ\xjmZYC](M ĪσZ:b0Hfra1eӄtQNɦ6(m-gXEہ^ĺ^~=Blr@3x?^?2,X߹.X{u4>-G5 ILMݐ6Cu:t6xhuq22Di4)w1r(@ֱsbȉɟQ=hAf^@ԣЀ|p)QE#1=TƎ@L|ei͐AԦ`xſC^\p)Լ1Xd3p-w5`#P]lP+i h&GϢ'8 ې0 ޿խ" bAȲ0DB$HhVШO+rc 6FBEb jG+mxXǹDkd)v:t}n*TRI%ks> J5MYtK3!WE: @OL@c c&x%!Û|_ѻr͌]: dK5WR gQ&֟Iro YHYǵ:KY$.ApZP$up\DS2o{N !KqF+0 ]ger!W5Ơu@1*ӎj0߬`: $}:X7n6`f| XÑ瀬_,枮œb m#P?Pga ƃxl~]^cW^~{wA 0N@{/h_X$] ,-#l nHy}e3s8&7ʼn_kZst[(R7= u ;t݈. 
:/ -}SLPۗl j,7{{S @ՂS<+YL 6ؼNf( au_ƎZ e}*·mec k_{FyvoVI\iR;Ηxc{쩀J*$!%{fg)AJPd ~=|WtD֦~^1#u gά^O3diA#/L@й DaF(vzXP 0Bͳ:O4|!,y*X˺@y xv I =Ls$Ykű& .N!̍iH{8'~ȖmDl;;TvFcgP>ʹ6y_kgMZ$B$h62CНЛ+6'HڇXe *i}#kevͰz֢hVLָ!?:fԽ̜$%blʒBBk}}Fʞ3 d:]찛Lbԡ J*rOgE'J5kE] / F}]x[8Һ,E/;T<;e}4?c:G96O Ӌ;.e4/B4z$ϸP Sٿm/Gn"DcS#aoF85QAқǘ$\p a'f# ٰ.IY㌱9q&W7.t+[%, l),0À YVaD\p(xBOޝ g< 7`Sװn0I0/g a!@i}ܥa-ϵ-x'Yj.^ geF$~&C{PR,u ?+,v$%q]ڵO5^Dl\ƭ,ȍppk"ƿ=~k؜4OS܂ Vn^#.cg z*e-[ce~%ceX[, M;*J*!˰2ܹXѫ,$GXUi7y[GdTTNHc$ B ЙYكc.8:@QBk{֦<{ĝt.nm0XF=aX4f &J$vAQ2wV]ĞIcCO7kϏJ;V*V(à{[Ȩ- Xd6,EͅKi &9 QA&?^;K dlLld aF9cV/I늠^L*;mH\ Gkmcv<Ͷgg8 27m_;05 ap "v}1ks=~>6QRhxSWCcX ^RNtBkBVLkDXa92ڷLG_PЬGnOj|W{Q+vccB76TZZ IbZ}lͷ[RXo"nو޻lmti8VO<;5bh09!ÞE[}fwef |Ä3V57$"{wur_|TRI%̝xsp٣aG( XbY0:V9YP<{L;Av# XE X.[B{@ =\yM~qЃb@]T n)tKly,ٓiFDBڦl8!vQP)YzB./|%9˱vlN NYa FMC^V " Qc G^f؂RjSИ^\vwBxtK};AEj2ض{c62c$z h@um!H q|-@K3vCr-献ڐ)"ASO3pF*+`0]CĄO d@wq aef]x@Q7y!{/[@ d{͇v,}_~ k05ڼtϿ?pM(q;JȨk`vݰؾyz~h6^z񩈬5%=[Ng/.]Xl8~Qp\"$00P8=9yD6Lo}mGk#Y6wՔy颳PQ=e.Ue>9E!0 2 (ʗSQ*w9 =1b%M\B0Iaڝ+X"pڰ IDAT ?iAO@܂ &@;CƑ Hv? |ho,tGð+ɏ hGHB :1VV5]^D / ZKUϘQYQ)P"ւkaXQxj +\%Ƚ){)0hmv`oA\3P@JxW! zǗAxUK?n[RѢ~7egyóW.WG 6 jI|0¼+6Oo\ J@X +u'7NN{(0a ;J6*Cz܊:~qv"^J#(v)ǵ]I&Nc{@=PxhIH"6&}&k+sg \XVɞ53 VQsYvjJYlYfTepiᨒZZ#Ν{qݗ *Jna*:F'T%^8yРV& O@ :UA`<`LC3P҄͞R?5Q)0bopl$IRN"]xDK=ggɠv̈:w#nj~ȓ0V$Ïph>6&/b=EA %Z/eLf5'HD8DD U4h"!†pKON  9pѢc@?c`ОM_l^ԍ'܇4E-d<5!`{L{b/=/-Gn#H[l=? qEҎ;r5~.Wg j=1D ^h> q N]4"x w[֦m\ɵ"PH Vz q 1E;q8U{nn)t? 
HQԝBq jw2ńyOw,6򡉢nF 5QKڥ5ۂuO}!j X"2@ə7)=51= =.#NY BB kc>#A^BmFaYd!%B[iy-/}casa c1XW&S]"S \w"'}[ n ^7+)7 s`aaaqjj(`l"~Tv(V[eru?l[ ~ + i%8'bJ!6[֭{3m@,wikB.qzG\`F9 GMQ9{|r,؛v{#Pc,[ymݻWeNrk?3?31>>È9c]eWˤZbMdxa}x"P)1,]^9=W;7mĈW˲}e{/Q3JBz»ؠ!l l%5b@/ Suu2jٵ28n C홙?yD /J*JI>coӦK^aQ^irR:hfewOdF8O ly@9Sðz'Ñ8˼`0DQc4ƻX?Qk9.D!i"#);l=!Θl,lq,@ "j'<46~ hnXFUȳ<]a A.(|&~54ڲnOPg;*㛐9"ѣCݶmF# u qU_%b|PV cPkv{:j/.Քnk0٪GsW민{vC_4R3aQ@cB,ZI`Dpgس"޺d=R 1#I(w4$uX-}J@q-RƘ(cH|$K PܐPмX}ʭq]'c~~O}S>UH%TR KgOze|V["alX  q#XPI\kYρ๒)c>0o ,;K65XVw,{-8;Df%2asD^ ăD @)ǀPPT6=CR;,8Á02oDcţuW_2n)W1ЯCWOTfNB6v Sgnj2DE@Աqk`6͏\jٱA6z ${+|4߿r( myb%/=Q}fsd|c$d tPc{6LɳK=gf^KeƱ? c5IUSKb4 pe> %6t3CwXϵYRVaF35[|-dk\߱1ݳg~m'k%TRI%7. XU(![L 16%x>f2kr(G޾bZ[4}4g X{{JSԦW>9)e2W;&т르¤  5b5ƒe])%:5 %0E6nVG 8O\d/IP< ʹ<'Ox$!) 0r/5 S \1{#,ΈȼC֌=}gd`o橣ww/[nz戔wygСCo#9ci(6jaFJ0ʘ&2.5eH,zh!5XW:S0iE2*;CZGQ/HzV1#̖K$G$CwkX~.{iD\26YAϫ|B}SY59:;; /pjhJ*Jn8ۚ_MD0noVlZnJ^E2(ovc+4}=_π RZy?YL&lX2Oj2M҄xe4@<=Oqr7lwN [u!'qEC=$+1]Ha lĭf|je1䌙^uѶ͆'>FHre~dk?lT r43튚ya9_L<1>ȳ..T8&c?*䶗89|ƍ6Z-qL(@<2劂 Ry.3OAYZC/@, f;PP[w Sj7Wڏ@%8@7w=u-V o"lW5 8rf&i5@"!놬ÌHY>Ny=}&=Bϛ[[Nh͏9?cIߙg9SZJ*/'N|Htύ4=33'ON]" @XI#"{8`$ ֪Rn8- JnF { 4Ip U t\2EA01 ɠ=ԳICM~r# ? Br.Pñ$eP)µǕ#),| AX'(@2ЄEwn!v`U1~o{l\ҷ_ cZ6/dZƋ(b#ZiC9;ȵ0JPQIBϥGDql!\7NqEB,a#q/*^p{HXI%TR t?CzE8 DTys?qzR(Dnk!6I~:$T~PYF0R w 搸AcF'/^\}qw(0p t Äd!-醂6Ȁ/tG ̄&Yԫ.\ /p9XQ%TRI%?09Hq꼪{Z f k~ 0Ã3s En xSrj-<2 {aSE|(҂A0mٙ…41~+Ua|i>R'ȡ1tG>|9K8P2Px!/|*^LdCA\xApPppV ZNh1CO$OӜfvpTw{uSű[|+3gZ6oޜkƘ(PV') "%{? /Q*~c\@4oox\=aeЇb/Z &Rp$q!$69[$3v"xaՈ̭kV&>ڿ% RȶAEd{C%㐌vk?rP%TRI%7,/>|2; SES]@#Mnŗ߱2Dv"#1$.H-+Y5D$r5;OÔmeʿ֪WG)8"CƎ(Ȯ ,/bE]Dhyv( +mH6!# wșKpdzĭF@; U(Gwv߃]2@$ YͶ//4g7Cq ׮}5Οuk% &{/}/^I2^w'cL1O**NRyVh$ZYC. 
ѐjHHVg*I^/G9ѓUX琉Ԣ(@SQY,m솚'/0rF7$=sspɰ" M2Bl2 FyZ("(LƘCJfffڵ묒J*G/mϿsiSڦhA;0HP3 HuZjP*Uz\M(h`K)IǴ]!kl~U ZH|M~bdџc54[ : n7>b}۲rԨӗkŸRLJ":S'xaX~<]Qۧ}Pɚv9v<G\nݏc>fW-I*ӆKZVQ˄4G1dWKZ#ź Ij櫓IbX`рu1˃`<4M2GHzǂ+ öov("CtAKy3xq\&h_ZWM1ϗ;˭m/Ă * $|ܺg+bTt}Gvv+Yܪ .<}ic`JQy}HR,!L+kTnG_H9o1Qe= ^ (`p| Mn4a~vPfѠc\8]Կ@8&`:f5)\ʞ )a c,[e6.l.}K.= /\4\%TRI%XA4 W~N/>ß;; ]ܐi& RbVFC4Ip U`3IyFt@Y怈3SdV1L5ӦCnYPXeg$Law0叙1 gI\b$$.7S0X2ԧAWX*b/xPV_!f"!ihQ'iWZt]0-2^)g]?,U),8"7v̐fЂ-н2ύ~(e;?d|Kay5QgjMV . ++ݸ9vWƷk!Syk<|GQJ}V)uֺ8Je"V)cd %Q^k}Ե۴dPr,)/Tр ^gQg4 /XAvK]j 5@=굽]+\ϡ2ÞEZViC`Nx `dݻw2TRI%@[}t~Uhz+NZsYgBlSR ;O)h,pAp%R5ԊrGAǻ -2߀HJox هԧ~.G9S,)9;䋢ebl36n轅Ke|8E34<(255oXI$,O6,1P%KYPCfQ "2fðnw6IdL}?P#7ysBrk_$vұ!1I JI"Xdbm [/!_E3,.Fp9,#$ZdY%Fko(^|Hzf%xdVzsz&ֱ1s|*`J*2;;}M/NLm'1ﬦOϔ00bG]ޕE3`$EF*}T<>WƳ(7p86b(\y^ 'i \qcIVT eP"x!nR.0QЃ )cHU F83ƈ蛭⪆06ո h&!p,k=h a1|ݚ, +cY3XPز⇒Q2pR|,}|WvZ 7ΝxsǛ/=dT+gϞm:tdVR[2}Rb˘`V("0y?|ߨA) hy8I߹U$G{{86ƥY95B%)H$I,?I(9U-JKb AؗsASӵ0X903q挬U *5Dתٖ8XVS"(+}N ~}:k<,'^z]v-}%TRI%p;s'on5L :˘ ,ݞ$AdYW*"1DSH.Ojm-TT H!5r^)| 4 7R:埔wbg߼axƀנ4?)PQGdMmHB8pP[2/aL!2(Uԋg. k+@lӥԤ/ml\2T) fnJY@Gom[h?g X3`|2Z瘟-KVܸEW.~߽YJ޳ܶH.ѣW>"N%7gNéx)+ĈT "Æ!2! IŮ̲-% jn 9pۻk2{pIXg1mµ<^Gy t `1;N75@pv]4y< ܬz"˄,dHCw@@2ABAK}H`Dk}d~~;wVJ*M0~#gt{o!-!u(oTArڜ˚Ϥcg<&b L;<*tЅ۷ Zm: jA2`Ċ"S#PTSR,e>ho'Ijz{1lLGsނ`g"N$i0M x/u_蟖Js 8&᠖$$esHCKWmuKBN=1fEk?|Bi'+J*Wvp붇Ӻw$7!i^G"i8B?6 3o( \҈"oG{75qr;>)6pX"D;@oн:+MP|<H")d(LGoo\0N}fH ]uN'¼CM97ldL~^ɱTar8DX1C0:VXuGW+%{>5b׉v}1o\3A86W7-ϟ$USK_΁¤Vrr{i#g>㟘zKE IY2Y-GY`XY~z?UV$ Ⱥya&zS }ggNbEJ0sijuz*calt/ΙH}LMЈnT^k,jWTY}A9d(֯ͽ3,\TRI%TrI?~/#|}(A~a -[C*oLeȰ@$[+:)Pb)Cخ] Wĝឹ,*n6P. 
Aw(M2}P ScBve-KfSQoauܙ/1*6PvzaklV^;ڗ5>$gj~Qeĺt3βXf}|-s,e?ARvI# i߽w+ 93gδ?O<{(zl:Fja, IYGQNqCE"dw_C\0kdS#:(v0@DQVJ'$Q7TO^Ĉg^el5Y^$tR"!Y }.sTfZ J*$A[g8jF_4 ώ /ڠvOg"C$fgk%wk ed׉ MJ!0[k|X,Zo֒ ,^bbmBkY{j}5r7e'ylb{XJ֑S_Q|g`:YNrv}X ǯ9NwҰsoAf\3((.M,nS(F R'2zJI}i~ܱ˅*nRi֭['֯_V1fRm-27:Mj-HٺXݥuwa g^3!Jrx[׉/pB 't|^VNգoԡXkװQKԹzaRwXj`+elr&}uo8q={ׯTRI%ܒb OmllGn5_Ry>\BWs~_ЭAY0gJ LBJ AOKhQ@abCDlP_h Q 1-Z' z1[Q,cMZU|hE<*B<t_z}೻wI*Rw,sw}[SSSjUJm]X\oX@ZڲUJ[g mA nNiOڈ|UKuWo QʠMd!@`AB0m-#+W/~'?ɷ棈z@'E!~/g(k7R#d}1$=6қ}LGCyc@_:ԛjtCȿIX&HccXloٵ%}!7|otΩTRI%T2 чַއ?_PW_c+ YM,.!)eJ& <С,CYD)8^@:vc< Wċn4!φD)ύF9buԤ4O`y VjS+)Uܶk vォ^8PFAs ac)xR 7x#nrC[ kx iǛ3Z`MйN4,c?| Hy ȲbY +͹ '>~(̲ALr\(JرCq^-xD)55,j/]3P{ĭ接0CŔ!-=yo{#`^ 2Ħ,-oCqT]z!uT`Ὦ1YZ=mWJ<oq[I%TRI%<{N<Ѻ3wa+O$):gxq%.ij),@rF"R/qVwXSYw@ 0j΃Lx[}YG( Cc2ԧ@$/Up c$, 5/*y@VuxX!RiIN81W_f٧j$oÔK$bGPHGUp+)AtDtUu 6].OA!Ⱦ/c(D|ߪC1m"0ˀCSg+8H^W#TRI%?h={ |%Q콍*[%Tq1̀OݝH6L 0 eB2$*#ƬlH=cϦ%qw[#_#L¤H=KBwlP W^M& wNnㇾYkNV B͉'L!AmN JX\/6O³ ~Y&B8p'D/kYJ8v#ܚA)Viָ3 PQPi1CdKD''Vd1mY*W}3rİuuhYU,}nK8)/!iV_Y+`A[۷\(.ڳBorug-.pcd+4,gοÁ[T>J9qDСCgZz>3DE!T @ 0blY{ ;LnW`$#Xo{U׍#߻ojI#`b>Ji]tR~O?~rzYk .klkA%.;<5&zW~.(肆WW_Oy*Ggϝ;7kkZ$IB[BDZ)SHqHU*kU*ȵIۈC|X}i曕)$Zc98ℲmY!MPJKW2 gkPF ͂DnF<)F7 l8͒!:t-[ٹsg|CUI%TRI%IΜ?gbcԒ poSҪdىm pAfAr 7t="\L \ 9{Xyg UM$:$B~ $M--e nÙOm89?0 'ujW͒gؾ={wo7T`ܹ>+\YYscw1z?aGW+;T)z$u t6V^ ` ۤm͜h!'PfPI(=4]wL(M#bkZm1f>tg~ؓO>Y#TRI%|dǎԩnνou3JIX>քpq/ pg C⧝B^ADmTjʮa"SR)Kъ~o$UteYI,wet/l;`Нc8 a˶؝ع|. Z< ]VR!%cc"d%;s[`f/o M|1JS,J|akŞ1b*@q AZ䪷& 8)51uL,Js)\AmОM:dpF~ҺDׂ+y.#2_|?|:cR1GnB1//MR? [9E?TJzzGە2G|}(1-|SJg4j!wjȡ?`Ck ML)z b ÌvzUy[>VKE+={v Pd}='.18hk_gG KRjմ|O<77mtTk *a (e fT9X(tAȋ^"g1w[[I}Zdr c@L8!vYB%F7e#ƘN篟~.J*g̙xϿlϣ1SR`G{$gQ9(9VmJ{rUuHAO~5 䊱?=ѝ3e}Eůg/AI-`:pI XMV,VIR` }D0Qc54xu߀Y6n 9@ϊvF4E4DaBAKFexß b3C-q,ǻG1b1k#e0Q` I[)(<5۽~\ɢ&*[M,'NaÆwZ֬Rj1f6jehR5%*iNކ֛Aqd@j ty}- HJ(ْEjeKlq;ڱbG>g;Nx:=>t mO҉vJ{k9J;m3"%k%Hn @b_s꽺~z (ql {ֿHiH"D[2@ VԘB~)!/pl󺦥?yDN_ Qdn?jE2~s1K:&lLcdPALEj"%Rg("z^3}*M2d*UTy@an )f ӿ 0)K)Bz8 rI7YYEٖryT1!Xc'H,2&$)Ӷѹ#gXY?[WWw"N= E? 
mUTU suĉ',T'GrŠ5D9&z:ƒu$$5SjA_r֕{ ZS%zT9s ڢ-u+4rk_i$k<Q&Cs7987$BHGS]υz`$:Пv fCs98fZ -F:ƭ̈q, _@r [OJt3A Eغڔu0b&DUP+O֦!Ur 8ԽCOYP0+JBRNv)w42RVG/ $ɘ8'6Ź`کRz]Y?$0 G ]]]cwyӧOw<RZ*(ܲ8W;\aA:lv}2CKjBqXcu$I0%?G+Q' BHp$ն=9*~lׅvx4ޡ}}}Ϟ>};v92 0̒]?}5 66vrwMc@YB(ODPI]X%UƠ$UvZQEԲ ף!vXU8lCUx$(@LYcnM IAq$>%ľ#f}%WA)Q/jRhg鉣6 .m"n3Xq󅜧$bUTp%dLR*d r~Oo60,,Peoox2>4#HJ)Pb3t_)s$!Y +i*Ts0/DK&!OA$SADsdty<-T؈;hH\IL\Hܹ[MܫF===<|%afQ׷/;ѷ~΁{Z֌ 4h ܨ c*B^RzCg#l(vTܱ,cZj` :c87[ ,Ga"6X9(E [[j[j, 0fz{ㆶ BA9LuiHSVj*)6Q(b++1541r]::.ߢ lv8Sn $zr统{,XY ǃ 8dNeRUkrm(4f'f;ي=(b\$ἵIJ8zBR##Cʁ,@ jp &QMqjȅp {:O$w 0 Ϗ{L W"Bj7SB s~zOBS%G50.SANlB9qՂrT|g6 &pAՙ2UeDl 35 rRXnnի3*L 0Iզ!Y C =N`UoX d+HtHZڟ3T8=5Z7X(TEo?Ƥ%=Lv^z3 dwёRkϯ[5\sO !R nݻ'Pu P苗TMДCG mHXGbĈ@Ţ4c˭ֶw.bmnυ1j >nݺS[n-:0 0 ;61 n뮣k׼d vP3nF=԰n>.0”D*t ZY%:DIЖUA4jRdBKKt\.TQ1o2HI}iel(Yz2+ kT`: IjDʄJBʮXSTN)UpWˌ-P)Ȏ52rXj#C9Ϊ/H VѢ1Hd;F:j=ű)qR/=]ɋ#^D===ӟ'ݻwkXqRʠk5.vz2HT5J. .@m& 22Z:1卄 pυق[/f=Fc?>,0 0'+80zkF~m"נ;{E ҒM@-5#%#97']3ċJgcbGKTkV{Zı\ |C^tƄсy0sn9L33MNbZ[$bHf i L g3=ϟ'3DrLџ!Yza^'[3W )Pr_1vt}XYDT1k1)%~mjj=?k➺ӟ ݄: Y=1@}.Q95E^I%LlBZW55ьe/Ћ/د]^Y/ 7=1_1ő%>L&s7"~[^&0olXY5~&@(Ga@Ȗ^us,yP;f+$ACM5IFO- w%]RΪ~_HI*vĕTD Ki>m۶1 0 3$>Z (ә~ؗ e֙]N2Pʹ[b*tӗA&jw38?y qRs=Jf1IZhR_!ԿhaFDh/+ЬaySIAbfW@vd~N$+b=U.N5HKpcv:fH}V`vF q>SiJʟ=S3SOYpt);" IDAT СC3G|LGITQ ʁozDȚ],Z<֡ 괌*Qށ,:%SL$t1h鲬鋓8[01$ i BߥGE9 ISjhpO*!BadGPx'r:0 ,U՝'_@$^A{aF5vp ac`L鍞EOR*aV@3ǴץS |4.MAl|X8^՛&" )7 gJy.J%^`>4$VӃ :F dʨb֢>.tJ1ς&Sh[[AC}X+97#??|a(vE/Anԭz`];קL<~-ޅa5"l<[΅R:Gj"OusUBĕB.J%vĕƸ߻ǝOs-ciMP(lxu۶m 0 0+6wISaJfTN1:ոc";,ǘ XxsC M:G9\Wqus|tޜvH[lq\;SgoddT-+\8-P[_gs0=ruEʉ㆔i !RU&djRayY1*ю8++dQ_mIadxxxرt:}bٲeӈ î6BTdΑoK@7zqWٟy*QLBO8ZK -y GB[kTp!AK(UcpAl `O>0 0'ȝ>[mnLAK@{v M@UO /z@*9ҝ싚NhI1WUc^bWt.9}:1> ?F,,qewwhsssؙ|$gIX:6ofקhV K9WHˣ]TOZXhTPɅa#'f0vu%s3-q3JJkٹ,0 0<;8}ǯ_Թ7j[Ec$n!`#ʏ>< 7`em$QBο]+ƽbߔy,PPI:/m#jH;씝$4"( zȎ` BX9HX)AT>mtz1j12lV㩫4?țCN~}?9\J`q)S8{PKKKW__ߙ)Rڹ8IG t"(6iz&?nu !p&<< \%̄=~by_Ulj#u%hP1>Ṙhp{{_=x1.aayH_{v./1j_ò#މ|=DP‡~C4@MAd#BthV*2# :a}?fԲ6RP.dN ˢ.APNCOAXL7Ca|%d!(xƈ]g0_hvCa='82t4Lj8[ ([gozP$d2gNXb&!DwLbMcY 
j0RZ"՘j;(Jקҥ.+,71Swh1*Hrug*ܔ*P uҙ*Im?|0 # 0 ==32pf 6{i\ لp a^z:=syMhLY8uRqK58R1)IsZJTڪH:ƌOI [ "XA@ 0ۧ :C(DGi3vӢM= EE@6$گ$A#m .3 GX 333&'';WXB?Glv1@,}28›Y/ )Q Oy:B@bM&#f s[$("K==wb 1c.ӹ᬴ %HW_]K[d2d2{'0 0̕ftĉNvg^kJ4a :sK,ܖ5RI43ʙP7fY?|rJU蝻v)` HiZt|60$A F]E@@"#dcLb}@vY5cb)&+F2"mJ7XaN/rГ:ujJ^0GY- uRJ#cBe|>E! (۸]F5F#(X-MC}8- !r-Q sOc=V0 0K_xޚVjmPUx?r9LptJ2MلAH"v8PT T aC*QPB(\a1Ў<m~ݟdM=x 7%ӡ%4WL^SAnVV}OAO-P:G/F8J'n0癱?X.}v~[x衇]R !Vы06(XlmLߜ*%N]/=1b(̑PJ[m0M!:wzP1f%R 7dU Tt )eAwxx۶m0 0|C5-/UHE'{Kp D$QiA$Z9ƾ-s-.zX;|r3*H\nm|zPO8@^ XPDEq─Sg|o%FqBJ FH.K333/lݺu1aY|7=YoAM5ʝ(,H!M鄥$E ZB-xؙ"xGgUoqb*!BInqsIpD  #Ѝ4U&P߼80+Kz K1ĔC6t[lKW&j ]:LyQٵ 3 &_?+ 8¼S>O]H$~7rGzP%/Ts.n&{{PX!$O5$ ECAae]=$_v>' 98\]%#Bsܶmea% Ih{HU q]T` hsBB{j`-NM8ѷ@I썤IuZx g+!_QxP۲R4qp.kO-{Ni$z"d#8a40duI RSOS?Ég0ǏZt:}j<I Tc?4 BV֓2W Ux^X=q00Ջ%n5ڗBuƉf 3YaaǙcz74N,oGf pm踵oI8 H^[Y!nMvT #ƍ`/=Xw.G 8LP/CJBՆ:E&,s Zp?Z/o6J [j1h2V#,iP~i7Yjs$H){mXr彾+h C[ukI/F+goN@6>ѕ`+!4e5Ed8}:B,jK1}f8;|,0 0&cnW-_y[/!MWV帠Ybޏq(#gO@V%f ;txs:T qzkt`Jat@xаl9$Ri7&I^L:bte4Y$J=?Tcl%MJL~srt⇯=Uv08\R6oޜhmm]JBDl,/f bnu|smpC*IلŠJY)!!A7N0KJ9fxFaa?^ᆻk%@*rRIq9-h9X#)1hS-|_!s'FK)푮HdcLZE"$e5+!n[b%29>zd$V,붎t?;IJ$]:zؕ/Ge5%/]fMR ÛE$` Fme9SD_ 3@RHH^g> jw5pEKUDőo3Kxaa(݅s]$&r ˋ N˥495j&R\"_$8ƂI״T]B*X GXi1sHYjJ/q%=7rt LKq(q-b C'}Zbh7v0T]! U[sjeHֱ`"jgb,U:=wZy c8z;Yafj+nذWy睛`cX\%Xt_5q &T֛ODPh/tUj tq_e4 NHwwÇv3 0 k ᦛn5'%-&V*Z̪e!q M݈P#Hzf%` Q d}{2UNDZlCZeB`erJj4.h*!ĝY=;bG@9|mjlo=oNU[ sak֬ݴiӪ;i!ZӋ \G4 (1OPdsB,R!D(E&9V0 0BCJ\wk6z ׀v {}b6?k SK0TTbq73)0XFD[W'yA\ uhvƈ*Q1 @pV&*IO2́o\ᖫ7su߿[P8(\p"ͣ^s#n04BFH^{?ӧYaam({z]o=%#”ǁVI&+M:(aD/-rUqak~H92)iT-XҖUT3npQsl׌P%znZI,iew8W D>_]>F#UYr]=kc 9GD"1)X@g=㏳őaaK;\>"7)_Z]MH nHBd㦌 qR#kY+zG4vB,b`qG?ѕ F"bBTf+'nˈswqf?S,C[0 0G3+]9\f zN '*3 R$pCc\ B:XTqli7Ah. 
CC D5-?vf0BYU/D.QBiojY/i-@/w~Ws#x`qO~]/MO!O∺0.0ȸ⅑۶m[9) 0 ,@y{ wG!`Jpr4@gYzތG9#nبj2Fk@PiJx& r0e-h/Vݣ8OJ|őDd-SS8%Fѹ#+oDR5G-S:;[&Wy8;zT* |ׇ-6.j_hl (i6A1!xk?aW]}:)!R_@B }cQ†I9C%9Ҹ.rJziSc}@Ŕ٠vXRc9Vb|o^^ dKYwt!Y*ңq ս1&Ւ^̾R~s6`q|ӟ^nO& !VUr^($ x7f_޵k׎92 0 sUWbj3dFϻdf| `ܸY%?)1u -A$؎ l8WH.5і1&ݡ:DR/Yrxzk4E96$XVIv.'f\Y{7w<9ƿU;f^#}CkkkW__G-IrlS^1 88W< WsBFrܫ'N_aa-[zOORʤ%)庰J$[JX?hƇ$VQ+_U)B8NqAxRn3>c=@CXs'uѱ{|#0#̂#,6mt}2ea?R*'\L9"(Apbjjʼn<,aa |O\~ ⟀un0[:ctJgh1% awh _TF˴Jj:i(-O!c\sY46k׮n AD\{񹄬ZZeR', őyЭf\\oo6aafQ|ՃnpmuAj[mJPMGVIJώvs\m=S+9,E;8%5tkv@$HO)ODk?y "6sF3 "nn勈bΝ_ aaH>/ߵ&_o# [3,~Ҵ`A_Ѽ@[Y8nf{v@D" M-V@7 6dcCZ![tBv8B~-5?غ|n3WGE˃>XA| 6H)/K ʅBX2#q˾ځ8/y䞃00 0 C8Pk~C6─-Z[;ZBxQ;"Ƥ2@Ѿ ^" t-]Vi'!; SAvm+ 77sZpjZ[[oB*h~'ǣZ"3[9M<0oJd!8 0 0ՑR7vXrݯ@u1Ee8UJ{81m8C5}"]Mf\-q( ljb)ӑ( (qG0\\Ul`3v9}H{{3SSSn);ʸjq^鶽Br> 00 0 3;}cY]|x&3G8PQT4My j%˰ :̘TχSk[D\x'5F#L2dfhO0Y򴵵͛ںɋ9>oGĈ+kDPS?x' bVJs^xw;Naa.3lv4_!—bᆚS֨$ᬖDdO{OB"*gi|*?ѐ7Dm LL1e_xT|jG^~xݞm[8q1T+B[^P~ݽg>Oaa>7oN]宆E7' INKG:(ٰǁ$x2XZIRV.HB]hA'XЊ%V:TbߖřǞ O80UزexǒsbAlBlBH\~l#qG(H$ Ν˿Ӆ1 0 0A܋u>]е clc_r!2#(䖄#u ^ġqHˉQT!_{o}?"ɧ3`qa.@~9@c6e.A":G.!A<ٹaaG =oZZ]_P~AMKCV6rZB^nE-B]C1jrH@\VF){|LiGXa9q-ܲAJaDYJМSn%վH jީc$j;vj>0 0̕a͵|P#tD57h;%) Q6T TJΑ&@+DAke8a@Qʙ,b^z|Z1de9200P# 6oJtF=Ʈg1,Xaw;O+ !B~&rAI7%GQ۴bѣGwݻ70 0<]?komdkonENAĦ. mesD x.BNd, Ya:QԧH8P7Μ:ׯmSY(p +\B|뮻n `bؚmn{]HwȊ8:22633Vsaafrj\Ui!W%»0„%2D{^7gP sr[N<# s []nvwK)7 !שNln;i+xk-[3 0 ±njnX}斵7Q~WCGtȨ[M4*gHpkIk b-5ᯒ; (6WGnZSYh1̥%*Yމ˿744"ɭqsq!qm|MsRvڵsΟ۲e+?|c$a#<+{wPYaBa߱cǞn82 0 ,p6nܘ# sg>ӊ !> l(45Uv0 0 ,ngb֧>Fclø9 $0B hر >0;'۷e8=[GjP8#bk/ # 0 0p˛x˅b"7FcHSVf1f3(rd? 
#bZ]0 shkkǏM$'-XDK$}"tWO# 3O8wmvLMM !k%JrI seeaY´ɑW֖}DB8-(H"D", gSYp +C6o\׮YD"q#$|(_:Vϕaal-Aܲ=ě$B]odMm/v9mo̢# 3t={gŊR4%TBuȑg?яvZ3 0 0̅ذasSwy}r?COeOy|cO~20Gf֭K/_|ݍ7!:K?~?Gaa梑o|oc{6}Mb5ݑJ۾g2 0)%~u?7&Sbaa)[l35k6eaaafI>OaaaaaaaaaaaaaaaaaYE@IENDB`SpFFT-1.1.0/docs/requirements.txt000066400000000000000000000000311457701740000166460ustar00rootroot00000000000000breathe sphinx-rtd-theme SpFFT-1.1.0/docs/source/000077500000000000000000000000001457701740000146705ustar00rootroot00000000000000SpFFT-1.1.0/docs/source/conf.py000066400000000000000000000125601457701740000161730ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/master/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # # import os # import sys # sys.path.insert(0, os.path.abspath('.')) # run doxygen first for code doc xml generation import subprocess subprocess.call('cd .. ; doxygen', shell=True) html_theme = "sphinx_rtd_theme" # html_theme = "bootstrap" html_theme_path = [] # -- Project information ----------------------------------------------------- project = u'SpFFT' copyright = u'2019, ETH Zurich' author = u'ETH Zurich, Simon Frasch' breathe_projects = { 'SpFFT': '../xml' } highlight_language = 'c++' # The short X.Y version version = u'' # The full version, including alpha/beta/rc tags release = u'0.1.0' # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. 
They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.mathjax', 'breathe' ] # Add any paths that contain templates here, relative to this directory. templates_path = [] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [] html_extra_path = ['../build/html'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = None # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = [] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. 
# # html_sidebars = {} # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = 'SpFFTdoc' # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'SpFFT.tex', u'SpFFT Documentation', u'ETH Zurich, Simon Frasch', 'manual'), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'spfft', u'SpFFT Documentation', [author], 1) ] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'SpFFT', u'SpFFT Documentation', author, 'SpFFT', 'One line description of project.', 'Miscellaneous'), ] # -- Options for Epub output ------------------------------------------------- # Bibliographic Dublin Core info. epub_title = project # The unique identifier of the text. This can be a ISBN number # or the project homepage. # # epub_identifier = '' # A unique identification for the text. # # epub_uid = '' # A list of files that should not be packed into the epub file. 
epub_exclude_files = ['search.html'] # -- Extension configuration ------------------------------------------------- SpFFT-1.1.0/docs/source/details.rst000066400000000000000000000151701457701740000170530ustar00rootroot00000000000000Details ======= Transform Definition -------------------- | Given a discrete function :math:`f`, SpFFT computes the Discrete Fourier Transform: | :math:`z_{k_x, k_y, k_z} = \sum_{n_x = 0}^{N_x - 1} \omega_{N_x}^{k_x,n_x} \sum_{n_y = 0}^{N_y - 1} \omega_{N_y}^{k_y,n_y} \sum_{n_z = 0}^{N_z - 1} \omega_{N_z}^{k_z,n_z} f_{n_x, n_y, n_z}` | where :math:`\omega` is defined as: - :math:`\omega_{N}^{k,n} = e^{-2\pi i \frac{k n}{N}}`: *Forward* transform from space domain to frequency domain - :math:`\omega_{N}^{k,n} = e^{2\pi i \frac{k n}{N}}`: *Backward* transform from frequency domain to space domain Complex Number Format --------------------- SpFFT always assumes an interleaved format in double or single precision. The alignment of memory provided for space domain data is guaranteed to fulfill to the requirements for std::complex (for C++17), C complex types and GPU complex types of CUDA or ROCm. Indexing -------- | The three dimensions are referred to as :math:`x, y` and :math:`z`. An element in space domain is addressed in memory as: | :math:`(z \cdot N_y + y) \cdot N_x + x` | For now, the only supported format for providing the indices of sparse frequency domain data are index triplets in an interleaved array. | Example: :math:`x_1, y_1, z_1, x_2, y_2, z_2, ...` Indices for a dimension of size *n* must be either in the interval :math:`[0, n - 1]` or :math:`\left [ \left \lfloor \frac{n}{2} \right \rfloor - n + 1, \left \lfloor \frac{n}{2} \right \rfloor \right ]`. For Real-To-Complex transforms additional restrictions apply (see next section). 
Real-To-Complex Transforms -------------------------- | The Discrete Fourier Transform :math:`f(x, y, z)` of a real-valued function is Hermitian: | :math:`f(x, y, z) = f^*(-x, -y, -z)` | Due to this property, only about half the frequency domain data is required without loss of information. Therefore, similar to other FFT libraries, all indices in :math:`x` *must* be in the interval :math:`\left [ 0, \left \lfloor \frac{n}{2} \right \rfloor \right ]`. To fully utilize the symmetry property, the following steps can be followed: - Only non-redundant z-columns on the y-z plane at :math:`x = 0` have to be provided. A z-column must be complete and can be provided at either :math:`y` or :math:`-y`. - All redundant values in the z-column at :math:`x = 0`, :math:`y = 0` can be omitted. Normalization ------------- Normalization is only available for the forward transform with a scaling factor of :math:`\frac{1}{N_x N_y N_z}`. Applying a forward and backward transform with scaling enabled will therefore yield identical output (within numerical accuracy). Optimal sizing -------------- The underlying computation is done by FFT libraries such as FFTW and cuFFT, which provide optimized implementations for sizes, which are of the form :math:`2^a 3^b 5^c 7^d` where :math:`a, b, c, d` are natural numbers. Typically, smaller prime factors perform better. The size of each dimension is ideally set accordingly. Data Distribution ----------------- | SpFFT uses slab decomposition in space domain, where slabs are ideally uniform in size between MPI ranks. | In frequency domain, SpFFT uses a pencil decomposition, where elements within a z-column (same x-y index) *must* be on the same MPI rank. The order and distribution of frequency space elements can have significant impact on performance. Locally, elements are best grouped by z-columns and ordered by their z-index within each column. The ideal distribution of z-columns between MPI ranks differs for execution on host and GPU.
| For execution on host: | Indices of z-columns are ideally contiguous in y on each MPI rank. | For execution on GPU: | Indices of z-columns are ideally contiguous in x on each MPI rank. MPI Exchange ------------ The MPI exchange is based on a collective MPI call. The following options are available: SPFFT_EXCH_BUFFERED Exchange with MPI_Alltoall. Requires repacking of data into buffer. Possibly best optimized for a large number of ranks by MPI implementations, but does not adjust well to non-uniform data distributions. SPFFT_EXCH_COMPACT_BUFFERED Exchange with MPI_Alltoallv. Requires repacking of data into buffer. Performance is usually close to MPI_Alltoall and it adapts well to non-uniform data distributions. SPFFT_EXCH_UNBUFFERED Exchange with MPI_Alltoallw. Does not require repacking of data into buffer (outside of the MPI library). Performance varies widely between systems and MPI implementations. It is generally difficult to optimize for a large number of ranks, but may perform best in certain conditions. | For both *SPFFT_EXCH_BUFFERED* and *SPFFT_EXCH_COMPACT_BUFFERED*, an exchange in single precision can be selected. With transforms in double precision, the number of bytes sent and received is halved. For execution on GPUs without GPUDirect, the data transfer between GPU and host also benefits. This option can provide a significant speedup, but incurs a slight accuracy loss. The double precision values are converted to and from single precision between the transform in z and the transform in x / y, while all actual calculations are still done in the selected precision. Thread-Safety ------------- The creation of Grid and Transform objects is thread-safe only if: * No FFTW library calls are executed concurrently. * In the distributed case, MPI thread support is set to *MPI_THREAD_MULTIPLE*. The execution of transforms is thread-safe if: * Each thread executes using its own Grid and associated Transform object.
* In the distributed case, MPI thread support is set to *MPI_THREAD_MULTIPLE*.

GPU
---
| Saving transfer time between host and GPU is key to good performance for execution with GPUs. Ideally, both input and output are located on GPU memory. If host memory pointers are provided as input or output, it is beneficial to use pinned memory through the CUDA or ROCm API.
| If available, GPU aware MPI can be utilized, to save on the otherwise required transfers between host and GPU in preparation of the MPI exchange. This can greatly impact performance and is enabled by compiling the library with the CMake option SPFFT_GPU_DIRECT set to ON.

.. note:: Additional environment variables may have to be set for some MPI implementations, to allow GPUDirect usage.
.. note:: The execution of a transform is synchronized with the default stream.

Multi-GPU
---------
Multi-GPU support is not available for individual transform operations, but each Grid / Transform can be associated to a different GPU. At creation time, the current GPU id is stored internally and used for operations later on. So by either using the asynchronous execution mode or using the multi-transform functionality, multiple GPUs can be used at the same time.
SpFFT-1.1.0/docs/source/errors_c.rst000066400000000000000000000001021457701740000172310ustar00rootroot00000000000000Errors
======
.. doxygenfile:: spfft/errors.h
   :project: SpFFT
SpFFT-1.1.0/docs/source/examples.rst000066400000000000000000000266001457701740000172440ustar00rootroot00000000000000Examples
========

C++
----
.. code-block:: c++

    #include <iostream>
    #include <vector>

    #include "spfft/spfft.hpp"

    int main(int argc, char** argv) {
      const int dimX = 2;
      const int dimY = 2;
      const int dimZ = 2;

      std::cout << "Dimensions: x = " << dimX << ", y = " << dimY << ", z = " << dimZ << std::endl
                << std::endl;

      // Use default OpenMP value
      const int numThreads = -1;

      // Use all elements in this example.
      const int numFrequencyElements = dimX * dimY * dimZ;

      // Slice length in space domain. Equivalent to dimZ for non-distributed case.
      const int localZLength = dimZ;

      // Interleaved complex numbers
      std::vector<double> frequencyElements;
      frequencyElements.reserve(2 * numFrequencyElements);

      // Indices of frequency elements
      std::vector<int> indices;
      indices.reserve(dimX * dimY * dimZ * 3);

      // Initialize frequency domain values and indices
      double initValue = 0.0;
      for (int xIndex = 0; xIndex < dimX; ++xIndex) {
        for (int yIndex = 0; yIndex < dimY; ++yIndex) {
          for (int zIndex = 0; zIndex < dimZ; ++zIndex) {
            // init with interleaved complex numbers
            frequencyElements.emplace_back(initValue);
            frequencyElements.emplace_back(-initValue);

            // add index triplet for value
            indices.emplace_back(xIndex);
            indices.emplace_back(yIndex);
            indices.emplace_back(zIndex);

            initValue += 1.0;
          }
        }
      }

      std::cout << "Input:" << std::endl;
      for (int i = 0; i < numFrequencyElements; ++i) {
        std::cout << frequencyElements[2 * i] << ", " << frequencyElements[2 * i + 1] << std::endl;
      }

      // Create local Grid. For distributed computations, a MPI Communicator has to be provided
      spfft::Grid grid(dimX, dimY, dimZ, dimX * dimY, SPFFT_PU_HOST, numThreads);

      // Create transform.
      // Note: A transform handle can be created without a grid if no resource sharing is desired.
      spfft::Transform transform =
          grid.create_transform(SPFFT_PU_HOST, SPFFT_TRANS_C2C, dimX, dimY, dimZ, localZLength,
                                numFrequencyElements, SPFFT_INDEX_TRIPLETS, indices.data());

      ///////////////////////////////////////////////////
      // Option A: Reuse internal buffer for space domain
      ///////////////////////////////////////////////////

      // Transform backward
      transform.backward(frequencyElements.data(), SPFFT_PU_HOST);

      // Get pointer to buffer with space domain data. Is guaranteed to be castable to a valid
      // std::complex<double> pointer. Using the internal working buffer as input / output can help
      // reduce memory usage.
      double* spaceDomainPtr = transform.space_domain_data(SPFFT_PU_HOST);

      std::cout << std::endl << "After backward transform:" << std::endl;
      for (int i = 0; i < transform.local_slice_size(); ++i) {
        std::cout << spaceDomainPtr[2 * i] << ", " << spaceDomainPtr[2 * i + 1] << std::endl;
      }

      /////////////////////////////////////////////////
      // Option B: Use external buffer for space domain
      /////////////////////////////////////////////////

      std::vector<double> spaceDomainVec(2 * transform.local_slice_size());

      // Transform backward
      transform.backward(frequencyElements.data(), spaceDomainVec.data());

      // Transform forward
      transform.forward(spaceDomainVec.data(), frequencyElements.data(), SPFFT_NO_SCALING);

      // Note: In-place transforms are also supported by passing the same pointer for input and output.

      std::cout << std::endl << "After forward transform (without normalization):" << std::endl;
      for (int i = 0; i < numFrequencyElements; ++i) {
        std::cout << frequencyElements[2 * i] << ", " << frequencyElements[2 * i + 1] << std::endl;
      }

      return 0;
    }

C
-
.. code-block:: c

    #include <stdio.h>
    #include <stdlib.h>

    #include "spfft/spfft.h"

    int main(int argc, char** argv) {
      const int dimX = 2;
      const int dimY = 2;
      const int dimZ = 2;

      printf("Dimensions: x = %d, y = %d, z = %d\n\n", dimX, dimY, dimZ);

      /* Use default OpenMP value */
      const int numThreads = -1;

      /* use all elements in this example. */
      const int numFrequencyElements = dimX * dimY * dimZ;

      /* Slice length in space domain. Equivalent to dimZ for non-distributed case. */
      const int localZLength = dimZ;

      /* interleaved complex numbers */
      double* frequencyElements = (double*)malloc(2 * sizeof(double) * numFrequencyElements);
      /* indices of frequency elements */
      int* indices = (int*)malloc(3 * sizeof(int) * numFrequencyElements);
      /* stop early instead of writing through a null pointer if an allocation failed */
      if (frequencyElements == NULL || indices == NULL) exit(EXIT_FAILURE);

      /* initialize frequency domain values and indices */
      double initValue = 0.0;
      size_t count = 0;
      for (int xIndex = 0; xIndex < dimX; ++xIndex) {
        for (int yIndex = 0; yIndex < dimY; ++yIndex) {
          for (int zIndex = 0; zIndex < dimZ; ++zIndex, ++count) {
            /* init values */
            frequencyElements[2 * count] = initValue;
            frequencyElements[2 * count + 1] = -initValue;

            /* add index triplet for value */
            indices[3 * count] = xIndex;
            indices[3 * count + 1] = yIndex;
            indices[3 * count + 2] = zIndex;

            initValue += 1.0;
          }
        }
      }

      printf("Input:\n");
      for (int i = 0; i < numFrequencyElements; ++i) {
        printf("%f, %f\n", frequencyElements[2 * i], frequencyElements[2 * i + 1]);
      }
      printf("\n");

      SpfftError status = SPFFT_SUCCESS;

      /* create local Grid. For distributed computations, a MPI Communicator has to be provided */
      SpfftGrid grid;
      status = spfft_grid_create(&grid, dimX, dimY, dimZ, dimX * dimY, SPFFT_PU_HOST, numThreads);
      if (status != SPFFT_SUCCESS) exit(status);

      /* create transform */
      SpfftTransform transform;
      status = spfft_transform_create(&transform, grid, SPFFT_PU_HOST, SPFFT_TRANS_C2C, dimX, dimY,
                                      dimZ, localZLength, numFrequencyElements, SPFFT_INDEX_TRIPLETS,
                                      indices);
      if (status != SPFFT_SUCCESS) exit(status);

      /* grid can be safely destroyed after creating all transforms */
      status = spfft_grid_destroy(grid);
      if (status != SPFFT_SUCCESS) exit(status);

      /**************************************************
       Option A: Reuse internal buffer for space domain
       ***************************************************/

      /* Get pointer to buffer with space domain data. Is guaranteed to be castable to a valid complex
         type pointer. Using the internal working buffer as input / output can help reduce memory
         usage.*/
      double* spaceDomain;
      status = spfft_transform_get_space_domain(transform, SPFFT_PU_HOST, &spaceDomain);
      if (status != SPFFT_SUCCESS) exit(status);

      /* transform backward */
      status = spfft_transform_backward(transform, frequencyElements, SPFFT_PU_HOST);
      if (status != SPFFT_SUCCESS) exit(status);

      printf("After backward transform:\n");
      for (int i = 0; i < dimX * dimY * dimZ; ++i) {
        printf("%f, %f\n", spaceDomain[2 * i], spaceDomain[2 * i + 1]);
      }
      printf("\n");

      /**********************************************
       Option B: Use external buffer for space domain
       ***********************************************/

      /* from here on spaceDomain points to caller-owned memory that is released below */
      spaceDomain = (double*)malloc(2 * sizeof(double) * dimX * dimY * dimZ);
      if (spaceDomain == NULL) exit(EXIT_FAILURE);

      /* transform backward */
      status = spfft_transform_backward_ptr(transform, frequencyElements, spaceDomain);
      if (status != SPFFT_SUCCESS) exit(status);

      /* transform forward */
      status = spfft_transform_forward_ptr(transform, spaceDomain, frequencyElements, SPFFT_NO_SCALING);
      if (status != SPFFT_SUCCESS) exit(status);

      /* Note: In-place transforms are also supported by passing the same pointer for input and output.
       */

      printf("After forward transform (without normalization):\n");
      for (int i = 0; i < numFrequencyElements; ++i) {
        printf("%f, %f\n", frequencyElements[2 * i], frequencyElements[2 * i + 1]);
      }

      /* destroying the final transform will free the associated memory */
      status = spfft_transform_destroy(transform);
      if (status != SPFFT_SUCCESS) exit(status);

      /* release every buffer allocated by this example */
      free(spaceDomain);
      free(frequencyElements);
      free(indices);

      return 0;
    }

Fortran
-------
.. code-block:: fortran

    program main
        use iso_c_binding
        use spfft
        implicit none
        integer :: i, j, k, counter
        integer, parameter :: dimX = 2
        integer, parameter :: dimY = 2
        integer, parameter :: dimZ = 2
        integer, parameter :: maxNumLocalZColumns = dimX * dimY
        integer, parameter :: processingUnit = 1
        integer, parameter :: maxNumThreads = -1
        type(c_ptr) :: grid = c_null_ptr
        type(c_ptr) :: transform = c_null_ptr
        integer :: errorCode = 0
        integer, dimension(dimX * dimY * dimZ * 3):: indices = 0
        complex(C_DOUBLE_COMPLEX), dimension(dimX * dimY * dimZ):: frequencyElements
        real(C_DOUBLE), dimension(2*dimX * dimY * dimZ):: spaceDomain
        complex(C_DOUBLE_COMPLEX), pointer :: spaceDomainPtr(:,:,:)
        type(c_ptr) :: realValuesPtr

        counter = 0
        do k = 1, dimZ
            do j = 1, dimY
                do i = 1, dimX
                    frequencyElements(counter + 1) = cmplx(counter, -counter)
                    indices(counter * 3 + 1) = i - 1
                    indices(counter * 3 + 2) = j - 1
                    indices(counter * 3 + 3) = k - 1
                    counter = counter + 1
                end do
            end do
        end do

        ! print input
        print *, "Input:"
        do i = 1, size(frequencyElements)
            print *, frequencyElements(i)
        end do

        ! create grid
        errorCode = spfft_grid_create(grid, dimX, dimY, dimZ, maxNumLocalZColumns, processingUnit, maxNumThreads)
        if (errorCode /= SPFFT_SUCCESS) error stop

        ! create transform
        ! Note: A transform handle can be created without a grid if no resource sharing is desired.
        errorCode = spfft_transform_create(transform, grid, processingUnit, 0, dimX, dimY, dimZ, dimZ,&
            size(frequencyElements), SPFFT_INDEX_TRIPLETS, indices)
        if (errorCode /= SPFFT_SUCCESS) error stop

        ! grid can be safely destroyed after creating all required transforms
        errorCode = spfft_grid_destroy(grid)
        if (errorCode /= SPFFT_SUCCESS) error stop

        ! *************************************************
        ! Option A: Reuse internal buffer for space domain
        ! *************************************************

        ! set space domain array to use memory allocated by the library
        errorCode = spfft_transform_get_space_domain(transform, processingUnit, realValuesPtr)
        if (errorCode /= SPFFT_SUCCESS) error stop

        ! transform backward
        errorCode = spfft_transform_backward(transform, frequencyElements, processingUnit)
        if (errorCode /= SPFFT_SUCCESS) error stop

        call c_f_pointer(realValuesPtr, spaceDomainPtr, [dimX,dimY,dimZ])

        print *, ""
        print *, "After backward transform:"
        do k = 1, size(spaceDomainPtr, 3)
            do j = 1, size(spaceDomainPtr, 2)
                do i = 1, size(spaceDomainPtr, 1)
                    print *, spaceDomainPtr(i, j, k)
                end do
            end do
        end do

        ! **********************************************
        ! Option B: Use external buffer for space domain
        ! **********************************************

        ! transform backward
        errorCode = spfft_transform_backward_ptr(transform, frequencyElements, spaceDomain)
        if (errorCode /= SPFFT_SUCCESS) error stop

        ! transform forward
        errorCode = spfft_transform_forward_ptr(transform, spaceDomain, frequencyElements, SPFFT_NO_SCALING)
        if (errorCode /= SPFFT_SUCCESS) error stop

        print *, ""
        print *, "After forward transform (without normalization):"
        do i = 1, size(frequencyElements)
            print *, frequencyElements(i)
        end do

        ! destroying the final transform will free the associated memory
        errorCode = spfft_transform_destroy(transform)
        if (errorCode /= SPFFT_SUCCESS) error stop

    end
SpFFT-1.1.0/docs/source/exceptions.rst000066400000000000000000000001201457701740000175740ustar00rootroot00000000000000Exceptions
==========
.. doxygenfile:: spfft/exceptions.hpp
   :project: SpFFT
SpFFT-1.1.0/docs/source/grid.rst000066400000000000000000000004061457701740000163470ustar00rootroot00000000000000Grid
====
.. note:: A Grid object can be safely destroyed after Transform objects have been created, since internal reference counting is used to prevent the release of resources while still in use.

..
doxygenclass:: spfft::Grid :project: SpFFT :members: SpFFT-1.1.0/docs/source/grid_c.rst000066400000000000000000000003721457701740000166530ustar00rootroot00000000000000Grid ==== .. note:: A Grid handle can be safely destroyed after Transform handles have been created, since internal reference counting is used to prevent the release of resources while still in use. .. doxygenfile:: spfft/grid.h :project: SpFFT SpFFT-1.1.0/docs/source/grid_float.rst000066400000000000000000000006611457701740000175370ustar00rootroot00000000000000GridFloat ========= .. note:: This class is only available if single precision support is enabled, in which case the macro SPFFT_SINGLE_PRECISION is defined in config.h. .. note:: A Grid object can be safely destroyed after Transform objects have been created, since internal reference counting is used to prevent the release of resources while still in use. .. doxygenclass:: spfft::GridFloat :project: SpFFT :members: SpFFT-1.1.0/docs/source/grid_float_c.rst000066400000000000000000000006521457701740000200410ustar00rootroot00000000000000GridFloat ========= .. note:: A Grid handle can be safely destroyed after Transform handles have been created, since internal reference counting is used to prevent the release of resources while still in use. .. note:: These functions are only available if single precision support is enabled, in which case the macro SPFFT_SINGLE_PRECISION is defined in config.h. .. doxygenfile:: spfft/grid_float.h :project: SpFFT SpFFT-1.1.0/docs/source/index.rst000066400000000000000000000051101457701740000165260ustar00rootroot00000000000000.. Copyright (c) 2019, ETH Zurich Distributed under the terms of the BSD 3-Clause License. The full license is in the file LICENSE, distributed with this software. SpFFT Documentation =================== | SpFFT - A 3D FFT library for sparse frequency domain data written in C++ with support for MPI, OpenMP, CUDA and ROCm.
| Inspired by the need of some computational material science applications with spherical cutoff data in frequency domain, SpFFT provides Fast Fourier Transformations of sparse frequency domain data. For distributed computations with MPI, slab decomposition in space domain and pencil decomposition in frequency domain (sparse data within a pencil / column must be on one rank) is used. .. figure:: ../images/sparse_to_dense.png :align: center :width: 70% Illustration of a transform, where data on each MPI rank is identified by color. Design Goals ------------ - Sparse frequency domain input - Reuse of pre-allocated memory - Support of negative indexing for frequency domain data - Parallelization and acceleration are optional - Unified interface for calculations on CPUs and GPUs - Support of Complex-To-Real and Real-To-Complex transforms, where the full hermitian symmetry property is utilized - C++, C and Fortran interfaces Interface Design ---------------- To allow for pre-allocation and reuse of memory, the design is based on two classes: - **Grid**: Allocates memory for transforms up to a given size in each dimension. - **Transform**: Is associated with a *Grid* and can have any size up to the *Grid* dimensions. A *Transform* holds a counted reference to the underlying *Grid*. Therefore, *Transforms* created with the same *Grid* share memory, which is only freed once the *Grid* and all associated *Transforms* are destroyed. A transform can be computed in-place and out-of-place. Additionally, an internally allocated work buffer can optionally be used for input / output of space domain data. .. note:: The creation of Grids and Transforms, as well as the forward and backward execution may entail MPI calls and must be synchronized between all ranks. .. toctree:: :maxdepth: 2 :hidden: installation examples details .. toctree:: :maxdepth: 2 :caption: C++ API REFERENCE: :hidden: types grid grid_float transform transform_float multi_transform exceptions ..
toctree:: :maxdepth: 2 :caption: C API REFERENCE: :hidden: types grid_c grid_float_c transform_c transform_float_c multi_transform_c errors_c .. Indices and tables .. ================== .. * :ref:`genindex` SpFFT-1.1.0/docs/source/installation.rst000066400000000000000000000036301457701740000201250ustar00rootroot00000000000000Installation ============ Requirements ------------ * C++ Compiler with C++17 support. Supported compilers are: * GCC 7 and later * Clang 5 and later * ICC 19.0 and later * CMake 3.18 and later (3.21 for ROCm) * Library providing a FFTW 3.x interface (FFTW3 or Intel MKL) * For multi-threading: OpenMP support by the compiler * For compilation with GPU support: * CUDA 11.0 and later for Nvidia hardware * ROCm 5.0 and later for AMD hardware Build ----- The build system follows the standard CMake workflow. Example: .. code-block:: bash mkdir build cd build cmake .. -DSPFFT_OMP=ON -DSPFFT_MPI=ON -DSPFFT_GPU_BACKEND=CUDA -DSPFFT_SINGLE_PRECISION=OFF -DCMAKE_INSTALL_PREFIX=/usr/local make -j8 install NOTE: When compiling with CUDA or ROCM (HIP), the standard `CMAKE_CUDA_ARCHITECTURES` and `CMAKE_HIP_ARCHITECTURES` options should be defined as well. `HIP_HCC_FLAGS` is no longer in use. CMake options ------------- ====================== ======= ============================================================= Option Default Description ====================== ======= ============================================================= SPFFT_MPI ON Enable MPI support SPFFT_OMP ON Enable multi-threading with OpenMP SPFFT_GPU_BACKEND OFF Select GPU backend. Can be OFF, CUDA or ROCM SPFFT_GPU_DIRECT OFF Use GPU aware MPI with GPUDirect SPFFT_SINGLE_PRECISION OFF Enable single precision support SPFFT_STATIC OFF Build as static library SPFFT_FFTW_LIB AUTO Library providing a FFTW interface. 
Can be AUTO, MKL or FFTW SPFFT_BUILD_TESTS OFF Build test executables for development purposes SPFFT_INSTALL ON Add library to install target SPFFT_FORTRAN OFF Build Fortran interface module ====================== ======= ============================================================ SpFFT-1.1.0/docs/source/multi_transform.rst000066400000000000000000000003601457701740000206460ustar00rootroot00000000000000Multi-Transform =============== .. note:: Only fully independent transforms can be executed in parallel. .. doxygenfile:: spfft/multi_transform.hpp :project: SpFFT .. doxygenfile:: spfft/multi_transform_float.hpp :project: SpFFT SpFFT-1.1.0/docs/source/multi_transform_c.rst000066400000000000000000000003541457701740000211530ustar00rootroot00000000000000Multi-Transform =============== .. note:: Only fully independent transforms can be executed in parallel. .. doxygenfile:: spfft/multi_transform.h :project: SpFFT .. doxygenfile:: spfft/multi_transform_float.h :project: SpFFT SpFFT-1.1.0/docs/source/transform.rst000066400000000000000000000005141457701740000174350ustar00rootroot00000000000000Transform ========= .. note:: This class only holds an internal reference counted object. The object remains in a usable state even if the associated Grid object is destroyed. In addition, copying a transform only requires an internal copy of a shared pointer. .. doxygenclass:: spfft::Transform :project: SpFFT :members: SpFFT-1.1.0/docs/source/transform_c.rst000066400000000000000000000004771457701740000177470ustar00rootroot00000000000000Transform ========= .. note:: This class only holds an internal reference counted object. The object remains in a usable state even if the associated Grid object is destroyed. In addition, copying a transform only requires an internal copy of a shared pointer. ..
doxygenfile:: spfft/transform.h :project: SpFFT SpFFT-1.1.0/docs/source/transform_float.rst000066400000000000000000000007661457701740000206330ustar00rootroot00000000000000TransformFloat ============== .. note:: This class is only available if single precision support is enabled, in which case the marco SPFFT_SINGLE_PRECISION is defined in config.h. .. note:: This class only holds an internal reference counted object. The object remains in a usable state even if the associated Grid object is destroyed. In addition, copying a transform only requires an internal copy of a shared pointer. .. doxygenclass:: spfft::TransformFloat :project: SpFFT :members: SpFFT-1.1.0/docs/source/transform_float_c.rst000066400000000000000000000003731457701740000211270ustar00rootroot00000000000000TransformFloat ============== .. note:: These functions are only available if single precision support is enabled, in which case the marco SPFFT_SINGLE_PRECISION is defined in config.h. .. doxygenfile:: spfft/transform_float.h :project: SpFFT SpFFT-1.1.0/docs/source/types.rst000066400000000000000000000000771457701740000165720ustar00rootroot00000000000000Types ===== .. doxygenfile:: spfft/types.h :project: SpFFT SpFFT-1.1.0/examples/000077500000000000000000000000001457701740000142565ustar00rootroot00000000000000SpFFT-1.1.0/examples/example.c000066400000000000000000000101701457701740000160540ustar00rootroot00000000000000#include #include #include "spfft/spfft.h" int main(int argc, char** argv) { const int dimX = 2; const int dimY = 2; const int dimZ = 2; printf("Dimensions: x = %d, y = %d, z = %d\n\n", dimX, dimY, dimZ); /* Use default OpenMP value */ const int numThreads = -1; /* use all elements in this example. */ const int numFrequencyElements = dimX * dimY * dimZ; /* Slice length in space domain. Equivalent to dimZ for non-distributed case. 
*/ const int localZLength = dimZ; /* interleaved complex numbers */ double* frequencyElements = (double*)malloc(2 * sizeof(double) * numFrequencyElements); /* indices of frequency elements */ int* indices = (int*)malloc(3 * sizeof(int) * numFrequencyElements); /* initialize frequency domain values and indices */ double initValue = 0.0; size_t count = 0; for (int xIndex = 0; xIndex < dimX; ++xIndex) { for (int yIndex = 0; yIndex < dimY; ++yIndex) { for (int zIndex = 0; zIndex < dimZ; ++zIndex, ++count) { /* init values */ frequencyElements[2 * count] = initValue; frequencyElements[2 * count + 1] = -initValue; /* add index triplet for value */ indices[3 * count] = xIndex; indices[3 * count + 1] = yIndex; indices[3 * count + 2] = zIndex; initValue += 1.0; } } } printf("Input:\n"); for (size_t i = 0; i < dimX * dimY * dimZ; ++i) { printf("%f, %f\n", frequencyElements[2 * i], frequencyElements[2 * i + 1]); } printf("\n"); SpfftError status = 0; /* create local Grid. For distributed computations, a MPI Communicator has to be provided */ SpfftGrid grid; status = spfft_grid_create(&grid, dimX, dimY, dimZ, dimX * dimY, SPFFT_PU_HOST, numThreads); if (status != SPFFT_SUCCESS) exit(status); /* create transform */ SpfftTransform transform; status = spfft_transform_create(&transform, grid, SPFFT_PU_HOST, SPFFT_TRANS_C2C, dimX, dimY, dimZ, localZLength, numFrequencyElements, SPFFT_INDEX_TRIPLETS, indices); if (status != SPFFT_SUCCESS) exit(status); /* grid can be safely destroyed after creating all transforms */ status = spfft_grid_destroy(grid); if (status != SPFFT_SUCCESS) exit(status); /************************************************** Option A: Reuse internal buffer for space domain ***************************************************/ /* Get pointer to buffer with space domain data. Is guaranteed to be castable to a valid complex type pointer. 
Using the internal working buffer as input / output can help reduce memory usage.*/ double* spaceDomain; status = spfft_transform_get_space_domain(transform, SPFFT_PU_HOST, &spaceDomain); if (status != SPFFT_SUCCESS) exit(status); /* transform backward */ status = spfft_transform_backward(transform, frequencyElements, SPFFT_PU_HOST); if (status != SPFFT_SUCCESS) exit(status); printf("After backward transform:\n"); for (size_t i = 0; i < dimX * dimY * dimZ; ++i) { printf("%f, %f\n", spaceDomain[2 * i], spaceDomain[2 * i + 1]); } printf("\n"); /********************************************** Option B: Use external buffer for space domain ***********************************************/ spaceDomain = (double*)malloc(2 * sizeof(double) * dimX * dimY * dimZ); /* transform backward */ status = spfft_transform_backward_ptr(transform, frequencyElements, spaceDomain); if (status != SPFFT_SUCCESS) exit(status); /* transform forward */ status = spfft_transform_forward_ptr(transform, spaceDomain, frequencyElements, SPFFT_NO_SCALING); if (status != SPFFT_SUCCESS) exit(status); /* Note: In-place transforms are also supported by passing the same pointer for input and output. 
*/ printf("After forward transform (without normalization):\n"); for (size_t i = 0; i < dimX * dimY * dimZ; ++i) { printf("%f, %f\n", frequencyElements[2 * i], frequencyElements[2 * i + 1]); } /* destroying the final transform will free the associated memory */ status = spfft_transform_destroy(transform); if (status != SPFFT_SUCCESS) exit(status); free(spaceDomain); free(frequencyElements); return 0; } SpFFT-1.1.0/examples/example.cpp000066400000000000000000000070741457701740000164250ustar00rootroot00000000000000#include #include #include #include "spfft/spfft.hpp" int main(int argc, char** argv) { const int dimX = 2; const int dimY = 2; const int dimZ = 2; std::cout << "Dimensions: x = " << dimX << ", y = " << dimY << ", z = " << dimZ << std::endl << std::endl; // Use default OpenMP value const int numThreads = -1; // Use all elements in this example. const int numFrequencyElements = dimX * dimY * dimZ; // Slice length in space domain. Equivalent to dimZ for non-distributed case. const int localZLength = dimZ; // Interleaved complex numbers std::vector frequencyElements; frequencyElements.reserve(2 * numFrequencyElements); // Indices of frequency elements std::vector indices; indices.reserve(dimX * dimY * dimZ * 3); // Initialize frequency domain values and indices double initValue = 0.0; for (int xIndex = 0; xIndex < dimX; ++xIndex) { for (int yIndex = 0; yIndex < dimY; ++yIndex) { for (int zIndex = 0; zIndex < dimZ; ++zIndex) { // init with interleaved complex numbers frequencyElements.emplace_back(initValue); frequencyElements.emplace_back(-initValue); // add index triplet for value indices.emplace_back(xIndex); indices.emplace_back(yIndex); indices.emplace_back(zIndex); initValue += 1.0; } } } std::cout << "Input:" << std::endl; for (int i = 0; i < numFrequencyElements; ++i) { std::cout << frequencyElements[2 * i] << ", " << frequencyElements[2 * i + 1] << std::endl; } // Create local Grid. 
For distributed computations, a MPI Communicator has to be provided spfft::Grid grid(dimX, dimY, dimZ, dimX * dimY, SPFFT_PU_HOST, numThreads); // Create transform. // Note: A transform handle can be created without a grid if no resource sharing is desired. spfft::Transform transform = grid.create_transform(SPFFT_PU_HOST, SPFFT_TRANS_C2C, dimX, dimY, dimZ, localZLength, numFrequencyElements, SPFFT_INDEX_TRIPLETS, indices.data()); /////////////////////////////////////////////////// // Option A: Reuse internal buffer for space domain /////////////////////////////////////////////////// // Transform backward transform.backward(frequencyElements.data(), SPFFT_PU_HOST); // Get pointer to buffer with space domain data. Is guaranteed to be castable to a valid // std::complex pointer. Using the internal working buffer as input / output can help reduce // memory usage. double* spaceDomainPtr = transform.space_domain_data(SPFFT_PU_HOST); std::cout << std::endl << "After backward transform:" << std::endl; for (int i = 0; i < transform.local_slice_size(); ++i) { std::cout << spaceDomainPtr[2 * i] << ", " << spaceDomainPtr[2 * i + 1] << std::endl; } ///////////////////////////////////////////////// // Option B: Use external buffer for space domain ///////////////////////////////////////////////// std::vector spaceDomainVec(2 * transform.local_slice_size()); // Transform backward transform.backward(frequencyElements.data(), spaceDomainVec.data()); // Transform forward transform.forward(spaceDomainVec.data(), frequencyElements.data(), SPFFT_NO_SCALING); // Note: In-place transforms are also supported by passing the same pointer for input and output. 
std::cout << std::endl << "After forward transform (without normalization):" << std::endl; for (int i = 0; i < numFrequencyElements; ++i) { std::cout << frequencyElements[2 * i] << ", " << frequencyElements[2 * i + 1] << std::endl; } return 0; } SpFFT-1.1.0/examples/example.f90000066400000000000000000000071271457701740000162400ustar00rootroot00000000000000program main use iso_c_binding use spfft implicit none integer :: i, j, k, counter integer, parameter :: dimX = 2 integer, parameter :: dimY = 2 integer, parameter :: dimZ = 2 integer, parameter :: maxNumLocalZColumns = dimX * dimY integer, parameter :: processingUnit = 1 integer, parameter :: maxNumThreads = -1 type(c_ptr) :: grid = c_null_ptr type(c_ptr) :: transform = c_null_ptr integer :: errorCode = 0 integer, dimension(dimX * dimY * dimZ * 3):: indices = 0 complex(C_DOUBLE_COMPLEX), dimension(dimX * dimY * dimZ):: frequencyElements real(C_DOUBLE), dimension(2*dimX * dimY * dimZ):: spaceDomain complex(C_DOUBLE_COMPLEX), pointer :: spaceDomainPtr(:,:,:) type(c_ptr) :: realValuesPtr counter = 0 do k = 1, dimZ do j = 1, dimY do i = 1, dimX frequencyElements(counter + 1) = cmplx(counter, -counter) indices(counter * 3 + 1) = i - 1 indices(counter * 3 + 2) = j - 1 indices(counter * 3 + 3) = k - 1 counter = counter + 1 end do end do end do ! print input print *, "Input:" do i = 1, size(frequencyElements) print *, frequencyElements(i) end do ! create grid errorCode = spfft_grid_create(grid, dimX, dimY, dimZ, maxNumLocalZColumns, processingUnit, maxNumThreads); if (errorCode /= SPFFT_SUCCESS) error stop ! create transform ! Note: A transform handle can be created without a grid if no resource sharing is desired. errorCode = spfft_transform_create(transform, grid, processingUnit, 0, dimX, dimY, dimZ, dimZ,& size(frequencyElements), SPFFT_INDEX_TRIPLETS, indices) if (errorCode /= SPFFT_SUCCESS) error stop ! 
grid can be safely destroyed after creating all required transforms errorCode = spfft_grid_destroy(grid) if (errorCode /= SPFFT_SUCCESS) error stop ! ************************************************* ! Option A: Reuse internal buffer for space domain ! ************************************************* ! set space domain array to use memory allocted by the library errorCode = spfft_transform_get_space_domain(transform, processingUnit, realValuesPtr) if (errorCode /= SPFFT_SUCCESS) error stop ! transform backward errorCode = spfft_transform_backward(transform, frequencyElements, processingUnit) if (errorCode /= SPFFT_SUCCESS) error stop call c_f_pointer(realValuesPtr, spaceDomainPtr, [dimX,dimY,dimZ]) print *, "" print *, "After backward transform:" do k = 1, size(spaceDomainPtr, 3) do j = 1, size(spaceDomainPtr, 2) do i = 1, size(spaceDomainPtr, 1) print *, spaceDomainPtr(i, j, k) end do end do end do ! ********************************************** ! Option B: Use external buffer for space domain ! ********************************************** ! transform backward errorCode = spfft_transform_backward_ptr(transform, frequencyElements, spaceDomain) if (errorCode /= SPFFT_SUCCESS) error stop ! transform forward errorCode = spfft_transform_forward_ptr(transform, spaceDomain, frequencyElements, SPFFT_NO_SCALING) if (errorCode /= SPFFT_SUCCESS) error stop print *, "" print *, "After forward transform (without normalization):" do i = 1, size(frequencyElements) print *, frequencyElements(i) end do ! 
destroying the final transform will free the associated memory errorCode = spfft_transform_destroy(transform) if (errorCode /= SPFFT_SUCCESS) error stop end SpFFT-1.1.0/include/000077500000000000000000000000001457701740000140635ustar00rootroot00000000000000SpFFT-1.1.0/include/spfft/000077500000000000000000000000001457701740000152055ustar00rootroot00000000000000SpFFT-1.1.0/include/spfft/config.h.in000066400000000000000000000035201457701740000172300ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /***************** * CMAKE GENERATED *****************/ #ifndef SPFFT_CONFIG_H #define SPFFT_CONFIG_H #cmakedefine SPFFT_CUDA #cmakedefine SPFFT_ROCM #cmakedefine SPFFT_MPI #cmakedefine SPFFT_OMP #cmakedefine SPFFT_SINGLE_PRECISION #cmakedefine SPFFT_GPU_DIRECT #include "spfft/spfft_export.h" #endif SpFFT-1.1.0/include/spfft/errors.h000066400000000000000000000070031457701740000166720ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/
#ifndef SPFFT_ERRORS_H
#define SPFFT_ERRORS_H

#include "spfft/config.h"

/**
 * Error codes reported by SpFFT.
 *
 * The enumerators carry no explicit values, so they are numbered from 0 in declaration order
 * (SPFFT_SUCCESS is 0). Inserting or reordering entries changes the numeric value of every
 * following code.
 */
enum SpfftError {
  /**
   * Success. No error.
   */
  SPFFT_SUCCESS,
  /**
   * Unknown error.
   */
  SPFFT_UNKNOWN_ERROR,
  /**
   * Invalid Grid or Transform handle.
   */
  SPFFT_INVALID_HANDLE_ERROR,
  /**
   * Integer overflow.
   */
  SPFFT_OVERFLOW_ERROR,
  /**
   * Failed to allocate memory on host.
   */
  SPFFT_ALLOCATION_ERROR,
  /**
   * Invalid parameter.
   */
  SPFFT_INVALID_PARAMETER_ERROR,
  /**
   * Duplicate indices given to transform. May indicate non-local z-column between MPI ranks.
   */
  SPFFT_DUPLICATE_INDICES_ERROR,
  /**
   * Invalid indices given to transform.
   */
  SPFFT_INVALID_INDICES_ERROR,
  /**
   * Library not compiled with MPI support.
   */
  SPFFT_MPI_SUPPORT_ERROR,
  /**
   * MPI error. Only returned if error code of MPI API calls is non-zero.
   */
  SPFFT_MPI_ERROR,
  /**
   * Parameters differ between MPI ranks.
   */
  SPFFT_MPI_PARAMETER_MISMATCH_ERROR,
  /**
   * Failed execution on host.
   */
  SPFFT_HOST_EXECUTION_ERROR,
  /**
   * FFTW library error.
   */
  SPFFT_FFTW_ERROR,
  /**
   * Generic GPU error.
   */
  SPFFT_GPU_ERROR,
  /**
   * Detected error on GPU from previous GPU API / kernel calls.
   */
  SPFFT_GPU_PRECEDING_ERROR,
  /**
   * Library not compiled with GPU support.
   */
  SPFFT_GPU_SUPPORT_ERROR,
  /**
   * Failed allocation on GPU.
   */
  SPFFT_GPU_ALLOCATION_ERROR,
  /**
   * Failed to launch kernel on GPU.
   */
  SPFFT_GPU_LAUNCH_ERROR,
  /**
   * No GPU device detected.
   */
  SPFFT_GPU_NO_DEVICE_ERROR,
  /**
   * Invalid value passed to GPU API.
   */
  SPFFT_GPU_INVALID_VALUE_ERROR,
  /**
   * Invalid device pointer used.
   */
  SPFFT_GPU_INVALID_DEVICE_PTR_ERROR,
  /**
   * Failed to copy from / to GPU.
   */
  SPFFT_GPU_COPY_ERROR,
  /**
   * Failure in GPU FFT library call.
   */
  SPFFT_GPU_FFT_ERROR
};

#ifndef __cplusplus
/*! \cond PRIVATE */
// C only
typedef enum SpfftError SpfftError;
/*!
\endcond */ #endif // cpp #endif SpFFT-1.1.0/include/spfft/exceptions.hpp000066400000000000000000000215341457701740000201040ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_EXCEPTIONS_H #define SPFFT_EXCEPTIONS_H #include #include "spfft/config.h" #include "spfft/errors.h" namespace spfft { /** * A generic error. Base type for all other exceptions. 
*/
class SPFFT_EXPORT GenericError : public std::exception {
public:
  /** @return Static, human readable description of the error. */
  auto what() const noexcept -> const char* override { return "SpFFT: Generic error"; }

  /** @return The SpfftError value matching this exception type. Overridden by derived types. */
  virtual auto error_code() const noexcept -> SpfftError {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
};

/**
 * Overflow of integer values.
 */
class SPFFT_EXPORT OverflowError : public GenericError {
public:
  auto what() const noexcept -> const char* override { return "SpFFT: Overflow error"; }

  auto error_code() const noexcept -> SpfftError override {
    return SpfftError::SPFFT_OVERFLOW_ERROR;
  }
};

/**
 * Failed allocation on host.
 */
class SPFFT_EXPORT HostAllocationError : public GenericError {
public:
  auto what() const noexcept -> const char* override { return "SpFFT: Host allocation error"; }

  auto error_code() const noexcept -> SpfftError override {
    return SpfftError::SPFFT_ALLOCATION_ERROR;
  }
};

/**
 * Invalid parameter.
 */
class SPFFT_EXPORT InvalidParameterError : public GenericError {
public:
  auto what() const noexcept -> const char* override { return "SpFFT: Invalid parameter error"; }

  auto error_code() const noexcept -> SpfftError override {
    return SpfftError::SPFFT_INVALID_PARAMETER_ERROR;
  }
};

/**
 * Duplicate indices given to transform. May indicate non-local z-column between MPI ranks.
 */
class SPFFT_EXPORT DuplicateIndicesError : public GenericError {
public:
  auto what() const noexcept -> const char* override { return "SpFFT: Duplicate indices error"; }

  auto error_code() const noexcept -> SpfftError override {
    return SpfftError::SPFFT_DUPLICATE_INDICES_ERROR;
  }
};

/**
 * Invalid indices given to transform.
 */
class SPFFT_EXPORT InvalidIndicesError : public GenericError {
public:
  auto what() const noexcept -> const char* override { return "SpFFT: Invalid indices error"; }

  auto error_code() const noexcept -> SpfftError override {
    return SpfftError::SPFFT_INVALID_INDICES_ERROR;
  }
};

/**
 * Library not compiled with MPI support.
*/ class SPFFT_EXPORT MPISupportError : public GenericError { public: auto what() const noexcept -> const char* override { return "SpFFT: Not compiled with MPI support error"; } auto error_code() const noexcept -> SpfftError override { return SpfftError::SPFFT_MPI_SUPPORT_ERROR; } }; /** * MPI error. Only thrown if error code of MPI API calls is non-zero. */ class SPFFT_EXPORT MPIError : public GenericError { public: auto what() const noexcept -> const char* override { return "SpFFT: MPI error"; } auto error_code() const noexcept -> SpfftError override { return SpfftError::SPFFT_MPI_ERROR; } }; /** * Parameters differ between MPI ranks. */ class SPFFT_EXPORT MPIParameterMismatchError : public GenericError { public: auto what() const noexcept -> const char* override { return "SpFFT: Mismatched parameters between MPI ranks"; } auto error_code() const noexcept -> SpfftError override { return SpfftError::SPFFT_MPI_PARAMETER_MISMATCH_ERROR; } }; /** * Failed execution on host. */ class SPFFT_EXPORT HostExecutionError : public GenericError { public: auto what() const noexcept -> const char* override { return "SpFFT: Host execution error"; } auto error_code() const noexcept -> SpfftError override { return SpfftError::SPFFT_HOST_EXECUTION_ERROR; } }; /** * FFTW library error. */ class SPFFT_EXPORT FFTWError : public GenericError { public: auto what() const noexcept -> const char* override { return "SpFFT: FFTW error"; } auto error_code() const noexcept -> SpfftError override { return SpfftError::SPFFT_FFTW_ERROR; } }; /** * Unknown internal error. */ class SPFFT_EXPORT InternalError : public GenericError { public: auto what() const noexcept -> const char* override { return "SpFFT: Internal error"; } auto error_code() const noexcept -> SpfftError override { return SpfftError::SPFFT_FFTW_ERROR; } }; // ================================== // GPU Errors // ================================== /** * Generic GPU error. Base type for all GPU related exceptions. 
*/
class SPFFT_EXPORT GPUError : public GenericError {
public:
  /** @return Static, human readable description of the error. */
  auto what() const noexcept -> const char* override { return "SpFFT: GPU error"; }

  /** @return SPFFT_GPU_ERROR. Derived GPU exception types override this with their own code. */
  auto error_code() const noexcept -> SpfftError override { return SpfftError::SPFFT_GPU_ERROR; }
};

/**
 * Library not compiled with GPU support. Reports SPFFT_GPU_SUPPORT_ERROR.
 */
class SPFFT_EXPORT GPUSupportError : public GPUError {
public:
  auto what() const noexcept -> const char* override {
    return "SpFFT: Not compiled with GPU support";
  }

  auto error_code() const noexcept -> SpfftError override {
    return SpfftError::SPFFT_GPU_SUPPORT_ERROR;
  }
};

/**
 * Detected error on GPU from previous GPU API / kernel calls. Reports SPFFT_GPU_PRECEDING_ERROR.
 */
class SPFFT_EXPORT GPUPrecedingError : public GPUError {
public:
  auto what() const noexcept -> const char* override {
    return "SpFFT: Detected error from preceding gpu calls.";
  }

  auto error_code() const noexcept -> SpfftError override {
    return SpfftError::SPFFT_GPU_PRECEDING_ERROR;
  }
};

/**
 * Failed allocation on GPU. Reports SPFFT_GPU_ALLOCATION_ERROR.
 */
class SPFFT_EXPORT GPUAllocationError : public GPUError {
public:
  auto what() const noexcept -> const char* override { return "SpFFT: GPU allocation error"; }

  auto error_code() const noexcept -> SpfftError override {
    return SpfftError::SPFFT_GPU_ALLOCATION_ERROR;
  }
};

/**
 * Failed to launch kernel on GPU. Reports SPFFT_GPU_LAUNCH_ERROR.
 */
class SPFFT_EXPORT GPULaunchError : public GPUError {
public:
  auto what() const noexcept -> const char* override { return "SpFFT: GPU launch error"; }

  auto error_code() const noexcept -> SpfftError override {
    return SpfftError::SPFFT_GPU_LAUNCH_ERROR;
  }
};

/**
 * No GPU device detected. Reports SPFFT_GPU_NO_DEVICE_ERROR.
 */
class SPFFT_EXPORT GPUNoDeviceError : public GPUError {
public:
  auto what() const noexcept -> const char* override { return "SpFFT: no GPU available"; }

  auto error_code() const noexcept -> SpfftError override {
    return SpfftError::SPFFT_GPU_NO_DEVICE_ERROR;
  }
};

/**
 * Invalid value passed to GPU API.
*/ class SPFFT_EXPORT GPUInvalidValueError : public GPUError { public: auto what() const noexcept -> const char* override { return "SpFFT: GPU call with invalid value"; } auto error_code() const noexcept -> SpfftError override { return SpfftError::SPFFT_GPU_INVALID_VALUE_ERROR; } }; /** * Invalid device pointer used. */ class SPFFT_EXPORT GPUInvalidDevicePointerError : public GPUError { public: auto what() const noexcept -> const char* override { return "SpFFT: Invalid GPU pointer"; } auto error_code() const noexcept -> SpfftError override { return SpfftError::SPFFT_GPU_INVALID_DEVICE_PTR_ERROR; } }; /** * Failed to copy from / to GPU. */ class SPFFT_EXPORT GPUCopyError : public GPUError { public: auto what() const noexcept -> const char* override { return "SpFFT: GPU Memory copy error"; } auto error_code() const noexcept -> SpfftError override { return SpfftError::SPFFT_GPU_COPY_ERROR; } }; /** * Failure in GPU FFT library call. */ class SPFFT_EXPORT GPUFFTError : public GPUError { public: auto what() const noexcept -> const char* override { return "SpFFT: GPU FFT error"; } auto error_code() const noexcept -> SpfftError override { return SpfftError::SPFFT_GPU_FFT_ERROR; } }; } // namespace spfft #endif SpFFT-1.1.0/include/spfft/grid.h000066400000000000000000000166551457701740000163200ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_GRID_H #define SPFFT_GRID_H #include "spfft/config.h" #include "spfft/errors.h" #include "spfft/types.h" #ifdef SPFFT_MPI #include #endif #ifdef __cplusplus extern "C" { #endif /** * Grid handle. */ typedef void* SpfftGrid; /** * Constructor for a local grid. * * @param[out] grid Handle to grid. * @param[in] maxDimX Maximum dimension in x. * @param[in] maxDimY Maximum dimension in y. * @param[in] maxDimZ Maximum dimension in z. * @param[in] maxNumLocalZColumns Maximum number of z-columns in frequency domain. * @param[in] processingUnit The processing unit type to prepare for. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU or SPFFT_PU_HOST | SPFFT_PU_GPU. * @param[in] maxNumThreads The maximum number of threads, transforms created with this grid are * allowed to use. If smaller than 1, the OpenMP default value is used. * @return Error code or SPFFT_SUCCESS. 
*/
SPFFT_EXPORT SpfftError spfft_grid_create(SpfftGrid* grid, int maxDimX, int maxDimY, int maxDimZ,
                                          int maxNumLocalZColumns,
                                          SpfftProcessingUnitType processingUnit,
                                          int maxNumThreads);

#ifdef SPFFT_MPI
/**
 * Constructor for a distributed grid.
 * Thread-safe if MPI thread support is set to MPI_THREAD_MULTIPLE.
 *
 * @param[out] grid Handle to grid.
 * @param[in] maxDimX Maximum dimension in x.
 * @param[in] maxDimY Maximum dimension in y.
 * @param[in] maxDimZ Maximum dimension in z.
 * @param[in] maxNumLocalZColumns Maximum number of z-columns in frequency domain of the
 * local MPI rank.
 * @param[in] maxLocalZLength Maximum length in z in space domain for the local MPI rank.
 * @param[in] processingUnit The processing unit type to prepare for. Can be SPFFT_PU_HOST or
 * SPFFT_PU_GPU or SPFFT_PU_HOST | SPFFT_PU_GPU.
 * @param[in] maxNumThreads The maximum number of threads, transforms created with this grid are
 * allowed to use. If smaller than 1, the OpenMP default value is used.
 * @param[in] comm The MPI communicator to use. Will be duplicated for internal use.
 * @param[in] exchangeType The type of MPI exchange to use. Possible values are
 * SPFFT_EXCH_DEFAULT, SPFFT_EXCH_BUFFERED, SPFFT_EXCH_COMPACT_BUFFERED and SPFFT_EXCH_UNBUFFERED.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_grid_create_distributed(SpfftGrid* grid, int maxDimX, int maxDimY,
                                                      int maxDimZ, int maxNumLocalZColumns,
                                                      int maxLocalZLength,
                                                      SpfftProcessingUnitType processingUnit,
                                                      int maxNumThreads, MPI_Comm comm,
                                                      SpfftExchangeType exchangeType);
#endif

/**
 * Destroy a grid.
 *
 * A grid can be safely destroyed independently of any related transforms. The internal memory
 * is released once all associated transforms are destroyed as well (through internal reference
 * counting).
 *
 * @param[in] grid Handle to grid.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_grid_destroy(SpfftGrid grid);

/**
 * Access a grid parameter.
 * @param[in] grid Handle to grid.
* @param[out] dimX Maximum dimension in x.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_grid_max_dim_x(SpfftGrid grid, int* dimX);

/**
 * Access a grid parameter.
 * @param[in] grid Handle to grid.
 * @param[out] dimY Maximum dimension in y.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_grid_max_dim_y(SpfftGrid grid, int* dimY);

/**
 * Access a grid parameter.
 * @param[in] grid Handle to grid.
 * @param[out] dimZ Maximum dimension in z.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_grid_max_dim_z(SpfftGrid grid, int* dimZ);

/**
 * Access a grid parameter.
 * @param[in] grid Handle to grid.
 * @param[out] maxNumLocalZColumns Maximum number of z-columns in frequency domain of the local MPI
 * rank.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_grid_max_num_local_z_columns(SpfftGrid grid,
                                                           int* maxNumLocalZColumns);

/**
 * Access a grid parameter.
 * @param[in] grid Handle to grid.
 * @param[out] maxLocalZLength Maximum length in z in space domain of the local MPI rank.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_grid_max_local_z_length(SpfftGrid grid, int* maxLocalZLength);

/**
 * Access a grid parameter.
 * @param[in] grid Handle to grid.
 * @param[out] processingUnit The processing unit, the grid has prepared for. Can be SPFFT_PU_HOST
 * or SPFFT_PU_GPU or SPFFT_PU_HOST | SPFFT_PU_GPU.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_grid_processing_unit(SpfftGrid grid,
                                                   SpfftProcessingUnitType* processingUnit);

/**
 * Access a grid parameter.
 * @param[in] grid Handle to grid.
 * @param[out] deviceId The GPU device id used. Always returns 0, if no GPU support is enabled.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_grid_device_id(SpfftGrid grid, int* deviceId);

/**
 * Access a grid parameter.
 * @param[in] grid Handle to grid.
* @param[out] numThreads The exact number of threads used by transforms created from this grid. May * be less than the maximum given to the constructor. Always 1, if not compiled with OpenMP support. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_grid_num_threads(SpfftGrid grid, int* numThreads); #ifdef SPFFT_MPI /** * Access a grid parameter. * @param[in] grid Handle to grid. * @param[out] comm The internal MPI communicator. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_grid_communicator(SpfftGrid grid, MPI_Comm* comm); #endif #ifdef __cplusplus } #endif #endif SpFFT-1.1.0/include/spfft/grid.hpp000066400000000000000000000172571457701740000166570ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_GRID_HPP #define SPFFT_GRID_HPP #include #include "spfft/config.h" #include "spfft/transform.hpp" #include "spfft/types.h" #ifdef SPFFT_MPI #include #endif namespace spfft { // Forward declaration for internal use template class SPFFT_NO_EXPORT GridInternal; /** * A Grid, which provides pre-allocated memory for double precision transforms. */ class SPFFT_EXPORT Grid { public: /** * Constructor for a local grid. * * @param[in] maxDimX Maximum dimension in x. * @param[in] maxDimY Maximum dimension in y. * @param[in] maxDimZ Maximum dimension in z. * @param[in] maxNumLocalZColumns Maximum number of z-columns in frequency domain. * @param[in] processingUnit The processing unit type to prepare for. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU or SPFFT_PU_HOST | SPFFT_PU_GPU. * @param[in] maxNumThreads The maximum number of threads, transforms created with this grid are * allowed to use. If smaller than 1, the OpenMP default value is used. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ Grid(int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZColumns, SpfftProcessingUnitType processingUnit, int maxNumThreads); #ifdef SPFFT_MPI /** * Constructor for a distributed grid. * Thread-safe if MPI thread support is set to MPI_THREAD_MULTIPLE. * * @param[in] maxDimX Maximum dimension in x. * @param[in] maxDimY Maximum dimension in y. 
* @param[in] maxDimZ Maximum dimension in z.
   * @param[in] maxNumLocalZColumns Maximum number of z-columns in frequency domain of the
   * local MPI rank.
   * @param[in] maxLocalZLength Maximum length in z in space domain for the local MPI rank.
   * @param[in] processingUnit The processing unit type to prepare for. Can be SPFFT_PU_HOST or
   * SPFFT_PU_GPU or SPFFT_PU_HOST | SPFFT_PU_GPU.
   * @param[in] maxNumThreads The maximum number of threads, transforms created with this grid are
   * allowed to use. If smaller than 1, the OpenMP default value is used.
   * @param[in] comm The MPI communicator to use. Will be duplicated for internal use.
   * @param[in] exchangeType The type of MPI exchange to use. Possible values are
   * SPFFT_EXCH_DEFAULT, SPFFT_EXCH_BUFFERED, SPFFT_EXCH_COMPACT_BUFFERED and SPFFT_EXCH_UNBUFFERED.
   * @throw GenericError SpFFT error. Can be a derived type.
   * @throw std::exception Error from standard library calls. Can be a derived type.
   */
  Grid(int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZColumns, int maxLocalZLength,
       SpfftProcessingUnitType processingUnit, int maxNumThreads, MPI_Comm comm,
       SpfftExchangeType exchangeType);
#endif

  /**
   * Custom copy constructor.
   *
   * Creates an independent copy. Calls MPI functions for the distributed case.
   */
  Grid(const Grid&);

  /**
   * Default move constructor.
   */
  Grid(Grid&&) = default;

  /**
   * Custom copy assignment operator.
   *
   * Creates an independent copy. Calls MPI functions for the distributed case.
   */
  Grid& operator=(const Grid&);

  /**
   * Default move assignment operator.
   */
  Grid& operator=(Grid&&) = default;

  /**
   * Creates a transform from this grid object.
   * Thread-safe if no FFTW calls are executed concurrently.
   *
   * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or
   * SPFFT_PU_GPU and be supported by the grid itself.
   * @param[in] transformType The transform type (complex to complex or real to complex). Can be
   * SPFFT_TRANS_C2C or SPFFT_TRANS_R2C.
   * @param[in] dimX The dimension in x.
The maximum allowed depends on the grid parameters.
   * @param[in] dimY The dimension in y. The maximum allowed depends on the grid parameters.
   * @param[in] dimZ The dimension in z. The maximum allowed depends on the grid parameters.
   * @param[in] localZLength The length in z in space domain of the local MPI rank.
   * @param[in] numLocalElements The number of elements in frequency domain of the local MPI
   * rank.
   * @param[in] indexFormat The index format. Only SPFFT_INDEX_TRIPLETS currently supported.
   * @param[in] indices Pointer to the frequency indices. Positive and negative indexing is
   * supported.
   * @return Transform
   * @throw GenericError SpFFT error. Can be a derived type.
   * @throw std::exception Error from standard library calls. Can be a derived type.
   */
  Transform create_transform(SpfftProcessingUnitType processingUnit,
                             SpfftTransformType transformType, int dimX, int dimY, int dimZ,
                             int localZLength, int numLocalElements,
                             SpfftIndexFormatType indexFormat, const int* indices) const;

  /**
   * Access a grid parameter.
   * @return Maximum dimension in x.
   */
  int max_dim_x() const;

  /**
   * Access a grid parameter.
   * @return Maximum dimension in y.
   */
  int max_dim_y() const;

  /**
   * Access a grid parameter.
   * @return Maximum dimension in z.
   */
  int max_dim_z() const;

  /**
   * Access a grid parameter.
   * @return Maximum number of z-columns in frequency domain of the local MPI rank.
   */
  int max_num_local_z_columns() const;

  /**
   * Access a grid parameter.
   * @return Maximum length in z in space domain of the local MPI rank.
   */
  int max_local_z_length() const;

  /**
   * Access a grid parameter.
   * @return The processing unit, the grid has prepared for. Can be SPFFT_PU_HOST or SPFFT_PU_GPU or
   * SPFFT_PU_HOST | SPFFT_PU_GPU.
   */
  SpfftProcessingUnitType processing_unit() const;

  /**
   * Access a grid parameter.
   * @return The GPU device id used. Always returns 0, if no GPU support is enabled.
   */
  int device_id() const;

  /**
   * Access a grid parameter.
* @return The exact number of threads used by transforms created from this grid. May be less than * the maximum given to the constructor. Always 1, if not compiled with OpenMP support. */ int num_threads() const; #ifdef SPFFT_MPI /** * Access a grid parameter. * @return The internal MPI communicator. */ MPI_Comm communicator() const; #endif private: std::shared_ptr> grid_; }; } // namespace spfft #endif SpFFT-1.1.0/include/spfft/grid_float.h000066400000000000000000000166751457701740000175070ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_GRID_FLOAT_H #define SPFFT_GRID_FLOAT_H #include "spfft/config.h" #include "spfft/errors.h" #include "spfft/types.h" #ifdef SPFFT_MPI #include #endif #ifdef __cplusplus extern "C" { #endif /** * Grid handle. */ typedef void* SpfftFloatGrid; /** * Constructor for a single precision local grid. * * @param[out] grid Handle to grid. * @param[in] maxDimX Maximum dimension in x. * @param[in] maxDimY Maximum dimension in y. * @param[in] maxDimZ Maximum dimension in z. * @param[in] maxNumLocalZColumns Maximum number of z-columns in frequency domain. * @param[in] processingUnit The processing unit type to prepare for. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU or SPFFT_PU_HOST | SPFFT_PU_GPU. * @param[in] maxNumThreads The maximum number of threads, transforms created with this grid are * allowed to use. If smaller than 1, the OpenMP default value is used. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_grid_create(SpfftFloatGrid* grid, int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZColumns, SpfftProcessingUnitType processingUnit, int maxNumThreads); #ifdef SPFFT_MPI /** * Constructor for a single precision distributed grid. * Thread-safe if MPI thread support is set to MPI_THREAD_MULTIPLE. * * @param[out] grid Handle to grid. * @param[in] maxDimX Maximum dimension in x. * @param[in] maxDimY Maximum dimension in y. * @param[in] maxDimZ Maximum dimension in z. 
* @param[in] maxNumLocalZColumns Maximum number of z-columns in frequency domain of the
 * local MPI rank.
 * @param[in] maxLocalZLength Maximum length in z in space domain for the local MPI rank.
 * @param[in] processingUnit The processing unit type to prepare for. Can be SPFFT_PU_HOST or
 * SPFFT_PU_GPU or SPFFT_PU_HOST | SPFFT_PU_GPU.
 * @param[in] maxNumThreads The maximum number of threads, transforms created with this grid are
 * allowed to use. If smaller than 1, the OpenMP default value is used.
 * @param[in] comm The MPI communicator to use. Will be duplicated for internal use.
 * @param[in] exchangeType The type of MPI exchange to use. Possible values are
 * SPFFT_EXCH_DEFAULT, SPFFT_EXCH_BUFFERED, SPFFT_EXCH_COMPACT_BUFFERED and SPFFT_EXCH_UNBUFFERED.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_float_grid_create_distributed(
    SpfftFloatGrid* grid, int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZColumns,
    int maxLocalZLength, SpfftProcessingUnitType processingUnit, int maxNumThreads, MPI_Comm comm,
    SpfftExchangeType exchangeType);
#endif

/**
 * Destroy a grid.
 *
 * A grid can be safely destroyed independently of any related transforms. The internal memory
 * is released once all associated transforms are destroyed as well (through internal reference
 * counting).
 *
 * @param[in] grid Handle to grid.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_float_grid_destroy(SpfftFloatGrid grid);

/**
 * Access a grid parameter.
 * @param[in] grid Handle to grid.
 * @param[out] dimX Maximum dimension in x.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_float_grid_max_dim_x(SpfftFloatGrid grid, int* dimX);

/**
 * Access a grid parameter.
 * @param[in] grid Handle to grid.
 * @param[out] dimY Maximum dimension in y.
 * @return Error code or SPFFT_SUCCESS.
 */
SPFFT_EXPORT SpfftError spfft_float_grid_max_dim_y(SpfftFloatGrid grid, int* dimY);

/**
 * Access a grid parameter.
 * @param[in] grid Handle to grid.
* @param[out] dimZ Maximum dimension in z. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_grid_max_dim_z(SpfftFloatGrid grid, int* dimZ); /** * Access a grid parameter. * @param[in] grid Handle to grid. * @param[out] maxNumLocalZColumns Maximum number of z-columns in frequency domain of the local MPI * rank. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_grid_max_num_local_z_columns(SpfftFloatGrid grid, int* maxNumLocalZColumns); /** * Access a grid parameter. * @param[in] grid Handle to grid. * @param[out] maxLocalZLength Maximum length in z in space domain of the local MPI rank. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_grid_max_local_z_length(SpfftFloatGrid grid, int* maxLocalZLength); /** * Access a grid parameter. * @param[in] grid Handle to grid. * @param[out] processingUnit The processing unit, the grid has prepared for. Can be SPFFT_PU_HOST * or SPFFT_PU_GPU or SPFFT_PU_HOST | SPFFT_PU_GPU. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_grid_processing_unit(SpfftFloatGrid grid, SpfftProcessingUnitType* processingUnit); /** * Access a grid parameter. * @param[in] grid Handle to grid. * @param[out] deviceId The GPU device id used. Always 0, if no GPU support is enabled. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_grid_device_id(SpfftFloatGrid grid, int* deviceId); /** * Access a grid parameter. * @param[in] grid Handle to grid. * @param[out] numThreads The exact number of threads used by transforms created from this grid. May * be less than the maximum given to the constructor. Always 1, if not compiled with OpenMP support. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_grid_num_threads(SpfftFloatGrid grid, int* numThreads); #ifdef SPFFT_MPI /** * Access a grid parameter. * @param[in] grid Handle to grid. 
* @param[out] comm The internal MPI communicator. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_grid_communicator(SpfftFloatGrid grid, MPI_Comm* comm); #endif #ifdef __cplusplus } #endif #endif SpFFT-1.1.0/include/spfft/grid_float.hpp000066400000000000000000000174221457701740000200360ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_GRID_FLOAT_HPP #define SPFFT_GRID_FLOAT_HPP #include #include "spfft/config.h" #include "spfft/transform_float.hpp" #include "spfft/types.h" #ifdef SPFFT_MPI #include #endif namespace spfft { // Forward declaration for internal use template class SPFFT_NO_EXPORT GridInternal; #ifdef SPFFT_SINGLE_PRECISION /** * A Grid, which provides pre-allocated memory for single precision transforms. */ class SPFFT_EXPORT GridFloat { public: /** * Constructor for a local grid. * * @param[in] maxDimX Maximum dimension in x. * @param[in] maxDimY Maximum dimension in y. * @param[in] maxDimZ Maximum dimension in z. * @param[in] maxNumLocalZColumns Maximum number of z-columns in frequency domain. * @param[in] processingUnit The processing unit type to prepare for. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU or SPFFT_PU_HOST | SPFFT_PU_GPU. * @param[in] maxNumThreads The maximum number of threads, transforms created with this grid are * allowed to use. If smaller than 1, the OpenMP default value is used. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ GridFloat(int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZColumns, SpfftProcessingUnitType processingUnit, int maxNumThreads); #ifdef SPFFT_MPI /** * Constructor for a distributed grid. * Thread-safe if MPI thread support is set to MPI_THREAD_MULTIPLE. * * @param[in] maxDimX Maximum dimension in x. * @param[in] maxDimY Maximum dimension in y. * @param[in] maxDimZ Maximum dimension in z. * @param[in] maxNumLocalZColumns Maximum number of z-columns in frequency domain of the * local MPI rank. * @param[in] maxLocalZLength Maximum length in z in space domain for the local MPI rank. * @param[in] processingUnit The processing unit type to prepare for. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU or SPFFT_PU_HOST | SPFFT_PU_GPU. 
* @param[in] maxNumThreads The maximum number of threads, transforms created with this grid are * allowed to use. If smaller than 1, the OpenMP default value is used. * @param[in] comm The MPI communicator to use. Will be duplicated for internal use. * @param[in] exchangeType The type of MPI exchange to use. Possible values are * SPFFT_EXCH_DEFAULT, SPFFT_EXCH_BUFFERED, SPFFT_EXCH_COMPACT_BUFFERED and SPFFT_EXCH_UNBUFFERED. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ GridFloat(int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZColumns, int maxLocalZLength, SpfftProcessingUnitType processingUnit, int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType); #endif /** * Custom copy constructor. * * Creates a independent copy. Calls MPI functions for the distributed case. */ GridFloat(const GridFloat&); /** * Default move constructor. */ GridFloat(GridFloat&&) = default; /** * Custom copy operator. * * Creates a independent copy. Calls MPI functions for the distributed case. */ GridFloat& operator=(const GridFloat&); /** * Default move operator. */ GridFloat& operator=(GridFloat&&) = default; /** * Creates a transform from this grid object. * Thread-safe if no FFTW calls are executed concurrently. * * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU and be supported by the grid itself. * @param[in] transformType The transform type (complex to complex or real to complex). Can be * SPFFT_TRANS_C2C or SPFFT_TRANS_R2C. * @param[in] dimX The dimension in x. The maximum allowed depends on the grid parameters. * @param[in] dimY The dimension in y. The maximum allowed depends on the grid parameters. * @param[in] dimZ The dimension in z. The maximum allowed depends on the grid parameters. * @param[in] localZLength The length in z in space domain of the local MPI rank. 
* @param[in] numLocalElements The number of elements in frequency domain of the local MPI * rank. * @param[in] indexFormat The index format. Only SPFFT_INDEX_TRIPLETS currently supported. * @param[in] indices Pointer to the frequency indices. Posive and negative indexing is supported. * @return Transform * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ TransformFloat create_transform(SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) const; /** * Access a grid parameter. * @return Maximum dimension in x. */ int max_dim_x() const; /** * Access a grid parameter. * @return Maximum dimension in y. */ int max_dim_y() const; /** * Access a grid parameter. * @return Maximum dimension in z. */ int max_dim_z() const; /** * Access a grid parameter. * @return Maximum number of z-columns in frequency domain of the local MPI rank. */ int max_num_local_z_columns() const; /** * Access a grid parameter. * @return Maximum length in z in space domain of the local MPI rank. */ int max_local_z_length() const; /** * Access a grid parameter. * @return The processing unit, the grid has prepared for. Can be SPFFT_PU_HOST or SPFFT_PU_GPU or * SPFFT_PU_HOST | SPFFT_PU_GPU. */ SpfftProcessingUnitType processing_unit() const; /** * Access a grid parameter. * @return The GPU device id used. Always returns 0, if no GPU support is enabled. */ int device_id() const; /** * Access a grid parameter. * @return The exact number of threads used by transforms created from this grid. May be less than * the maximum given to the constructor. Always 1, if not compiled with OpenMP support. */ int num_threads() const; #ifdef SPFFT_MPI MPI_Comm communicator() const; #endif private: /*! \cond PRIVATE */ std::shared_ptr> grid_; /*! 
\endcond */ }; #endif } // namespace spfft #endif SpFFT-1.1.0/include/spfft/multi_transform.h000066400000000000000000000115611457701740000206070ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_MULTI_TRANSFORM_H #define SPFFT_MULTI_TRANSFORM_H #include "spfft/config.h" #include "spfft/transform.h" #include "spfft/types.h" #ifdef __cplusplus extern "C" { #endif /** * Execute multiple independent forward transforms at once by internal pipelining. 
* * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputLocations Input locations for each transform. * @param[out] outputPointers Output pointers for each transform. * @param[in] scalingTypes Scaling types for each transform. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_multi_transform_forward(int numTransforms, SpfftTransform* transforms, const SpfftProcessingUnitType* inputLocations, double* const* outputPointers, const SpfftScalingType* scalingTypes); /** * Execute multiple independent forward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputPointers Input pointers for each transform. * @param[out] outputPointers Output pointers for each transform. * @param[in] scalingTypes Scaling types for each transform. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_multi_transform_forward_ptr(int numTransforms, SpfftTransform* transforms, const double* const* inputPointers, double* const* outputPointers, const SpfftScalingType* scalingTypes); /** * Execute multiple independent backward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputPointers Input pointers for each transform. * @param[in] outputLocations Output locations for each transform. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_multi_transform_backward( int numTransforms, SpfftTransform* transforms, const double* const* inputPointers, const SpfftProcessingUnitType* outputLocations); /** * Execute multiple independent backward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. 
* @param[in] transforms Transforms to execute. * @param[in] inputPointers Input pointers for each transform. * @param[in] outputPointers Output pointers for each transform. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_multi_transform_backward_ptr(int numTransforms, SpfftTransform* transforms, const double* const* inputPointers, double* const* outputPointers); #ifdef __cplusplus } #endif #endif SpFFT-1.1.0/include/spfft/multi_transform.hpp000066400000000000000000000115001457701740000211400ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_MULTI_TRANSFORM_HPP #define SPFFT_MULTI_TRANSFORM_HPP #include "spfft/config.h" #include "spfft/transform.hpp" #include "spfft/types.h" namespace spfft { /** * Execute multiple independent forward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputLocations Input locations for each transform. * @param[out] outputPointers Output pointers for each transform. * @param[in] scalingTypes Scaling types for each transform. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ SPFFT_EXPORT void multi_transform_forward(int numTransforms, Transform* transforms, const SpfftProcessingUnitType* inputLocations, double* const* outputPointers, const SpfftScalingType* scalingTypes); /** * Execute multiple independent forward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputPointers Input pointers for each transform. * @param[out] outputPointers Output pointers for each transform. * @param[in] scalingTypes Scaling types for each transform. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. 
*/ SPFFT_EXPORT void multi_transform_forward(int numTransforms, Transform* transforms, const double* const* inputPointers, double*const* outputPointers, const SpfftScalingType* scalingTypes); /** * Execute multiple independent backward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputPointers Input pointers for each transform. * @param[in] outputLocations Output locations for each transform. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ SPFFT_EXPORT void multi_transform_backward(int numTransforms, Transform* transforms, const double* const* inputPointers, const SpfftProcessingUnitType* outputLocations); /** * Execute multiple independent backward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputPointers Input pointers for each transform. * @param[in] outputPointers Output pointers for each transform. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ SPFFT_EXPORT void multi_transform_backward(int numTransforms, Transform* transforms, const double* const* inputPointers, double* const* outputPointers); } // namespace spfft #endif SpFFT-1.1.0/include/spfft/multi_transform_float.h000066400000000000000000000111101457701740000217620ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_MULTI_TRANSFORM_FLOAT_H #define SPFFT_MULTI_TRANSFORM_FLOAT_H #include "spfft/config.h" #include "spfft/transform_float.h" #include "spfft/types.h" #ifdef __cplusplus extern "C" { #endif /** * Execute multiple independent forward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputLocations Input locations for each transform. * @param[out] outputPointers Output pointers for each transform. * @param[in] scalingTypes Scaling types for each transform. * @return Error code or SPFFT_SUCCESS. 
*/ SPFFT_EXPORT SpfftError spfft_float_multi_transform_forward( int numTransforms, SpfftFloatTransform* transforms, const SpfftProcessingUnitType* inputLocations, float* const* outputPointers, const SpfftScalingType* scalingTypes); /** * Execute multiple independent forward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputPointers Input pointers for each transform. * @param[out] outputPointers Output pointers for each transform. * @param[in] scalingTypes Scaling types for each transform. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_multi_transform_forward_ptr( int numTransforms, SpfftFloatTransform* transforms, const float* const* inputPointers, float* const* outputPointers, const SpfftScalingType* scalingTypes); /** * Execute multiple independent backward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputPointers Input pointers for each transform. * @param[in] outputLocations Output locations for each transform. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_multi_transform_backward( int numTransforms, SpfftFloatTransform* transforms, const float* const* inputPointers, const SpfftProcessingUnitType* outputLocations); /** * Execute multiple independent backward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputPointers Input pointers for each transform. * @param[in] outputPointers Output pointers for each transform. * @return Error code or SPFFT_SUCCESS. 
*/ SPFFT_EXPORT SpfftError spfft_float_multi_transform_backward_ptr(int numTransforms, SpfftFloatTransform* transforms, const float* const* inputPointers, float* const* outputPointers); #ifdef __cplusplus } #endif #endif SpFFT-1.1.0/include/spfft/multi_transform_float.hpp000066400000000000000000000115741457701740000223400ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_MULTI_TRANSFORM_HPP #define SPFFT_MULTI_TRANSFORM_HPP #include "spfft/config.h" #include "spfft/transform_float.hpp" #include "spfft/types.h" namespace spfft { #ifdef SPFFT_SINGLE_PRECISION /** * Execute multiple independent forward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputLocations Input locations for each transform. * @param[out] outputPointers Output pointers for each transform. * @param[in] scalingTypes Scaling types for each transform. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ SPFFT_EXPORT void multi_transform_forward(int numTransforms, TransformFloat* transforms, const SpfftProcessingUnitType* inputLocations, float* const* outputPointers, const SpfftScalingType* scalingTypes); /** * Execute multiple independent forward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputPointers Input pointers for each transform. * @param[out] outputPointers Output pointers for each transform. * @param[in] scalingTypes Scaling types for each transform. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ SPFFT_EXPORT void multi_transform_forward(int numTransforms, TransformFloat* transforms, const float* const* inputPointers, float* const* outputPointers, const SpfftScalingType* scalingTypes); /** * Execute multiple independent backward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputPointers Input pointers for each transform. * @param[in] outputLocations Output locations for each transform. 
* @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ SPFFT_EXPORT void multi_transform_backward(int numTransforms, TransformFloat* transforms, const float* const* inputPointers, const SpfftProcessingUnitType* outputLocations); /** * Execute multiple independent backward transforms at once by internal pipelining. * * @param[in] numTransforms Number of transforms to execute. * @param[in] transforms Transforms to execute. * @param[in] inputPointers Input pointers for each transform. * @param[in] outputPointers Output pointers for each transform. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ SPFFT_EXPORT void multi_transform_backward(int numTransforms, TransformFloat* transforms, const float* const* inputPointers, float* const* outputPointers); #endif } // namespace spfft #endif SpFFT-1.1.0/include/spfft/spfft.f90000066400000000000000000000650111457701740000166520ustar00rootroot00000000000000 ! Copyright (c) 2019 ETH Zurich, Simon Frasch ! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions are met: ! ! 1. Redistributions of source code must retain the above copyright notice, ! this list of conditions and the following disclaimer. ! 2. Redistributions in binary form must reproduce the above copyright ! notice, this list of conditions and the following disclaimer in the ! documentation and/or other materials provided with the distribution. ! 3. Neither the name of the copyright holder nor the names of its contributors ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ! 
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ! ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE ! LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. module spfft use iso_c_binding implicit none ! Constants integer(c_int), parameter :: & SPFFT_EXCH_DEFAULT = 0, & SPFFT_EXCH_BUFFERED = 1, & SPFFT_EXCH_BUFFERED_FLOAT = 2, & SPFFT_EXCH_COMPACT_BUFFERED = 3, & SPFFT_EXCH_COMPACT_BUFFERED_FLOAT = 4, & SPFFT_EXCH_UNBUFFERED = 5, & SPFFT_PU_HOST = 1, & SPFFT_PU_GPU = 2, & SPFFT_INDEX_TRIPLETS = 0, & SPFFT_TRANS_C2C = 0, & SPFFT_TRANS_R2C = 1, & SPFFT_NO_SCALING = 0, & SPFFT_FULL_SCALING = 1, & SPFFT_EXEC_SYNCHRONOUS = 0, & SPFFT_EXEC_ASYNCHRONOUS = 1, & SPFFT_SUCCESS = 0, & SPFFT_UNKNOWN_ERROR = 1, & SPFFT_INVALID_HANDLE_ERROR = 2, & SPFFT_OVERFLOW_ERROR = 3, & SPFFT_ALLOCATION_ERROR = 4, & SPFFT_INVALID_PARAMETER_ERROR = 5, & SPFFT_DUPLICATE_INDICES_ERROR = 6, & SPFFT_INVALID_INDICES_ERROR = 7, & SPFFT_MPI_SUPPORT_ERROR = 8, & SPFFT_MPI_ERROR = 9, & SPFFT_MPI_PARAMETER_MISMATCH_ERROR = 10, & SPFFT_HOST_EXECUTION_ERROR = 11, & SPFFT_FFTW_ERROR = 12, & SPFFT_GPU_ERROR = 13, & SPFFT_GPU_PRECEDING_ERROR = 14, & SPFFT_GPU_SUPPORT_ERROR = 15, & SPFFT_GPU_ALLOCATION_ERROR = 16, & SPFFT_GPU_LAUNCH_ERROR = 17, & SPFFT_GPU_NO_DEVICE_ERROR = 18, & SPFFT_GPU_INVALID_VALUE_ERROR = 19, & SPFFT_GPU_INVALID_DEVICE_PTR_ERROR = 20, & SPFFT_GPU_COPY_ERROR = 21, & SPFFT_GPU_FFT_ERROR = 22 interface !-------------------------- ! 
Grid !-------------------------- integer(c_int) function spfft_grid_create(grid, maxDimX, maxDimY, maxDimZ, & maxNumLocalZColumns, processingUnit, maxNumThreads) bind(C) use iso_c_binding type(c_ptr), intent(out) :: grid integer(c_int), value :: maxDimX integer(c_int), value :: maxDimY integer(c_int), value :: maxDimZ integer(c_int), value :: maxNumLocalZColumns integer(c_int), value :: processingUnit integer(c_int), value :: maxNumThreads end function integer(c_int) function spfft_grid_create_distributed(grid, maxDimX, maxDimY, maxDimZ, & maxNumLocalZColumns, maxLocalZLength, processingUnit, maxNumThreads,& comm, exchangeType) bind(C, name='spfft_grid_create_distributed_fortran') use iso_c_binding type(c_ptr), intent(out) :: grid integer(c_int), value :: maxDimX integer(c_int), value :: maxDimY integer(c_int), value :: maxDimZ integer(c_int), value :: maxNumLocalZColumns integer(c_int), value :: maxLocalZLength integer(c_int), value :: processingUnit integer(c_int), value :: maxNumThreads integer(c_int), value :: comm integer(c_int), value :: exchangeType end function integer(c_int) function spfft_grid_destroy(grid) bind(C) use iso_c_binding type(c_ptr), value :: grid end function integer(c_int) function spfft_grid_max_dim_x(grid, dimX) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: dimX end function integer(c_int) function spfft_grid_max_dim_y(grid, dimY) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: dimY end function integer(c_int) function spfft_grid_max_dim_z(grid, dimZ) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: dimZ end function integer(c_int) function spfft_grid_max_num_local_z_columns(grid, maxNumLocalZColumns) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: maxNumLocalZColumns end function integer(c_int) function spfft_grid_max_local_z_length(grid, maxLocalZLength) bind(C) use iso_c_binding type(c_ptr), 
value :: grid integer(c_int), intent(out) :: maxLocalZLength end function integer(c_int) function spfft_grid_processing_unit(grid, processingUnit) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: processingUnit end function integer(c_int) function spfft_grid_device_id(grid, deviceId) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: deviceId end function integer(c_int) function spfft_grid_num_threads(grid, numThreads) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: numThreads end function integer(c_int) function spfft_grid_communicator(grid, comm) & bind(C, name="spfft_grid_communicator_fortran") use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: comm end function !-------------------------- ! Grid Float !-------------------------- integer(c_int) function spfft_float_grid_create(grid, maxDimX, maxDimY, maxDimZ, & maxNumLocalZColumns, processingUnit, maxNumThreads) bind(C) use iso_c_binding type(c_ptr), intent(out) :: grid integer(c_int), value :: maxDimX integer(c_int), value :: maxDimY integer(c_int), value :: maxDimZ integer(c_int), value :: maxNumLocalZColumns integer(c_int), value :: processingUnit integer(c_int), value :: maxNumThreads end function integer(c_int) function spfft_float_grid_create_distributed(grid, maxDimX, maxDimY, maxDimZ, & maxNumLocalZColumns, maxLocalZLength, processingUnit, maxNumThreads,& comm, exchangeType) bind(C, name='spfft_float_grid_create_distributed_fortran') use iso_c_binding type(c_ptr), intent(out) :: grid integer(c_int), value :: maxDimX integer(c_int), value :: maxDimY integer(c_int), value :: maxDimZ integer(c_int), value :: maxNumLocalZColumns integer(c_int), value :: maxLocalZLength integer(c_int), value :: processingUnit integer(c_int), value :: maxNumThreads integer(c_int), value :: comm integer(c_int), value :: exchangeType end function integer(c_int) function spfft_float_grid_destroy(grid) 
bind(C) use iso_c_binding type(c_ptr), value :: grid end function integer(c_int) function spfft_float_grid_max_dim_x(grid, dimX) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: dimX end function integer(c_int) function spfft_float_grid_max_dim_y(grid, dimY) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: dimY end function integer(c_int) function spfft_float_grid_max_dim_z(grid, dimZ) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: dimZ end function integer(c_int) function spfft_float_grid_max_num_local_z_columns(grid, maxNumLocalZColumns) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: maxNumLocalZColumns end function integer(c_int) function spfft_float_grid_max_local_z_length(grid, maxLocalZLength) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: maxLocalZLength end function integer(c_int) function spfft_float_grid_processing_unit(grid, processingUnit) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: processingUnit end function integer(c_int) function spfft_float_grid_device_id(grid, deviceId) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: deviceId end function integer(c_int) function spfft_float_grid_num_threads(grid, numThreads) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: numThreads end function integer(c_int) function spfft_transform_execution_mode(grid, mode) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: mode end function integer(c_int) function spfft_transform_set_execution_mode(grid, mode) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), value :: mode end function integer(c_int) function spfft_float_grid_communicator(grid, comm) & bind(C, name="spfft_float_grid_communicator_fortran") use iso_c_binding type(c_ptr), 
value :: grid integer(c_int), intent(out) :: comm end function !-------------------------- ! Transform !-------------------------- integer(c_int) function spfft_transform_create(transform, grid, processingUnit, & transformType, dimX, dimY, dimZ, localZLength, numLocalElements, indexFormat, indices) bind(C) use iso_c_binding type(c_ptr), intent(out) :: transform type(c_ptr), value :: grid integer(c_int), value :: processingUnit integer(c_int), value :: transformType integer(c_int), value :: dimX integer(c_int), value :: dimY integer(c_int), value :: dimZ integer(c_int), value :: localZLength integer(c_int), value :: numLocalElements integer(c_int), value :: indexFormat integer(c_int), dimension(*), intent(in) :: indices end function integer(c_int) function spfft_transform_create_independent(transform, maxNumThreads, & processingUnit, transformType, dimX, dimY, dimZ, numLocalElements, indexFormat, & indices) bind(C) use iso_c_binding type(c_ptr), intent(out) :: transform integer(c_int), value :: maxNumThreads integer(c_int), value :: processingUnit integer(c_int), value :: transformType integer(c_int), value :: dimX integer(c_int), value :: dimY integer(c_int), value :: dimZ integer(c_int), value :: numLocalElements integer(c_int), value :: indexFormat integer(c_int), dimension(*), intent(in) :: indices end function integer(c_int) function spfft_transform_create_independent_distributed(transform, & maxNumThreads, comm, exchangeType, processingUnit, transformType, & dimX, dimY, dimZ, localZLength, numLocalElements, indexFormat, indices) & bind(C, name="spfft_transform_create_independent_distributed_fortran") use iso_c_binding type(c_ptr), intent(out) :: transform integer(c_int), value :: maxNumThreads integer(c_int), value :: comm integer(c_int), value :: exchangeType integer(c_int), value :: processingUnit integer(c_int), value :: transformType integer(c_int), value :: dimX integer(c_int), value :: dimY integer(c_int), value :: dimZ integer(c_int), value :: 
localZLength integer(c_int), value :: numLocalElements integer(c_int), value :: indexFormat integer(c_int), dimension(*), intent(in) :: indices end function integer(c_int) function spfft_transform_destroy(transform) bind(C) use iso_c_binding type(c_ptr), value :: transform end function integer(c_int) function spfft_transform_clone(transform, newTransform) bind(C) use iso_c_binding type(c_ptr), value :: transform type(c_ptr), intent(out) :: newTransform end function integer(c_int) function spfft_transform_backward(transform, input, & outputLocation) bind(C) use iso_c_binding type(c_ptr), value :: transform complex(c_double), dimension(*), intent(in) :: input integer(c_int), value :: outputLocation end function integer(c_int) function spfft_transform_backward_ptr(transform, input, & output) bind(C) use iso_c_binding type(c_ptr), value :: transform complex(c_double), dimension(*), intent(in) :: input real(c_double), dimension(*), intent(out) :: output end function integer(c_int) function spfft_transform_forward(transform, inputLocation, & output, scaling) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), value :: inputLocation complex(c_double), dimension(*), intent(out) :: output integer(c_int), value :: scaling end function integer(c_int) function spfft_transform_forward_ptr(transform, input, & output, scaling) bind(C) use iso_c_binding type(c_ptr), value :: transform real(c_double), dimension(*), intent(in) :: input complex(c_double), dimension(*), intent(out) :: output integer(c_int), value :: scaling end function integer(c_int) function spfft_transform_get_space_domain(transform, & dataLocation, dataPtr) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), value :: dataLocation type(c_ptr), intent(out) :: dataPtr end function integer(c_int) function spfft_transform_dim_x(transform, dimX) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: dimX end function integer(c_int) function 
spfft_transform_dim_y(transform, dimY) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: dimY end function integer(c_int) function spfft_transform_dim_z(transform, dimZ) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: dimZ end function integer(c_int) function spfft_transform_local_z_length(transform, localZLength) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: localZLength end function integer(c_int) function spfft_transform_local_slice_size(transform, size) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: size end function integer(c_int) function spfft_transform_local_z_offset(transform, offset) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: offset end function integer(c_int) function spfft_transform_global_size(transform, globalSize) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_long_long), intent(out) :: globalSize end function integer(c_int) function spfft_transform_num_local_elements(transform, numLocalElements) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: numLocalElements end function integer(c_int) function spfft_transform_num_global_elements(transform, numGlobalElements) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_long_long), intent(out) :: numGlobalElements end function integer(c_int) function spfft_transform_device_id(transform, deviceId) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: deviceId end function integer(c_int) function spfft_transform_num_threads(transform, numThreads) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: numThreads end function integer(c_int) function spfft_transform_communicator(transform, comm) & bind(C, name="spfft_transform_communicator_fortran") use 
iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: comm end function integer(c_int) function spfft_multi_transform_forward(numTransforms, transforms,& inputLocations, outputPointers, scalingTypes) bind(C) use iso_c_binding integer(c_int), value :: numTransforms type(c_ptr), value :: transforms type(c_ptr), value :: inputLocations type(c_ptr), value :: outputPointers type(c_ptr), value :: scalingTypes end function integer(c_int) function spfft_multi_transform_forward_ptr(numTransforms, transforms,& inputPointers, outputPointers, scalingTypes) bind(C) use iso_c_binding integer(c_int), value :: numTransforms type(c_ptr), value :: transforms type(c_ptr), value :: inputPointers type(c_ptr), value :: outputPointers type(c_ptr), value :: scalingTypes end function integer(c_int) function spfft_multi_transform_backward(numTransforms, transforms,& inputPointers, outputLocations) bind(C) use iso_c_binding integer(c_int), value :: numTransforms type(c_ptr), value :: transforms type(c_ptr), value :: inputPointers type(c_ptr), value :: outputLocations end function integer(c_int) function spfft_multi_transform_backward_ptr(numTransforms, transforms,& inputPointers, outputPointers) bind(C) use iso_c_binding integer(c_int), value :: numTransforms type(c_ptr), value :: transforms type(c_ptr), value :: inputPointers type(c_ptr), value :: outputPointers end function !-------------------------- ! 
Transform Float !-------------------------- integer(c_int) function spfft_float_transform_create(transform, grid, processingUnit, & transformType, dimX, dimY, dimZ, localZLength, numLocalElements, indexFormat, indices) bind(C) use iso_c_binding type(c_ptr), intent(out) :: transform type(c_ptr), value :: grid integer(c_int), value :: processingUnit integer(c_int), value :: transformType integer(c_int), value :: dimX integer(c_int), value :: dimY integer(c_int), value :: dimZ integer(c_int), value :: localZLength integer(c_int), value :: numLocalElements integer(c_int), value :: indexFormat integer(c_int), dimension(*), intent(in) :: indices end function integer(c_int) function spfft_float_transform_create_independent(transform, maxNumThreads, & processingUnit, transformType, dimX, dimY, dimZ, numLocalElements, indexFormat, & indices) bind(C) use iso_c_binding type(c_ptr), intent(out) :: transform integer(c_int), value :: maxNumThreads integer(c_int), value :: processingUnit integer(c_int), value :: transformType integer(c_int), value :: dimX integer(c_int), value :: dimY integer(c_int), value :: dimZ integer(c_int), value :: numLocalElements integer(c_int), value :: indexFormat integer(c_int), dimension(*), intent(in) :: indices end function integer(c_int) function spfft_float_transform_create_independent_distributed(transform, & maxNumThreads, comm, exchangeType, processingUnit, transformType, & dimX, dimY, dimZ, localZLength, numLocalElements, indexFormat, indices) & bind(C, name="spfft_float_transform_create_independent_distributed_fortran") use iso_c_binding type(c_ptr), intent(out) :: transform integer(c_int), value :: maxNumThreads integer(c_int), value :: comm integer(c_int), value :: exchangeType integer(c_int), value :: processingUnit integer(c_int), value :: transformType integer(c_int), value :: dimX integer(c_int), value :: dimY integer(c_int), value :: dimZ integer(c_int), value :: localZLength integer(c_int), value :: numLocalElements integer(c_int), 
value :: indexFormat integer(c_int), dimension(*), intent(in) :: indices end function integer(c_int) function spfft_float_transform_destroy(transform) bind(C) use iso_c_binding type(c_ptr), value :: transform end function integer(c_int) function spfft_float_transform_clone(transform, newTransform) bind(C) use iso_c_binding type(c_ptr), value :: transform type(c_ptr), intent(out) :: newTransform end function integer(c_int) function spfft_float_transform_backward(transform, input, & outputLocation) bind(C) use iso_c_binding type(c_ptr), value :: transform complex(c_float), dimension(*), intent(in) :: input integer(c_int), value :: outputLocation end function integer(c_int) function spfft_float_transform_backward_ptr(transform, input, & output) bind(C) use iso_c_binding type(c_ptr), value :: transform complex(c_float), dimension(*), intent(in) :: input real(c_float), dimension(*), intent(out) :: output end function integer(c_int) function spfft_float_transform_forward(transform, inputLocation, & output, scaling) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), value :: inputLocation complex(c_float), dimension(*), intent(out) :: output integer(c_int), value :: scaling end function integer(c_int) function spfft_float_transform_forward_ptr(transform, input, & output, scaling) bind(C) use iso_c_binding type(c_ptr), value :: transform real(c_float), dimension(*), intent(in) :: input complex(c_float), dimension(*), intent(out) :: output integer(c_int), value :: scaling end function integer(c_int) function spfft_float_transform_get_space_domain(transform, & dataLocation, dataPtr) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), value :: dataLocation type(c_ptr), intent(out) :: dataPtr end function integer(c_int) function spfft_float_transform_dim_x(transform, dimX) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: dimX end function integer(c_int) function 
spfft_float_transform_dim_y(transform, dimY) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: dimY end function integer(c_int) function spfft_float_transform_dim_z(transform, dimZ) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: dimZ end function integer(c_int) function spfft_float_transform_local_z_length(transform, localZLength) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: localZLength end function integer(c_int) function spfft_float_transform_local_slice_size(transform, size) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: size end function integer(c_int) function spfft_float_transform_local_z_offset(transform, offset) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: offset end function integer(c_int) function spfft_float_transform_global_size(transform, globalSize) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_long_long), intent(out) :: globalSize end function integer(c_int) function spfft_float_transform_num_local_elements(transform, numLocalElements) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: numLocalElements end function integer(c_int) function spfft_float_transform_num_global_elements(transform, numGlobalElements) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_long_long), intent(out) :: numGlobalElements end function integer(c_int) function spfft_float_transform_device_id(transform, deviceId) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: deviceId end function integer(c_int) function spfft_float_transform_num_threads(transform, numThreads) bind(C) use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: numThreads end function integer(c_int) function spfft_float_transform_execution_mode(grid, mode) 
bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), intent(out) :: mode end function integer(c_int) function spfft_float_transform_set_execution_mode(grid, mode) bind(C) use iso_c_binding type(c_ptr), value :: grid integer(c_int), value :: mode end function integer(c_int) function spfft_float_transform_communicator(transform, comm) & bind(C, name="spfft_float_transform_communicator_fortran") use iso_c_binding type(c_ptr), value :: transform integer(c_int), intent(out) :: comm end function integer(c_int) function spfft_float_multi_transform_forward(numTransforms, transforms,& inputLocations, outputPointers, scalingTypes) bind(C) use iso_c_binding integer(c_int), value :: numTransforms type(c_ptr), value :: transforms type(c_ptr), value :: inputLocations type(c_ptr), value :: outputPointers type(c_ptr), value :: scalingTypes end function integer(c_int) function spfft_float_multi_transform_forward_ptr(numTransforms, transforms,& inputPointers, outputPointers, scalingTypes) bind(C) use iso_c_binding integer(c_int), value :: numTransforms type(c_ptr), value :: transforms type(c_ptr), value :: inputPointers type(c_ptr), value :: outputPointers type(c_ptr), value :: scalingTypes end function integer(c_int) function spfft_float_multi_transform_backward(numTransforms, transforms,& inputPointers, outputLocations) bind(C) use iso_c_binding integer(c_int), value :: numTransforms type(c_ptr), value :: transforms type(c_ptr), value :: inputPointers type(c_ptr), value :: outputLocations end function integer(c_int) function spfft_float_multi_transform_backward_ptr(numTransforms, transforms,& inputPointers, outputPointers) bind(C) use iso_c_binding integer(c_int), value :: numTransforms type(c_ptr), value :: transforms type(c_ptr), value :: inputPointers type(c_ptr), value :: outputPointers end function end interface end SpFFT-1.1.0/include/spfft/spfft.h000066400000000000000000000033411457701740000165010ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH 
Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_SPFFT_H #define SPFFT_SPFFT_H #include "spfft/config.h" #include "spfft/grid.h" #include "spfft/grid_float.h" #include "spfft/transform.h" #include "spfft/transform_float.h" #endif SpFFT-1.1.0/include/spfft/spfft.hpp000066400000000000000000000034751457701740000170510ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_SPFFT_HPP #define SPFFT_SPFFT_HPP #include "spfft/config.h" #include "spfft/grid.hpp" #include "spfft/grid_float.hpp" #include "spfft/multi_transform.hpp" #include "spfft/multi_transform_float.hpp" #include "spfft/transform.hpp" #include "spfft/transform_float.hpp" #endif SpFFT-1.1.0/include/spfft/transform.h000066400000000000000000000365711457701740000174050ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_TRANSFORM_H #define SPFFT_TRANSFORM_H #include "spfft/config.h" #include "spfft/errors.h" #include "spfft/grid.h" #include "spfft/types.h" #ifdef SPFFT_MPI #include #endif #ifdef __cplusplus extern "C" { #endif /** * Transform handle. */ typedef void* SpfftTransform; /** * Create a transform from a grid handle. * Thread-safe if no FFTW calls are executed concurrently. * * @param[out] transform Handle to the transform. * @param[in] grid Handle to the grid, with which the transform is created. * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU and be supported by the grid itself. * @param[in] transformType The transform type (complex to complex or real to complex). Can be * SPFFT_TRANS_C2C or SPFFT_TRANS_R2C. * @param[in] dimX The dimension in x. The maximum allowed depends on the grid parameters. * @param[in] dimY The dimension in y. The maximum allowed depends on the grid parameters. * @param[in] dimZ The dimension in z. The maximum allowed depends on the grid parameters. * @param[in] localZLength The length in z in space domain of the local MPI rank. * @param[in] numLocalElements The number of elements in frequency domain of the local MPI * rank. * @param[in] indexFormat The index format. Only SPFFT_INDEX_TRIPLETS currently supported. * @param[in] indices Pointer to the frequency indices. Centered indexing is allowed. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_create(SpfftTransform* transform, SpfftGrid grid, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); /** * Create a transform without a grid handle. * Thread-safe if no FFTW calls are executed concurrently. * * @param[out] transform Handle to the transform. * @param[in] maxNumThreads The maximum number of threads to use. 
* @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU. * @param[in] transformType The transform type (complex to complex or real to complex). Can be * SPFFT_TRANS_C2C or SPFFT_TRANS_R2C. * @param[in] dimX The dimension in x. * @param[in] dimY The dimension in y. * @param[in] dimZ The dimension in z. * @param[in] numLocalElements The number of elements in frequency domain. * @param[in] indexFormat The index format. Only SPFFT_INDEX_TRIPLETS currently supported. * @param[in] indices Pointer to frequency indices. Centered indexing is allowed. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_create_independent( SpfftTransform* transform, int maxNumThreads, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); #ifdef SPFFT_MPI /** * Create a distributed transform without a grid handle. * Thread-safe if no FFTW calls are executed concurrently. * * @param[out] transform Handle to the transform. * @param[in] maxNumThreads The maximum number of threads to use. * @param[in] comm The MPI communicator to use. Will be duplicated for internal use. * @param[in] exchangeType The type of MPI exchange to use. Possible values are * SPFFT_EXCH_DEFAULT, SPFFT_EXCH_BUFFERED, SPFFT_EXCH_COMPACT_BUFFERED and SPFFT_EXCH_UNBUFFERED. * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU. * @param[in] transformType The transform type (complex to complex or real to complex). Can be * SPFFT_TRANS_C2C or SPFFT_TRANS_R2C. * @param[in] dimX The dimension in x. * @param[in] dimY The dimension in y. * @param[in] dimZ The dimension in z. * @param[in] localZLength The length in z in space domain of the local MPI rank. * @param[in] numLocalElements The number of elements in frequency domain of the local MPI * rank. 
* @param[in] indexFormat The index format. Only SPFFT_INDEX_TRIPLETS currently supported. * @param[in] indices Pointer to frequency indices. Centered indexing is allowed. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_create_independent_distributed( SpfftTransform* transform, int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); #endif /** * Destroy a transform. * * @param[in] transform Handle to the transform. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_destroy(SpfftTransform transform); /** * Clone a transform. * * @param[in] transform Handle to the transform. * @param[out] newTransform Independent transform with the same parameters, but with new underlying * grid. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_clone(SpfftTransform transform, SpfftTransform* newTransform); /** * Execute a forward transform from space domain to frequency domain. * * @param[in] transform Handle to the transform. * @param[in] inputLocation The processing unit, to take the input from. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU (if GPU is set as execution unit). * @param[out] output Pointer to memory, where the frequency domain elements are written to. Can * be located at Host or GPU memory (if GPU is set as processing unit). * @param[in] scaling Controls scaling of output. SPFFT_NO_SCALING to disable or * SPFFT_FULL_SCALING to scale by factor 1 / (dim_x() * dim_y() * dim_z()). * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_forward(SpfftTransform transform, SpfftProcessingUnitType inputLocation, double* output, SpfftScalingType scaling); /** * Execute a forward transform from space domain to frequency domain. 
* * @param[in] transform Handle to the transform. * @param[in] input Pointer to memory, to read space domain data from. Can * be located at Host or GPU memory (if GPU is set as processing unit). * @param[out] output Pointer to memory, where the frequency domain elements are written to. Can * be located at Host or GPU memory (if GPU is set as processing unit). * @param[in] scaling Controls scaling of output. SPFFT_NO_SCALING to disable or * SPFFT_FULL_SCALING to scale by factor 1 / (dim_x() * dim_y() * dim_z()). * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_forward_ptr(SpfftTransform transform, const double* input, double* output, SpfftScalingType scaling); /** * Execute a backward transform from frequency domain to space domain. * * @param[in] transform Handle to the transform. * @param[in] input Input data in frequency domain. Must match the indices provided at transform * creation. Can be located at Host or GPU memory, if GPU is set as processing unit. * @param[in] outputLocation The processing unit, to place the output at. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU (if GPU is set as execution unit). * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_backward(SpfftTransform transform, const double* input, SpfftProcessingUnitType outputLocation); /** * Execute a backward transform from frequency domain to space domain. * * @param[in] transform Handle to the transform. * @param[in] input Input data in frequency domain. Must match the indices provided at transform * @param[out] output Pointer to memory to write output in frequency domain to. Can be located at * Host or GPU memory, if GPU is set as processing unit. * creation. Can be located at Host or GPU memory, if GPU is set as processing unit. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_backward_ptr(SpfftTransform transform, const double* input, double* output); /** * Provides access to the space domain data. 
* * @param[in] transform Handle to the transform. * @param[in] dataLocation The processing unit to query for the data. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU (if GPU is set as execution unit). * @param[out] data Pointer to space domain data on given processing unit. Alignment is guaranteed * to fulfill requirements for std::complex and C language complex types. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_get_space_domain(SpfftTransform transform, SpfftProcessingUnitType dataLocation, double** data); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] dimX Dimension in x. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_dim_x(SpfftTransform transform, int* dimX); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] dimY Dimension in y. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_dim_y(SpfftTransform transform, int* dimY); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] dimZ Dimension in z. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_dim_z(SpfftTransform transform, int* dimZ); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] localZLength Size in z of the slice in space domain on the local MPI rank. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_local_z_length(SpfftTransform transform, int* localZLength); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] size Number of elements in the space domain slice held by the local MPI rank. * @return Error code or SPFFT_SUCCESS.
*/ SPFFT_EXPORT SpfftError spfft_transform_local_slice_size(SpfftTransform transform, int* size); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] offset Offset in z of the space domain slice held by the local MPI rank. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_local_z_offset(SpfftTransform transform, int* offset); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] globalSize Global number of elements in space domain. Equals dim_x() * dim_y() * * dim_z(). * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_global_size(SpfftTransform transform, long long int* globalSize); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] numLocalElements Number of local elements in frequency domain. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_num_local_elements(SpfftTransform transform, int* numLocalElements); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] numGlobalElements Global number of elements in frequency domain. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_num_global_elements(SpfftTransform transform, long long int* numGlobalElements); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] deviceId The GPU device id used. Returns always 0, if no GPU support is enabled. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_device_id(SpfftTransform transform, int* deviceId); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] numThreads The exact number of threads used by transforms created from this grid. May * be less than the maximum given to the constructor.
Always 1, if not compiled with OpenMP support. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_num_threads(SpfftTransform transform, int* numThreads); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] mode The execution mode. Only affects execution on GPU. * Defaults to SPFFT_EXEC_SYNCHRONOUS. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_execution_mode(SpfftTransform transform, SpfftExecType* mode); /** * Set a transform parameter. * @param[in] transform Handle to the transform. * @param[in] mode The execution mode to change to. Only affects execution on GPU. * Defaults to SPFFT_EXEC_SYNCHRONOUS. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_set_execution_mode(SpfftTransform transform, SpfftExecType mode); #ifdef SPFFT_MPI /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] comm The internal MPI communicator. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_transform_communicator(SpfftTransform transform, MPI_Comm* comm); #endif #ifdef __cplusplus } #endif #endif SpFFT-1.1.0/include/spfft/transform.hpp000066400000000000000000000274521457701740000177430ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TRANSFORM_HPP #define SPFFT_TRANSFORM_HPP #include #include "spfft/config.h" #include "spfft/types.h" #ifdef SPFFT_MPI #include #endif namespace spfft { template class SPFFT_NO_EXPORT TransformInternal; class SPFFT_EXPORT Grid; template class SPFFT_NO_EXPORT MultiTransformInternal; template class SPFFT_NO_EXPORT GridInternal; /** * A transform in double precision with fixed dimensions. Shares memory with other transform created * from the same Grid object. */ class SPFFT_EXPORT Transform { public: using ValueType = double; /** * Create a transform without a grid handle. * Thread-safe if no FFTW calls are executed concurrently. * * @param[in] maxNumThreads The maximum number of threads to use. * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU. * @param[in] transformType The transform type (complex to complex or real to complex). Can be * SPFFT_TRANS_C2C or SPFFT_TRANS_R2C. * @param[in] dimX The dimension in x. 
* @param[in] dimY The dimension in y. * @param[in] dimZ The dimension in z. * @param[in] numLocalElements The number of elements in frequency domain. * @param[in] indexFormat The index format. Only SPFFT_INDEX_TRIPLETS currently supported. * @param[in] indices Pointer to frequency indices. Centered indexing is allowed. */ Transform(int maxNumThreads, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); #ifdef SPFFT_MPI /** * Create a distributed transform without a grid handle. * Thread-safe if no FFTW calls are executed concurrently. * * @param[in] maxNumThreads The maximum number of threads to use. * @param[in] comm The MPI communicator to use. Will be duplicated for internal use. * @param[in] exchangeType The type of MPI exchange to use. Possible values are * SPFFT_EXCH_DEFAULT, SPFFT_EXCH_BUFFERED, SPFFT_EXCH_COMPACT_BUFFERED and SPFFT_EXCH_UNBUFFERED. * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU. * @param[in] transformType The transform type (complex to complex or real to complex). Can be * SPFFT_TRANS_C2C or SPFFT_TRANS_R2C. * @param[in] dimX The dimension in x. * @param[in] dimY The dimension in y. * @param[in] dimZ The dimension in z. * @param[in] localZLength The length in z in space domain of the local MPI rank. * @param[in] numLocalElements The number of elements in frequency domain of the local MPI * rank. * @param[in] indexFormat The index format. Only SPFFT_INDEX_TRIPLETS currently supported. * @param[in] indices Pointer to frequency indices. Centered indexing is allowed. 
*/ Transform(int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); #endif /** * Default copy constructor. */ Transform(const Transform&) = default; /** * Default move constructor. */ Transform(Transform&&) = default; /** * Default copy operator. */ Transform& operator=(const Transform&) = default; /** * Default move operator. */ Transform& operator=(Transform&&) = default; /** * Clone transform. * * @return Independent transform with the same parameters, but with new underlying grid. */ Transform clone() const; /** * Access a transform parameter. * @return Type of transform. */ SpfftTransformType type() const; /** * Access a transform parameter. * @return Dimension in x. */ int dim_x() const; /** * Access a transform parameter. * @return Dimension in y. */ int dim_y() const; /** * Access a transform parameter. * @return Dimension in z. */ int dim_z() const; /** * Access a transform parameter. * @return Length in z of the space domain slice held by the local MPI rank. */ int local_z_length() const; /** * Access a transform parameter. * @return Offset in z of the space domain slice held by the local MPI rank. */ int local_z_offset() const; /** * Access a transform parameter. * @return Number of elements in the space domain slice held by the local MPI rank. */ int local_slice_size() const; /** * Access a transform parameter. * @return Global number of elements in space domain. Equals dim_x() * dim_y() * dim_z(). */ long long int global_size() const; /** * Access a transform parameter. * @return Number of elements in frequency domain. */ int num_local_elements() const; /** * Access a transform parameter. * @return Global number of elements in frequency domain. */ long long int num_global_elements() const; /** * Access a transform parameter. 
* @return The processing unit used for calculations. Can be SPFFT_PU_HOST or SPFFT_PU_GPU. */ SpfftProcessingUnitType processing_unit() const; /** * Access a transform parameter. * @return The GPU device id used. Returns always 0, if no GPU support is enabled. */ int device_id() const; /** * Access a transform parameter. * @return The exact number of threads used by transforms created from this grid. May be less than * the maximum given to the constructor. Always 1, if not compiled with OpenMP support. */ int num_threads() const; /** * Access a transform parameter. * @return The execution mode. Only affects execution on GPU. Defaults to SPFFT_EXEC_SYNCHRONOUS. */ SpfftExecType execution_mode() const; /** * Set a transform parameter. * @param[in] mode The execution mode to change to. Only affects execution on GPU. * Defaults to SPFFT_EXEC_SYNCHRONOUS. */ void set_execution_mode(SpfftExecType mode); #ifdef SPFFT_MPI /** * Access a transform parameter. * @return The internal MPI communicator. */ MPI_Comm communicator() const; #endif /** * Provides access to the space domain data. * * @param[in] dataLocation The processing unit to query for the data. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU (if GPU is set as execution unit). * @return Pointer to space domain data on given processing unit. Alignment is guaranteed to * fulfill requirements for std::complex and C language complex types. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ double* space_domain_data(SpfftProcessingUnitType dataLocation); /** * Execute a forward transform from space domain to frequency domain. * * @param[in] inputLocation The processing unit, to take the input from. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU (if GPU is set as execution unit). * @param[out] output Pointer to memory, where the frequency domain elements are written to. Can * be located at Host or GPU memory (if GPU is set as processing unit). 
* @param[in] scaling Controls scaling of output. SPFFT_NO_SCALING to disable or * SPFFT_FULL_SCALING to scale by factor 1 / (dim_x() * dim_y() * dim_z()). * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ void forward(SpfftProcessingUnitType inputLocation, double* output, SpfftScalingType scaling = SPFFT_NO_SCALING); /** * Execute a forward transform from space domain to frequency domain. * * @param[in] input Pointer to memory, to read space domain data from. Can * be located at Host or GPU memory (if GPU is set as processing unit). * @param[out] output Pointer to memory, where the frequency domain elements are written to. Can * be located at Host or GPU memory (if GPU is set as processing unit). * @param[in] scaling Controls scaling of output. SPFFT_NO_SCALING to disable or * SPFFT_FULL_SCALING to scale by factor 1 / (dim_x() * dim_y() * dim_z()). * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ void forward(const double* input, double* output, SpfftScalingType scaling = SPFFT_NO_SCALING); /** * Execute a backward transform from frequency domain to space domain. * * @param[in] input Input data in frequency domain. Must match the indices provided at transform * creation. Can be located at Host or GPU memory, if GPU is set as processing unit. * @param[in] outputLocation The processing unit, to place the output at. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU (if GPU is set as execution unit). * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ void backward(const double* input, SpfftProcessingUnitType outputLocation); /** * Execute a backward transform from frequency domain to space domain. * * @param[in] input Input data in frequency domain. Must match the indices provided at transform * creation.
Can be located at Host or GPU memory, if GPU is set as processing unit. * @param[out] output Pointer to memory to write output in space domain to. Can be located at * Host or GPU memory, if GPU is set as processing unit. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ void backward(const double* input, double* output); private: /*! \cond PRIVATE */ friend Grid; friend MultiTransformInternal; SPFFT_NO_EXPORT Transform(const std::shared_ptr>& grid, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); SPFFT_NO_EXPORT explicit Transform(std::shared_ptr> transform); std::shared_ptr> transform_; /*! \endcond */ }; } // namespace spfft #endif SpFFT-1.1.0/include/spfft/transform_float.h000066400000000000000000000400221457701740000205540ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TRANSFORM_FLOAT_H #define SPFFT_TRANSFORM_FLOAT_H #include "spfft/config.h" #include "spfft/errors.h" #include "spfft/grid_float.h" #include "spfft/types.h" #ifdef SPFFT_MPI #include #endif #ifdef __cplusplus extern "C" { #endif /** * Transform handle. */ typedef void* SpfftFloatTransform; /** * Create a single precision transform from a single precision grid handle. * Thread-safe if no FFTW calls are executed concurrently. * * @param[out] transform Handle to the transform. * @param[in] grid Handle to the grid, with which the transform is created. * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU and be supported by the grid itself. * @param[in] transformType The transform type (complex to complex or real to complex). Can be * SPFFT_TRANS_C2C or SPFFT_TRANS_R2C. * @param[in] dimX The dimension in x. The maximum allowed depends on the grid parameters. * @param[in] dimY The dimension in y. The maximum allowed depends on the grid parameters. * @param[in] dimZ The dimension in z. The maximum allowed depends on the grid parameters. * @param[in] localZLength The length in z in space domain of the local MPI rank.
* @param[in] numLocalElements The number of elements in frequency domain of the local MPI * rank. * @param[in] indexFormat The index format. Only SPFFT_INDEX_TRIPLETS currently supported. * @param[in] indices Pointer to the frequency indices. Positive and negative indexing is supported. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_create( SpfftFloatTransform* transform, SpfftFloatGrid grid, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); /** * Create a transform without a grid handle. * Thread-safe if no FFTW calls are executed concurrently. * * @param[out] transform Handle to the transform. * @param[in] maxNumThreads The maximum number of threads to use. * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU. * @param[in] transformType The transform type (complex to complex or real to complex). Can be * SPFFT_TRANS_C2C or SPFFT_TRANS_R2C. * @param[in] dimX The dimension in x. * @param[in] dimY The dimension in y. * @param[in] dimZ The dimension in z. * @param[in] numLocalElements The number of elements in frequency domain. * @param[in] indexFormat The index format. Only SPFFT_INDEX_TRIPLETS currently supported. * @param[in] indices Pointer to frequency indices. Centered indexing is allowed. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_create_independent( SpfftFloatTransform* transform, int maxNumThreads, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); #ifdef SPFFT_MPI /** * Create a distributed transform without a grid handle. * Thread-safe if no FFTW calls are executed concurrently. * * @param[out] transform Handle to the transform.
* @param[in] maxNumThreads The maximum number of threads to use. * @param[in] comm The MPI communicator to use. Will be duplicated for internal use. * @param[in] exchangeType The type of MPI exchange to use. Possible values are * SPFFT_EXCH_DEFAULT, SPFFT_EXCH_BUFFERED, SPFFT_EXCH_COMPACT_BUFFERED and SPFFT_EXCH_UNBUFFERED. * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU. * @param[in] transformType The transform type (complex to complex or real to complex). Can be * SPFFT_TRANS_C2C or SPFFT_TRANS_R2C. * @param[in] dimX The dimension in x. * @param[in] dimY The dimension in y. * @param[in] dimZ The dimension in z. * @param[in] localZLength The length in z in space domain of the local MPI rank. * @param[in] numLocalElements The number of elements in frequency domain of the local MPI * rank. * @param[in] indexFormat The index format. Only SPFFT_INDEX_TRIPLETS currently supported. * @param[in] indices Pointer to frequency indices. Centered indexing is allowed. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_create_independent_distributed( SpfftFloatTransform* transform, int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); #endif /** * Destroy a transform. * * @param[in] transform Handle to the transform. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_destroy(SpfftFloatTransform transform); /** * Clone a transform. * * @param[in] transform Handle to the transform. * @param[out] newTransform Independent transform with the same parameters, but with new underlying * grid. * @return Error code or SPFFT_SUCCESS. 
*/ SPFFT_EXPORT SpfftError spfft_float_transform_clone(SpfftFloatTransform transform, SpfftFloatTransform* newTransform); /** * Execute a forward transform from space domain to frequency domain. * * @param[in] transform Handle to the transform. * @param[in] inputLocation The processing unit, to take the input from. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU (if GPU is set as execution unit). * @param[out] output Pointer to memory, where the frequency domain elements are written to. Can * be located at Host or GPU memory (if GPU is set as processing unit). * @param[in] scaling Controls scaling of output. SPFFT_NO_SCALING to disable or * SPFFT_FULL_SCALING to scale by factor 1 / (dim_x() * dim_y() * dim_z()). * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_forward(SpfftFloatTransform transform, SpfftProcessingUnitType inputLocation, float* output, SpfftScalingType scaling); /** * Execute a forward transform from space domain to frequency domain. * * @param[in] transform Handle to the transform. * @param[in] input Pointer to memory, to read space domain data from. Can * be located at Host or GPU memory (if GPU is set as processing unit). * @param[out] output Pointer to memory, where the frequency domain elements are written to. Can * be located at Host or GPU memory (if GPU is set as processing unit). * @param[in] scaling Controls scaling of output. SPFFT_NO_SCALING to disable or * SPFFT_FULL_SCALING to scale by factor 1 / (dim_x() * dim_y() * dim_z()). * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_forward_ptr(SpfftFloatTransform transform, const float* input, float* output, SpfftScalingType scaling); /** * Execute a backward transform from frequency domain to space domain. * * @param[in] transform Handle to the transform. * @param[in] input Input data in frequency domain. Must match the indices provided at transform * creation. 
Can be located at Host or GPU memory, if GPU is set as processing unit. * @param[in] outputLocation The processing unit, to place the output at. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU (if GPU is set as execution unit). * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_backward(SpfftFloatTransform transform, const float* input, SpfftProcessingUnitType outputLocation); /** * Execute a backward transform from frequency domain to space domain. * * @param[in] transform Handle to the transform. * @param[in] input Input data in frequency domain. Must match the indices provided at transform * creation. Can be located at Host or GPU memory, if GPU is set as processing unit. * @param[out] output Pointer to memory to write output in space domain to. Can be located at * Host or GPU memory, if GPU is set as processing unit. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_backward_ptr(SpfftFloatTransform transform, const float* input, float* output); /** * Provides access to the space domain data. * * @param[in] transform Handle to the transform. * @param[in] dataLocation The processing unit to query for the data. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU (if GPU is set as execution unit). * @param[out] data Pointer to space domain data on given processing unit. Alignment is guaranteed * to fulfill requirements for std::complex and C language complex types. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_get_space_domain(SpfftFloatTransform transform, SpfftProcessingUnitType dataLocation, float** data); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] dimX Dimension in x. * @return Error code or SPFFT_SUCCESS.
*/ SPFFT_EXPORT SpfftError spfft_float_transform_dim_x(SpfftFloatTransform transform, int* dimX); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] dimY Dimension in y. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_dim_y(SpfftFloatTransform transform, int* dimY); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] dimZ Dimension in z. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_dim_z(SpfftFloatTransform transform, int* dimZ); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] localZLength Size in z of the slice in space domain on the local MPI rank. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_local_z_length(SpfftFloatTransform transform, int* localZLength); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] size Number of elements in the space domain slice held by the local MPI rank. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_local_slice_size(SpfftFloatTransform transform, int* size); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] globalSize Global number of elements in space domain. Equals dim_x() * dim_y() * * dim_z(). * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_global_size(SpfftFloatTransform transform, long long int* globalSize); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] offset Offset in z of the space domain slice held by the local MPI rank. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_local_z_offset(SpfftFloatTransform transform, int* offset); /** * Access a transform parameter. * @param[in] transform Handle to the transform.
* @param[out] numLocalElements Number of local elements in frequency domain. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_num_local_elements(SpfftFloatTransform transform, int* numLocalElements); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] numGlobalElements Global number of elements in frequency domain. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_num_global_elements(SpfftFloatTransform transform, long long int* numGlobalElements); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] deviceId The GPU device id used. Returns always 0, if no GPU support is enabled. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_device_id(SpfftFloatTransform transform, int* deviceId); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] numThreads The exact number of threads used by transforms created from this grid. May * be less than the maximum given to the constructor. Always 1, if not compiled with OpenMP support. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_num_threads(SpfftFloatTransform transform, int* numThreads); /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] mode The execution mode. Only affects execution on GPU. * Defaults to SPFFT_EXEC_SYNCHRONOUS. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_execution_mode(SpfftFloatTransform transform, SpfftExecType* mode); /** * Set a transform parameter. * @param[in] transform Handle to the transform. * @param[in] mode The execution mode to change to. Only affects execution on GPU. * Defaults to SPFFT_EXEC_SYNCHRONOUS. * @return Error code or SPFFT_SUCCESS.
*/ SPFFT_EXPORT SpfftError spfft_float_transform_set_execution_mode(SpfftFloatTransform transform, SpfftExecType mode); #ifdef SPFFT_MPI /** * Access a transform parameter. * @param[in] transform Handle to the transform. * @param[out] comm The internal MPI communicator. * @return Error code or SPFFT_SUCCESS. */ SPFFT_EXPORT SpfftError spfft_float_transform_communicator(SpfftFloatTransform transform, MPI_Comm* comm); #endif #ifdef __cplusplus } #endif #endif SpFFT-1.1.0/include/spfft/transform_float.hpp000066400000000000000000000277211457701740000211270ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TRANSFORM_FLOAT_HPP #define SPFFT_TRANSFORM_FLOAT_HPP #include #include "spfft/config.h" #include "spfft/types.h" #ifdef SPFFT_MPI #include #endif namespace spfft { template class SPFFT_NO_EXPORT TransformInternal; template class SPFFT_NO_EXPORT MultiTransformInternal; template class SPFFT_NO_EXPORT GridInternal; #ifdef SPFFT_SINGLE_PRECISION class SPFFT_EXPORT GridFloat; /** * A transform in single precision with fixed dimensions. Shares memory with other transform created * from the same Grid object. */ class SPFFT_EXPORT TransformFloat { public: using ValueType = float; /** * Create a transform without a grid handle. * Thread-safe if no FFTW calls are executed concurrently. * * @param[in] maxNumThreads The maximum number of threads to use. * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU. * @param[in] transformType The transform type (complex to complex or real to complex). Can be * SPFFT_TRANS_C2C or SPFFT_TRANS_R2C. * @param[in] dimX The dimension in x. * @param[in] dimY The dimension in y. * @param[in] dimZ The dimension in z. * @param[in] numLocalElements The number of elements in frequency domain. * @param[in] indexFormat The index format. Only SPFFT_INDEX_TRIPLETS currently supported. * @param[in] indices Pointer to frequency indices. Centered indexing is allowed. 
*/ TransformFloat(int maxNumThreads, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); #ifdef SPFFT_MPI /** * Create a distributed transform without a grid handle. * Thread-safe if no FFTW calls are executed concurrently. * * @param[in] maxNumThreads The maximum number of threads to use. * @param[in] comm The MPI communicator to use. Will be duplicated for internal use. * @param[in] exchangeType The type of MPI exchange to use. Possible values are * SPFFT_EXCH_DEFAULT, SPFFT_EXCH_BUFFERED, SPFFT_EXCH_COMPACT_BUFFERED and SPFFT_EXCH_UNBUFFERED. * @param[in] processingUnit The processing unit type to use. Must be either SPFFT_PU_HOST or * SPFFT_PU_GPU. * @param[in] transformType The transform type (complex to complex or real to complex). Can be * SPFFT_TRANS_C2C or SPFFT_TRANS_R2C. * @param[in] dimX The dimension in x. * @param[in] dimY The dimension in y. * @param[in] dimZ The dimension in z. * @param[in] localZLength The length in z in space domain of the local MPI rank. * @param[in] numLocalElements The number of elements in frequency domain of the local MPI * rank. * @param[in] indexFormat The index format. Only SPFFT_INDEX_TRIPLETS currently supported. * @param[in] indices Pointer to frequency indices. Centered indexing is allowed. */ TransformFloat(int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); #endif /** * Default copy constructor. */ TransformFloat(const TransformFloat&) = default; /** * Default move constructor. */ TransformFloat(TransformFloat&&) = default; /** * Default copy operator. */ TransformFloat& operator=(const TransformFloat&) = default; /** * Default move operator. 
*/ TransformFloat& operator=(TransformFloat&&) = default; /** * Clone transform. * * @return Independent transform with the same parameters, but with new underlying grid. */ TransformFloat clone() const; /** * Access a transform parameter. * @return Type of transform. */ SpfftTransformType type() const; /** * Access a transform parameter. * @return Dimension in x. */ int dim_x() const; /** * Access a transform parameter. * @return Dimension in y. */ int dim_y() const; /** * Access a transform parameter. * @return Dimension in z. */ int dim_z() const; /** * Access a transform parameter. * @return Length in z of the space domain slice held by the local MPI rank. */ int local_z_length() const; /** * Access a transform parameter. * @return Offset in z of the space domain slice held by the local MPI rank. */ int local_z_offset() const; /** * Access a transform parameter. * @return Number of elements in the space domain slice held by the local MPI rank. */ int local_slice_size() const; /** * Access a transform parameter. * @return Global number of elements in space domain. Equals dim_x() * dim_y() * dim_z(). */ long long int global_size() const; /** * Access a transform parameter. * @return Number of elements in frequency domain. */ int num_local_elements() const; /** * Access a transform parameter. * @return Global number of elements in frequency domain. */ long long int num_global_elements() const; /** * Access a transform parameter. * @return The processing unit used for calculations. Can be SPFFT_PU_HOST or SPFFT_PU_GPU. */ SpfftProcessingUnitType processing_unit() const; /** * Access a transform parameter. * @return The GPU device id used. Returns always 0, if no GPU support is enabled. */ int device_id() const; /** * Access a transform parameter. * @return The exact number of threads used by transforms created from this grid. May be less than * the maximum given to the constructor. Always 1, if not compiled with OpenMP support. 
*/ int num_threads() const; /** * Access a transform parameter. * @return The execution mode. Only affects execution on GPU. Defaults to SPFFT_EXEC_SYNCHRONOUS. */ SpfftExecType execution_mode() const; /** * Set a transform parameter. * @param[in] mode The execution mode to change to. Only affects execution on GPU. * Defaults to SPFFT_EXEC_SYNCHRONOUS. */ void set_execution_mode(SpfftExecType mode); #ifdef SPFFT_MPI /** * Access a transform parameter. * @return The internal MPI communicator. */ MPI_Comm communicator() const; #endif /** * Provides access to the space domain data. * * @param[in] dataLocation The processing unit to query for the data. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU (if GPU is set as execution unit). * @return Pointer to space domain data on given processing unit. Alignment is guaranteed to * fulfill requirements for std::complex and C language complex types. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ float* space_domain_data(SpfftProcessingUnitType dataLocation); /** * Execute a forward transform from space domain to frequency domain. * * @param[in] inputLocation The processing unit, to take the input from. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU (if GPU is set as execution unit). * @param[out] output Pointer to memory, where the frequency domain elements are written to. Can * be located at Host or GPU memory (if GPU is set as processing unit). * @param[in] scaling Controls scaling of output. SPFFT_NO_SCALING to disable or * SPFFT_FULL_SCALING to scale by factor 1 / (dim_x() * dim_y() * dim_z()). * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ void forward(SpfftProcessingUnitType inputLocation, float* output, SpfftScalingType scaling = SPFFT_NO_SCALING); /** * Execute a forward transform from space domain to frequency domain. 
* * @param[in] input Pointer to memory, to read space domain data from. Can * be located at Host or GPU memory (if GPU is set as processing unit). * @param[out] output Pointer to memory, where the frequency domain elements are written to. Can * be located at Host or GPU memory (if GPU is set as processing unit). * @param[in] scaling Controls scaling of output. SPFFT_NO_SCALING to disable or * SPFFT_FULL_SCALING to scale by factor 1 / (dim_x() * dim_y() * dim_z()). * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ void forward(const float* input, float* output, SpfftScalingType scaling = SPFFT_NO_SCALING); /** * Execute a backward transform from frequency domain to space domain. * * @param[in] input Input data in frequency domain. Must match the indices provided at transform * creation. Can be located at Host or GPU memory, if GPU is set as processing unit. * @param[in] outputLocation The processing unit, to place the output at. Can be SPFFT_PU_HOST or * SPFFT_PU_GPU (if GPU is set as execution unit). * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ void backward(const float* input, SpfftProcessingUnitType outputLocation); /** * Execute a backward transform from frequency domain to space domain. * * @param[in] input Input data in frequency domain. Must match the indices provided at transform * creation. Can be located at Host or GPU memory, if GPU is set as processing unit. * @param[out] output Pointer to memory to write output in frequency domain to. Can be located at * Host or GPU memory, if GPU is set as processing unit. * @throw GenericError SpFFT error. Can be a derived type. * @throw std::exception Error from standard library calls. Can be a derived type. */ void backward(const float* input, float* output); private: /*! 
\cond PRIVATE */ friend GridFloat; friend MultiTransformInternal; SPFFT_NO_EXPORT TransformFloat(const std::shared_ptr>& grid, SpfftProcessingUnitType executionUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType dataFormat, const int* indices); SPFFT_NO_EXPORT explicit TransformFloat(std::shared_ptr> transform); std::shared_ptr> transform_; /*! \endcond */ }; #endif } // namespace spfft #endif SpFFT-1.1.0/include/spfft/types.h000066400000000000000000000067151457701740000165330ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TYPES_H #define SPFFT_TYPES_H #include "spfft/config.h" enum SpfftExchangeType { /** * Default exchange. Equivalent to SPFFT_EXCH_COMPACT_BUFFERED. */ SPFFT_EXCH_DEFAULT, /** * Exchange based on MPI_Alltoall. */ SPFFT_EXCH_BUFFERED, /** * Exchange based on MPI_Alltoall in single precision. * Slight accuracy loss for double precision transforms due to conversion to float prior to MPI * exchange. */ SPFFT_EXCH_BUFFERED_FLOAT, /** * Exchange based on MPI_Alltoallv. */ SPFFT_EXCH_COMPACT_BUFFERED, /** * Exchange based on MPI_Alltoallv in single precision. * Slight accuracy loss for double precision transforms due to conversion to float prior to MPI * exchange. */ SPFFT_EXCH_COMPACT_BUFFERED_FLOAT, /** * Exchange based on MPI_Alltoallw. */ SPFFT_EXCH_UNBUFFERED }; /** * Processing unit type */ enum SpfftProcessingUnitType { /** * HOST */ SPFFT_PU_HOST = 1, /** * GPU */ SPFFT_PU_GPU = 2 }; enum SpfftIndexFormatType { /** * Triplets of x,y,z frequency indices */ SPFFT_INDEX_TRIPLETS }; enum SpfftTransformType { /** * Complex-to-Complex transform */ SPFFT_TRANS_C2C, /** * Real-to-Complex transform */ SPFFT_TRANS_R2C }; enum SpfftScalingType { /** * No scaling */ SPFFT_NO_SCALING, /** * Full scaling */ SPFFT_FULL_SCALING }; enum SpfftExecType { /** * Fully synchronous execution */ SPFFT_EXEC_SYNCHRONOUS, /** * Asynchronous execution on GPU */ SPFFT_EXEC_ASYNCHRONOUS }; #ifndef __cplusplus /*! 
\cond PRIVATE */ // C only typedef enum SpfftExchangeType SpfftExchangeType; typedef enum SpfftProcessingUnitType SpfftProcessingUnitType; typedef enum SpfftTransformType SpfftTransformType; typedef enum SpfftIndexFormatType SpfftIndexFormatType; typedef enum SpfftScalingType SpfftScalingType; typedef enum SpfftExecType SpfftExecType; /*! \endcond */ #endif // cpp #endif SpFFT-1.1.0/src/000077500000000000000000000000001457701740000132275ustar00rootroot00000000000000SpFFT-1.1.0/src/CMakeLists.txt000066400000000000000000000152621457701740000157750ustar00rootroot00000000000000set(SPFFT_SOURCE_FILES memory/aligned_allocation.cpp timing/timing.cpp timing/rt_graph.cpp parameters/parameters.cpp execution/execution_host.cpp spfft/transform.cpp spfft/transform_internal.cpp spfft/multi_transform.cpp spfft/grid.cpp spfft/grid_internal.cpp fft/fftw_mutex.cpp ) if(SPFFT_SINGLE_PRECISION) list(APPEND SPFFT_SOURCE_FILES spfft/transform_float.cpp spfft/multi_transform_float.cpp spfft/grid_float.cpp ) endif() set(SPFFT_GPU_KERNELS) if(SPFFT_CUDA OR SPFFT_ROCM) list(APPEND SPFFT_GPU_KERNELS transpose/gpu_kernels/local_transpose_kernels.cu compression/gpu_kernels/compression_kernels.cu symmetry/gpu_kernels/symmetry_kernels.cu transpose/gpu_kernels/buffered_kernels.cu transpose/gpu_kernels/compact_buffered_kernels.cu ) list(APPEND SPFFT_SOURCE_FILES execution/execution_gpu.cpp gpu_util/gpu_fft_api.cpp ) if(SPFFT_MPI) list(APPEND SPFFT_SOURCE_FILES transpose/transpose_mpi_buffered_gpu.cpp transpose/transpose_mpi_compact_buffered_gpu.cpp transpose/transpose_mpi_unbuffered_gpu.cpp ) endif() if(SPFFT_ROCM) set_source_files_properties(${SPFFT_GPU_KERNELS} PROPERTIES LANGUAGE HIP) endif() list(APPEND SPFFT_SOURCE_FILES ${SPFFT_GPU_KERNELS}) endif() if(SPFFT_MPI) list(APPEND SPFFT_SOURCE_FILES transpose/transpose_mpi_buffered_host.cpp transpose/transpose_mpi_compact_buffered_host.cpp transpose/transpose_mpi_unbuffered_host.cpp ) endif() # Creates library with given name. 
All common target modifications should be done here. macro(spfft_create_library _TARGET_NAME) # create target add_library(${_TARGET_NAME} ${SPFFT_LIBRARY_TYPE} ${SPFFT_SOURCE_FILES}) # set version set_property(TARGET ${_TARGET_NAME} PROPERTY VERSION ${SPFFT_VERSION}) set_property(TARGET ${_TARGET_NAME} PROPERTY SOVERSION ${SPFFT_SO_VERSION}) # All .cu files are self-contained. Device linking can have issues with propageted linker flags of other targets like MPI. if(SPFFT_CUDA) set_property(TARGET ${_TARGET_NAME} PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS OFF) set_property(TARGET ${_TARGET_NAME} PROPERTY CUDA_SEPARABLE_COMPILATION OFF) endif() if(SPFFT_ROCM) target_compile_options(${_TARGET_NAME} PRIVATE $<$:-fno-gpu-rdc>) endif() # Don't export any symbols of external static libaries. Only works on linux. if(UNIX AND NOT APPLE) if(${CMAKE_VERSION} VERSION_LESS "3.13.5") target_link_libraries(${_TARGET_NAME} PRIVATE "-Wl,--exclude-libs,ALL") else() target_link_options(${_TARGET_NAME} PRIVATE "-Wl,--exclude-libs,ALL") endif() endif() target_include_directories(${_TARGET_NAME} PRIVATE ${SPFFT_INCLUDE_DIRS} ${SPFFT_EXTERNAL_INCLUDE_DIRS}) target_link_libraries(${_TARGET_NAME} PRIVATE ${SPFFT_EXTERNAL_LIBS}) target_include_directories(${_TARGET_NAME} INTERFACE $) # for install(EXPORT ...) target_include_directories(${_TARGET_NAME} INTERFACE $ $) # for export(...) if(${SPFFT_FORTRAN}) # Add include directory for fortran module target_include_directories(${_TARGET_NAME} INTERFACE $) target_include_directories(${_TARGET_NAME} INTERFACE $) endif() endmacro() # Create library spfft_create_library(spfft) set_target_properties(spfft PROPERTIES VISIBILITY_INLINES_HIDDEN TRUE CXX_VISIBILITY_PRESET hidden) # Create library for testing, which allows linking to internal symbols and has timings enabled. 
if(SPFFT_BUILD_TESTS) spfft_create_library(spfft_test) set_target_properties(spfft_test PROPERTIES VISIBILITY_INLINES_HIDDEN FALSE CXX_VISIBILITY_PRESET default) target_compile_options(spfft_test PUBLIC -DSPFFT_STATIC_DEFINE) # disable properties of export header # enable internal timings target_compile_options(spfft_test PUBLIC -DSPFFT_TIMING) endif() # build fortran module if(SPFFT_FORTRAN) add_library(spfft_fortran OBJECT ${PROJECT_SOURCE_DIR}/include/spfft/spfft.f90) endif() # generate export header to control symbol visibility include(GenerateExportHeader) generate_export_header(spfft) configure_file("${CMAKE_CURRENT_BINARY_DIR}/spfft_export.h" "${PROJECT_BINARY_DIR}/spfft/spfft_export.h" COPYONLY ) # set packge config names get_target_property(_LIB_TYPE spfft TYPE) if(_LIB_TYPE STREQUAL "STATIC_LIBRARY") set(SPFFT_VERSION_FILE "SpFFTStaticConfigVersion.cmake") set(SPFFT_CONFIG_FILE "SpFFTStaticConfig.cmake") set(SPFFT_TARGETS_FILE "SpFFTStaticTargets.cmake") else() set(SPFFT_VERSION_FILE "SpFFTSharedConfigVersion.cmake") set(SPFFT_CONFIG_FILE "SpFFTSharedConfig.cmake") set(SPFFT_TARGETS_FILE "SpFFTSharedTargets.cmake") endif() # generate cmake package include(CMakePackageConfigHelpers) write_basic_package_version_file( "${PROJECT_BINARY_DIR}/${SPFFT_VERSION_FILE}" VERSION ${Upstream_VERSION} COMPATIBILITY AnyNewerVersion ) export(TARGETS spfft NAMESPACE SpFFT:: FILE ${PROJECT_BINARY_DIR}/${SPFFT_TARGETS_FILE}) configure_file(${PROJECT_SOURCE_DIR}/cmake/${SPFFT_CONFIG_FILE} "${PROJECT_BINARY_DIR}/${SPFFT_CONFIG_FILE}" @ONLY ) configure_file(${PROJECT_SOURCE_DIR}/cmake/SpFFTConfig.cmake "${PROJECT_BINARY_DIR}/SpFFTConfig.cmake" COPYONLY ) configure_file(${PROJECT_SOURCE_DIR}/cmake/SpFFTConfigVersion.cmake "${PROJECT_BINARY_DIR}/SpFFTConfigVersion.cmake" COPYONLY ) configure_file(${PROJECT_SOURCE_DIR}/cmake/SpFFTTargets.cmake "${PROJECT_BINARY_DIR}/SpFFTTargets.cmake" COPYONLY ) configure_file(${PROJECT_SOURCE_DIR}/cmake/SpFFT.pc.in 
"${PROJECT_BINARY_DIR}/SpFFT.pc" @ONLY ) # installation commands if(SPFFT_INSTALL) install(TARGETS spfft DESTINATION ${CMAKE_INSTALL_LIBDIR} EXPORT SpFFTTargets) install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/spfft DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp" PATTERN "*.f90") install(FILES ${PROJECT_BINARY_DIR}/spfft/config.h "${PROJECT_BINARY_DIR}/spfft/spfft_export.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/spfft) install(EXPORT SpFFTTargets NAMESPACE SpFFT:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/SpFFT FILE ${SPFFT_TARGETS_FILE}) install( FILES "${PROJECT_BINARY_DIR}/SpFFTConfig.cmake" "${PROJECT_BINARY_DIR}/SpFFTTargets.cmake" "${PROJECT_BINARY_DIR}/SpFFTConfigVersion.cmake" "${PROJECT_BINARY_DIR}/${SPFFT_CONFIG_FILE}" "${PROJECT_BINARY_DIR}/${SPFFT_VERSION_FILE}" DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/SpFFT ) install(FILES ${PROJECT_BINARY_DIR}/SpFFT.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) install(DIRECTORY "${PROJECT_SOURCE_DIR}/cmake/modules" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/SpFFT" FILES_MATCHING PATTERN "*.cmake") if(SPFFT_FORTRAN) install(FILES ${PROJECT_BINARY_DIR}/src/spfft.mod DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/spfft) endif() endif() SpFFT-1.1.0/src/compression/000077500000000000000000000000001457701740000155705ustar00rootroot00000000000000SpFFT-1.1.0/src/compression/compression_gpu.hpp000066400000000000000000000071751457701740000215270ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_COMPRESSION_GPU_HPP #define SPFFT_COMPRESSION_GPU_HPP #include #include #include #include #include "compression/gpu_kernels/compression_kernels.hpp" #include "compression/indices.hpp" #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_stream_handle.hpp" #include "gpu_util/gpu_transfer.hpp" #include "memory/array_view_utility.hpp" #include "memory/gpu_array.hpp" #include "memory/gpu_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/config.h" #include "util/common_types.hpp" #include "util/type_check.hpp" namespace spfft { // Handles packing and unpacking of sparse frequency values for single or double precision on GPU class CompressionGPU { public: CompressionGPU(const std::shared_ptr& param) : indicesGPU_( param->local_value_indices().size()) { // stream MUST synchronize with default stream copy_to_gpu(param->local_value_indices(), indicesGPU_); } // Pack values into output buffer template auto compress(const GPUStreamHandle& stream, const GPUArrayView2D::type> input, T* output, const bool useScaling, const T scalingFactor = 1.0) -> void { static_assert(IsFloatOrDouble::value, "Type T must be float or double"); compress_gpu(stream.get(), create_1d_view(indicesGPU_, 0, indicesGPU_.size()), input, output, useScaling, scalingFactor); } // Unpack values into z-stick collection template auto decompress(const GPUStreamHandle& stream, const T* input, GPUArrayView2D::type> output) -> void { static_assert(IsFloatOrDouble::value, "Type T must be float or double"); gpu::check_status(gpu::memset_async( static_cast(output.data()), 0, output.size() * sizeof(typename decltype(output)::ValueType), stream.get())); decompress_gpu(stream.get(), create_1d_view(indicesGPU_, 0, indicesGPU_.size()), input, output); } private: GPUArray indicesGPU_; }; } // namespace spfft #endif SpFFT-1.1.0/src/compression/compression_host.hpp000066400000000000000000000077371457701740000217150ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon 
Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_COMPRESSION_HOST_HPP #define SPFFT_COMPRESSION_HOST_HPP #include #include #include #include #include "compression/indices.hpp" #include "memory/host_array_const_view.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/config.h" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" namespace spfft { // Handles packing and unpacking of sparse frequency values for single or double precision on Host class CompressionHost { public: explicit CompressionHost(const std::shared_ptr& param) : param_(param) {} // Pack values into output buffer template auto compress(const HostArrayView2D> input2d, T* output, bool useScaling, const T scalingFactor = 1.0) const -> void { const auto& indices = param_->local_value_indices(); auto input = HostArrayConstView1D>(input2d.data(), input2d.size(), input2d.pinned()); if (useScaling) { SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType i = 0; i < indices.size(); ++i) { const auto value = scalingFactor * input(indices[i]); output[2 * i] = value.real(); output[2 * i + 1] = value.imag(); } } else { SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType i = 0; i < indices.size(); ++i) { const auto value = input(indices[i]); output[2 * i] = value.real(); output[2 * i + 1] = value.imag(); } } } // Unpack values into z-stick collection template auto decompress(const T* input, HostArrayView2D> output2d) const -> void { const auto& indices = param_->local_value_indices(); auto output = HostArrayView1D>(output2d.data(), output2d.size(), output2d.pinned()); // ensure values are padded with zeros SPFFT_OMP_PRAGMA("omp for schedule(static)") // implicit barrier for (SizeType stick = 0; stick < output2d.dim_outer(); ++stick) { std::memset(static_cast(&output2d(stick, 0)), 0, sizeof(typename decltype(output2d)::ValueType) * output2d.dim_inner()); } SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType i = 0; i < indices.size(); ++i) { output(indices[i]) = 
std::complex(input[2 * i], input[2 * i + 1]); } } private: std::shared_ptr param_; }; } // namespace spfft #endif SpFFT-1.1.0/src/compression/gpu_kernels/000077500000000000000000000000001457701740000201065ustar00rootroot00000000000000SpFFT-1.1.0/src/compression/gpu_kernels/compression_kernels.cu000066400000000000000000000160431457701740000245270ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_kernel_parameter.hpp" #include "gpu_util/gpu_runtime.hpp" #include "memory/gpu_array_const_view.hpp" #include "memory/gpu_array_view.hpp" namespace spfft { template __global__ static void decompress_kernel( const GPUArrayConstView1D indices, const T* input, GPUArrayView1D::type> output) { // const int stride = gridDim.x * blockDim.x; for (int idx = threadIdx.x + blockIdx.x * blockDim.x; idx < indices.size(); idx += gridDim.x * blockDim.x) { const int valueIdx = indices(idx); typename gpu::fft::ComplexType::type value; value.x = input[2 * idx]; value.y = input[2 * idx + 1]; output(valueIdx) = value; } } auto decompress_gpu(const gpu::StreamType stream, const GPUArrayView1D& indices, const double* input, GPUArrayView2D::type> output) -> void { assert(indices.size() <= output.size()); const dim3 threadBlock(gpu::BlockSizeMedium); const dim3 threadGrid(std::min( static_cast((indices.size() + threadBlock.x - 1) / threadBlock.x), gpu::GridSizeMedium)); // const dim3 threadGrid(indices.size() < 4 ? 
1 : indices.size() / 4); launch_kernel(decompress_kernel, threadGrid, threadBlock, 0, stream, GPUArrayConstView1D(indices), input, GPUArrayView1D::type>( output.data(), output.size(), output.device_id())); } auto decompress_gpu(const gpu::StreamType stream, const GPUArrayView1D& indices, const float* input, GPUArrayView2D::type> output) -> void { assert(indices.size() <= output.size()); const dim3 threadBlock(gpu::BlockSizeMedium); const dim3 threadGrid(std::min( static_cast((indices.size() + threadBlock.x - 1) / threadBlock.x), gpu::GridSizeMedium)); launch_kernel(decompress_kernel, threadGrid, threadBlock, 0, stream, GPUArrayConstView1D(indices), input, GPUArrayView1D::type>( output.data(), output.size(), output.device_id())); } template __global__ static void compress_kernel( const GPUArrayConstView1D indices, GPUArrayConstView1D::type> input, T* output) { for (int idx = threadIdx.x + blockIdx.x * blockDim.x; idx < indices.size(); idx += gridDim.x * blockDim.x) { const int valueIdx = indices(idx); const auto value = input(valueIdx); output[2 * idx] = value.x; output[2 * idx + 1] = value.y; } } template __global__ static void compress_kernel_scaled( const GPUArrayConstView1D indices, GPUArrayConstView1D::type> input, T* output, const T scalingFactor) { for (int idx = threadIdx.x + blockIdx.x * blockDim.x; idx < indices.size(); idx += gridDim.x * blockDim.x) { const int valueIdx = indices(idx); const auto value = input(valueIdx); output[2 * idx] = scalingFactor * value.x; output[2 * idx + 1] = scalingFactor * value.y; } } auto compress_gpu(const gpu::StreamType stream, const GPUArrayView1D& indices, GPUArrayView2D::type> input, double* output, const bool useScaling, const double scalingFactor) -> void { const dim3 threadBlock(gpu::BlockSizeMedium); const dim3 threadGrid(std::min( static_cast((indices.size() + threadBlock.x - 1) / threadBlock.x), gpu::GridSizeMedium)); if (useScaling) { launch_kernel(compress_kernel_scaled, threadGrid, threadBlock, 0, stream, 
GPUArrayConstView1D(indices), GPUArrayConstView1D::type>( input.data(), input.size(), input.device_id()), output, scalingFactor); } else { launch_kernel(compress_kernel, threadGrid, threadBlock, 0, stream, GPUArrayConstView1D(indices), GPUArrayConstView1D::type>( input.data(), input.size(), input.device_id()), output); } } auto compress_gpu(const gpu::StreamType stream, const GPUArrayView1D& indices, GPUArrayView2D::type> input, float* output, const bool useScaling, const float scalingFactor) -> void { const dim3 threadBlock(gpu::BlockSizeMedium); const dim3 threadGrid(std::min( static_cast((indices.size() + threadBlock.x - 1) / threadBlock.x), gpu::GridSizeMedium)); if (useScaling) { launch_kernel(compress_kernel_scaled, threadGrid, threadBlock, 0, stream, GPUArrayConstView1D(indices), GPUArrayConstView1D::type>( input.data(), input.size(), input.device_id()), output, scalingFactor); } else { launch_kernel(compress_kernel, threadGrid, threadBlock, 0, stream, GPUArrayConstView1D(indices), GPUArrayConstView1D::type>( input.data(), input.size(), input.device_id()), output); } } } // namespace spfft SpFFT-1.1.0/src/compression/gpu_kernels/compression_kernels.hpp000066400000000000000000000053201457701740000247030ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef COMPRESSION_KERNELS_HPP #define COMPRESSION_KERNELS_HPP #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_runtime_api.hpp" #include "memory/gpu_array_view.hpp" namespace spfft { auto decompress_gpu(const gpu::StreamType stream, const GPUArrayView1D& indices, const double* input, GPUArrayView2D::type> output) -> void; auto decompress_gpu(const gpu::StreamType stream, const GPUArrayView1D& indices, const float* input, GPUArrayView2D::type> output) -> void; auto compress_gpu(const gpu::StreamType stream, const GPUArrayView1D& indices, GPUArrayView2D::type> input, double* output, const bool useScaling, const double scalingFactor) -> void; auto compress_gpu(const gpu::StreamType stream, const GPUArrayView1D& indices, GPUArrayView2D::type> input, float* output, const bool useScaling, const float scalingFactor) -> void; } // namespace spfft #endif SpFFT-1.1.0/src/compression/indices.hpp000066400000000000000000000155421457701740000177260ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. 
Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_INDICES_HPP #define SPFFT_INDICES_HPP #include #include #include #include #include #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "util/common_types.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_match_elementary_type.hpp" #endif namespace spfft { // convert [-N, N) frequency index to [0, N) for FFT input inline auto to_storage_index(const int dim, const int index) -> int { if (index < 0) { return dim + index; } else { return index; } } #ifdef SPFFT_MPI inline auto create_distributed_transform_indices(const MPICommunicatorHandle& comm, std::vector localSticks) -> std::vector> { std::vector sendRequests(comm.size()); constexpr int tag = 442; // random tag (must be less than 32768) // send local stick indices for (int r = 0; r < static_cast(comm.size()); ++r) { if (r != static_cast(comm.rank())) { mpi_check_status(MPI_Isend(localSticks.data(), localSticks.size(), MPI_INT, r, tag, comm.get(), &(sendRequests[r]))); } } std::vector> globalXYIndices(comm.size()); // recv all other stick indices for (int r = 0; r < static_cast(comm.size()); ++r) { if (r != static_cast(comm.rank())) { // get recv count MPI_Status status; MPI_Probe(r, tag, comm.get(), &status); int recvCount = 0; MPI_Get_count(&status, MPI_INT, &recvCount); // recv data globalXYIndices[r].resize(recvCount); MPI_Recv(globalXYIndices[r].data(), recvCount, MPI_INT, r, tag, comm.get(), MPI_STATUS_IGNORE); } } // wait for all sends to finish for (int r = 0; r < static_cast(comm.size()); ++r) { if (r != static_cast(comm.rank())) { MPI_Wait(&(sendRequests[r]), MPI_STATUS_IGNORE); } } // move local sticks into transform indices object AFTER sends are finished globalXYIndices[comm.rank()] = std::move(localSticks); return globalXYIndices; } #endif inline auto check_stick_duplicates(const std::vector>& indicesPerRank) -> void { // check for z-sticks indices std::set globalXYIndices; for (const 
auto& rankIndices : indicesPerRank) { for (const auto& index : rankIndices) { if (globalXYIndices.count(index)) { throw DuplicateIndicesError(); } globalXYIndices.insert(index); } } } // convert index triplets for every value into stick/z indices and z-stick index pairs. inline auto convert_index_triplets(const bool hermitianSymmetry, const int dimX, const int dimY, const int dimZ, const int numValues, const int* xIndices, const int* yIndices, const int* zIndices, const int stride) -> std::pair, std::vector> { if (static_cast(numValues) > static_cast(dimX) * static_cast(dimY) * static_cast(dimZ)) { throw InvalidParameterError(); } // check if indices are non-negative or centered bool centeredIndices = false; for (int i = 0; i < numValues; ++i) { if (xIndices[i * stride] < 0 || yIndices[i * stride] < 0 || zIndices[i * stride] < 0) { centeredIndices = true; break; } } const int maxX = (hermitianSymmetry || centeredIndices ? dimX / 2 + 1 : dimX) - 1; const int maxY = (centeredIndices ? dimY / 2 + 1 : dimY) - 1; const int maxZ = (centeredIndices ? dimZ / 2 + 1 : dimZ) - 1; const int minX = hermitianSymmetry ? 
0 : maxX - dimX + 1; const int minY = maxY - dimY + 1; const int minZ = maxZ - dimZ + 1; // check if indices are inside bounds for (int i = 0; i < numValues; ++i) { if (xIndices[i * stride] < minX || xIndices[i * stride] > maxX) throw InvalidIndicesError(); if (yIndices[i * stride] < minY || yIndices[i * stride] > maxY) throw InvalidIndicesError(); if (zIndices[i * stride] < minZ || zIndices[i * stride] > maxZ) throw InvalidIndicesError(); } // store all unique xy index pairs in an ordered container std::map sortedXYIndices; // key = index in xy-plane, value = stick index for (int i = 0; i < numValues; ++i) { const auto x = to_storage_index(dimX, xIndices[i * stride]); const auto y = to_storage_index(dimY, yIndices[i * stride]); sortedXYIndices[x * dimY + y] = 0; } // assign z-stick indices int count = 0; for (auto& pair : sortedXYIndices) { pair.second = count; ++count; } // store index for each element. Each z-stick is continous std::vector valueIndices; valueIndices.reserve(numValues); for (int i = 0; i < numValues; ++i) { const auto x = to_storage_index(dimX, xIndices[i * stride]); const auto y = to_storage_index(dimY, yIndices[i * stride]); const auto z = to_storage_index(dimZ, zIndices[i * stride]); valueIndices.emplace_back(sortedXYIndices[x * dimY + y] * dimZ + z); } // store ordered unique xy-index pairs std::vector stickIndices; stickIndices.reserve(sortedXYIndices.size()); for (auto& pair : sortedXYIndices) { stickIndices.emplace_back(pair.first); } return {std::move(valueIndices), std::move(stickIndices)}; } } // namespace spfft #endif SpFFT-1.1.0/src/execution/000077500000000000000000000000001457701740000152325ustar00rootroot00000000000000SpFFT-1.1.0/src/execution/execution_gpu.cpp000066400000000000000000000444601457701740000206240ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are 
met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
// NOTE(review): template parameter/argument lists (the list after `template`, the targets of
// static_cast/reinterpret_cast, the arguments of class templates) appear to have been stripped
// from this copy of the file. The code tokens below are kept exactly as found.

// Set up a local (single process) execution on GPU.
// Builds views into the provided host/GPU arrays and creates the z-transform, compression,
// transpose and xy-transform objects. The arrays are not owned.
template ExecutionGPU::ExecutionGPU(const int numThreads, std::shared_ptr param,
                                    HostArray>& array1, HostArray>& array2,
                                    GPUArray::type>& gpuArray1,
                                    GPUArray::type>& gpuArray2,
                                    const std::shared_ptr>& fftWorkBuffer)
    : stream_(false),
      externalStream_(nullptr),
      startEvent_(false),
      endEvent_(false),
      numThreads_(numThreads),
      // 1 / (dim_x * dim_y * dim_z), applied in forward_z when full scaling is requested
      scalingFactor_(static_cast(
          1.0 / static_cast(param->dim_x() * param->dim_y() * param->dim_z()))),
      // default symmetry objects; replaced below for R2C transforms
      zStickSymmetry_(new Symmetry()),
      planeSymmetry_(new Symmetry()) {
  const SizeType numLocalZSticks = param->num_z_sticks(0);

  // frequency data with z-sticks
  freqDomainDataGPU_ = create_2d_view(gpuArray1, 0, numLocalZSticks, param->dim_z());
  // compressed values live in gpuArray2; two scalars per value
  freqDomainCompressedDataGPU_ =
      GPUArrayView1D(reinterpret_cast(gpuArray2.data()),
                     param->local_value_indices().size() * 2, gpuArray2.device_id());

  // Z
  if (numLocalZSticks > 0) {
    transformZ_ = std::unique_ptr(
        new Transform1DGPU(freqDomainDataGPU_, stream_, fftWorkBuffer));
    // NOTE(review): the MPI constructor additionally checks
    // zero_zero_stick_index() < freqDomainDataGPU_.dim_outer() before building the view;
    // that check is absent here - confirm the index is always valid in the local case.
    if (param->transform_type() == SPFFT_TRANS_R2C) {
      zStickSymmetry_.reset(new StickSymmetryGPU(
          stream_, GPUArrayView1D::type>(
                       freqDomainDataGPU_.data() +
                           freqDomainDataGPU_.index(param->zero_zero_stick_index(), 0),
                       freqDomainDataGPU_.dim_inner(), freqDomainDataGPU_.device_id())));
    }
  }

  if (numLocalZSticks > 0 && param->local_value_indices().size() > 0) {
    compression_.reset(new CompressionGPU(param));
  }

  // Transpose
  freqDomainXYGPU_ = create_3d_view(gpuArray2, 0, param->dim_z(), param->dim_y(),
                                    param->dim_x_freq());  // must not overlap with z-sticks
  transpose_.reset(new TransposeGPU(param, stream_, freqDomainXYGPU_, freqDomainDataGPU_));

  // XY
  if (param->num_xy_planes(0) > 0) {
    if (param->transform_type() == SPFFT_TRANS_R2C) {
      planeSymmetry_.reset(new PlaneSymmetryGPU(stream_, freqDomainXYGPU_));
      // NOTE: param->dim_x() != param->dim_x_freq()
      spaceDomainDataExternalHost_ =
          create_new_type_3d_view(array1, param->dim_z(), param->dim_y(), param->dim_x());
      spaceDomainDataExternalGPU_ =
          create_new_type_3d_view(gpuArray1, param->dim_z(), param->dim_y(), param->dim_x());

      transformXY_ = std::unique_ptr(new TransformReal2DGPU(
          spaceDomainDataExternalGPU_, freqDomainXYGPU_, stream_, fftWorkBuffer));

    } else {
      // C2C: the space domain view reinterprets the xy buffer, 2 * dim_x_freq scalars per row
      spaceDomainDataExternalHost_ = create_new_type_3d_view(
          array1, param->dim_z(), param->dim_y(), 2 * param->dim_x_freq());
      spaceDomainDataExternalGPU_ = create_new_type_3d_view(
          freqDomainXYGPU_, param->dim_z(), param->dim_y(), 2 * param->dim_x_freq());

      transformXY_ = std::unique_ptr(
          new Transform2DGPU(freqDomainXYGPU_, stream_, fftWorkBuffer));
    }
  }
}

#ifdef SPFFT_MPI
// Set up a distributed execution on GPU.
// Same structure as the local constructor, but sized by the z-sticks / xy-planes of comm.rank(),
// and with an MPI transpose selected by `exchangeType`.
// Throws InvalidParameterError for an unknown exchange type.
template ExecutionGPU::ExecutionGPU(MPICommunicatorHandle comm,
                                    const SpfftExchangeType exchangeType, const int numThreads,
                                    std::shared_ptr param, HostArray>& array1,
                                    HostArray>& array2, GPUArray::type>& gpuArray1,
                                    GPUArray::type>& gpuArray2,
                                    const std::shared_ptr>& fftWorkBuffer)
    : stream_(false),
      externalStream_(nullptr),
      startEvent_(false),
      endEvent_(false),
      numThreads_(numThreads),
      scalingFactor_(static_cast(
          1.0 / static_cast(param->dim_x() * param->dim_y() * param->dim_z()))),
      zStickSymmetry_(new Symmetry()),
      planeSymmetry_(new Symmetry()) {
  // the two buffers of each kind must be distinct, and both GPU buffers on the same device
  assert(array1.data() != array2.data());
  assert(gpuArray1.data() != gpuArray2.data());
  assert(gpuArray1.device_id() == gpuArray2.device_id());
  const SizeType numLocalZSticks = param->num_z_sticks(comm.rank());
  const SizeType numLocalXYPlanes = param->num_xy_planes(comm.rank());

  freqDomainDataGPU_ = create_2d_view(gpuArray1, 0, numLocalZSticks, param->dim_z());
  freqDomainCompressedDataGPU_ =
      GPUArrayView1D(reinterpret_cast(gpuArray2.data()),
                     param->local_value_indices().size() * 2, gpuArray2.device_id());

  freqDomainXYGPU_ = create_3d_view(gpuArray2, 0, numLocalXYPlanes, param->dim_y(),
                                    param->dim_x_freq());  // must not overlap with z-sticks

  // Z
  if (numLocalZSticks > 0) {
    transformZ_ = std::unique_ptr(
        new Transform1DGPU(freqDomainDataGPU_, stream_, fftWorkBuffer));

    // the stick symmetry is only set up if the zero-zero stick index is within the local sticks
    if (param->transform_type() == SPFFT_TRANS_R2C &&
        param->zero_zero_stick_index() < freqDomainDataGPU_.dim_outer()) {
      zStickSymmetry_.reset(new StickSymmetryGPU(
          stream_, GPUArrayView1D::type>(
                       freqDomainDataGPU_.data() +
                           freqDomainDataGPU_.index(param->zero_zero_stick_index(), 0),
                       freqDomainDataGPU_.dim_inner(), freqDomainDataGPU_.device_id())));
    }
  }

  if (numLocalZSticks > 0) {
    compression_.reset(new CompressionGPU(param));
  }

  // XY
  if (numLocalXYPlanes > 0) {
    if (param->transform_type() == SPFFT_TRANS_R2C) {
      // NOTE: param->dim_x() != param->dim_x_freq()
      spaceDomainDataExternalHost_ =
          create_new_type_3d_view(array1, numLocalXYPlanes, param->dim_y(), param->dim_x());
      spaceDomainDataExternalGPU_ =
          create_new_type_3d_view(gpuArray1, numLocalXYPlanes, param->dim_y(), param->dim_x());

      transformXY_ = std::unique_ptr(new TransformReal2DGPU(
          spaceDomainDataExternalGPU_, freqDomainXYGPU_, stream_, fftWorkBuffer));

      planeSymmetry_.reset(new PlaneSymmetryGPU(stream_, freqDomainXYGPU_));

    } else {
      spaceDomainDataExternalHost_ = create_new_type_3d_view(
          array1, numLocalXYPlanes, param->dim_y(), 2 * param->dim_x_freq());
      spaceDomainDataExternalGPU_ = create_new_type_3d_view(
          freqDomainXYGPU_, numLocalXYPlanes, param->dim_y(), 2 * param->dim_x_freq());

      transformXY_ = std::unique_ptr(
          new Transform2DGPU(freqDomainXYGPU_, stream_, fftWorkBuffer));
    }
  }

  // NOTE(review): in this copy the *_FLOAT cases are token-identical to their non-float
  // counterparts; presumably they differed only in the stripped template arguments - confirm.
  switch (exchangeType) {
    case SpfftExchangeType::SPFFT_EXCH_UNBUFFERED: {
      auto freqDomainDataHost = create_2d_view(array1, 0, numLocalZSticks, param->dim_z());
      auto freqDomainXYHost =
          create_3d_view(array2, 0, numLocalXYPlanes, param->dim_y(), param->dim_x_freq());
      transpose_.reset(
          new TransposeMPIUnbufferedGPU(param, comm, freqDomainXYHost, freqDomainXYGPU_, stream_,
                                        freqDomainDataHost, freqDomainDataGPU_, stream_));
    } break;
    case SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED: {
      // compact buffers are sized exactly: planes-of-all-ranks x local sticks and vice versa
      const auto bufferZSize = param->total_num_xy_planes() * param->num_z_sticks(comm.rank());
      const auto bufferXYSize = param->total_num_z_sticks() * param->num_xy_planes(comm.rank());
      auto transposeBufferZ = create_1d_view(array2, 0, bufferZSize);
      auto transposeBufferZGPU = create_1d_view(gpuArray2, 0, bufferZSize);
      auto transposeBufferXY = create_1d_view(array1, 0, bufferXYSize);
      auto transposeBufferXYGPU = create_1d_view(gpuArray1, 0, bufferXYSize);
      transpose_.reset(new TransposeMPICompactBufferedGPU(
          param, comm, transposeBufferXY, freqDomainXYGPU_, transposeBufferXYGPU, stream_,
          transposeBufferZ, freqDomainDataGPU_, transposeBufferZGPU, stream_));
    } break;
    case SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED_FLOAT: {
      const auto bufferZSize = param->total_num_xy_planes() * param->num_z_sticks(comm.rank());
      const auto bufferXYSize = param->total_num_z_sticks() * param->num_xy_planes(comm.rank());
      auto transposeBufferZ = create_1d_view(array2, 0, bufferZSize);
      auto transposeBufferZGPU = create_1d_view(gpuArray2, 0, bufferZSize);
      auto transposeBufferXY = create_1d_view(array1, 0, bufferXYSize);
      auto transposeBufferXYGPU = create_1d_view(gpuArray1, 0, bufferXYSize);
      transpose_.reset(new TransposeMPICompactBufferedGPU(
          param, comm, transposeBufferXY, freqDomainXYGPU_, transposeBufferXYGPU, stream_,
          transposeBufferZ, freqDomainDataGPU_, transposeBufferZGPU, stream_));
    } break;
    case SpfftExchangeType::SPFFT_EXCH_BUFFERED: {
      // buffered exchange uses equally sized slots per rank: max sticks x max planes x ranks
      const auto bufferSize =
          param->max_num_z_sticks() * param->max_num_xy_planes() * comm.size();
      auto transposeBufferZ = create_1d_view(array2, 0, bufferSize);
      auto transposeBufferZGPU = create_1d_view(gpuArray2, 0, bufferSize);
      auto transposeBufferXY = create_1d_view(array1, 0, bufferSize);
      auto transposeBufferXYGPU = create_1d_view(gpuArray1, 0, bufferSize);
      transpose_.reset(new TransposeMPIBufferedGPU(
          param, comm, transposeBufferXY, freqDomainXYGPU_, transposeBufferXYGPU, stream_,
          transposeBufferZ, freqDomainDataGPU_, transposeBufferZGPU, stream_));
    } break;
    case SpfftExchangeType::SPFFT_EXCH_BUFFERED_FLOAT: {
      const auto bufferSize =
          param->max_num_z_sticks() * param->max_num_xy_planes() * comm.size();
      auto transposeBufferZ = create_1d_view(array2, 0, bufferSize);
      auto transposeBufferZGPU = create_1d_view(gpuArray2, 0, bufferSize);
      auto transposeBufferXY = create_1d_view(array1, 0, bufferSize);
      auto transposeBufferXYGPU = create_1d_view(gpuArray1, 0, bufferSize);
      transpose_.reset(new TransposeMPIBufferedGPU(
          param, comm, transposeBufferXY, freqDomainXYGPU_, transposeBufferXYGPU, stream_,
          transposeBufferZ, freqDomainDataGPU_, transposeBufferZGPU, stream_));
    } break;
    default:
      throw InvalidParameterError();
  }
}
#endif

// Forward transform, stage 1: xy-transform of the space domain input and packing for the
// exchange. `input` may be a host or GPU pointer; host input is first copied to the GPU buffer.
// Throws GPUPrecedingError if a GPU error is already pending.
template auto ExecutionGPU::forward_xy(const T* input) -> void {
  // Check for any preceding errors before starting execution
  if (gpu::get_last_error() != gpu::status::Success) {
    throw GPUPrecedingError();
  }

  // make the internal stream wait for work already queued on the external stream
  startEvent_.record(externalStream_);
  startEvent_.stream_wait(stream_.get());

  const T* inputPtrHost = nullptr;
  const T* inputPtrGPU = nullptr;
  std::tie(inputPtrHost, inputPtrGPU) = translate_gpu_pointer(input);
  // without a GPU pointer, the transform reads from the internal space domain buffer
  if (!inputPtrGPU) inputPtrGPU = spaceDomainDataExternalGPU_.data();

  // XY
  if (transformXY_) {
    if (inputPtrHost) {
      gpu::check_status(gpu::memcpy_async(static_cast(spaceDomainDataExternalGPU_.data()),
                                          static_cast(inputPtrHost),
                                          spaceDomainDataExternalGPU_.size() * sizeof(T),
                                          gpu::flag::MemcpyHostToDevice, stream_.get()));
    }
    transformXY_->forward(inputPtrGPU, freqDomainXYGPU_.data());
  }

  // transpose
  if (transformXY_) transpose_->pack_forward();
}

// Forward transform, stage 2: start the exchange between xy-planes and z-sticks.
template auto ExecutionGPU::forward_exchange(const bool nonBlockingExchange) -> void {
  HOST_TIMING_SCOPED("exchange_start")
  transpose_->exchange_forward_start(nonBlockingExchange);
}

// Forward transform, stage 3: finish the exchange, run the z-transform and compress the result
// into `output` (host or GPU pointer). Scaling is applied during compression if
// `scalingType` is SPFFT_FULL_SCALING.
template auto ExecutionGPU::forward_z(T* output, const SpfftScalingType scalingType) -> void {
  HOST_TIMING_START("exechange_fininalize");
  transpose_->exchange_forward_finalize();
  HOST_TIMING_STOP("exechange_fininalize");

  if (transformZ_) transpose_->unpack_forward();

  // Z
  if (transformZ_) transformZ_->forward();

  // Compress
  if (compression_) {
    T* outputPtrHost = nullptr;
    T* outputPtrGPU = nullptr;
    std::tie(outputPtrHost, outputPtrGPU) = translate_gpu_pointer(output);

    if (outputPtrGPU == nullptr) {
      // output on HOST: compress into the internal GPU buffer, then copy to the host
      compression_->compress(stream_, freqDomainDataGPU_, freqDomainCompressedDataGPU_.data(),
                             scalingType == SpfftScalingType::SPFFT_FULL_SCALING, scalingFactor_);

      gpu::check_status(
          gpu::memcpy_async(static_cast(outputPtrHost),
                            static_cast(freqDomainCompressedDataGPU_.data()),
                            freqDomainCompressedDataGPU_.size() *
                                sizeof(decltype(*(freqDomainCompressedDataGPU_.data()))),
                            gpu::flag::MemcpyDeviceToHost, stream_.get()));
    } else {
      // output on GPU
      compression_->compress(stream_, freqDomainDataGPU_, outputPtrGPU,
                             scalingType == SpfftScalingType::SPFFT_FULL_SCALING, scalingFactor_);
    }
  }
}

// Backward transform, stage 1: decompress `input` (host or GPU pointer) into the z-stick
// buffer, apply the stick symmetry, run the z-transform and pack for the exchange.
// Throws GPUPrecedingError if a GPU error is already pending.
template auto ExecutionGPU::backward_z(const T* input) -> void {
  // Check for any preceding errors before starting execution
  if (gpu::get_last_error() != gpu::status::Success) {
    throw GPUPrecedingError();
  }

  startEvent_.record(externalStream_);
  startEvent_.stream_wait(stream_.get());

  // decompress
  if (compression_) {
    const T* inputPtrHost = nullptr;
    const T* inputPtrGPU = nullptr;
    std::tie(inputPtrHost, inputPtrGPU) = translate_gpu_pointer(input);

    // Add explicit default stream synchronization
    startEvent_.record(nullptr);
    startEvent_.stream_wait(stream_.get());

    if (inputPtrGPU == nullptr) {
      // input on HOST: copy into the internal GPU buffer, then decompress from there
      gpu::check_status(
          gpu::memcpy_async(static_cast(freqDomainCompressedDataGPU_.data()),
                            static_cast(inputPtrHost),
                            freqDomainCompressedDataGPU_.size() *
                                sizeof(decltype(*(freqDomainCompressedDataGPU_.data()))),
                            gpu::flag::MemcpyHostToDevice, stream_.get()));
      compression_->decompress(stream_, freqDomainCompressedDataGPU_.data(), freqDomainDataGPU_);
    } else {
      // input on GPU
      compression_->decompress(stream_, inputPtrGPU, freqDomainDataGPU_);
    }
  }

  // Z
  if (transformZ_) {
    zStickSymmetry_->apply();
    transformZ_->backward();
  }

  // transpose
  if (transformZ_) transpose_->pack_backward();
}

// Backward transform, stage 2: start the exchange between z-sticks and xy-planes.
template auto ExecutionGPU::backward_exchange(const bool nonBlockingExchange) -> void {
  HOST_TIMING_SCOPED("exchange_start")
  transpose_->exchange_backward_start(nonBlockingExchange);
}

// Backward transform, stage 3: finish the exchange, apply the plane symmetry and run the
// xy-transform. With a host `output`, the result is written to the internal GPU buffer and
// then copied to the host.
template auto ExecutionGPU::backward_xy(T* output) -> void {
  HOST_TIMING_START("exechange_fininalize");
  transpose_->exchange_backward_finalize();
  HOST_TIMING_STOP("exechange_fininalize");

  T* outputPtrHost = nullptr;
  T* outputPtrGPU = nullptr;
  std::tie(outputPtrHost, outputPtrGPU) = translate_gpu_pointer(output);
  if (!outputPtrGPU) outputPtrGPU = spaceDomainDataExternalGPU_.data();

  if (transformXY_) {
    transpose_->unpack_backward();
    planeSymmetry_->apply();
    transformXY_->backward(freqDomainXYGPU_.data(), outputPtrGPU);
    if (outputPtrHost) {
      gpu::check_status(
          gpu::memcpy_async(static_cast(outputPtrHost),
                            static_cast(spaceDomainDataExternalGPU_.data()),
                            spaceDomainDataExternalGPU_.size() * sizeof(T),
                            gpu::flag::MemcpyDeviceToHost, stream_.get()));
    }
  }
}

// Complete an execution. In asynchronous mode the external stream is made to wait on the
// internal stream via an event; otherwise the call synchronizes the internal stream.
template auto ExecutionGPU::synchronize(SpfftExecType mode) -> void {
  if (mode == SPFFT_EXEC_ASYNCHRONOUS) {
    endEvent_.record(stream_.get());
    endEvent_.stream_wait(externalStream_);
  } else {
    gpu::check_status(gpu::stream_synchronize(stream_.get()));
  }
}

// View of the space domain data on the host.
template auto ExecutionGPU::space_domain_data_host() -> HostArrayView3D {
  return spaceDomainDataExternalHost_;
}

// View of the space domain data on the GPU.
template auto ExecutionGPU::space_domain_data_gpu() -> GPUArrayView3D {
  return spaceDomainDataExternalGPU_;
}

// instantiate templates for float and double
template class ExecutionGPU;

#ifdef SPFFT_SINGLE_PRECISION
template class ExecutionGPU;
#endif
// Controls the execution of the 3D FFT from a compressed format in frequency space and slices in
// space domain. Memory is NOT owned by this class and must remain valid during the lifetime.
//
// A forward transform is run as forward_xy -> forward_exchange -> forward_z, a backward
// transform as backward_z -> backward_exchange -> backward_xy, each followed by synchronize().
//
// NOTE(review): template parameter/argument lists appear to have been stripped from this copy
// of the header (e.g. "template class", "std::unique_ptr transformZ_"). The code tokens below
// are kept exactly as found.
template class ExecutionGPU {
public:
  // Initialize a local execution on GPU
  ExecutionGPU(const int numThreads, std::shared_ptr param, HostArray>& array1,
               HostArray>& array2, GPUArray::type>& gpuArray1, GPUArray::type>& gpuArray2,
               const std::shared_ptr>& fftWorkBuffer);

#ifdef SPFFT_MPI
  // Initialize a distributed execution on GPU
  ExecutionGPU(MPICommunicatorHandle comm, const SpfftExchangeType exchangeType,
               const int numThreads, std::shared_ptr param, HostArray>& array1,
               HostArray>& array2, GPUArray::type>& gpuArray1, GPUArray::type>& gpuArray2,
               const std::shared_ptr>& fftWorkBuffer);
#endif

  // Forward transform stages. forward_xy reads the space domain input from a given memory
  // location (Host or GPU); forward_z writes the compressed frequency values to a given
  // memory location (Host or GPU), optionally scaled.
  auto forward_z(T* output, const SpfftScalingType scalingType) -> void;
  auto forward_exchange(const bool nonBlockingExchange) -> void;
  auto forward_xy(const T* input) -> void;

  // Backward transform stages. backward_z reads the compressed frequency values from a given
  // memory location (Host or GPU); backward_xy writes the space domain result to a given
  // memory location (Host or GPU).
  auto backward_z(const T* input) -> void;
  auto backward_exchange(const bool nonBlockingExchange) -> void;
  auto backward_xy(T* output) -> void;

  // Finish an execution: either order the external stream after the internal one
  // (asynchronous mode) or wait for the internal stream to complete.
  auto synchronize(SpfftExecType mode) -> void;

  // The space domain data on Host
  auto space_domain_data_host() -> HostArrayView3D;

  // The space domain data on GPU
  auto space_domain_data_gpu() -> GPUArrayView3D;

  // Stream that executions are ordered against; nullptr (the initial value) until set.
  auto get_external_stream() -> gpu::StreamType { return externalStream_; }

  auto set_external_stream(gpu::StreamType stream) -> void { externalStream_ = stream; }

private:
  GPUStreamHandle stream_;          // internal stream all work is queued on
  gpu::StreamType externalStream_;  // caller-provided stream, not owned
  GPUEventHandle startEvent_;       // orders the internal stream after the external one
  GPUEventHandle endEvent_;         // orders the external stream after the internal one
  int numThreads_;
  T scalingFactor_;  // 1 / (dim_x * dim_y * dim_z)
  std::unique_ptr transformZ_;
  std::unique_ptr transpose_;
  std::unique_ptr transformXY_;
  std::unique_ptr zStickSymmetry_;
  std::unique_ptr planeSymmetry_;
  std::unique_ptr compression_;
  // views into memory owned elsewhere
  HostArrayView3D spaceDomainDataExternalHost_;
  GPUArrayView3D spaceDomainDataExternalGPU_;
  GPUArrayView2D::type> freqDomainDataGPU_;
  GPUArrayView1D freqDomainCompressedDataGPU_;
  GPUArrayView3D::type> freqDomainXYGPU_;
};
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "execution/execution_host.hpp" #include "compression/indices.hpp" #include "fft/transform_1d_host.hpp" #include "fft/transform_real_1d_host.hpp" #include "memory/array_view_utility.hpp" #include "memory/host_array_view.hpp" #include "spfft/exceptions.hpp" #include "symmetry/symmetry_host.hpp" #include "timing/timing.hpp" #include "transpose/transpose_host.hpp" #include "util/common_types.hpp" #ifdef SPFFT_MPI #include "transpose/transpose_mpi_buffered_host.hpp" #include "transpose/transpose_mpi_compact_buffered_host.hpp" #include "transpose/transpose_mpi_unbuffered_host.hpp" #endif namespace spfft { template ExecutionHost::ExecutionHost(const int numThreads, std::shared_ptr param, HostArray>& array1, HostArray>& array2) : numThreads_(numThreads), scalingFactor_(static_cast( 1.0 / static_cast(param->dim_x() * param->dim_y() * param->dim_z()))), zStickSymmetry_(new Symmetry()), planeSymmetry_(new Symmetry()) { HOST_TIMING_SCOPED("Execution init"); const SizeType numLocalZSticks = param->num_z_sticks(0); const SizeType numLocalXYPlanes = param->num_xy_planes(0); std::set uniqueXIndices; for (const auto& xyIndex : param->z_stick_xy_indices(0)) { uniqueXIndices.emplace(static_cast(xyIndex / 
param->dim_y())); } auto freqDomainZ3D = create_3d_view(array1, 0, 1, numLocalZSticks, param->dim_z()); freqDomainData_ = create_2d_view(freqDomainZ3D, 0, numLocalZSticks, param->dim_z()); freqDomainXY_ = create_3d_view(array2, 0, param->dim_z(), param->dim_x_freq(), param->dim_y()); transpose_.reset(new TransposeHost(param, freqDomainXY_, freqDomainData_)); if (param->local_value_indices().size() > 0) { compression_.reset(new CompressionHost(param)); } if (numLocalZSticks > 0) { // Z transformZBackward_.reset(new Transform1DPlanesHost(freqDomainZ3D, freqDomainZ3D, false, false, FFTW_BACKWARD, numThreads)); transformZForward_.reset(new Transform1DPlanesHost(freqDomainZ3D, freqDomainZ3D, false, false, FFTW_FORWARD, numThreads)); } if (numLocalXYPlanes > 0) { // Y transformYBackward_.reset(new Transform1DVerticalHost(freqDomainXY_, freqDomainXY_, false, false, FFTW_BACKWARD, uniqueXIndices)); transformYForward_.reset(new Transform1DVerticalHost(freqDomainXY_, freqDomainXY_, false, false, FFTW_FORWARD, uniqueXIndices)); // X if (param->transform_type() == SPFFT_TRANS_R2C) { if (param->zero_zero_stick_index() < param->num_z_sticks(0)) { zStickSymmetry_.reset(new StickSymmetryHost(HostArrayView1D>( &freqDomainData_(param->zero_zero_stick_index(), 0), freqDomainData_.dim_inner(), freqDomainData_.pinned()))); } planeSymmetry_.reset(new PlaneSymmetryHost(freqDomainXY_)); spaceDomainDataExternal_ = create_new_type_3d_view(array1, param->dim_z(), param->dim_y(), param->dim_x()); transformXBackward_.reset(new C2RTransform1DPlanesHost( freqDomainXY_, spaceDomainDataExternal_, true, false, numThreads)); transformXForward_.reset(new R2CTransform1DPlanesHost( spaceDomainDataExternal_, freqDomainXY_, false, true, numThreads)); } else { auto spaceDomainData = create_3d_view(array1, 0, param->dim_z(), param->dim_y(), param->dim_x_freq()); spaceDomainDataExternal_ = create_new_type_3d_view(array1, param->dim_z(), param->dim_y(), 2 * param->dim_x()); transformXBackward_.reset(new 
Transform1DPlanesHost(freqDomainXY_, spaceDomainData, true, false, FFTW_BACKWARD, numThreads)); transformXForward_.reset(new Transform1DPlanesHost(spaceDomainData, freqDomainXY_, false, true, FFTW_FORWARD, numThreads)); } } } #ifdef SPFFT_MPI template ExecutionHost::ExecutionHost(MPICommunicatorHandle comm, const SpfftExchangeType exchangeType, const int numThreads, std::shared_ptr param, HostArray>& array1, HostArray>& array2) : numThreads_(numThreads), scalingFactor_(static_cast( 1.0 / static_cast(param->dim_x() * param->dim_y() * param->dim_z()))), zStickSymmetry_(new Symmetry()), planeSymmetry_(new Symmetry()) { HOST_TIMING_SCOPED("Execution init"); const SizeType numLocalZSticks = param->num_z_sticks(comm.rank()); const SizeType numLocalXYPlanes = param->num_xy_planes(comm.rank()); // get unique x indices to only compute non-zero y-transforms std::set uniqueXIndices; for (SizeType r = 0; r < comm.size(); ++r) { for (const auto& xyIndex : param->z_stick_xy_indices(r)) { uniqueXIndices.emplace(static_cast(xyIndex / param->dim_y())); } } auto freqDomainZ3D = create_3d_view(array1, 0, 1, numLocalZSticks, param->dim_z()); freqDomainData_ = create_2d_view(freqDomainZ3D, 0, numLocalZSticks, param->dim_z()); freqDomainXY_ = create_3d_view(array2, 0, numLocalXYPlanes, param->dim_x_freq(), param->dim_y()); auto& spaceDomainArray = array1; // create external view with if (param->transform_type() == SPFFT_TRANS_R2C) { spaceDomainDataExternal_ = create_new_type_3d_view(spaceDomainArray, numLocalXYPlanes, param->dim_y(), param->dim_x()); } else { spaceDomainDataExternal_ = create_new_type_3d_view(spaceDomainArray, numLocalXYPlanes, param->dim_y(), 2 * param->dim_x()); } if (param->local_value_indices().size() > 0) { compression_.reset(new CompressionHost(param)); } if (numLocalZSticks > 0) { // apply hermitian symmetry for x=0, y=0 stick if (param->transform_type() == SPFFT_TRANS_R2C && param->zero_zero_stick_index() < freqDomainData_.dim_outer()) { 
zStickSymmetry_.reset(new StickSymmetryHost( HostArrayView1D>(&freqDomainData_(param->zero_zero_stick_index(), 0), freqDomainData_.dim_inner(), freqDomainData_.pinned()))); } transformZForward_ = std::unique_ptr>(new Transform1DPlanesHost( freqDomainZ3D, freqDomainZ3D, false, false, FFTW_FORWARD, numThreads)); transformZBackward_ = std::unique_ptr>(new Transform1DPlanesHost( freqDomainZ3D, freqDomainZ3D, false, false, FFTW_BACKWARD, numThreads)); } if (numLocalXYPlanes > 0) { transformYBackward_.reset(new Transform1DVerticalHost(freqDomainXY_, freqDomainXY_, false, false, FFTW_BACKWARD, uniqueXIndices)); transformYForward_.reset(new Transform1DVerticalHost(freqDomainXY_, freqDomainXY_, false, false, FFTW_FORWARD, uniqueXIndices)); if (param->transform_type() == SPFFT_TRANS_R2C) { transformXBackward_.reset(new C2RTransform1DPlanesHost( freqDomainXY_, spaceDomainDataExternal_, true, false, numThreads)); transformXForward_.reset(new R2CTransform1DPlanesHost( spaceDomainDataExternal_, freqDomainXY_, false, true, numThreads)); planeSymmetry_.reset(new PlaneSymmetryHost(freqDomainXY_)); } else { auto spaceDomainData = create_3d_view(spaceDomainArray, 0, numLocalXYPlanes, param->dim_y(), param->dim_x()); transformXBackward_.reset(new Transform1DPlanesHost(freqDomainXY_, spaceDomainData, true, false, FFTW_BACKWARD, numThreads)); transformXForward_.reset(new Transform1DPlanesHost(spaceDomainData, freqDomainXY_, false, true, FFTW_FORWARD, numThreads)); } } switch (exchangeType) { case SpfftExchangeType::SPFFT_EXCH_UNBUFFERED: { transpose_.reset( new TransposeMPIUnbufferedHost(param, comm, freqDomainXY_, freqDomainData_)); } break; case SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED: { auto transposeBufferZ = create_1d_view( array2, 0, param->total_num_xy_planes() * param->num_z_sticks(comm.rank())); auto transposeBufferXY = create_1d_view( array1, 0, param->total_num_z_sticks() * param->num_xy_planes(comm.rank())); transpose_.reset(new TransposeMPICompactBufferedHost( param, 
comm, freqDomainXY_, freqDomainData_, transposeBufferXY, transposeBufferZ)); } break; case SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED_FLOAT: { auto transposeBufferZ = create_1d_view( array2, 0, param->total_num_xy_planes() * param->num_z_sticks(comm.rank())); auto transposeBufferXY = create_1d_view( array1, 0, param->total_num_z_sticks() * param->num_xy_planes(comm.rank())); transpose_.reset(new TransposeMPICompactBufferedHost( param, comm, freqDomainXY_, freqDomainData_, transposeBufferXY, transposeBufferZ)); } break; case SpfftExchangeType::SPFFT_EXCH_BUFFERED: { auto transposeBufferZ = create_1d_view( array2, 0, param->max_num_z_sticks() * param->max_num_xy_planes() * comm.size()); auto transposeBufferXY = create_1d_view( array1, 0, param->max_num_z_sticks() * param->max_num_xy_planes() * comm.size()); transpose_.reset(new TransposeMPIBufferedHost( param, comm, freqDomainXY_, freqDomainData_, transposeBufferXY, transposeBufferZ)); } break; case SpfftExchangeType::SPFFT_EXCH_BUFFERED_FLOAT: { auto transposeBufferZ = create_1d_view( array2, 0, param->max_num_z_sticks() * param->max_num_xy_planes() * comm.size()); auto transposeBufferXY = create_1d_view( array1, 0, param->max_num_z_sticks() * param->max_num_xy_planes() * comm.size()); transpose_.reset(new TransposeMPIBufferedHost( param, comm, freqDomainXY_, freqDomainData_, transposeBufferXY, transposeBufferZ)); } break; default: throw InvalidParameterError(); } } #endif template auto ExecutionHost::forward_xy(const T* input) -> void { SPFFT_OMP_PRAGMA("omp parallel num_threads(numThreads_)") { SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("x transform"); } if (transformXForward_) transformXForward_->execute(input, reinterpret_cast(freqDomainXY_.data())); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("x transform"); } SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("y transform"); } if (transformYForward_) transformYForward_->execute(); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("y 
transform"); } SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("pack"); } if (transformYForward_) transpose_->pack_forward(); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("pack"); } } } template auto ExecutionHost::forward_exchange(const bool nonBlockingExchange) -> void { HOST_TIMING_SCOPED("exchange_start") // must be called outside omp parallel region (MPI restriction on thread id) transpose_->exchange_forward_start(nonBlockingExchange); // SPFFT_OMP_PRAGMA("omp barrier") // ensure exchange is done } template auto ExecutionHost::forward_z(T* output, const SpfftScalingType scalingType) -> void { // must be called outside omp parallel region (MPI restriction on thread id) HOST_TIMING_START("exechange_fininalize"); transpose_->exchange_forward_finalize(); HOST_TIMING_STOP("exechange_fininalize"); HOST_TIMING_STOP("exchange") SPFFT_OMP_PRAGMA("omp parallel num_threads(numThreads_)") { SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("unpack"); } if (transformZForward_) transpose_->unpack_forward(); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("unpack"); } SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("z transform"); } if (transformZForward_) transformZForward_->execute(); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("z transform"); } SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("compression"); } if (compression_) compression_->compress(freqDomainData_, output, scalingType == SpfftScalingType::SPFFT_FULL_SCALING, scalingFactor_); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("compression"); } } } template auto ExecutionHost::backward_z(const T* input) -> void { SPFFT_OMP_PRAGMA("omp parallel num_threads(numThreads_)") { SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("compression"); } if (compression_) compression_->decompress(input, freqDomainData_); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("compression"); } SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("z symmetrization"); } zStickSymmetry_->apply(); 
SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("z symmetrization"); } SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("z transform"); } if (transformZBackward_) transformZBackward_->execute(); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("z transform"); } SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("pack"); } if (transformZBackward_) transpose_->pack_backward(); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("pack"); } } } template auto ExecutionHost::backward_exchange(const bool nonBlockingExchange) -> void { HOST_TIMING_SCOPED("exchange_start") // must be called outside omp parallel region (MPI restriction on thread id) transpose_->exchange_backward_start(nonBlockingExchange); } template auto ExecutionHost::backward_xy(T* output) -> void { // must be called outside omp parallel region (MPI restriction on thread id) HOST_TIMING_START("exechange_fininalize"); transpose_->exchange_forward_finalize(); HOST_TIMING_STOP("exechange_fininalize"); HOST_TIMING_STOP("exchange") SPFFT_OMP_PRAGMA("omp parallel num_threads(numThreads_)") { SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("unpack"); } if (transformYBackward_) transpose_->unpack_backward(); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("unpack"); } SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("xy symmetrization"); } planeSymmetry_->apply(); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("xy symmetrization"); } SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("y transform"); } if (transformYBackward_) transformYBackward_->execute(); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("y transform"); } SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_START("x transform"); } if (transformXBackward_) transformXBackward_->execute(reinterpret_cast(freqDomainXY_.data()), output); SPFFT_OMP_PRAGMA("omp master") { HOST_TIMING_STOP("x transform"); } } } template auto ExecutionHost::space_domain_data() -> HostArrayView3D { return spaceDomainDataExternal_; } // instatiate templates for float 
and double template class ExecutionHost; #ifdef SPFFT_SINGLE_PRECISION template class ExecutionHost; #endif } // namespace spfft SpFFT-1.1.0/src/execution/execution_host.hpp000066400000000000000000000101411457701740000210000ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_EXECUTION_HOST_HPP #define SPFFT_EXECUTION_HOST_HPP #include #include #include #include "compression/compression_host.hpp" #include "compression/indices.hpp" #include "fft/transform_interface.hpp" #include "memory/host_array.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/config.h" #include "spfft/types.h" #include "symmetry/symmetry.hpp" #include "timing/timing.hpp" #include "transpose/transpose.hpp" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_init_handle.hpp" #endif namespace spfft { // Controls the execution of the 3D FFT from a compressed format in frequency space and slices in // space domain. Memory is NOT owned by this class and must remain valid during the lifetime. template class ExecutionHost { public: // Initialize a local execution on Host ExecutionHost(const int numThreads, std::shared_ptr param, HostArray>& array1, HostArray>& array2); #ifdef SPFFT_MPI // Initialize a distributed execution on Host ExecutionHost(MPICommunicatorHandle comm, const SpfftExchangeType exchangeType, const int numThreads, std::shared_ptr param, HostArray>& array1, HostArray>& array2); #endif // Transform forward auto forward_z(T* output, const SpfftScalingType scalingType) -> void; auto forward_exchange(const bool nonBlockingExchange) -> void; auto forward_xy(const T* input) -> void; // Transform backward auto backward_z(const T* input) -> void; auto backward_exchange(const bool nonBlockingExchange) -> void; auto backward_xy(T* output) -> void; // Access the space domain data auto space_domain_data() -> HostArrayView3D; private: int numThreads_; T scalingFactor_; std::unique_ptr> transformZBackward_; std::unique_ptr> transformZForward_; std::unique_ptr> transformYBackward_; std::unique_ptr> transformYForward_; std::unique_ptr> transformXBackward_; std::unique_ptr> transformXForward_; std::unique_ptr transpose_; std::unique_ptr zStickSymmetry_; 
std::unique_ptr planeSymmetry_; std::unique_ptr compression_; HostArrayView3D spaceDomainDataExternal_; HostArrayView2D> freqDomainData_; HostArrayView3D> freqDomainXY_; }; } // namespace spfft #endif SpFFT-1.1.0/src/fft/000077500000000000000000000000001457701740000140065ustar00rootroot00000000000000SpFFT-1.1.0/src/fft/fftw_interface.hpp000066400000000000000000000122301457701740000175030ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_FFTW_INTERFACE_HPP #define SPFFT_FFTW_INTERFACE_HPP #include #include #include "fft/fftw_mutex.hpp" #include "spfft/config.h" namespace spfft { template struct FFTW; template <> struct FFTW { using ValueType = double; using ComplexType = fftw_complex; using PlanType = fftw_plan; template static auto alignment_of(ARGS&&... args) -> int { return fftw_alignment_of(args...); } template static auto plan_dft_1d(ARGS&&... args) -> fftw_plan { std::lock_guard guard(global_fftw_mutex()); return fftw_plan_dft_1d(args...); } template static auto plan_many_dft(ARGS&&... args) -> fftw_plan { std::lock_guard guard(global_fftw_mutex()); return fftw_plan_many_dft(args...); } template static auto plan_many_dft_c2r(ARGS&&... args) -> fftw_plan { std::lock_guard guard(global_fftw_mutex()); return fftw_plan_many_dft_c2r(args...); } template static auto plan_many_dft_r2c(ARGS&&... args) -> fftw_plan { std::lock_guard guard(global_fftw_mutex()); return fftw_plan_many_dft_r2c(args...); } template static auto destroy_plan(ARGS&&... args) -> void { std::lock_guard guard(global_fftw_mutex()); fftw_destroy_plan(args...); } template static auto execute(ARGS&&... args) -> void { fftw_execute(args...); } template static auto execute_dft(ARGS&&... args) -> void { fftw_execute_dft(args...); } template static auto execute_dft_r2c(ARGS&&... args) -> void { fftw_execute_dft_r2c(args...); } template static auto execute_dft_c2r(ARGS&&... args) -> void { fftw_execute_dft_c2r(args...); } }; #ifdef SPFFT_SINGLE_PRECISION template <> struct FFTW { using ValueType = float; using ComplexType = fftwf_complex; using PlanType = fftwf_plan; template static auto alignment_of(ARGS&&... args) -> int { return fftwf_alignment_of(args...); } template static auto plan_dft_1d(ARGS&&... args) -> fftwf_plan { std::lock_guard guard(global_fftw_mutex()); return fftwf_plan_dft_1d(args...); } template static auto plan_many_dft(ARGS&&... 
args) -> fftwf_plan { std::lock_guard guard(global_fftw_mutex()); return fftwf_plan_many_dft(args...); } template static auto plan_many_dft_c2r(ARGS&&... args) -> fftwf_plan { std::lock_guard guard(global_fftw_mutex()); return fftwf_plan_many_dft_c2r(args...); } template static auto plan_many_dft_r2c(ARGS&&... args) -> fftwf_plan { std::lock_guard guard(global_fftw_mutex()); return fftwf_plan_many_dft_r2c(args...); } template static auto destroy_plan(ARGS&&... args) -> void { std::lock_guard guard(global_fftw_mutex()); fftwf_destroy_plan(args...); } template static auto execute(ARGS&&... args) -> void { fftwf_execute(args...); } template static auto execute_dft(ARGS&&... args) -> void { fftwf_execute_dft(args...); } template static auto execute_dft_r2c(ARGS&&... args) -> void { fftwf_execute_dft_r2c(args...); } template static auto execute_dft_c2r(ARGS&&... args) -> void { fftwf_execute_dft_c2r(args...); } }; #endif } // namespace spfft #endif SpFFT-1.1.0/src/fft/fftw_mutex.cpp000066400000000000000000000034311457701740000167030ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include "fft/fftw_mutex.hpp" #include "spfft/config.h" namespace spfft { auto global_fftw_mutex() -> std::mutex& { static std::mutex globMutex; // thread safe initialization since C++11 return globMutex; } } // namespace spfft SpFFT-1.1.0/src/fft/fftw_mutex.hpp000066400000000000000000000034661457701740000167200ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_FFTW_MUTEX_HPP #define SPFFT_FFTW_MUTEX_HPP #include #include "spfft/config.h" namespace spfft { // provides a global mutex for guarding fftw functions calls, which are not thread-safe auto global_fftw_mutex() -> std::mutex&; } // namespace spfft #endif SpFFT-1.1.0/src/fft/fftw_plan_1d.hpp000066400000000000000000000323711457701740000170710ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_FFTW_PLAN_HPP #define SPFFT_FFTW_PLAN_HPP #include #include #include #include #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "util/common_types.hpp" #include "util/type_check.hpp" #include "fft/fftw_interface.hpp" namespace spfft { // Hash for tuple of int alignment values. Assumption is that alignments are small numbers (less than // half the maximum value of an int) struct FFTWPropHash { std::size_t operator()(const std::tuple& tuple) const { assert(std::get<1>(tuple) >= 0); assert(std::get<2>(tuple) >= 0); assert(std::get<1>(tuple) < (1 << (sizeof(int) * 4 - 1))); assert(std::get<2>(tuple) < (1 << (sizeof(int) * 4 - 1))); const int sign = 2 * static_cast(std::get<0>(tuple)) - 1; return std::hash()( sign * ((std::get<1>(tuple) << (sizeof(int) * 4 - 1)) + std::get<2>(tuple) + 1)); } }; enum class FFTWPlanType { C2C, R2C, C2R }; template class FFTWPlan { public: using ComplexType = std::complex; // Create strided 1d fftw plan. // If input and output pointers are equal, in-place transform is created. 
FFTWPlan(const ComplexType* input, ComplexType* output, const SizeType size, const SizeType istride, const SizeType ostride, const SizeType idist, const SizeType odist, const SizeType howmany, const int sign) : size_(size), sign_(sign), inPlace_(input == output), alignmentInput_( FFTW::alignment_of(reinterpret_cast(const_cast(input)))), alignmentOutput_(FFTW::alignment_of(reinterpret_cast(output))), type_(FFTWPlanType::C2C) { int rank = 1; int n[] = {(int)size}; int inembed[] = {n[0]}; int onembed[] = {n[0]}; auto flags = FFTW_ESTIMATE; plan_ = FFTW::plan_many_dft( rank, n, (int)howmany, reinterpret_cast::ComplexType*>(const_cast(input)), inembed, (int)istride, (int)idist, reinterpret_cast::ComplexType*>(output), onembed, (int)ostride, (int)odist, sign, flags); if (!plan_) throw FFTWError(); } // C2R FFTWPlan(const ComplexType* input, T* output, const SizeType size, const SizeType istride, const SizeType ostride, const SizeType idist, const SizeType odist, const SizeType howmany) : size_(size), sign_(FFTW_BACKWARD), inPlace_(reinterpret_cast(input) == reinterpret_cast(output)), alignmentInput_( FFTW::alignment_of(reinterpret_cast(const_cast(input)))), alignmentOutput_(FFTW::alignment_of(output)), type_(FFTWPlanType::C2R) { assert(reinterpret_cast(input) != reinterpret_cast(output)); // must not be in place int rank = 1; int n[] = {(int)size}; int inembed[] = {n[0]}; int onembed[] = {n[0]}; auto flags = FFTW_ESTIMATE; plan_ = FFTW::plan_many_dft_c2r( rank, n, (int)howmany, reinterpret_cast::ComplexType*>(const_cast(input)), inembed, (int)istride, (int)idist, output, onembed, (int)ostride, (int)odist, flags); if (!plan_) throw FFTWError(); } // R2C FFTWPlan(const T* input, ComplexType* output, const SizeType size, const SizeType istride, const SizeType ostride, const SizeType idist, const SizeType odist, const SizeType howmany) : size_(size), sign_(FFTW_FORWARD), inPlace_(reinterpret_cast(input) == reinterpret_cast(output)), 
alignmentInput_(FFTW::alignment_of(const_cast(input))), alignmentOutput_(FFTW::alignment_of(reinterpret_cast(output))), type_(FFTWPlanType::R2C) { assert(reinterpret_cast(input) != reinterpret_cast(output)); // must not be in place int rank = 1; int n[] = {(int)size}; int inembed[] = {n[0]}; int onembed[] = {n[0]}; auto flags = FFTW_ESTIMATE; plan_ = FFTW::plan_many_dft_r2c(rank, n, (int)howmany, const_cast(input), inembed, (int)istride, (int)idist, reinterpret_cast::ComplexType*>(output), onembed, (int)ostride, (int)odist, flags); if (!plan_) throw FFTWError(); } FFTWPlan(const FFTWPlan& other) = delete; FFTWPlan(FFTWPlan&& other) noexcept { *this = std::move(other); } auto operator=(const FFTWPlan& other) -> FFTWPlan& = delete; auto operator=(FFTWPlan&& other) noexcept -> FFTWPlan& { FFTW::destroy_plan(plan_); plan_ = other.plan_; size_ = other.size_; sign_ = other.sign_; inPlace_ = other.inPlace_; alignmentInput_ = other.alignmentInput_; alignmentOutput_ = other.alignmentOutput_; type_ = other.type_; other.plan_ = nullptr; other.size_ = 0; other.sign_ = 0; other.inPlace_ = false; other.alignmentInput_ = 0; other.alignmentOutput_ = 0; other.type_ = FFTWPlanType::C2C; return *this; } // Get plan handle inline auto get() -> fftw_plan { return plan_; }; // Release ownership of plan handle inline auto release() -> fftw_plan { typename FFTW::PlanType planLocal = plan_; plan_ = nullptr; return planLocal; }; inline auto empty() const noexcept -> bool { return !plan_; } inline auto size() const noexcept -> SizeType { return size_; } inline auto sign() const noexcept -> int { return sign_; } inline auto type() const noexcept -> FFTWPlanType { return type_; } // Plan created with in-place transform inline auto in_place() const noexcept -> bool { return inPlace_; } // Execute on input / output provided to constructor. // Undefinded behaviour if empty(). auto execute() -> void { FFTW::execute(plan_); } // Execute on given input / output. 
// The alignment of input and output must match the pointers given to the constructor.
// If the plan was not set up for in-place transforms, input and output must not be equal.
// Undefined behaviour if empty().
auto execute(const void* inputConst, void* output) -> void {
  void* input = const_cast(inputConst);
  assert(inPlace_ == (input == output));
  assert(FFTW::alignment_of(reinterpret_cast(input)) == alignmentInput_);
  assert(FFTW::alignment_of(reinterpret_cast(output)) == alignmentOutput_);
  // Dispatch to the execute call that matches the type the plan was created with.
  if(type_ == FFTWPlanType::C2C)
    FFTW::execute_dft(plan_, reinterpret_cast::ComplexType*>(input), reinterpret_cast::ComplexType*>(output));
  else if (type_== FFTWPlanType::C2R)
    FFTW::execute_dft_c2r(plan_, reinterpret_cast::ComplexType*>(input), reinterpret_cast(output));
  else
    FFTW::execute_dft_r2c(plan_, reinterpret_cast(input), reinterpret_cast::ComplexType*>(output));
}

// Destroys the owned plan handle, if any.
~FFTWPlan() {
  if (plan_) {
    FFTW::destroy_plan(plan_);
  }
  plan_ = nullptr;
}

private:
typename FFTW::PlanType plan_ = nullptr;
SizeType size_ = 0;
int sign_;
bool inPlace_ = false;
int alignmentInput_ = 0;
int alignmentOutput_ = 0;
FFTWPlanType type_ = FFTWPlanType::C2C;
};

// Collection of FFTWPlan objects that all describe the same transform (size, strides,
// distances, batch count, sign and type) but were created for different buffer properties.
// Plans are stored in a map keyed by the tuple
// (input == output, alignment of input, alignment of output).
// The plan for the pointers given to the constructor is created eagerly; plans for other
// keys are created on demand by execute(input, output).
template class FlexibleFFTWPlan {
public:
using ComplexType = typename FFTWPlan::ComplexType;

// C2C: complex input, complex output, direction given by "sign".
FlexibleFFTWPlan(const ComplexType* input, ComplexType* output, const SizeType size, const SizeType istride, const SizeType ostride, const SizeType idist, const SizeType odist, const SizeType howmany, const int sign)
    : originalKey_(input == output, FFTW::alignment_of(reinterpret_cast(const_cast(input))), FFTW::alignment_of(reinterpret_cast(output))),
      size_(size),
      istride_(istride),
      ostride_(ostride),
      idist_(idist),
      odist_(odist),
      howmany_(howmany),
      sign_(sign),
      type_(FFTWPlanType::C2C) {
  plans_.insert({originalKey_, FFTWPlan(input, output, size, istride, ostride, idist, odist, howmany, sign)});
}

// C2R: complex input, real output. The sign is fixed to FFTW_BACKWARD.
FlexibleFFTWPlan(const ComplexType* input, T* output, const SizeType size, const SizeType istride, const SizeType ostride, const SizeType idist, const SizeType odist, const SizeType howmany)
    : originalKey_(reinterpret_cast(input) == output, FFTW::alignment_of(reinterpret_cast(const_cast(input))), FFTW::alignment_of(output)),
      size_(size),
      istride_(istride),
      ostride_(ostride),
      idist_(idist),
      odist_(odist),
      howmany_(howmany),
      sign_(FFTW_BACKWARD),
      type_(FFTWPlanType::C2R) {
  plans_.insert( {originalKey_, FFTWPlan(input, output, size, istride, ostride, idist, odist, howmany)});
}

// R2C: real input, complex output. The sign is fixed to FFTW_FORWARD.
FlexibleFFTWPlan(const T* input, ComplexType* output, const SizeType size, const SizeType istride, const SizeType ostride, const SizeType idist, const SizeType odist, const SizeType howmany)
    : originalKey_(input == reinterpret_cast(output), FFTW::alignment_of(const_cast(input)), FFTW::alignment_of(reinterpret_cast(output))),
      size_(size),
      istride_(istride),
      ostride_(ostride),
      idist_(idist),
      odist_(odist),
      howmany_(howmany),
      sign_(FFTW_FORWARD),
      type_(FFTWPlanType::R2C) {
  plans_.insert( {originalKey_, FFTWPlan(input, output, size, istride, ostride, idist, odist, howmany)});
}

// Sign stored at construction.
inline auto sign() const noexcept -> int { return sign_; }

// Execute on the given input / output.
// Looks up the plan matching the in-place flag and alignments of the given pointers.
// If none exists yet, a new plan of the stored type is created from the stored parameters
// and added to the map (so this call may modify plans_).
auto execute(const void* input, void* output) -> void {
  std::tuple key{ input == output, FFTW::alignment_of(reinterpret_cast(const_cast(input))), FFTW::alignment_of(reinterpret_cast(output))};
  auto it = plans_.find(key);
  // Create plan if no matching one is found
  if (it == plans_.end()) {
    if (type_ == FFTWPlanType::C2C)
      it = plans_ .insert({key, FFTWPlan(reinterpret_cast(input), reinterpret_cast(output), size_, istride_, ostride_, idist_, odist_, howmany_, sign_)}) .first;
    else if (type_ == FFTWPlanType::C2R)
      it = plans_ .insert({key, FFTWPlan(reinterpret_cast(input), reinterpret_cast(output), size_, istride_, ostride_, idist_, odist_, howmany_)}) .first;
    else
      it = plans_ .insert({key, FFTWPlan(reinterpret_cast(input), reinterpret_cast(output), size_, istride_, ostride_, idist_, odist_, howmany_)}) .first;
  }
  it->second.execute(input, output);
}

// Execute on the input / output pointers that were given to the constructor.
// The plan for originalKey_ is inserted by every constructor, so the lookup is only asserted.
auto execute() -> void {
  auto it = plans_.find(originalKey_);
  assert(it != plans_.end());
  it->second.execute();
}

private:
// Plans by (in-place, input alignment, output alignment).
std::unordered_map, FFTWPlan, FFTWPropHash> plans_;
// Key of the plan created in the constructor.
const std::tuple originalKey_;
// Parameters used to create additional plans on demand.
const SizeType size_;
const SizeType istride_;
const SizeType ostride_;
const SizeType idist_;
const SizeType odist_;
const SizeType howmany_;
const int sign_;
const FFTWPlanType type_;
};
} // namespace spfft
#endif
SpFFT-1.1.0/src/fft/transform_1d_gpu.hpp000066400000000000000000000131201457701740000177660ustar00rootroot00000000000000 /* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE.
*/ #ifndef SPFFT_TRANSFORM_1D_GPU_HPP #define SPFFT_TRANSFORM_1D_GPU_HPP #include #include #include #include #include "fft/transform_interface.hpp" #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_runtime_api.hpp" #include "gpu_util/gpu_stream_handle.hpp" #include "memory/gpu_array.hpp" #include "memory/gpu_array_view.hpp" #include "spfft/config.h" #include "util/common_types.hpp" namespace spfft { template class Transform1DGPU : public TransformGPU { public: using ValueType = T; using ComplexType = typename gpu::fft::ComplexType::type; Transform1DGPU(GPUArrayView2D::type>& data, GPUStreamHandle stream, std::shared_ptr> workBuffer) : stream_(std::move(stream)), workBuffer_(std::move(workBuffer)), dataPtr_(data.data()) { assert(workBuffer_); std::size_t worksize = 0; int rank = 1; int n[1] = {data.dim_inner()}; int nembed[1] = {data.dim_inner()}; int stride = 1; int dist = data.dim_inner(); int batch = data.dim_outer(); // create plan gpu::fft::check_result(gpu::fft::create(&plan_)); gpu::fft::check_result(gpu::fft::set_auto_allocation(plan_, 0)); gpu::fft::check_result(gpu::fft::make_plan_many( plan_, rank, n, nembed, stride, dist, nembed, stride, dist, gpu::fft::TransformType::ComplexToComplex::value, batch, &worksize)); // set stream gpu::fft::check_result(gpu::fft::set_stream(plan_, stream_.get())); // resize work buffer if necessary if (workBuffer_->size() < worksize) { *workBuffer_ = GPUArray(worksize); } } Transform1DGPU(const Transform1DGPU& transform) = delete; Transform1DGPU(Transform1DGPU&& transform) noexcept : stream_(std::move(transform.stream_)), plan_(std::move(transform.plan_)), workBuffer_(std::move(transform.workBuffer_)), dataPtr_(transform.dataPtr_) { transform.plan_ = 0; } ~Transform1DGPU() { if (plan_) { gpu::fft::destroy(plan_); } } auto operator=(const Transform1DGPU& transform) -> Transform1DGPU& = delete; auto operator=(Transform1DGPU&& transform) noexcept -> Transform1DGPU& { if (plan_) { gpu::fft::destroy(plan_); } stream_ = 
std::move(transform.stream_); plan_ = std::move(transform.plan_); workBuffer_ = std::move(transform.workBuffer_); dataPtr_ = transform.dataPtr_; transform.plan_ = 0; return *this; } inline auto device_id() const noexcept -> int { return stream_.device_id(); } auto forward() -> void override { gpu::fft::check_result(gpu::fft::set_work_area(plan_, workBuffer_->data())); gpu::fft::check_result( gpu::fft::execute(plan_, dataPtr_, dataPtr_, gpu::fft::TransformDirection::Forward)); } auto forward(const void* input, void* output) -> void override { gpu::fft::check_result(gpu::fft::set_work_area(plan_, workBuffer_->data())); gpu::fft::check_result(gpu::fft::execute(plan_, reinterpret_cast(input), reinterpret_cast(output), gpu::fft::TransformDirection::Forward)); } auto backward() -> void override { gpu::fft::check_result(gpu::fft::set_work_area(plan_, workBuffer_->data())); gpu::fft::check_result( gpu::fft::execute(plan_, dataPtr_, dataPtr_, gpu::fft::TransformDirection::Backward)); } auto backward(const void* input, void* output) -> void override { gpu::fft::check_result(gpu::fft::set_work_area(plan_, workBuffer_->data())); gpu::fft::check_result(gpu::fft::execute(plan_, reinterpret_cast(input), reinterpret_cast(output), gpu::fft::TransformDirection::Backward)); } private: GPUStreamHandle stream_; gpu::fft::HandleType plan_ = 0; std::shared_ptr> workBuffer_; typename gpu::fft::ComplexType::type* dataPtr_; }; } // namespace spfft #endif SpFFT-1.1.0/src/fft/transform_1d_host.hpp000066400000000000000000000254321457701740000201610ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_TRANSFORM_1D_HOST_HPP #define SPFFT_TRANSFORM_1D_HOST_HPP #include #include #include #include #include #include "fft/fftw_plan_1d.hpp" #include "fft/transform_interface.hpp" #include "memory/host_array_view.hpp" #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" namespace spfft { // Computes the FFT in 1D along either the innermost dimension (not transposed) or the second // innermost dimension (transposed) // The transforms are computed in batches aligned to inner 2d planes template class Transform1DPlanesHost : public TransformHost { public: static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = std::complex; Transform1DPlanesHost(HostArrayView3D inputData, HostArrayView3D outputData, bool transposeInputData, bool transposeOutputData, int sign, int maxNumThreads) { assert(inputData.dim_outer() == outputData.dim_outer()); // only one is transposed assert((transposeInputData != transposeOutputData) || (inputData.dim_inner() == outputData.dim_inner())); assert((transposeInputData != transposeOutputData) || (inputData.dim_mid() == outputData.dim_mid())); // none or both transposed assert((transposeInputData == transposeOutputData) || (inputData.dim_inner() == outputData.dim_mid())); assert((transposeInputData == transposeOutputData) || (inputData.dim_mid() == outputData.dim_inner())); // transposed case must not be in-place assert(!(inputData.data() == outputData.data() && (transposeInputData || transposeOutputData))); // make sure maxNumThreads is at least 1 SizeType numSplitsPerPlane = maxNumThreads < 1 ? 1 : maxNumThreads; // only use at most as many splits as required to create work for every thread if (numSplitsPerPlane > 1 && inputData.dim_outer() > numSplitsPerPlane) { numSplitsPerPlane = 2; } const SizeType numTransformsPerPlane = transposeInputData ? 
inputData.dim_inner() : inputData.dim_mid(); // make sure there are at most as many splits as transforms per plane numSplitsPerPlane = numTransformsPerPlane < numSplitsPerPlane ? numTransformsPerPlane : numSplitsPerPlane; // set fftw plan parameters const SizeType size = transposeInputData ? inputData.dim_mid() : inputData.dim_inner(); const SizeType inputStride = transposeInputData ? inputData.dim_inner() : 1; const SizeType outputStride = transposeOutputData ? outputData.dim_inner() : 1; const SizeType inputDist = transposeInputData ? 1 : inputData.dim_inner(); const SizeType outputDist = transposeOutputData ? 1 : outputData.dim_inner(); const SizeType numTransformsPerSplit = numTransformsPerPlane / numSplitsPerPlane; const SizeType inputSplitStrideMid = transposeInputData ? 0 : numTransformsPerSplit; const SizeType inputSplitStrideInner = transposeInputData ? numTransformsPerSplit : 0; const SizeType outputSplitStrideMid = transposeOutputData ? 0 : numTransformsPerSplit; const SizeType outputSplitStrideInner = transposeOutputData ? numTransformsPerSplit : 0; // determine number of transforms per plane // create plans within each plane transforms_.reserve(inputData.dim_outer() * numSplitsPerPlane); for (SizeType idxOuter = 0; idxOuter < inputData.dim_outer(); ++idxOuter) { for (SizeType idxSplit = 0; idxSplit < numSplitsPerPlane; ++idxSplit) { const SizeType howmany = idxSplit == numSplitsPerPlane - 1 ? 
numTransformsPerSplit + numTransformsPerPlane % numSplitsPerPlane : numTransformsPerSplit; transforms_.emplace_back( FlexibleFFTWPlan{&(inputData(idxOuter, idxSplit * inputSplitStrideMid, idxSplit * inputSplitStrideInner)), &(outputData(idxOuter, idxSplit * outputSplitStrideMid, idxSplit * outputSplitStrideInner)), size, inputStride, outputStride, inputDist, outputDist, howmany, sign}, inputData.index(idxOuter, idxSplit * inputSplitStrideMid, idxSplit * inputSplitStrideInner), outputData.index(idxOuter, idxSplit * outputSplitStrideMid, idxSplit * outputSplitStrideInner)); } } } auto execute(const T* input, T* output) -> void override { const ComplexType* inputComplex = reinterpret_cast(input); ComplexType* outputComplex = reinterpret_cast(output); SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType i = 0; i < transforms_.size(); ++i) { auto& triplet = transforms_[i]; std::get<0>(triplet).execute(inputComplex + std::get<1>(triplet), outputComplex + std::get<2>(triplet)); } } auto execute() -> void override { SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType i = 0; i < transforms_.size(); ++i) { auto& triplet = transforms_[i]; std::get<0>(triplet).execute(); } } private: std::vector, SizeType, SizeType>> transforms_; }; // Computes the FFT in 1D along either the innermost dimension (not transposed) or the second // innermost dimension (transposed). // The transforms are computed in batches aligned to the outer and transform dimension. // The indices of transforms to be computed per plane can be provided as well. 
template class Transform1DVerticalHost : public TransformHost { public: static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = std::complex; Transform1DVerticalHost(HostArrayView3D inputData, HostArrayView3D outputData, bool transposeInputData, bool transposeOutputData, int sign, const std::set& inputMidIndices) { assert(inputData.dim_outer() == outputData.dim_outer()); // check case where only one is transposed assert((transposeInputData != transposeOutputData) || (inputData.dim_inner() == outputData.dim_inner())); assert((transposeInputData != transposeOutputData) || (inputData.dim_mid() == outputData.dim_mid())); // none or both transposed assert((transposeInputData == transposeOutputData) || (inputData.dim_inner() == outputData.dim_mid())); assert((transposeInputData == transposeOutputData) || (inputData.dim_mid() == outputData.dim_inner())); // transposed case must not be in-place assert(!(inputData.data() == outputData.data() && (transposeInputData || transposeOutputData))); // set fftw plan parameters const SizeType size = transposeInputData ? inputData.dim_mid() : inputData.dim_inner(); const SizeType inputStride = transposeInputData ? inputData.dim_inner() : 1; const SizeType outputStride = transposeOutputData ? outputData.dim_inner() : 1; const SizeType inputDist = inputData.dim_mid() * inputData.dim_inner(); const SizeType outputDist = outputData.dim_mid() * outputData.dim_inner(); const SizeType howmany = inputData.dim_outer(); // determine number of transforms per plane // create plans within each plane transforms_.reserve(inputMidIndices.size()); for (const auto& midIndex : inputMidIndices) { const SizeType idxMidInput = transposeInputData ? 0 : midIndex; const SizeType idxInnerInput = transposeInputData ? midIndex : 0; const SizeType idxMidOutput = transposeOutputData ? 0 : midIndex; const SizeType idxInnerOutput = transposeOutputData ? 
midIndex : 0; transforms_.emplace_back( FlexibleFFTWPlan{&(inputData(0, idxMidInput, idxInnerInput)), &(outputData(0, idxMidOutput, idxInnerOutput)), size, inputStride, outputStride, inputDist, outputDist, howmany, sign}, inputData.index(0, idxMidInput, idxInnerInput), outputData.index(0, idxMidOutput, idxInnerOutput)); } } auto execute(const T* input, T* output) -> void override { const ComplexType* inputComplex = reinterpret_cast(input); ComplexType* outputComplex = reinterpret_cast(output); SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType i = 0; i < transforms_.size(); ++i) { auto& triplet = transforms_[i]; std::get<0>(triplet).execute(inputComplex + std::get<1>(triplet), outputComplex + std::get<2>(triplet)); } } auto execute() -> void override { SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType i = 0; i < transforms_.size(); ++i) { auto& triplet = transforms_[i]; std::get<0>(triplet).execute(); } } private: std::vector, SizeType, SizeType>> transforms_; }; } // namespace spfft #endif SpFFT-1.1.0/src/fft/transform_2d_gpu.hpp000066400000000000000000000131771457701740000200030ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TRANSFORM_2D_GPU_HPP #define SPFFT_TRANSFORM_2D_GPU_HPP #include #include #include #include #include "fft/transform_interface.hpp" #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_runtime_api.hpp" #include "gpu_util/gpu_stream_handle.hpp" #include "memory/gpu_array.hpp" #include "memory/gpu_array_view.hpp" #include "spfft/config.h" #include "util/common_types.hpp" namespace spfft { template class Transform2DGPU : public TransformGPU { public: using ValueType = T; using ComplexType = typename gpu::fft::ComplexType::type; Transform2DGPU(GPUArrayView3D::type>& data, GPUStreamHandle stream, std::shared_ptr> workBuffer) : stream_(std::move(stream)), workBuffer_(std::move(workBuffer)), dataPtr_(data.data()) { assert(workBuffer_); std::size_t worksize = 0; int rank = 2; int n[2] = {data.dim_mid(), data.dim_inner()}; int nembed[2] = {data.dim_mid(), data.dim_inner()}; int stride = 1; int dist = data.dim_inner() * data.dim_mid(); int batch = data.dim_outer(); // create plan gpu::fft::check_result(gpu::fft::create(&plan_)); gpu::fft::check_result(gpu::fft::set_auto_allocation(plan_, 0)); gpu::fft::check_result(gpu::fft::make_plan_many( plan_, rank, n, nembed, stride, dist, nembed, 
stride, dist, gpu::fft::TransformType::ComplexToComplex::value, batch, &worksize)); // set stream gpu::fft::check_result(gpu::fft::set_stream(plan_, stream_.get())); // resize work buffer if necessary if (workBuffer_->size() < worksize) { *workBuffer_ = GPUArray(worksize); } } Transform2DGPU(const Transform2DGPU& transform) = delete; Transform2DGPU(Transform2DGPU&& transform) noexcept : stream_(std::move(transform.stream_)), plan_(std::move(transform.plan_)), workBuffer_(std::move(transform.workBuffer_)), dataPtr_(transform.dataPtr_) { transform.plan_ = 0; } ~Transform2DGPU() { if (plan_) { gpu::fft::destroy(plan_); } } auto operator=(const Transform2DGPU& transform) -> Transform2DGPU& = delete; auto operator=(Transform2DGPU&& transform) noexcept -> Transform2DGPU& { if (plan_) { gpu::fft::destroy(plan_); } stream_ = std::move(transform.stream_); plan_ = std::move(transform.plan_); workBuffer_ = std::move(transform.workBuffer_); dataPtr_ = transform.dataPtr_; transform.plan_ = 0; return *this; } inline auto device_id() const noexcept -> int { return stream_.device_id(); } auto forward() -> void override { gpu::fft::check_result(gpu::fft::set_work_area(plan_, workBuffer_->data())); gpu::fft::check_result( gpu::fft::execute(plan_, dataPtr_, dataPtr_, gpu::fft::TransformDirection::Forward)); } auto forward(const void* input, void* output) -> void override { gpu::fft::check_result(gpu::fft::set_work_area(plan_, workBuffer_->data())); gpu::fft::check_result(gpu::fft::execute(plan_, reinterpret_cast(input), reinterpret_cast(output), gpu::fft::TransformDirection::Forward)); } auto backward() -> void override { gpu::fft::check_result(gpu::fft::set_work_area(plan_, workBuffer_->data())); gpu::fft::check_result( gpu::fft::execute(plan_, dataPtr_, dataPtr_, gpu::fft::TransformDirection::Backward)); } auto backward(const void* input, void* output) -> void override { gpu::fft::check_result(gpu::fft::set_work_area(plan_, workBuffer_->data())); 
gpu::fft::check_result(gpu::fft::execute(plan_, reinterpret_cast(input), reinterpret_cast(output), gpu::fft::TransformDirection::Backward)); } private: GPUStreamHandle stream_; gpu::fft::HandleType plan_ = 0; std::shared_ptr> workBuffer_; typename gpu::fft::ComplexType::type* dataPtr_; }; } // namespace spfft #endif SpFFT-1.1.0/src/fft/transform_interface.hpp000066400000000000000000000042471457701740000205610ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/
#ifndef SPFFT_TRANSFORM_INTERFACE_HPP
#define SPFFT_TRANSFORM_INTERFACE_HPP
#include
#include "spfft/config.h"

namespace spfft {

// Abstract interface of transforms executed on the host.
template class TransformHost {
public:
  // Execute the transform on the given input / output buffers.
  virtual auto execute(const T* input, T* output) -> void = 0;

  // Execute the transform without explicit buffers.
  // NOTE(review): the host implementations elsewhere in this archive run on the buffers
  // given to their constructor - confirm for any other implementation.
  virtual auto execute() -> void = 0;

  virtual ~TransformHost() = default;
};

// Abstract interface of transforms executed on the GPU. In contrast to TransformHost, the
// direction is selected by the method called and buffers are passed as untyped pointers.
class TransformGPU {
public:
  // Forward transform without explicit buffers.
  virtual auto forward() -> void = 0;

  // Forward transform from "input" to "output".
  virtual auto forward(const void* input, void* output) -> void = 0;

  // Backward transform without explicit buffers.
  virtual auto backward() -> void = 0;

  // Backward transform from "input" to "output".
  virtual auto backward(const void* input, void* output) -> void = 0;

  virtual ~TransformGPU() = default;
};
} // namespace spfft
#endif
SpFFT-1.1.0/src/fft/transform_real_1d_host.hpp000066400000000000000000000261051457701740000211620ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TRANSFORM_REAL_1D_HOST_HPP #define SPFFT_TRANSFORM_REAL_1D_HOST_HPP #include #include #include #include #include "fft/fftw_plan_1d.hpp" #include "fft/transform_interface.hpp" #include "memory/array_view_utility.hpp" #include "memory/host_array_view.hpp" #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" #include "util/type_check.hpp" namespace spfft { // Computes the FFT in 1D along either the innermost dimension (not transposed) or the second // innermost dimension (transposed) // The transforms are computed in batches aligned to inner 2d planes template class R2CTransform1DPlanesHost : public TransformHost { public: static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = std::complex; // r2c R2CTransform1DPlanesHost(HostArrayView3D inputData, HostArrayView3D outputData, bool transposeInputData, bool transposeOutputData, int maxNumThreads) { assert(inputData.dim_outer() == outputData.dim_outer()); assert(disjoint(inputData, outputData)); // set fftw plan parameters const SizeType size = transposeInputData ? inputData.dim_mid() : inputData.dim_inner(); const SizeType inputStride = transposeInputData ? inputData.dim_inner() : 1; const SizeType outputStride = transposeOutputData ? outputData.dim_inner() : 1; const SizeType inputDist = transposeInputData ? 
1 : inputData.dim_inner(); const SizeType outputDist = transposeOutputData ? 1 : outputData.dim_inner(); // make sure maxNumThreads is at least 1 SizeType numSplitsPerPlane = maxNumThreads < 1 ? 1 : maxNumThreads; // only use at most as many splits as required to create work for every thread if (numSplitsPerPlane > 1 && inputData.dim_outer() > numSplitsPerPlane) { numSplitsPerPlane = 2; } const SizeType numTransformsPerPlane = transposeInputData ? inputData.dim_inner() : inputData.dim_mid(); // make sure there are at most as many splits as transforms per plane numSplitsPerPlane = numTransformsPerPlane < numSplitsPerPlane ? numTransformsPerPlane : numSplitsPerPlane; const SizeType numTransformsPerSplit = numTransformsPerPlane / numSplitsPerPlane; const SizeType inputSplitStrideMid = transposeInputData ? 0 : numTransformsPerSplit; const SizeType inputSplitStrideInner = transposeInputData ? numTransformsPerSplit : 0; const SizeType outputSplitStrideMid = transposeOutputData ? 0 : numTransformsPerSplit; const SizeType outputSplitStrideInner = transposeOutputData ? numTransformsPerSplit : 0; // check for non-transposed output assert((transposeOutputData) || (size / 2 + 1 == outputData.dim_inner())); // check for transposed output assert((!transposeOutputData) || (size / 2 + 1 == outputData.dim_mid())); // determine number of transforms per plane // create plans within each plane transforms_.reserve(inputData.dim_outer() * numSplitsPerPlane); for (SizeType idxOuter = 0; idxOuter < inputData.dim_outer(); ++idxOuter) { for (SizeType idxSplit = 0; idxSplit < numSplitsPerPlane; ++idxSplit) { const SizeType howmany = idxSplit == numSplitsPerPlane - 1 ? 
numTransformsPerSplit + numTransformsPerPlane % numSplitsPerPlane : numTransformsPerSplit; transforms_.emplace_back( FlexibleFFTWPlan{&(inputData(idxOuter, idxSplit * inputSplitStrideMid, idxSplit * inputSplitStrideInner)), &(outputData(idxOuter, idxSplit * outputSplitStrideMid, idxSplit * outputSplitStrideInner)), size, inputStride, outputStride, inputDist, outputDist, howmany}, inputData.index(idxOuter, idxSplit * inputSplitStrideMid, idxSplit * inputSplitStrideInner), outputData.index(idxOuter, idxSplit * outputSplitStrideMid, idxSplit * outputSplitStrideInner) ); } } } /* Executes every stored plan on caller-provided buffers: output is reinterpreted as complex data and each plan runs at the input and output element offsets stored beside it in transforms_. The pragma string is omp for without parallel, so this is presumably meant to be called by all threads of an enclosing parallel region - confirm. */ auto execute(const T* input, T* output) -> void override { ComplexType* outputComplex = reinterpret_cast(output); SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType i = 0; i < transforms_.size(); ++i) { auto& triplet = transforms_[i]; std::get<0>(triplet).execute(input + std::get<1>(triplet), outputComplex + std::get<2>(triplet)); } } /* Executes every stored plan without explicit buffers, presumably on the arrays the plans were created with - confirm against FlexibleFFTWPlan. */ auto execute() -> void override { SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType i = 0; i < transforms_.size(); ++i) { auto& triplet = transforms_[i]; std::get<0>(triplet).execute(); } } private: /* One entry per (outer index, split): the plan, the element offset into the input view and the element offset into the output view. */ std::vector, SizeType, SizeType>> transforms_; }; // Computes the complex-to-real FFT in 1D along either the innermost dimension (not transposed) or the second // innermost dimension (transposed) // The transforms are computed in batches aligned to inner 2d planes template class C2RTransform1DPlanesHost : public TransformHost { public: static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = std::complex; // c2r /* Builds the plans. Asserts equal outer dimensions and non-overlapping views. The real transform length is taken from the output view (dim_mid if the output is transposed, dim_inner otherwise). Every plane is divided into at most max(1, maxNumThreads) batches of transforms; the last batch of a plane also takes the remainder. */ C2RTransform1DPlanesHost(HostArrayView3D inputData, HostArrayView3D outputData, bool transposeInputData, bool transposeOutputData, int maxNumThreads) { assert(inputData.dim_outer() == outputData.dim_outer()); assert(disjoint(inputData, outputData)); // set fftw plan parameters const SizeType size = transposeOutputData ?
outputData.dim_mid() : outputData.dim_inner(); const SizeType inputStride = transposeInputData ? inputData.dim_inner() : 1; const SizeType outputStride = transposeOutputData ? outputData.dim_inner() : 1; const SizeType inputDist = transposeInputData ? 1 : inputData.dim_inner(); const SizeType outputDist = transposeOutputData ? 1 : outputData.dim_inner(); // make sure maxNumThreads is at least 1 SizeType numSplitsPerPlane = maxNumThreads < 1 ? 1 : maxNumThreads; // only use at most as many splits as required to create work for every thread if (numSplitsPerPlane > 1 && inputData.dim_outer() > numSplitsPerPlane) { numSplitsPerPlane = 2; } const SizeType numTransformsPerPlane = transposeInputData ? inputData.dim_inner() : inputData.dim_mid(); // make sure there are at most as many splits as transforms per plane /* NOTE(review): if a plane holds zero transforms, numSplitsPerPlane becomes 0 here and the division below divides by zero - confirm callers never pass empty planes. */ numSplitsPerPlane = numTransformsPerPlane < numSplitsPerPlane ? numTransformsPerPlane : numSplitsPerPlane; const SizeType numTransformsPerSplit = numTransformsPerPlane / numSplitsPerPlane; const SizeType inputSplitStrideMid = transposeInputData ? 0 : numTransformsPerSplit; const SizeType inputSplitStrideInner = transposeInputData ? numTransformsPerSplit : 0; const SizeType outputSplitStrideMid = transposeOutputData ? 0 : numTransformsPerSplit; const SizeType outputSplitStrideInner = transposeOutputData ? numTransformsPerSplit : 0; // check for non-transposed input: the complex input must hold size / 2 + 1 values along its inner dimension assert((transposeInputData) || (size / 2 + 1 == inputData.dim_inner())); // check for transposed input: the complex input must hold size / 2 + 1 values along its mid dimension assert((!transposeInputData) || (size / 2 + 1 == inputData.dim_mid())); // determine number of transforms per plane // create plans within each plane transforms_.reserve(inputData.dim_outer() * numSplitsPerPlane); for (SizeType idxOuter = 0; idxOuter < inputData.dim_outer(); ++idxOuter) { for (SizeType idxSplit = 0; idxSplit < numSplitsPerPlane; ++idxSplit) { const SizeType howmany = idxSplit == numSplitsPerPlane - 1 ?
numTransformsPerSplit + numTransformsPerPlane % numSplitsPerPlane : numTransformsPerSplit; transforms_.emplace_back( FlexibleFFTWPlan{&(inputData(idxOuter, idxSplit * inputSplitStrideMid, idxSplit * inputSplitStrideInner)), &(outputData(idxOuter, idxSplit * outputSplitStrideMid, idxSplit * outputSplitStrideInner)), size, inputStride, outputStride, inputDist, outputDist, howmany}, inputData.index(idxOuter, idxSplit * inputSplitStrideMid, idxSplit * inputSplitStrideInner), outputData.index(idxOuter, idxSplit * outputSplitStrideMid, idxSplit * outputSplitStrideInner) ); } } } /* Executes every stored plan on caller-provided buffers: input is reinterpreted as complex data and each plan runs at the input and output element offsets stored beside it in transforms_. The pragma string is omp for without parallel, so this is presumably meant to be called by all threads of an enclosing parallel region - confirm. */ auto execute(const T* input, T* output) -> void override { const ComplexType* inputComplex = reinterpret_cast(input); SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType i = 0; i < transforms_.size(); ++i) { auto& triplet = transforms_[i]; std::get<0>(triplet).execute(inputComplex + std::get<1>(triplet), output + std::get<2>(triplet)); } } /* Executes every stored plan without explicit buffers, presumably on the arrays the plans were created with - confirm against FlexibleFFTWPlan. */ auto execute() -> void override { SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType i = 0; i < transforms_.size(); ++i) { auto& triplet = transforms_[i]; std::get<0>(triplet).execute(); } } private: /* One entry per (outer index, split): the plan, the element offset into the input view and the element offset into the output view. */ std::vector, SizeType, SizeType>> transforms_; }; } // namespace spfft #endif SpFFT-1.1.0/src/fft/transform_real_2d_gpu.hpp000066400000000000000000000262221457701740000210010ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TRANSFORM_REAL_2D_GPU_HPP #define SPFFT_TRANSFORM_REAL_2D_GPU_HPP #include #include #include #include #include "fft/transform_interface.hpp" #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_runtime_api.hpp" #include "gpu_util/gpu_stream_handle.hpp" #include "memory/array_view_utility.hpp" #include "memory/gpu_array.hpp" #include "memory/gpu_array_view.hpp" #include "spfft/config.h" #include "util/common_types.hpp" #include "symmetry/symmetry_gpu.hpp" namespace spfft { /* Batched real-to-complex (forward) and complex-to-real (backward) FFT over the mid and inner dimensions of a 3D view, one transform per outer index. Owns one forward and one backward plan handle, both bound to the stream given at construction, and uses a shared work buffer that is attached to the plan right before each execution. Move-only. */ template class TransformReal2DGPU : public TransformGPU { public: using ValueType = T; using ComplexType = typename gpu::fft::ComplexType::type; /* Creates both plans with automatic work-area allocation switched off and grows the shared work buffer to the larger of the two reported work sizes. Asserts that the views are disjoint, that workBuffer is non-null, that outer and mid dimensions match and that freqDomain.dim_inner equals spaceDomain.dim_inner / 2 + 1. If the mid or the inner dimension is 1, batched 1D plans are created instead of 2D plans. NOTE(review): check_result throws on failure; if that happens after create succeeded, the destructor does not run for the partially constructed object, so the created handles would presumably not be destroyed - confirm. */ TransformReal2DGPU(GPUArrayView3D spaceDomain, GPUArrayView3D::type> freqDomain, GPUStreamHandle stream, std::shared_ptr> workBuffer) : stream_(std::move(stream)), workBuffer_(std::move(workBuffer)), spaceDomain_(spaceDomain), freqDomain_(freqDomain) { assert(disjoint(spaceDomain, freqDomain)); assert(workBuffer_); assert(spaceDomain.dim_outer() == freqDomain.dim_outer());
assert(spaceDomain.dim_mid() == freqDomain.dim_mid()); assert(spaceDomain.dim_inner() / 2 + 1 == freqDomain.dim_inner()); gpu::fft::check_result(gpu::fft::create(&planForward_)); gpu::fft::check_result(gpu::fft::create(&planBackward_)); gpu::fft::check_result(gpu::fft::set_auto_allocation(planForward_, 0)); gpu::fft::check_result(gpu::fft::set_auto_allocation(planBackward_, 0)); std::size_t worksizeForward = 0; std::size_t worksizeBackward = 0; // Starting with CUDA 10.2, a bug with 2D R2C transforms of size (1, x) with x being a prime // number was introduced. As workaround, create batched 1D transforms, if one dimension is 1. if (spaceDomain.dim_mid() == 1) { int rank = 1; int n[1] = {spaceDomain.dim_inner()}; int nembedReal[1] = {spaceDomain.dim_inner()}; int nembedFreq[1] = {freqDomain.dim_inner()}; int stride = 1; int distReal = spaceDomain.dim_inner(); int distFreq = freqDomain.dim_inner(); int batch = spaceDomain.dim_outer(); // create plan gpu::fft::check_result(gpu::fft::make_plan_many( planForward_, rank, n, nembedReal, stride, distReal, nembedFreq, stride, distFreq, gpu::fft::TransformType::RealToComplex::value, batch, &worksizeForward)); gpu::fft::check_result(gpu::fft::make_plan_many( planBackward_, rank, n, nembedFreq, stride, distFreq, nembedReal, stride, distReal, gpu::fft::TransformType::ComplexToReal::value, batch, &worksizeBackward)); } else if (spaceDomain.dim_inner() == 1) { // For consistency, the full result is required along the mid (y) dimension. Therefore, use // hermitian symmetry to calculate missing values after R2C transform.
symm_.reset(new PlaneSymmetryGPU(stream_, freqDomain)); int rank = 1; int n[1] = {spaceDomain.dim_mid()}; int nembedReal[1] = {spaceDomain.dim_mid()}; int nembedFreq[1] = {freqDomain.dim_mid()}; int stride = 1; int distReal = spaceDomain.dim_mid(); int distFreq = freqDomain.dim_mid(); int batch = spaceDomain.dim_outer(); // create plan gpu::fft::check_result(gpu::fft::make_plan_many( planForward_, rank, n, nembedReal, stride, distReal, nembedFreq, stride, distFreq, gpu::fft::TransformType::RealToComplex::value, batch, &worksizeForward)); gpu::fft::check_result(gpu::fft::make_plan_many( planBackward_, rank, n, nembedFreq, stride, distFreq, nembedReal, stride, distReal, gpu::fft::TransformType::ComplexToReal::value, batch, &worksizeBackward)); } else { int rank = 2; int n[2] = {spaceDomain.dim_mid(), spaceDomain.dim_inner()}; int nembedReal[2] = {spaceDomain.dim_mid(), spaceDomain.dim_inner()}; int nembedFreq[2] = {freqDomain.dim_mid(), freqDomain.dim_inner()}; int stride = 1; int distReal = spaceDomain.dim_inner() * spaceDomain.dim_mid(); int distFreq = freqDomain.dim_inner() * freqDomain.dim_mid(); int batch = spaceDomain.dim_outer(); // create plan gpu::fft::check_result(gpu::fft::make_plan_many( planForward_, rank, n, nembedReal, stride, distReal, nembedFreq, stride, distFreq, gpu::fft::TransformType::RealToComplex::value, batch, &worksizeForward)); gpu::fft::check_result(gpu::fft::make_plan_many( planBackward_, rank, n, nembedFreq, stride, distFreq, nembedReal, stride, distReal, gpu::fft::TransformType::ComplexToReal::value, batch, &worksizeBackward)); } // set stream gpu::fft::check_result(gpu::fft::set_stream(planForward_, stream_.get())); gpu::fft::check_result(gpu::fft::set_stream(planBackward_, stream_.get())); const std::size_t worksize = worksizeForward > worksizeBackward ?
worksizeForward : worksizeBackward; // resize work buffer if necessary if (workBuffer_->size() < worksize) { *workBuffer_ = GPUArray(worksize); } } TransformReal2DGPU(const TransformReal2DGPU& transform) = delete; /* Takes over stream, plans, work buffer, views and symmetry helper; the source plan handles are reset to 0 so that its destructor does not destroy them. */ TransformReal2DGPU(TransformReal2DGPU&& transform) noexcept : stream_(std::move(transform.stream_)), planForward_(std::move(transform.planForward_)), planBackward_(std::move(transform.planBackward_)), workBuffer_(std::move(transform.workBuffer_)), spaceDomain_(transform.spaceDomain_), freqDomain_(transform.freqDomain_), symm_(std::move(transform.symm_)) { transform.planForward_ = 0; transform.planBackward_ = 0; } /* Destroys whichever plan handles are non-zero; the status returned by destroy is not checked. */ ~TransformReal2DGPU() { if (planForward_) { gpu::fft::destroy(planForward_); planForward_ = 0; } if (planBackward_) { gpu::fft::destroy(planBackward_); planBackward_ = 0; } } auto operator=(const TransformReal2DGPU& transform) -> TransformReal2DGPU& = delete; /* Destroys the plans currently held, then takes over the state of transform and resets its plan handles to 0. NOTE(review): there is no self-assignment guard; moving an object into itself destroys its plans and leaves both handles at 0. */ auto operator=(TransformReal2DGPU&& transform) noexcept -> TransformReal2DGPU& { if (planForward_) { gpu::fft::destroy(planForward_); planForward_ = 0; } if (planBackward_) { gpu::fft::destroy(planBackward_); planBackward_ = 0; } stream_ = std::move(transform.stream_); planForward_ = std::move(transform.planForward_); planBackward_ = std::move(transform.planBackward_); workBuffer_ = std::move(transform.workBuffer_); spaceDomain_ = transform.spaceDomain_; freqDomain_ = transform.freqDomain_; symm_ = std::move(transform.symm_); transform.planForward_ = 0; transform.planBackward_ = 0; return *this; } /* Device id reported by the stream handle. */ inline auto device_id() const noexcept -> int { return stream_.device_id(); } /* Forward transform from the stored space domain view into the stored frequency domain view. */ auto forward() -> void override { this->forward(spaceDomain_.data(), freqDomain_.data()); } /* Forward (real to complex) transform on explicit buffers, enqueued on the stored stream. If a symmetry helper exists, output is zeroed first and the helper is applied after the FFT. NOTE(review): symm_ was constructed from the stored freqDomain view and apply takes no arguments, so it presumably acts on that view rather than on output - confirm behaviour when output is a different buffer. */ auto forward(const void* input, void* output) -> void override { #ifdef SPFFT_ROCM // workaround for bug with rocFFT for case 1x1xZ if (spaceDomain_.dim_mid() == 1 && spaceDomain_.dim_inner() == 1) { // make sure imaginary part is 0 gpu::check_status(gpu::memset_async( static_cast(output), 0, freqDomain_.size() * sizeof(typename
decltype(freqDomain_)::ValueType), stream_.get())); // copy real valued data into complex buffer -> from stride 1 to stride 2 gpu::check_status(gpu::memcpy_2d_async( static_cast(output), 2 * sizeof(T), static_cast(input), sizeof(T), sizeof(T), freqDomain_.dim_outer(), gpu::flag::MemcpyDeviceToDevice, stream_.get())); // no transform needed return; } #endif if (symm_) { // Make sure buffer is zero before transform, such that the symmetry operation can identify // elements, which have not been written to by the FFT gpu::check_status(gpu::memset_async( static_cast(output), 0, freqDomain_.size() * sizeof(typename decltype(freqDomain_)::ValueType), stream_.get())); } gpu::fft::check_result(gpu::fft::set_work_area(planForward_, workBuffer_->data())); gpu::fft::check_result(gpu::fft::execute(planForward_, reinterpret_cast(input), reinterpret_cast(output))); if (symm_) symm_->apply(); } /* Backward transform from the stored frequency domain view into the stored space domain view. */ auto backward() -> void override { this->backward(freqDomain_.data(), spaceDomain_.data()); } /* Backward (complex to real) transform on explicit buffers: attaches the shared work buffer to the backward plan and executes it. */ auto backward(const void* input, void* output) -> void override { #ifdef SPFFT_ROCM // workaround for bug with rocFFT for case 1x1xZ if (spaceDomain_.dim_mid() == 1 && spaceDomain_.dim_inner() == 1) { // copy complex data into real valued buffer -> from stride 2 to stride 1 gpu::check_status(gpu::memcpy_2d_async(static_cast(output), sizeof(T), static_cast(input), 2 * sizeof(T), sizeof(T), freqDomain_.dim_outer(), gpu::flag::MemcpyDeviceToDevice, stream_.get())); // no transform needed return; } #endif gpu::fft::check_result(gpu::fft::set_work_area(planBackward_, workBuffer_->data())); gpu::fft::check_result(gpu::fft::execute( planBackward_, reinterpret_cast(input), reinterpret_cast(output))); } private: GPUStreamHandle stream_; /* A handle value of 0 means no plan is owned. */ gpu::fft::HandleType planForward_ = 0; gpu::fft::HandleType planBackward_ = 0; std::shared_ptr> workBuffer_; GPUArrayView3D spaceDomain_; GPUArrayView3D::type> freqDomain_; /* Only set when the inner dimension of the space domain is 1. */ std::unique_ptr> symm_; }; } // namespace spfft #endif
SpFFT-1.1.0/src/gpu_util/000077500000000000000000000000001457701740000150575ustar00rootroot00000000000000SpFFT-1.1.0/src/gpu_util/complex_conversion.cuh000066400000000000000000000045701457701740000215020ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_GPU_COMPLEX_CONVERISON_CUH #define SPFFT_GPU_COMPLEX_CONVERISON_CUH #include "gpu_util/gpu_fft_api.hpp" namespace spfft { /* Converts a value of type U into type T, callable from host and device code. The generic version returns the value as is and therefore relies on U being implicitly convertible to T. */ template struct ConvertComplex { __device__ __host__ inline static T apply(const U& val) { return val; } }; /* Double precision complex to single precision complex: both components are narrowed to float. */ template <> struct ConvertComplex { __device__ __host__ inline static gpu::fft::ComplexFloatType apply( const gpu::fft::ComplexDoubleType& val) { return gpu::fft::ComplexFloatType{(float)val.x, (float)val.y}; } }; /* Single precision complex to double precision complex: both components are widened to double. */ template <> struct ConvertComplex { __device__ __host__ inline static gpu::fft::ComplexDoubleType apply( const gpu::fft::ComplexFloatType& val) { return gpu::fft::ComplexDoubleType{(double)val.x, (double)val.y}; } }; } #endif SpFFT-1.1.0/src/gpu_util/gpu_device_guard.hpp000066400000000000000000000052031457701740000210640ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_GPU_DEVICE_GUARD_HPP #define SPFFT_GPU_DEVICE_GUARD_HPP #include "spfft/config.h" #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #include #include #include "gpu_util/gpu_runtime_api.hpp" #include "spfft/exceptions.hpp" namespace spfft { /* Scope guard that selects a GPU device for its lifetime. The constructor queries the device that is active at that moment and calls set_device only if it differs from deviceId (both calls are status-checked); the destructor switches back to the recorded device if a switch took place. Neither copyable nor movable. */ class GPUDeviceGuard { public: explicit GPUDeviceGuard(const int deviceId) : targetDeviceId_(deviceId), originalDeviceId_(0) { gpu::check_status(gpu::get_device(&originalDeviceId_)); if (originalDeviceId_ != deviceId) { gpu::check_status(gpu::set_device(deviceId)); } }; GPUDeviceGuard() = delete; GPUDeviceGuard(const GPUDeviceGuard&) = delete; GPUDeviceGuard(GPUDeviceGuard&&) = delete; auto operator=(const GPUDeviceGuard&) -> GPUDeviceGuard& = delete; auto operator=(GPUDeviceGuard &&) -> GPUDeviceGuard& = delete; ~GPUDeviceGuard() { if (targetDeviceId_ != originalDeviceId_) { std::ignore = gpu::set_device(originalDeviceId_); // status deliberately ignored: throwing an exception from a destructor must be avoided } } private: int targetDeviceId_ = 0; int originalDeviceId_ = 0; }; } // namespace spfft #endif #endif SpFFT-1.1.0/src/gpu_util/gpu_event_handle.hpp000066400000000000000000000054751457701740000211120ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1.
Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_GPU_EVENT_HANDLE_HPP #define SPFFT_GPU_EVENT_HANDLE_HPP #include "spfft/config.h" #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #include #include #include "gpu_util/gpu_runtime_api.hpp" #include "spfft/exceptions.hpp" namespace spfft { /* Shared-ownership wrapper around a GPU event. Copies of the handle refer to the same event, which is destroyed by the custom deleter once the last copy is gone (the destroy status is ignored). The device that is active at construction is remembered. */ class GPUEventHandle { public: /* Creates the event with the default flag if enableTiming is true and with the timing-disabled flag otherwise. NOTE(review): if the allocation inside new throws after the event was created, the event is presumably not destroyed - confirm. */ explicit GPUEventHandle(const bool enableTiming) : deviceId_(0) { gpu::check_status(gpu::get_device(&deviceId_)); gpu::EventType event; const auto flag = enableTiming ?
gpu::flag::EventDefault : gpu::flag::EventDisableTiming; gpu::check_status(gpu::event_create_with_flags(&event, flag)); event_ = std::shared_ptr(new gpu::EventType(event), [](gpu::EventType* ptr) { std::ignore = gpu::event_destroy(*ptr); delete ptr; }); }; /* Returns the wrapped event by value. */ inline auto get() const -> gpu::EventType { return *event_; } /* Id of the device that was active when the handle was created. */ inline auto device_id() const noexcept -> int { return deviceId_; } /* Records the event on the given stream; the status is checked. */ inline auto record(const gpu::StreamType& stream) const -> void { gpu::check_status(gpu::event_record(*event_, stream)); } /* Makes the given stream wait for the event (flags argument 0); the status is checked. */ inline auto stream_wait(const gpu::StreamType& stream) const -> void { gpu::check_status(gpu::stream_wait_event(stream, *event_, 0)); } private: std::shared_ptr event_; int deviceId_ = 0; }; } // namespace spfft #endif #endif SpFFT-1.1.0/src/gpu_util/gpu_fft_api.cpp000066400000000000000000000045331457701740000200530ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "spfft/config.h" #include "gpu_util/gpu_fft_api.hpp" // only declare namespace members if GPU support is enabled #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) namespace spfft { namespace gpu { namespace fft { namespace TransformType { /* Out-of-class definitions for the constexpr static value members of the transform type selectors declared in gpu_fft_api.hpp; before C++17 such a definition is required when the member is odr-used. NOTE(review): the two lines of each pair read identically here, presumably one per precision with the template argument lost from this text - confirm against the repository. */ constexpr decltype(ComplexToComplex::value) ComplexToComplex::value; constexpr decltype(ComplexToComplex::value) ComplexToComplex::value; constexpr decltype(RealToComplex::value) RealToComplex::value; constexpr decltype(RealToComplex::value) RealToComplex::value; constexpr decltype(ComplexToReal::value) ComplexToReal::value; constexpr decltype(ComplexToReal::value) ComplexToReal::value; } // namespace TransformType } // namespace fft } // namespace gpu } // namespace spfft #endif SpFFT-1.1.0/src/gpu_util/gpu_fft_api.hpp000066400000000000000000000161231457701740000200560ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_GPU_FFT_API_HPP #define SPFFT_GPU_FFT_API_HPP #include "spfft/config.h" /* GPU_FFT_PREFIX(x) pastes the backend prefix (cufft for CUDA, hipfft for ROCm) in front of x, so that the wrappers below are written once for both backends. The macro is undefined again at the end of this header. */ #if defined(SPFFT_CUDA) #include #define GPU_FFT_PREFIX(val) cufft##val #elif defined(SPFFT_ROCM) #if __has_include() #include #else #include #endif #define GPU_FFT_PREFIX(val) hipfft##val #endif // only declare namespace members if GPU support is enabled #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #include #include "spfft/exceptions.hpp" namespace spfft { namespace gpu { namespace fft { // ================================== // Types // ================================== using ResultType = GPU_FFT_PREFIX(Result); using HandleType = GPU_FFT_PREFIX(Handle); using ComplexFloatType = GPU_FFT_PREFIX(Complex); using ComplexDoubleType = GPU_FFT_PREFIX(DoubleComplex); // Complex type selector template struct ComplexType; template <> struct ComplexType { using type = ComplexDoubleType; }; template <> struct ComplexType { using type = ComplexFloatType; }; // ================================== // Transform directions //
================================== namespace TransformDirection { #ifdef SPFFT_CUDA constexpr auto Forward = CUFFT_FORWARD; constexpr auto Backward = CUFFT_INVERSE; #else constexpr auto Forward = HIPFFT_FORWARD; constexpr auto Backward = HIPFFT_BACKWARD; #endif } // namespace TransformDirection // ================================== // Transform types // ================================== namespace TransformType { #ifdef SPFFT_CUDA constexpr auto R2C = CUFFT_R2C; constexpr auto C2R = CUFFT_C2R; constexpr auto C2C = CUFFT_C2C; constexpr auto D2Z = CUFFT_D2Z; constexpr auto Z2D = CUFFT_Z2D; constexpr auto Z2Z = CUFFT_Z2Z; #else constexpr auto R2C = HIPFFT_R2C; constexpr auto C2R = HIPFFT_C2R; constexpr auto C2C = HIPFFT_C2C; constexpr auto D2Z = HIPFFT_D2Z; constexpr auto Z2D = HIPFFT_Z2D; constexpr auto Z2Z = HIPFFT_Z2Z; #endif // Transform type selector: complex to complex (Z2Z or C2C) template struct ComplexToComplex; template <> struct ComplexToComplex { constexpr static auto value = Z2Z; }; template <> struct ComplexToComplex { constexpr static auto value = C2C; }; // Transform type selector: real to complex (D2Z or R2C) template struct RealToComplex; template <> struct RealToComplex { constexpr static auto value = D2Z; }; template <> struct RealToComplex { constexpr static auto value = R2C; }; // Transform type selector: complex to real (Z2D or C2R) template struct ComplexToReal; template <> struct ComplexToReal { constexpr static auto value = Z2D; }; template <> struct ComplexToReal { constexpr static auto value = C2R; }; } // namespace TransformType // ================================== // Result values // ================================== namespace result { #ifdef SPFFT_CUDA constexpr auto Success = CUFFT_SUCCESS; #else constexpr auto Success = HIPFFT_SUCCESS; #endif } // namespace result // ================================== // Error check functions // ================================== /* Throws GPUFFTError if error differs from the success code of the backend; the failing code itself is not passed on to the exception. */ inline auto check_result(ResultType error) -> void { if (error != result::Success) { throw GPUFFTError(); } } // ================================== // Execution
function overload // ================================== /* The overloads below pick the backend Exec call from the pointer types of input and output. The const input pointer goes through const_cast before the call, presumably because the backend functions take a non-const input pointer - confirm against the cuFFT and hipFFT references. The complex to complex overloads additionally take the transform direction. */ inline auto execute(HandleType& plan, const ComplexDoubleType* iData, double* oData) -> ResultType { return GPU_FFT_PREFIX(ExecZ2D)(plan, const_cast(iData), oData); } inline auto execute(HandleType& plan, const ComplexFloatType* iData, float* oData) -> ResultType { return GPU_FFT_PREFIX(ExecC2R)(plan, const_cast(iData), oData); } inline auto execute(HandleType& plan, const double* iData, ComplexDoubleType* oData) -> ResultType { return GPU_FFT_PREFIX(ExecD2Z)(plan, const_cast(iData), oData); } inline auto execute(HandleType& plan, const float* iData, ComplexFloatType* oData) -> ResultType { return GPU_FFT_PREFIX(ExecR2C)(plan, const_cast(iData), oData); } inline auto execute(HandleType& plan, const ComplexDoubleType* iData, ComplexDoubleType* oData, int direction) -> ResultType { return GPU_FFT_PREFIX(ExecZ2Z)(plan, const_cast(iData), oData, direction); } inline auto execute(HandleType& plan, const ComplexFloatType* iData, ComplexFloatType* oData, int direction) -> ResultType { return GPU_FFT_PREFIX(ExecC2C)(plan, const_cast(iData), oData, direction); } // ================================== // Forwarding functions to the GPU API: each wrapper perfectly forwards its arguments to the backend function of the same name and returns its result code unchecked // ================================== template inline auto create(ARGS&&... args) -> ResultType { return GPU_FFT_PREFIX(Create)(std::forward(args)...); } template inline auto make_plan_many(ARGS&&... args) -> ResultType { return GPU_FFT_PREFIX(MakePlanMany)(std::forward(args)...); } template inline auto set_work_area(ARGS&&... args) -> ResultType { return GPU_FFT_PREFIX(SetWorkArea)(std::forward(args)...); } template inline auto destroy(ARGS&&... args) -> ResultType { return GPU_FFT_PREFIX(Destroy)(std::forward(args)...); } template inline auto set_stream(ARGS&&... args) -> ResultType { return GPU_FFT_PREFIX(SetStream)(std::forward(args)...); } template inline auto set_auto_allocation(ARGS&&...
args) -> ResultType { return GPU_FFT_PREFIX(SetAutoAllocation)(std::forward(args)...); } } // namespace fft } // namespace gpu } // namespace spfft #undef GPU_FFT_PREFIX #endif // defined SPFFT_CUDA || SPFFT_ROCM #endif SpFFT-1.1.0/src/gpu_util/gpu_kernel_parameter.hpp000066400000000000000000000036311457701740000217660ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE.
*/ #ifndef SPFFT_GPU_KERNEL_PARAMETER_HPP #define SPFFT_GPU_KERNEL_PARAMETER_HPP #include "spfft/config.h" namespace spfft { namespace gpu { constexpr int BlockSizeSmall = 128; constexpr int BlockSizeMedium = 256; constexpr int BlockSizeLarge = 512; constexpr int GridSizeSmall = 2160; constexpr int GridSizeMedium = 4320; constexpr int GridSizeLarge = 8640; } } #endif SpFFT-1.1.0/src/gpu_util/gpu_pointer_translation.hpp000066400000000000000000000064271457701740000225520ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_GPU_POINTER_TRANSLATION_HPP #define SPFFT_GPU_POINTER_TRANSLATION_HPP #include #include #include #include "spfft/config.h" namespace spfft { // Translate input pointer to host / device pointer pair. Managed memory is not considered for // device pointer. template auto translate_gpu_pointer(const T* inputPointer) -> std::pair { gpu::PointerAttributes attr; attr.devicePointer = nullptr; attr.hostPointer = nullptr; auto status = gpu::pointer_get_attributes(&attr, static_cast(inputPointer)); if (status != gpu::status::Success) { std::ignore = gpu::get_last_error(); // clear error from cache // Invalid value is always indicated before CUDA 11 for valid host pointers, which have not been // registered. -> Don't throw error in this case. if (status != gpu::status::ErrorInvalidValue) gpu::check_status(status); } std::pair ptrPair{nullptr, nullptr}; // get memory type - cuda 10 changed attribute name #if (defined(SPFFT_CUDA) && (CUDART_VERSION >= 10000)) || (defined(SPFFT_ROCM) && (HIP_VERSION_MAJOR >= 6)) auto memoryType = attr.type; #else auto memoryType = attr.memoryType; #endif if (memoryType != gpu::flag::MemoryTypeDevice) { ptrPair.first = attr.hostPointer ? 
static_cast(attr.hostPointer) : inputPointer; } else { ptrPair.second = static_cast(attr.devicePointer); } return ptrPair; } template auto translate_gpu_pointer(T* inputPointer) -> std::pair { auto pointers = translate_gpu_pointer(static_cast(inputPointer)); return {const_cast(pointers.first), const_cast(pointers.second)}; } } // namespace spfft #endif SpFFT-1.1.0/src/gpu_util/gpu_runtime.hpp000066400000000000000000000057131457701740000201340ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_GPU_RUNTIME_HPP #define SPFFT_GPU_RUNTIME_HPP #include "gpu_util/gpu_runtime_api.hpp" #include "spfft/config.h" #ifdef SPFFT_ROCM #include #endif namespace spfft { #ifdef SPFFT_CUDA template inline auto launch_kernel(F func, const dim3 threadGrid, const dim3 threadBlock, const size_t sharedMemoryBytes, const gpu::StreamType stream, ARGS&&... args) -> void { #ifndef NDEBUG gpu::device_synchronize(); gpu::check_status(gpu::get_last_error()); // before #endif func<<>>(std::forward(args)...); #ifndef NDEBUG gpu::device_synchronize(); gpu::check_status(gpu::get_last_error()); // after #endif } #endif #ifdef SPFFT_ROCM template inline auto launch_kernel(F func, const dim3 threadGrid, const dim3 threadBlock, const size_t sharedMemoryBytes, const gpu::StreamType stream, ARGS&&... args) -> void { #ifndef NDEBUG gpu::device_synchronize(); gpu::check_status(gpu::get_last_error()); // before #endif hipLaunchKernelGGL(func, threadGrid, threadBlock, sharedMemoryBytes, stream, std::forward(args)...); #ifndef NDEBUG gpu::device_synchronize(); gpu::check_status(gpu::get_last_error()); // after #endif } #endif } // namespace spfft #endif SpFFT-1.1.0/src/gpu_util/gpu_runtime_api.hpp000066400000000000000000000215731457701740000207670ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_GPU_RUNTIME_RUNTIME_HPP #define SPFFT_GPU_RUNTIME_RUNTIME_HPP #include "spfft/config.h" #if defined(SPFFT_CUDA) #include #define GPU_PREFIX(val) cuda##val #elif defined(SPFFT_ROCM) #include #define GPU_PREFIX(val) hip##val #endif // only declare namespace members if GPU support is enabled #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #include #include "spfft/exceptions.hpp" namespace spfft { namespace gpu { using StatusType = GPU_PREFIX(Error_t); using StreamType = GPU_PREFIX(Stream_t); using EventType = GPU_PREFIX(Event_t); #ifdef SPFFT_CUDA using PointerAttributes = GPU_PREFIX(PointerAttributes); #else using PointerAttributes = GPU_PREFIX(PointerAttribute_t); #endif namespace status { // error / return values constexpr StatusType Success = GPU_PREFIX(Success); constexpr StatusType ErrorMemoryAllocation = GPU_PREFIX(ErrorMemoryAllocation); constexpr StatusType ErrorLaunchOutOfResources = GPU_PREFIX(ErrorLaunchOutOfResources); constexpr StatusType ErrorInvalidValue = 
GPU_PREFIX(ErrorInvalidValue); constexpr StatusType ErrorInvalidResourceHandle = GPU_PREFIX(ErrorInvalidResourceHandle); constexpr StatusType ErrorInvalidDevice = GPU_PREFIX(ErrorInvalidDevice); constexpr StatusType ErrorInvalidMemcpyDirection = GPU_PREFIX(ErrorInvalidMemcpyDirection); constexpr StatusType ErrorInvalidDevicePointer = GPU_PREFIX(ErrorInvalidDevicePointer); constexpr StatusType ErrorInitializationError = GPU_PREFIX(ErrorInitializationError); constexpr StatusType ErrorNoDevice = GPU_PREFIX(ErrorNoDevice); constexpr StatusType ErrorNotReady = GPU_PREFIX(ErrorNotReady); constexpr StatusType ErrorUnknown = GPU_PREFIX(ErrorUnknown); constexpr StatusType ErrorPeerAccessNotEnabled = GPU_PREFIX(ErrorPeerAccessNotEnabled); constexpr StatusType ErrorPeerAccessAlreadyEnabled = GPU_PREFIX(ErrorPeerAccessAlreadyEnabled); constexpr StatusType ErrorHostMemoryAlreadyRegistered = GPU_PREFIX(ErrorHostMemoryAlreadyRegistered); constexpr StatusType ErrorHostMemoryNotRegistered = GPU_PREFIX(ErrorHostMemoryNotRegistered); constexpr StatusType ErrorUnsupportedLimit = GPU_PREFIX(ErrorUnsupportedLimit); } // namespace status // flags to pass to GPU API namespace flag { constexpr auto HostRegisterDefault = GPU_PREFIX(HostRegisterDefault); constexpr auto HostRegisterPortable = GPU_PREFIX(HostRegisterPortable); constexpr auto HostRegisterMapped = GPU_PREFIX(HostRegisterMapped); constexpr auto HostRegisterIoMemory = GPU_PREFIX(HostRegisterIoMemory); constexpr auto StreamDefault = GPU_PREFIX(StreamDefault); constexpr auto StreamNonBlocking = GPU_PREFIX(StreamNonBlocking); constexpr auto MemoryTypeHost = GPU_PREFIX(MemoryTypeHost); constexpr auto MemoryTypeDevice = GPU_PREFIX(MemoryTypeDevice); #if (CUDART_VERSION >= 10000) constexpr auto MemoryTypeUnregistered = GPU_PREFIX(MemoryTypeUnregistered); constexpr auto MemoryTypeManaged = GPU_PREFIX(MemoryTypeManaged); #endif constexpr auto MemcpyHostToDevice = GPU_PREFIX(MemcpyHostToDevice); constexpr auto MemcpyDeviceToHost = 
GPU_PREFIX(MemcpyDeviceToHost); constexpr auto MemcpyDeviceToDevice = GPU_PREFIX(MemcpyDeviceToDevice); constexpr auto EventDefault = GPU_PREFIX(EventDefault); constexpr auto EventBlockingSync = GPU_PREFIX(EventBlockingSync); constexpr auto EventDisableTiming = GPU_PREFIX(EventDisableTiming); constexpr auto EventInterprocess = GPU_PREFIX(EventInterprocess); } // namespace flag // ================================== // Error check function // ================================== inline auto check_status(StatusType error) -> void { if (error != status::Success) { if (error == status::ErrorMemoryAllocation) throw GPUAllocationError(); if (error == status::ErrorLaunchOutOfResources) throw GPULaunchError(); if (error == status::ErrorNoDevice) throw GPUNoDeviceError(); if (error == status::ErrorInvalidValue) throw GPUInvalidValueError(); if (error == status::ErrorInvalidDevicePointer) throw GPUInvalidDevicePointerError(); throw GPUError(); } } // ================================== // Forwarding functions to GPU API // ================================== template inline auto host_register(ARGS&&... args) -> StatusType { return GPU_PREFIX(HostRegister)(std::forward(args)...); } template inline auto host_unregister(ARGS&&... args) -> StatusType { return GPU_PREFIX(HostUnregister)(std::forward(args)...); } template inline auto stream_create_with_flags(ARGS&&... args) -> StatusType { return GPU_PREFIX(StreamCreateWithFlags)(std::forward(args)...); } template inline auto stream_destroy(ARGS&&... args) -> StatusType { return GPU_PREFIX(StreamDestroy)(std::forward(args)...); } template inline auto stream_wait_event(ARGS&&... args) -> StatusType { return GPU_PREFIX(StreamWaitEvent)(std::forward(args)...); } template inline auto event_create_with_flags(ARGS&&... args) -> StatusType { return GPU_PREFIX(EventCreateWithFlags)(std::forward(args)...); } template inline auto event_destroy(ARGS&&... 
args) -> StatusType { return GPU_PREFIX(EventDestroy)(std::forward(args)...); } template inline auto event_record(ARGS&&... args) -> StatusType { return GPU_PREFIX(EventRecord)(std::forward(args)...); } template inline auto malloc(ARGS&&... args) -> StatusType { return GPU_PREFIX(Malloc)(std::forward(args)...); } template inline auto free(ARGS&&... args) -> StatusType { return GPU_PREFIX(Free)(std::forward(args)...); } template inline auto memcpy(ARGS&&... args) -> StatusType { return GPU_PREFIX(Memcpy)(std::forward(args)...); } template inline auto memcpy_2d(ARGS&&... args) -> StatusType { return GPU_PREFIX(Memcpy2D)(std::forward(args)...); } template inline auto memcpy_async(ARGS&&... args) -> StatusType { return GPU_PREFIX(MemcpyAsync)(std::forward(args)...); } template inline auto memcpy_2d_async(ARGS&&... args) -> StatusType { return GPU_PREFIX(Memcpy2DAsync)(std::forward(args)...); } template inline auto get_device(ARGS&&... args) -> StatusType { return GPU_PREFIX(GetDevice)(std::forward(args)...); } template inline auto set_device(ARGS&&... args) -> StatusType { return GPU_PREFIX(SetDevice)(std::forward(args)...); } template inline auto get_device_count(ARGS&&... args) -> StatusType { return GPU_PREFIX(GetDeviceCount)(std::forward(args)...); } template inline auto stream_synchronize(ARGS&&... args) -> StatusType { return GPU_PREFIX(StreamSynchronize)(std::forward(args)...); } template inline auto memset_async(ARGS&&... args) -> StatusType { return GPU_PREFIX(MemsetAsync)(std::forward(args)...); } template inline auto pointer_get_attributes(ARGS&&... 
args) -> StatusType { return GPU_PREFIX(PointerGetAttributes)(std::forward(args)...); } inline auto get_last_error() -> StatusType { return GPU_PREFIX(GetLastError)(); } inline auto device_synchronize() -> StatusType { return GPU_PREFIX(DeviceSynchronize)(); } } // namespace gpu } // namespace spfft #undef GPU_PREFIX #endif // defined SPFFT_CUDA || SPFFT_ROCM #endif SpFFT-1.1.0/src/gpu_util/gpu_stream_handle.hpp000066400000000000000000000054371457701740000212620ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_GPU_STREAM_HANDLE_HPP #define SPFFT_GPU_STREAM_HANDLE_HPP #include "spfft/config.h" #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #include #include #include "gpu_util/gpu_runtime_api.hpp" #include "spfft/exceptions.hpp" namespace spfft { class GPUStreamHandle { public: GPUStreamHandle() : stream_(new gpu::StreamType(0)), deviceId_(0) { gpu::check_status(gpu::get_device(&deviceId_)); } explicit GPUStreamHandle(const bool blockedByDefaultStream) : deviceId_(0) { gpu::check_status(gpu::get_device(&deviceId_)); gpu::StreamType rawStream; if (blockedByDefaultStream) gpu::check_status(gpu::stream_create_with_flags(&rawStream, gpu::flag::StreamDefault)); else gpu::check_status(gpu::stream_create_with_flags(&rawStream, gpu::flag::StreamNonBlocking)); stream_ = std::shared_ptr(new gpu::StreamType(rawStream), [](gpu::StreamType* ptr) { std::ignore = gpu::stream_destroy(*ptr); delete ptr; }); }; inline auto get() const -> gpu::StreamType { return *stream_; } inline auto device_id() const noexcept -> int { return deviceId_; } private: std::shared_ptr stream_; int deviceId_ = 0; }; } // namespace spfft #endif #endif SpFFT-1.1.0/src/gpu_util/gpu_transfer.hpp000066400000000000000000000122101457701740000202630ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. 
Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_GPU_TRANSFER_HPP #define SPFFT_GPU_TRANSFER_HPP #include #include "gpu_util/gpu_stream_handle.hpp" #include "memory/memory_type_trait.hpp" #include "spfft/config.h" #include "util/common_types.hpp" namespace spfft { template auto copy_to_gpu(const T& hostArray, U&& gpuArray) -> void { using UType = typename std::remove_reference::type; static_assert(!IsDeviceMemory::value, "First argument must represent host memory!"); static_assert(IsDeviceMemory::value, "Second argument must represent device memory!"); static_assert(sizeof(decltype(*(gpuArray.data()))) == sizeof(decltype(*(hostArray.data()))), "Size of value types must match!"); assert(hostArray.size() == static_cast(gpuArray.size())); gpu::check_status(gpu::memcpy( static_cast(gpuArray.data()), static_cast(hostArray.data()), gpuArray.size() * sizeof(decltype(*(gpuArray.data()))), gpu::flag::MemcpyHostToDevice)); } template auto copy_to_gpu_async(const GPUStreamHandle& stream, const T& hostArray, U&& gpuArray) -> void { using UType = typename std::remove_reference::type; static_assert(!IsDeviceMemory::value, "First argument must represent host memory!"); static_assert(IsDeviceMemory::value, "Second argument must represent device memory!"); static_assert(sizeof(decltype(*(gpuArray.data()))) == sizeof(decltype(*(hostArray.data()))), "Size of value types must match!"); assert(hostArray.size() == static_cast(gpuArray.size())); gpu::check_status(gpu::memcpy_async(static_cast(gpuArray.data()), static_cast(hostArray.data()), gpuArray.size() * sizeof(decltype(*(gpuArray.data()))), gpu::flag::MemcpyHostToDevice, stream.get())); } template auto copy_from_gpu(const T& gpuArray, U&& hostArray) -> void { using UType = typename std::remove_reference::type; static_assert(IsDeviceMemory::value, "First argument must represent device memory!"); static_assert(!IsDeviceMemory::value, "Second argument must represent host memory!"); static_assert(sizeof(decltype(*(gpuArray.data()))) == 
sizeof(decltype(*(hostArray.data()))), "Size of value types must match!"); assert(hostArray.size() == static_cast(gpuArray.size())); gpu::check_status(gpu::memcpy( static_cast(hostArray.data()), static_cast(gpuArray.data()), hostArray.size() * sizeof(decltype(*(gpuArray.data()))), gpu::flag::MemcpyDeviceToHost)); } template auto copy_from_gpu_async(const GPUStreamHandle& stream, const T& gpuArray, U&& hostArray) -> void { using UType = typename std::remove_reference::type; static_assert(IsDeviceMemory::value, "First argument must represent device memory!"); static_assert(!IsDeviceMemory::value, "Second argument must represent host memory!"); static_assert(sizeof(decltype(*(gpuArray.data()))) == sizeof(decltype(*(hostArray.data()))), "Size of value types must match!"); assert(hostArray.size() == static_cast(gpuArray.size())); gpu::check_status(gpu::memcpy_async(static_cast(hostArray.data()), static_cast(gpuArray.data()), hostArray.size() * sizeof(decltype(*(gpuArray.data()))), gpu::flag::MemcpyDeviceToHost, stream.get())); } } // namespace spfft #endif SpFFT-1.1.0/src/memory/000077500000000000000000000000001457701740000145375ustar00rootroot00000000000000SpFFT-1.1.0/src/memory/aligned_allocation.cpp000066400000000000000000000047051457701740000210610ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "memory/aligned_allocation.hpp" #include #include namespace spfft { namespace memory { auto allocate_aligned(SizeType numBytes, SizeType alignment) -> void* { // check if sizeof(void*) is power of 2 static_assert((sizeof(void*) & (sizeof(void*) - 1)) == 0, "size of void* must by power of 2 for alignment!"); // check if alignment is power of 2 and multiple of sizeof(void*) if (alignment % sizeof(void*) != 0 || ((alignment & (alignment - 1)) != 0)) throw HostAllocationError(); void* ptr; if (posix_memalign(&ptr, alignment, numBytes) != 0) throw HostAllocationError(); return ptr; } auto allocate_aligned(SizeType numBytes) -> void* { static auto pageSize = sysconf(_SC_PAGESIZE); return allocate_aligned(numBytes, static_cast(pageSize)); } auto free_aligned(void* ptr) noexcept -> void { free(ptr); } } // namespace memory } // namespace spfft SpFFT-1.1.0/src/memory/aligned_allocation.hpp000066400000000000000000000101011457701740000210510ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon 
Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_ALIGNED_ALLOCATOR_HPP #define SPFFT_ALIGNED_ALLOCATOR_HPP #include #include #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "util/common_types.hpp" namespace spfft { namespace memory { // Allocate given number of bytes at adress with given alignment. // The alignment must be a multiple of sizeof(void*) and a power of 2 // Throws upon failure. 
auto allocate_aligned(SizeType numBytes, SizeType alignment) -> void*; // Allocate memory aligned at page boundaries auto allocate_aligned(SizeType numBytes) -> void*; // Free memory allocated with allocate_aligned() function auto free_aligned(void* ptr) noexcept -> void; // construct numElements elements of type T with arguments args at location pointed to by ptr template auto construct_elements_in_place(T* ptr, SizeType numElements, ARGS&&... args) -> void; // deconstruct elements of trivially destructable type in array template ::value, int>::type = 0> auto deconstruct_elements(T* ptr, SizeType numElements) noexcept -> void; // deconstruct elements of non-trivially destructable type in array template ::value, int>::type = 0> auto deconstruct_elements(T* ptr, SizeType numElements) noexcept(std::is_nothrow_destructible::value) -> void; // ====================== // Implementation // ====================== template auto construct_elements_in_place(T* ptr, SizeType numElements, ARGS&&... args) -> void { SizeType constructIdx = 0; try { // construct all elements for (; constructIdx < numElements; ++constructIdx) { new (ptr + constructIdx) T(std::forward(args)...); } } catch (...) 
{ // destruct all elements which did not throw in case of error deconstruct_elements(ptr, constructIdx); throw; } } template ::value, int>::type> auto deconstruct_elements(T*, SizeType) noexcept -> void {} template ::value, int>::type> auto deconstruct_elements(T* ptr, SizeType numElements) noexcept(std::is_nothrow_destructible::value) -> void { for (SizeType destructIdx = 0; destructIdx < numElements; ++destructIdx) { ptr[destructIdx].~T(); } } } // namespace memory } // namespace spfft #endif SpFFT-1.1.0/src/memory/array_view_utility.hpp000066400000000000000000000276051457701740000212150ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_ARRAY_VIEW_UTILITY_HPP #define SPFFT_ARRAY_VIEW_UTILITY_HPP #include #include #include #include #include #include #include "memory/gpu_array_view.hpp" #include "memory/host_array_view.hpp" #include "memory/memory_type_trait.hpp" #include "spfft/config.h" #include "util/common_types.hpp" namespace spfft { /* Returns true when neither array's start address lies inside the other array's [data(), data() + size()) range, i.e. the two element ranges do not overlap. */ template auto disjoint(const T& array1, const U& array2) -> bool { const void* start1 = static_cast(array1.data()); const void* end1 = static_cast(array1.data() + array1.size()); const void* start2 = static_cast(array2.data()); const void* end2 = static_cast(array2.data() + array2.size()); return !(start1 >= start2 && start1 < end2) && !(start2 >= start1 && start2 < end1); } namespace gpu_array_utility_internal { /* Converts value to int. Throws OverflowError when value is larger than the numeric limit it is compared against below. */ inline auto checked_cast_to_int(const SizeType value) -> int { static_assert(std::is_unsigned::value, "Expected unsigend SizeType"); if (value > static_cast(std::numeric_limits::max())) { throw OverflowError(); } return static_cast(value); } } // namespace gpu_array_utility_internal // ---------------------- // Create array view // ---------------------- /* Host overload: view of size elements starting at array.data() + startIdx, carrying the array's pinned() flag. The requested range is only checked with assert. NOTE(review): the enable_if condition selecting this overload is not readable here - confirm against memory_type_trait.hpp. */ template ::value, int>::type = 0> auto create_1d_view(T& array, const SizeType startIdx, const SizeType size) -> HostArrayView1D { assert(array.size() >= startIdx + size); return HostArrayView1D(array.data() + startIdx, size, array.pinned()); } /* GPU overload: same range as the host overload; the size is converted with checked_cast_to_int (may throw OverflowError) and the view carries array.device_id(). */ template ::value, int>::type = 0> auto create_1d_view(T& array, const SizeType startIdx, const SizeType size) -> GPUArrayView1D { 
assert(array.size() >= startIdx + size); return GPUArrayView1D( array.data() + startIdx, gpu_array_utility_internal::checked_cast_to_int(size), array.device_id()); } /* 2D views: dimOuter x dimInner elements starting at array.data() + startIdx. The GPU overload additionally verifies that the total element count and each dimension are representable as int. */ template ::value, int>::type = 0> auto create_2d_view(T& array, const SizeType startIdx, const SizeType dimOuter, const SizeType dimInner) -> HostArrayView2D { assert(array.size() >= startIdx + dimInner * dimOuter); return HostArrayView2D(array.data() + startIdx, dimOuter, dimInner, array.pinned()); } template ::value, int>::type = 0> auto create_2d_view(T& array, const SizeType startIdx, const SizeType dimOuter, const SizeType dimInner) -> GPUArrayView2D { assert(array.size() >= startIdx + dimInner * dimOuter); // check that entire memory can be addressed with int gpu_array_utility_internal::checked_cast_to_int(dimOuter * dimInner); return GPUArrayView2D( array.data() + startIdx, gpu_array_utility_internal::checked_cast_to_int(dimOuter), gpu_array_utility_internal::checked_cast_to_int(dimInner), array.device_id()); } /* 3D views: dimOuter x dimMid x dimInner elements starting at array.data() + startIdx, with the same checks as the 2D overloads. */ template ::value, int>::type = 0> auto create_3d_view(T& array, const SizeType startIdx, const SizeType dimOuter, const SizeType dimMid, const SizeType dimInner) -> HostArrayView3D { assert(array.size() >= startIdx + dimOuter * dimMid * dimInner); return HostArrayView3D(array.data() + startIdx, dimOuter, dimMid, dimInner, array.pinned()); } template ::value, int>::type = 0> auto create_3d_view(T& array, const SizeType startIdx, const SizeType dimOuter, const SizeType dimMid, const SizeType dimInner) -> GPUArrayView3D { assert(array.size() >= startIdx + dimOuter * dimMid * dimInner); // check that entire memory can be addressed with int gpu_array_utility_internal::checked_cast_to_int(dimOuter * dimMid * dimInner); return GPUArrayView3D( array.data() + startIdx, gpu_array_utility_internal::checked_cast_to_int(dimOuter), gpu_array_utility_internal::checked_cast_to_int(dimMid), gpu_array_utility_internal::checked_cast_to_int(dimInner), array.device_id()); } // ------------------------------- // 
Create array view with new type // ------------------------------ /* The create_new_type_*_view helpers reinterpret the storage of array, starting at array.data(), as elements of another type U. The assert checks that the array's byte size covers the requested number of U elements; the static_assert requires the old element alignment to be a multiple of alignof(U). */ template ::value, int>::type = 0> auto create_new_type_1d_view(T& array, const SizeType size) -> HostArrayView1D { assert(array.size() * sizeof(typename T::ValueType) >= size * sizeof(U)); static_assert(alignof(typename T::ValueType) % alignof(U) == 0, "Alignment of old type must be multiple of new type alignment"); return HostArrayView1D(reinterpret_cast(array.data()), size, array.pinned()); } template ::value, int>::type = 0> auto create_new_type_1d_view(T& array, const SizeType size) -> GPUArrayView1D { assert(array.size() * sizeof(typename T::ValueType) >= size * sizeof(U)); static_assert(alignof(typename T::ValueType) % alignof(U) == 0, "Alignment of old type must be multiple of new type alignment"); return GPUArrayView1D(reinterpret_cast(array.data()), gpu_array_utility_internal::checked_cast_to_int(size), array.device_id()); } template ::value, int>::type = 0> auto create_new_type_2d_view(T& array, const SizeType dimOuter, const SizeType dimInner) -> HostArrayView2D { assert(array.size() * sizeof(typename T::ValueType) >= dimOuter * dimInner * sizeof(U)); static_assert(alignof(typename T::ValueType) % alignof(U) == 0, "Alignment of old type must be multiple of new type alignment"); return HostArrayView2D(reinterpret_cast(array.data()), dimOuter, dimInner, array.pinned()); } template ::value, int>::type = 0> auto create_new_type_2d_view(T& array, const SizeType dimOuter, const SizeType dimInner) -> GPUArrayView2D { assert(array.size() * sizeof(typename T::ValueType) >= dimOuter * dimInner * sizeof(U)); static_assert(alignof(typename T::ValueType) % alignof(U) == 0, "Alignment of old type must be multiple of new type alignment"); // check that entire memory can be addressed with int gpu_array_utility_internal::checked_cast_to_int(dimOuter * dimInner); return GPUArrayView2D( reinterpret_cast(array.data()), gpu_array_utility_internal::checked_cast_to_int(dimOuter), 
gpu_array_utility_internal::checked_cast_to_int(dimInner), array.device_id()); } template ::value, int>::type = 0> auto create_new_type_3d_view(T& array, const SizeType dimOuter, const SizeType dimMid, const SizeType dimInner) -> HostArrayView3D { assert(array.size() * sizeof(typename T::ValueType) >= dimOuter * dimMid * dimInner * sizeof(U)); static_assert(alignof(typename T::ValueType) % alignof(U) == 0, "Alignment of old type must be multiple of new type alignment"); return HostArrayView3D(reinterpret_cast(array.data()), dimOuter, dimMid, dimInner, array.pinned()); } template ::value, int>::type = 0> auto create_new_type_3d_view(T& array, const SizeType dimOuter, const SizeType dimMid, const SizeType dimInner) -> GPUArrayView3D { assert(array.size() * sizeof(typename T::ValueType) >= dimOuter * dimMid * dimInner * sizeof(U)); static_assert(alignof(typename T::ValueType) % alignof(U) == 0, "Alignment of old type must be multiple of new type alignment"); // check that entire memory can be addressed with int gpu_array_utility_internal::checked_cast_to_int(dimOuter * dimMid * dimInner); return GPUArrayView3D( reinterpret_cast(array.data()), gpu_array_utility_internal::checked_cast_to_int(dimOuter), gpu_array_utility_internal::checked_cast_to_int(dimMid), gpu_array_utility_internal::checked_cast_to_int(dimInner), array.device_id()); } // -------------------------------- // convert scalar and complex views // -------------------------------- /* convert_to_complex_view reinterprets a host view so that the result has half the size (1D) or half the inner dimension (2D, 3D) of the input; the halved extent must be even, which is only checked with assert. The pinned() flag is carried over. */ template auto convert_to_complex_view(HostArrayView1D view) -> HostArrayView1D> { assert(view.size() % 2 == 0); return HostArrayView1D>(reinterpret_cast*>(view.data()), view.size() / 2, view.pinned()); } template auto convert_to_complex_view(HostArrayView2D view) -> HostArrayView2D> { assert(view.dim_inner() % 2 == 0); return HostArrayView2D>(reinterpret_cast*>(view.data()), view.dim_outer(), view.dim_inner() / 2, view.pinned()); } template auto convert_to_complex_view(HostArrayView3D view) -> HostArrayView3D> { 
assert(view.dim_inner() % 2 == 0); return HostArrayView3D>(reinterpret_cast*>(view.data()), view.dim_outer(), view.dim_mid(), view.dim_inner() / 2, view.pinned()); } template auto convert_from_complex_view(HostArrayView2D> view) -> HostArrayView1D { return HostArrayView1D(reinterpret_cast(view.data()), view.size() * 2, view.pinned()); } template auto convert_from_complex_view(HostArrayView2D> view) -> HostArrayView3D { return HostArrayView2D(reinterpret_cast(view.data()), view.dim_outer(), view.dim_inner() * 2, view.pinned()); } template auto convert_from_complex_view(HostArrayView3D> view) -> HostArrayView3D { return HostArrayView3D(reinterpret_cast(view.data()), view.dim_outer(), view.dim_mid(), view.dim_inner() * 2, view.pinned()); } } // namespace spfft #endif SpFFT-1.1.0/src/memory/gpu_array.hpp000066400000000000000000000071601457701740000172450ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_GPU_ARRAY_HPP #define SPFFT_GPU_ARRAY_HPP #include #include #include "gpu_util/gpu_runtime_api.hpp" #include "spfft/config.h" #include "util/common_types.hpp" namespace spfft {

// Move-only owner of a buffer of `size` elements obtained from gpu::malloc.
// The buffer is released with gpu::free on destruction or when it is replaced by
// move assignment. The device id reported by gpu::get_device at construction time
// is stored and exposed through device_id().
template <typename T>
class GPUArray {
public:
  using ValueType = T;
  static constexpr SizeType ORDER = 1;

  // Empty array: no allocation, size 0.
  GPUArray() = default;

  // Allocate storage for size elements (no allocation if size == 0).
  // Errors reported by the gpu runtime are passed to gpu::check_status.
  GPUArray(const SizeType size);

  GPUArray(const GPUArray& array) = delete;

  // Take over the buffer of array; array is left empty.
  GPUArray(GPUArray&& array) noexcept;

  ~GPUArray();

  auto operator=(const GPUArray& array) -> GPUArray& = delete;

  // Release the current buffer and take over the buffer of array; array is left empty.
  // Self-assignment leaves the object unchanged.
  auto operator=(GPUArray&& array) noexcept -> GPUArray&;

  inline auto data() noexcept -> ValueType* { return data_; }

  inline auto data() const noexcept -> const ValueType* { return data_; }

  inline auto empty() const noexcept -> bool { return size_ == 0; }

  inline auto size() const noexcept -> SizeType { return size_; }

  inline auto device_id() const noexcept -> int { return deviceId_; }

private:
  SizeType size_ = 0;
  ValueType* data_ = nullptr;
  int deviceId_ = 0;
};

// ======================
// Implementation
// ======================
template <typename T>
GPUArray<T>::GPUArray(const SizeType size) : size_(size), data_(nullptr), deviceId_(0) {
  // Record the device id before allocating.
  gpu::check_status(gpu::get_device(&deviceId_));
  if (size > 0) {
    gpu::check_status(gpu::malloc(reinterpret_cast<void**>(&data_), size * sizeof(ValueType)));
  }
}

template <typename T>
GPUArray<T>::~GPUArray() {
  if (data_) {
    // don't check error to avoid throwing exception in destructor
    std::ignore = gpu::free(data_);
    data_ = nullptr;
    size_ = 0;
  }
}

template <typename T>
GPUArray<T>::GPUArray(GPUArray&& array) noexcept
    : size_(array.size_), data_(array.data_), deviceId_(array.deviceId_) {
  // The source must not free the buffer that now belongs to this object.
  array.data_ = nullptr;
  array.size_ = 0;
}

template <typename T>
auto GPUArray<T>::operator=(GPUArray&& array) noexcept -> GPUArray& {
  // Without this guard a self-move would free the buffer and leave the array empty.
  if (this == &array) return *this;

  if (data_) {
    // error intentionally ignored: this function is noexcept
    std::ignore = gpu::free(data_);
  }
  data_ = array.data_;
  size_ = array.size_;
  deviceId_ = array.deviceId_;

  array.data_ = nullptr;
  array.size_ = 0;
  return *this;
}

} // namespace spfft #endif SpFFT-1.1.0/src/memory/gpu_array_const_view.hpp000066400000000000000000000221451457701740000215050ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_GPU_ARRAY_CONST_VIEW_HPP #define SPFFT_GPU_ARRAY_CONST_VIEW_HPP #include #include #include "memory/gpu_array_view.hpp" #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "util/common_types.hpp" #if defined(__CUDACC__) || defined(__HIPCC__) #include "gpu_util/gpu_runtime.hpp" #endif namespace spfft {

// Non-owning, read-only 1D view of `size` elements tagged with a device id.
// Element access is only available in device code; elements are returned by value.
// T must be a built-in type.
template <typename T>
class GPUArrayConstView1D {
public:
  using ValueType = T;
  static constexpr SizeType ORDER = 1;

  // Empty view: size 0, null data pointer, device id 0.
  GPUArrayConstView1D() = default;

  GPUArrayConstView1D(const ValueType* data, const int size, const int deviceId);

  GPUArrayConstView1D(const GPUArrayView1D<T>&);  // conversion allowed

#if defined(__CUDACC__) || defined(__HIPCC__)
  __device__ inline auto operator()(const int idx) const -> ValueType {
    assert(idx < size_);
#if __CUDA_ARCH__ >= 350 || defined(__HIPCC__)
    return __ldg(data_ + idx);
#else
    return data_[idx];
#endif
  }

  __host__ __device__ inline auto empty() const noexcept -> bool { return size_ == 0; }

  __host__ __device__ inline auto size() const noexcept -> int { return size_; }

  __host__ __device__ inline auto device_id() const noexcept -> int { return deviceId_; }

#else

  inline auto empty() const noexcept -> bool { return size_ == 0; }

  inline auto size() const noexcept -> int { return size_; }

  inline auto device_id() const noexcept -> int { return deviceId_; }
#endif

private:
  int size_ = 0;
  const ValueType* data_ = nullptr;
  int deviceId_ = 0;
};

// Non-owning, read-only 2D view with row-major indexing
// (element (outer, inner) is at offset outer * dim_inner() + inner).
// T must be a built-in type.
template <typename T>
class GPUArrayConstView2D {
public:
  using ValueType = T;
  static constexpr SizeType ORDER = 2;

  // Empty view: all dimensions 0, null data pointer, device id 0.
  GPUArrayConstView2D() = default;

  GPUArrayConstView2D(const ValueType* data, const int dimOuter, const int dimInner,
                      const int deviceId);

  GPUArrayConstView2D(const GPUArrayView2D<T>&);  // conversion allowed

#if defined(__CUDACC__) || defined(__HIPCC__)
  __device__ inline auto operator()(const int idxOuter, const int idxInner) const -> ValueType {
    assert(idxOuter < dims_[0]);
    assert(idxInner < dims_[1]);
#if __CUDA_ARCH__ >= 350 || defined(__HIPCC__)
    return __ldg(data_ + (idxOuter * dims_[1]) + idxInner);
#else
    return data_[(idxOuter * dims_[1]) + idxInner];
#endif
  }

  __host__ __device__ inline auto index(const int idxOuter, const int idxInner) const noexcept
      -> int {
    return (idxOuter * dims_[1]) + idxInner;
  }

  __host__ __device__ inline auto empty() const noexcept -> bool { return this->size() == 0; }

  __host__ __device__ inline auto size() const noexcept -> int { return dims_[0] * dims_[1]; }

  __host__ __device__ inline auto dim_inner() const noexcept -> int { return dims_[1]; }

  __host__ __device__ inline auto dim_outer() const noexcept -> int { return dims_[0]; }

  __host__ __device__ inline auto device_id() const noexcept -> int { return deviceId_; }

#else

  inline auto index(const int idxOuter, const int idxInner) const noexcept -> int {
    return (idxOuter * dims_[1]) + idxInner;
  }

  inline auto empty() const noexcept -> bool { return this->size() == 0; }

  inline auto size() const noexcept -> int { return dims_[0] * dims_[1]; }

  inline auto dim_inner() const noexcept -> int { return dims_[1]; }

  inline auto dim_outer() const noexcept -> int { return dims_[0]; }

  inline auto device_id() const noexcept -> int { return deviceId_; }
#endif

private:
  // Zero-initialised so that size() / empty() are well defined on a default-constructed view.
  int dims_[2] = {0, 0};
  const ValueType* data_ = nullptr;
  int deviceId_ = 0;
};

// Non-owning, read-only 3D view with row-major indexing
// (element (outer, mid, inner) is at offset (outer * dim_mid() + mid) * dim_inner() + inner).
// T must be a built-in type.
template <typename T>
class GPUArrayConstView3D {
public:
  using ValueType = T;
  static constexpr SizeType ORDER = 3;

  // Empty view: all dimensions 0, null data pointer, device id 0.
  GPUArrayConstView3D() = default;

  GPUArrayConstView3D(const ValueType* data, const int dimOuter, const int dimMid,
                      const int dimInner, const int deviceId);

  GPUArrayConstView3D(const GPUArrayView3D<T>&);  // conversion allowed

#if defined(__CUDACC__) || defined(__HIPCC__)
  __device__ inline auto operator()(const int idxOuter, const int idxMid,
                                    const int idxInner) const -> ValueType {
    assert(idxOuter < dims_[0]);
    assert(idxMid < dims_[1]);
    assert(idxInner < dims_[2]);
#if __CUDA_ARCH__ >= 350 || defined(__HIPCC__)
    return __ldg(data_ + (idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner);
#else
    return data_[(idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner];
#endif
  }

  __host__ __device__ inline auto index(const int idxOuter, const int idxMid,
                                        const int idxInner) const noexcept -> int {
    return (idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner;
  }

  __host__ __device__ inline auto empty() const noexcept -> bool { return this->size() == 0; }

  __host__ __device__ inline auto size() const noexcept -> int {
    return dims_[0] * dims_[1] * dims_[2];
  }

  __host__ __device__ inline auto dim_inner() const noexcept -> int { return dims_[2]; }

  __host__ __device__ inline auto dim_mid() const noexcept -> int { return dims_[1]; }

  __host__ __device__ inline auto dim_outer() const noexcept -> int { return dims_[0]; }

  __host__ __device__ inline auto device_id() const noexcept -> int { return deviceId_; }

#else

  inline auto index(const int idxOuter, const int idxMid, const int idxInner) const noexcept
      -> int {
    return (idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner;
  }

  inline auto empty() const noexcept -> bool { return this->size() == 0; }

  inline auto size() const noexcept -> int { return dims_[0] * dims_[1] * dims_[2]; }

  inline auto dim_inner() const noexcept -> int { return dims_[2]; }

  inline auto dim_mid() const noexcept -> int { return dims_[1]; }

  inline auto dim_outer() const noexcept -> int { return dims_[0]; }

  inline auto device_id() const noexcept -> int { return deviceId_; }
#endif

private:
  // Zero-initialised so that size() / empty() are well defined on a default-constructed view.
  int dims_[3] = {0, 0, 0};
  const ValueType* data_ = nullptr;
  int deviceId_ = 0;
};

// ======================
// Implementation
// ======================

template <typename T>
GPUArrayConstView1D<T>::GPUArrayConstView1D(const ValueType* data, const int size,
                                            const int deviceId)
    : size_(size), data_(data), deviceId_(deviceId) {
  // A non-empty view needs a valid pointer.
  assert(!(size != 0 && data == nullptr));
  assert(size >= 0);
}

template <typename T>
GPUArrayConstView1D<T>::GPUArrayConstView1D(const GPUArrayView1D<T>& view)
    : size_(view.size()), data_(view.data()), deviceId_(view.device_id()) {}

template <typename T>
GPUArrayConstView2D<T>::GPUArrayConstView2D(const ValueType* data, const int dimOuter,
                                            const int dimInner, const int deviceId)
    : dims_{dimOuter, dimInner}, data_(data), deviceId_(deviceId) {
  // A non-empty view needs a valid pointer.
  assert(!(dimOuter != 0 && dimInner != 0 && data == nullptr));
  assert(dimOuter >= 0);
  assert(dimInner >= 0);
}

template <typename T>
GPUArrayConstView2D<T>::GPUArrayConstView2D(const GPUArrayView2D<T>& view)
    : dims_{view.dim_outer(), view.dim_inner()},
      data_(view.data()),
      deviceId_(view.device_id()) {}

template <typename T>
GPUArrayConstView3D<T>::GPUArrayConstView3D(const ValueType* data, const int dimOuter,
                                            const int dimMid, const int dimInner,
                                            const int deviceId)
    : dims_{dimOuter, dimMid, dimInner}, data_(data), deviceId_(deviceId) {
  // A non-empty view needs a valid pointer.
  assert(!(dimOuter != 0 && dimMid != 0 && dimInner != 0 && data == nullptr));
  assert(dimOuter >= 0);
  assert(dimMid >= 0);
  assert(dimInner >= 0);
}

template <typename T>
GPUArrayConstView3D<T>::GPUArrayConstView3D(const GPUArrayView3D<T>& view)
    : dims_{view.dim_outer(), view.dim_mid(), view.dim_inner()},
      data_(view.data()),
      deviceId_(view.device_id()) {}

} // namespace spfft #endif SpFFT-1.1.0/src/memory/gpu_array_view.hpp000066400000000000000000000222741457701740000203020ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_GPU_ARRAY_VIEW_HPP #define SPFFT_GPU_ARRAY_VIEW_HPP #include #include #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "util/common_types.hpp" #if defined(__CUDACC__) || defined(__HIPCC__) #include "gpu_util/gpu_runtime.hpp" #endif namespace spfft { template class GPUArrayView1D { public: using ValueType = T; static constexpr SizeType ORDER = 1; GPUArrayView1D() = default; GPUArrayView1D(ValueType* data, const int size, const int deviceId); #if defined(__CUDACC__) || defined(__HIPCC__) __device__ inline auto operator()(const int idx) -> ValueType& { assert(idx < size_); return data_[idx]; } __device__ inline auto operator()(const int idx) const -> const ValueType& { assert(idx < size_); return data_[idx]; } __host__ __device__ inline auto data() noexcept -> ValueType* { return data_; } __host__ __device__ inline auto data() const noexcept -> const ValueType* { return data_; } __host__ __device__ inline auto empty() const noexcept -> bool { return size_ == 0; } __host__ __device__ inline auto size() const noexcept -> int { return size_; } __host__ __device__ inline auto device_id() const noexcept -> int { return deviceId_; } #else inline auto data() noexcept -> ValueType* { return data_; } inline auto data() const noexcept -> const ValueType* { return data_; } inline auto empty() const noexcept -> bool { return size_ == 0; } inline auto size() const noexcept -> int { return size_; } inline auto device_id() const noexcept -> int { return deviceId_; } #endif private: int size_ = 0; ValueType* data_ = nullptr; int deviceId_ = 0; }; template class GPUArrayView2D { public: using ValueType = T; static constexpr SizeType ORDER = 2; GPUArrayView2D() = default; GPUArrayView2D(ValueType* data, const int dimOuter, const int dimInner, const int deviceId); #if defined(__CUDACC__) || defined(__HIPCC__) __device__ inline auto operator()(const int idxOuter, const int idxInner) -> ValueType& { assert(idxOuter < dims_[0]); assert(idxInner < 
dims_[1]); return data_[(idxOuter * dims_[1]) + idxInner]; } __device__ inline auto operator()(const int idxOuter, const int idxInner) const -> const ValueType& { assert(idxOuter < dims_[0]); assert(idxInner < dims_[1]); return data_[(idxOuter * dims_[1]) + idxInner]; } __host__ __device__ inline auto index(const int idxOuter, const int idxInner) const noexcept -> int { return (idxOuter * dims_[1]) + idxInner; } __host__ __device__ inline auto data() noexcept -> ValueType* { return data_; } __host__ __device__ inline auto data() const noexcept -> const ValueType* { return data_; } __host__ __device__ inline auto empty() const noexcept -> bool { return this->size() == 0; } __host__ __device__ inline auto size() const noexcept -> int { return dims_[0] * dims_[1]; } __host__ __device__ inline auto dim_inner() const noexcept -> int { return dims_[1]; } __host__ __device__ inline auto dim_outer() const noexcept -> int { return dims_[0]; } __host__ __device__ inline auto device_id() const noexcept -> int { return deviceId_; } #else inline auto index(const int idxOuter, const int idxInner) const noexcept -> int { return (idxOuter * dims_[1]) + idxInner; } inline auto data() noexcept -> ValueType* { return data_; } inline auto data() const noexcept -> const ValueType* { return data_; } inline auto empty() const noexcept -> bool { return this->size() == 0; } inline auto size() const noexcept -> int { return dims_[0] * dims_[1]; } inline auto dim_inner() const noexcept -> int { return dims_[1]; } inline auto dim_outer() const noexcept -> int { return dims_[0]; } inline auto device_id() const noexcept -> int { return deviceId_; } #endif private: int dims_[2]; ValueType* data_ = nullptr; int deviceId_ = 0; }; template class GPUArrayView3D { public: using ValueType = T; static constexpr SizeType ORDER = 3; GPUArrayView3D() = default; GPUArrayView3D(ValueType* data, const int dimOuter, const int dimMid, const int dimInner, const int deviceId); #if defined(__CUDACC__) || 
defined(__HIPCC__) __device__ inline auto operator()(const int idxOuter, const int idxMid, const int idxInner) noexcept -> ValueType& { assert(idxOuter < dims_[0]); assert(idxMid < dims_[1]); assert(idxInner < dims_[2]); return data_[(idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner]; } __device__ inline auto operator()(const int idxOuter, const int idxMid, const int idxInner) const noexcept -> const ValueType& { assert(idxOuter < dims_[0]); assert(idxMid < dims_[1]); assert(idxInner < dims_[2]); return data_[(idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner]; } __host__ __device__ inline auto index(const int idxOuter, const int idxMid, const int idxInner) const noexcept -> int { return (idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner; } __host__ __device__ inline auto data() noexcept -> ValueType* { return data_; } __host__ __device__ inline auto data() const noexcept -> const ValueType* { return data_; } __host__ __device__ inline auto empty() const noexcept -> bool { return this->size() == 0; } __host__ __device__ inline auto size() const noexcept -> int { return dims_[0] * dims_[1] * dims_[2]; } __host__ __device__ inline auto dim_inner() const noexcept -> int { return dims_[2]; } __host__ __device__ inline auto dim_mid() const noexcept -> int { return dims_[1]; } __host__ __device__ inline auto dim_outer() const noexcept -> int { return dims_[0]; } __host__ __device__ inline auto device_id() const noexcept -> int { return deviceId_; } #else inline auto index(const int idxOuter, const int idxMid, const int idxInner) const noexcept -> int { return (idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner; } inline auto data() noexcept -> ValueType* { return data_; } inline auto data() const noexcept -> const ValueType* { return data_; } inline auto empty() const noexcept -> bool { return this->size() == 0; } inline auto size() const noexcept -> int { return dims_[0] * dims_[1] * dims_[2]; } inline auto dim_inner() const noexcept -> int { return dims_[2]; } 
inline auto dim_mid() const noexcept -> int { return dims_[1]; } inline auto dim_outer() const noexcept -> int { return dims_[0]; } inline auto device_id() const noexcept -> int { return deviceId_; } #endif private: int dims_[3]; ValueType* data_ = nullptr; int deviceId_ = 0; }; // ====================== // Implementation // ====================== template GPUArrayView1D::GPUArrayView1D(ValueType* data, const int size, const int deviceId) : size_(size), data_(data), deviceId_(deviceId) { assert(!(size != 0 && data == nullptr)); } template GPUArrayView2D::GPUArrayView2D(ValueType* data, const int dimOuter, const int dimInner, const int deviceId) : dims_{dimOuter, dimInner}, data_(data), deviceId_(deviceId) { assert(!(dimOuter != 0 && dimInner != 0 && data == nullptr)); assert(dimOuter >= 0); assert(dimInner >= 0); } template GPUArrayView3D::GPUArrayView3D(ValueType* data, const int dimOuter, const int dimMid, const int dimInner, const int deviceId) : dims_{dimOuter, dimMid, dimInner}, data_(data), deviceId_(deviceId) { assert(!(dimOuter != 0 && dimMid != 0 && dimInner != 0 && data == nullptr)); assert(dimOuter >= 0); assert(dimMid >= 0); assert(dimInner >= 0); } } // namespace spfft #endif SpFFT-1.1.0/src/memory/host_array.hpp000066400000000000000000000160131457701740000174240ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_HOST_ARRAY_HPP #define SPFFT_HOST_ARRAY_HPP #include #include #include #include #include #include "gpu_util/gpu_runtime_api.hpp" #include "memory/aligned_allocation.hpp" #include "spfft/config.h" #include "util/common_types.hpp" namespace spfft { // Fixed sized array with data aligned to page boundaries // and requirements for pinned memory with ROCm. // The data can be pinned in memory, if GPU support is enabled. // The destructor of type T must not throw. template class HostArray { public: static_assert(std::is_nothrow_destructible::value, "Destructor of ValueType for HostArray must be noexcept."); using ValueType = T; using Iterator = T*; using ConstIterator = const T*; static constexpr SizeType ORDER = 1; // Construct empty array HostArray() noexcept; // Create array with given size. Additional parameters are passed to the // constructor of each element of type T. // Throws exception upon allocation or element construction failure template HostArray(SizeType size, ARGS&&... 
args); HostArray(const HostArray& array) = delete; HostArray(HostArray&& array) noexcept; ~HostArray() noexcept(std::is_nothrow_destructible::value); auto operator=(const HostArray& array) -> HostArray& = delete; auto operator=(HostArray&& array) noexcept -> HostArray&; inline auto operator[](const SizeType idx) -> ValueType& { assert(idx < size_); return data_[idx]; } inline auto operator[](const SizeType idx) const -> const ValueType& { assert(idx < size_); return data_[idx]; } inline auto operator()(const SizeType idx) -> ValueType& { assert(idx < size_); return data_[idx]; } inline auto operator()(const SizeType idx) const -> const ValueType& { assert(idx < size_); return data_[idx]; } inline auto size() const noexcept -> SizeType { return size_; } inline auto pinned() const noexcept -> bool { return pinned_; } // Attempt to pin memory. Return true on success and false otherwise auto pin_memory() noexcept -> bool; // Unpin memory if pinned. Does nothing otherwise auto unpin_memory() noexcept -> void; inline auto data() noexcept -> ValueType* { return data_; } inline auto data() const noexcept -> const ValueType* { return data_; } inline auto begin() noexcept -> Iterator { return data_; } inline auto begin() const noexcept -> ConstIterator { return data_; } inline auto cbegin() const noexcept -> ConstIterator { return data_; } inline auto end() noexcept -> Iterator { return data_ + size_; } inline auto end() const noexcept -> ConstIterator { return data_ + size_; } inline auto cend() const noexcept -> ConstIterator { return data_ + size_; } // undefined behaviour for empty array inline auto front() -> ValueType& { return data_[0]; } // undefined behaviour for empty array inline auto front() const -> const ValueType& { return data_[0]; } // undefined behaviour for empty array inline auto back() -> ValueType& { return data_[size_ - 1]; } // undefined behaviour for empty array inline auto back() const -> const ValueType& { return data_[size_ - 1]; } inline auto 
empty() const noexcept -> bool { return size_ == 0; } private: T* data_ = nullptr; SizeType size_ = 0; bool pinned_ = false; }; // ====================== // Implementation // ====================== template HostArray::HostArray() noexcept : data_(nullptr), size_(0), pinned_(false) {} template template HostArray::HostArray(SizeType size, ARGS&&... args) : data_(static_cast(memory::allocate_aligned(size * sizeof(T)))), size_(size), pinned_(false) { try { memory::construct_elements_in_place(data_, size, std::forward(args)...); } catch (...) { size_ = 0; memory::free_aligned(data_); data_ = nullptr; throw; } } template HostArray::HostArray(HostArray&& array) noexcept : data_(nullptr), size_(0), pinned_(false) { data_ = array.data_; array.data_ = nullptr; size_ = array.size_; array.size_ = 0; pinned_ = array.pinned_; array.pinned_ = false; } template HostArray::~HostArray() noexcept(std::is_nothrow_destructible::value) { if (data_) { this->unpin_memory(); memory::deconstruct_elements(data_, size_); memory::free_aligned(data_); data_ = nullptr; size_ = 0; } assert(data_ == nullptr); assert(size_ == 0); assert(!pinned_); } template auto HostArray::operator=(HostArray&& array) noexcept -> HostArray& { if (data_) { this->unpin_memory(); memory::deconstruct_elements(data_, size_); memory::free_aligned(data_); } data_ = array.data_; array.data_ = nullptr; size_ = array.size_; array.size_ = 0; pinned_ = array.pinned_; array.pinned_ = false; return *this; } template auto HostArray::pin_memory() noexcept -> bool { #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) if (!pinned_ && data_) { if (gpu::host_register(static_cast(data_), size_ * sizeof(ValueType), gpu::flag::HostRegisterDefault) == gpu::status::Success) { pinned_ = true; } } #endif return pinned_; } template auto HostArray::unpin_memory() noexcept -> void { #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) if (pinned_) { std::ignore = gpu::host_unregister((void*)data_); pinned_ = false; } #endif } } // namespace spfft #endif 
SpFFT-1.1.0/src/memory/host_array_const_view.hpp000066400000000000000000000217241457701740000216710ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_HOST_ARRAY_CONST_VIEW_HPP #define SPFFT_HOST_ARRAY_CONST_VIEW_HPP #include #include #include "memory/host_array_view.hpp" #include "spfft/config.h" #include "util/common_types.hpp" namespace spfft { template class HostArrayConstView1D { public: using ValueType = T; using Iterator = T*; using ConstIterator = const T*; static constexpr SizeType ORDER = 1; HostArrayConstView1D() = default; HostArrayConstView1D(const HostArrayConstView1D&) = default; HostArrayConstView1D(HostArrayConstView1D&&) = default; HostArrayConstView1D(const ValueType* data, const SizeType size, const bool pinned); // conversion from non-const view HostArrayConstView1D(const HostArrayView1D& view) : size_(view.size()), pinned_(view.pinned()), data_(view.data()) {} inline auto operator()(const SizeType idx) const -> const ValueType& { assert(idx < size_); return data_[idx]; } inline auto pinned() const noexcept -> bool { return pinned_; } inline auto data() const noexcept -> const ValueType* { return data_; } inline auto empty() const noexcept -> bool { return size_ == 0; } inline auto size() const noexcept -> SizeType { return size_; } inline auto begin() const noexcept -> ConstIterator { return data_; } inline auto cbegin() const noexcept -> ConstIterator { return data_; } inline auto end() const noexcept -> ConstIterator { return data_ + size_; } inline auto cend() const noexcept -> ConstIterator { return data_ + size_; } private: SizeType size_ = 0; bool pinned_ = false; const ValueType* data_ = nullptr; }; template class HostArrayConstView2D { public: using ValueType = T; using Iterator = T*; using ConstIterator = const T*; static constexpr SizeType ORDER = 2; HostArrayConstView2D() = default; HostArrayConstView2D(const HostArrayConstView2D&) = default; HostArrayConstView2D(HostArrayConstView2D&&) = default; HostArrayConstView2D(const ValueType* data, const SizeType dimOuter, const SizeType dimInner, const bool pinned); HostArrayConstView2D(const ValueType* data, const 
std::array& dims, const bool pinned); // conversion from non-const view HostArrayConstView2D(const HostArrayView2D& view) : dims_({view.dim_outer(), view.dim_inner()}), pinned_(view.pinned()), data_(view.data()) {} inline auto operator()(const SizeType idxOuter, const SizeType idxInner) const -> const ValueType& { assert(idxOuter < dims_[0]); assert(idxInner < dims_[1]); return data_[(idxOuter * dims_[1]) + idxInner]; } inline auto index(const SizeType idxOuter, const SizeType idxInner) const noexcept -> SizeType { return (idxOuter * dims_[1]) + idxInner; } inline auto pinned() const noexcept -> bool { return pinned_; } inline auto data() const noexcept -> const ValueType* { return data_; } inline auto empty() const noexcept -> bool { return this->size() == 0; } inline auto size() const noexcept -> SizeType { return dims_[0] * dims_[1]; } inline auto dim_inner() const noexcept -> SizeType { return dims_[1]; } inline auto dim_outer() const noexcept -> SizeType { return dims_[0]; } inline auto begin() const noexcept -> ConstIterator { return data_; } inline auto cbegin() const noexcept -> ConstIterator { return data_; } inline auto end() const noexcept -> ConstIterator { return data_ + size(); } inline auto cend() const noexcept -> ConstIterator { return data_ + size(); } private: std::array dims_ = {0, 0}; bool pinned_ = false; const ValueType* data_ = nullptr; }; template class HostArrayConstView3D { public: using ValueType = T; using Iterator = T*; using ConstIterator = const T*; static constexpr SizeType ORDER = 3; HostArrayConstView3D() = default; HostArrayConstView3D(const HostArrayConstView3D&) = default; HostArrayConstView3D(HostArrayConstView3D&&) = default; HostArrayConstView3D(const ValueType* data, const SizeType dimOuter, const SizeType dimMid, const SizeType dimInner, const bool pinned); HostArrayConstView3D(const ValueType* data, const std::array& dims, const bool pinned); // conversion from non-const view HostArrayConstView3D(const HostArrayView3D& 
view) : dims_({view.dim_outer(), view.dim_mid(), view.dim_inner()}), pinned_(view.pinned()), data_(view.data()) {} inline auto operator()(const SizeType idxOuter, const SizeType idxMid, const SizeType idxInner) const noexcept -> const ValueType& { assert(idxOuter < dims_[0]); assert(idxMid < dims_[1]); assert(idxInner < dims_[2]); return data_[(idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner]; } inline auto index(const SizeType idxOuter, const SizeType idxMid, const SizeType idxInner) const noexcept -> SizeType { return (idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner; } inline auto pinned() const noexcept -> bool { return pinned_; } inline auto data() const noexcept -> const ValueType* { return data_; } inline auto empty() const noexcept -> bool { return this->size() == 0; } inline auto size() const noexcept -> SizeType { return dims_[0] * dims_[1] * dims_[2]; } inline auto dim_inner() const noexcept -> SizeType { return dims_[2]; } inline auto dim_mid() const noexcept -> SizeType { return dims_[1]; } inline auto dim_outer() const noexcept -> SizeType { return dims_[0]; } inline auto begin() const noexcept -> ConstIterator { return data_; } inline auto cbegin() const noexcept -> ConstIterator { return data_; } inline auto end() const noexcept -> ConstIterator { return data_ + size(); } inline auto cend() const noexcept -> ConstIterator { return data_ + size(); } private: std::array dims_ = {0, 0, 0}; bool pinned_ = false; const ValueType* data_ = nullptr; }; // ====================== // Implementation // ====================== template HostArrayConstView1D::HostArrayConstView1D(const ValueType* data, const SizeType size, const bool pinned) : size_(size), pinned_(pinned), data_(data) { assert(!(size != 0 && data == nullptr)); } template HostArrayConstView2D::HostArrayConstView2D(const ValueType* data, const SizeType dimOuter, const SizeType dimInner, const bool pinned) : dims_({dimOuter, dimInner}), pinned_(pinned), data_(data) {} template 
HostArrayConstView2D::HostArrayConstView2D(const ValueType* data, const std::array& dims, const bool pinned) : dims_(dims), pinned_(pinned), data_(data) {} template HostArrayConstView3D::HostArrayConstView3D(const ValueType* data, const SizeType dimOuter, const SizeType dimMid, const SizeType dimInner, const bool pinned) : dims_({dimOuter, dimMid, dimInner}), pinned_(pinned), data_(data) {} template HostArrayConstView3D::HostArrayConstView3D(const ValueType* data, const std::array& dims, const bool pinned) : dims_(dims), pinned_(pinned), data_(data) {} } // namespace spfft #endif SpFFT-1.1.0/src/memory/host_array_view.hpp000066400000000000000000000213551457701740000204630ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_HOST_ARRAY_VIEW_HPP #define SPFFT_HOST_ARRAY_VIEW_HPP #include #include #include "spfft/config.h" #include "util/common_types.hpp" namespace spfft { template class HostArrayView1D { public: using ValueType = T; using Iterator = T*; using ConstIterator = const T*; static constexpr SizeType ORDER = 1; HostArrayView1D() = default; HostArrayView1D(ValueType* data, const SizeType size, const bool pinned); inline auto operator()(const SizeType idx) -> ValueType& { assert(idx < size_); return data_[idx]; } inline auto operator()(const SizeType idx) const -> const ValueType& { assert(idx < size_); return data_[idx]; } inline auto pinned() const noexcept -> bool { return pinned_; } inline auto data() noexcept -> ValueType* { return data_; } inline auto data() const noexcept -> const ValueType* { return data_; } inline auto empty() const noexcept -> bool { return size_ == 0; } inline auto size() const noexcept -> SizeType { return size_; } inline auto begin() noexcept -> Iterator { return data_; } inline auto begin() const noexcept -> ConstIterator { return data_; } inline auto cbegin() const noexcept -> ConstIterator { return data_; } inline auto end() noexcept -> Iterator { return data_ + size_; } inline auto end() const noexcept -> ConstIterator { return data_ + size_; } inline auto cend() const noexcept -> ConstIterator { return data_ + size_; } private: SizeType size_ = 0; bool pinned_ = false; ValueType* data_ = nullptr; }; template 
class HostArrayView2D { public: using ValueType = T; using Iterator = T*; using ConstIterator = const T*; static constexpr SizeType ORDER = 2; HostArrayView2D() = default; HostArrayView2D(ValueType* data, const SizeType dimOuter, const SizeType dimInner, const bool pinned); HostArrayView2D(ValueType* data, const std::array& dims, const bool pinned); inline auto operator()(const SizeType idxOuter, const SizeType idxInner) -> ValueType& { assert(idxOuter < dims_[0]); assert(idxInner < dims_[1]); return data_[(idxOuter * dims_[1]) + idxInner]; } inline auto operator()(const SizeType idxOuter, const SizeType idxInner) const -> const ValueType& { assert(idxOuter < dims_[0]); assert(idxInner < dims_[1]); return data_[(idxOuter * dims_[1]) + idxInner]; } inline auto index(const SizeType idxOuter, const SizeType idxInner) const noexcept -> SizeType { return (idxOuter * dims_[1]) + idxInner; } inline auto pinned() const noexcept -> bool { return pinned_; } inline auto data() noexcept -> ValueType* { return data_; } inline auto data() const noexcept -> const ValueType* { return data_; } inline auto empty() const noexcept -> bool { return this->size() == 0; } inline auto size() const noexcept -> SizeType { return dims_[0] * dims_[1]; } inline auto dim_inner() const noexcept -> SizeType { return dims_[1]; } inline auto dim_outer() const noexcept -> SizeType { return dims_[0]; } inline auto begin() noexcept -> Iterator { return data_; } inline auto begin() const noexcept -> ConstIterator { return data_; } inline auto cbegin() const noexcept -> ConstIterator { return data_; } inline auto end() noexcept -> Iterator { return data_ + size(); } inline auto end() const noexcept -> ConstIterator { return data_ + size(); } inline auto cend() const noexcept -> ConstIterator { return data_ + size(); } private: std::array dims_ = {0, 0}; bool pinned_ = false; ValueType* data_ = nullptr; }; template class HostArrayView3D { public: using ValueType = T; using Iterator = T*; using 
ConstIterator = const T*; static constexpr SizeType ORDER = 3; HostArrayView3D() = default; HostArrayView3D(ValueType* data, const SizeType dimOuter, const SizeType dimMid, const SizeType dimInner, const bool pinned); HostArrayView3D(ValueType* data, const std::array& dims, const bool pinned); inline auto operator()(const SizeType idxOuter, const SizeType idxMid, const SizeType idxInner) noexcept -> ValueType& { assert(idxOuter < dims_[0]); assert(idxMid < dims_[1]); assert(idxInner < dims_[2]); return data_[(idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner]; } inline auto operator()(const SizeType idxOuter, const SizeType idxMid, const SizeType idxInner) const noexcept -> const ValueType& { assert(idxOuter < dims_[0]); assert(idxMid < dims_[1]); assert(idxInner < dims_[2]); return data_[(idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner]; } inline auto index(const SizeType idxOuter, const SizeType idxMid, const SizeType idxInner) const noexcept -> SizeType { return (idxOuter * dims_[1] + idxMid) * dims_[2] + idxInner; } inline auto pinned() const noexcept -> bool { return pinned_; } inline auto data() noexcept -> ValueType* { return data_; } inline auto data() const noexcept -> const ValueType* { return data_; } inline auto empty() const noexcept -> bool { return this->size() == 0; } inline auto size() const noexcept -> SizeType { return dims_[0] * dims_[1] * dims_[2]; } inline auto dim_inner() const noexcept -> SizeType { return dims_[2]; } inline auto dim_mid() const noexcept -> SizeType { return dims_[1]; } inline auto dim_outer() const noexcept -> SizeType { return dims_[0]; } inline auto begin() noexcept -> Iterator { return data_; } inline auto begin() const noexcept -> ConstIterator { return data_; } inline auto cbegin() const noexcept -> ConstIterator { return data_; } inline auto end() noexcept -> Iterator { return data_ + size(); } inline auto end() const noexcept -> ConstIterator { return data_ + size(); } inline auto cend() const noexcept -> 
ConstIterator { return data_ + size(); } private: std::array dims_ = {0, 0, 0}; bool pinned_ = false; ValueType* data_ = nullptr; }; // ====================== // Implementation // ====================== template HostArrayView1D::HostArrayView1D(ValueType* data, const SizeType size, const bool pinned) : size_(size), pinned_(pinned), data_(data) { assert(!(size != 0 && data == nullptr)); } template HostArrayView2D::HostArrayView2D(ValueType* data, const SizeType dimOuter, const SizeType dimInner, const bool pinned) : dims_({dimOuter, dimInner}), pinned_(pinned), data_(data) {} template HostArrayView2D::HostArrayView2D(ValueType* data, const std::array& dims, const bool pinned) : dims_(dims), pinned_(pinned), data_(data) {} template HostArrayView3D::HostArrayView3D(ValueType* data, const SizeType dimOuter, const SizeType dimMid, const SizeType dimInner, const bool pinned) : dims_({dimOuter, dimMid, dimInner}), pinned_(pinned), data_(data) {} template HostArrayView3D::HostArrayView3D(ValueType* data, const std::array& dims, const bool pinned) : dims_(dims), pinned_(pinned), data_(data) {} } // namespace spfft #endif SpFFT-1.1.0/src/memory/memory_type_trait.hpp000066400000000000000000000053311457701740000210260ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_MEMORY_TYPE_TRAIT_HPP #define SPFFT_MEMORY_TYPE_TRAIT_HPP #include "spfft/config.h" #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #include "memory/gpu_array.hpp" #include "memory/gpu_array_const_view.hpp" #include "memory/gpu_array_view.hpp" #endif namespace spfft { template struct IsDeviceMemory { constexpr static bool value = false; }; #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) template struct IsDeviceMemory> { constexpr static bool value = true; }; template struct IsDeviceMemory> { constexpr static bool value = true; }; template struct IsDeviceMemory> { constexpr static bool value = true; }; template struct IsDeviceMemory> { constexpr static bool value = true; }; template struct IsDeviceMemory> { constexpr static bool value = true; }; template struct IsDeviceMemory> { constexpr static bool value = true; }; template struct IsDeviceMemory> { constexpr static bool value = true; }; #endif } // namespace spfft #endif SpFFT-1.1.0/src/mpi_util/000077500000000000000000000000001457701740000150515ustar00rootroot00000000000000SpFFT-1.1.0/src/mpi_util/mpi_check_status.hpp000066400000000000000000000035151457701740000211130ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, 
Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_MPI_CHECK_STATUS_HPP #define SPFFT_MPI_CHECK_STATUS_HPP #include #include "spfft/config.h" #include "spfft/exceptions.hpp" namespace spfft { inline auto mpi_check_status(int status) -> void { if (status != MPI_SUCCESS) { throw MPIError(); } } } // namespace spfft #endif SpFFT-1.1.0/src/mpi_util/mpi_communicator_handle.hpp000066400000000000000000000061071457701740000224460ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_MPI_COMMUNICATOR_HANDLE_HPP #define SPFFT_MPI_COMMUNICATOR_HANDLE_HPP #include #include #include #include "mpi_util/mpi_check_status.hpp" #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "util/common_types.hpp" namespace spfft { // MPI Communicator, which creates a duplicate at construction time. // Copies of the object share the same communicator, which is reference counted. class MPICommunicatorHandle { public: MPICommunicatorHandle() : comm_(new MPI_Comm(MPI_COMM_SELF)), size_(1), rank_(0) {} MPICommunicatorHandle(const MPI_Comm& comm) { // create copy of communicator MPI_Comm newComm; mpi_check_status(MPI_Comm_dup(comm, &newComm)); comm_ = std::shared_ptr(new MPI_Comm(newComm), [](MPI_Comm* ptr) { int finalized = 0; MPI_Finalized(&finalized); if (!finalized) { MPI_Comm_free(ptr); } delete ptr; }); int sizeInt, rankInt; mpi_check_status(MPI_Comm_size(*comm_, &sizeInt)); mpi_check_status(MPI_Comm_rank(*comm_, &rankInt)); if (sizeInt < 1 || rankInt < 0) { throw MPIError(); } rank_ = static_cast(rankInt); size_ = static_cast(sizeInt); } inline auto get() const -> const MPI_Comm& { return *comm_; } inline auto size() const noexcept -> SizeType { return size_; } inline auto rank() const noexcept -> SizeType { return rank_; } private: std::shared_ptr comm_ = nullptr; SizeType size_ = 1; SizeType rank_ = 0; }; } // namespace spfft #endif SpFFT-1.1.0/src/mpi_util/mpi_datatype_handle.hpp000066400000000000000000000117131457701740000215600ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_MPI_DATATYPE_HANDLE_HPP #define SPFFT_MPI_DATATYPE_HANDLE_HPP #include #include #include #include "mpi_util/mpi_check_status.hpp" #include "spfft/config.h" namespace spfft { // Storage for MPI datatypes class MPIDatatypeHandle { public: MPIDatatypeHandle() = default; // Create custom datatype with ownership // Does not call MPI_Type_commit! 
// Can take predifined MPI types such as MPI_DOUBLE, on which MPI_Type_free() will not be called // NOTE: Freeing a MPI_Datatype on which this type depends on does not affect this type (see "The // MPI core") MPIDatatypeHandle(const MPI_Datatype& mpiType) { assert(mpiType != MPI_DATATYPE_NULL); int numIntegers, numAddresses, numDatatypes, combiner; mpi_check_status( MPI_Type_get_envelope(mpiType, &numIntegers, &numAddresses, &numDatatypes, &combiner)); if (combiner != MPI_COMBINER_NAMED && combiner != MPI_COMBINER_DUP) { // take ownership and call MPI_Type_free upon release type_ = std::shared_ptr(new MPI_Datatype(mpiType), [](MPI_Datatype* ptr) { assert(*ptr != MPI_DATATYPE_NULL); int finalized = 0; MPI_Finalized(&finalized); if (!finalized) { MPI_Type_free(ptr); } delete ptr; }); } else { // only copy type descriptor, will not call MPI_Type_free() type_ = std::make_shared(mpiType); } } inline auto get() const -> const MPI_Datatype& { assert(type_); assert(*type_ != MPI_DATATYPE_NULL); return *type_; } inline auto empty() const noexcept -> bool { return type_ == nullptr; } inline static MPIDatatypeHandle create_contiguous(int count, MPI_Datatype oldType) { MPI_Datatype newType; mpi_check_status(MPI_Type_contiguous(count, oldType, &newType)); mpi_check_status(MPI_Type_commit(&newType)); return MPIDatatypeHandle(newType); } inline static MPIDatatypeHandle create_vector(int count, int blocklength, int stride, MPI_Datatype oldType) { MPI_Datatype newType; mpi_check_status(MPI_Type_vector(count, blocklength, stride, oldType, &newType)); mpi_check_status(MPI_Type_commit(&newType)); return MPIDatatypeHandle(newType); } inline static MPIDatatypeHandle create_hindexed(int count, const int arrayOfBlocklengths[], const MPI_Aint arrayOfDispls[], MPI_Datatype oldType) { MPI_Datatype newType; mpi_check_status( MPI_Type_create_hindexed(count, arrayOfBlocklengths, arrayOfDispls, oldType, &newType)); mpi_check_status(MPI_Type_commit(&newType)); return MPIDatatypeHandle(newType); } 
inline static MPIDatatypeHandle create_subarray(int ndims, const int arrayOfSizes[], const int arrayOfSubsizes[], const int arrayOfStarts[], int order, MPI_Datatype oldType) { MPI_Datatype newType; mpi_check_status(MPI_Type_create_subarray(ndims, arrayOfSizes, arrayOfSubsizes, arrayOfStarts, order, oldType, &newType)); mpi_check_status(MPI_Type_commit(&newType)); return MPIDatatypeHandle(newType); } private: std::shared_ptr type_ = nullptr; }; } // namespace spfft #endif SpFFT-1.1.0/src/mpi_util/mpi_init_handle.hpp000066400000000000000000000051341457701740000207100ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_MPI_INIT_HANDLE_HPP #define SPFFT_MPI_INIT_HANDLE_HPP #include #include "mpi_util/mpi_check_status.hpp" #include "spfft/config.h" namespace spfft { // MPI Communicator, which creates a duplicate at construction time. // Copies of the object share the same communicator, which is reference counted. class MPIInitHandle { public: MPIInitHandle(int& argc, char**& argv, bool callFinalize) : callFinalize_(callFinalize) { int initialized; MPI_Initialized(&initialized); if (!initialized) { // MPI_Init(&argc, &argv); int provided; mpi_check_status(MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided)); } } // unmovable MPIInitHandle(const MPIInitHandle& other) = delete; MPIInitHandle(MPIInitHandle&& other) = delete; auto operator=(const MPIInitHandle& other) -> MPIInitHandle& = delete; auto operator=(MPIInitHandle&& other) -> MPIInitHandle& = delete; ~MPIInitHandle() { if (callFinalize_) { MPI_Finalize(); } } private: bool callFinalize_ = false; }; } // namespace spfft #endif SpFFT-1.1.0/src/mpi_util/mpi_match_elementary_type.hpp000066400000000000000000000070741457701740000230210ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
* 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_MPI_MATCH_ELEMENTARY_TYPE_HPP #define SPFFT_MPI_MATCH_ELEMENTARY_TYPE_HPP #include #include "spfft/config.h" namespace spfft { template struct MPIMatchElementaryType; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_CHAR; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_SHORT; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_INT; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_LONG; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_LONG_LONG; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_SIGNED_CHAR; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_UNSIGNED_CHAR; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_UNSIGNED_SHORT; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_UNSIGNED; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_UNSIGNED_LONG; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_UNSIGNED_LONG_LONG; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_FLOAT; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_DOUBLE; } }; template <> struct MPIMatchElementaryType { inline static auto get() -> MPI_Datatype { return MPI_LONG_DOUBLE; } }; } // namespace spfft #endif SpFFT-1.1.0/src/mpi_util/mpi_request_handle.hpp000066400000000000000000000047111457701740000214350ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * 
Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_MPI_REQUEST_HANDLE_HPP #define SPFFT_MPI_REQUEST_HANDLE_HPP #include #include #include #include "mpi_util/mpi_check_status.hpp" #include "spfft/config.h" namespace spfft { // Storage for MPI datatypes class MPIRequestHandle { public: MPIRequestHandle() = default; MPIRequestHandle(const MPIRequestHandle&) = delete; MPIRequestHandle(MPIRequestHandle&&) = default; auto operator=(const MPIRequestHandle& other) -> MPIRequestHandle& = delete; auto operator=(MPIRequestHandle&& other) -> MPIRequestHandle& = default; inline auto get_and_activate() -> MPI_Request* { activated_ = true; return &mpiRequest_; } inline auto wait_if_active() -> void { if (activated_) { activated_ = false; MPI_Wait(&mpiRequest_, MPI_STATUS_IGNORE); } } private: MPI_Request mpiRequest_ = MPI_REQUEST_NULL; bool activated_ = false; }; } // namespace spfft #endif SpFFT-1.1.0/src/parameters/000077500000000000000000000000001457701740000153725ustar00rootroot00000000000000SpFFT-1.1.0/src/parameters/parameters.cpp000066400000000000000000000157421457701740000202520ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "parameters/parameters.hpp" #include #include #include #ifdef SPFFT_MPI #include "mpi_util/mpi_check_status.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_match_elementary_type.hpp" #endif namespace spfft { #ifdef SPFFT_MPI Parameters::Parameters(const MPICommunicatorHandle& comm, const SpfftTransformType transformType, const SizeType dimX, const SizeType dimY, const SizeType dimZ, const SizeType numLocalXYPlanes, const SizeType numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) : transformType_(transformType), dimX_(dimX), dimXFreq_(transformType == SPFFT_TRANS_R2C ? 
dimX / 2 + 1 : dimX), dimY_(dimY), dimZ_(dimZ), totalNumXYPlanes_(dimZ), comm_rank_(comm.rank()), comm_size_(comm.size()) { // helper struct to exchange information struct TransposeParameter { SizeType dimX; SizeType dimY; SizeType dimZ; SizeType numLocalXYPlanes; SizeType numLocalZSticks; SizeType numLocalElements; }; // Only index triplets supported (for now) if (indexFormat != SPFFT_INDEX_TRIPLETS) { throw InternalError(); } // convert indices to internal format std::vector localStickIndices; std::tie(freqValueIndices_, localStickIndices) = convert_index_triplets(transformType == SPFFT_TRANS_R2C, dimX, dimY, dimZ, numLocalElements, indices, indices + 1, indices + 2, 3); stickIndicesPerRank_ = create_distributed_transform_indices(comm, std::move(localStickIndices)); check_stick_duplicates(stickIndicesPerRank_); const SizeType numLocalZSticks = stickIndicesPerRank_[comm.rank()].size(); TransposeParameter paramLocal = TransposeParameter{dimX, dimY, dimZ, numLocalXYPlanes, numLocalZSticks, numLocalElements}; // exchange local parameters MPIDatatypeHandle parameterType = MPIDatatypeHandle::create_contiguous( sizeof(TransposeParameter) / sizeof(SizeType), MPIMatchElementaryType::get()); std::vector paramPerRank(comm.size()); mpi_check_status(MPI_Allgather(¶mLocal, 1, parameterType.get(), paramPerRank.data(), 1, parameterType.get(), comm.get())); // Check parameters SizeType numZSticksTotal = 0; SizeType numXYPlanesTotal = 0; for (const auto& p : paramPerRank) { // dimensions must match for all ranks if (p.dimX != paramLocal.dimX || p.dimY != paramLocal.dimY || p.dimZ != paramLocal.dimZ) { throw MPIParameterMismatchError(); } numZSticksTotal += p.numLocalZSticks; numXYPlanesTotal += p.numLocalXYPlanes; } if (numZSticksTotal > dimX * dimY) { // More z sticks than possible throw MPIParameterMismatchError(); } if (numXYPlanesTotal != dimZ) { throw MPIParameterMismatchError(); } // store all parameters in members numZSticksPerRank_.reserve(comm.size()); 
numXYPlanesPerRank_.reserve(comm.size()); xyPlaneOffsets_.reserve(comm.size()); SizeType startIndex = 0; SizeType xyPlaneOffset = 0; for (const auto& p : paramPerRank) { numZSticksPerRank_.emplace_back(p.numLocalZSticks); numXYPlanesPerRank_.emplace_back(p.numLocalXYPlanes); xyPlaneOffsets_.emplace_back(xyPlaneOffset); startIndex += p.numLocalZSticks; xyPlaneOffset += p.numLocalXYPlanes; totalNumFrequencyDomainElements_ += p.numLocalElements; } maxNumZSticks_ = *std::max_element(numZSticksPerRank_.begin(), numZSticksPerRank_.end()); maxNumXYPlanes_ = *std::max_element(numXYPlanesPerRank_.begin(), numXYPlanesPerRank_.end()); totalNumZSticks_ = std::accumulate(numZSticksPerRank_.begin(), numZSticksPerRank_.end(), SizeType(0)); // check if this rank holds the x=0, y=0 z-stick, which is treated specially for the real to // complex case zeroZeroStickIndex_ = 0; for (const auto& index : stickIndicesPerRank_[comm.rank()]) { if (index == 0) { break; } ++zeroZeroStickIndex_; } } #endif Parameters::Parameters(const SpfftTransformType transformType, const SizeType dimX, const SizeType dimY, const SizeType dimZ, const SizeType numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) : transformType_(transformType), dimX_(dimX), dimXFreq_(transformType == SPFFT_TRANS_R2C ? 
dimX / 2 + 1 : dimX), dimY_(dimY), dimZ_(dimZ), maxNumXYPlanes_(dimZ), totalNumXYPlanes_(dimZ), totalNumFrequencyDomainElements_(numLocalElements), comm_rank_(0), comm_size_(1), numXYPlanesPerRank_(1, dimZ), xyPlaneOffsets_(1, 0) { // Only index triplets supported (for now) if (indexFormat != SPFFT_INDEX_TRIPLETS) { throw InternalError(); } std::vector localStickIndices; std::tie(freqValueIndices_, localStickIndices) = convert_index_triplets(transformType == SPFFT_TRANS_R2C, dimX, dimY, dimZ, numLocalElements, indices, indices + 1, indices + 2, 3); stickIndicesPerRank_.emplace_back(std::move(localStickIndices)); check_stick_duplicates(stickIndicesPerRank_); maxNumZSticks_ = stickIndicesPerRank_[0].size(); totalNumZSticks_ = stickIndicesPerRank_[0].size(); numZSticksPerRank_.assign(1, stickIndicesPerRank_[0].size()); zeroZeroStickIndex_ = 0; for (const auto& index : stickIndicesPerRank_[0]) { if (index == 0) { break; } ++zeroZeroStickIndex_; } } } // namespace spfft SpFFT-1.1.0/src/parameters/parameters.hpp000066400000000000000000000142501457701740000202500ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_PARAMETERS_HPP #define SPFFT_PARAMETERS_HPP #include #include #include #include #include "compression/indices.hpp" #include "memory/host_array_const_view.hpp" #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "spfft/types.h" #include "util/common_types.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_communicator_handle.hpp" #endif namespace spfft { class Parameters { public: #ifdef SPFFT_MPI Parameters(const MPICommunicatorHandle& comm, const SpfftTransformType transformType, const SizeType dimX, const SizeType dimY, const SizeType dimZ, const SizeType numLocalXYPlanes, const SizeType numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); #endif Parameters(const SpfftTransformType transformType, const SizeType dimX, const SizeType dimY, const SizeType dimZ, const SizeType numLocalElements, SpfftIndexFormatType indexFormat, const int* indices); inline auto dim_x() const noexcept -> SizeType { return dimX_; } inline auto dim_x_freq() const noexcept -> SizeType { return dimXFreq_; } inline auto dim_y() const noexcept -> SizeType { return dimY_; } inline auto dim_z() const noexcept -> SizeType { return dimZ_; } inline auto max_num_z_sticks() const noexcept 
-> SizeType { return maxNumZSticks_; } inline auto max_num_xy_planes() const noexcept -> SizeType { return maxNumXYPlanes_; } inline auto total_num_z_sticks() const noexcept -> SizeType { return totalNumZSticks_; } inline auto total_num_xy_planes() const noexcept -> SizeType { return totalNumXYPlanes_; } inline auto transform_type() const noexcept -> SpfftTransformType { return transformType_; } inline auto zero_zero_stick_index() const noexcept -> SizeType { return zeroZeroStickIndex_; } inline auto num_xy_planes(const SizeType rank) const -> SizeType { assert(rank < numXYPlanesPerRank_.size()); return numXYPlanesPerRank_[rank]; } inline auto local_num_xy_planes() const -> SizeType { assert(comm_rank_ < numXYPlanesPerRank_.size()); return numXYPlanesPerRank_[comm_rank_]; } inline auto xy_plane_offset(const SizeType rank) const -> SizeType { assert(rank < numXYPlanesPerRank_.size()); return xyPlaneOffsets_[rank]; } inline auto local_xy_plane_offset() const -> SizeType { assert(comm_rank_ < numXYPlanesPerRank_.size()); return xyPlaneOffsets_[comm_rank_]; } inline auto num_z_sticks(const SizeType rank) const -> SizeType { assert(rank < numZSticksPerRank_.size()); return numZSticksPerRank_[rank]; } inline auto local_num_z_sticks() const -> SizeType { assert(comm_rank_ < numZSticksPerRank_.size()); return numZSticksPerRank_[comm_rank_]; } inline auto z_stick_xy_indices(const SizeType rank) const -> HostArrayConstView1D { assert(rank < stickIndicesPerRank_.size()); assert(num_z_sticks(rank) == stickIndicesPerRank_[rank].size()); return HostArrayConstView1D(stickIndicesPerRank_[rank].data(), stickIndicesPerRank_[rank].size(), false); } inline auto local_z_stick_xy_indices() const -> HostArrayConstView1D { assert(comm_rank_ < stickIndicesPerRank_.size()); assert(num_z_sticks(comm_rank_) == stickIndicesPerRank_[comm_rank_].size()); return HostArrayConstView1D(stickIndicesPerRank_[comm_rank_].data(), stickIndicesPerRank_[comm_rank_].size(), false); } inline auto 
local_value_indices() const -> const std::vector& { return freqValueIndices_; } inline auto local_num_elements() const -> SizeType { return freqValueIndices_.size(); } inline auto global_num_elements() const -> SizeType { return totalNumFrequencyDomainElements_; } inline auto global_size() const -> SizeType { return dimX_ * dimY_ * dimZ_; } inline auto comm_rank() const -> SizeType { return comm_rank_; } inline auto comm_size() const -> SizeType { return comm_size_; } private: SpfftTransformType transformType_; SizeType zeroZeroStickIndex_ = std::numeric_limits::max(); SizeType dimX_ = 0; SizeType dimXFreq_ = 0; SizeType dimY_ = 0; SizeType dimZ_ = 0; SizeType maxNumZSticks_ = 0; SizeType maxNumXYPlanes_ = 0; SizeType totalNumZSticks_ = 0; SizeType totalNumXYPlanes_ = 0; SizeType totalNumFrequencyDomainElements_ = 0; SizeType comm_rank_ = 0; SizeType comm_size_ = 1; std::vector numZSticksPerRank_; std::vector numXYPlanesPerRank_; std::vector xyPlaneOffsets_; std::vector> stickIndicesPerRank_; std::vector freqValueIndices_; }; } // namespace spfft #endif SpFFT-1.1.0/src/spfft/000077500000000000000000000000001457701740000143515ustar00rootroot00000000000000SpFFT-1.1.0/src/spfft/grid.cpp000066400000000000000000000240631457701740000160070ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "spfft/grid.hpp" #include "spfft/grid.h" #include "spfft/grid_internal.hpp" namespace spfft { Grid::Grid(int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZColumns, SpfftProcessingUnitType processingUnit, int maxNumThreads) : grid_(new GridInternal(maxDimX, maxDimY, maxDimZ, maxNumLocalZColumns, processingUnit, maxNumThreads)) {} #ifdef SPFFT_MPI Grid::Grid(int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZColumns, int maxLocalZLength, SpfftProcessingUnitType processingUnit, int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType) : grid_(new GridInternal(maxDimX, maxDimY, maxDimZ, maxNumLocalZColumns, maxLocalZLength, processingUnit, maxNumThreads, comm, exchangeType)) {} #endif Grid::Grid(const Grid& grid) : grid_(new GridInternal(*(grid.grid_))) {} Grid& Grid::operator=(const Grid& grid) { grid_.reset(new GridInternal(*(grid.grid_))); return *this; } Transform Grid::create_transform(SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int 
dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) const { return Transform(grid_, processingUnit, transformType, dimX, dimY, dimZ, localZLength, numLocalElements, indexFormat, indices); } int Grid::max_dim_x() const { return grid_->max_dim_x(); } int Grid::max_dim_y() const { return grid_->max_dim_y(); } int Grid::max_dim_z() const { return grid_->max_dim_z(); } int Grid::max_num_local_z_columns() const { return grid_->max_num_local_z_columns(); } int Grid::max_local_z_length() const { return grid_->max_num_local_xy_planes(); } SpfftProcessingUnitType Grid::processing_unit() const { return grid_->processing_unit(); } int Grid::device_id() const { return grid_->device_id(); } int Grid::num_threads() const { return grid_->num_threads(); } #ifdef SPFFT_MPI MPI_Comm Grid::communicator() const { return grid_->communicator().get(); } #endif } // namespace spfft //--------------------- // C API //--------------------- extern "C" { SpfftError spfft_grid_create(SpfftGrid* grid, int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZSticks, SpfftProcessingUnitType processingUnit, int maxNumThreads) { try { *grid = new spfft::Grid(maxDimX, maxDimY, maxDimZ, maxNumLocalZSticks, processingUnit, maxNumThreads); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #ifdef SPFFT_MPI SpfftError spfft_grid_create_distributed(SpfftGrid* grid, int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZSticks, int maxLocalZLength, SpfftProcessingUnitType processingUnit, int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType) { try { *grid = new spfft::Grid(maxDimX, maxDimY, maxDimZ, maxNumLocalZSticks, maxLocalZLength, processingUnit, maxNumThreads, comm, exchangeType); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SPFFT_EXPORT SpfftError spfft_grid_create_distributed_fortran( SpfftGrid* grid, int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZSticks, int maxLocalZLength, SpfftProcessingUnitType processingUnit, int maxNumThreads, int commFortran, SpfftExchangeType exchangeType) { try { MPI_Comm comm = MPI_Comm_f2c(commFortran); *grid = new spfft::Grid(maxDimX, maxDimY, maxDimZ, maxNumLocalZSticks, maxLocalZLength, processingUnit, maxNumThreads, comm, exchangeType); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #endif SpfftError spfft_grid_destroy(SpfftGrid grid) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { delete reinterpret_cast(grid); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } grid = nullptr; return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_grid_max_dim_x(SpfftGrid grid, int* dimX) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *dimX = reinterpret_cast(grid)->max_dim_x(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_grid_max_dim_y(SpfftGrid grid, int* dimY) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *dimY = reinterpret_cast(grid)->max_dim_y(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_grid_max_dim_z(SpfftGrid grid, int* dimZ) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *dimZ = reinterpret_cast(grid)->max_dim_z(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_grid_max_num_local_z_columns(SpfftGrid grid, int* maxNumLocalZColumns) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *maxNumLocalZColumns = reinterpret_cast(grid)->max_num_local_z_columns(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_grid_max_local_z_length(SpfftGrid grid, int* maxLocalZLength) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *maxLocalZLength = reinterpret_cast(grid)->max_local_z_length(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_grid_processing_unit(SpfftGrid grid, SpfftProcessingUnitType* processingUnit) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *processingUnit = reinterpret_cast(grid)->processing_unit(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_grid_device_id(SpfftGrid grid, int* deviceId) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *deviceId = reinterpret_cast(grid)->device_id(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_grid_num_threads(SpfftGrid grid, int* numThreads) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *numThreads = reinterpret_cast(grid)->num_threads(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #ifdef SPFFT_MPI SpfftError spfft_grid_communicator(SpfftGrid grid, MPI_Comm* comm) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *comm = reinterpret_cast(grid)->communicator(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SPFFT_EXPORT SpfftError spfft_grid_communicator_fortran(SpfftGrid grid, int* commFortran) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *commFortran = MPI_Comm_c2f(reinterpret_cast(grid)->communicator()); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #endif } SpFFT-1.1.0/src/spfft/grid_float.cpp000066400000000000000000000253521457701740000171760ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "spfft/grid_float.hpp" #include "spfft/grid_float.h" #include "spfft/grid_internal.hpp" #ifdef SPFFT_SINGLE_PRECISION namespace spfft { GridFloat::GridFloat(int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZColumns, SpfftProcessingUnitType processingUnit, int maxNumThreads) : grid_(new GridInternal(maxDimX, maxDimY, maxDimZ, maxNumLocalZColumns, processingUnit, maxNumThreads)) {} #ifdef SPFFT_MPI GridFloat::GridFloat(int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZColumns, int maxLocalZLength, SpfftProcessingUnitType processingUnit, int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType) : grid_(new GridInternal(maxDimX, maxDimY, maxDimZ, maxNumLocalZColumns, maxLocalZLength, processingUnit, maxNumThreads, comm, exchangeType)) {} #endif GridFloat::GridFloat(const GridFloat& grid) : grid_(new GridInternal(*(grid.grid_))) {} GridFloat& GridFloat::operator=(const GridFloat& grid) { grid_.reset(new GridInternal(*(grid.grid_))); return *this; } TransformFloat GridFloat::create_transform(SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) const { return TransformFloat(grid_, processingUnit, transformType, dimX, dimY, dimZ, localZLength, numLocalElements, indexFormat, indices); } int GridFloat::max_dim_x() const { return grid_->max_dim_x(); } int GridFloat::max_dim_y() const { 
return grid_->max_dim_y(); } int GridFloat::max_dim_z() const { return grid_->max_dim_z(); } int GridFloat::max_num_local_z_columns() const { return grid_->max_num_local_z_columns(); } int GridFloat::max_local_z_length() const { return grid_->max_num_local_xy_planes(); } SpfftProcessingUnitType GridFloat::processing_unit() const { return grid_->processing_unit(); } int GridFloat::device_id() const { return grid_->device_id(); } int GridFloat::num_threads() const { return grid_->num_threads(); } #ifdef SPFFT_MPI MPI_Comm GridFloat::communicator() const { return grid_->communicator().get(); } #endif } // namespace spfft //--------------------- // C API //--------------------- extern "C" { SpfftError spfft_float_grid_create(SpfftFloatGrid* grid, int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZSticks, SpfftProcessingUnitType processingUnit, int maxNumThreads) { try { *grid = new spfft::GridFloat(maxDimX, maxDimY, maxDimZ, maxNumLocalZSticks, processingUnit, maxNumThreads); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #ifdef SPFFT_MPI SpfftError spfft_float_grid_create_distributed(SpfftFloatGrid* grid, int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZSticks, int maxLocalZLength, SpfftProcessingUnitType processingUnit, int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType) { try { *grid = new spfft::GridFloat(maxDimX, maxDimY, maxDimZ, maxNumLocalZSticks, maxLocalZLength, processingUnit, maxNumThreads, comm, exchangeType); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SPFFT_EXPORT SpfftError spfft_float_grid_create_distributed_fortran( SpfftFloatGrid* grid, int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZSticks, int maxLocalZLength, SpfftProcessingUnitType processingUnit, int maxNumThreads, int commFortran, SpfftExchangeType exchangeType) { try { MPI_Comm comm = MPI_Comm_f2c(commFortran); *grid = new spfft::GridFloat(maxDimX, maxDimY, maxDimZ, maxNumLocalZSticks, maxLocalZLength, processingUnit, maxNumThreads, comm, exchangeType); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #endif SpfftError spfft_float_grid_destroy(SpfftFloatGrid grid) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { delete reinterpret_cast(grid); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } grid = nullptr; return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_grid_max_dim_x(SpfftFloatGrid grid, int* dimX) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *dimX = reinterpret_cast(grid)->max_dim_x(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_grid_max_dim_y(SpfftFloatGrid grid, int* dimY) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *dimY = reinterpret_cast(grid)->max_dim_y(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_grid_max_dim_z(SpfftFloatGrid grid, int* dimZ) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *dimZ = reinterpret_cast(grid)->max_dim_z(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_grid_max_num_local_z_columns(SpfftFloatGrid grid, int* maxNumLocalZColumns) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *maxNumLocalZColumns = reinterpret_cast(grid)->max_num_local_z_columns(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_grid_max_local_z_length(SpfftFloatGrid grid, int* maxLocalZLength) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *maxLocalZLength = reinterpret_cast(grid)->max_local_z_length(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_grid_processing_unit(SpfftFloatGrid grid, SpfftProcessingUnitType* processingUnit) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *processingUnit = reinterpret_cast(grid)->processing_unit(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_grid_device_id(SpfftFloatGrid grid, int* deviceId) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *deviceId = reinterpret_cast(grid)->device_id(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_grid_num_threads(SpfftFloatGrid grid, int* numThreads) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *numThreads = reinterpret_cast(grid)->num_threads(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #ifdef SPFFT_MPI SpfftError spfft_float_grid_communicator(SpfftFloatGrid grid, MPI_Comm* comm) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *comm = reinterpret_cast(grid)->communicator(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SPFFT_EXPORT SpfftError spfft_float_grid_communicator_fortran(SpfftFloatGrid grid, int* commFortran) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *commFortran = MPI_Comm_c2f(reinterpret_cast(grid)->communicator()); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #endif } #endif SpFFT-1.1.0/src/spfft/grid_internal.cpp000066400000000000000000000243121457701740000177000ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "spfft/config.h" #include #include #include #include "spfft/grid_internal.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_check_status.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_match_elementary_type.hpp" #endif #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #include "gpu_util/gpu_device_guard.hpp" #include "gpu_util/gpu_runtime_api.hpp" #endif namespace spfft { template GridInternal::GridInternal(int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZSticks, SpfftProcessingUnitType executionUnit, int numThreads) : isLocal_(true), executionUnit_(executionUnit), deviceId_(0), numThreads_(numThreads), maxDimX_(maxDimX), maxDimY_(maxDimY), maxDimZ_(maxDimZ), maxNumLocalZSticks_(maxNumLocalZSticks), maxNumLocalXYPlanes_(maxDimZ) { // input check if (maxDimX <= 0 || maxDimY <= 0 || maxDimZ <= 0 || maxNumLocalZSticks < 0) { throw InvalidParameterError(); } if (!(executionUnit & (SpfftProcessingUnitType::SPFFT_PU_HOST | SpfftProcessingUnitType::SPFFT_PU_GPU))) { throw 
InvalidParameterError(); } // set number of threads to default omp value if not valid if (numThreads < 1) { numThreads = omp_get_max_threads(); numThreads_ = omp_get_max_threads(); } // allocate memory if (executionUnit & SpfftProcessingUnitType::SPFFT_PU_HOST) { arrayHost1_ = HostArray(static_cast(maxDimX * maxDimY * maxDimZ)); arrayHost2_ = HostArray(static_cast(maxDimX * maxDimY * maxDimZ)); } if (executionUnit & SpfftProcessingUnitType::SPFFT_PU_GPU) { #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) // store device id gpu::check_status(gpu::get_device(&deviceId_)); if (arrayHost1_.empty()) { // not already created for CPU, which always requires at least as much memory arrayHost1_ = HostArray(static_cast(maxNumLocalZSticks * maxDimZ)); arrayHost2_ = HostArray(static_cast(maxDimX * maxDimY * maxDimZ)); } arrayHost1_.pin_memory(); arrayHost2_.pin_memory(); arrayGPU1_ = GPUArray::type>( static_cast(maxNumLocalZSticks * maxDimZ)); arrayGPU2_ = GPUArray::type>( static_cast(maxDimX * maxDimY * maxDimZ)); // each transform will resize the work buffer as needed fftWorkBuffer_.reset(new GPUArray()); #else throw GPUSupportError(); #endif } } #ifdef SPFFT_MPI template GridInternal::GridInternal(int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZSticks, int maxNumLocalXYPlanes, SpfftProcessingUnitType executionUnit, int numThreads, MPI_Comm comm, SpfftExchangeType exchangeType) : isLocal_(false), executionUnit_(executionUnit), deviceId_(0), numThreads_(numThreads), maxDimX_(maxDimX), maxDimY_(maxDimY), maxDimZ_(maxDimZ), maxNumLocalZSticks_(maxNumLocalZSticks), maxNumLocalXYPlanes_(maxNumLocalXYPlanes), comm_(comm), exchangeType_(exchangeType) { // input check if (static_cast(maxDimX) * static_cast(maxDimY) * static_cast(maxNumLocalXYPlanes) > std::numeric_limits::max()) { throw OverflowError(); } if (static_cast(maxNumLocalZSticks) * static_cast(maxDimZ) > std::numeric_limits::max()) { throw OverflowError(); } if (maxDimX <= 0 || maxDimY <= 0 || maxDimZ <= 0 || 
maxNumLocalZSticks < 0) { throw InvalidParameterError(); } if (!(executionUnit & (SpfftProcessingUnitType::SPFFT_PU_HOST | SpfftProcessingUnitType::SPFFT_PU_GPU))) { throw InvalidParameterError(); } if (exchangeType != SpfftExchangeType::SPFFT_EXCH_DEFAULT && exchangeType != SpfftExchangeType::SPFFT_EXCH_BUFFERED && exchangeType != SpfftExchangeType::SPFFT_EXCH_BUFFERED_FLOAT && exchangeType != SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED && exchangeType != SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED_FLOAT && exchangeType != SpfftExchangeType::SPFFT_EXCH_UNBUFFERED) { throw InvalidParameterError(); } // compare parameters between ranks { int errorDetected = 0; int exchangeAll = exchangeType; int executionUnitAll = executionUnit; // Bitwise or will lead to a mismatch on at least one rank if not all values are equal mpi_check_status(MPI_Allreduce(MPI_IN_PLACE, &exchangeAll, 1, MPI_INT, MPI_BOR, comm_.get())); mpi_check_status( MPI_Allreduce(MPI_IN_PLACE, &executionUnitAll, 1, MPI_INT, MPI_BOR, comm_.get())); if (exchangeAll != exchangeType || executionUnitAll != executionUnit) { errorDetected = 1; } // check if any rank has detected an error mpi_check_status(MPI_Allreduce(MPI_IN_PLACE, &errorDetected, 1, MPI_INT, MPI_SUM, comm_.get())); if (errorDetected) { throw MPIParameterMismatchError(); } } // set number of threads to default omp value if not valid if (numThreads < 1) { numThreads = omp_get_max_threads(); numThreads_ = omp_get_max_threads(); } // set default exchange type if (exchangeType == SpfftExchangeType::SPFFT_EXCH_DEFAULT) { exchangeType = SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED; exchangeType_ = SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED; } // mark as local if comm size is 1 if (comm_.size() == 1) isLocal_ = true; int requiredSize = 0; switch (exchangeType) { case SpfftExchangeType::SPFFT_EXCH_BUFFERED: { decltype(maxNumLocalXYPlanes_) globalMaxNumXYPlanes = 0; decltype(maxNumLocalZSticks_) globalMaxNumZSticks = 0; 
MPI_Allreduce(&maxNumLocalXYPlanes_, &globalMaxNumXYPlanes, 1, MPIMatchElementaryType::get(), MPI_MAX, comm); MPI_Allreduce(&maxNumLocalZSticks_, &globalMaxNumZSticks, 1, MPIMatchElementaryType::get(), MPI_MAX, comm); requiredSize = std::max({globalMaxNumXYPlanes * globalMaxNumZSticks * static_cast(comm_.size() + 1), maxDimX_ * maxDimY_ * maxNumLocalXYPlanes_, maxDimZ_ * maxNumLocalZSticks_}); } break; default: { // AUTO or COMPACT_BUFFERED or UNBUFFERED requiredSize = std::max(maxDimX_ * maxDimY_ * maxNumLocalXYPlanes_, maxDimZ_ * maxNumLocalZSticks_); } break; } // Host arrayHost1_ = HostArray(static_cast(requiredSize)); arrayHost2_ = HostArray(static_cast(requiredSize)); // GPU if (executionUnit & SpfftProcessingUnitType::SPFFT_PU_GPU) { #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) // store device id gpu::check_status(gpu::get_device(&deviceId_)); arrayHost1_.pin_memory(); arrayHost2_.pin_memory(); arrayGPU1_ = GPUArray::type>( static_cast(requiredSize)); arrayGPU2_ = GPUArray::type>( static_cast(requiredSize)); // each transform will resize the work buffer as needed fftWorkBuffer_.reset(new GPUArray()); #else throw GPUSupportError(); #endif } } #endif template GridInternal::GridInternal(const GridInternal& grid) : isLocal_(grid.isLocal_), executionUnit_(grid.executionUnit_), deviceId_(grid.deviceId_), numThreads_(grid.numThreads_), maxDimX_(grid.maxDimX_), maxDimY_(grid.maxDimY_), maxDimZ_(grid.maxDimZ_), maxNumLocalZSticks_(grid.maxNumLocalZSticks_), maxNumLocalXYPlanes_(grid.maxNumLocalXYPlanes_), arrayHost1_(grid.arrayHost1_.size()), arrayHost2_(grid.arrayHost2_.size()) { #ifdef SPFFT_MPI if (!grid.isLocal_) comm_ = MPICommunicatorHandle(grid.comm_.get()); exchangeType_ = grid.exchangeType_; #endif #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) if (grid.executionUnit_ & SPFFT_PU_GPU) { GPUDeviceGuard(grid.device_id()); if (grid.arrayGPU1_.size() > 0) arrayGPU1_ = GPUArray::type>(grid.arrayGPU1_.size()); if (grid.arrayGPU2_.size() > 0) arrayGPU2_ = 
GPUArray::type>(grid.arrayGPU2_.size()); if (grid.fftWorkBuffer_) fftWorkBuffer_.reset(new GPUArray(grid.fftWorkBuffer_->size())); } #endif } // instatiate templates for float and double template class GridInternal; #ifdef SPFFT_SINGLE_PRECISION template class GridInternal; #endif } // namespace spfft SpFFT-1.1.0/src/spfft/grid_internal.hpp000066400000000000000000000117311457701740000177060ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_GRID_INTERNAL_HPP #define SPFFT_GRID_INTERNAL_HPP #include "spfft/config.h" #include #include #include #include "memory/host_array.hpp" #include "spfft/types.h" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" #include "util/type_check.hpp" #ifdef SPFFT_MPI #include #include "mpi_util/mpi_communicator_handle.hpp" #endif #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #include "gpu_util/gpu_fft_api.hpp" #include "memory/gpu_array.hpp" #endif namespace spfft { template class GridInternal { public: static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = std::complex; GridInternal(int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZSticks, SpfftProcessingUnitType executionUnit, int numThreads); #ifdef SPFFT_MPI GridInternal(int maxDimX, int maxDimY, int maxDimZ, int maxNumLocalZSticks, int maxNumLocalXYPlanes, SpfftProcessingUnitType executionUnit, int numThreads, MPI_Comm comm, SpfftExchangeType exchangeType); #endif GridInternal(const GridInternal& grid); GridInternal(GridInternal&&) = default; inline GridInternal& operator=(const GridInternal& grid) { *this = GridInternal(grid); return *this; } inline GridInternal& operator=(GridInternal&&) = default; inline auto max_dim_x() const noexcept -> int { return maxDimX_; } inline auto max_dim_y() const noexcept -> int { return maxDimY_; } inline auto max_dim_z() const noexcept -> int { return maxDimZ_; } inline auto max_num_local_z_columns() const noexcept -> int { return maxNumLocalZSticks_; } inline auto max_num_local_xy_planes() const noexcept -> int { return maxNumLocalXYPlanes_; } inline auto device_id() const noexcept -> int { return deviceId_; } inline auto num_threads() const noexcept -> int { return numThreads_; } inline auto array_host_1() -> HostArray& { return arrayHost1_; } inline auto array_host_2() -> HostArray& { return arrayHost2_; } inline auto processing_unit() -> SpfftProcessingUnitType { return 
executionUnit_; } inline auto local() -> bool { return isLocal_; } #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) inline auto array_gpu_1() -> GPUArray::type>& { return arrayGPU1_; } inline auto array_gpu_2() -> GPUArray::type>& { return arrayGPU2_; } inline auto fft_work_buffer() -> const std::shared_ptr>& { assert(fftWorkBuffer_); return fftWorkBuffer_; } #endif #ifdef SPFFT_MPI inline auto communicator() const -> const MPICommunicatorHandle& { return comm_; } inline auto exchange_type() const -> SpfftExchangeType { return exchangeType_; } #endif private: bool isLocal_; SpfftProcessingUnitType executionUnit_; int deviceId_, numThreads_; int maxDimX_, maxDimY_, maxDimZ_, maxNumLocalZSticks_, maxNumLocalXYPlanes_; HostArray arrayHost1_; HostArray arrayHost2_; #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) GPUArray::type> arrayGPU1_; GPUArray::type> arrayGPU2_; std::shared_ptr> fftWorkBuffer_; #endif #ifdef SPFFT_MPI MPICommunicatorHandle comm_; SpfftExchangeType exchangeType_; #endif }; } // namespace spfft #endif SpFFT-1.1.0/src/spfft/multi_transform.cpp000066400000000000000000000133601457701740000203050ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "spfft/multi_transform.h" #include #include "spfft/config.h" #include "spfft/multi_transform.hpp" #include "spfft/multi_transform_internal.hpp" #include "spfft/types.h" namespace spfft { void multi_transform_forward(int numTransforms, Transform* transforms, const SpfftProcessingUnitType* inputLocations, double* const* outputPointers, const SpfftScalingType* scalingTypes) { MultiTransformInternal::forward(numTransforms, transforms, inputLocations, outputPointers, scalingTypes); } void multi_transform_backward(int numTransforms, Transform* transforms, const double* const* inputPointers, const SpfftProcessingUnitType* outputLocations) { MultiTransformInternal::backward(numTransforms, transforms, inputPointers, outputLocations); } void multi_transform_forward(int numTransforms, Transform* transforms, const double* const* inputPointers, double* const* outputPointers, const SpfftScalingType* scalingTypes) { MultiTransformInternal::forward(numTransforms, transforms, inputPointers, outputPointers, scalingTypes); } void multi_transform_backward(int numTransforms, Transform* transforms, const double* const* inputPointers, double* const* outputPointers) { 
MultiTransformInternal::backward(numTransforms, transforms, inputPointers, outputPointers); } } // namespace spfft extern "C" { SpfftError spfft_multi_transform_forward(int numTransforms, SpfftTransform* transforms, const SpfftProcessingUnitType* inputLocations, double* const* outputPointers, const SpfftScalingType* scalingTypes) { try { multi_transform_forward(numTransforms, reinterpret_cast(transforms), inputLocations, outputPointers, scalingTypes); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_multi_transform_forward_ptr(int numTransforms, SpfftTransform* transforms, const double* const* inputPointers, double* const* outputPointers, const SpfftScalingType* scalingTypes) { try { multi_transform_forward(numTransforms, reinterpret_cast(transforms), inputPointers, outputPointers, scalingTypes); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_multi_transform_backward(int numTransforms, SpfftTransform* transforms, const double* const* inputPointers, const SpfftProcessingUnitType* outputLocations) { try { multi_transform_backward(numTransforms, reinterpret_cast(transforms), inputPointers, outputLocations); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_multi_transform_backward_ptr(int numTransforms, SpfftTransform* transforms, const double* const* inputPointers, double* const* outputPointers) { try { multi_transform_backward(numTransforms, reinterpret_cast(transforms), inputPointers, outputPointers); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } } SpFFT-1.1.0/src/spfft/multi_transform_float.cpp000066400000000000000000000140211457701740000214650ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/
#include "spfft/multi_transform_float.h"
#include "spfft/config.h"
#include "spfft/multi_transform_float.hpp"
#include "spfft/multi_transform_internal.hpp"
#include "spfft/types.h"

namespace spfft {

#ifdef SPFFT_SINGLE_PRECISION
// Runs the forward transform of numTransforms single precision transforms as one batch.
// Entry i of inputLocations, outputPointers and scalingTypes belongs to transforms[i].
// Thin wrapper around MultiTransformInternal::forward.
void multi_transform_forward(int numTransforms, TransformFloat* transforms,
                             const SpfftProcessingUnitType* inputLocations,
                             float* const* outputPointers, const SpfftScalingType* scalingTypes) {
  MultiTransformInternal::forward(numTransforms, transforms, inputLocations, outputPointers,
                                  scalingTypes);
}

// Runs the backward transform of numTransforms transforms as one batch.
// Entry i of inputPointers and outputLocations belongs to transforms[i].
// Thin wrapper around MultiTransformInternal::backward.
void multi_transform_backward(int numTransforms, TransformFloat* transforms,
                              const float* const* inputPointers,
                              const SpfftProcessingUnitType* outputLocations) {
  MultiTransformInternal::backward(numTransforms, transforms, inputPointers, outputLocations);
}

// Same as the forward overload above, but the input of each transform is given by an explicit
// pointer (inputPointers[i]) instead of a processing unit location.
void multi_transform_forward(int numTransforms, TransformFloat* transforms,
                             const float* const* inputPointers, float* const* outputPointers,
                             const SpfftScalingType* scalingTypes) {
  MultiTransformInternal::forward(numTransforms, transforms, inputPointers, outputPointers,
                                  scalingTypes);
}

// Same as the backward overload above, but the output of each transform is given by an explicit
// pointer (outputPointers[i]) instead of a processing unit location.
void multi_transform_backward(int numTransforms, TransformFloat* transforms,
                              const float* const* inputPointers, float* const* outputPointers) {
  MultiTransformInternal::backward(numTransforms, transforms, inputPointers, outputPointers);
}
#endif

}  // namespace spfft

extern "C" {
// C wrappers around the functions above. Each one translates exceptions into error codes:
// the code stored in a spfft::GenericError, SPFFT_UNKNOWN_ERROR for any other exception and
// SPFFT_SUCCESS if nothing was thrown.

// Batched forward transform; inputs are selected by processing unit location.
SpfftError spfft_float_multi_transform_forward(int numTransforms, SpfftFloatTransform* transforms,
                                               const SpfftProcessingUnitType* inputLocations,
                                               float* const* outputPointers,
                                               const SpfftScalingType* scalingTypes) {
  try {
    multi_transform_forward(numTransforms, reinterpret_cast(transforms), inputLocations,
                            outputPointers, scalingTypes);
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Batched forward transform; inputs are given as explicit pointers.
SpfftError spfft_float_multi_transform_forward_ptr(int numTransforms,
                                                   SpfftFloatTransform* transforms,
                                                   const float* const* inputPointers,
                                                   float* const* outputPointers,
                                                   const SpfftScalingType* scalingTypes) {
  try {
    multi_transform_forward(numTransforms, reinterpret_cast(transforms), inputPointers,
                            outputPointers, scalingTypes);
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Batched backward transform; outputs are selected by processing unit location.
SpfftError spfft_float_multi_transform_backward(int numTransforms, SpfftFloatTransform* transforms,
                                                const float* const* inputPointers,
                                                const SpfftProcessingUnitType* outputLocations) {
  try {
    multi_transform_backward(numTransforms, reinterpret_cast(transforms), inputPointers,
                             outputLocations);
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Batched backward transform; outputs are given as explicit pointers.
SpfftError spfft_float_multi_transform_backward_ptr(int numTransforms,
                                                    SpfftFloatTransform* transforms,
                                                    const float* const* inputPointers,
                                                    float* const* outputPointers) {
  try {
    multi_transform_backward(numTransforms, reinterpret_cast(transforms), inputPointers,
                             outputPointers);
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}
}
SpFFT-1.1.0/src/spfft/multi_transform_internal.hpp000066400000000000000000000154471457701740000222160ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/
#ifndef SPFFT_MULTI_TRANSFORM_INTERNAL_HPP
#define SPFFT_MULTI_TRANSFORM_INTERNAL_HPP

// NOTE(review): the header name of the next include is missing in this copy of the file;
// std::vector is used below - confirm against the upstream source.
#include
#include "spfft/exceptions.hpp"
#include "spfft/transform.hpp"
#ifdef SPFFT_SINGLE_PRECISION
#include "spfft/transform_float.hpp"
#endif
#include "spfft/transform_internal.hpp"
#include "timing/timing.hpp"

namespace spfft {

// Executes several transforms as one batch by splitting each transform into its stages and
// interleaving the stages of GPU and non-GPU transforms.
// NOTE(review): the template parameter list is missing in this copy of the file; TransformType
// is used as the type parameter below.
template class MultiTransformInternal {
public:
  using ValueType = typename TransformType::ValueType;

  // Forward transform where the input of transform i is the space domain buffer returned by
  // transforms[i].space_domain_data(inputLocations[i]). Collects these pointers and delegates to
  // the pointer based overload.
  inline static auto forward(const int numTransforms, TransformType* transforms,
                             const SpfftProcessingUnitType* inputLocations,
                             ValueType* const* outputPointers,
                             const SpfftScalingType* scalingTypes) -> void {
    std::vector inputPointers(numTransforms);
    for (int i = 0; i < numTransforms; ++i) {
      inputPointers[i] = transforms[i].space_domain_data(inputLocations[i]);
    }
    MultiTransformInternal::forward(numTransforms, transforms, inputPointers.data(),
                                    outputPointers, scalingTypes);
  }

  // Forward transform with explicit input and output pointers; entry t of every array belongs
  // to transforms[t]. Throws InvalidParameterError if any two transforms share a grid.
  inline static auto forward(const int numTransforms, TransformType* transforms,
                             const ValueType* const* inputPointers,
                             ValueType* const* outputPointers,
                             const SpfftScalingType* scalingTypes) -> void {
    HOST_TIMING_SCOPED("forward")

    // transforms must not share grids (every pair is checked)
    for (int t1 = 0; t1 < numTransforms; ++t1) {
      for (int t2 = t1 + 1; t2 < numTransforms; ++t2) {
        if (transforms[t1].transform_->shared_grid(*(transforms[t2].transform_))) {
          throw InvalidParameterError();
        }
      }
    }

    // launch all gpu transforms first
    for (int t = 0; t < numTransforms; ++t) {
      if (transforms[t].transform_->processing_unit() == SPFFT_PU_GPU) {
        transforms[t].transform_->forward_xy(inputPointers[t]);
      }
    }

    // launch all cpu transforms including MPI exchange
    for (int t = 0; t < numTransforms; ++t) {
      if (transforms[t].transform_->processing_unit() != SPFFT_PU_GPU) {
        transforms[t].transform_->forward_xy(inputPointers[t]);
        transforms[t].transform_->forward_exchange();
      }
    }

    // launch all GPU MPI exchanges and transform
    for (int t = 0; t < numTransforms; ++t) {
      if (transforms[t].transform_->processing_unit() == SPFFT_PU_GPU) {
        transforms[t].transform_->forward_exchange();
        transforms[t].transform_->forward_z(outputPointers[t], scalingTypes[t]);
      }
    }

    // launch all remaining cpu transforms
    for (int t = 0; t < numTransforms; ++t) {
      if (transforms[t].transform_->processing_unit() != SPFFT_PU_GPU) {
        transforms[t].transform_->forward_z(outputPointers[t], scalingTypes[t]);
      }
    }

    // synchronize all transforms
    for (int t = 0; t < numTransforms; ++t) {
      transforms[t].transform_->synchronize();
    }
  }

  // Backward transform where the output of transform i is the space domain buffer returned by
  // transforms[i].space_domain_data(outputLocations[i]). Collects these pointers and delegates
  // to the pointer based overload.
  inline static auto backward(const int numTransforms, TransformType* transforms,
                              const ValueType* const* inputPointers,
                              const SpfftProcessingUnitType* outputLocations) -> void {
    std::vector outputPointers(numTransforms);
    for (int i = 0; i < numTransforms; ++i) {
      outputPointers[i] = transforms[i].space_domain_data(outputLocations[i]);
    }
    MultiTransformInternal::backward(numTransforms, transforms, inputPointers,
                                     outputPointers.data());
  }

  // Backward transform with explicit input and output pointers; entry t of every array belongs
  // to transforms[t]. Throws InvalidParameterError if any two transforms share a grid.
  // The stages run in the reverse order of forward(): z, exchange, xy.
  inline static auto backward(const int numTransforms, TransformType* transforms,
                              const ValueType* const* inputPointers,
                              ValueType* const* outputPointers) -> void {
    HOST_TIMING_SCOPED("backward")

    // transforms must not share grids (every pair is checked)
    for (int t1 = 0; t1 < numTransforms; ++t1) {
      for (int t2 = t1 + 1; t2 < numTransforms; ++t2) {
        if (transforms[t1].transform_->shared_grid(*(transforms[t2].transform_))) {
          throw InvalidParameterError();
        }
      }
    }

    // launch all gpu transforms first
    for (int t = 0; t < numTransforms; ++t) {
      if (transforms[t].transform_->processing_unit() == SPFFT_PU_GPU) {
        transforms[t].transform_->backward_z(inputPointers[t]);
      }
    }

    // launch all cpu transforms including MPI exchange
    for (int t = 0; t < numTransforms; ++t) {
      if (transforms[t].transform_->processing_unit() != SPFFT_PU_GPU) {
        transforms[t].transform_->backward_z(inputPointers[t]);
        transforms[t].transform_->backward_exchange();
      }
    }

    // launch all GPU MPI exchanges and transform
    for (int t = 0; t < numTransforms; ++t) {
      if (transforms[t].transform_->processing_unit() == SPFFT_PU_GPU) {
        transforms[t].transform_->backward_exchange();
        transforms[t].transform_->backward_xy(outputPointers[t]);
      }
    }

    // launch all remaining cpu transforms
    for (int t = 0; t < numTransforms; ++t) {
      if (transforms[t].transform_->processing_unit() != SPFFT_PU_GPU) {
        transforms[t].transform_->backward_xy(outputPointers[t]);
      }
    }

    // synchronize all transforms
    for (int t = 0; t < numTransforms; ++t) {
      transforms[t].transform_->synchronize();
    }
  }
};

}  // namespace spfft

#endif
SpFFT-1.1.0/src/spfft/transform.cpp000066400000000000000000000473121457701740000170770ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */
#include "spfft/transform.hpp"
#include "parameters/parameters.hpp"
#include "spfft/grid.hpp"
#include "spfft/grid_internal.hpp"
#include "spfft/transform.h"
#include "spfft/transform_internal.hpp"
#ifdef SPFFT_MPI
#include "mpi_util/mpi_communicator_handle.hpp"
#endif

namespace spfft {

//---------------------
// Double precision
//---------------------

// Creates a transform that uses an already existing grid.
// Throws InvalidParameterError if a dimension, localZLength or numLocalElements is negative, or
// if indices is null while numLocalElements > 0.
Transform::Transform(const std::shared_ptr>& grid, SpfftProcessingUnitType processingUnit,
                     SpfftTransformType transformType, int dimX, int dimY, int dimZ,
                     int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat,
                     const int* indices) {
  if (dimX < 0 || dimY < 0 || dimZ < 0 || localZLength < 0 || numLocalElements < 0 ||
      (!indices && numLocalElements > 0)) {
    throw InvalidParameterError();
  }
  std::shared_ptr param;
  if (!grid->local()) {
    // Non-local grid: the parameters are built with the grid's communicator. Without MPI
    // support at compile time this path throws MPISupportError.
#ifdef SPFFT_MPI
    param.reset(new Parameters(grid->communicator(), transformType, dimX, dimY, dimZ, localZLength,
                               numLocalElements, indexFormat, indices));
#else
    throw MPISupportError();
#endif
  } else {
    // Local grid: localZLength is not passed on to Parameters.
    param.reset(
        new Parameters(transformType, dimX, dimY, dimZ, numLocalElements, indexFormat, indices));
  }
  transform_.reset(new TransformInternal(processingUnit, grid, std::move(param)));
}

// Creates a local transform that owns a grid of its own.
// Throws InvalidParameterError if a dimension or numLocalElements is negative, or if indices is
// null while numLocalElements > 0.
Transform::Transform(int maxNumThreads, SpfftProcessingUnitType processingUnit,
                     SpfftTransformType transformType, int dimX, int dimY, int dimZ,
                     int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) {
  if (dimX < 0 || dimY < 0 || dimZ < 0 ||
numLocalElements < 0 || (!indices && numLocalElements > 0)) { throw InvalidParameterError(); } std::shared_ptr param (new Parameters(transformType, dimX, dimY, dimZ, numLocalElements, indexFormat, indices)); std::shared_ptr> grid(new GridInternal(dimX, dimY, dimZ, param->max_num_z_sticks(), processingUnit, maxNumThreads)); transform_.reset( new TransformInternal(processingUnit, std::move(grid), std::move(param))); } #ifdef SPFFT_MPI Transform::Transform(int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) { if (dimX < 0 || dimY < 0 || dimZ < 0 || numLocalElements < 0 || (!indices && numLocalElements > 0)) { throw InvalidParameterError(); } std::shared_ptr param(new Parameters(MPICommunicatorHandle(comm), transformType, dimX, dimY, dimZ, localZLength, numLocalElements, indexFormat, indices)); std::shared_ptr> grid( new GridInternal(dimX, dimY, dimZ, param->max_num_z_sticks(), localZLength, processingUnit, maxNumThreads, comm, exchangeType)); transform_.reset( new TransformInternal(processingUnit, std::move(grid), std::move(param))); } #endif Transform::Transform(std::shared_ptr> transform) : transform_(std::move(transform)) {} Transform Transform::clone() const { return Transform(std::shared_ptr>( new TransformInternal(transform_->clone()))); } double* Transform::space_domain_data(SpfftProcessingUnitType dataLocation) { return transform_->space_domain_data(dataLocation); } void Transform::forward(SpfftProcessingUnitType inputLocation, double* output, SpfftScalingType scaling) { transform_->forward(inputLocation, output, scaling); } void Transform::forward(const double* input, double* output, SpfftScalingType scaling) { transform_->forward(input, output, scaling); } void Transform::backward(const double* input, SpfftProcessingUnitType outputLocation) { 
transform_->backward(input, outputLocation);
}

// Backward transform with explicit input and output pointers.
void Transform::backward(const double* input, double* output) {
  transform_->backward(input, output);
}

// The accessors below forward to the internal transform object.
SpfftTransformType Transform::type() const { return transform_->type(); }

int Transform::dim_x() const { return transform_->dim_x(); }

int Transform::dim_y() const { return transform_->dim_y(); }

int Transform::dim_z() const { return transform_->dim_z(); }

// The local z length is the number of local xy-planes of the internal transform.
int Transform::local_z_length() const { return transform_->num_local_xy_planes(); }

// The local z offset is the offset of the first local xy-plane.
int Transform::local_z_offset() const { return transform_->local_xy_plane_offset(); }

// Product dim_x * dim_y * local_z_length, evaluated in int arithmetic.
int Transform::local_slice_size() const { return dim_x() * dim_y() * local_z_length(); }

int Transform::num_local_elements() const { return transform_->num_local_elements(); }

long long int Transform::num_global_elements() const { return transform_->num_global_elements(); }

long long int Transform::global_size() const { return transform_->global_size(); }

SpfftProcessingUnitType Transform::processing_unit() const {
  return transform_->processing_unit();
}

int Transform::device_id() const { return transform_->device_id(); }

int Transform::num_threads() const { return transform_->num_threads(); }

SpfftExecType Transform::execution_mode() const {return transform_->execution_mode();}

void Transform::set_execution_mode(SpfftExecType mode) {return transform_->set_execution_mode(mode);}

#ifdef SPFFT_MPI
MPI_Comm Transform::communicator() const { return transform_->communicator(); }
#endif

}  // namespace spfft

//---------------------
// C API
//---------------------

extern "C" {
// Creates a transform on an existing grid handle and stores the new handle in *transform.
// Exceptions are translated into error codes: the code stored in a spfft::GenericError,
// SPFFT_UNKNOWN_ERROR for any other exception, SPFFT_SUCCESS if nothing was thrown.
SpfftError spfft_transform_create(SpfftTransform* transform, SpfftGrid grid,
                                  SpfftProcessingUnitType processingUnit,
                                  SpfftTransformType transformType, int dimX, int dimY, int dimZ,
                                  int localZLength, int numLocalElements,
                                  SpfftIndexFormatType indexFormat, const int* indices) {
  try {
    *transform = new spfft::Transform(reinterpret_cast(grid)->create_transform(
        processingUnit, transformType, dimX, dimY, dimZ, localZLength,
numLocalElements, indexFormat, indices));
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Creates a local transform that does not need a separately created grid and stores the new
// handle in *transform. Exceptions are translated into error codes: the code stored in a
// spfft::GenericError, SPFFT_UNKNOWN_ERROR for any other exception, SPFFT_SUCCESS otherwise.
SpfftError spfft_transform_create_independent(SpfftTransform* transform, int maxNumThreads,
                                              SpfftProcessingUnitType processingUnit,
                                              SpfftTransformType transformType, int dimX, int dimY,
                                              int dimZ, int numLocalElements,
                                              SpfftIndexFormatType indexFormat,
                                              const int* indices) {
  try {
    *transform = new spfft::Transform(maxNumThreads, processingUnit, transformType, dimX, dimY,
                                      dimZ, numLocalElements, indexFormat, indices);
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

#ifdef SPFFT_MPI
// Creates a distributed transform on communicator comm that does not need a separately created
// grid and stores the new handle in *transform. Same error translation as above.
SpfftError spfft_transform_create_independent_distributed(
    SpfftTransform* transform, int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType,
    SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY,
    int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat,
    const int* indices) {
  try {
    *transform = new spfft::Transform(maxNumThreads, comm, exchangeType, processingUnit,
                                      transformType, dimX, dimY, dimZ, localZLength,
                                      numLocalElements, indexFormat, indices);
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...)
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SPFFT_EXPORT SpfftError spfft_transform_create_independent_distributed_fortran( SpfftTransform* transform, int maxNumThreads, int commFortran, SpfftExchangeType exchangeType, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) { MPI_Comm comm = MPI_Comm_f2c(commFortran); return spfft_transform_create_independent_distributed( transform, maxNumThreads, comm, exchangeType, processingUnit, transformType, dimX, dimY, dimZ, localZLength, numLocalElements, indexFormat, indices); } #endif SpfftError spfft_transform_destroy(SpfftTransform transform) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { delete reinterpret_cast(transform); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } transform = nullptr; return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_transform_clone(SpfftTransform transform, SpfftTransform* newTransform) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *newTransform = new spfft::Transform(reinterpret_cast(transform)->clone()); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_transform_forward(SpfftTransform transform, SpfftProcessingUnitType inputLocation, double* output, SpfftScalingType scaling) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { reinterpret_cast(transform)->forward(inputLocation, output, scaling); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// The wrappers below share one error convention: SPFFT_INVALID_HANDLE_ERROR for a null handle,
// the code stored in a caught spfft::GenericError, SPFFT_UNKNOWN_ERROR for any other exception
// and SPFFT_SUCCESS otherwise.

// Forward transform with explicit input and output pointers.
SpfftError spfft_transform_forward_ptr(SpfftTransform transform, const double* input,
                                       double* output, SpfftScalingType scaling) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    reinterpret_cast(transform)->forward(input, output, scaling);
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Backward transform; the output is selected by processing unit location.
SpfftError spfft_transform_backward(SpfftTransform transform, const double* input,
                                    SpfftProcessingUnitType outputLocation) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    reinterpret_cast(transform)->backward(input, outputLocation);
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Backward transform with explicit input and output pointers.
SpfftError spfft_transform_backward_ptr(SpfftTransform transform, const double* input,
                                        double* output) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    reinterpret_cast(transform)->backward(input, output);
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Stores the space domain data pointer for dataLocation in *data.
SpfftError spfft_transform_get_space_domain(SpfftTransform transform,
                                            SpfftProcessingUnitType dataLocation, double** data) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *data = reinterpret_cast(transform)->space_domain_data(dataLocation);
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...)
{
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// The getters below share one error convention: SPFFT_INVALID_HANDLE_ERROR for a null handle,
// the code stored in a caught spfft::GenericError, SPFFT_UNKNOWN_ERROR for any other exception
// and SPFFT_SUCCESS otherwise. The value is written through the output pointer.

// Stores dim_x() of the transform in *dimX.
SpfftError spfft_transform_dim_x(SpfftTransform transform, int* dimX) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *dimX = reinterpret_cast(transform)->dim_x();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Stores dim_y() of the transform in *dimY.
SpfftError spfft_transform_dim_y(SpfftTransform transform, int* dimY) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *dimY = reinterpret_cast(transform)->dim_y();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Stores dim_z() of the transform in *dimZ.
SpfftError spfft_transform_dim_z(SpfftTransform transform, int* dimZ) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *dimZ = reinterpret_cast(transform)->dim_z();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Stores local_z_length() of the transform in *localZLength.
SpfftError spfft_transform_local_z_length(SpfftTransform transform, int* localZLength) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *localZLength = reinterpret_cast(transform)->local_z_length();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Stores local_slice_size() of the transform in *size.
SpfftError spfft_transform_local_slice_size(SpfftTransform transform, int* size) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *size = reinterpret_cast(transform)->local_slice_size();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...)
{
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// The getters below share one error convention: SPFFT_INVALID_HANDLE_ERROR for a null handle,
// the code stored in a caught spfft::GenericError, SPFFT_UNKNOWN_ERROR for any other exception
// and SPFFT_SUCCESS otherwise. The value is written through the output pointer.

// Stores local_z_offset() of the transform in *offset.
SpfftError spfft_transform_local_z_offset(SpfftTransform transform, int* offset) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *offset = reinterpret_cast(transform)->local_z_offset();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Stores num_local_elements() of the transform in *numLocalElements.
// This must query num_local_elements(), not local_z_length(): the two values are unrelated.
SpfftError spfft_transform_num_local_elements(SpfftTransform transform, int* numLocalElements) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *numLocalElements = reinterpret_cast(transform)->num_local_elements();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Stores num_global_elements() of the transform in *numGlobalElements.
SpfftError spfft_transform_num_global_elements(SpfftTransform transform,
                                               long long int* numGlobalElements) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *numGlobalElements = reinterpret_cast(transform)->num_global_elements();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Stores global_size() of the transform in *globalSize.
SpfftError spfft_transform_global_size(SpfftTransform transform, long long int* globalSize) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *globalSize = reinterpret_cast(transform)->global_size();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Stores device_id() of the transform in *deviceId.
SpfftError spfft_transform_device_id(SpfftTransform transform, int* deviceId) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *deviceId = reinterpret_cast(transform)->device_id();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...)
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_transform_num_threads(SpfftTransform transform, int* numThreads) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *numThreads = reinterpret_cast(transform)->num_threads(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_transform_execution_mode(SpfftTransform transform, SpfftExecType* mode) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *mode = reinterpret_cast(transform)->execution_mode(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_transform_set_execution_mode(SpfftTransform transform, SpfftExecType mode) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { reinterpret_cast(transform)->set_execution_mode(mode); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #ifdef SPFFT_MPI SpfftError spfft_transform_communicator(SpfftTransform transform, MPI_Comm* comm) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *comm = reinterpret_cast(transform)->communicator(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SPFFT_EXPORT SpfftError spfft_transform_communicator_fortran(SpfftGrid grid, int* commFortran) { if (!grid) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *commFortran = MPI_Comm_c2f(reinterpret_cast(grid)->communicator()); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #endif } // extern C SpFFT-1.1.0/src/spfft/transform_float.cpp000066400000000000000000000507531457701740000202670ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include "spfft/transform_float.hpp" #include "spfft/grid_float.hpp" #include "spfft/transform_float.h" #include "spfft/transform_internal.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_communicator_handle.hpp" #endif #ifdef SPFFT_SINGLE_PRECISION namespace spfft { TransformFloat::TransformFloat(const std::shared_ptr>& grid, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) { std::shared_ptr param; if (!grid->local()) { #ifdef SPFFT_MPI param.reset(new Parameters(grid->communicator(), transformType, dimX, dimY, dimZ, localZLength, numLocalElements, indexFormat, indices)); #else throw MPISupportError(); #endif } else { param.reset( new Parameters(transformType, dimX, dimY, dimZ, numLocalElements, indexFormat, indices)); } transform_.reset(new TransformInternal(processingUnit, grid, std::move(param))); } TransformFloat::TransformFloat(int maxNumThreads, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) { if (dimX < 0 || dimY < 0 || dimZ < 0 || numLocalElements < 0 || (!indices && numLocalElements > 0)) { throw InvalidParameterError(); } std::shared_ptr param (new Parameters(transformType, dimX, dimY, dimZ, numLocalElements, indexFormat, indices)); std::shared_ptr> grid(new GridInternal(dimX, dimY, dimZ, param->max_num_z_sticks(), processingUnit, maxNumThreads)); transform_.reset( new TransformInternal(processingUnit, std::move(grid), std::move(param))); } #ifdef SPFFT_MPI TransformFloat::TransformFloat(int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) { if (dimX < 0 || dimY < 0 || 
dimZ < 0 || numLocalElements < 0 || (!indices && numLocalElements > 0)) { throw InvalidParameterError(); } std::shared_ptr param(new Parameters(MPICommunicatorHandle(comm), transformType, dimX, dimY, dimZ, localZLength, numLocalElements, indexFormat, indices)); std::shared_ptr> grid( new GridInternal(dimX, dimY, dimZ, param->max_num_z_sticks(), localZLength, processingUnit, maxNumThreads, comm, exchangeType)); transform_.reset( new TransformInternal(processingUnit, std::move(grid), std::move(param))); } #endif TransformFloat::TransformFloat(std::shared_ptr> transform) : transform_(std::move(transform)) {} TransformFloat TransformFloat::clone() const { return TransformFloat( std::shared_ptr>(new TransformInternal(transform_->clone()))); } float* TransformFloat::space_domain_data(SpfftProcessingUnitType dataLocation) { return transform_->space_domain_data(dataLocation); } void TransformFloat::forward(SpfftProcessingUnitType inputLocation, float* output, SpfftScalingType scaling) { transform_->forward(inputLocation, output, scaling); } void TransformFloat::forward(const float* input, float* output, SpfftScalingType scaling) { transform_->forward(input, output, scaling); } void TransformFloat::backward(const float* input, SpfftProcessingUnitType outputLocation) { transform_->backward(input, outputLocation); } void TransformFloat::backward(const float* input, float* ouput) { transform_->backward(input, ouput); } SpfftTransformType TransformFloat::type() const { return transform_->type(); } int TransformFloat::dim_x() const { return transform_->dim_x(); } int TransformFloat::dim_y() const { return transform_->dim_y(); } int TransformFloat::dim_z() const { return transform_->dim_z(); } int TransformFloat::local_z_length() const { return transform_->num_local_xy_planes(); } int TransformFloat::local_z_offset() const { return transform_->local_xy_plane_offset(); } int TransformFloat::local_slice_size() const { return dim_x() * dim_y() * local_z_length(); } int 
TransformFloat::num_local_elements() const { return transform_->num_local_elements(); }

// The accessors below forward to the internal transform object.
long long int TransformFloat::num_global_elements() const {
  return transform_->num_global_elements();
}

long long int TransformFloat::global_size() const { return transform_->global_size(); }

SpfftProcessingUnitType TransformFloat::processing_unit() const {
  return transform_->processing_unit();
}

int TransformFloat::device_id() const { return transform_->device_id(); }

int TransformFloat::num_threads() const { return transform_->num_threads(); }

SpfftExecType TransformFloat::execution_mode() const {return transform_->execution_mode();}

void TransformFloat::set_execution_mode(SpfftExecType mode) {return transform_->set_execution_mode(mode);}

#ifdef SPFFT_MPI
MPI_Comm TransformFloat::communicator() const { return transform_->communicator(); }
#endif

}  // namespace spfft

//---------------------
// C API
//---------------------

extern "C" {
// Creates a single precision transform on an existing grid handle and stores the new handle in
// *transform. Exceptions are translated into error codes: the code stored in a
// spfft::GenericError, SPFFT_UNKNOWN_ERROR for any other exception, SPFFT_SUCCESS otherwise.
SpfftError spfft_float_transform_create(SpfftFloatTransform* transform, SpfftFloatGrid grid,
                                        SpfftProcessingUnitType processingUnit,
                                        SpfftTransformType transformType, int dimX, int dimY,
                                        int dimZ, int localZLength, int numLocalElements,
                                        SpfftIndexFormatType indexFormat, const int* indices) {
  try {
    *transform = new spfft::TransformFloat(reinterpret_cast(grid)->create_transform(
        processingUnit, transformType, dimX, dimY, dimZ, localZLength, numLocalElements,
        indexFormat, indices));
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...)
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_create_independent( SpfftFloatTransform* transform, int maxNumThreads, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) { try { *transform = new spfft::TransformFloat(maxNumThreads, processingUnit, transformType, dimX, dimY, dimZ, numLocalElements, indexFormat, indices); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #ifdef SPFFT_MPI SpfftError spfft_float_transform_create_independent_distributed( SpfftFloatTransform* transform, int maxNumThreads, MPI_Comm comm, SpfftExchangeType exchangeType, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) { try { *transform = new spfft::TransformFloat(maxNumThreads, comm, exchangeType, processingUnit, transformType, dimX, dimY, dimZ, localZLength, numLocalElements, indexFormat, indices); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SPFFT_EXPORT SpfftError spfft_float_transform_create_independent_distributed_fortran( SpfftFloatTransform* transform, int maxNumThreads, int commFortran, SpfftExchangeType exchangeType, SpfftProcessingUnitType processingUnit, SpfftTransformType transformType, int dimX, int dimY, int dimZ, int localZLength, int numLocalElements, SpfftIndexFormatType indexFormat, const int* indices) { MPI_Comm comm = MPI_Comm_f2c(commFortran); return spfft_float_transform_create_independent_distributed( transform, maxNumThreads, comm, exchangeType, processingUnit, transformType, dimX, dimY, dimZ, localZLength, numLocalElements, indexFormat, indices); } #endif SpfftError spfft_float_transform_destroy(SpfftFloatTransform transform) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { delete reinterpret_cast(transform); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } transform = nullptr; return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_clone(SpfftFloatTransform transform, SpfftFloatTransform* newTransform) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *newTransform = new spfft::TransformFloat(reinterpret_cast(transform)->clone()); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_forward(SpfftFloatTransform transform, SpfftProcessingUnitType inputLocation, float* output, SpfftScalingType scaling) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { reinterpret_cast(transform)->forward(inputLocation, output, scaling); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_forward_ptr(SpfftFloatTransform transform, const float* input, float* output, SpfftScalingType scaling) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { reinterpret_cast(transform)->forward(input, output, scaling); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_backward(SpfftFloatTransform transform, const float* input, SpfftProcessingUnitType outputLocation) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { reinterpret_cast(transform)->backward(input, outputLocation); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_backward_ptr(SpfftFloatTransform transform, const float* input, float* output) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { reinterpret_cast(transform)->backward(input, output); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_get_space_domain(SpfftFloatTransform transform, SpfftProcessingUnitType dataLocation, float** data) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *data = reinterpret_cast(transform)->space_domain_data(dataLocation); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_dim_x(SpfftFloatTransform transform, int* dimX) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *dimX = reinterpret_cast(transform)->dim_x(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_dim_y(SpfftFloatTransform transform, int* dimY) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *dimY = reinterpret_cast(transform)->dim_y(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_dim_z(SpfftFloatTransform transform, int* dimZ) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *dimZ = reinterpret_cast(transform)->dim_z(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_local_z_length(SpfftFloatTransform transform, int* localZLength) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *localZLength = reinterpret_cast(transform)->local_z_length(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_local_z_offset(SpfftFloatTransform transform, int* offset) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *offset = reinterpret_cast(transform)->local_z_offset(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// NOTE(review): the target type of every reinterpret_cast below appears to have been lost from
// this text (angle-bracket contents are missing); the casts are reproduced as found.

// Writes the value of TransformFloat::local_slice_size() to *size.
// Returns SPFFT_INVALID_HANDLE_ERROR for a null handle, the error code of a caught
// spfft::GenericError, SPFFT_UNKNOWN_ERROR for any other exception, SPFFT_SUCCESS otherwise.
SpfftError spfft_float_transform_local_slice_size(SpfftFloatTransform transform, int* size) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *size = reinterpret_cast(transform)->local_slice_size();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Writes the value of TransformFloat::num_local_elements() to the output argument.
// The output parameter keeps its historical name "localZLength"; it receives the number of
// local elements, not the z length. Previously this wrapper forwarded to local_z_length(),
// which made it return the same value as spfft_float_transform_local_z_length().
// Error codes are the same as for the other accessors in this file.
SpfftError spfft_float_transform_num_local_elements(SpfftFloatTransform transform,
                                                    int* localZLength) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *localZLength = reinterpret_cast(transform)->num_local_elements();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Writes the value of TransformFloat::num_global_elements() to *numGlobalElements.
// Error codes are the same as for the other accessors in this file.
SpfftError spfft_float_transform_num_global_elements(SpfftFloatTransform transform,
                                                     long long int* numGlobalElements) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *numGlobalElements = reinterpret_cast(transform)->num_global_elements();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Writes the value of TransformFloat::global_size() to *globalSize.
// Error codes are the same as for the other accessors in this file.
SpfftError spfft_float_transform_global_size(SpfftFloatTransform transform,
                                             long long int* globalSize) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *globalSize = reinterpret_cast(transform)->global_size();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...) {
    return SpfftError::SPFFT_UNKNOWN_ERROR;
  }
  return SpfftError::SPFFT_SUCCESS;
}

// Writes the value of TransformFloat::device_id() to *deviceId.
// Error codes are the same as for the other accessors in this file.
SpfftError spfft_float_transform_device_id(SpfftFloatTransform transform, int* deviceId) {
  if (!transform) {
    return SpfftError::SPFFT_INVALID_HANDLE_ERROR;
  }
  try {
    *deviceId = reinterpret_cast(transform)->device_id();
  } catch (const spfft::GenericError& e) {
    return e.error_code();
  } catch (...)
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_num_threads(SpfftFloatTransform transform, int* numThreads) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *numThreads = reinterpret_cast(transform)->num_threads(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_execution_mode(SpfftFloatTransform transform, SpfftExecType* mode) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *mode = reinterpret_cast(transform)->execution_mode(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SpfftError spfft_float_transform_set_execution_mode(SpfftFloatTransform transform, SpfftExecType mode) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { reinterpret_cast(transform)->set_execution_mode(mode); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #ifdef SPFFT_MPI SpfftError spfft_float_transform_communicator(SpfftFloatTransform transform, MPI_Comm* comm) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *comm = reinterpret_cast(transform)->communicator(); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) { return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } SPFFT_EXPORT SpfftError spfft_float_transform_communicator_fortran(SpfftFloatTransform transform, int* commFortran) { if (!transform) { return SpfftError::SPFFT_INVALID_HANDLE_ERROR; } try { *commFortran = MPI_Comm_c2f(reinterpret_cast(transform)->communicator()); } catch (const spfft::GenericError& e) { return e.error_code(); } catch (...) 
{ return SpfftError::SPFFT_UNKNOWN_ERROR; } return SpfftError::SPFFT_SUCCESS; } #endif } // extern C #endif SpFFT-1.1.0/src/spfft/transform_internal.cpp000066400000000000000000000303751457701740000207740ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "compression/indices.hpp" #include "execution/execution_host.hpp" #include "parameters/parameters.hpp" #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "spfft/transform_internal.hpp" #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #include "gpu_util/gpu_device_guard.hpp" #include "gpu_util/gpu_transfer.hpp" #endif namespace spfft { template TransformInternal::TransformInternal(SpfftProcessingUnitType executionUnit, std::shared_ptr> grid, std::shared_ptr param) : executionUnit_(executionUnit), execMode_(SPFFT_EXEC_SYNCHRONOUS), param_(std::move(param)), grid_(std::move(grid)) { // ---------------------- // Input Check // ---------------------- if (!grid_) { throw InvalidParameterError(); } if (param_->local_num_xy_planes() > static_cast(grid_->max_num_local_xy_planes())) { throw InvalidParameterError(); } if (grid_->local() && param_->dim_z() != param_->local_num_xy_planes()) { throw InvalidParameterError(); } if (param_->local_num_z_sticks() > static_cast(grid_->max_num_local_z_columns())) { throw InvalidParameterError(); } if (param_->dim_x() > static_cast(grid_->max_dim_x()) || param_->dim_y() > static_cast(grid_->max_dim_y()) || param_->dim_z() > static_cast(grid_->max_dim_z())) { throw InvalidParameterError(); } if (!(executionUnit & grid_->processing_unit())) { // must match memory initialization parameters for grid throw InvalidParameterError(); } if (executionUnit != SpfftProcessingUnitType::SPFFT_PU_HOST && executionUnit != SpfftProcessingUnitType::SPFFT_PU_GPU) { // must be exclusively CPU or GPU throw InvalidParameterError(); } #ifdef SPFFT_MPI if (grid_->communicator().size() != param_->comm_size() || grid_->communicator().rank() != param_->comm_rank()) { throw InternalError(); } #endif // create execution if (grid_->local()) { // ---------------------- // Local // ---------------------- if (executionUnit == SpfftProcessingUnitType::SPFFT_PU_HOST) { execHost_.reset(new ExecutionHost(grid_->num_threads(), param_, 
grid_->array_host_1(), grid_->array_host_2())); } else { // GPU #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) // set device for current thread GPUDeviceGuard(grid_->device_id()); execGPU_.reset(new ExecutionGPU(grid_->num_threads(), param_, grid_->array_host_1(), grid_->array_host_2(), grid_->array_gpu_1(), grid_->array_gpu_2(), grid_->fft_work_buffer())); #else throw GPUSupportError(); #endif } } else { // ---------------------- // Distributed // ---------------------- #ifdef SPFFT_MPI if (executionUnit == SpfftProcessingUnitType::SPFFT_PU_HOST) { // CPU execHost_.reset(new ExecutionHost(grid_->communicator(), grid_->exchange_type(), grid_->num_threads(), param_, grid_->array_host_1(), grid_->array_host_2())); } else { // GPU #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) // set device for current thread GPUDeviceGuard(grid_->device_id()); execGPU_.reset(new ExecutionGPU(grid_->communicator(), grid_->exchange_type(), grid_->num_threads(), param_, grid_->array_host_1(), grid_->array_host_2(), grid_->array_gpu_1(), grid_->array_gpu_2(), grid_->fft_work_buffer())); #else // GPU throw GPUSupportError(); #endif // GPU } #else // MPI throw MPISupportError(); #endif // MPI } } template auto TransformInternal::forward(const SpfftProcessingUnitType inputLocation, T* output, SpfftScalingType scaling) -> void { if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_HOST && inputLocation != SpfftProcessingUnitType::SPFFT_PU_HOST) { throw InvalidParameterError(); } this->forward(this->space_domain_data(inputLocation), output, scaling); } template auto TransformInternal::forward(const T* input, T* output, SpfftScalingType scaling) -> void { HOST_TIMING_SCOPED("forward") if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_HOST) { assert(execHost_); execHost_->forward_xy(input); execHost_->forward_exchange(false); execHost_->forward_z(output, scaling); } else { #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) assert(execGPU_); // set device for current thread 
GPUDeviceGuard(grid_->device_id()); execGPU_->forward_xy(input); execGPU_->forward_exchange(false); execGPU_->forward_z(output, scaling); execGPU_->synchronize(execMode_); #else throw GPUSupportError(); #endif } } template auto TransformInternal::clone() const -> TransformInternal { std::shared_ptr> newGrid(new GridInternal(*grid_)); return TransformInternal(executionUnit_, std::move(newGrid), param_); } template auto TransformInternal::forward_xy(const SpfftProcessingUnitType inputLocation) -> void { if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_HOST && inputLocation != SpfftProcessingUnitType::SPFFT_PU_HOST) { throw InvalidParameterError(); } this->forward_xy(this->space_domain_data(inputLocation)); } template auto TransformInternal::forward_xy(const T* input) -> void { if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_HOST) { assert(execHost_); execHost_->forward_xy(input); } else { #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) assert(execGPU_); // set device for current thread GPUDeviceGuard(grid_->device_id()); execGPU_->forward_xy(input); #else throw GPUSupportError(); #endif } } template auto TransformInternal::forward_exchange() -> void { if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_HOST) { assert(execHost_); execHost_->forward_exchange(true); } else { #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) assert(execGPU_); // set device for current thread GPUDeviceGuard(grid_->device_id()); execGPU_->forward_exchange(true); #else throw GPUSupportError(); #endif } } template auto TransformInternal::forward_z(T* output, SpfftScalingType scaling) -> void { if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_HOST) { assert(execHost_); execHost_->forward_z(output, scaling); } else { #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) assert(execGPU_); // set device for current thread GPUDeviceGuard(grid_->device_id()); execGPU_->forward_z(output, scaling); #else throw GPUSupportError(); #endif } } template auto 
TransformInternal::backward(const T* input, const SpfftProcessingUnitType outputLocation) -> void { if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_HOST && outputLocation != SpfftProcessingUnitType::SPFFT_PU_HOST) { throw InvalidParameterError(); } this->backward(input, this->space_domain_data(outputLocation)); } template auto TransformInternal::backward(const T* input, T* output) -> void { HOST_TIMING_SCOPED("backward") // check if input is can be accessed from gpu if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_HOST) { assert(execHost_); execHost_->backward_z(input); execHost_->backward_exchange(false); execHost_->backward_xy(output); } else { #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) // set device for current thread GPUDeviceGuard(grid_->device_id()); execGPU_->backward_z(input); execGPU_->backward_exchange(false); execGPU_->backward_xy(output); execGPU_->synchronize(execMode_); #else throw GPUSupportError(); #endif } } template auto TransformInternal::backward_z(const T* input) -> void { // check if input is can be accessed from gpu if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_HOST) { assert(execHost_); execHost_->backward_z(input); } else { #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) // set device for current thread GPUDeviceGuard(grid_->device_id()); execGPU_->backward_z(input); #else throw GPUSupportError(); #endif } } template auto TransformInternal::backward_exchange() -> void { // check if input is can be accessed from gpu if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_HOST) { assert(execHost_); execHost_->backward_exchange(true); } else { #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) // set device for current thread GPUDeviceGuard(grid_->device_id()); execGPU_->backward_exchange(true); #else throw GPUSupportError(); #endif } } template auto TransformInternal::backward_xy(const SpfftProcessingUnitType outputLocation) -> void { if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_HOST && outputLocation 
!= SpfftProcessingUnitType::SPFFT_PU_HOST) { throw InvalidParameterError(); } this->backward_xy(this->space_domain_data(outputLocation)); } template auto TransformInternal::backward_xy(T* output) -> void { // check if input is can be accessed from gpu if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_HOST) { assert(execHost_); execHost_->backward_xy(output); } else { #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) // set device for current thread GPUDeviceGuard(grid_->device_id()); execGPU_->backward_xy(output); #else throw GPUSupportError(); #endif } } template auto TransformInternal::space_domain_data(SpfftProcessingUnitType location) -> T* { #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) if (executionUnit_ == SpfftProcessingUnitType::SPFFT_PU_GPU) { // GPU if (location == SpfftProcessingUnitType::SPFFT_PU_GPU) { return execGPU_->space_domain_data_gpu().data(); } else { return execGPU_->space_domain_data_host().data(); } } #endif // CPU if (location != SpfftProcessingUnitType::SPFFT_PU_HOST) throw InvalidParameterError(); return execHost_->space_domain_data().data(); } template auto TransformInternal::synchronize() -> void { #if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) if (execGPU_) execGPU_->synchronize(execMode_); #endif } // instatiate templates for float and double template class TransformInternal; #ifdef SPFFT_SINGLE_PRECISION template class TransformInternal; #endif } // namespace spfft SpFFT-1.1.0/src/spfft/transform_internal.hpp000066400000000000000000000126551457701740000210020ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/
#ifndef SPFFT_TRANSFORM_INTERNAL_HPP
#define SPFFT_TRANSFORM_INTERNAL_HPP

// NOTE(review): the target of the bare "#include" below and all template parameter/argument
// lists appear to be missing from this text; the code is reproduced as found.
#include
#include "execution/execution_host.hpp"
#include "parameters/parameters.hpp"
#include "spfft/config.h"
#include "spfft/grid_internal.hpp"
#include "spfft/types.h"
#include "util/common_types.hpp"

#if defined(SPFFT_CUDA) || defined(SPFFT_ROCM)
#include "compression/compression_gpu.hpp"
#include "execution/execution_gpu.hpp"
#endif

namespace spfft {

// Internal transform implementation: holds the grid, the transform parameters and the host or
// GPU execution object, and forwards full and step-wise transforms to that execution object.
template class TransformInternal {
public:
  // Validates the parameters against the grid and creates the execution object
  // (see the definition for the exceptions thrown).
  TransformInternal(SpfftProcessingUnitType executionUnit, std::shared_ptr> grid,
                    std::shared_ptr param);

  // new transform with the same execution unit and parameters on a copy of the grid
  auto clone() const -> TransformInternal;

  // --- accessors forwarding to the parameter set ---
  inline auto type() const noexcept -> SpfftTransformType { return param_->transform_type(); }

  inline auto dim_x() const noexcept -> int { return param_->dim_x(); }

  inline auto dim_y() const noexcept -> int { return param_->dim_y(); }

  inline auto dim_z() const noexcept -> int { return param_->dim_z(); }

  inline auto num_local_xy_planes() const noexcept -> int { return param_->local_num_xy_planes(); }

  inline auto local_xy_plane_offset() const noexcept -> int { return param_->local_xy_plane_offset(); }

  // execution unit selected at construction
  inline auto processing_unit() const noexcept -> SpfftProcessingUnitType { return executionUnit_; }

  // --- accessors forwarding to the grid ---
  inline auto device_id() const -> int { return grid_->device_id(); }

  inline auto num_threads() const -> int { return grid_->num_threads(); }

  // --- element counts, forwarding to the parameter set ---
  inline auto num_local_elements() const -> int { return param_->local_num_elements(); }

  inline auto num_global_elements() const -> long long int { return param_->global_num_elements(); }

  inline auto global_size() const -> long long int { return param_->global_size(); }

  // execution mode passed to the GPU execution when synchronizing
  inline auto execution_mode() const -> SpfftExecType { return execMode_;}

  inline auto set_execution_mode(SpfftExecType mode) -> void { execMode_ = mode;}

  // true if both transforms hold the same grid object (pointer comparison)
  inline auto shared_grid(const TransformInternal& other) const -> bool { return other.grid_ == grid_; }

  // same value as type()
  inline auto transform_type() const -> SpfftTransformType { return param_->transform_type(); }

#ifdef SPFFT_MPI
  inline auto communicator() const -> MPI_Comm { return grid_->communicator().get(); }
#endif

  // full forward transform (xy, exchange, z), input read from internal buffer at inputLocation
  auto forward(const SpfftProcessingUnitType inputLocation, T* output, SpfftScalingType scaling) -> void;

  // full forward transform (xy, exchange, z) with blocking communication
  auto forward(const T* input, T* output, SpfftScalingType scaling) -> void;

  // step-wise forward, step 1: transform in x and y, input read from internal buffer
  auto forward_xy(const SpfftProcessingUnitType inputLocation) -> void;

  // step-wise forward, step 1: transform in x and y
  auto forward_xy(const T* input) -> void;

  // step-wise forward, step 2: start non-blocking exchange
  auto forward_exchange() -> void;

  // step-wise forward, step 3: finalize exchange and transform z
  auto forward_z(T* output, SpfftScalingType scaling) -> void;

  // full backward transform (z, exchange, xy), output written to internal buffer at outputLocation
  auto backward(const T* input, const SpfftProcessingUnitType outputLocation) -> void;

  // full backward transform (z, exchange, xy) with blocking communication
  auto backward(const T* input, T* output) -> void;

  // step-wise backward, step 3 (backward() calls it last): transform in x and y,
  // output written to internal buffer; presumably also finalizes the exchange - TODO confirm
  auto backward_xy(const SpfftProcessingUnitType outputLocation) -> void;

  // step-wise backward, step 3 (backward() calls it last): transform in x and y
  auto backward_xy(T* output) -> void;

  // step-wise backward, step 2: start non-blocking exchange
  auto backward_exchange() -> void;

  // step-wise backward, step 1 (backward() calls it first): transform in z
  auto backward_z(const T* input) -> void;

  // must be called after step-wise transforms on GPUs
  auto synchronize() -> void;

  // pointer to the internal space domain buffer at the given location
  auto space_domain_data(SpfftProcessingUnitType location) -> T*;

private:
  SpfftProcessingUnitType executionUnit_;
  SpfftExecType execMode_;
  std::shared_ptr param_; // Only for immutable parameters
  std::shared_ptr> grid_;

  // set for host transforms only
  std::unique_ptr> execHost_;
#if (defined(SPFFT_CUDA) || defined(SPFFT_ROCM))
  // set for GPU transforms only
  std::unique_ptr> execGPU_;
#endif
};

}  // namespace spfft

#endif
SpFFT-1.1.0/src/symmetry/000077500000000000000000000000001457701740000151205ustar00rootroot00000000000000SpFFT-1.1.0/src/symmetry/gpu_kernels/000077500000000000000000000000001457701740000174365ustar00rootroot00000000000000SpFFT-1.1.0/src/symmetry/gpu_kernels/symmetry_kernels.cu000066400000000000000000000161131457701740000234050ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_kernel_parameter.hpp" #include "gpu_util/gpu_runtime.hpp" #include "memory/gpu_array_const_view.hpp" #include "memory/gpu_array_view.hpp" namespace spfft { template __global__ static void symmetrize_plane_kernel( GPUArrayView3D::type> data, const int startIndex, const int numIndices) { assert(startIndex + numIndices <= data.dim_mid()); int idxMid = threadIdx.x + blockIdx.x * blockDim.x; if (idxMid < numIndices) { idxMid += startIndex; for (int idxOuter = blockIdx.y; idxOuter < data.dim_outer(); idxOuter += gridDim.y) { auto value = data(blockIdx.y, idxMid, 0); if (value.x != T(0) || value.y != T(0)) { value.y = -value.y; data(idxOuter, data.dim_mid() - idxMid, 0) = value; } } } } auto symmetrize_plane_gpu(const gpu::StreamType stream, const GPUArrayView3D::type>& data) -> void { assert(data.size() > 2); { const int startIndex = 1; const int numIndices = data.dim_mid() / 2; const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((numIndices + threadBlock.x - 1) / threadBlock.x, std::min(data.dim_outer(), gpu::GridSizeMedium)); launch_kernel(symmetrize_plane_kernel, threadGrid, threadBlock, 0, stream, data, startIndex, numIndices); } { const int startIndex = data.dim_mid() / 2 + 1; const int numIndices = data.dim_mid() - startIndex; const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((numIndices + threadBlock.x - 1) / threadBlock.x, std::min(data.dim_outer(), gpu::GridSizeMedium)); launch_kernel(symmetrize_plane_kernel, threadGrid, threadBlock, 0, stream, data, startIndex, numIndices); } } auto symmetrize_plane_gpu(const gpu::StreamType stream, const GPUArrayView3D::type>& data) -> void { assert(data.size() > 2); { const int startIndex = 1; const int numIndices = data.dim_mid() / 2; const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((numIndices + threadBlock.x - 1) / threadBlock.x, std::min(data.dim_outer(), gpu::GridSizeMedium)); 
launch_kernel(symmetrize_plane_kernel, threadGrid, threadBlock, 0, stream, data, startIndex, numIndices); } { const int startIndex = data.dim_mid() / 2 + 1; const int numIndices = data.dim_mid() - startIndex; const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((numIndices + threadBlock.x - 1) / threadBlock.x, std::min(data.dim_outer(), gpu::GridSizeMedium)); launch_kernel(symmetrize_plane_kernel, threadGrid, threadBlock, 0, stream, data, startIndex, numIndices); } } template __global__ static void symmetrize_stick_kernel( GPUArrayView1D::type> data, const int startIndex, const int numIndices) { assert(startIndex + numIndices <= data.size()); for (int idxInner = threadIdx.x + blockIdx.x * blockDim.x + startIndex; idxInner < numIndices + startIndex; idxInner += gridDim.x * blockDim.x) { auto value = data(idxInner); if (value.x != T(0) || value.y != T(0)) { value.y = -value.y; data(data.size() - idxInner) = value; } } } auto symmetrize_stick_gpu(const gpu::StreamType stream, const GPUArrayView1D::type>& data) -> void { assert(data.size() > 2); { const int startIndex = 1; const int numIndices = data.size() / 2; const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid(std::min( static_cast((numIndices + threadBlock.x - 1) / threadBlock.x), gpu::GridSizeMedium)); launch_kernel(symmetrize_stick_kernel, threadGrid, threadBlock, 0, stream, data, startIndex, numIndices); } { const int startIndex = data.size() / 2 + 1; const int numIndices = data.size() - startIndex; const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid(std::min( static_cast((numIndices + threadBlock.x - 1) / threadBlock.x), gpu::GridSizeMedium)); launch_kernel(symmetrize_stick_kernel, threadGrid, threadBlock, 0, stream, data, startIndex, numIndices); } } auto symmetrize_stick_gpu(const gpu::StreamType stream, const GPUArrayView1D::type>& data) -> void { assert(data.size() > 2); { const int startIndex = 1; const int numIndices = data.size() / 2; const dim3 
threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid(std::min( static_cast((numIndices + threadBlock.x - 1) / threadBlock.x), gpu::GridSizeMedium)); launch_kernel(symmetrize_stick_kernel, threadGrid, threadBlock, 0, stream, data, startIndex, numIndices); } { const int startIndex = data.size() / 2 + 1; const int numIndices = data.size() - startIndex; const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid(std::min( static_cast((numIndices + threadBlock.x - 1) / threadBlock.x), gpu::GridSizeMedium)); launch_kernel(symmetrize_stick_kernel, threadGrid, threadBlock, 0, stream, data, startIndex, numIndices); } } } // namespace spfft SpFFT-1.1.0/src/symmetry/gpu_kernels/symmetry_kernels.hpp000066400000000000000000000046531457701740000235730ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_SYMMETRY_KERNELS_HPP #define SPFFT_SYMMETRY_KERNELS_HPP #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_runtime_api.hpp" #include "memory/gpu_array_view.hpp" namespace spfft { auto symmetrize_plane_gpu(const gpu::StreamType stream, const GPUArrayView3D::type>& data) -> void; auto symmetrize_plane_gpu(const gpu::StreamType stream, const GPUArrayView3D::type>& data) -> void; auto symmetrize_stick_gpu(const gpu::StreamType stream, const GPUArrayView1D::type>& data) -> void; auto symmetrize_stick_gpu(const gpu::StreamType stream, const GPUArrayView1D::type>& data) -> void; } // namespace spfft #endif SpFFT-1.1.0/src/symmetry/symmetry.hpp000066400000000000000000000033771457701740000175340ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_SYMMETRY_HPP #define SPFFT_SYMMETRY_HPP #include "spfft/config.h" namespace spfft { class Symmetry { public: virtual auto apply() -> void{}; virtual ~Symmetry() = default; }; } // namespace spfft #endif SpFFT-1.1.0/src/symmetry/symmetry_gpu.hpp000066400000000000000000000061201457701740000203740ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_SYMMETRY_GPU_HPP #define SPFFT_SYMMETRY_GPU_HPP #include #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_stream_handle.hpp" #include "memory/gpu_array_view.hpp" #include "spfft/config.h" #include "symmetry/gpu_kernels/symmetry_kernels.hpp" #include "symmetry/symmetry.hpp" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" namespace spfft { // This class will apply the 1D hermitian symmetry along the inner dimension on the plane with mid // index 0 template class PlaneSymmetryGPU : public Symmetry { public: PlaneSymmetryGPU(GPUStreamHandle stream, const GPUArrayView3D::type>& data) : stream_(std::move(stream)), data_(data) {} auto apply() -> void override { if (data_.dim_mid() > 2 && data_.size() > 0) { symmetrize_plane_gpu(stream_.get(), data_); } } private: GPUStreamHandle stream_; GPUArrayView3D::type> data_; }; // This class will apply the hermitian symmetry in 1d template class StickSymmetryGPU : public Symmetry { public: StickSymmetryGPU(GPUStreamHandle stream, 
const GPUArrayView1D::type>& stick) : stream_(std::move(stream)), stick_(stick) {} auto apply() -> void override { if (stick_.size() > 2) { symmetrize_stick_gpu(stream_.get(), stick_); } } private: GPUStreamHandle stream_; GPUArrayView1D::type> stick_; }; } // namespace spfft #endif SpFFT-1.1.0/src/symmetry/symmetry_host.hpp000066400000000000000000000074501457701740000205650ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_SYMMETRY_HOST_HPP #define SPFFT_SYMMETRY_HOST_HPP #include #include "memory/host_array_view.hpp" #include "spfft/config.h" #include "symmetry/symmetry.hpp" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" namespace spfft { // This class will apply the 1D hermitian symmetry along the inner dimension on the plane with mid // index 0 template class PlaneSymmetryHost : public Symmetry { public: explicit PlaneSymmetryHost(const HostArrayView3D>& data) : data_(data) {} auto apply() -> void override { constexpr std::complex zeroElement; // Data may be conjugated twice, but this way symmetry is applied independent of positive or // negative frequencies provided SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType idxOuter = 0; idxOuter < data_.dim_outer(); ++idxOuter) { for (SizeType idxInner = 1; idxInner < data_.dim_inner(); ++idxInner) { const auto value = data_(idxOuter, 0, idxInner); if (value != zeroElement) { data_(idxOuter, 0, data_.dim_inner() - idxInner) = std::conj(value); } } } } private: HostArrayView3D> data_; }; // This class will apply the hermitian symmetry in 1d template class StickSymmetryHost : public Symmetry { public: explicit StickSymmetryHost(const HostArrayView1D>& stick) : stick_(stick) {} auto apply() -> void override { constexpr std::complex zeroElement; // Data may be conjugated twice, but this way symmetry is applied independent of positive or // negative frequencies provided SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType idxInner = 1; idxInner < stick_.size() / 2 + 1; ++idxInner) { const auto value = stick_(idxInner); if (value != zeroElement) { stick_(stick_.size() - idxInner) = std::conj(value); } } SPFFT_OMP_PRAGMA("omp for schedule(static)") for (SizeType idxInner = stick_.size() / 2 + 1; idxInner < stick_.size(); ++idxInner) { const auto value = stick_(idxInner); if (value != zeroElement) { stick_(stick_.size() - idxInner) = std::conj(value); } } } private: HostArrayView1D> stick_; 
}; } // namespace spfft #endif SpFFT-1.1.0/src/timing/000077500000000000000000000000001457701740000145165ustar00rootroot00000000000000SpFFT-1.1.0/src/timing/rt_graph.cpp000066400000000000000000000437421457701740000170420ustar00rootroot00000000000000/* * Copyright (c) 2019 Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include "timing/rt_graph.hpp" #include #include #include #include #include #include #include #include #include namespace rt_graph { // ====================== // internal helper // ====================== namespace internal { namespace { struct Format { Format(Stat stat_) : stat(stat_) { switch (stat_) { case Stat::Count: header = "#"; space = 6; break; case Stat::Total: header = "Total"; space = 14; break; case Stat::Mean: header = "Mean"; space = 14; break; case Stat::Median: header = "Median"; space = 14; break; case Stat::QuartileHigh: header = "Quartile High"; space = 14; break; case Stat::QuartileLow: header = "Quartile Low"; space = 14; break; case Stat::Min: header = "Min"; space = 14; break; case Stat::Max: header = "Max"; space = 14; break; case Stat::Percentage: header = "%"; space = 11; break; case Stat::ParentPercentage: header = "Parent %"; space = 11; break; } } Stat stat; std::string header; std::size_t space; }; // format time input in seconds into string with appropriate unit auto format_time(const double time_seconds) -> std::string { if (time_seconds <= 0.0) return std::string("0 s"); // time is always greater than 0 here const double exponent = std::log10(std::abs(time_seconds)); const int siExponent = static_cast(std::floor(exponent / 3.0) * 3); std::stringstream result; result << std::fixed << std::setprecision(2); result << time_seconds * std::pow(10.0, static_cast(-siExponent)); result << " "; switch (siExponent) { case 24: result << "Y"; break; case 21: result << "Z"; break; case 18: result << "E"; break; case 15: result << "P"; break; case 12: result << "T"; break; case 9: result << "G"; break; case 6: result << "M"; break; case 3: result << "k"; break; case 0: break; case -3: result << "m"; break; case -6: result << "u"; break; case -9: result << "n"; break; case -12: result << "p"; break; case -15: result << "f"; break; case -18: result << "a"; break; case -21: result << "z"; break; case -24: result << "y"; break; default: result << 
"?"; } result << "s"; return result.str(); } auto calc_median(const std::vector::const_iterator& begin, const std::vector::const_iterator& end) -> double { const auto n = end - begin; if (n == 0) return 0.0; if (n % 2 == 0) { return (*(begin + n / 2) + *(begin + n / 2 - 1)) / 2.0; } else { return *(begin + n / 2); } } auto print_stat(std::ostream& out, const Format& format, const std::vector& sortedTimings, double totalSum, double parentSum, double currentSum) -> void { switch (format.stat) { case Stat::Count: out << std::right << std::setw(format.space) << sortedTimings.size(); break; case Stat::Total: out << std::right << std::setw(format.space) << format_time(currentSum); break; case Stat::Mean: out << std::right << std::setw(format.space) << format_time(currentSum / sortedTimings.size()); break; case Stat::Median: out << std::right << std::setw(format.space) << format_time(calc_median(sortedTimings.begin(), sortedTimings.end())); break; case Stat::QuartileHigh: { const double upperQuartile = calc_median(sortedTimings.begin() + sortedTimings.size() / 2 + (sortedTimings.size() % 2) * (sortedTimings.size() > 1), sortedTimings.end()); out << std::right << std::setw(format.space) << format_time(upperQuartile); } break; case Stat::QuartileLow: { const double lowerQuartile = calc_median(sortedTimings.begin(), sortedTimings.begin() + sortedTimings.size() / 2); out << std::right << std::setw(format.space) << format_time(lowerQuartile); } break; case Stat::Min: out << std::right << std::setw(format.space) << format_time(sortedTimings.front()); break; case Stat::Max: out << std::right << std::setw(format.space) << format_time(sortedTimings.back()); break; case Stat::Percentage: { const double p = (totalSum < currentSum || totalSum == 0) ? 100.0 : currentSum / totalSum * 100.0; out << std::right << std::fixed << std::setprecision(2) << std::setw(format.space) << p; } break; case Stat::ParentPercentage: { const double p = (parentSum < currentSum || parentSum == 0) ? 
100.0 : currentSum / parentSum * 100.0; out << std::right << std::fixed << std::setprecision(2) << std::setw(format.space) << p; } break; } } // Helper struct for creating a tree of timings struct TimeStampPair { std::string identifier; double time = 0.0; std::size_t startIdx = 0; std::size_t stopIdx = 0; internal::TimingNode* nodePtr = nullptr; }; auto calculate_statistic(std::vector values) -> std::tuple { if (values.empty()) return std::make_tuple(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); std::sort(values.begin(), values.end()); const double min = values.front(); const double max = values.back(); const double median = calc_median(values.begin(), values.end()); const double sum = std::accumulate(values.begin(), values.end(), 0.0); const double mean = sum / values.size(); const double lowerQuartile = calc_median(values.begin(), values.begin() + values.size() / 2); const double upperQuartile = calc_median( values.begin() + values.size() / 2 + (values.size() % 2) * (values.size() > 1), values.end()); return std::make_tuple(sum, mean, median, min, max, lowerQuartile, upperQuartile); } // print rt_graph nodes in tree recursively auto print_node(std::ostream& out, const std::vector formats, const std::size_t identifierSpace, const std::string& nodePrefix, const internal::TimingNode& node, const bool isSubNode, const bool isLastSubnode, double parentTime, double totalTime) -> void { double sum, mean, median, min, max, lowerQuartile, upperQuartile; std::tie(sum, mean, median, min, max, lowerQuartile, upperQuartile) = calculate_statistic(node.timings); if (!isSubNode) { totalTime = sum; parentTime = sum; } const double totalPercentage = (totalTime < sum || totalTime == 0) ? 100.0 : sum / totalTime * 100.0; const double parentPercentage = (parentTime < sum || parentTime == 0) ? 
100.0 : sum / parentTime * 100.0; std::stringstream totalPercentageStream; totalPercentageStream << std::fixed << std::setprecision(2) << totalPercentage; std::stringstream parentPercentageStream; parentPercentageStream << std::fixed << std::setprecision(2) << parentPercentage; out << std::left << std::setw(identifierSpace); if (isSubNode) out << nodePrefix + "- " + node.identifier; else out << nodePrefix + node.identifier; auto sortedTimings = node.timings; std::sort(sortedTimings.begin(), sortedTimings.end()); const double currentTime = std::accumulate(sortedTimings.begin(), sortedTimings.end(), 0.0); for (const auto& format : formats) { print_stat(out, format, sortedTimings, totalTime, parentTime, currentTime); } out << std::endl; for (const auto& subNode : node.subNodes) { print_node(out, formats, identifierSpace, nodePrefix + std::string(" |"), subNode, true, &subNode == &node.subNodes.back(), sum, totalTime); if (!isLastSubnode && &subNode == &node.subNodes.back()) { out << nodePrefix << std::endl; } } } // determine length of padding required for printing entire tree identifiers recursively auto max_node_identifier_length(const internal::TimingNode& node, const std::size_t recursionDepth, const std::size_t addPerLevel, const std::size_t parentMax) -> std::size_t { std::size_t currentLength = node.identifier.length() + recursionDepth * addPerLevel; std::size_t max = currentLength > parentMax ? 
currentLength : parentMax; for (const auto& subNode : node.subNodes) { const std::size_t subMax = max_node_identifier_length(subNode, recursionDepth + 1, addPerLevel, max); if (subMax > max) max = subMax; } return max; } auto export_node_json(const std::string& padding, const std::list& nodeList, std::ostream& stream) -> void { stream << "{" << std::endl; const std::string nodePadding = padding + " "; const std::string subNodePadding = nodePadding + " "; for (const auto& node : nodeList) { stream << nodePadding << "\"" << node.identifier << "\" : {" << std::endl; stream << subNodePadding << "\"timings\" : ["; for (const auto& value : node.timings) { stream << value; if (&value != &(node.timings.back())) stream << ", "; } stream << "]," << std::endl; stream << subNodePadding << "\"sub-timings\" : "; export_node_json(subNodePadding, node.subNodes, stream); stream << nodePadding << "}"; if (&node != &(nodeList.back())) stream << ","; stream << std::endl; } stream << padding << "}" << std::endl; } auto extract_timings(const std::string& identifier, const std::list& nodes, std::vector& timings) -> void { for (const auto& node : nodes) { if (node.identifier == identifier) { timings.insert(timings.end(), node.timings.begin(), node.timings.end()); } extract_timings(identifier, node.subNodes, timings); } } } // namespace } // namespace internal // ====================== // Timer // ====================== auto Timer::process() const -> TimingResult { std::list results; std::stringstream warnings; try { std::vector timePairs; timePairs.reserve(timeStamps_.size() / 2); // create pairs of start / stop timings for (std::size_t i = 0; i < timeStamps_.size(); ++i) { if (timeStamps_[i].type == internal::TimeStampType::Start) { internal::TimeStampPair pair; pair.startIdx = i; pair.identifier = std::string(timeStamps_[i].identifierPtr); std::size_t numInnerMatchingIdentifiers = 0; // search for matching stop after start for (std::size_t j = i + 1; j < timeStamps_.size(); ++j) { // 
only consider matching identifiers if (std::string(timeStamps_[j].identifierPtr) == std::string(timeStamps_[i].identifierPtr)) { if (timeStamps_[j].type == internal::TimeStampType::Stop && numInnerMatchingIdentifiers == 0) { // Matching stop found std::chrono::duration duration = timeStamps_[j].time - timeStamps_[i].time; pair.time = duration.count(); pair.stopIdx = j; timePairs.push_back(pair); if (pair.time < 0) { warnings << "rt_graph WARNING:Measured time is negative. Non-steady system-clock?!" << std::endl; } break; } else if (timeStamps_[j].type == internal::TimeStampType::Stop && numInnerMatchingIdentifiers > 0) { // inner stop with matching identifier --numInnerMatchingIdentifiers; } else if (timeStamps_[j].type == internal::TimeStampType::Start) { // inner start with matching identifier ++numInnerMatchingIdentifiers; } } } if (pair.stopIdx == 0) { warnings << "rt_graph WARNING: Start / stop time stamps do not match for \"" << timeStamps_[i].identifierPtr << "\"!" << std::endl; } } } // create tree of timings where sub-nodes represent timings fully enclosed by another start / // stop pair Use the fact that timePairs is sorted by startIdx for (std::size_t i = 0; i < timePairs.size(); ++i) { auto& pair = timePairs[i]; // find potential parent by going backwards through pairs, starting with the current pair // position for (auto timePairIt = timePairs.rbegin() + (timePairs.size() - i); timePairIt != timePairs.rend(); ++timePairIt) { if (timePairIt->stopIdx > pair.stopIdx && timePairIt->nodePtr != nullptr) { auto& parentNode = *(timePairIt->nodePtr); // check if sub-node with identifier exists bool nodeFound = false; for (auto& subNode : parentNode.subNodes) { if (subNode.identifier == pair.identifier) { nodeFound = true; subNode.timings.push_back(pair.time); // mark node position in pair for finding sub-nodes pair.nodePtr = &(subNode); break; } } if (!nodeFound) { // create new sub-node internal::TimingNode newNode; newNode.identifier = pair.identifier; 
newNode.timings.push_back(pair.time); parentNode.subNodes.push_back(std::move(newNode)); // mark node position in pair for finding sub-nodes pair.nodePtr = &(parentNode.subNodes.back()); } break; } } // No parent found, must be top level node if (pair.nodePtr == nullptr) { // Check if top level node with same name exists for (auto& topNode : results) { if (topNode.identifier == pair.identifier) { topNode.timings.push_back(pair.time); pair.nodePtr = &(topNode); break; } } } // New top level node if (pair.nodePtr == nullptr) { internal::TimingNode newNode; newNode.identifier = pair.identifier; newNode.timings.push_back(pair.time); // newNode.parent = nullptr; results.push_back(std::move(newNode)); // mark node position in pair for finding sub-nodes pair.nodePtr = &(results.back()); } } } catch (const std::exception& e) { warnings << "rt_graph WARNING: Processing of timings failed: " << e.what() << std::endl; } catch (...) { warnings << "rt_graph WARNING: Processing of timings failed!" << std::endl; } return TimingResult(std::move(results), warnings.str()); } // ====================== // // ====================== auto TimingResult::json() const -> std::string { std::stringstream jsonStream; jsonStream << std::scientific; internal::export_node_json("", rootNodes_, jsonStream); return jsonStream.str(); } auto TimingResult::get_timings(const std::string& identifier) const -> std::vector { std::vector timings; internal::extract_timings(identifier, rootNodes_, timings); return timings; } auto TimingResult::print(std::vector statistic) const -> std::string { std::stringstream stream; // print warnings stream << warnings_; // calculate space for printing identifiers std::size_t identifierSpace = 0; for (const auto& node : rootNodes_) { const auto nodeMax = internal::max_node_identifier_length(node, 0, 2, identifierSpace); if (nodeMax > identifierSpace) identifierSpace = nodeMax; } identifierSpace += 3; auto totalSpace = identifierSpace; std::vector formats; 
formats.reserve(statistic.size()); for (const auto& stat : statistic) { formats.emplace_back(stat); totalSpace += formats.back().space; } // Construct table header // Table start stream << std::string(totalSpace, '=') << std::endl; // header stream << std::right << std::setw(identifierSpace) << ""; for (const auto& format : formats) { stream << std::right << std::setw(format.space) << format.header; } stream << std::endl; // Header separtion line stream << std::string(totalSpace, '-') << std::endl; // print all timings for (const auto& node : rootNodes_) { internal::print_node(stream, formats, identifierSpace, std::string(), node, false, true, 0.0, 0.0); stream << std::endl; } // End table stream << std::string(totalSpace, '=') << std::endl; return stream.str(); } } // namespace rt_graph SpFFT-1.1.0/src/timing/rt_graph.hpp000066400000000000000000000175401457701740000170440ustar00rootroot00000000000000/* * Copyright (c) 2019 Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef RT_GRAPH_HPP_GUARD #define RT_GRAPH_HPP_GUARD #include #include #include #include #include #include #include namespace rt_graph { using ClockType = std::chrono::high_resolution_clock; // Selection of available statistics enum class Stat { Count, // Number of measurements Total, // Total accumulated time Mean, // Mean time Median, // Median time QuartileHigh, // Third quartile time QuartileLow, // First quartile time Min, // Mininum time Max, // Maximum time Percentage, // Percentage of accumulated time with respect to the top-level node in graph ParentPercentage // Percentage of accumulated time with respect to the parent node in graph }; // internal helper functionality namespace internal { enum class TimeStampType { Start, Stop, Empty }; struct TimeStamp { TimeStamp() : type(TimeStampType::Empty) {} // Identifier pointer must point to compile time string literal TimeStamp(const char* identifier, const TimeStampType& stampType) : time(ClockType::now()), identifierPtr(identifier), type(stampType) {} ClockType::time_point time; const char* identifierPtr; TimeStampType type; }; struct TimingNode { std::string identifier; std::vector timings; std::list subNodes; }; } // namespace internal // Processed timings results. class TimingResult { public: TimingResult(std::list rootNodes, std::string warnings) : rootNodes_(std::move(rootNodes)), warnings_(std::move(warnings)) {} // Get json representation of the full graph with all timings. 
Unit of time is seconds. auto json() const -> std::string; // Get all timings for given identifier auto get_timings(const std::string& identifier) const -> std::vector; // Print graph statistic to string. auto print(std::vector statistic = {Stat::Count, Stat::Total, Stat::Percentage, Stat::ParentPercentage, Stat::Median, Stat::Min, Stat::Max}) const -> std::string; private: std::list rootNodes_; std::string warnings_; }; class ScopedTiming; // Timer class, which allows to start / stop measurements with a given identifier. class Timer { public: // reserve space for 1000'000 measurements Timer() { timeStamps_.reserve(2 * 1000 * 1000); } // reserve space for given number of measurements explicit Timer(std::size_t reserveCount) { timeStamps_.reserve(2 * reserveCount); } // start with string literal identifier template inline auto start(const char (&identifierPtr)[N]) -> void { atomic_signal_fence(std::memory_order_seq_cst); // only prevents compiler reordering timeStamps_.emplace_back(identifierPtr, internal::TimeStampType::Start); atomic_signal_fence(std::memory_order_seq_cst); // only prevents compiler reordering } // start with string identifier (storing string object comes with some additional overhead) inline auto start(std::string identifier) -> void { atomic_signal_fence(std::memory_order_seq_cst); // only prevents compiler reordering identifierStrings_.emplace_back(std::move(identifier)); timeStamps_.emplace_back(identifierStrings_.back().c_str(), internal::TimeStampType::Start); atomic_signal_fence(std::memory_order_seq_cst); // only prevents compiler reordering } // stop with string literal identifier template inline auto stop(const char (&identifierPtr)[N]) -> void { atomic_signal_fence(std::memory_order_seq_cst); // only prevents compiler reordering timeStamps_.emplace_back(identifierPtr, internal::TimeStampType::Stop); atomic_signal_fence(std::memory_order_seq_cst); // only prevents compiler reordering } // stop with string identifier (storing string 
object comes with some additional overhead) inline auto stop(std::string identifier) -> void { atomic_signal_fence(std::memory_order_seq_cst); // only prevents compiler reordering identifierStrings_.emplace_back(std::move(identifier)); timeStamps_.emplace_back(identifierStrings_.back().c_str(), internal::TimeStampType::Stop); atomic_signal_fence(std::memory_order_seq_cst); // only prevents compiler reordering } // clear timer and reserve space for given number of new measurements. inline auto clear(std::size_t reserveCount) -> void { timeStamps_.clear(); identifierStrings_.clear(); this->reserve(reserveCount); } // reserve space for given number of measurements. Can prevent allocations at start / stop calls. inline auto reserve(std::size_t reserveCount) -> void { timeStamps_.reserve(reserveCount); } // process timings into result type auto process() const -> TimingResult; private: inline auto stop_with_ptr(const char* identifierPtr) -> void { atomic_signal_fence(std::memory_order_seq_cst); // only prevents compiler reordering timeStamps_.emplace_back(identifierPtr, internal::TimeStampType::Stop); atomic_signal_fence(std::memory_order_seq_cst); // only prevents compiler reordering } friend ScopedTiming; std::vector timeStamps_; std::deque identifierStrings_; // pointer to elements always remain valid after push back }; // Helper class, which calls start() upon creation and stop() on timer when leaving scope with given // identifier. 
class ScopedTiming { public: // timer reference must be valid for the entire lifetime template ScopedTiming(const char (&identifierPtr)[N], Timer& timer) : identifierPtr_(identifierPtr), timer_(timer) { timer_.start(identifierPtr); } ScopedTiming(std::string identifier, Timer& timer) : identifierPtr_(nullptr), identifier_(std::move(identifier)), timer_(timer) { timer_.start(identifier_); } ScopedTiming(const ScopedTiming&) = delete; ScopedTiming(ScopedTiming&&) = delete; auto operator=(const ScopedTiming&) -> ScopedTiming& = delete; auto operator=(ScopedTiming &&) -> ScopedTiming& = delete; ~ScopedTiming() { if (identifierPtr_) { timer_.stop_with_ptr(identifierPtr_); } else { timer_.stop(std::move(identifier_)); } } private: const char* identifierPtr_; std::string identifier_; Timer& timer_; }; } // namespace rt_graph #endif SpFFT-1.1.0/src/timing/timing.cpp000066400000000000000000000034251457701740000165150ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "timing/timing.hpp" namespace spfft { namespace timing { #ifdef SPFFT_TIMING ::rt_graph::Timer GlobalTimer; #else int dummySymbol = 0; // prevent warnings of no symbols in object file #endif } // namespace timing } // namespace spfft SpFFT-1.1.0/src/timing/timing.hpp000066400000000000000000000046671457701740000165330ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TIMING_HPP #define SPFFT_TIMING_HPP #include "spfft/config.h" #ifdef SPFFT_TIMING #include #include #include "timing/rt_graph.hpp" namespace spfft { namespace timing { extern ::rt_graph::Timer GlobalTimer; } // namespace timing } // namespace spfft #define HOST_TIMING_CONCAT_IMPL(x, y) x##y #define HOST_TIMING_MACRO_CONCAT(x, y) HOST_TIMING_CONCAT_IMPL(x, y) #define HOST_TIMING_SCOPED(identifier) \ ::rt_graph::ScopedTiming HOST_TIMING_MACRO_CONCAT( \ scopedHostTimerMacroGenerated, __COUNTER__)(identifier, ::spfft::timing::GlobalTimer); #define HOST_TIMING_START(identifier) ::spfft::timing::GlobalTimer.start(identifier); #define HOST_TIMING_STOP(identifier) ::spfft::timing::GlobalTimer.stop(identifier); #else #define HOST_TIMING_START(identifier) #define HOST_TIMING_STOP(identifier) #define HOST_TIMING_SCOPED(identifier) #endif #endif SpFFT-1.1.0/src/transpose/000077500000000000000000000000001457701740000152455ustar00rootroot00000000000000SpFFT-1.1.0/src/transpose/gpu_kernels/000077500000000000000000000000001457701740000175635ustar00rootroot00000000000000SpFFT-1.1.0/src/transpose/gpu_kernels/buffered_kernels.cu000066400000000000000000000406041457701740000234250ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. 
Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include "gpu_util/complex_conversion.cuh" #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_kernel_parameter.hpp" #include "gpu_util/gpu_runtime.hpp" #include "memory/array_view_utility.hpp" #include "memory/gpu_array_const_view.hpp" #include "memory/gpu_array_view.hpp" namespace spfft { // Packs z-sticks into buffer for MPI_Alltoall // Dimension of buffer are (numRanks, maxNumZSticks, maxNumXYPlanes) // Dimension of freqZData are (numLocalZSticks, dimZ) template __global__ static void buffered_pack_backward_kernel(const GPUArrayConstView1D numXYPlanes, const GPUArrayConstView1D xyPlaneOffsets, const GPUArrayConstView2D freqZData, GPUArrayView3D buffer) { const int xyPlaneIndex = threadIdx.x + blockIdx.x * blockDim.x; for (int r = 0; r < numXYPlanes.size(); ++r) { if (xyPlaneIndex < numXYPlanes(r)) { const int xyOffset = xyPlaneOffsets(r); for (int zStickIndex = blockIdx.y; zStickIndex < freqZData.dim_outer(); zStickIndex += gridDim.y) { buffer(r, zStickIndex, xyPlaneIndex) = ConvertComplex::apply( freqZData(zStickIndex, xyPlaneIndex + xyOffset)); } } } } template static auto buffered_pack_backward_launch(const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D& freqZData, GPUArrayView3D buffer) -> void { assert(xyPlaneOffsets.size() == numXYPlanes.size()); assert(buffer.size() >= freqZData.size()); assert(buffer.dim_outer() == xyPlaneOffsets.size()); assert(buffer.dim_inner() == maxNumXYPlanes); const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((maxNumXYPlanes + threadBlock.x - 1) / threadBlock.x, std::min(freqZData.dim_outer(), gpu::GridSizeMedium)); assert(threadGrid.x > 0); assert(threadGrid.y > 0); launch_kernel(buffered_pack_backward_kernel, threadGrid, threadBlock, 0, stream, numXYPlanes, xyPlaneOffsets, freqZData, buffer); } auto buffered_pack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const 
GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D::type>& freqZData, GPUArrayView3D::type> buffer) -> void { buffered_pack_backward_launch(stream, maxNumXYPlanes, numXYPlanes, xyPlaneOffsets, freqZData, buffer); } auto buffered_pack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D::type>& freqZData, GPUArrayView3D::type> buffer) -> void { buffered_pack_backward_launch(stream, maxNumXYPlanes, numXYPlanes, xyPlaneOffsets, freqZData, buffer); } auto buffered_pack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D::type>& freqZData, GPUArrayView3D::type> buffer) -> void { buffered_pack_backward_launch(stream, maxNumXYPlanes, numXYPlanes, xyPlaneOffsets, freqZData, buffer); } // Unpacks z-sticks from buffer after MPI_Alltoall // Dimension of buffer are (numRanks, maxNumZSticks, maxNumXYPlanes) // Dimension of freqXYData are (numLocalXYPlanes, dimY, dimX) template __global__ static void buffered_unpack_backward_kernel( const GPUArrayConstView1D numZSticks, const GPUArrayConstView1D indices, const GPUArrayConstView3D buffer, GPUArrayView2D freqXYDataFlat) { // buffer.dim_mid() is equal to maxNumZSticks const int xyPlaneIndex = threadIdx.x + blockIdx.x * blockDim.x; if (xyPlaneIndex < freqXYDataFlat.dim_outer()) { for (int r = 0; r < numZSticks.size(); ++r) { const int numCurrentZSticks = numZSticks(r); for (int zStickIndex = blockIdx.y; zStickIndex < numCurrentZSticks; zStickIndex += gridDim.y) { const int currentIndex = indices(r * buffer.dim_mid() + zStickIndex); freqXYDataFlat(xyPlaneIndex, currentIndex) = ConvertComplex::apply(buffer(r, zStickIndex, xyPlaneIndex)); } } } } template static auto buffered_unpack_backward_launch(const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, 
const GPUArrayView1D& indices, const GPUArrayView3D& buffer, GPUArrayView3D freqXYData) -> void { assert(buffer.dim_outer() == numZSticks.size()); assert(buffer.dim_inner() == maxNumXYPlanes); assert(indices.size() == buffer.dim_mid() * numZSticks.size()); const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((freqXYData.dim_outer() + threadBlock.x - 1) / threadBlock.x, std::min(buffer.dim_mid(), gpu::GridSizeMedium)); assert(threadGrid.x > 0); assert(threadGrid.y > 0); launch_kernel(buffered_unpack_backward_kernel, threadGrid, threadBlock, 0, stream, numZSticks, indices, buffer, GPUArrayView2D(freqXYData.data(), freqXYData.dim_outer(), freqXYData.dim_mid() * freqXYData.dim_inner(), freqXYData.device_id())); } auto buffered_unpack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& buffer, GPUArrayView3D::type> freqXYData) -> void { buffered_unpack_backward_launch(stream, maxNumXYPlanes, numZSticks, indices, buffer, freqXYData); } auto buffered_unpack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& buffer, GPUArrayView3D::type> freqXYData) -> void { buffered_unpack_backward_launch(stream, maxNumXYPlanes, numZSticks, indices, buffer, freqXYData); } auto buffered_unpack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& buffer, GPUArrayView3D::type> freqXYData) -> void { buffered_unpack_backward_launch(stream, maxNumXYPlanes, numZSticks, indices, buffer, freqXYData); } // Unpacks z-sticks from buffer after MPI_Alltoall // Dimension of buffer are (numRanks, maxNumZSticks, maxNumXYPlanes) // Dimension of freqZData are (numLocalZSticks, dimZ) template __global__ static void buffered_unpack_forward_kernel(const GPUArrayConstView1D 
numXYPlanes, const GPUArrayConstView1D xyPlaneOffsets, const GPUArrayConstView3D buffer, GPUArrayView2D freqZData) { const int xyPlaneIndex = threadIdx.x + blockIdx.x * blockDim.x; for (int r = 0; r < numXYPlanes.size(); ++r) { if (xyPlaneIndex < numXYPlanes(r)) { const int xyOffset = xyPlaneOffsets(r); for (int zStickIndex = blockIdx.y; zStickIndex < freqZData.dim_outer(); zStickIndex += gridDim.y) { freqZData(zStickIndex, xyPlaneIndex + xyOffset) = ConvertComplex::apply(buffer(r, zStickIndex, xyPlaneIndex)); } } } } template static auto buffered_unpack_forward_launch(const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView3D& buffer, GPUArrayView2D freqZData) -> void { assert(xyPlaneOffsets.size() == numXYPlanes.size()); assert(buffer.size() >= freqZData.size()); assert(buffer.dim_outer() == xyPlaneOffsets.size()); assert(buffer.dim_inner() == maxNumXYPlanes); const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((maxNumXYPlanes + threadBlock.x - 1) / threadBlock.x, std::min(freqZData.dim_outer(), gpu::GridSizeMedium)); assert(threadGrid.x > 0); assert(threadGrid.y > 0); launch_kernel(buffered_unpack_forward_kernel, threadGrid, threadBlock, 0, stream, numXYPlanes, xyPlaneOffsets, buffer, freqZData); } auto buffered_unpack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView3D::type>& buffer, GPUArrayView2D::type> freqZData) -> void { buffered_unpack_forward_launch(stream, maxNumXYPlanes, numXYPlanes, xyPlaneOffsets, buffer, freqZData); } auto buffered_unpack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView3D::type>& buffer, GPUArrayView2D::type> freqZData) -> void { buffered_unpack_forward_launch(stream, maxNumXYPlanes, numXYPlanes, xyPlaneOffsets, 
buffer, freqZData); } auto buffered_unpack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView3D::type>& buffer, GPUArrayView2D::type> freqZData) -> void { buffered_unpack_forward_launch(stream, maxNumXYPlanes, numXYPlanes, xyPlaneOffsets, buffer, freqZData); } // Packs z-sticks into buffer for MPI_Alltoall // Dimension of buffer are (numRanks, maxNumZSticks, maxNumXYPlanes) // Dimension of freqXYData are (numLocalXYPlanes, dimY, dimX) template __global__ static void buffered_pack_forward_kernel( const GPUArrayConstView1D numZSticks, const GPUArrayConstView1D indices, const GPUArrayConstView2D freqXYDataFlat, GPUArrayView3D buffer) { // buffer.dim_mid() is equal to maxNumZSticks const int xyPlaneIndex = threadIdx.x + blockIdx.x * blockDim.x; if (xyPlaneIndex < freqXYDataFlat.dim_outer()) { for (int r = 0; r < numZSticks.size(); ++r) { const int numCurrentZSticks = numZSticks(r); for (int zStickIndex = blockIdx.y; zStickIndex < numCurrentZSticks; zStickIndex += gridDim.y) { const int currentIndex = indices(r * buffer.dim_mid() + zStickIndex); buffer(r, zStickIndex, xyPlaneIndex) = ConvertComplex::apply( freqXYDataFlat(xyPlaneIndex, currentIndex)); } } } } template static auto buffered_pack_forward_launch(const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D& freqXYData, GPUArrayView3D buffer) -> void { assert(buffer.dim_outer() == numZSticks.size()); assert(buffer.dim_inner() == maxNumXYPlanes); assert(indices.size() == buffer.dim_mid() * numZSticks.size()); const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((freqXYData.dim_outer() + threadBlock.x - 1) / threadBlock.x, std::min(buffer.dim_mid(), gpu::GridSizeMedium)); assert(threadGrid.x > 0); assert(threadGrid.y > 0); launch_kernel(buffered_pack_forward_kernel, threadGrid, threadBlock, 0, stream, numZSticks, 
indices, GPUArrayConstView2D(freqXYData.data(), freqXYData.dim_outer(), freqXYData.dim_mid() * freqXYData.dim_inner(), freqXYData.device_id()), buffer); } auto buffered_pack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& freqXYData, GPUArrayView3D::type> buffer) -> void { buffered_pack_forward_launch(stream, maxNumXYPlanes, numZSticks, indices, freqXYData, buffer); } auto buffered_pack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& freqXYData, GPUArrayView3D::type> buffer) -> void { buffered_pack_forward_launch(stream, maxNumXYPlanes, numZSticks, indices, freqXYData, buffer); } auto buffered_pack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& freqXYData, GPUArrayView3D::type> buffer) -> void { buffered_pack_forward_launch(stream, maxNumXYPlanes, numZSticks, indices, freqXYData, buffer); } } // namespace spfft SpFFT-1.1.0/src/transpose/gpu_kernels/buffered_kernels.hpp000066400000000000000000000133101457701740000235770ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_BUFFERED_KERNELS_HPP #define SPFFT_BUFFERED_KERNELS_HPP #include #include "gpu_util/gpu_fft_api.hpp" #include "memory/gpu_array_view.hpp" namespace spfft { auto buffered_pack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D::type>& freqZData, GPUArrayView3D::type> buffer) -> void; auto buffered_pack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D::type>& freqZData, GPUArrayView3D::type> buffer) -> void; auto buffered_pack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D::type>& freqZData, GPUArrayView3D::type> buffer) -> void; auto buffered_unpack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& 
indices, const GPUArrayView3D::type>& buffer, GPUArrayView3D::type> freqXYData) -> void; auto buffered_unpack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& buffer, GPUArrayView3D::type> freqXYData) -> void; auto buffered_unpack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& buffer, GPUArrayView3D::type> freqXYData) -> void; auto buffered_unpack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView3D::type>& buffer, GPUArrayView2D::type> freqZData) -> void; auto buffered_unpack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView3D::type>& buffer, GPUArrayView2D::type> freqZData) -> void; auto buffered_unpack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView3D::type>& buffer, GPUArrayView2D::type> freqZData) -> void; auto buffered_pack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& freqXYData, GPUArrayView3D::type> buffer) -> void; auto buffered_pack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& freqXYData, GPUArrayView3D::type> buffer) -> void; auto buffered_pack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& freqXYData, GPUArrayView3D::type> buffer) -> void; } // namespace spfft #endif 
SpFFT-1.1.0/src/transpose/gpu_kernels/compact_buffered_kernels.cu000066400000000000000000000376421457701740000251430ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include "gpu_util/complex_conversion.cuh" #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_kernel_parameter.hpp" #include "gpu_util/gpu_runtime.hpp" #include "memory/array_view_utility.hpp" #include "memory/gpu_array_const_view.hpp" namespace spfft { template __global__ static void compact_buffered_pack_backward_kernel( const GPUArrayConstView1D numXYPlanes, const GPUArrayConstView1D xyPlaneOffsets, const GPUArrayConstView2D freqZData, GPUArrayView1D buffer) { const int xyPlaneIndex = threadIdx.x + blockIdx.x * blockDim.x; int bufferOffset = 0; for (int r = 0; r < numXYPlanes.size(); ++r) { const int numCurrentXYPlanes = numXYPlanes(r); if (xyPlaneIndex < numCurrentXYPlanes) { for (int zStickIndex = blockIdx.y; zStickIndex < freqZData.dim_outer(); zStickIndex += gridDim.y) { buffer(bufferOffset + zStickIndex * numCurrentXYPlanes + xyPlaneIndex) = ConvertComplex::apply( freqZData(zStickIndex, xyPlaneIndex + xyPlaneOffsets(r))); } } bufferOffset += numCurrentXYPlanes * freqZData.dim_outer(); } } template static auto compact_buffered_pack_backward_launch(const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D& freqZData, GPUArrayView1D buffer) -> void { assert(xyPlaneOffsets.size() == numXYPlanes.size()); const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((maxNumXYPlanes + threadBlock.x - 1) / threadBlock.x, std::min(freqZData.dim_outer(), gpu::GridSizeMedium)); launch_kernel(compact_buffered_pack_backward_kernel, threadGrid, threadBlock, 0, stream, numXYPlanes, xyPlaneOffsets, freqZData, buffer); } auto compact_buffered_pack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D::type>& freqZData, GPUArrayView1D::type> buffer) -> void { compact_buffered_pack_backward_launch(stream, maxNumXYPlanes, numXYPlanes, xyPlaneOffsets, 
freqZData, buffer); } auto compact_buffered_pack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D::type>& freqZData, GPUArrayView1D::type> buffer) -> void { compact_buffered_pack_backward_launch(stream, maxNumXYPlanes, numXYPlanes, xyPlaneOffsets, freqZData, buffer); } auto compact_buffered_pack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D::type>& freqZData, GPUArrayView1D::type> buffer) -> void { compact_buffered_pack_backward_launch(stream, maxNumXYPlanes, numXYPlanes, xyPlaneOffsets, freqZData, buffer); } template __global__ static void compact_buffered_unpack_backward_kernel( const int maxNumZSticks, const GPUArrayConstView1D numZSticks, const GPUArrayConstView1D indices, const GPUArrayConstView1D buffer, GPUArrayView2D freqXYData) { const int xyPlaneIndex = threadIdx.x + blockIdx.x * blockDim.x; int bufferOffset = 0; if (xyPlaneIndex < freqXYData.dim_outer()) { for (int r = 0; r < numZSticks.size(); ++r) { const int numCurrentZSticks = numZSticks(r); for (int zStickIndex = blockIdx.y; zStickIndex < numCurrentZSticks; zStickIndex += gridDim.y) { const int currentIndex = indices(r * maxNumZSticks + zStickIndex); freqXYData(xyPlaneIndex, currentIndex) = ConvertComplex::apply( buffer(bufferOffset + zStickIndex * freqXYData.dim_outer() + xyPlaneIndex)); } bufferOffset += numCurrentZSticks * freqXYData.dim_outer(); } } } template static auto compact_buffered_unpack_backward_launch(const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView1D& buffer, GPUArrayView3D freqXYData) -> void { const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((freqXYData.dim_outer() + threadBlock.x - 1) / threadBlock.x, std::min(maxNumZSticks, gpu::GridSizeMedium)); 
launch_kernel(compact_buffered_unpack_backward_kernel, threadGrid, threadBlock, 0, stream, maxNumZSticks, numZSticks, indices, buffer, GPUArrayView2D(freqXYData.data(), freqXYData.dim_outer(), freqXYData.dim_mid() * freqXYData.dim_inner(), freqXYData.device_id())); } auto compact_buffered_unpack_backward( const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView1D::type>& buffer, GPUArrayView3D::type> freqXYData) -> void { compact_buffered_unpack_backward_launch(stream, maxNumZSticks, numZSticks, indices, buffer, freqXYData); } auto compact_buffered_unpack_backward( const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView1D::type>& buffer, GPUArrayView3D::type> freqXYData) -> void { compact_buffered_unpack_backward_launch(stream, maxNumZSticks, numZSticks, indices, buffer, freqXYData); } auto compact_buffered_unpack_backward( const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView1D::type>& buffer, GPUArrayView3D::type> freqXYData) -> void { compact_buffered_unpack_backward_launch(stream, maxNumZSticks, numZSticks, indices, buffer, freqXYData); } template __global__ static void compact_buffered_unpack_forward_kernel( const GPUArrayConstView1D numXYPlanes, const GPUArrayConstView1D xyPlaneOffsets, const GPUArrayConstView1D buffer, GPUArrayView2D freqZData) { const int xyPlaneIndex = threadIdx.x + blockIdx.x * blockDim.x; int bufferOffset = 0; for (int r = 0; r < numXYPlanes.size(); ++r) { const int numCurrentXYPlanes = numXYPlanes(r); if (xyPlaneIndex < numCurrentXYPlanes) { for (int zStickIndex = blockIdx.y; zStickIndex < freqZData.dim_outer(); zStickIndex += gridDim.y) { freqZData(zStickIndex, xyPlaneIndex + xyPlaneOffsets(r)) = ConvertComplex::apply( buffer(bufferOffset + zStickIndex * numCurrentXYPlanes + xyPlaneIndex)); 
} } bufferOffset += numCurrentXYPlanes * freqZData.dim_outer(); } } template static auto compact_buffered_unpack_forward_launch(const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView1D& buffer, GPUArrayView2D freqZData) -> void { assert(xyPlaneOffsets.size() == numXYPlanes.size()); const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((maxNumXYPlanes + threadBlock.x - 1) / threadBlock.x, std::min(freqZData.dim_outer(), gpu::GridSizeMedium)); launch_kernel(compact_buffered_unpack_forward_kernel, threadGrid, threadBlock, 0, stream, numXYPlanes, xyPlaneOffsets, buffer, freqZData); } auto compact_buffered_unpack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView1D::type>& buffer, GPUArrayView2D::type> freqZData) -> void { compact_buffered_unpack_forward_launch(stream, maxNumXYPlanes, numXYPlanes, xyPlaneOffsets, buffer, freqZData); } auto compact_buffered_unpack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView1D::type>& buffer, GPUArrayView2D::type> freqZData) -> void { compact_buffered_unpack_forward_launch(stream, maxNumXYPlanes, numXYPlanes, xyPlaneOffsets, buffer, freqZData); } auto compact_buffered_unpack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView1D::type>& buffer, GPUArrayView2D::type> freqZData) -> void { compact_buffered_unpack_forward_launch(stream, maxNumXYPlanes, numXYPlanes, xyPlaneOffsets, buffer, freqZData); } template __global__ static void compact_buffered_pack_forward_kernel( const int maxNumZSticks, const GPUArrayConstView1D numZSticks, const GPUArrayConstView1D indices, const GPUArrayConstView2D freqXYData, GPUArrayView1D 
buffer) { const int xyPlaneIndex = threadIdx.x + blockIdx.x * blockDim.x; int bufferOffset = 0; if (xyPlaneIndex < freqXYData.dim_outer()) { for (int r = 0; r < numZSticks.size(); ++r) { const int numCurrentZSticks = numZSticks(r); for (int zStickIndex = blockIdx.y; zStickIndex < numCurrentZSticks; zStickIndex += gridDim.y) { const int currentIndex = indices(r * maxNumZSticks + zStickIndex); buffer(bufferOffset + zStickIndex * freqXYData.dim_outer() + xyPlaneIndex) = ConvertComplex::apply(freqXYData(xyPlaneIndex, currentIndex)); } bufferOffset += numCurrentZSticks * freqXYData.dim_outer(); } } } template static auto compact_buffered_pack_forward_launch(const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D& freqXYData, GPUArrayView1D buffer) -> void { const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((freqXYData.dim_outer() + threadBlock.x - 1) / threadBlock.x, std::min(maxNumZSticks, gpu::GridSizeMedium)); launch_kernel(compact_buffered_pack_forward_kernel, threadGrid, threadBlock, 0, stream, maxNumZSticks, numZSticks, indices, GPUArrayConstView2D(freqXYData.data(), freqXYData.dim_outer(), freqXYData.dim_mid() * freqXYData.dim_inner(), freqXYData.device_id()), buffer); } auto compact_buffered_pack_forward( const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& freqXYData, GPUArrayView1D::type> buffer) -> void { compact_buffered_pack_forward_launch(stream, maxNumZSticks, numZSticks, indices, freqXYData, buffer); } auto compact_buffered_pack_forward( const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& freqXYData, GPUArrayView1D::type> buffer) -> void { compact_buffered_pack_forward_launch(stream, maxNumZSticks, numZSticks, indices, freqXYData, buffer); } auto 
compact_buffered_pack_forward( const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& freqXYData, GPUArrayView1D::type> buffer) -> void { compact_buffered_pack_forward_launch(stream, maxNumZSticks, numZSticks, indices, freqXYData, buffer); } } // namespace spfft SpFFT-1.1.0/src/transpose/gpu_kernels/compact_buffered_kernels.hpp000066400000000000000000000134541457701740000253160ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_COMPACT_BUFFERED_KERNELS_HPP #define SPFFT_COMPACT_BUFFERED_KERNELS_HPP #include #include "gpu_util/gpu_fft_api.hpp" #include "memory/gpu_array_view.hpp" namespace spfft { auto compact_buffered_pack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D::type>& freqZData, GPUArrayView1D::type> buffer) -> void; auto compact_buffered_pack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D::type>& freqZData, GPUArrayView1D::type> buffer) -> void; auto compact_buffered_pack_backward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView2D::type>& freqZData, GPUArrayView1D::type> buffer) -> void; auto compact_buffered_unpack_backward( const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView1D::type>& buffer, GPUArrayView3D::type> freqXYData) -> void; auto compact_buffered_unpack_backward( const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView1D::type>& buffer, GPUArrayView3D::type> freqXYData) -> void; auto compact_buffered_unpack_backward( const gpu::StreamType stream, 
const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView1D::type>& buffer, GPUArrayView3D::type> freqXYData) -> void; auto compact_buffered_unpack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView1D::type>& buffer, GPUArrayView2D::type> freqZData) -> void; auto compact_buffered_unpack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView1D::type>& buffer, GPUArrayView2D::type> freqZData) -> void; auto compact_buffered_unpack_forward( const gpu::StreamType stream, const int maxNumXYPlanes, const GPUArrayView1D numXYPlanes, const GPUArrayView1D& xyPlaneOffsets, const GPUArrayView1D::type>& buffer, GPUArrayView2D::type> freqZData) -> void; auto compact_buffered_pack_forward( const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& freqXYData, GPUArrayView1D::type> buffer) -> void; auto compact_buffered_pack_forward( const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& freqXYData, GPUArrayView1D::type> buffer) -> void; auto compact_buffered_pack_forward( const gpu::StreamType stream, const int maxNumZSticks, const GPUArrayView1D& numZSticks, const GPUArrayView1D& indices, const GPUArrayView3D::type>& freqXYData, GPUArrayView1D::type> buffer) -> void; } // namespace spfft #endif SpFFT-1.1.0/src/transpose/gpu_kernels/local_transpose_kernels.cu000066400000000000000000000243751457701740000250420ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. 
Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_kernel_parameter.hpp" #include "gpu_util/gpu_runtime.hpp" #include "memory/array_view_utility.hpp" #include "memory/gpu_array_const_view.hpp" #include "spfft/config.h" namespace spfft { // ------------------ // Backward // ------------------ #ifdef SPFFT_CUDA // kernel optimized for NVIDIA // Places data from z-sticks into a full 3d grid template __global__ static void transpose_backward_kernel(const GPUArrayConstView1D indices, const GPUArrayConstView2D freqZData, GPUArrayView2D spaceDomainFlat) { // const int z = threadIdx.x + blockIdx.x * blockDim.x; const int stickIndex = threadIdx.x + blockIdx.x * blockDim.x; if (stickIndex < indices.size()) { const auto stickXYIndex = indices(stickIndex); for (int z = blockIdx.y; z < freqZData.dim_inner(); z += gridDim.y) { spaceDomainFlat(z, stickXYIndex) = freqZData(stickIndex, z); } } } auto local_transpose_backward( const gpu::StreamType stream, const GPUArrayView1D indices, const GPUArrayView2D::type>& freqZData, GPUArrayView3D::type> spaceDomain) -> void { assert(indices.size() == freqZData.dim_outer()); assert(indices.size() <= spaceDomain.dim_inner() * spaceDomain.dim_mid()); assert(spaceDomain.dim_outer() == freqZData.dim_inner()); const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((freqZData.dim_outer() + threadBlock.x - 1) / threadBlock.x, std::min(freqZData.dim_inner(), gpu::GridSizeMedium)); launch_kernel(transpose_backward_kernel::type>, threadGrid, threadBlock, 0, stream, indices, freqZData, GPUArrayView2D::type>( spaceDomain.data(), spaceDomain.dim_outer(), spaceDomain.dim_mid() * spaceDomain.dim_inner(), spaceDomain.device_id())); } auto local_transpose_backward( const gpu::StreamType stream, const GPUArrayView1D indices, const GPUArrayView2D::type>& freqZData, GPUArrayView3D::type> spaceDomain) -> void { assert(indices.size() == freqZData.dim_outer()); assert(indices.size() <= spaceDomain.dim_inner() * 
spaceDomain.dim_mid()); assert(spaceDomain.dim_outer() == freqZData.dim_inner()); const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((freqZData.dim_outer() + threadBlock.x - 1) / threadBlock.x, std::min(freqZData.dim_inner(), gpu::GridSizeMedium)); launch_kernel(transpose_backward_kernel::type>, threadGrid, threadBlock, 0, stream, indices, freqZData, GPUArrayView2D::type>( spaceDomain.data(), spaceDomain.dim_outer(), spaceDomain.dim_mid() * spaceDomain.dim_inner(), spaceDomain.device_id())); } #else // kernel optimized for AMD // (ideal memory access pattern is different) template __global__ static void transpose_backward_kernel(const GPUArrayConstView1D indices, const GPUArrayConstView2D freqZData, GPUArrayView2D spaceDomainFlat) { const int z = threadIdx.x + blockIdx.x * blockDim.x; if (z < freqZData.dim_inner()) { for (int stickIndex = blockIdx.y; stickIndex < indices.size(); stickIndex += gridDim.y) { const auto stickXYIndex = indices(stickIndex); spaceDomainFlat(z, stickXYIndex) = freqZData(stickIndex, z); } } } auto local_transpose_backward( const gpu::StreamType stream, const GPUArrayView1D indices, const GPUArrayView2D::type>& freqZData, GPUArrayView3D::type> spaceDomain) -> void { assert(indices.size() == freqZData.dim_outer()); assert(indices.size() <= spaceDomain.dim_inner() * spaceDomain.dim_mid()); assert(spaceDomain.dim_outer() == freqZData.dim_inner()); const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((freqZData.dim_inner() + threadBlock.x - 1) / threadBlock.x, std::min(freqZData.dim_outer(), gpu::GridSizeMedium)); launch_kernel(transpose_backward_kernel::type>, threadGrid, threadBlock, 0, stream, indices, freqZData, GPUArrayView2D::type>( spaceDomain.data(), spaceDomain.dim_outer(), spaceDomain.dim_mid() * spaceDomain.dim_inner(), spaceDomain.device_id())); } auto local_transpose_backward( const gpu::StreamType stream, const GPUArrayView1D indices, const GPUArrayView2D::type>& freqZData, GPUArrayView3D::type> 
spaceDomain) -> void { assert(indices.size() == freqZData.dim_outer()); assert(indices.size() <= spaceDomain.dim_inner() * spaceDomain.dim_mid()); assert(spaceDomain.dim_outer() == freqZData.dim_inner()); const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((freqZData.dim_inner() + threadBlock.x - 1) / threadBlock.x, std::min(freqZData.dim_outer(), gpu::GridSizeMedium)); launch_kernel(transpose_backward_kernel::type>, threadGrid, threadBlock, 0, stream, indices, freqZData, GPUArrayView2D::type>( spaceDomain.data(), spaceDomain.dim_outer(), spaceDomain.dim_mid() * spaceDomain.dim_inner(), spaceDomain.device_id())); } #endif // ------------------ // Forward // ------------------ template __global__ static void transpose_forward_kernel(const GPUArrayConstView1D indices, const GPUArrayConstView2D spaceDomainFlat, GPUArrayView2D freqZData) { const int z = threadIdx.x + blockIdx.x * blockDim.x; if (z < freqZData.dim_inner()) { for (int stickIndex = blockIdx.y; stickIndex < indices.size(); stickIndex += gridDim.y) { const auto stickXYIndex = indices(stickIndex); freqZData(stickIndex, z) = spaceDomainFlat(z, stickXYIndex); } } } auto local_transpose_forward( const gpu::StreamType stream, const GPUArrayView1D indices, const GPUArrayView3D::type>& spaceDomain, GPUArrayView2D::type> freqZData) -> void { assert(indices.size() == freqZData.dim_outer()); assert(indices.size() <= spaceDomain.dim_inner() * spaceDomain.dim_mid()); assert(spaceDomain.dim_outer() == freqZData.dim_inner()); const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((freqZData.dim_inner() + threadBlock.x - 1) / threadBlock.x, std::min(freqZData.dim_outer(), gpu::GridSizeMedium)); launch_kernel(transpose_forward_kernel::type>, threadGrid, threadBlock, 0, stream, indices, GPUArrayConstView2D::type>( spaceDomain.data(), spaceDomain.dim_outer(), spaceDomain.dim_mid() * spaceDomain.dim_inner(), spaceDomain.device_id()), freqZData); } auto local_transpose_forward( const gpu::StreamType 
stream, const GPUArrayView1D indices, const GPUArrayView3D::type>& spaceDomain, GPUArrayView2D::type> freqZData) -> void { assert(indices.size() == freqZData.dim_outer()); assert(indices.size() <= spaceDomain.dim_inner() * spaceDomain.dim_mid()); assert(spaceDomain.dim_outer() == freqZData.dim_inner()); const dim3 threadBlock(gpu::BlockSizeSmall); const dim3 threadGrid((freqZData.dim_inner() + threadBlock.x - 1) / threadBlock.x, std::min(freqZData.dim_outer(), gpu::GridSizeMedium)); launch_kernel(transpose_forward_kernel::type>, threadGrid, threadBlock, 0, stream, indices, GPUArrayConstView2D::type>( spaceDomain.data(), spaceDomain.dim_outer(), spaceDomain.dim_mid() * spaceDomain.dim_inner(), spaceDomain.device_id()), freqZData); } } // namespace spfft SpFFT-1.1.0/src/transpose/gpu_kernels/local_transpose_kernels.hpp000066400000000000000000000055111457701740000252110ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include "gpu_util/gpu_fft_api.hpp" #include "memory/gpu_array_view.hpp" namespace spfft { // ------------------ // Backward // ------------------ auto local_transpose_backward( const gpu::StreamType stream, const GPUArrayView1D indices, const GPUArrayView2D::type>& freqZData, GPUArrayView3D::type> spaceDomain) -> void; auto local_transpose_backward( const gpu::StreamType stream, const GPUArrayView1D indices, const GPUArrayView2D::type>& freqZData, GPUArrayView3D::type> spaceDomain) -> void; // ------------------ // Forward // ------------------ auto local_transpose_forward( const gpu::StreamType stream, const GPUArrayView1D indices, const GPUArrayView3D::type>& spaceDomain, GPUArrayView2D::type> freqZData) -> void; auto local_transpose_forward( const gpu::StreamType stream, const GPUArrayView1D indices, const GPUArrayView3D::type>& spaceDomain, GPUArrayView2D::type> freqZData) -> void; } // namespace spfft SpFFT-1.1.0/src/transpose/transpose.hpp000066400000000000000000000050371457701740000200010ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_TRANSPOSE_HPP #define SPFFT_TRANSPOSE_HPP #include "spfft/config.h" #include "util/common_types.hpp" namespace spfft { class Transpose { public: virtual auto pack_forward() -> void {} virtual auto exchange_forward_start(const bool nonBlockingExchange) -> void = 0; virtual auto exchange_forward_finalize() -> void {} virtual auto unpack_forward() -> void {} inline auto forward() -> void { this->pack_forward(); this->exchange_forward_start(false); this->exchange_forward_finalize(); this->unpack_forward(); } virtual auto pack_backward() -> void {} virtual auto exchange_backward_start(const bool nonBlockingExchange) -> void = 0; virtual auto exchange_backward_finalize() -> void {} virtual auto unpack_backward() -> void {} inline auto backward() -> void { this->pack_backward(); this->exchange_backward_start(false); this->exchange_backward_finalize(); this->unpack_backward(); } virtual ~Transpose() = default; }; } // namespace spfft #endif SpFFT-1.1.0/src/transpose/transpose_gpu.hpp000066400000000000000000000115631457701740000206550ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TRANSPOSE_GPU_HPP #define SPFFT_TRANSPOSE_GPU_HPP #include #include #include #include #include #include "parameters/parameters.hpp" #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/type_check.hpp" #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_stream_handle.hpp" #include "memory/array_view_utility.hpp" #include "memory/gpu_array.hpp" #include "memory/gpu_array_view.hpp" #include "transpose/gpu_kernels/local_transpose_kernels.hpp" namespace spfft { // Transpose Z sticks, such that data is represented by xy planes, where the y-dimension is // continous and vice versa template class TransposeGPU : public Transpose { static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = typename gpu::fft::ComplexType::type; public: TransposeGPU(const std::shared_ptr& param, GPUStreamHandle stream, GPUArrayView3D spaceDomainData, GPUArrayView2D freqDomainData) : stream_(std::move(stream)), spaceDomainData_(spaceDomainData), freqDomainData_(freqDomainData), indices_(param->num_z_sticks(0)) { // single node only checks assert(spaceDomainData.dim_outer() == 
freqDomainData.dim_inner()); // check data dimensions and parameters assert(param->dim_x_freq() == spaceDomainData.dim_inner()); assert(param->dim_y() == spaceDomainData.dim_mid()); assert(param->dim_z() == spaceDomainData.dim_outer()); assert(param->dim_z() == freqDomainData.dim_inner()); assert(param->num_z_sticks(0) == freqDomainData.dim_outer()); // data must be disjoint assert(disjoint(spaceDomainData, freqDomainData)); // copy xy indices const auto zStickXYIndices = param->z_stick_xy_indices(0); std::vector transposedIndices; transposedIndices.reserve(zStickXYIndices.size()); for (const auto& index : zStickXYIndices) { const int x = index / param->dim_y(); const int y = index - x * param->dim_y(); transposedIndices.emplace_back(y * param->dim_x_freq() + x); } copy_to_gpu(transposedIndices, indices_); } auto exchange_backward_start(const bool) -> void override { gpu::check_status(gpu::memset_async( static_cast(spaceDomainData_.data()), 0, spaceDomainData_.size() * sizeof(typename decltype(spaceDomainData_)::ValueType), stream_.get())); if (freqDomainData_.size() > 0 && spaceDomainData_.size() > 0) { local_transpose_backward(stream_.get(), create_1d_view(indices_, 0, indices_.size()), freqDomainData_, spaceDomainData_); } } auto unpack_backward() -> void override {} auto exchange_forward_start(const bool) -> void override { if (freqDomainData_.size() > 0 && spaceDomainData_.size() > 0) { local_transpose_forward(stream_.get(), create_1d_view(indices_, 0, indices_.size()), spaceDomainData_, freqDomainData_); } } auto unpack_forward() -> void override {} private: GPUStreamHandle stream_; GPUArrayView3D spaceDomainData_; GPUArrayView2D freqDomainData_; GPUArray indices_; }; } // namespace spfft #endif SpFFT-1.1.0/src/transpose/transpose_host.hpp000066400000000000000000000163011457701740000210320ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are 
permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_TRANSPOSE_HOST_HPP #define SPFFT_TRANSPOSE_HOST_HPP #include #include #include #include #include #include #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/config.h" #include "spfft/exceptions.hpp" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" #include "util/type_check.hpp" namespace spfft { // Transpose Z sticks, such that data is represented by xy planes, where the y-dimension is // continous and vice versa template class TransposeHost : public Transpose { static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = std::complex; public: TransposeHost(const std::shared_ptr& param, HostArrayView3D spaceDomainData, HostArrayView2D freqDomainData) : spaceDomainData_(spaceDomainData), freqDomainData_(freqDomainData), param_(param) { // single rank only checks assert(spaceDomainData.dim_outer() == freqDomainData.dim_inner()); // check data dimensions and parameters assert(param->dim_x_freq() == spaceDomainData.dim_mid()); assert(param->dim_y() == spaceDomainData.dim_inner()); assert(param->dim_z() == spaceDomainData.dim_outer()); assert(param->dim_z() == freqDomainData.dim_inner()); assert(param->num_z_sticks(0) == freqDomainData.dim_outer()); // data must be disjoint assert(disjoint(spaceDomainData, freqDomainData)); } auto exchange_backward_start(const bool) -> void override {} auto unpack_backward() -> void override { SPFFT_OMP_PRAGMA("omp for schedule(static)") // implicit barrier for (SizeType z = 0; z < spaceDomainData_.dim_outer(); ++z) { std::memset(static_cast(&spaceDomainData_(z, 0, 0)), 0, sizeof(typename decltype(spaceDomainData_)::ValueType) * spaceDomainData_.dim_inner() * spaceDomainData_.dim_mid()); } const SizeType unrolledLoopEnd = freqDomainData_.dim_outer() < 4 ? 
0 : freqDomainData_.dim_outer() - 3; auto stickIndicesView = param_->z_stick_xy_indices(0); auto spaceDomainDataFlat = create_2d_view(spaceDomainData_, 0, spaceDomainData_.dim_outer(), spaceDomainData_.dim_mid() * spaceDomainData_.dim_inner()); // unrolled loop SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = 0; zStickIndex < unrolledLoopEnd; zStickIndex += 4) { const SizeType xyIndex1 = stickIndicesView(zStickIndex); const SizeType xyIndex2 = stickIndicesView(zStickIndex + 1); const SizeType xyIndex3 = stickIndicesView(zStickIndex + 2); const SizeType xyIndex4 = stickIndicesView(zStickIndex + 3); for (SizeType zIndex = 0; zIndex < freqDomainData_.dim_inner(); ++zIndex) { spaceDomainDataFlat(zIndex, xyIndex1) = freqDomainData_(zStickIndex, zIndex); spaceDomainDataFlat(zIndex, xyIndex2) = freqDomainData_(zStickIndex + 1, zIndex); spaceDomainDataFlat(zIndex, xyIndex3) = freqDomainData_(zStickIndex + 2, zIndex); spaceDomainDataFlat(zIndex, xyIndex4) = freqDomainData_(zStickIndex + 3, zIndex); } } // transpose remaining elements SPFFT_OMP_PRAGMA("omp for schedule(static)") // keep barrier for (SizeType zStickIndex = unrolledLoopEnd; zStickIndex < freqDomainData_.dim_outer(); zStickIndex += 1) { const SizeType xyIndex = stickIndicesView(zStickIndex); for (SizeType zIndex = 0; zIndex < freqDomainData_.dim_inner(); ++zIndex) { spaceDomainDataFlat(zIndex, xyIndex) = freqDomainData_(zStickIndex, zIndex); } } } auto exchange_forward_start(const bool) -> void override {} auto unpack_forward() -> void override { const SizeType unrolledLoopEnd = freqDomainData_.dim_outer() < 4 ? 
0 : freqDomainData_.dim_outer() - 3; auto stickIndicesView = param_->z_stick_xy_indices(0); auto spaceDomainDataFlat = create_2d_view(spaceDomainData_, 0, spaceDomainData_.dim_outer(), spaceDomainData_.dim_mid() * spaceDomainData_.dim_inner()); // unrolled loop SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = 0; zStickIndex < unrolledLoopEnd; zStickIndex += 4) { const SizeType xyIndex1 = stickIndicesView(zStickIndex); const SizeType xyIndex2 = stickIndicesView(zStickIndex + 1); const SizeType xyIndex3 = stickIndicesView(zStickIndex + 2); const SizeType xyIndex4 = stickIndicesView(zStickIndex + 3); for (SizeType zIndex = 0; zIndex < freqDomainData_.dim_inner(); ++zIndex) { freqDomainData_(zStickIndex, zIndex) = spaceDomainDataFlat(zIndex, xyIndex1); freqDomainData_(zStickIndex + 1, zIndex) = spaceDomainDataFlat(zIndex, xyIndex2); freqDomainData_(zStickIndex + 2, zIndex) = spaceDomainDataFlat(zIndex, xyIndex3); freqDomainData_(zStickIndex + 3, zIndex) = spaceDomainDataFlat(zIndex, xyIndex4); } } // transpose remaining elements SPFFT_OMP_PRAGMA("omp for schedule(static)") // keep barrier for (SizeType zStickIndex = unrolledLoopEnd; zStickIndex < freqDomainData_.dim_outer(); zStickIndex += 1) { const SizeType xyIndex = stickIndicesView(zStickIndex); for (SizeType zIndex = 0; zIndex < freqDomainData_.dim_inner(); ++zIndex) { freqDomainData_(zStickIndex, zIndex) = spaceDomainDataFlat(zIndex, xyIndex); } } } private: HostArrayView3D spaceDomainData_; HostArrayView2D freqDomainData_; std::shared_ptr param_; }; } // namespace spfft #endif SpFFT-1.1.0/src/transpose/transpose_mpi_buffered_gpu.cpp000066400000000000000000000306641457701740000233620ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. 
Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include "memory/array_view_utility.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/exceptions.hpp" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" #include "util/type_check.hpp" #if defined(SPFFT_MPI) && (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_transfer.hpp" #include "mpi_util/mpi_check_status.hpp" #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_match_elementary_type.hpp" #include "transpose/gpu_kernels/buffered_kernels.hpp" #include "transpose/transpose_mpi_buffered_gpu.hpp" namespace spfft { template TransposeMPIBufferedGPU::TransposeMPIBufferedGPU( const std::shared_ptr& param, MPICommunicatorHandle comm, HostArrayView1D spaceDomainBufferHost, GPUArrayView3D spaceDomainDataGPU, GPUArrayView1D spaceDomainBufferGPU, GPUStreamHandle spaceDomainStream, HostArrayView1D freqDomainBufferHost, GPUArrayView2D freqDomainDataGPU, GPUArrayView1D freqDomainBufferGPU, GPUStreamHandle freqDomainStream) : param_(param), comm_(std::move(comm)), spaceDomainBufferHost_(create_new_type_1d_view( spaceDomainBufferHost, comm_.size() * param_->max_num_xy_planes() * param_->max_num_z_sticks())), freqDomainBufferHost_(create_new_type_1d_view( freqDomainBufferHost, comm_.size() * param_->max_num_xy_planes() * param_->max_num_z_sticks())), spaceDomainDataGPU_(spaceDomainDataGPU), freqDomainDataGPU_(freqDomainDataGPU), spaceDomainBufferGPU_(create_new_type_3d_view( spaceDomainBufferGPU, comm_.size(), param_->max_num_z_sticks(), param_->max_num_xy_planes())), freqDomainBufferGPU_(create_new_type_3d_view( freqDomainBufferGPU, comm_.size(), param_->max_num_z_sticks(), param_->max_num_xy_planes())), spaceDomainStream_(std::move(spaceDomainStream)), freqDomainStream_(std::move(freqDomainStream)) { 
assert(param_->dim_y() == spaceDomainDataGPU.dim_mid()); assert(param_->dim_x_freq() == spaceDomainDataGPU.dim_inner()); assert(param_->num_xy_planes(comm_.rank()) == spaceDomainDataGPU.dim_outer()); assert(param_->dim_z() == freqDomainDataGPU.dim_inner()); assert(param_->num_z_sticks(comm_.rank()) == freqDomainDataGPU.dim_outer()); assert(spaceDomainBufferGPU.size() >= param_->max_num_xy_planes() * param_->max_num_z_sticks() * comm_.size()); assert(freqDomainBufferGPU.size() >= param_->max_num_xy_planes() * param_->max_num_z_sticks() * comm_.size()); assert(spaceDomainBufferHost.size() >= param_->max_num_xy_planes() * param_->max_num_z_sticks() * comm_.size()); assert(freqDomainBufferHost.size() >= param_->max_num_xy_planes() * param_->max_num_z_sticks() * comm_.size()); // assert(disjoint(spaceDomainDataGPU, freqDomainDataGPU)); assert(disjoint(spaceDomainDataGPU, spaceDomainBufferGPU)); assert(disjoint(freqDomainDataGPU, freqDomainBufferGPU)); assert(disjoint(spaceDomainBufferHost, freqDomainBufferHost)); #ifdef SPFFT_GPU_DIRECT assert(disjoint(spaceDomainBufferGPU, freqDomainBufferGPU)); #endif // create underlying type mpiTypeHandle_ = MPIDatatypeHandle::create_contiguous(2, MPIMatchElementaryType::get()); // copy relevant parameters std::vector numZSticksHost(comm_.size()); std::vector numXYPlanesHost(comm_.size()); std::vector xyPlaneOffsetsHost(comm_.size()); std::vector indicesHost(comm_.size() * param_->max_num_z_sticks()); for (SizeType r = 0; r < comm_.size(); ++r) { numZSticksHost[r] = static_cast(param_->num_z_sticks(r)); numXYPlanesHost[r] = static_cast(param_->num_xy_planes(r)); xyPlaneOffsetsHost[r] = static_cast(param_->xy_plane_offset(r)); const auto zStickXYIndices = param_->z_stick_xy_indices(r); for (SizeType i = 0; i < zStickXYIndices.size(); ++i) { // transpose stick index const int xyIndex = zStickXYIndices(i); const int x = xyIndex / param_->dim_y(); const int y = xyIndex - x * param_->dim_y(); indicesHost[r * param_->max_num_z_sticks() + 
i] = y * param_->dim_x_freq() + x; } } numZSticksGPU_ = GPUArray(numZSticksHost.size()); numXYPlanesGPU_ = GPUArray(numXYPlanesHost.size()); xyPlaneOffsetsGPU_ = GPUArray(xyPlaneOffsetsHost.size()); indicesGPU_ = GPUArray(indicesHost.size()); copy_to_gpu(numZSticksHost, numZSticksGPU_); copy_to_gpu(numXYPlanesHost, numXYPlanesGPU_); copy_to_gpu(xyPlaneOffsetsHost, xyPlaneOffsetsGPU_); copy_to_gpu(indicesHost, indicesGPU_); } template auto TransposeMPIBufferedGPU::pack_backward() -> void { if (freqDomainDataGPU_.size() > 0 && freqDomainBufferGPU_.size() > 0) { buffered_pack_backward(freqDomainStream_.get(), param_->max_num_xy_planes(), create_1d_view(numXYPlanesGPU_, 0, numXYPlanesGPU_.size()), create_1d_view(xyPlaneOffsetsGPU_, 0, xyPlaneOffsetsGPU_.size()), freqDomainDataGPU_, freqDomainBufferGPU_); #ifndef SPFFT_GPU_DIRECT copy_from_gpu_async(freqDomainStream_, freqDomainBufferGPU_, freqDomainBufferHost_); #endif } } template auto TransposeMPIBufferedGPU::unpack_backward() -> void { if (spaceDomainDataGPU_.size() > 0) { gpu::check_status(gpu::memset_async( static_cast(spaceDomainDataGPU_.data()), 0, spaceDomainDataGPU_.size() * sizeof(typename decltype(spaceDomainDataGPU_)::ValueType), spaceDomainStream_.get())); if (spaceDomainBufferGPU_.size() > 0) { #ifndef SPFFT_GPU_DIRECT copy_to_gpu_async(spaceDomainStream_, spaceDomainBufferHost_, spaceDomainBufferGPU_); #endif buffered_unpack_backward(spaceDomainStream_.get(), param_->max_num_xy_planes(), create_1d_view(numZSticksGPU_, 0, numZSticksGPU_.size()), create_1d_view(indicesGPU_, 0, indicesGPU_.size()), spaceDomainBufferGPU_, spaceDomainDataGPU_); } } } template auto TransposeMPIBufferedGPU::exchange_backward_start(const bool nonBlockingExchange) -> void { assert(omp_get_thread_num() == 0); // only master thread must be allowed to enter gpu::check_status(gpu::stream_synchronize(freqDomainStream_.get())); #ifdef SPFFT_GPU_DIRECT auto sendBufferPtr = freqDomainBufferGPU_.data(); auto recvBufferPtr = 
spaceDomainBufferGPU_.data(); #else auto sendBufferPtr = freqDomainBufferHost_.data(); auto recvBufferPtr = spaceDomainBufferHost_.data(); #endif if (nonBlockingExchange) { // start non-blocking exchange mpi_check_status( MPI_Ialltoall(sendBufferPtr, param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), recvBufferPtr, param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), comm_.get(), mpiRequest_.get_and_activate())); } else { // blocking exchange mpi_check_status(MPI_Alltoall(sendBufferPtr, param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), recvBufferPtr, param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), comm_.get())); } } template auto TransposeMPIBufferedGPU::exchange_forward_finalize() -> void { mpiRequest_.wait_if_active(); } template auto TransposeMPIBufferedGPU::pack_forward() -> void { if (spaceDomainDataGPU_.size() > 0 && spaceDomainBufferGPU_.size() > 0) { buffered_pack_forward(spaceDomainStream_.get(), param_->max_num_xy_planes(), create_1d_view(numZSticksGPU_, 0, numZSticksGPU_.size()), create_1d_view(indicesGPU_, 0, indicesGPU_.size()), spaceDomainDataGPU_, spaceDomainBufferGPU_); #ifndef SPFFT_GPU_DIRECT copy_from_gpu_async(spaceDomainStream_, spaceDomainBufferGPU_, spaceDomainBufferHost_); #endif } } template auto TransposeMPIBufferedGPU::unpack_forward() -> void { if (freqDomainDataGPU_.size() > 0 && freqDomainBufferGPU_.size() > 0) { #ifndef SPFFT_GPU_DIRECT copy_to_gpu_async(freqDomainStream_, freqDomainBufferHost_, freqDomainBufferGPU_); #endif buffered_unpack_forward(freqDomainStream_.get(), param_->max_num_xy_planes(), create_1d_view(numXYPlanesGPU_, 0, numXYPlanesGPU_.size()), create_1d_view(xyPlaneOffsetsGPU_, 0, xyPlaneOffsetsGPU_.size()), freqDomainBufferGPU_, freqDomainDataGPU_); } } template auto TransposeMPIBufferedGPU::exchange_forward_start(const bool nonBlockingExchange) -> void { assert(omp_get_thread_num() == 0); // only 
master thread must be allowed to enter gpu::check_status(gpu::stream_synchronize(spaceDomainStream_.get())); #ifdef SPFFT_GPU_DIRECT auto sendBufferPtr = spaceDomainBufferGPU_.data(); auto recvBufferPtr = freqDomainBufferGPU_.data(); #else auto sendBufferPtr = spaceDomainBufferHost_.data(); auto recvBufferPtr = freqDomainBufferHost_.data(); #endif if (nonBlockingExchange) { // start non-blocking exchange mpi_check_status( MPI_Ialltoall(sendBufferPtr, param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), recvBufferPtr, param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), comm_.get(), mpiRequest_.get_and_activate())); } else { // blocking exchange mpi_check_status(MPI_Alltoall(sendBufferPtr, param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), recvBufferPtr, param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), comm_.get())); } } template auto TransposeMPIBufferedGPU::exchange_backward_finalize() -> void { mpiRequest_.wait_if_active(); } // Instantiate class for float and double #ifdef SPFFT_SINGLE_PRECISION template class TransposeMPIBufferedGPU; #endif template class TransposeMPIBufferedGPU; template class TransposeMPIBufferedGPU; } // namespace spfft #endif // SPFFT_MPI SpFFT-1.1.0/src/transpose/transpose_mpi_buffered_gpu.hpp000066400000000000000000000116301457701740000233570ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TRANSPOSE_MPI_BUFFERED_GPU_HPP #define SPFFT_TRANSPOSE_MPI_BUFFERED_GPU_HPP #include #include #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_stream_handle.hpp" #include "memory/gpu_array.hpp" #include "memory/gpu_array_view.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/config.h" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/type_check.hpp" #if defined(SPFFT_MPI) && (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_request_handle.hpp" namespace spfft { template class TransposeMPIBufferedGPU : public Transpose { static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = std::complex; using ComplexExchangeType = std::complex; using ComplexGPUType = typename gpu::fft::ComplexType::type; using ComplexExchangeGPUType = typename 
gpu::fft::ComplexType::type; public: // spaceDomainDataGPU and freqDomainDataGPU must NOT overlap // spaceDomainDataGPU and spaceDomainBufferGPU must NOT overlap // freqDomainDataGPU and freqDomainBufferGPU must NOT overlap // spaceDomainBufferGPU and freqDomainBufferGPU must NOT overlap // spaceDomainBufferHost and freqDomainBufferHost must NOT overlap // // spaceDomainBufferGPU and freqDomainDataGPU MAY overlap // freqDomainBufferGPU and spaceDomainDataGPU MAY overlap TransposeMPIBufferedGPU(const std::shared_ptr& param, MPICommunicatorHandle comm, HostArrayView1D spaceDomainBufferHost, GPUArrayView3D spaceDomainDataGPU, GPUArrayView1D spaceDomainBufferGPU, GPUStreamHandle spaceDomainStream, HostArrayView1D freqDomainBufferHost, GPUArrayView2D freqDomainDataGPU, GPUArrayView1D freqDomainBufferGPU, GPUStreamHandle freqDomainStream); auto pack_backward() -> void override; auto exchange_backward_start(const bool nonBlockingExchange) -> void override; auto exchange_backward_finalize() -> void override; auto unpack_backward() -> void override; auto pack_forward() -> void override; auto exchange_forward_start(const bool nonBlockingExchange) -> void override; auto exchange_forward_finalize() -> void override; auto unpack_forward() -> void override; private: std::shared_ptr param_; MPIDatatypeHandle mpiTypeHandle_; MPICommunicatorHandle comm_; MPIRequestHandle mpiRequest_; HostArrayView1D spaceDomainBufferHost_; HostArrayView1D freqDomainBufferHost_; GPUArrayView3D spaceDomainDataGPU_; GPUArrayView2D freqDomainDataGPU_; GPUArrayView3D spaceDomainBufferGPU_; GPUArrayView3D freqDomainBufferGPU_; GPUStreamHandle spaceDomainStream_; GPUStreamHandle freqDomainStream_; GPUArray numZSticksGPU_; GPUArray numXYPlanesGPU_; GPUArray xyPlaneOffsetsGPU_; GPUArray indicesGPU_; }; } // namespace spfft #endif // SPFFT_MPI #endif SpFFT-1.1.0/src/transpose/transpose_mpi_buffered_host.cpp000066400000000000000000000317771457701740000235520ustar00rootroot00000000000000/* * Copyright (c) 2019 
ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include "memory/array_view_utility.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/exceptions.hpp" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" #include "util/type_check.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_check_status.hpp" #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_match_elementary_type.hpp" #include "transpose/transpose_mpi_buffered_host.hpp" namespace spfft { template TransposeMPIBufferedHost::TransposeMPIBufferedHost( const std::shared_ptr& param, MPICommunicatorHandle comm, HostArrayView3D spaceDomainData, HostArrayView2D freqDomainData, HostArrayView1D spaceDomainBuffer, HostArrayView1D freqDomainBuffer) : param_(param), comm_(std::move(comm)), spaceDomainData_(spaceDomainData), freqDomainData_(freqDomainData), spaceDomainBuffer_(create_new_type_1d_view(spaceDomainBuffer, spaceDomainBuffer.size())), freqDomainBuffer_( create_new_type_1d_view(freqDomainBuffer, freqDomainBuffer.size())) { // assert(param_->dim_x_freq() == spaceDomainData.dim_mid()); assert(param_->dim_y() == spaceDomainData.dim_inner()); assert(param_->num_xy_planes(comm_.rank()) == spaceDomainData.dim_outer()); assert(param_->dim_z() == freqDomainData.dim_inner()); assert(param_->num_z_sticks(comm_.rank()) == freqDomainData.dim_outer()); assert(spaceDomainBuffer.size() >= param_->max_num_xy_planes() * param_->max_num_z_sticks() * comm_.size()); assert(freqDomainBuffer.size() >= param_->max_num_xy_planes() * param_->max_num_z_sticks() * comm_.size()); assert(disjoint(spaceDomainData, freqDomainData)); assert(disjoint(spaceDomainData, spaceDomainBuffer)); assert(disjoint(freqDomainData, freqDomainBuffer)); assert(disjoint(spaceDomainBuffer, freqDomainBuffer)); // create underlying type mpiTypeHandle_ = MPIDatatypeHandle::create_contiguous(2, 
MPIMatchElementaryType::get()); } template auto TransposeMPIBufferedHost::pack_backward() -> void { auto freqDomainBuffer3d = create_3d_view(freqDomainBuffer_, 0, comm_.size(), param_->max_num_z_sticks(), param_->max_num_xy_planes()); // transpose locally from (numLocalZSticks, dimZ) to (dimZ, numLocalZSticks) with spacing // between ranks for (SizeType r = 0; r < static_cast(comm_.size()); ++r) { const auto xyPlaneOffset = param_->xy_plane_offset(r); SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = 0; zStickIndex < freqDomainData_.dim_outer(); ++zStickIndex) { for (SizeType xyPlaneIndex = 0; xyPlaneIndex < param_->num_xy_planes(r); ++xyPlaneIndex) { freqDomainBuffer3d(r, zStickIndex, xyPlaneIndex) = freqDomainData_(zStickIndex, xyPlaneIndex + xyPlaneOffset); } } } SPFFT_OMP_PRAGMA("omp barrier") } template auto TransposeMPIBufferedHost::unpack_backward() -> void { // zero target data location (not all values are overwritten upon unpacking) SPFFT_OMP_PRAGMA("omp for schedule(static)") // implicit barrier for (SizeType z = 0; z < spaceDomainData_.dim_outer(); ++z) { std::memset(static_cast(&spaceDomainData_(z, 0, 0)), 0, sizeof(typename decltype(spaceDomainData_)::ValueType) * spaceDomainData_.dim_inner() * spaceDomainData_.dim_mid()); } auto spaceDomainDataFlat = create_2d_view(spaceDomainData_, 0, spaceDomainData_.dim_outer(), spaceDomainData_.dim_mid() * spaceDomainData_.dim_inner()); // unpack from (numZSticksTotal, numLocalXYPlanes) to (numLocalXYPlanes, dimX, dimY) const auto numLocalXYPlanes = param_->num_xy_planes(comm_.rank()); for (SizeType r = 0; r < (SizeType)comm_.size(); ++r) { const auto zStickXYIndices = param_->z_stick_xy_indices(r); // take care with unsigned type const SizeType unrolledLoopEnd = zStickXYIndices.size() < 4 ? 
0 : zStickXYIndices.size() - 3; auto spaceDomainBuffer2d = create_2d_view( spaceDomainBuffer_, r * param_->max_num_xy_planes() * param_->max_num_z_sticks(), param_->max_num_z_sticks(), param_->max_num_xy_planes()); SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = 0; zStickIndex < unrolledLoopEnd; zStickIndex += 4) { // manual loop unrolling for better performance const SizeType xyIndex1 = zStickXYIndices(zStickIndex); const SizeType xyIndex2 = zStickXYIndices(zStickIndex + 1); const SizeType xyIndex3 = zStickXYIndices(zStickIndex + 2); const SizeType xyIndex4 = zStickXYIndices(zStickIndex + 3); for (SizeType zIndex = 0; zIndex < numLocalXYPlanes; ++zIndex) { spaceDomainDataFlat(zIndex, xyIndex1) = spaceDomainBuffer2d(zStickIndex, zIndex); spaceDomainDataFlat(zIndex, xyIndex2) = spaceDomainBuffer2d(zStickIndex + 1, zIndex); spaceDomainDataFlat(zIndex, xyIndex3) = spaceDomainBuffer2d(zStickIndex + 2, zIndex); spaceDomainDataFlat(zIndex, xyIndex4) = spaceDomainBuffer2d(zStickIndex + 3, zIndex); } } SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = unrolledLoopEnd; zStickIndex < zStickXYIndices.size(); zStickIndex += 1) { const SizeType xyIndex = zStickXYIndices(zStickIndex); for (SizeType zIndex = 0; zIndex < numLocalXYPlanes; ++zIndex) { spaceDomainDataFlat(zIndex, xyIndex) = spaceDomainBuffer2d(zStickIndex, zIndex); } } } SPFFT_OMP_PRAGMA("omp barrier") } template auto TransposeMPIBufferedHost::exchange_backward_start(const bool nonBlockingExchange) -> void { assert(omp_get_thread_num() == 0); // only master thread must be allowed to enter // exchange data if (nonBlockingExchange) { mpi_check_status(MPI_Ialltoall( freqDomainBuffer_.data(), param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), spaceDomainBuffer_.data(), param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), comm_.get(), mpiRequest_.get_and_activate())); } else { 
mpi_check_status(MPI_Alltoall(freqDomainBuffer_.data(), param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), spaceDomainBuffer_.data(), param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), comm_.get())); } } template auto TransposeMPIBufferedHost::exchange_backward_finalize() -> void { mpiRequest_.wait_if_active(); } template auto TransposeMPIBufferedHost::pack_forward() -> void { auto spaceDomainDataFlat = create_2d_view(spaceDomainData_, 0, spaceDomainData_.dim_outer(), spaceDomainData_.dim_mid() * spaceDomainData_.dim_inner()); // pack from (numLocalXYPlanes, dimX, dimY) to (numZSticksTotal, numLocalXYPlanes) const auto numLocalXYPlanes = param_->num_xy_planes(comm_.rank()); for (SizeType r = 0; r < (SizeType)comm_.size(); ++r) { const auto zStickXYIndices = param_->z_stick_xy_indices(r); // take care with unsigned type const SizeType unrolledLoopEnd = zStickXYIndices.size() < 4 ? 0 : zStickXYIndices.size() - 3; auto spaceDomainBuffer2d = create_2d_view( spaceDomainBuffer_, r * param_->max_num_xy_planes() * param_->max_num_z_sticks(), param_->max_num_z_sticks(), param_->max_num_xy_planes()); SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = 0; zStickIndex < unrolledLoopEnd; zStickIndex += 4) { // manual loop unrolling for better performance const SizeType xyIndex1 = zStickXYIndices(zStickIndex); const SizeType xyIndex2 = zStickXYIndices(zStickIndex + 1); const SizeType xyIndex3 = zStickXYIndices(zStickIndex + 2); const SizeType xyIndex4 = zStickXYIndices(zStickIndex + 3); for (SizeType zIndex = 0; zIndex < numLocalXYPlanes; ++zIndex) { spaceDomainBuffer2d(zStickIndex, zIndex) = spaceDomainDataFlat(zIndex, xyIndex1); spaceDomainBuffer2d(zStickIndex + 1, zIndex) = spaceDomainDataFlat(zIndex, xyIndex2); spaceDomainBuffer2d(zStickIndex + 2, zIndex) = spaceDomainDataFlat(zIndex, xyIndex3); spaceDomainBuffer2d(zStickIndex + 3, zIndex) = spaceDomainDataFlat(zIndex, xyIndex4); } } 
SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = unrolledLoopEnd; zStickIndex < zStickXYIndices.size(); zStickIndex += 1) { const SizeType xyIndex = zStickXYIndices(zStickIndex); for (SizeType zIndex = 0; zIndex < numLocalXYPlanes; ++zIndex) { spaceDomainBuffer2d(zStickIndex, zIndex) = spaceDomainDataFlat(zIndex, xyIndex); } } } SPFFT_OMP_PRAGMA("omp barrier") } template auto TransposeMPIBufferedHost::unpack_forward() -> void { auto freqDomainBuffer3d = create_3d_view(freqDomainBuffer_, 0, comm_.size(), param_->max_num_z_sticks(), param_->max_num_xy_planes()); for (SizeType r = 0; r < static_cast(comm_.size()); ++r) { const auto xyPlaneOffset = param_->xy_plane_offset(r); SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = 0; zStickIndex < freqDomainData_.dim_outer(); ++zStickIndex) { for (SizeType xyPlaneIndex = 0; xyPlaneIndex < param_->num_xy_planes(r); ++xyPlaneIndex) { freqDomainData_(zStickIndex, xyPlaneIndex + xyPlaneOffset) = freqDomainBuffer3d(r, zStickIndex, xyPlaneIndex); } } } SPFFT_OMP_PRAGMA("omp barrier") } template auto TransposeMPIBufferedHost::exchange_forward_start(const bool nonBlockingExchange) -> void { assert(omp_get_thread_num() == 0); // only master thread must be allowed to enter // exchange data if (nonBlockingExchange) { mpi_check_status(MPI_Ialltoall( spaceDomainBuffer_.data(), param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), freqDomainBuffer_.data(), param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), comm_.get(), mpiRequest_.get_and_activate())); } else { mpi_check_status(MPI_Alltoall(spaceDomainBuffer_.data(), param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), freqDomainBuffer_.data(), param_->max_num_z_sticks() * param_->max_num_xy_planes(), mpiTypeHandle_.get(), comm_.get())); } } template auto TransposeMPIBufferedHost::exchange_forward_finalize() -> void { mpiRequest_.wait_if_active(); 
} // Instantiate class for float and double #ifdef SPFFT_SINGLE_PRECISION template class TransposeMPIBufferedHost; #endif template class TransposeMPIBufferedHost; template class TransposeMPIBufferedHost; } // namespace spfft #endif // SPFFT_MPI SpFFT-1.1.0/src/transpose/transpose_mpi_buffered_host.hpp000066400000000000000000000075331457701740000235500ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_TRANSPOSE_MPI_BUFFERED_HOST_HPP #define SPFFT_TRANSPOSE_MPI_BUFFERED_HOST_HPP #include #include #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/config.h" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/type_check.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_request_handle.hpp" namespace spfft { template class TransposeMPIBufferedHost : public Transpose { static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = std::complex; using ComplexExchangeType = std::complex; public: // spaceDomainData and freqDomainData must NOT overlap // spaceDomainData and spaceDomainBuffer must NOT overlap // freqDomainData and freqDomainBuffer must NOT overlap // spaceDomainBuffer and freqDomainBuffer must NOT overlap // // spaceDomainBuffer and freqDomainData MAY overlap // freqDomainBuffer and spaceDomainData MAY overlap TransposeMPIBufferedHost(const std::shared_ptr& param, MPICommunicatorHandle comm, HostArrayView3D spaceDomainData, HostArrayView2D freqDomainData, HostArrayView1D spaceDomainBuffer, HostArrayView1D freqDomainBuffer); auto pack_backward() -> void override; auto exchange_backward_start(const bool nonBlockingExchange) -> void override; auto exchange_backward_finalize() -> void override; auto unpack_backward() -> void override; auto pack_forward() -> void override; auto exchange_forward_start(const bool nonBlockingExchange) -> void override; auto exchange_forward_finalize() -> void override; auto unpack_forward() -> void override; private: std::shared_ptr param_; MPIDatatypeHandle mpiTypeHandle_; MPICommunicatorHandle comm_; MPIRequestHandle mpiRequest_; HostArrayView3D spaceDomainData_; HostArrayView2D freqDomainData_; HostArrayView1D spaceDomainBuffer_; HostArrayView1D freqDomainBuffer_; }; } // namespace spfft #endif // SPFFT_MPI #endif 
SpFFT-1.1.0/src/transpose/transpose_mpi_compact_buffered_gpu.cpp000066400000000000000000000326021457701740000250620ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include "spfft/config.h" #if defined(SPFFT_MPI) && (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) #include #include #include #include #include #include #include "memory/array_view_utility.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/exceptions.hpp" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" #include "util/type_check.hpp" #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_transfer.hpp" #include "mpi_util/mpi_check_status.hpp" #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_match_elementary_type.hpp" #include "transpose/gpu_kernels/compact_buffered_kernels.hpp" #include "transpose/transpose_mpi_compact_buffered_gpu.hpp" namespace spfft { template TransposeMPICompactBufferedGPU::TransposeMPICompactBufferedGPU( const std::shared_ptr& param, MPICommunicatorHandle comm, HostArrayView1D spaceDomainBufferHost, GPUArrayView3D spaceDomainDataGPU, GPUArrayView1D spaceDomainBufferGPU, GPUStreamHandle spaceDomainStream, HostArrayView1D freqDomainBufferHost, GPUArrayView2D freqDomainDataGPU, GPUArrayView1D freqDomainBufferGPU, GPUStreamHandle freqDomainStream) : param_(param), comm_(std::move(comm)), spaceDomainBufferHost_(create_new_type_1d_view( spaceDomainBufferHost, param_->num_xy_planes(comm_.rank()) * param_->total_num_z_sticks())), freqDomainBufferHost_(create_new_type_1d_view( freqDomainBufferHost, param_->total_num_xy_planes() * param_->num_z_sticks(comm_.rank()))), spaceDomainDataGPU_(spaceDomainDataGPU), freqDomainDataGPU_(freqDomainDataGPU), spaceDomainBufferGPU_(create_new_type_1d_view( spaceDomainBufferGPU, param_->total_num_z_sticks() * param_->num_xy_planes(comm_.rank()))), freqDomainBufferGPU_(create_new_type_1d_view( freqDomainBufferGPU, param_->num_z_sticks(comm_.rank()) * param_->total_num_xy_planes())), spaceDomainStream_(std::move(spaceDomainStream)), 
freqDomainStream_(std::move(freqDomainStream)) { assert(param_->dim_y() == spaceDomainDataGPU.dim_mid()); assert(param_->dim_x_freq() == spaceDomainDataGPU.dim_inner()); assert(param_->num_xy_planes(comm_.rank()) == spaceDomainDataGPU.dim_outer()); assert(param_->dim_z() == freqDomainDataGPU.dim_inner()); assert(param_->num_z_sticks(comm_.rank()) == freqDomainDataGPU.dim_outer()); assert(spaceDomainBufferGPU.size() >= param_->total_num_z_sticks() * param_->num_xy_planes(comm_.rank())); assert(spaceDomainBufferHost.size() >= param_->total_num_z_sticks() * param_->num_xy_planes(comm_.rank())); assert(freqDomainBufferGPU.size() >= param_->total_num_xy_planes() * param_->num_z_sticks(comm_.rank())); assert(freqDomainBufferHost.size() >= param_->total_num_xy_planes() * param_->num_z_sticks(comm_.rank())); // assert(disjoint(spaceDomainDataGPU, freqDomainDataGPU)); assert(disjoint(spaceDomainDataGPU, spaceDomainBufferGPU)); assert(disjoint(freqDomainDataGPU, freqDomainBufferGPU)); assert(disjoint(spaceDomainBufferHost, freqDomainBufferHost)); #ifdef SPFFT_GPU_DIRECT assert(disjoint(spaceDomainBufferGPU, freqDomainBufferGPU)); #endif // create underlying type mpiTypeHandle_ = MPIDatatypeHandle::create_contiguous(2, MPIMatchElementaryType::get()); // prepare mpi parameters spaceDomainCount_.resize(comm_.size()); freqDomainCount_.resize(comm_.size()); const SizeType numLocalZSticks = param_->num_z_sticks(comm_.rank()); const SizeType numLocalXYPlanes = param_->num_xy_planes(comm_.rank()); for (SizeType r = 0; r < (SizeType)comm_.size(); ++r) { freqDomainCount_[r] = numLocalZSticks * param_->num_xy_planes(r); spaceDomainCount_[r] = param_->num_z_sticks(r) * numLocalXYPlanes; } spaceDomainDispls_.resize(comm_.size()); freqDomainDispls_.resize(comm_.size()); int currentFreqDomainDispls = 0; int currentSpaceDomainDispls = 0; for (SizeType r = 0; r < (SizeType)comm_.size(); ++r) { assert(currentSpaceDomainDispls + spaceDomainCount_[r] <= 
static_cast(spaceDomainBufferHost.size())); assert(currentFreqDomainDispls + freqDomainCount_[r] <= static_cast(freqDomainBufferHost.size())); spaceDomainDispls_[r] = currentSpaceDomainDispls; freqDomainDispls_[r] = currentFreqDomainDispls; currentSpaceDomainDispls += spaceDomainCount_[r]; currentFreqDomainDispls += freqDomainCount_[r]; } // copy relevant parameters to gpu std::vector numZSticksHost(comm_.size()); std::vector numXYPlanesHost(comm_.size()); std::vector xyPlaneOffsetsHost(comm_.size()); std::vector indicesHost(comm_.size() * param_->max_num_z_sticks()); for (SizeType r = 0; r < comm_.size(); ++r) { numZSticksHost[r] = static_cast(param_->num_z_sticks(r)); numXYPlanesHost[r] = static_cast(param_->num_xy_planes(r)); xyPlaneOffsetsHost[r] = static_cast(param_->xy_plane_offset(r)); const auto zStickXYIndices = param_->z_stick_xy_indices(r); for (SizeType i = 0; i < zStickXYIndices.size(); ++i) { // transpose stick index const int xyIndex = zStickXYIndices(i); const int x = xyIndex / param_->dim_y(); const int y = xyIndex - x * param_->dim_y(); indicesHost[r * param_->max_num_z_sticks() + i] = y * param_->dim_x_freq() + x; } } numZSticksGPU_ = GPUArray(numZSticksHost.size()); numXYPlanesGPU_ = GPUArray(numXYPlanesHost.size()); xyPlaneOffsetsGPU_ = GPUArray(xyPlaneOffsetsHost.size()); indicesGPU_ = GPUArray(indicesHost.size()); copy_to_gpu(numZSticksHost, numZSticksGPU_); copy_to_gpu(numXYPlanesHost, numXYPlanesGPU_); copy_to_gpu(xyPlaneOffsetsHost, xyPlaneOffsetsGPU_); copy_to_gpu(indicesHost, indicesGPU_); } template auto TransposeMPICompactBufferedGPU::pack_backward() -> void { if (freqDomainDataGPU_.size() > 0 && freqDomainBufferGPU_.size() > 0) { compact_buffered_pack_backward(freqDomainStream_.get(), param_->max_num_xy_planes(), create_1d_view(numXYPlanesGPU_, 0, numXYPlanesGPU_.size()), create_1d_view(xyPlaneOffsetsGPU_, 0, xyPlaneOffsetsGPU_.size()), freqDomainDataGPU_, freqDomainBufferGPU_); #ifndef SPFFT_GPU_DIRECT 
copy_from_gpu_async(freqDomainStream_, freqDomainBufferGPU_, freqDomainBufferHost_); #endif } } template auto TransposeMPICompactBufferedGPU::unpack_backward() -> void { if (spaceDomainDataGPU_.size() > 0) { gpu::check_status(gpu::memset_async( static_cast(spaceDomainDataGPU_.data()), 0, spaceDomainDataGPU_.size() * sizeof(typename decltype(spaceDomainDataGPU_)::ValueType), spaceDomainStream_.get())); if (spaceDomainBufferGPU_.size() > 0) { #ifndef SPFFT_GPU_DIRECT copy_to_gpu_async(spaceDomainStream_, spaceDomainBufferHost_, spaceDomainBufferGPU_); #endif compact_buffered_unpack_backward(spaceDomainStream_.get(), param_->max_num_z_sticks(), create_1d_view(numZSticksGPU_, 0, numZSticksGPU_.size()), create_1d_view(indicesGPU_, 0, indicesGPU_.size()), spaceDomainBufferGPU_, spaceDomainDataGPU_); } } } template auto TransposeMPICompactBufferedGPU::exchange_backward_start(const bool nonBlockingExchange) -> void { assert(omp_get_thread_num() == 0); // only must thread must be allowed to enter gpu::check_status(gpu::stream_synchronize(freqDomainStream_.get())); #ifdef SPFFT_GPU_DIRECT auto sendBufferPtr = freqDomainBufferGPU_.data(); auto recvBufferPtr = spaceDomainBufferGPU_.data(); #else auto sendBufferPtr = freqDomainBufferHost_.data(); auto recvBufferPtr = spaceDomainBufferHost_.data(); #endif if (nonBlockingExchange) { mpi_check_status(MPI_Ialltoallv( sendBufferPtr, freqDomainCount_.data(), freqDomainDispls_.data(), mpiTypeHandle_.get(), recvBufferPtr, spaceDomainCount_.data(), spaceDomainDispls_.data(), mpiTypeHandle_.get(), comm_.get(), mpiRequest_.get_and_activate())); } else { mpi_check_status(MPI_Alltoallv(sendBufferPtr, freqDomainCount_.data(), freqDomainDispls_.data(), mpiTypeHandle_.get(), recvBufferPtr, spaceDomainCount_.data(), spaceDomainDispls_.data(), mpiTypeHandle_.get(), comm_.get())); } } template auto TransposeMPICompactBufferedGPU::exchange_backward_finalize() -> void { mpiRequest_.wait_if_active(); } template auto 
TransposeMPICompactBufferedGPU::pack_forward() -> void { if (spaceDomainDataGPU_.size() > 0 && spaceDomainBufferGPU_.size() > 0) { compact_buffered_pack_forward(spaceDomainStream_.get(), param_->max_num_z_sticks(), create_1d_view(numZSticksGPU_, 0, numZSticksGPU_.size()), create_1d_view(indicesGPU_, 0, indicesGPU_.size()), spaceDomainDataGPU_, spaceDomainBufferGPU_); #ifndef SPFFT_GPU_DIRECT copy_from_gpu_async(spaceDomainStream_, spaceDomainBufferGPU_, spaceDomainBufferHost_); #endif } } template auto TransposeMPICompactBufferedGPU::unpack_forward() -> void { if (freqDomainDataGPU_.size() > 0 && freqDomainBufferGPU_.size() > 0) { #ifndef SPFFT_GPU_DIRECT copy_to_gpu_async(freqDomainStream_, freqDomainBufferHost_, freqDomainBufferGPU_); #endif compact_buffered_unpack_forward( freqDomainStream_.get(), param_->max_num_xy_planes(), create_1d_view(numXYPlanesGPU_, 0, numXYPlanesGPU_.size()), create_1d_view(xyPlaneOffsetsGPU_, 0, xyPlaneOffsetsGPU_.size()), freqDomainBufferGPU_, freqDomainDataGPU_); } } template auto TransposeMPICompactBufferedGPU::exchange_forward_start(const bool nonBlockingExchange) -> void { assert(omp_get_thread_num() == 0); // only must thread must be allowed to enter gpu::check_status(gpu::stream_synchronize(spaceDomainStream_.get())); #ifdef SPFFT_GPU_DIRECT auto sendBufferPtr = spaceDomainBufferGPU_.data(); auto recvBufferPtr = freqDomainBufferGPU_.data(); #else auto sendBufferPtr = spaceDomainBufferHost_.data(); auto recvBufferPtr = freqDomainBufferHost_.data(); #endif if (nonBlockingExchange) { // start non-blocking exchange mpi_check_status(MPI_Ialltoallv( sendBufferPtr, spaceDomainCount_.data(), spaceDomainDispls_.data(), mpiTypeHandle_.get(), recvBufferPtr, freqDomainCount_.data(), freqDomainDispls_.data(), mpiTypeHandle_.get(), comm_.get(), mpiRequest_.get_and_activate())); } else { // blocking exchange mpi_check_status(MPI_Alltoallv(sendBufferPtr, spaceDomainCount_.data(), spaceDomainDispls_.data(), mpiTypeHandle_.get(), recvBufferPtr, 
freqDomainCount_.data(), freqDomainDispls_.data(), mpiTypeHandle_.get(), comm_.get())); } } template auto TransposeMPICompactBufferedGPU::exchange_forward_finalize() -> void { mpiRequest_.wait_if_active(); } // Instantiate class for float and double #ifdef SPFFT_SINGLE_PRECISION template class TransposeMPICompactBufferedGPU; #endif template class TransposeMPICompactBufferedGPU; template class TransposeMPICompactBufferedGPU; } // namespace spfft #endif // SPFFT_MPI SpFFT-1.1.0/src/transpose/transpose_mpi_compact_buffered_gpu.hpp000066400000000000000000000122501457701740000250640ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TRANSPOSE_MPI_COMPACT_BUFFERED_GPU_HPP #define SPFFT_TRANSPOSE_MPI_COMPACT_BUFFERED_GPU_HPP #include #include #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_stream_handle.hpp" #include "memory/gpu_array.hpp" #include "memory/gpu_array_view.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/config.h" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/type_check.hpp" #if defined(SPFFT_MPI) && (defined(SPFFT_CUDA) || defined(SPFFT_ROCM)) #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_request_handle.hpp" namespace spfft { template class TransposeMPICompactBufferedGPU : public Transpose { static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = std::complex; using ComplexExchangeType = std::complex; using ComplexGPUType = typename gpu::fft::ComplexType::type; using ComplexExchangeGPUType = typename gpu::fft::ComplexType::type; public: // spaceDomainDataGPU and freqDomainDataGPU must NOT overlap // spaceDomainDataGPU and spaceDomainBufferGPU must NOT overlap // freqDomainDataGPU and freqDomainBufferGPU must NOT overlap // spaceDomainBufferGPU and freqDomainBufferGPU must NOT overlap // spaceDomainBufferHost and freqDomainBufferHost must NOT overlap // // spaceDomainBufferGPU and freqDomainDataGPU MAY overlap // freqDomainBufferGPU and 
spaceDomainDataGPU MAY overlap TransposeMPICompactBufferedGPU(const std::shared_ptr& param, MPICommunicatorHandle comm, HostArrayView1D spaceDomainBufferHost, GPUArrayView3D spaceDomainDataGPU, GPUArrayView1D spaceDomainBufferGPU, GPUStreamHandle spaceDomainStream, HostArrayView1D freqDomainBufferHost, GPUArrayView2D freqDomainDataGPU, GPUArrayView1D freqDomainBufferGPU, GPUStreamHandle freqDomainStream); auto pack_backward() -> void override; auto exchange_backward_start(const bool nonBlockingExchange) -> void override; auto exchange_backward_finalize() -> void override; auto unpack_backward() -> void override; auto pack_forward() -> void override; auto exchange_forward_start(const bool nonBlockingExchange) -> void override; auto exchange_forward_finalize() -> void override; auto unpack_forward() -> void override; private: std::shared_ptr param_; MPIDatatypeHandle mpiTypeHandle_; MPICommunicatorHandle comm_; MPIRequestHandle mpiRequest_; std::vector spaceDomainDispls_; std::vector freqDomainDispls_; std::vector spaceDomainCount_; std::vector freqDomainCount_; HostArrayView1D spaceDomainBufferHost_; HostArrayView1D freqDomainBufferHost_; GPUArrayView3D spaceDomainDataGPU_; GPUArrayView2D freqDomainDataGPU_; GPUArrayView1D spaceDomainBufferGPU_; GPUArrayView1D freqDomainBufferGPU_; GPUStreamHandle spaceDomainStream_; GPUStreamHandle freqDomainStream_; GPUArray numZSticksGPU_; GPUArray numXYPlanesGPU_; GPUArray xyPlaneOffsetsGPU_; GPUArray indicesGPU_; }; } // namespace spfft #endif // SPFFT_MPI #endif SpFFT-1.1.0/src/transpose/transpose_mpi_compact_buffered_host.cpp000066400000000000000000000340621457701740000252460ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. 
Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include "memory/array_view_utility.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/exceptions.hpp" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" #include "util/type_check.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_check_status.hpp" #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_match_elementary_type.hpp" #include "transpose/transpose_mpi_compact_buffered_host.hpp" namespace spfft { template TransposeMPICompactBufferedHost::TransposeMPICompactBufferedHost( const std::shared_ptr& param, MPICommunicatorHandle comm, HostArrayView3D spaceDomainData, HostArrayView2D freqDomainData, HostArrayView1D spaceDomainBuffer, HostArrayView1D freqDomainBuffer) : param_(param), comm_(std::move(comm)), spaceDomainData_(spaceDomainData), freqDomainData_(freqDomainData), spaceDomainBuffer_(create_new_type_1d_view(spaceDomainBuffer, spaceDomainBuffer.size())), freqDomainBuffer_( create_new_type_1d_view(freqDomainBuffer, freqDomainBuffer.size())) { assert(param_->dim_x_freq() == spaceDomainData.dim_mid()); assert(param_->dim_y() == spaceDomainData.dim_inner()); assert(param_->num_xy_planes(comm_.rank()) == spaceDomainData.dim_outer()); assert(param_->dim_z() == freqDomainData.dim_inner()); assert(param_->num_z_sticks(comm_.rank()) == freqDomainData.dim_outer()); assert(spaceDomainBuffer.size() >= param_->total_num_z_sticks() * param_->num_xy_planes(comm_.rank())); assert(freqDomainBuffer.size() >= param_->total_num_xy_planes() * param_->num_z_sticks(comm_.rank())); assert(disjoint(spaceDomainData, freqDomainData)); assert(disjoint(spaceDomainData, spaceDomainBuffer)); assert(disjoint(freqDomainData, freqDomainBuffer)); assert(disjoint(spaceDomainBuffer, freqDomainBuffer)); // create underlying type mpiTypeHandle_ = MPIDatatypeHandle::create_contiguous(2, 
MPIMatchElementaryType::get()); spaceDomainCount_.resize(comm_.size()); freqDomainCount_.resize(comm_.size()); const SizeType numLocalZSticks = param_->num_z_sticks(comm_.rank()); const SizeType numLocalXYPlanes = param_->num_xy_planes(comm_.rank()); for (SizeType r = 0; r < comm_.size(); ++r) { freqDomainCount_[r] = numLocalZSticks * param_->num_xy_planes(r); spaceDomainCount_[r] = param_->num_z_sticks(r) * numLocalXYPlanes; } spaceDomainDispls_.resize(comm_.size()); freqDomainDispls_.resize(comm_.size()); int currentFreqDomainDispls = 0; int currentSpaceDomainDispls = 0; for (SizeType r = 0; r < comm_.size(); ++r) { assert(currentSpaceDomainDispls + spaceDomainCount_[r] <= static_cast(spaceDomainBuffer.size())); assert(currentFreqDomainDispls + freqDomainCount_[r] <= static_cast(freqDomainBuffer.size())); spaceDomainDispls_[r] = currentSpaceDomainDispls; freqDomainDispls_[r] = currentFreqDomainDispls; currentSpaceDomainDispls += spaceDomainCount_[r]; currentFreqDomainDispls += freqDomainCount_[r]; } } template auto TransposeMPICompactBufferedHost::pack_backward() -> void { // transpose locally from (numLocalZSticks, dimZ) to (dimZ, numLocalZSticks) for (SizeType r = 0; r < static_cast(comm_.size()); ++r) { const auto xyPlaneOffset = param_->xy_plane_offset(r); auto freqDomainBuffer2d = create_2d_view(freqDomainBuffer_, freqDomainDispls_[r], freqDomainData_.dim_outer(), param_->num_xy_planes(r)); SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = 0; zStickIndex < freqDomainData_.dim_outer(); ++zStickIndex) { for (SizeType zIndex = 0; zIndex < param_->num_xy_planes(r); ++zIndex) { freqDomainBuffer2d(zStickIndex, zIndex) = freqDomainData_(zStickIndex, zIndex + xyPlaneOffset); } } } SPFFT_OMP_PRAGMA("omp barrier") } template auto TransposeMPICompactBufferedHost::unpack_backward() -> void { // zero target data location (not all values are overwritten upon unpacking) SPFFT_OMP_PRAGMA("omp for schedule(static)") // implicit barrier for 
(SizeType z = 0; z < spaceDomainData_.dim_outer(); ++z) { std::memset(static_cast(&spaceDomainData_(z, 0, 0)), 0, sizeof(typename decltype(spaceDomainData_)::ValueType) * spaceDomainData_.dim_inner() * spaceDomainData_.dim_mid()); } auto spaceDomainDataFlat = create_2d_view(spaceDomainData_, 0, spaceDomainData_.dim_outer(), spaceDomainData_.dim_mid() * spaceDomainData_.dim_inner()); // unpack from (numZSticksTotal, numLocalXYPlanes) to (numLocalXYPlanes, dimX, dimY) const auto numLocalXYPlanes = param_->num_xy_planes(comm_.rank()); for (SizeType r = 0; r < (SizeType)comm_.size(); ++r) { const auto zStickXYIndices = param_->z_stick_xy_indices(r); // take care with unsigned type const SizeType unrolledLoopEnd = zStickXYIndices.size() < 4 ? 0 : zStickXYIndices.size() - 3; auto recvBuffer = create_2d_view(spaceDomainBuffer_, spaceDomainDispls_[r], zStickXYIndices.size(), numLocalXYPlanes); SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = 0; zStickIndex < unrolledLoopEnd; zStickIndex += 4) { const SizeType xyIndex1 = zStickXYIndices(zStickIndex); const SizeType xyIndex2 = zStickXYIndices(zStickIndex + 1); const SizeType xyIndex3 = zStickXYIndices(zStickIndex + 2); const SizeType xyIndex4 = zStickXYIndices(zStickIndex + 3); // manual loop unrolling for better performance for (SizeType zIndex = 0; zIndex < numLocalXYPlanes; ++zIndex) { spaceDomainDataFlat(zIndex, xyIndex1) = recvBuffer(zStickIndex, zIndex); spaceDomainDataFlat(zIndex, xyIndex2) = recvBuffer(zStickIndex + 1, zIndex); spaceDomainDataFlat(zIndex, xyIndex3) = recvBuffer(zStickIndex + 2, zIndex); spaceDomainDataFlat(zIndex, xyIndex4) = recvBuffer(zStickIndex + 3, zIndex); } } SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = unrolledLoopEnd; zStickIndex < zStickXYIndices.size(); zStickIndex += 1) { const SizeType xyIndex = zStickXYIndices(zStickIndex); for (SizeType zIndex = 0; zIndex < numLocalXYPlanes; ++zIndex) { spaceDomainDataFlat(zIndex, 
xyIndex) = recvBuffer(zStickIndex, zIndex); } } } SPFFT_OMP_PRAGMA("omp barrier") } template auto TransposeMPICompactBufferedHost::exchange_backward_finalize() -> void { mpiRequest_.wait_if_active(); } template auto TransposeMPICompactBufferedHost::exchange_backward_start(const bool nonBlockingExchange) -> void { assert(omp_get_thread_num() == 0); // only must thread must be allowed to enter // exchange data if (nonBlockingExchange) { mpi_check_status(MPI_Ialltoallv(freqDomainBuffer_.data(), freqDomainCount_.data(), freqDomainDispls_.data(), mpiTypeHandle_.get(), spaceDomainBuffer_.data(), spaceDomainCount_.data(), spaceDomainDispls_.data(), mpiTypeHandle_.get(), comm_.get(), mpiRequest_.get_and_activate())); } else { mpi_check_status(MPI_Alltoallv(freqDomainBuffer_.data(), freqDomainCount_.data(), freqDomainDispls_.data(), mpiTypeHandle_.get(), spaceDomainBuffer_.data(), spaceDomainCount_.data(), spaceDomainDispls_.data(), mpiTypeHandle_.get(), comm_.get())); } } template auto TransposeMPICompactBufferedHost::pack_forward() -> void { auto spaceDomainDataFlat = create_2d_view(spaceDomainData_, 0, spaceDomainData_.dim_outer(), spaceDomainData_.dim_mid() * spaceDomainData_.dim_inner()); // pack from (numLocalXYPlanes, dimX, dimY) to (numZSticksTotal, numLocalXYPlanes) const auto numLocalXYPlanes = param_->num_xy_planes(comm_.rank()); for (SizeType r = 0; r < (SizeType)comm_.size(); ++r) { const auto zStickXYIndices = param_->z_stick_xy_indices(r); // take care with unsigned type const SizeType unrolledLoopEnd = zStickXYIndices.size() < 4 ? 
0 : zStickXYIndices.size() - 3; auto recvBuffer = create_2d_view(spaceDomainBuffer_, spaceDomainDispls_[r], zStickXYIndices.size(), numLocalXYPlanes); SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = 0; zStickIndex < unrolledLoopEnd; zStickIndex += 4) { // manual loop unrolling for better performance const SizeType xyIndex1 = zStickXYIndices(zStickIndex); const SizeType xyIndex2 = zStickXYIndices(zStickIndex + 1); const SizeType xyIndex3 = zStickXYIndices(zStickIndex + 2); const SizeType xyIndex4 = zStickXYIndices(zStickIndex + 3); for (SizeType zIndex = 0; zIndex < numLocalXYPlanes; ++zIndex) { recvBuffer(zStickIndex, zIndex) = spaceDomainDataFlat(zIndex, xyIndex1); recvBuffer(zStickIndex + 1, zIndex) = spaceDomainDataFlat(zIndex, xyIndex2); recvBuffer(zStickIndex + 2, zIndex) = spaceDomainDataFlat(zIndex, xyIndex3); recvBuffer(zStickIndex + 3, zIndex) = spaceDomainDataFlat(zIndex, xyIndex4); } } SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = unrolledLoopEnd; zStickIndex < zStickXYIndices.size(); zStickIndex += 1) { const SizeType xyIndex = zStickXYIndices(zStickIndex); for (SizeType zIndex = 0; zIndex < numLocalXYPlanes; ++zIndex) { recvBuffer(zStickIndex, zIndex) = spaceDomainDataFlat(zIndex, xyIndex); } } } SPFFT_OMP_PRAGMA("omp barrier") } template auto TransposeMPICompactBufferedHost::exchange_forward_finalize() -> void { mpiRequest_.wait_if_active(); } template auto TransposeMPICompactBufferedHost::unpack_forward() -> void { // transpose locally from (dimZ, numLocalZSticks) to (numLocalZSticks, dimZ) for (SizeType r = 0; r < static_cast(comm_.size()); ++r) { const auto xyPlaneOffset = param_->xy_plane_offset(r); auto freqDomainBuffer2d = create_2d_view(freqDomainBuffer_, freqDomainDispls_[r], freqDomainData_.dim_outer(), param_->num_xy_planes(r)); SPFFT_OMP_PRAGMA("omp for schedule(static) nowait") for (SizeType zStickIndex = 0; zStickIndex < freqDomainData_.dim_outer(); ++zStickIndex) { for 
(SizeType zIndex = 0; zIndex < param_->num_xy_planes(r); ++zIndex) { freqDomainData_(zStickIndex, zIndex + xyPlaneOffset) = freqDomainBuffer2d(zStickIndex, zIndex); } } } SPFFT_OMP_PRAGMA("omp barrier") } template auto TransposeMPICompactBufferedHost::exchange_forward_start(const bool nonBlockingExchange) -> void { assert(omp_get_thread_num() == 0); // only must thread must be allowed to enter if (nonBlockingExchange) { mpi_check_status(MPI_Ialltoallv(spaceDomainBuffer_.data(), spaceDomainCount_.data(), spaceDomainDispls_.data(), mpiTypeHandle_.get(), freqDomainBuffer_.data(), freqDomainCount_.data(), freqDomainDispls_.data(), mpiTypeHandle_.get(), comm_.get(), mpiRequest_.get_and_activate())); } else { mpi_check_status(MPI_Alltoallv(spaceDomainBuffer_.data(), spaceDomainCount_.data(), spaceDomainDispls_.data(), mpiTypeHandle_.get(), freqDomainBuffer_.data(), freqDomainCount_.data(), freqDomainDispls_.data(), mpiTypeHandle_.get(), comm_.get())); } } // Instantiate class for float and double #ifdef SPFFT_SINGLE_PRECISION template class TransposeMPICompactBufferedHost; #endif template class TransposeMPICompactBufferedHost; template class TransposeMPICompactBufferedHost; } // namespace spfft #endif // SPFFT_MPI SpFFT-1.1.0/src/transpose/transpose_mpi_compact_buffered_host.hpp000066400000000000000000000100571457701740000252510ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TRANSPOSE_MPI_COMPACT_BUFFERED_HOST_HPP #define SPFFT_TRANSPOSE_MPI_COMPACT_BUFFERED_HOST_HPP #include #include #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/config.h" #include "transpose.hpp" #include "util/type_check.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_request_handle.hpp" namespace spfft { template class TransposeMPICompactBufferedHost : public Transpose { static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = std::complex; using ComplexExchangeType = std::complex; public: // spaceDomainData and freqDomainData must NOT overlap // spaceDomainData and spaceDomainBuffer must NOT overlap // freqDomainData and freqDomainBuffer must NOT overlap // spaceDomainBuffer and freqDomainBuffer must NOT overlap // // spaceDomainBuffer and freqDomainData MAY overlap // freqDomainBuffer and spaceDomainData MAY 
overlap TransposeMPICompactBufferedHost(const std::shared_ptr& param, MPICommunicatorHandle comm, HostArrayView3D spaceDomainData, HostArrayView2D freqDomainData, HostArrayView1D spaceDomainBuffer, HostArrayView1D freqDomainBuffer); auto pack_backward() -> void override; auto exchange_backward_start(const bool nonBlockingExchange) -> void override; auto exchange_backward_finalize() -> void override; auto unpack_backward() -> void override; auto pack_forward() -> void override; auto exchange_forward_start(const bool nonBlockingExchange) -> void override; auto exchange_forward_finalize() -> void override; auto unpack_forward() -> void override; private: std::shared_ptr param_; MPIDatatypeHandle mpiTypeHandle_; MPICommunicatorHandle comm_; MPIRequestHandle mpiRequest_; HostArrayView3D spaceDomainData_; HostArrayView2D freqDomainData_; HostArrayView1D spaceDomainBuffer_; HostArrayView1D freqDomainBuffer_; std::vector spaceDomainDispls_; std::vector freqDomainDispls_; std::vector spaceDomainCount_; std::vector freqDomainCount_; }; } // namespace spfft #endif // SPFFT_MPI #endif SpFFT-1.1.0/src/transpose/transpose_mpi_unbuffered_gpu.cpp000066400000000000000000000256311457701740000237230ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include "gpu_util/gpu_transfer.hpp" #include "memory/array_view_utility.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/exceptions.hpp" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" #include "util/type_check.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_check_status.hpp" #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_match_elementary_type.hpp" #include "transpose/transpose_mpi_unbuffered_gpu.hpp" namespace spfft { /* Constructor. Stores the communicator, the host and GPU views of both arrays and the two GPU streams, then builds one MPI datatype, count and displacement per rank for each side of the exchange. The exchange methods below pass the data arrays themselves to MPI_Alltoallw or MPI_Ialltoallw together with these tables, so no separate pack buffers are used. */ template TransposeMPIUnbufferedGPU::TransposeMPIUnbufferedGPU( const std::shared_ptr& param, MPICommunicatorHandle comm, HostArrayView3D spaceDomainData, GPUArrayView3D::type> spaceDomainDataGPU, GPUStreamHandle spaceDomainStream, HostArrayView2D freqDomainData, GPUArrayView2D::type> freqDomainDataGPU, GPUStreamHandle freqDomainStream) : comm_(std::move(comm)), spaceDomainBufferHost_(spaceDomainData), freqDomainBufferHost_(freqDomainData), spaceDomainBufferGPU_(spaceDomainDataGPU), freqDomainBufferGPU_(freqDomainDataGPU), numLocalXYPlanes_(spaceDomainData.dim_outer()),
spaceDomainStream_(std::move(spaceDomainStream)), freqDomainStream_(std::move(freqDomainStream)) { /* Debug-only checks: the two host views must not alias and their extents must match the grid parameters of this rank. The space domain view is expected as (xy planes, y, x_freq) from outer to inner, the frequency view as (z sticks, z). */ assert(disjoint(spaceDomainData, freqDomainData)); assert(param->dim_x_freq() == spaceDomainData.dim_inner()); assert(param->dim_y() == spaceDomainData.dim_mid()); assert(param->num_xy_planes(comm_.rank()) == spaceDomainData.dim_outer()); assert(param->dim_z() == freqDomainData.dim_inner()); assert(param->num_z_sticks(comm_.rank()) == freqDomainData.dim_outer()); // create underlying type MPIDatatypeHandle complexType = MPIDatatypeHandle::create_contiguous(2, MPIMatchElementaryType::get()); // create types in frequency space for each rank: // each type represents a fixed length part of every z stick the rank holds freqDomainTypeHandles_.reserve(comm_.size()); freqDomainCount_.reserve(comm_.size()); freqDomainTypes_.reserve(comm_.size()); freqDomainDispls_.assign(comm_.size(), 0); const SizeType numLocalZSticks = param->num_z_sticks(comm_.rank()); const SizeType numLocalXYPlanes = param->num_xy_planes(comm_.rank()); /* Per rank r: a 2D subarray selecting, for every local z stick, the num_xy_planes(r) elements starting at xy_plane_offset(r). If rank r holds no planes or this rank holds no sticks, the count is 0 and the plain complex type serves as a placeholder. */ for (SizeType r = 0; r < comm_.size(); ++r) { if (param->num_xy_planes(r) > 0 && numLocalZSticks > 0) { const int ndims = 2; const int arrayOfSizes[] = {(int)numLocalZSticks, (int)freqDomainBufferHost_.dim_inner()}; const int arrayOfSubsizes[] = {(int)numLocalZSticks, (int)param->num_xy_planes(r)}; const int arrayOfStarts[] = {(int)0, (int)param->xy_plane_offset(r)}; const int order = MPI_ORDER_C; freqDomainCount_.emplace_back(1); freqDomainTypeHandles_.emplace_back(MPIDatatypeHandle::create_subarray( ndims, arrayOfSizes, arrayOfSubsizes, arrayOfStarts, order, complexType.get())); freqDomainTypes_.emplace_back(freqDomainTypeHandles_.back().get()); } else { freqDomainCount_.emplace_back(0); freqDomainTypeHandles_.emplace_back(complexType); freqDomainTypes_.emplace_back(freqDomainTypeHandles_.back().get()); } } // create types in space domain for each rank: // each type represents a batch of partial z sticks with inner stride dimX*dimY and placed // according
to the assosiated x/y indices std::vector indexedBlocklengths; std::vector indexedDispls; spaceDomainTypes_.reserve(comm_.size()); spaceDomainCount_.reserve(comm_.size()); spaceDomainDispls_.assign(comm_.size(), 0); /* Per rank r: every z stick of rank r is described as numLocalXYPlanes single elements, one per local xy plane (stride dim_inner * dim_mid elements), and positioned by a byte displacement derived from the x/y index of the stick. Pairs with nothing to exchange get count 0 and the plain complex type. */ for (SizeType r = 0; r < comm_.size(); ++r) { if (param->num_z_sticks(r) > 0 && numLocalXYPlanes > 0) { // data type for single z stick part MPIDatatypeHandle stridedZStickType = MPIDatatypeHandle::create_vector( numLocalXYPlanes, 1, spaceDomainBufferHost_.dim_inner() * spaceDomainBufferHost_.dim_mid(), complexType.get()); const auto zStickXYIndices = param->z_stick_xy_indices(r); indexedBlocklengths.resize(zStickXYIndices.size(), 1); indexedDispls.resize(zStickXYIndices.size()); // displacements of all z stick parts to be sent to current rank for (SizeType idxZStick = 0; idxZStick < zStickXYIndices.size(); ++idxZStick) { // transpose stick index: xyIndex = x * dim_y + y is re-linearised as y * dim_x_freq + x const int xyIndex = zStickXYIndices(idxZStick); const int x = xyIndex / param->dim_y(); const int y = xyIndex - x * param->dim_y(); indexedDispls[idxZStick] = 2 * sizeof(T) * (y * param->dim_x_freq() + x); } spaceDomainCount_.emplace_back(1); spaceDomainTypeHandles_.emplace_back( MPIDatatypeHandle::create_hindexed(zStickXYIndices.size(), indexedBlocklengths.data(), indexedDispls.data(), stridedZStickType.get())); spaceDomainTypes_.emplace_back(spaceDomainTypeHandles_.back().get()); } else { spaceDomainCount_.emplace_back(0); spaceDomainTypeHandles_.emplace_back(complexType); spaceDomainTypes_.emplace_back(complexType.get()); } } } /* Prepares the backward (frequency to space) exchange. With SPFFT_GPU_DIRECT the GPU space domain buffer is zeroed asynchronously on spaceDomainStream_. Otherwise the GPU to host copy of the frequency data is queued on freqDomainStream_ and the host space domain buffer is zeroed. */ template auto TransposeMPIUnbufferedGPU::pack_backward() -> void { #ifdef SPFFT_GPU_DIRECT gpu::check_status(gpu::memset_async( static_cast(spaceDomainBufferGPU_.data()), 0, spaceDomainBufferGPU_.size() * sizeof(typename decltype(spaceDomainBufferGPU_)::ValueType), spaceDomainStream_.get())); #else copy_from_gpu_async(freqDomainStream_, freqDomainBufferGPU_, freqDomainBufferHost_); // zero target data location (not all values are overwritten upon unpacking) std::memset(
static_cast(spaceDomainBufferHost_.data()), 0, sizeof(typename decltype(spaceDomainBufferHost_)::ValueType) * spaceDomainBufferHost_.size()); #endif } /* After the backward exchange: without SPFFT_GPU_DIRECT, queues the host to GPU copy of the space domain data on spaceDomainStream_; with SPFFT_GPU_DIRECT there is nothing to do. */ template auto TransposeMPIUnbufferedGPU::unpack_backward() -> void { #ifndef SPFFT_GPU_DIRECT copy_to_gpu_async(spaceDomainStream_, spaceDomainBufferHost_, spaceDomainBufferGPU_); #endif } /* Starts the backward exchange once the work queued on freqDomainStream_ has finished. nonBlockingExchange selects MPI_Ialltoallw, whose request is waited on in exchange_backward_finalize, instead of the blocking MPI_Alltoallw. NOTE(review): in SPFFT_GPU_DIRECT builds the receive buffer is zeroed on spaceDomainStream_ in pack_backward, but only freqDomainStream_ is synchronized here - confirm that the caller orders the two streams. */ template auto TransposeMPIUnbufferedGPU::exchange_backward_start(const bool nonBlockingExchange) -> void { assert(omp_get_thread_num() == 0); // only thread 0 is allowed to enter gpu::check_status(gpu::stream_synchronize(freqDomainStream_.get())); #ifdef SPFFT_GPU_DIRECT auto sendBufferPtr = freqDomainBufferGPU_.data(); auto recvBufferPtr = spaceDomainBufferGPU_.data(); #else auto sendBufferPtr = freqDomainBufferHost_.data(); auto recvBufferPtr = spaceDomainBufferHost_.data(); #endif if (nonBlockingExchange) { mpi_check_status(MPI_Ialltoallw( sendBufferPtr, freqDomainCount_.data(), freqDomainDispls_.data(), freqDomainTypes_.data(), recvBufferPtr, spaceDomainCount_.data(), spaceDomainDispls_.data(), spaceDomainTypes_.data(), comm_.get(), mpiRequest_.get_and_activate())); } else { mpi_check_status(MPI_Alltoallw(sendBufferPtr, freqDomainCount_.data(), freqDomainDispls_.data(), freqDomainTypes_.data(), recvBufferPtr, spaceDomainCount_.data(), spaceDomainDispls_.data(), spaceDomainTypes_.data(), comm_.get())); } } /* Waits on the request of a non-blocking backward exchange if one is active. */ template auto TransposeMPIUnbufferedGPU::exchange_backward_finalize() -> void { mpiRequest_.wait_if_active(); } /* Mirror of exchange_backward_start: waits for spaceDomainStream_, then sends the space domain data and receives the frequency domain data. */ template auto TransposeMPIUnbufferedGPU::exchange_forward_start(const bool nonBlockingExchange) -> void { assert(omp_get_thread_num() == 0); // only thread 0 is allowed to enter gpu::check_status(gpu::stream_synchronize(spaceDomainStream_.get())); #ifdef SPFFT_GPU_DIRECT auto sendBufferPtr = spaceDomainBufferGPU_.data(); auto recvBufferPtr = freqDomainBufferGPU_.data(); #else auto sendBufferPtr = spaceDomainBufferHost_.data(); auto recvBufferPtr = freqDomainBufferHost_.data(); #endif if
(nonBlockingExchange) { mpi_check_status(MPI_Ialltoallw( sendBufferPtr, spaceDomainCount_.data(), spaceDomainDispls_.data(), spaceDomainTypes_.data(), recvBufferPtr, freqDomainCount_.data(), freqDomainDispls_.data(), freqDomainTypes_.data(), comm_.get(), mpiRequest_.get_and_activate())); } else { mpi_check_status(MPI_Alltoallw(sendBufferPtr, spaceDomainCount_.data(), spaceDomainDispls_.data(), spaceDomainTypes_.data(), recvBufferPtr, freqDomainCount_.data(), freqDomainDispls_.data(), freqDomainTypes_.data(), comm_.get())); } } /* Waits on the request of a non-blocking forward exchange if one is active. */ template auto TransposeMPIUnbufferedGPU::exchange_forward_finalize() -> void { mpiRequest_.wait_if_active(); } /* Before the forward exchange: without SPFFT_GPU_DIRECT, queues the GPU to host copy of the space domain data on spaceDomainStream_. */ template auto TransposeMPIUnbufferedGPU::pack_forward() -> void { #ifndef SPFFT_GPU_DIRECT copy_from_gpu_async(spaceDomainStream_, spaceDomainBufferGPU_, spaceDomainBufferHost_); #endif } /* After the forward exchange: without SPFFT_GPU_DIRECT, queues the host to GPU copy of the frequency data on freqDomainStream_. */ template auto TransposeMPIUnbufferedGPU::unpack_forward() -> void { #ifndef SPFFT_GPU_DIRECT copy_to_gpu_async(freqDomainStream_, freqDomainBufferHost_, freqDomainBufferGPU_); #endif } // Instantiate class for float and double #ifdef SPFFT_SINGLE_PRECISION template class TransposeMPIUnbufferedGPU; #endif template class TransposeMPIUnbufferedGPU; } // namespace spfft #endif // SPFFT_MPI SpFFT-1.1.0/src/transpose/transpose_mpi_unbuffered_gpu.hpp000066400000000000000000000101621457701740000237210ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TRANSPOSE_MPI_UNBUFFERED_GPU_HPP #define SPFFT_TRANSPOSE_MPI_UNBUFFERED_GPU_HPP #include #include #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_stream_handle.hpp" #include "memory/gpu_array.hpp" #include "memory/gpu_array_view.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/config.h" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/type_check.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_request_handle.hpp" namespace spfft { /* Transpose implementation for GPU transforms that exchanges the space domain and frequency domain arrays through per-rank MPI datatypes instead of separate pack buffers. It keeps a host view and a GPU view of each array plus one GPU stream per domain; which pair of views is handed to MPI depends on SPFFT_GPU_DIRECT (see the .cpp). */ template class TransposeMPIUnbufferedGPU : public Transpose { static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = std::complex; public: TransposeMPIUnbufferedGPU( const std::shared_ptr& param, MPICommunicatorHandle comm, HostArrayView3D spaceDomainData, GPUArrayView3D::type> spaceDomainDataGPU, GPUStreamHandle spaceDomainStream,
HostArrayView2D freqDomainData, GPUArrayView2D::type> freqDomainDataGPU, GPUStreamHandle freqDomainStream); /* Overrides of the Transpose interface, one set per direction. exchange_*_start takes a flag requesting a non-blocking exchange. */ auto pack_backward() -> void override; auto exchange_backward_start(const bool nonBlockingExchange) -> void override; auto exchange_backward_finalize() -> void override; auto unpack_backward() -> void override; auto pack_forward() -> void override; auto exchange_forward_start(const bool nonBlockingExchange) -> void override; auto exchange_forward_finalize() -> void override; auto unpack_forward() -> void override; private: MPICommunicatorHandle comm_; MPIRequestHandle mpiRequest_; HostArrayView3D spaceDomainBufferHost_; HostArrayView2D freqDomainBufferHost_; GPUArrayView3D::type> spaceDomainBufferGPU_; GPUArrayView2D::type> freqDomainBufferGPU_; SizeType numLocalXYPlanes_; GPUStreamHandle spaceDomainStream_; GPUStreamHandle freqDomainStream_; /* Per-rank argument tables for the all-to-all exchange. The *TypeHandles_ vectors keep the MPIDatatypeHandle objects; the *Types_ vectors hold the raw values obtained from them via get(), in the same order. */ std::vector freqDomainTypeHandles_; std::vector freqDomainTypes_; std::vector freqDomainDispls_; std::vector freqDomainCount_; std::vector spaceDomainTypeHandles_; std::vector spaceDomainTypes_; std::vector spaceDomainDispls_; std::vector spaceDomainCount_; }; } // namespace spfft #endif // SPFFT_MPI #endif SpFFT-1.1.0/src/transpose/transpose_mpi_unbuffered_host.cpp000066400000000000000000000215151457701740000241020ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include "memory/array_view_utility.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/exceptions.hpp" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/omp_definitions.hpp" #include "util/type_check.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_check_status.hpp" #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_match_elementary_type.hpp" #include "transpose/transpose_mpi_unbuffered_host.hpp" namespace spfft { /* Constructor. Stores the communicator and the two host views, then builds one MPI datatype, count and displacement per rank for each side of the exchange, so that the arrays themselves can be passed to MPI_Alltoallw or MPI_Ialltoallw by the exchange methods below. */ template TransposeMPIUnbufferedHost::TransposeMPIUnbufferedHost( const std::shared_ptr& param, MPICommunicatorHandle comm, HostArrayView3D spaceDomainData, HostArrayView2D freqDomainData) : comm_(std::move(comm)), spaceDomainData_(spaceDomainData), freqDomainData_(freqDomainData), numLocalXYPlanes_(spaceDomainData.dim_outer()) { /* Debug-only checks on aliasing and extents. Here the space domain view is expected as (xy planes, x_freq, y) from outer to inner, the frequency view as (z sticks, z). */ assert(disjoint(spaceDomainData, freqDomainData)); assert(param->dim_x_freq() ==
spaceDomainData.dim_mid()); assert(param->dim_y() == spaceDomainData.dim_inner()); assert(param->num_xy_planes(comm_.rank()) == spaceDomainData.dim_outer()); assert(param->dim_z() == freqDomainData.dim_inner()); assert(param->num_z_sticks(comm_.rank()) == freqDomainData.dim_outer()); // create underlying type MPIDatatypeHandle complexType = MPIDatatypeHandle::create_contiguous(2, MPIMatchElementaryType::get()); // create types in frequency space for each rank: // each type represents a fixed length part of every z stick the rank holds freqDomainTypeHandles_.reserve(comm_.size()); freqDomainCount_.reserve(comm_.size()); freqDomainTypes_.reserve(comm_.size()); freqDomainDispls_.assign(comm_.size(), 0); const SizeType numLocalZSticks = param->num_z_sticks(comm_.rank()); const SizeType numLocalXYPlanes = param->num_xy_planes(comm_.rank()); /* Per rank r: a 2D subarray selecting, for every local z stick, the num_xy_planes(r) elements starting at xy_plane_offset(r). If rank r holds no planes or this rank holds no sticks, the count is 0 and the plain complex type serves as a placeholder. */ for (SizeType r = 0; r < comm_.size(); ++r) { if (param->num_xy_planes(r) > 0 && numLocalZSticks > 0) { const int ndims = 2; const int arrayOfSizes[] = {(int)numLocalZSticks, (int)freqDomainData_.dim_inner()}; const int arrayOfSubsizes[] = {(int)numLocalZSticks, (int)param->num_xy_planes(r)}; const int arrayOfStarts[] = {(int)0, (int)param->xy_plane_offset(r)}; const int order = MPI_ORDER_C; freqDomainCount_.emplace_back(1); freqDomainTypeHandles_.emplace_back(MPIDatatypeHandle::create_subarray( ndims, arrayOfSizes, arrayOfSubsizes, arrayOfStarts, order, complexType.get())); freqDomainTypes_.emplace_back(freqDomainTypeHandles_.back().get()); } else { freqDomainCount_.emplace_back(0); freqDomainTypeHandles_.emplace_back(complexType); freqDomainTypes_.emplace_back(freqDomainTypeHandles_.back().get()); } } // create types in space domain for each rank: // each type represents a batch of partial z sticks with inner stride dimX*dimY and placed // according to the associated x/y indices std::vector indexedBlocklengths; std::vector indexedDispls; spaceDomainTypes_.reserve(comm_.size()); spaceDomainCount_.reserve(comm_.size());
spaceDomainDispls_.assign(comm_.size(), 0); /* Per rank r: every z stick of rank r is described as numLocalXYPlanes single elements, one per local xy plane (stride dim_inner * dim_mid elements), and positioned by a byte displacement derived from the xy index of the stick. Pairs with nothing to exchange get count 0 and the plain complex type. */ for (SizeType r = 0; r < comm_.size(); ++r) { if (param->num_z_sticks(r) > 0 && numLocalXYPlanes > 0) { // data type for single z stick part MPIDatatypeHandle stridedZStickType = MPIDatatypeHandle::create_vector( numLocalXYPlanes, 1, spaceDomainData_.dim_inner() * spaceDomainData_.dim_mid(), complexType.get()); const auto zStickXYIndices = param->z_stick_xy_indices(r); indexedBlocklengths.resize(zStickXYIndices.size(), 1); indexedDispls.resize(zStickXYIndices.size()); // displacements of all z stick parts to be sent to current rank for (SizeType idxZStick = 0; idxZStick < zStickXYIndices.size(); ++idxZStick) { /* the stored xy index is used directly as the element offset inside one xy plane */ const auto& xyIndex = zStickXYIndices(idxZStick); indexedDispls[idxZStick] = 2 * sizeof(T) * xyIndex; } spaceDomainCount_.emplace_back(1); spaceDomainTypeHandles_.emplace_back( MPIDatatypeHandle::create_hindexed(zStickXYIndices.size(), indexedBlocklengths.data(), indexedDispls.data(), stridedZStickType.get())); spaceDomainTypes_.emplace_back(spaceDomainTypeHandles_.back().get()); } else { spaceDomainCount_.emplace_back(0); spaceDomainTypeHandles_.emplace_back(complexType); spaceDomainTypes_.emplace_back(complexType.get()); } } } /* Zeroes the space domain array, then starts the backward (frequency to space) exchange. nonBlockingExchange selects MPI_Ialltoallw, whose request is waited on in exchange_backward_finalize, instead of the blocking MPI_Alltoallw. */ template auto TransposeMPIUnbufferedHost::exchange_backward_start(const bool nonBlockingExchange) -> void { assert(omp_get_thread_num() == 0); // only thread 0 is allowed to enter // zero target data location (not all values are overwritten upon unpacking) std::memset(static_cast(spaceDomainData_.data()), 0, sizeof(typename decltype(spaceDomainData_)::ValueType) * spaceDomainData_.size()); if (nonBlockingExchange) { mpi_check_status(MPI_Ialltoallw(freqDomainData_.data(), freqDomainCount_.data(), freqDomainDispls_.data(), freqDomainTypes_.data(), spaceDomainData_.data(), spaceDomainCount_.data(), spaceDomainDispls_.data(), spaceDomainTypes_.data(), comm_.get(), mpiRequest_.get_and_activate())); } else { mpi_check_status( MPI_Alltoallw(freqDomainData_.data(), freqDomainCount_.data(),
freqDomainDispls_.data(), freqDomainTypes_.data(), spaceDomainData_.data(), spaceDomainCount_.data(), spaceDomainDispls_.data(), spaceDomainTypes_.data(), comm_.get())); } } /* Waits on the request of a non-blocking backward exchange if one is active. */ template auto TransposeMPIUnbufferedHost::exchange_backward_finalize() -> void { mpiRequest_.wait_if_active(); } /* Starts the forward (space to frequency) exchange with the send and receive roles of the two arrays swapped; the frequency array is not zeroed beforehand. */ template auto TransposeMPIUnbufferedHost::exchange_forward_start(const bool nonBlockingExchange) -> void { assert(omp_get_thread_num() == 0); // only thread 0 is allowed to enter if (nonBlockingExchange) { mpi_check_status(MPI_Ialltoallw(spaceDomainData_.data(), spaceDomainCount_.data(), spaceDomainDispls_.data(), spaceDomainTypes_.data(), freqDomainData_.data(), freqDomainCount_.data(), freqDomainDispls_.data(), freqDomainTypes_.data(), comm_.get(), mpiRequest_.get_and_activate())); } else { mpi_check_status(MPI_Alltoallw(spaceDomainData_.data(), spaceDomainCount_.data(), spaceDomainDispls_.data(), spaceDomainTypes_.data(), freqDomainData_.data(), freqDomainCount_.data(), freqDomainDispls_.data(), freqDomainTypes_.data(), comm_.get())); } } /* Waits on the request of a non-blocking forward exchange if one is active. */ template auto TransposeMPIUnbufferedHost::exchange_forward_finalize() -> void { mpiRequest_.wait_if_active(); } // Instantiate class for float and double #ifdef SPFFT_SINGLE_PRECISION template class TransposeMPIUnbufferedHost; #endif template class TransposeMPIUnbufferedHost; } // namespace spfft #endif // SPFFT_MPI SpFFT-1.1.0/src/transpose/transpose_mpi_unbuffered_host.hpp000066400000000000000000000065351457701740000241140ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_TRANSPOSE_MPI_UNBUFFERED_HOST_HPP #define SPFFT_TRANSPOSE_MPI_UNBUFFERED_HOST_HPP #include #include #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/config.h" #include "transpose.hpp" #include "util/common_types.hpp" #include "util/type_check.hpp" #ifdef SPFFT_MPI #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_datatype_handle.hpp" #include "mpi_util/mpi_request_handle.hpp" namespace spfft { /* Host-only Transpose implementation that exchanges the space domain and frequency domain arrays through per-rank MPI datatypes instead of separate pack buffers. Only the four exchange steps are overridden here; the pack and unpack steps are not declared, so they presumably fall back to the Transpose base class (confirm in transpose.hpp). */ template class TransposeMPIUnbufferedHost : public Transpose { static_assert(IsFloatOrDouble::value, "Type T must be float or double"); using ValueType = T; using ComplexType = std::complex; public: TransposeMPIUnbufferedHost(const std::shared_ptr& param, MPICommunicatorHandle comm, HostArrayView3D spaceDomainData, HostArrayView2D freqDomainData); auto exchange_backward_start(const bool nonBlockingExchange) -> void override; auto exchange_backward_finalize() -> void override; auto exchange_forward_start(const bool nonBlockingExchange) -> void override; auto exchange_forward_finalize() -> void override; private: MPICommunicatorHandle comm_; MPIRequestHandle mpiRequest_; HostArrayView3D spaceDomainData_; HostArrayView2D freqDomainData_; SizeType numLocalXYPlanes_; /* Per-rank argument tables for the all-to-all exchange; the *TypeHandles_ vectors keep the MPIDatatypeHandle objects and the *Types_ vectors the raw values obtained from them (see the constructor in the .cpp). */ std::vector freqDomainTypeHandles_; std::vector freqDomainTypes_; std::vector freqDomainDispls_; std::vector freqDomainCount_; std::vector spaceDomainTypeHandles_; std::vector spaceDomainTypes_; std::vector spaceDomainDispls_; std::vector spaceDomainCount_; }; } // namespace spfft #endif // SPFFT_MPI #endif SpFFT-1.1.0/src/util/000077500000000000000000000000001457701740000142045ustar00rootroot00000000000000SpFFT-1.1.0/src/util/common_types.hpp000066400000000000000000000033511457701740000174330ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1.
Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_COMMON_TYPES_HPP #define SPFFT_COMMON_TYPES_HPP #include "spfft/config.h" namespace spfft { /* Library-wide integer aliases: SizeType is unsigned long long and SignedType is long long, i.e. both are at least 64 bits wide. Because SizeType is unsigned, decrementing loops and subtractions on it wrap around instead of going negative; use SignedType where negative values are possible. */ typedef unsigned long long SizeType; typedef long long SignedType; } // namespace spfft #endif SpFFT-1.1.0/src/util/omp_definitions.hpp000066400000000000000000000042111457701740000201010ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_OMP_DEFINITIONS_HPP #define SPFFT_OMP_DEFINITIONS_HPP #include "spfft/config.h" /* With SPFFT_OMP defined, SPFFT_OMP_PRAGMA(content) expands to _Pragma(content). Without it, the macro expands to nothing and the serial stand-ins below are declared in namespace spfft. */ #ifdef SPFFT_OMP #include #define SPFFT_OMP_PRAGMA(content) _Pragma(content) #else #define SPFFT_OMP_PRAGMA(content) /* Serial stand-ins for the OpenMP runtime queries: they describe a single thread with id 0 that is never inside a parallel region, report one processor, and ignore requests to enable nesting. */ namespace spfft { inline int omp_get_num_threads() { return 1; } inline int omp_get_thread_num() { return 0; } inline int omp_get_max_threads() { return 1; } inline int omp_in_parallel() { return 0; } inline int omp_get_nested() { return 0; } inline int omp_get_num_procs() { return 1; } inline int omp_get_level() { return 0; } inline void omp_set_nested(int) {} } // namespace spfft #endif #endif SpFFT-1.1.0/src/util/type_check.hpp000066400000000000000000000037001457701740000170330ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TYPE_CHECK #define SPFFT_TYPE_CHECK #include #include "spfft/config.h" namespace spfft { template struct IsFloatOrDouble : std::integral_constant::type>::value || std::is_same::type>::value> {}; } // namespace spfft #endif SpFFT-1.1.0/style_guide.md000066400000000000000000000021351457701740000153000ustar00rootroot00000000000000# Style Guide for SpFFT ## Formatting The formatting style is based on the google style guide with the following exceptions: - Column size is limited to 100 instead of 80 - Access modifiers such as public and private are offset by -2 Clang-Format is used to format all files. ## Naming The following rules are not strict and consistency when using external types is preferred. ### Files Use underscores for separation. File suffix: - C++: .cpp and .hpp - C: .c and .h - CUDA: .cu and .cuh Example `my_class.cpp` ### Types Use camelcase and start with a capital letter. Example `using MyType = int;` ### Variables Use camelcase and start with lower case Example: `int myValue = 0;` #### Class / Struct Members Use a trailing underscore for non-public member variables. Public members are named like normal variables. #### Functions Function names use underscores and are all lower case Example: `my_function(int);` #### namespaces Namespaces are all lower case and use underscores. Example: ` namespace my_space {}` #### Macros Macros are all capital and use underscores.
Example: `#define MY_MACRO_VALUE 1` SpFFT-1.1.0/tests/000077500000000000000000000000001457701740000136025ustar00rootroot00000000000000SpFFT-1.1.0/tests/CMakeLists.txt000066400000000000000000000052051457701740000163440ustar00rootroot00000000000000 if(SPFFT_BUILD_TESTS) cmake_minimum_required(VERSION 3.14 FATAL_ERROR) # FetchContent_MakeAvailable requires at least 3.14 # update time stamps when using FetchContent if(POLICY CMP0135) cmake_policy(SET CMP0135 NEW) endif() set(BUILD_GMOCK OFF CACHE BOOL "") set(INSTALL_GTEST OFF CACHE BOOL "") mark_as_advanced(BUILD_GMOCK INSTALL_GTEST) include(FetchContent) # add googletest if(SPFFT_BUNDLED_GOOGLETEST) FetchContent_Declare( googletest URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.tar.gz URL_MD5 c8340a482851ef6a3fe618a082304cfc ) FetchContent_MakeAvailable(googletest) #[[ NOTE(review): upstream GoogleTest installs its package configuration under the name GTest; confirm that the lower-case name googletest used below resolves on the supported platforms. ]] else() find_package(googletest CONFIG REQUIRED) endif() list(APPEND SPFFT_TEST_LIBRARIES gtest_main) # add command line parser (CLI11): downloaded when SPFFT_BUNDLED_CLI11 is set, otherwise an installed package is required if(SPFFT_BUNDLED_CLI11) FetchContent_Declare( cli11 URL https://github.com/CLIUtils/CLI11/archive/refs/tags/v2.3.2.tar.gz URL_MD5 b80cb645dee25982110b068b426363ff ) FetchContent_MakeAvailable(cli11) else() find_package(CLI11 CONFIG REQUIRED) endif() list(APPEND SPFFT_TEST_LIBRARIES CLI11::CLI11) # add json parser (nlohmann json): downloaded when SPFFT_BUNDLED_JSON is set, otherwise an installed package is required if(SPFFT_BUNDLED_JSON) FetchContent_Declare( json URL https://github.com/nlohmann/json/archive/refs/tags/v3.11.2.tar.gz URL_MD5 e8d56bc54621037842ee9f0aeae27746 ) FetchContent_MakeAvailable(json) else() find_package(nlohmann_json CONFIG REQUIRED) endif() list(APPEND SPFFT_INCLUDE_DIRS ${PROJECT_SOURCE_DIR}/tests) # benchmark executable: links the spfft_test target plus the command line and json parsers add_executable(benchmark programs/benchmark.cpp) target_link_libraries(benchmark PRIVATE spfft_test ${SPFFT_EXTERNAL_LIBS} CLI11::CLI11 nlohmann_json::nlohmann_json) target_include_directories(benchmark PRIVATE ${SPFFT_INCLUDE_DIRS} ${SPFFT_EXTERNAL_INCLUDE_DIRS}) # test executables: run_local_tests is always built, run_mpi_tests only when SPFFT_MPI is set add_executable(run_local_tests run_local_tests.cpp local_tests/test_host_array.cpp
local_tests/test_disjoint.cpp local_tests/test_fftw_prop_hash.cpp local_tests/test_local_transform.cpp ) target_link_libraries(run_local_tests PRIVATE gtest_main) target_link_libraries(run_local_tests PRIVATE spfft_test ${SPFFT_EXTERNAL_LIBS}) target_include_directories(run_local_tests PRIVATE ${SPFFT_INCLUDE_DIRS} ${SPFFT_EXTERNAL_INCLUDE_DIRS}) if(SPFFT_MPI) add_executable(run_mpi_tests run_mpi_tests.cpp gtest_mpi.cpp mpi_tests/test_transform.cpp mpi_tests/test_multi_transform.cpp mpi_tests/test_transpose.cpp mpi_tests/test_transpose_gpu.cpp ) target_link_libraries(run_mpi_tests PRIVATE gtest_main) target_link_libraries(run_mpi_tests PRIVATE spfft_test ${SPFFT_EXTERNAL_LIBS}) target_include_directories(run_mpi_tests PRIVATE ${SPFFT_INCLUDE_DIRS} ${SPFFT_EXTERNAL_INCLUDE_DIRS}) endif() endif() SpFFT-1.1.0/tests/gtest_mpi.cpp000066400000000000000000000141011457701740000162760ustar00rootroot00000000000000#include "gtest_mpi.hpp" #include #include #include #include #include #include namespace gtest_mpi { namespace { class MPIListener : public testing::EmptyTestEventListener { public: using UnitTest = testing::UnitTest; using TestCase = testing::TestCase; using TestInfo = testing::TestInfo; using TestPartResult = testing::TestPartResult; using TestSuite = testing::TestSuite; MPIListener(testing::TestEventListener *listener) : listener_(listener), comm_(MPI_COMM_WORLD), gather_called_(false) { MPI_Comm_dup(MPI_COMM_WORLD, &comm_); int rank; MPI_Comm_rank(comm_, &rank); if (rank != 0) listener_.reset(); } void OnTestProgramStart(const UnitTest &u) override { if (listener_) listener_->OnTestProgramStart(u); } void OnTestProgramEnd(const UnitTest &u) override { if (listener_) listener_->OnTestProgramEnd(u); } void OnTestStart(const TestInfo &test_info) override { gather_called_ = false; if (listener_) listener_->OnTestStart(test_info); } void OnTestPartResult(const TestPartResult &test_part_result) override { if (listener_) { listener_->OnTestPartResult(test_part_result);
} else if (test_part_result.type() == TestPartResult::Type::kFatalFailure || test_part_result.type() == TestPartResult::Type::kNonFatalFailure) { std::size_t fileIndex = strings_.size(); strings_ += test_part_result.file_name(); strings_ += '\0'; std::size_t messageIndex = strings_.size(); strings_ += test_part_result.message(); strings_ += '\0'; infos_.emplace_back(ResultInfo{test_part_result.type(), fileIndex, test_part_result.line_number(), messageIndex}); } } void OnTestEnd(const TestInfo &test_info) override { if(!gather_called_){ std::cerr << "Missing GTEST_MPI_GUARD in test case!" << std::endl; throw std::runtime_error("Missing GTEST_MPI_GUARD in test case!"); } if (listener_) listener_->OnTestEnd(test_info); } void OnTestIterationStart(const UnitTest &u, int it) override { if (listener_) listener_->OnTestIterationStart(u, it); } void OnEnvironmentsSetUpStart(const UnitTest &u) override { if (listener_) listener_->OnEnvironmentsSetUpStart(u); } void OnEnvironmentsSetUpEnd(const UnitTest &u) override { if (listener_) listener_->OnEnvironmentsSetUpEnd(u); } void OnTestSuiteStart(const TestSuite &t) override { if (listener_) listener_->OnTestSuiteStart(t); } void OnTestDisabled(const TestInfo &t) override { if (listener_) listener_->OnTestDisabled(t); } void OnTestSuiteEnd(const TestSuite &t) override { if (listener_) listener_->OnTestSuiteEnd(t); } void OnEnvironmentsTearDownStart(const UnitTest &u) override { if (listener_) listener_->OnEnvironmentsTearDownStart(u); } void OnEnvironmentsTearDownEnd(const UnitTest &u) override { if (listener_) listener_->OnEnvironmentsTearDownEnd(u); } void OnTestIterationEnd(const UnitTest &u, int it) override { if (listener_) listener_->OnTestIterationEnd(u, it); } void GatherPartResults() { gather_called_ = true; int rank, n_proc; MPI_Comm_rank(comm_, &rank); MPI_Comm_size(comm_, &n_proc); if (rank == 0) { decltype(infos_) remoteInfos; decltype(strings_) remoteStrings; for (int r = 1; r < n_proc; ++r) { MPI_Status status; 
int count; // Result infos MPI_Probe(r, 0, comm_, &status); MPI_Get_count(&status, MPI_CHAR, &count); auto numResults = static_cast(count) / sizeof(decltype(remoteInfos)::value_type); remoteInfos.resize(numResults); MPI_Recv(remoteInfos.data(), count, MPI_BYTE, r, 0, comm_, MPI_STATUS_IGNORE); // Only continue if any results if (numResults) { // Get strings MPI_Probe(r, 0, comm_, &status); MPI_Get_count(&status, MPI_CHAR, &count); auto stringSize = static_cast(count) / sizeof(decltype(remoteStrings)::value_type); remoteStrings.resize(stringSize); MPI_Recv(&remoteStrings[0], count, MPI_BYTE, r, 0, comm_, MPI_STATUS_IGNORE); // Create error for every remote fail for (const auto &info : remoteInfos) { if (info.type == TestPartResult::Type::kFatalFailure || info.type == TestPartResult::Type::kNonFatalFailure) { ADD_FAILURE_AT(&remoteStrings[info.fileIndex], info.lineNumber) << "Rank " << r << ": " << &remoteStrings[info.messageIndex]; } } } } } else { MPI_Send(infos_.data(), infos_.size() * sizeof(decltype(infos_)::value_type), MPI_BYTE, 0, 0, comm_); // Only send string if results exist if (infos_.size()) { MPI_Send(strings_.data(), strings_.size() * sizeof(decltype(strings_)::value_type), MPI_BYTE, 0, 0, comm_); } } infos_.clear(); strings_.clear(); } private: struct ResultInfo { TestPartResult::Type type; std::size_t fileIndex; int lineNumber; std::size_t messageIndex; }; std::unique_ptr listener_; MPI_Comm comm_; bool gather_called_; std::vector infos_; std::string strings_; }; MPIListener *globalMPIListener = nullptr; } // namespace void InitGoogleTestMPI(int *argc, char **argv) { ::testing::InitGoogleTest(argc, argv); auto &test_listeners = ::testing::UnitTest::GetInstance()->listeners(); globalMPIListener = new MPIListener( test_listeners.Release(test_listeners.default_result_printer())); test_listeners.Append(globalMPIListener); } TestGuard CreateTestGuard() { return TestGuard{[]() { globalMPIListener->GatherPartResults(); }}; } } // namespace gtest_mpi 
SpFFT-1.1.0/tests/gtest_mpi.hpp000066400000000000000000000011621457701740000163060ustar00rootroot00000000000000#ifndef GTEST_MPI_HPP #define GTEST_MPI_HPP #include namespace gtest_mpi { // Internal helper struct struct TestGuard { void (*func)() = nullptr; ~TestGuard() { if (func) func(); } }; // Initialize GoogleTest and MPI functionality. MPI_Init has to called before. void InitGoogleTestMPI(int *argc, char **argv); // Create a test guard, which has to be placed in all test cases. TestGuard CreateTestGuard(); } // namespace gtest_mpi // Helper macro for creating a test guard within test cases. #define GTEST_MPI_GUARD auto gtest_mpi_guard__LINE__ = ::gtest_mpi::CreateTestGuard(); #endif SpFFT-1.1.0/tests/local_tests/000077500000000000000000000000001457701740000161165ustar00rootroot00000000000000SpFFT-1.1.0/tests/local_tests/test_disjoint.cpp000066400000000000000000000057241457701740000215140ustar00rootroot00000000000000#include #include "gtest/gtest.h" #include "memory/array_view_utility.hpp" #include "memory/host_array.hpp" #include "memory/host_array_view.hpp" using namespace spfft; class DisjointTest : public ::testing::Test { protected: void SetUp() override { array_ = HostArray(100); } HostArray array_; }; TEST_F(DisjointTest, dim1AndDim1) { { auto view1 = create_1d_view(array_, 0, 10); auto view2 = create_1d_view(array_, 0, 10); EXPECT_FALSE(disjoint(view1, view2)); } { auto view1 = create_1d_view(array_, 0, 10); auto view2 = create_1d_view(array_, 5, 10); EXPECT_FALSE(disjoint(view1, view2)); } { auto view1 = create_1d_view(array_, 0, 10); auto view2 = create_1d_view(array_, 10, 10); EXPECT_TRUE(disjoint(view1, view2)); } } TEST_F(DisjointTest, dim1AndDim2) { { auto view1 = create_1d_view(array_, 0, 10); auto view2 = create_2d_view(array_, 0, 2, 5); EXPECT_FALSE(disjoint(view1, view2)); } { auto view1 = create_1d_view(array_, 0, 10); auto view2 = create_2d_view(array_, 5, 2, 5); EXPECT_FALSE(disjoint(view1, view2)); } { auto view1 = create_1d_view(array_, 
0, 10); auto view2 = create_2d_view(array_, 10, 5, 2); EXPECT_TRUE(disjoint(view1, view2)); } } TEST_F(DisjointTest, dim1AndDim3) { { auto view1 = create_1d_view(array_, 0, 10); auto view2 = create_3d_view(array_, 0, 2, 2, 2); EXPECT_FALSE(disjoint(view1, view2)); } { auto view1 = create_1d_view(array_, 0, 10); auto view2 = create_3d_view(array_, 5, 2, 2, 2); EXPECT_FALSE(disjoint(view1, view2)); } { auto view1 = create_1d_view(array_, 0, 10); auto view2 = create_3d_view(array_, 10, 5, 2, 2); EXPECT_TRUE(disjoint(view1, view2)); } } TEST_F(DisjointTest, dim2AndDim3) { { auto view1 = create_2d_view(array_, 0, 2, 3); auto view2 = create_3d_view(array_, 0, 2, 3, 2); EXPECT_FALSE(disjoint(view1, view2)); } { auto view1 = create_2d_view(array_, 0, 2, 3); auto view2 = create_3d_view(array_, 5, 2, 2, 2); EXPECT_FALSE(disjoint(view1, view2)); } { auto view1 = create_2d_view(array_, 0, 2, 3); auto view2 = create_3d_view(array_, 6, 5, 2, 2); EXPECT_TRUE(disjoint(view1, view2)); } } TEST_F(DisjointTest, dim3AndDim3) { { auto view1 = create_3d_view(array_, 0, 2, 3, 4); auto view2 = create_3d_view(array_, 0, 2, 3, 2); EXPECT_FALSE(disjoint(view1, view2)); } { auto view1 = create_3d_view(array_, 0, 2, 3, 4); auto view2 = create_3d_view(array_, 5, 2, 2, 2); EXPECT_FALSE(disjoint(view1, view2)); } { auto view1 = create_3d_view(array_, 0, 2, 3, 2); auto view2 = create_3d_view(array_, 12, 5, 2, 2); EXPECT_TRUE(disjoint(view1, view2)); } } TEST_F(DisjointTest, DifferentValueTypes) { auto view1 = create_3d_view(array_, 0, 2, 3, 4); auto view2 = HostArrayView3D(reinterpret_cast(array_.data()), 2, 3, 4, false); EXPECT_FALSE(disjoint(view1, view2)); } SpFFT-1.1.0/tests/local_tests/test_fftw_prop_hash.cpp000066400000000000000000000010371457701740000226730ustar00rootroot00000000000000#include #include "gtest/gtest.h" #include "fft/fftw_plan_1d.hpp" TEST(FFTWPropHashTest, Unique) { std::unordered_set, spfft::FFTWPropHash> set; int maxAlignment = 1024; for (int inPlace = 0; inPlace < 2; 
++inPlace) { for (int i = 0 ;i < maxAlignment; ++i) { for (int j = 0; j < maxAlignment; ++j) { set.emplace(inPlace, i, j); } } } EXPECT_EQ(static_cast(maxAlignment) * static_cast(maxAlignment) * 2, set.size()); } SpFFT-1.1.0/tests/local_tests/test_host_array.cpp000066400000000000000000000016631457701740000220420ustar00rootroot00000000000000#include #include "gtest/gtest.h" #include "memory/host_array.hpp" using namespace spfft; class HostArrayTest : public ::testing::Test { protected: void SetUp() override { array_ = HostArray(5); int count = 0; auto data_ptr = array_.data(); for (SizeType i = 0; i < 5; ++i) { data_ptr[i] = ++count; } } HostArray array_; }; TEST_F(HostArrayTest, Iterators) { ASSERT_EQ(*array_.begin(), 1); ASSERT_EQ(*(array_.end() - 1), 5); int count = 0; for (auto& val : array_) { EXPECT_EQ(val, ++count); } } TEST_F(HostArrayTest, OperatorAccess) { int count = 0; ASSERT_EQ(array_.size(), 5); for (SizeType i = 0; i < array_.size(); ++i) { ASSERT_EQ(array_[i], ++count); } count = 0; for (SizeType i = 0; i < array_.size(); ++i) { ASSERT_EQ(array_(i), ++count); } } TEST_F(HostArrayTest, Accessors) { ASSERT_EQ(array_.front(), 1); ASSERT_EQ(array_.back(), 5); } SpFFT-1.1.0/tests/local_tests/test_local_transform.cpp000066400000000000000000000100211457701740000230400ustar00rootroot00000000000000#include #include #include #include #include #include #include "gtest/gtest.h" #include "memory/array_view_utility.hpp" #include "memory/host_array.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/grid.hpp" #include "spfft/transform.hpp" #include "test_util/generate_indices.hpp" #include "test_util/test_check_values.hpp" #include "test_util/test_transform.hpp" #include "util/common_types.hpp" class TestLocalTransform : public TransformTest { protected: TestLocalTransform() : TransformTest(), grid_(dimX_, dimY_, dimZ_, dimX_ * dimY_, std::get<1>(GetParam()), -1) {} auto grid() -> Grid& override { return grid_; } Grid 
grid_; }; TEST_P(TestLocalTransform, ForwardC2C) { try { std::vector zStickDistribution(comm_size(), 1.0); std::vector xyPlaneDistribution(comm_size(), 1.0); test_forward_c2c(zStickDistribution, xyPlaneDistribution); } catch (const std::exception& e) { std::cout << "ERROR: Rank " << comm_rank() << ", " << e.what() << std::endl; ASSERT_TRUE(false); } } TEST_P(TestLocalTransform, BackwardC2C) { try { std::vector zStickDistribution(comm_size(), 1.0); std::vector xyPlaneDistribution(comm_size(), 1.0); test_backward_c2c(zStickDistribution, xyPlaneDistribution); } catch (const std::exception& e) { std::cout << "ERROR: Rank " << comm_rank() << ", " << e.what() << std::endl; ASSERT_TRUE(false); } } TEST_P(TestLocalTransform, R2C) { try { std::vector xyPlaneDistribution(comm_size(), 1.0); test_r2c(xyPlaneDistribution); } catch (const std::exception& e) { std::cout << "ERROR: Rank " << comm_rank() << ", " << e.what() << std::endl; ASSERT_TRUE(false); } } // Show exchange name instead of enum value for test output static auto param_type_names( const ::testing::TestParamInfo< std::tuple>& info) -> std::string { const auto exchType = std::get<0>(info.param); const auto procType = std::get<1>(info.param); std::string name; switch (procType) { case SpfftProcessingUnitType::SPFFT_PU_HOST: { name += "Host"; } break; case SpfftProcessingUnitType::SPFFT_PU_GPU: { name += "GPU"; } break; default: { name += "Host+GPU"; } } name += "Size"; name += std::to_string(std::get<2>(info.param)); name += "x"; name += std::to_string(std::get<3>(info.param)); name += "x"; name += std::to_string(std::get<4>(info.param)); return name; } // instantiate tests with parameters #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #define TEST_PROCESSING_UNITS \ SpfftProcessingUnitType::SPFFT_PU_HOST, SpfftProcessingUnitType::SPFFT_PU_GPU #else #define TEST_PROCESSING_UNITS SpfftProcessingUnitType::SPFFT_PU_HOST #endif INSTANTIATE_TEST_SUITE_P(FullTest, TestLocalTransform, 
::testing::Combine(::testing::Values(SpfftExchangeType::SPFFT_EXCH_DEFAULT), ::testing::Values(TEST_PROCESSING_UNITS), ::testing::Values(1, 2, 11, 12, 13, 100), ::testing::Values(1, 2, 11, 12, 13, 100), ::testing::Values(1, 2, 11, 12, 13, 100), ::testing::Values(false)), param_type_names); INSTANTIATE_TEST_SUITE_P(CenteredIndicesTest, TestLocalTransform, ::testing::Combine(::testing::Values(SpfftExchangeType::SPFFT_EXCH_DEFAULT), ::testing::Values(TEST_PROCESSING_UNITS), ::testing::Values(1, 2, 11, 100), ::testing::Values(1, 2, 11, 100), ::testing::Values(1, 2, 11, 100), ::testing::Values(true)), param_type_names); SpFFT-1.1.0/tests/mpi_tests/000077500000000000000000000000001457701740000156115ustar00rootroot00000000000000SpFFT-1.1.0/tests/mpi_tests/test_multi_transform.cpp000066400000000000000000000066361457701740000226140ustar00rootroot00000000000000#include #include #include #include #include #include #include #include "gtest/gtest.h" #include "gtest_mpi.hpp" #include "memory/array_view_utility.hpp" #include "memory/host_array.hpp" #include "memory/host_array_view.hpp" #include "mpi_util/mpi_communicator_handle.hpp" #include "parameters/parameters.hpp" #include "spfft/spfft.hpp" #include "test_util/generate_indices.hpp" #include "test_util/test_transform.hpp" #include "util/common_types.hpp" TEST(MPIMultiTransformTest, BackwardsForwards) { GTEST_MPI_GUARD try { MPICommunicatorHandle comm(MPI_COMM_WORLD); const std::vector zStickDistribution(comm.size(), 1.0); const std::vector xyPlaneDistribution(comm.size(), 1.0); const int dimX = comm.size() * 10; const int dimY = comm.size() * 11; const int dimZ = comm.size() * 12; const int numTransforms = 3; std::mt19937 randGen(42); const auto valueIndicesPerRank = create_value_indices(randGen, zStickDistribution, 0.7, 0.7, dimX, dimY, dimZ, false); const int numLocalXYPlanes = calculate_num_local_xy_planes(comm.rank(), dimZ, xyPlaneDistribution); const auto& localIndices = valueIndicesPerRank[comm.rank()]; const int 
numValues = localIndices.size() / 3; std::vector>> freqValuesPerTrans( numTransforms, std::vector>(numValues)); std::vector freqValuePtr; for (auto& values : freqValuesPerTrans) { freqValuePtr.push_back(reinterpret_cast(values.data())); } // set frequency values to constant for each transform for (std::size_t i = 0; i < freqValuesPerTrans.size(); ++i) { for (auto& val : freqValuesPerTrans[i]) { val = std::complex(i, i); } } std::vector transforms; // create first transforms transforms.push_back(Grid(dimX, dimY, dimZ, dimX * dimY, numLocalXYPlanes, SPFFT_PU_HOST, -1, comm.get(), SPFFT_EXCH_DEFAULT) .create_transform(SPFFT_PU_HOST, SPFFT_TRANS_C2C, dimX, dimY, dimZ, numLocalXYPlanes, numValues, SPFFT_INDEX_TRIPLETS, localIndices.data())); // clone first transform for (int i = 1; i < numTransforms; ++i) { transforms.push_back(transforms.front().clone()); } std::vector processingUnits(numTransforms, SPFFT_PU_HOST); std::vector scalingTypes(numTransforms, SPFFT_NO_SCALING); // backward multi_transform_backward(numTransforms, transforms.data(), freqValuePtr.data(), processingUnits.data()); // forward multi_transform_forward(numTransforms, transforms.data(), processingUnits.data(), freqValuePtr.data(), scalingTypes.data()); // check all values for (std::size_t i = 0; i < freqValuesPerTrans.size(); ++i) { const auto targetValue = std::complex(i * dimX * dimY * dimZ, i * dimX * dimY * dimZ); for (auto& val : freqValuesPerTrans[i]) { ASSERT_NEAR(targetValue.real(), val.real(), 1e-8); ASSERT_NEAR(targetValue.imag(), val.imag(), 1e-8); } } } catch (const std::exception& e) { std::cout << "ERROR: " << e.what() << std::endl; ASSERT_TRUE(false); } } SpFFT-1.1.0/tests/mpi_tests/test_transform.cpp000066400000000000000000000153641457701740000214000ustar00rootroot00000000000000#include "test_util/test_transform.hpp" #include #include #include #include #include #include #include #include "gtest/gtest.h" #include "gtest_mpi.hpp" #include "memory/array_view_utility.hpp" #include 
"memory/host_array.hpp" #include "memory/host_array_view.hpp" #include "mpi_util/mpi_communicator_handle.hpp" #include "parameters/parameters.hpp" #include "spfft/grid.hpp" #include "spfft/transform.hpp" #include "test_util/generate_indices.hpp" #include "test_util/test_check_values.hpp" #include "util/common_types.hpp" class MPITransformTest : public TransformTest { protected: MPITransformTest() : TransformTest(), comm_(MPI_COMM_WORLD), grid_(dimX_, dimY_, dimZ_, dimX_ * dimY_, dimZ_, std::get<1>(GetParam()), -1, comm_.get(), std::get<0>(GetParam())) {} auto comm_rank() -> SizeType override { return comm_.rank(); } auto comm_size() -> SizeType override { return comm_.size(); } auto grid() -> Grid& override { return grid_; } MPICommunicatorHandle comm_; Grid grid_; }; TEST_P(MPITransformTest, ForwardUniformDistribution) { GTEST_MPI_GUARD try { std::vector zStickDistribution(comm_size(), 1.0); std::vector xyPlaneDistribution(comm_size(), 1.0); test_forward_c2c(zStickDistribution, xyPlaneDistribution); } catch (const std::exception& e) { std::cout << "ERROR: Rank " << comm_rank() << ", " << e.what() << std::endl; ASSERT_TRUE(false); } } TEST_P(MPITransformTest, BackwardAllOneRank) { GTEST_MPI_GUARD try { std::vector zStickDistribution(comm_size(), 0.0); zStickDistribution[0] = 1.0; std::vector xyPlaneDistribution(comm_size(), 0.0); xyPlaneDistribution[0] = 1.0; test_backward_c2c(zStickDistribution, xyPlaneDistribution); } catch (const std::exception& e) { std::cout << "ERROR: Rank " << comm_rank() << ", " << e.what() << std::endl; ASSERT_TRUE(false); } } TEST_P(MPITransformTest, ForwardAllOneRank) { GTEST_MPI_GUARD try { std::vector zStickDistribution(comm_size(), 0.0); zStickDistribution[0] = 1.0; std::vector xyPlaneDistribution(comm_size(), 0.0); xyPlaneDistribution[0] = 1.0; test_forward_c2c(zStickDistribution, xyPlaneDistribution); } catch (const std::exception& e) { std::cout << "ERROR: Rank " << comm_rank() << ", " << e.what() << std::endl; ASSERT_TRUE(false); 
} } TEST_P(MPITransformTest, BackwardAllOneRankPerSide) { GTEST_MPI_GUARD try { std::vector zStickDistribution(comm_size(), 0.0); zStickDistribution[0] = 1.0; std::vector xyPlaneDistribution(comm_size(), 0.0); xyPlaneDistribution[comm_size() - 1] = 1.0; test_backward_c2c(zStickDistribution, xyPlaneDistribution); } catch (const std::exception& e) { std::cout << "ERROR: Rank " << comm_rank() << ", " << e.what() << std::endl; ASSERT_TRUE(false); } } TEST_P(MPITransformTest, ForwardAllOneRankPerSide) { GTEST_MPI_GUARD try { std::vector zStickDistribution(comm_size(), 0.0); zStickDistribution[0] = 1.0; std::vector xyPlaneDistribution(comm_size(), 0.0); xyPlaneDistribution[comm_size() - 1] = 1.0; test_forward_c2c(zStickDistribution, xyPlaneDistribution); } catch (const std::exception& e) { std::cout << "ERROR: Rank " << comm_rank() << ", " << e.what() << std::endl; ASSERT_TRUE(false); } } TEST_P(MPITransformTest, R2CUniformDistribution) { GTEST_MPI_GUARD try { std::vector xyPlaneDistribution(comm_size(), 1.0); test_r2c(xyPlaneDistribution); } catch (const std::exception& e) { std::cout << "ERROR: Rank " << comm_rank() << ", " << e.what() << std::endl; ASSERT_TRUE(false); } } TEST_P(MPITransformTest, R2COneRankAllPlanes) { GTEST_MPI_GUARD try { std::vector xyPlaneDistribution(comm_size(), 0.0); xyPlaneDistribution[0] = 1.0; test_r2c(xyPlaneDistribution); } catch (const std::exception& e) { std::cout << "ERROR: Rank " << comm_rank() << ", " << e.what() << std::endl; ASSERT_TRUE(false); } } // Show exchange name instead of enum value for test output static auto param_type_names( const ::testing::TestParamInfo< std::tuple>& info) -> std::string { const auto exchType = std::get<0>(info.param); const auto procType = std::get<1>(info.param); std::string name; switch (exchType) { case SpfftExchangeType::SPFFT_EXCH_BUFFERED: { name += "Buffered"; } break; case SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED: { name += "CompactBuffered"; } break; case 
SpfftExchangeType::SPFFT_EXCH_UNBUFFERED: { name += "Unbuffered"; } break; default: name += "Default"; } switch (procType) { case SpfftProcessingUnitType::SPFFT_PU_HOST: { name += "Host"; } break; case SpfftProcessingUnitType::SPFFT_PU_GPU: { name += "GPU"; } break; default: { name += "Host+GPU"; } } name += "Size"; name += std::to_string(std::get<2>(info.param)); name += "x"; name += std::to_string(std::get<3>(info.param)); name += "x"; name += std::to_string(std::get<4>(info.param)); return name; } // instantiate tests with parameters #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #define TEST_PROCESSING_UNITS \ SpfftProcessingUnitType::SPFFT_PU_HOST, SpfftProcessingUnitType::SPFFT_PU_GPU #else #define TEST_PROCESSING_UNITS SpfftProcessingUnitType::SPFFT_PU_HOST #endif INSTANTIATE_TEST_SUITE_P( FullTest, MPITransformTest, ::testing::Combine(::testing::Values(SpfftExchangeType::SPFFT_EXCH_BUFFERED, SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED, SpfftExchangeType::SPFFT_EXCH_UNBUFFERED, SpfftExchangeType::SPFFT_EXCH_DEFAULT), ::testing::Values(TEST_PROCESSING_UNITS), ::testing::Values(1, 2, 11, 12, 13, 100), ::testing::Values(1, 2, 11, 12, 13, 100), ::testing::Values(1, 2, 11, 12, 13, 100), ::testing::Values(false)), param_type_names); INSTANTIATE_TEST_SUITE_P(CenteredIndicesTest, MPITransformTest, ::testing::Combine(::testing::Values(SpfftExchangeType::SPFFT_EXCH_DEFAULT), ::testing::Values(TEST_PROCESSING_UNITS), ::testing::Values(1, 2, 11, 100), ::testing::Values(1, 2, 11, 100), ::testing::Values(1, 2, 11, 100), ::testing::Values(true)), param_type_names); SpFFT-1.1.0/tests/mpi_tests/test_transpose.cpp000066400000000000000000000176121457701740000214010ustar00rootroot00000000000000#include #include #include #include #include #include "gtest/gtest.h" #include "gtest_mpi.hpp" #include "memory/array_view_utility.hpp" #include "memory/host_array.hpp" #include "memory/host_array_view.hpp" #include "mpi_util/mpi_communicator_handle.hpp" #include 
"parameters/parameters.hpp" #include "transpose/transpose_mpi_buffered_host.hpp" #include "transpose/transpose_mpi_compact_buffered_host.hpp" #include "transpose/transpose_mpi_unbuffered_host.hpp" #include "util/common_types.hpp" using namespace spfft; class TransposeTest : public ::testing::Test { protected: void SetUp() override { comm_ = MPICommunicatorHandle(MPI_COMM_WORLD); SizeType dimX = 2 * comm_.size(); SizeType dimY = 3 * comm_.size(); SizeType dimZ = 4 * comm_.size(); // create memory space array1_ = HostArray>(dimX * dimY * dimZ, std::complex(1.0, 1.0)); array2_ = HostArray>(dimX * dimY * dimZ, std::complex(1.0, 1.0)); fullArray_ = HostArray>(dimX * dimY * dimZ); // plane split between ranks const SizeType numLocalXYPlanes = (dimZ / comm_.size()) + (comm_.rank() == comm_.size() - 1 ? dimZ % comm_.size() : 0); const SizeType localXYPlaneOffset = (dimZ / comm_.size()) * comm_.rank(); // create all indices the same way (random generator must be equally initialized) std::mt19937 sharedRandGen(42); std::uniform_real_distribution dis(0.0, 1.0); std::uniform_int_distribution rankSelector(0, comm_.size() - 1); std::vector indexTriplets; indexTriplets.reserve(dimX * dimY * dimZ); for (int x = 0; x < static_cast(dimX); ++x) { for (int y = 0; y < static_cast(dimY); ++y) { // create sparse z stick distribution if (dis(sharedRandGen) < 0.5 && rankSelector(sharedRandGen) == static_cast(comm_.size())) { for (int z = 0; z < static_cast(dimY); ++z) { indexTriplets.push_back(x); indexTriplets.push_back(y); indexTriplets.push_back(z); } } } } paramPtr_.reset(new Parameters(comm_, SPFFT_TRANS_C2C, dimX, dimY, dimZ, numLocalXYPlanes, indexTriplets.size() / 3, SPFFT_INDEX_TRIPLETS, indexTriplets.data())); // initialize random z-stick data auto fullView = create_3d_view(fullArray_, 0, dimX, dimY, dimZ); auto freqView = create_2d_view(array1_, 0, paramPtr_->num_z_sticks(comm_.rank()), dimZ); for (SizeType r = 0; r < comm_.size(); ++r) { for (const auto& stickIdx : 
paramPtr_->z_stick_xy_indices(r)) { const auto x = stickIdx / dimY; const auto y = stickIdx - x * dimY; for (SizeType z = 0; z < freqView.dim_inner(); ++z) { fullView(x, y, z) = std::complex(dis(sharedRandGen), dis(sharedRandGen)); } } } // copy data into sticks SizeType count = 0; for (const auto& stickIdx : paramPtr_->z_stick_xy_indices(comm_.rank())) { const auto x = stickIdx / dimY; const auto y = stickIdx - x * dimY; for (SizeType z = 0; z < freqView.dim_inner(); ++z) { freqView(count, z) = fullView(x, y, z); } ++count; } } MPICommunicatorHandle comm_; std::shared_ptr paramPtr_; HostArray> array1_; HostArray> array2_; HostArray> fullArray_; }; static void check_space_domain(const HostArrayView3D>& realView, const HostArrayView3D>& fullView, const SizeType planeOffset, const SizeType numLocalXYPlanes) { for (SizeType z = 0; z < numLocalXYPlanes; ++z) { for (SizeType x = 0; x < fullView.dim_outer(); ++x) { for (SizeType y = 0; y < fullView.dim_mid(); ++y) { EXPECT_EQ(realView(z, x, y).real(), fullView(x, y, z + planeOffset).real()); EXPECT_EQ(realView(z, x, y).imag(), fullView(x, y, z + planeOffset).imag()); } } } } static void check_freq_domain(const HostArrayView2D>& freqView, const HostArrayView3D>& fullView, HostArrayConstView1D xyIndices) { for (SizeType stickIdx = 0; stickIdx < freqView.dim_outer(); ++stickIdx) { const auto x = xyIndices(stickIdx) / fullView.dim_outer(); const auto y = xyIndices(stickIdx) - x * fullView.dim_outer(); for (SizeType z = 0; z < freqView.dim_inner(); ++z) { EXPECT_EQ(freqView(stickIdx, z).real(), fullView(x, y, z).real()); EXPECT_EQ(freqView(stickIdx, z).imag(), fullView(x, y, z).imag()); } } } TEST_F(TransposeTest, Unbuffered) { GTEST_MPI_GUARD auto freqXYView = create_3d_view(array2_, 0, paramPtr_->num_xy_planes(comm_.rank()), paramPtr_->dim_x(), paramPtr_->dim_y()); auto freqView = create_2d_view(array1_, 0, paramPtr_->num_z_sticks(comm_.rank()), paramPtr_->dim_z()); auto fullView = create_3d_view(fullArray_, 0, 
paramPtr_->dim_x(), paramPtr_->dim_y(), paramPtr_->dim_z()); TransposeMPIUnbufferedHost transpose(paramPtr_, comm_, freqXYView, freqView); transpose.backward(); check_space_domain(freqXYView, fullView, paramPtr_->xy_plane_offset(comm_.rank()), paramPtr_->num_xy_planes(comm_.rank())); transpose.forward(); check_freq_domain(freqView, fullView, paramPtr_->z_stick_xy_indices(comm_.rank())); } TEST_F(TransposeTest, CompactBuffered) { GTEST_MPI_GUARD auto freqXYView = create_3d_view(array2_, 0, paramPtr_->num_xy_planes(comm_.rank()), paramPtr_->dim_x(), paramPtr_->dim_y()); auto freqView = create_2d_view(array1_, 0, paramPtr_->num_z_sticks(comm_.rank()), paramPtr_->dim_z()); auto fullView = create_3d_view(fullArray_, 0, paramPtr_->dim_x(), paramPtr_->dim_y(), paramPtr_->dim_z()); auto transposeBufferZ = create_1d_view( array2_, 0, paramPtr_->total_num_xy_planes() * paramPtr_->num_z_sticks(comm_.rank())); auto transposeBufferXY = create_1d_view( array1_, 0, paramPtr_->total_num_z_sticks() * paramPtr_->num_xy_planes(comm_.rank())); TransposeMPICompactBufferedHost transpose(paramPtr_, comm_, freqXYView, freqView, transposeBufferXY, transposeBufferZ); transpose.backward(); check_space_domain(freqXYView, fullView, paramPtr_->xy_plane_offset(comm_.rank()), paramPtr_->num_xy_planes(comm_.rank())); transpose.forward(); check_freq_domain(freqView, fullView, paramPtr_->z_stick_xy_indices(comm_.rank())); } TEST_F(TransposeTest, Buffered) { GTEST_MPI_GUARD auto freqXYView = create_3d_view(array2_, 0, paramPtr_->num_xy_planes(comm_.rank()), paramPtr_->dim_x(), paramPtr_->dim_y()); auto freqView = create_2d_view(array1_, 0, paramPtr_->num_z_sticks(comm_.rank()), paramPtr_->dim_z()); auto fullView = create_3d_view(fullArray_, 0, paramPtr_->dim_x(), paramPtr_->dim_y(), paramPtr_->dim_z()); auto transposeBufferZ = create_1d_view( array2_, 0, paramPtr_->max_num_z_sticks() * paramPtr_->max_num_xy_planes() * comm_.size()); auto transposeBufferXY = create_1d_view( array1_, 0, 
paramPtr_->max_num_z_sticks() * paramPtr_->max_num_xy_planes() * comm_.size()); TransposeMPIBufferedHost transpose(paramPtr_, comm_, freqXYView, freqView, transposeBufferXY, transposeBufferZ); transpose.backward(); check_space_domain(freqXYView, fullView, paramPtr_->xy_plane_offset(comm_.rank()), paramPtr_->num_xy_planes(comm_.rank())); transpose.forward(); check_freq_domain(freqView, fullView, paramPtr_->z_stick_xy_indices(comm_.rank())); } SpFFT-1.1.0/tests/mpi_tests/test_transpose_gpu.cpp000066400000000000000000000256231457701740000222550ustar00rootroot00000000000000#include #include #include #include #include #include "gtest/gtest.h" #include "gtest_mpi.hpp" #include "memory/array_view_utility.hpp" #include "memory/host_array.hpp" #include "memory/host_array_view.hpp" #include "mpi_util/mpi_communicator_handle.hpp" #include "parameters/parameters.hpp" #include "util/common_types.hpp" #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #include "execution/execution_gpu.hpp" #include "memory/gpu_array.hpp" #include "transpose/transpose_mpi_buffered_gpu.hpp" #include "transpose/transpose_mpi_compact_buffered_gpu.hpp" #include "transpose/transpose_mpi_unbuffered_gpu.hpp" using namespace spfft; class TransposeGPUTest : public ::testing::Test { protected: void SetUp() override { comm_ = MPICommunicatorHandle(MPI_COMM_WORLD); SizeType dimX = 2 * comm_.size(); SizeType dimY = 3 * comm_.size(); SizeType dimZ = 4 * comm_.size(); // create memory space array1_ = HostArray>(dimX * dimY * dimZ, std::complex(1.0, 1.0)); array2_ = HostArray>(dimX * dimY * dimZ, std::complex(1.0, 1.0)); fullArray_ = HostArray>(dimX * dimY * dimZ); gpuArray1_ = GPUArray::type>(array1_.size()); gpuArray2_ = GPUArray::type>(array1_.size()); // pinn arrays array1_.pin_memory(); array2_.pin_memory(); // plane split between ranks const SizeType numLocalXYPlanes = (dimZ / comm_.size()) + (comm_.rank() == comm_.size() - 1 ? 
dimZ % comm_.size() : 0); const SizeType localXYPlaneOffset = (dimZ / comm_.size()) * comm_.rank(); // create all indices the same way (random generator must be equally initialized) std::mt19937 sharedRandGen(42); std::uniform_real_distribution dis(0.0, 1.0); std::uniform_int_distribution rankSelector(0, comm_.size() - 1); std::vector indexTriplets; indexTriplets.reserve(dimX * dimY * dimZ); for (int x = 0; x < static_cast(dimX); ++x) { for (int y = 0; y < static_cast(dimY); ++y) { // create sparse z stick distribution if (dis(sharedRandGen) < 0.5 && rankSelector(sharedRandGen) == comm_.size()) { for (int z = 0; z < static_cast(dimY); ++z) { indexTriplets.push_back(x); indexTriplets.push_back(y); indexTriplets.push_back(z); } } } } paramPtr_.reset(new Parameters(comm_, SPFFT_TRANS_C2C, dimX, dimY, dimZ, numLocalXYPlanes, indexTriplets.size() / 3, SPFFT_INDEX_TRIPLETS, indexTriplets.data())); // initialize random z-stick data auto fullView = create_3d_view(fullArray_, 0, dimX, dimY, dimZ); auto freqView = create_2d_view(array1_, 0, paramPtr_->num_z_sticks(comm_.rank()), dimZ); for (SizeType r = 0; r < comm_.size(); ++r) { for (const auto& stickIdx : paramPtr_->z_stick_xy_indices(r)) { const auto x = stickIdx / dimY; const auto y = stickIdx - x * dimY; for (SizeType z = 0; z < freqView.dim_inner(); ++z) { fullView(x, y, z) = std::complex(dis(sharedRandGen), dis(sharedRandGen)); } } } // copy data into sticks SizeType count = 0; for (const auto& stickIdx : paramPtr_->z_stick_xy_indices(comm_.rank())) { const auto x = stickIdx / dimY; const auto y = stickIdx - x * dimY; for (SizeType z = 0; z < freqView.dim_inner(); ++z) { freqView(count, z) = fullView(x, y, z); } ++count; } } MPICommunicatorHandle comm_; std::shared_ptr paramPtr_; HostArray> array1_; HostArray> array2_; HostArray> fullArray_; GPUArray::type> gpuArray1_; GPUArray::type> gpuArray2_; }; static void check_space_domain(const HostArrayView3D>& realView, const HostArrayView3D>& fullView, const SizeType 
planeOffset, const SizeType numLocalXYPlanes) { for (SizeType z = 0; z < numLocalXYPlanes; ++z) { for (SizeType x = 0; x < fullView.dim_outer(); ++x) { for (SizeType y = 0; y < fullView.dim_mid(); ++y) { EXPECT_EQ(realView(z, y, x).real(), fullView(x, y, z + planeOffset).real()); EXPECT_EQ(realView(z, y, x).imag(), fullView(x, y, z + planeOffset).imag()); } } } } static void check_freq_domain(const HostArrayView2D>& freqView, const HostArrayView3D>& fullView, HostArrayConstView1D xyIndices) { for (SizeType stickIdx = 0; stickIdx < freqView.dim_outer(); ++stickIdx) { const auto x = stickIdx / fullView.dim_outer(); const auto y = stickIdx - x * fullView.dim_outer(); for (SizeType z = 0; z < freqView.dim_inner(); ++z) { EXPECT_EQ(freqView(stickIdx, z).real(), fullView(x, y, z).real()); EXPECT_EQ(freqView(stickIdx, z).imag(), fullView(x, y, z).imag()); } } } TEST_F(TransposeGPUTest, Buffered) { GTEST_MPI_GUARD auto freqXYView = create_3d_view(array2_, 0, paramPtr_->num_xy_planes(comm_.rank()), paramPtr_->dim_y(), paramPtr_->dim_x()); auto freqXYViewGPU = create_3d_view(gpuArray2_, 0, paramPtr_->num_xy_planes(comm_.rank()), paramPtr_->dim_y(), paramPtr_->dim_x()); auto freqView = create_2d_view(array1_, 0, paramPtr_->num_z_sticks(comm_.rank()), paramPtr_->dim_z()); auto freqViewGPU = create_2d_view(gpuArray1_, 0, paramPtr_->num_z_sticks(comm_.rank()), paramPtr_->dim_z()); auto fullView = create_3d_view(fullArray_, 0, paramPtr_->dim_x(), paramPtr_->dim_y(), paramPtr_->dim_z()); GPUStreamHandle stream(false); auto transposeBufferZ = create_1d_view( array2_, 0, comm_.size() * paramPtr_->max_num_xy_planes() * paramPtr_->max_num_z_sticks()); auto transposeBufferZGPU = create_1d_view( gpuArray2_, 0, comm_.size() * paramPtr_->max_num_xy_planes() * paramPtr_->max_num_z_sticks()); auto transposeBufferXY = create_1d_view( array1_, 0, comm_.size() * paramPtr_->max_num_xy_planes() * paramPtr_->max_num_z_sticks()); auto transposeBufferXYGPU = create_1d_view( gpuArray1_, 0, 
comm_.size() * paramPtr_->max_num_xy_planes() * paramPtr_->max_num_z_sticks()); TransposeMPIBufferedGPU transpose( paramPtr_, comm_, transposeBufferXY, freqXYViewGPU, transposeBufferXYGPU, stream, transposeBufferZ, freqViewGPU, transposeBufferZGPU, stream); copy_to_gpu_async(stream, freqView, freqViewGPU); transpose.backward(); copy_from_gpu_async(stream, freqXYViewGPU, freqXYView); gpu::check_status(gpu::stream_synchronize(stream.get())); check_space_domain(freqXYView, fullView, paramPtr_->xy_plane_offset(comm_.rank()), paramPtr_->num_xy_planes(comm_.rank())); transpose.forward(); copy_from_gpu_async(stream, freqViewGPU, freqView); gpu::check_status(gpu::stream_synchronize(stream.get())); check_freq_domain(freqView, fullView, paramPtr_->z_stick_xy_indices(comm_.rank())); } TEST_F(TransposeGPUTest, CompactBuffered) { GTEST_MPI_GUARD auto freqXYView = create_3d_view(array2_, 0, paramPtr_->num_xy_planes(comm_.rank()), paramPtr_->dim_y(), paramPtr_->dim_x()); auto freqXYViewGPU = create_3d_view(gpuArray2_, 0, paramPtr_->num_xy_planes(comm_.rank()), paramPtr_->dim_y(), paramPtr_->dim_x()); auto freqView = create_2d_view(array1_, 0, paramPtr_->num_z_sticks(comm_.rank()), paramPtr_->dim_z()); auto freqViewGPU = create_2d_view(gpuArray1_, 0, paramPtr_->num_z_sticks(comm_.rank()), paramPtr_->dim_z()); auto fullView = create_3d_view(fullArray_, 0, paramPtr_->dim_x(), paramPtr_->dim_y(), paramPtr_->dim_z()); GPUStreamHandle stream(false); auto transposeBufferZ = create_1d_view( array2_, 0, comm_.size() * paramPtr_->max_num_xy_planes() * paramPtr_->max_num_z_sticks()); auto transposeBufferZGPU = create_1d_view( gpuArray2_, 0, comm_.size() * paramPtr_->max_num_xy_planes() * paramPtr_->max_num_z_sticks()); auto transposeBufferXY = create_1d_view( array1_, 0, comm_.size() * paramPtr_->max_num_xy_planes() * paramPtr_->max_num_z_sticks()); auto transposeBufferXYGPU = create_1d_view( gpuArray1_, 0, comm_.size() * paramPtr_->max_num_xy_planes() * paramPtr_->max_num_z_sticks()); 
TransposeMPICompactBufferedGPU transpose( paramPtr_, comm_, transposeBufferXY, freqXYViewGPU, transposeBufferXYGPU, stream, transposeBufferZ, freqViewGPU, transposeBufferZGPU, stream); copy_to_gpu_async(stream, freqView, freqViewGPU); transpose.pack_backward(); transpose.backward(); transpose.unpack_backward(); copy_from_gpu_async(stream, freqXYViewGPU, freqXYView); gpu::check_status(gpu::stream_synchronize(stream.get())); check_space_domain(freqXYView, fullView, paramPtr_->xy_plane_offset(comm_.rank()), paramPtr_->num_xy_planes(comm_.rank())); transpose.forward(); copy_from_gpu_async(stream, freqViewGPU, freqView); gpu::check_status(gpu::stream_synchronize(stream.get())); check_freq_domain(freqView, fullView, paramPtr_->z_stick_xy_indices(comm_.rank())); } TEST_F(TransposeGPUTest, Unbuffered) { GTEST_MPI_GUARD auto freqXYView = create_3d_view(array2_, 0, paramPtr_->num_xy_planes(comm_.rank()), paramPtr_->dim_y(), paramPtr_->dim_x()); auto freqXYViewGPU = create_3d_view(gpuArray2_, 0, paramPtr_->num_xy_planes(comm_.rank()), paramPtr_->dim_y(), paramPtr_->dim_x()); auto freqView = create_2d_view(array1_, 0, paramPtr_->num_z_sticks(comm_.rank()), paramPtr_->dim_z()); auto freqViewGPU = create_2d_view(gpuArray1_, 0, paramPtr_->num_z_sticks(comm_.rank()), paramPtr_->dim_z()); auto fullView = create_3d_view(fullArray_, 0, paramPtr_->dim_x(), paramPtr_->dim_y(), paramPtr_->dim_z()); GPUStreamHandle stream(false); TransposeMPIUnbufferedGPU transpose(paramPtr_, comm_, freqXYView, freqXYViewGPU, stream, freqView, freqViewGPU, stream); copy_to_gpu_async(stream, freqView, freqViewGPU); transpose.backward(); copy_from_gpu_async(stream, freqXYViewGPU, freqXYView); gpu::check_status(gpu::stream_synchronize(stream.get())); check_space_domain(freqXYView, fullView, paramPtr_->xy_plane_offset(comm_.rank()), paramPtr_->num_xy_planes(comm_.rank())); transpose.forward(); copy_from_gpu_async(stream, freqViewGPU, freqView); gpu::check_status(gpu::stream_synchronize(stream.get())); 
check_freq_domain(freqView, fullView, paramPtr_->z_stick_xy_indices(comm_.rank())); } #endif SpFFT-1.1.0/tests/programs/000077500000000000000000000000001457701740000154345ustar00rootroot00000000000000SpFFT-1.1.0/tests/programs/benchmark.cpp000066400000000000000000000277571457701740000201140ustar00rootroot00000000000000#include #include #include #include #include #include #include #include #include #include #include #include "fft/transform_1d_host.hpp" #include "memory/array_view_utility.hpp" #include "memory/host_array.hpp" #include "spfft/config.h" #include "spfft/spfft.hpp" #include "timing/timing.hpp" #include "util/omp_definitions.hpp" #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #include "gpu_util/gpu_runtime_api.hpp" #include "gpu_util/gpu_transfer.hpp" #include "memory/gpu_array.hpp" #endif #ifdef SPFFT_MPI #include #include "mpi_util/mpi_communicator_handle.hpp" #include "mpi_util/mpi_init_handle.hpp" #endif // external dependencies #include "CLI/CLI.hpp" #include "nlohmann/json.hpp" // #include // for MPI debugging using namespace spfft; void run_benchmark(const SpfftTransformType transformType, const int dimX, const int dimY, const int dimZ, const int numLocalZSticks, const int numLocalXYPlanes, const SpfftProcessingUnitType executionUnit, const SpfftProcessingUnitType targetUnit, const int numThreads, const SpfftExchangeType exchangeType, const std::vector& indices, const int numRepeats, const int numTransforms, double** freqValuesPTR) { std::vector transforms; for (int t = 0; t < numTransforms; ++t) { #ifdef SPFFT_MPI Grid grid(dimX, dimY, dimZ, numLocalZSticks, numLocalXYPlanes, executionUnit, numThreads, MPI_COMM_WORLD, exchangeType); #else Grid grid(dimX, dimY, dimZ, numLocalZSticks, executionUnit, numThreads); #endif auto transform = grid.create_transform( executionUnit, transformType, dimX, dimY, dimZ, numLocalXYPlanes, indices.size() / 3, SpfftIndexFormatType::SPFFT_INDEX_TRIPLETS, indices.data()); transforms.emplace_back(std::move(transform)); 
} std::vector targetUnits(numTransforms, targetUnit); std::vector scalingTypes(numTransforms, SPFFT_NO_SCALING); // run once for warm cache { HOST_TIMING_SCOPED("Warming") multi_transform_backward(transforms.size(), transforms.data(), freqValuesPTR, targetUnits.data()); multi_transform_forward(transforms.size(), transforms.data(), targetUnits.data(), freqValuesPTR, scalingTypes.data()); } std::string exchName("Compact buffered"); if (exchangeType == SpfftExchangeType::SPFFT_EXCH_BUFFERED) { exchName = "Buffered"; } else if (exchangeType == SpfftExchangeType::SPFFT_EXCH_UNBUFFERED) { exchName = "Unbuffered"; } else if (exchangeType == SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED_FLOAT) { exchName = "Compact buffered float"; } else if (exchangeType == SpfftExchangeType::SPFFT_EXCH_BUFFERED_FLOAT) { exchName = "Buffered float"; } HOST_TIMING_SCOPED(exchName) if (numTransforms == 1) { for (int repeat = 0; repeat < numRepeats; ++repeat) { transforms.front().backward(*freqValuesPTR, targetUnits.front()); transforms.front().forward(targetUnits.front(), *freqValuesPTR, scalingTypes.front()); } } else { for (int repeat = 0; repeat < numRepeats; ++repeat) { multi_transform_backward(transforms.size(), transforms.data(), freqValuesPTR, targetUnits.data()); multi_transform_forward(transforms.size(), transforms.data(), targetUnits.data(), freqValuesPTR, scalingTypes.data()); } } } int main(int argc, char** argv) { #ifdef SPFFT_MPI MPIInitHandle initHandle(argc, argv, true); MPICommunicatorHandle comm(MPI_COMM_WORLD); const SizeType commRank = comm.rank(); const SizeType commSize = comm.size(); #else const SizeType commRank = 0; const SizeType commSize = 1; #endif #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) // set device for multi-gpu nodes int deviceCount = 0; gpu::check_status(gpu::get_device_count(&deviceCount)); if (deviceCount > 1) { gpu::check_status(gpu::set_device(commRank % deviceCount)); } #endif // if(commRank == 0) { // std::cout << "PID = " << getpid() << 
std::endl; // } // bool waitLoop = commRank == 0; // while(waitLoop) { // sleep(5); // } int numRepeats = 1; int numTransforms = 1; std::string outputFileName; std::string exchName; std::string procName; std::string transformTypeName = "c2c"; double sparsity = 1.0; std::vector dimensions; CLI::App app{"fft test"}; app.add_option("-d", dimensions, "Size of fft grid in each dimension")->required()->expected(3); app.add_option("-r", numRepeats, "Number of repeats")->required(); app.add_option("-o", outputFileName, "Output file name")->required(); app.add_option("-m", numTransforms, "Multiple transform number")->default_val("1"); app.add_option("-s", sparsity, "Sparsity"); app.add_option("-t", transformTypeName, "Transform type") ->check(CLI::IsMember({"c2c", "r2c"})) ->default_val("c2c"); app.add_option("-e", exchName, "Exchange type") ->check(CLI::IsMember( {"all", "compact", "compactFloat", "buffered", "bufferedFloat", "unbuffered"})) ->required(); app.add_option("-p", procName, "Processing unit. With gpu-gpu, device memory is used as input and output.") ->check(CLI::IsMember({"cpu", "gpu", "gpu-gpu"})) ->required(); CLI11_PARSE(app, argc, argv); auto transformType = SPFFT_TRANS_C2C; if(transformTypeName == "r2c") { transformType = SPFFT_TRANS_R2C; } const int dimX = dimensions[0]; const int dimY = dimensions[1]; const int dimZ = dimensions[2]; const int dimXFreq = transformType == SPFFT_TRANS_R2C ? dimX / 2 + 1 : dimX; const int dimYFreq = transformType == SPFFT_TRANS_R2C ? dimY / 2 + 1 : dimY; const int dimZFreq = transformType == SPFFT_TRANS_R2C ? dimZ / 2 + 1 : dimZ; const int numThreads = omp_get_max_threads(); const SizeType numLocalXYPlanes = (dimZ / commSize) + (commRank < dimZ % commSize ? 
1 : 0); int numLocalZSticks = 0; std::vector xyzIndices; { // std::mt19937 randGen(42); // std::uniform_real_distribution uniformRandDis(0.0, 1.0); // create all global x-y index pairs std::vector> xyIndicesGlobal; xyIndicesGlobal.reserve(dimX * dimY); for (int x = 0; x < dimXFreq * sparsity; ++x) { for (int y = 0; y < (x == 0 ? dimYFreq : dimY); ++y) { xyIndicesGlobal.emplace_back(x, y); } } // distribute z-sticks as evenly as possible numLocalZSticks = (xyIndicesGlobal.size()) / commSize + (commRank < (xyIndicesGlobal.size()) % commSize ? 1 : 0); const int offset = ((xyIndicesGlobal.size()) / commSize) * commRank + std::min(commRank, static_cast(xyIndicesGlobal.size()) % commSize); // assemble index triplets xyzIndices.reserve(numLocalZSticks); for (int i = offset; i < offset + numLocalZSticks; ++i) { for (int z = 0; z < dimZ; ++z) { xyzIndices.push_back(xyIndicesGlobal[i].first); xyzIndices.push_back(xyIndicesGlobal[i].second); xyzIndices.push_back(z); } } } // store full z-sticks values const auto executionUnit = procName == "cpu" ? SpfftProcessingUnitType::SPFFT_PU_HOST : SpfftProcessingUnitType::SPFFT_PU_GPU; const auto targetUnit = procName == "gpu-gpu" ? SpfftProcessingUnitType::SPFFT_PU_GPU : SpfftProcessingUnitType::SPFFT_PU_HOST; std::vector freqValuesPointers(numTransforms); std::vector>> freqValues; for (int t = 0; t < numTransforms; ++t) freqValues.emplace_back(xyzIndices.size() / 3); #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) std::vector> freqValuesGPU; for (int t = 0; t < numTransforms; ++t) freqValuesGPU.emplace_back(xyzIndices.size() / 3); for (int t = 0; t < numTransforms; ++t) { freqValuesPointers[t] = procName == "gpu-gpu" ? 
reinterpret_cast(freqValuesGPU[t].data()) : reinterpret_cast(freqValues[t].data()); } #else for (int t = 0; t < numTransforms; ++t) { freqValuesPointers[t] = reinterpret_cast(freqValues[t].data()); } #endif #ifdef SPFFT_GPU_DIRECT const bool gpuDirectEnabled = true; #else const bool gpuDirectEnabled = false; #endif if (commRank == 0) { std::cout << "Num MPI ranks: " << commSize << std::endl; std::cout << "Grid size: " << dimX << ", " << dimY << ", " << dimZ << std::endl; std::cout << "Transform type: " << transformTypeName << std::endl; std::cout << "Sparsity: " << sparsity << std::endl; std::cout << "Proc: " << procName << std::endl; std::cout << "GPU Direct: " << (gpuDirectEnabled ? "Enabled" : "Disabled") << std::endl; } if (exchName == "all") { run_benchmark(transformType, dimX, dimY, dimZ, numLocalZSticks, numLocalXYPlanes, executionUnit, targetUnit, numThreads, SpfftExchangeType::SPFFT_EXCH_BUFFERED, xyzIndices, numRepeats, numTransforms, freqValuesPointers.data()); run_benchmark(transformType, dimX, dimY, dimZ, numLocalZSticks, numLocalXYPlanes, executionUnit, targetUnit, numThreads, SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED, xyzIndices, numRepeats, numTransforms, freqValuesPointers.data()); run_benchmark(transformType, dimX, dimY, dimZ, numLocalZSticks, numLocalXYPlanes, executionUnit, targetUnit, numThreads, SpfftExchangeType::SPFFT_EXCH_UNBUFFERED, xyzIndices, numRepeats, numTransforms, freqValuesPointers.data()); } else { auto exchangeType = SpfftExchangeType::SPFFT_EXCH_DEFAULT; if (exchName == "compact") { exchangeType = SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED; } else if (exchName == "compactFloat") { exchangeType = SpfftExchangeType::SPFFT_EXCH_COMPACT_BUFFERED_FLOAT; } else if (exchName == "buffered") { exchangeType = SpfftExchangeType::SPFFT_EXCH_BUFFERED; } else if (exchName == "bufferedFloat") { exchangeType = SpfftExchangeType::SPFFT_EXCH_BUFFERED_FLOAT; } else if (exchName == "unbuffered") { exchangeType = 
SpfftExchangeType::SPFFT_EXCH_UNBUFFERED; } run_benchmark(transformType, dimX, dimY, dimZ, numLocalZSticks, numLocalXYPlanes, executionUnit, targetUnit, numThreads, exchangeType, xyzIndices, numRepeats, numTransforms, freqValuesPointers.data()); } if (commRank == 0) { auto timingResults = ::spfft::timing::GlobalTimer.process(); std::cout << timingResults.print({::rt_graph::Stat::Count, ::rt_graph::Stat::Total, ::rt_graph::Stat::Percentage, ::rt_graph::Stat::ParentPercentage, ::rt_graph::Stat::Median, ::rt_graph::Stat::Min, ::rt_graph::Stat::Max}) << std::endl; if (!outputFileName.empty()) { nlohmann::json j; const std::time_t t = std::time(nullptr); std::string time(std::ctime(&t)); time.pop_back(); j["timings"] =nlohmann::json::parse(timingResults.json()); const bool data_on_gpu = procName == "gpu-gpu"; j["parameters"] = {{"proc", procName}, {"data_on_gpu", data_on_gpu}, {"gpu_direct", gpuDirectEnabled}, {"num_ranks", commSize}, {"num_threads", numThreads}, {"dim_x", dimX}, {"dim_y", dimY}, {"dim_z", dimZ}, {"exchange_type", exchName}, {"num_repeats", numRepeats}, {"transform_type", transformTypeName}, {"time", time}}; std::ofstream file(outputFileName); file << std::setw(2) << j; file.close(); } } return 0; } SpFFT-1.1.0/tests/run_local_tests.cpp000066400000000000000000000002031457701740000175010ustar00rootroot00000000000000#include "gtest/gtest.h" int main(int argc, char *argv[]) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } SpFFT-1.1.0/tests/run_mpi_tests.cpp000066400000000000000000000005411457701740000172010ustar00rootroot00000000000000#include #include "gtest/gtest.h" #include "gtest_mpi.hpp" int main(int argc, char* argv[]) { // Initialize MPI before any call to gtest_mpi int provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided); gtest_mpi::InitGoogleTestMPI(&argc, argv); auto status = RUN_ALL_TESTS(); MPI_Finalize(); return status; } 
SpFFT-1.1.0/tests/test_util/000077500000000000000000000000001457701740000156165ustar00rootroot00000000000000SpFFT-1.1.0/tests/test_util/generate_indices.hpp000066400000000000000000000135761457701740000216330ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef SPFFT_GENERATE_INDICES_HPP #define SPFFT_GENERATE_INDICES_HPP #include #include #include #include "spfft/config.h" namespace spfft { // creates randomly distributed indices for all ranks according to the input distributions template auto create_value_indices(T& sharedRandGen, const std::vector& zStickDistribution, const double totalZStickFraction, const double zStickFillFraction, const int dimX, const int dimY, const int dimZ, const bool hermitianSymmetry) -> std::vector> { std::uniform_real_distribution uniformRandDis(0.0, 1.0); std::discrete_distribution rankSelectDis(zStickDistribution.begin(), zStickDistribution.end()); const double zStickFractionSum = std::accumulate(zStickDistribution.begin(), zStickDistribution.end(), 0.0); std::vector>> xyIndicesPerRank(zStickDistribution.size()); const int dimXFreq = hermitianSymmetry ? dimX / 2 + 1 : dimX; const int dimYFreq = hermitianSymmetry ? dimY / 2 + 1 : dimY; for (int x = 0; x < dimXFreq; ++x) { for (int y = 0; y < dimY; ++y) { if (!(x == 0 && y >= dimYFreq) && uniformRandDis(sharedRandGen) < totalZStickFraction) { // use full hermitian symmetry on x = 0 plane if (!hermitianSymmetry || x != 0 || y < dimYFreq) { const auto selectedRank = rankSelectDis(sharedRandGen); xyIndicesPerRank[selectedRank].emplace_back(std::make_pair(x, y)); } } } } const int dimZFreq = hermitianSymmetry ? 
dimZ / 2 + 1 : dimZ; std::vector> valueIndices(zStickDistribution.size()); auto valueIndicesIt = valueIndices.begin(); for (const auto& xyIndices : xyIndicesPerRank) { for (const auto& xyIndex : xyIndices) { for (int z = 0; z < dimZ; ++z) { // only add half x=0, y=0 stick if hermitian symmetry is used if (!(hermitianSymmetry && xyIndex.first == 0 && xyIndex.second == 0 && z >= dimZFreq) && uniformRandDis(sharedRandGen) < zStickFillFraction) { valueIndicesIt->emplace_back(xyIndex.first); valueIndicesIt->emplace_back(xyIndex.second); valueIndicesIt->emplace_back(z); } } } ++valueIndicesIt; } return valueIndices; } inline auto center_indices(const int dimX, const int dimY, const int dimZ, std::vector>& indicesPerRank) -> void { const int positiveSizeX = dimX / 2 + 1; const int positiveSizeY = dimY / 2 + 1; const int positiveSizeZ = dimZ / 2 + 1; for (auto& rankIndices : indicesPerRank) { for (std::size_t i = 0; i < rankIndices.size(); i += 3) { if (rankIndices[i] >= positiveSizeX) rankIndices[i] -= dimX; if (rankIndices[i + 1] >= positiveSizeY) rankIndices[i + 1] -= dimY; if (rankIndices[i + 2] >= positiveSizeZ) rankIndices[i + 2] -= dimZ; } } } // assigns a number of xy planes to the local rank according to the xy plane distribution inline auto calculate_num_local_xy_planes(const int rank, const int dimZ, const std::vector& planeRankDistribution) -> int { const double planeDistriSum = std::accumulate(planeRankDistribution.begin(), planeRankDistribution.end(), 0.0); std::vector numXYPlanesPerRank(planeRankDistribution.size()); for (std::size_t i = 0; i < planeRankDistribution.size(); ++i) { numXYPlanesPerRank[i] = planeRankDistribution[i] / planeDistriSum * dimZ; } int numMissingPlanes = dimZ - std::accumulate(numXYPlanesPerRank.begin(), numXYPlanesPerRank.end(), 0); for (auto& val : numXYPlanesPerRank) { // add missing planes to rank with non-zero number if (val > 0 && numMissingPlanes > 0) { val += numMissingPlanes; numMissingPlanes = 0; break; } // substract extra 
planes if (numMissingPlanes < 0) { val -= std::min(val, -numMissingPlanes); numMissingPlanes += val; if (numMissingPlanes >= 0) { numMissingPlanes = 0; break; } } } // if all ranks have 0 planes, some planes have to be assigned somewhere if (numMissingPlanes > 0) { numXYPlanesPerRank[0] = numMissingPlanes; } return numXYPlanesPerRank[rank]; } } // namespace spfft #endif SpFFT-1.1.0/tests/test_util/test_check_values.hpp000066400000000000000000000072301457701740000220240ustar00rootroot00000000000000/* * Copyright (c) 2019 ETH Zurich, Simon Frasch * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the copyright holder nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef SPFFT_TEST_CHECK_VALUES_HPP #define SPFFT_TEST_CHECK_VALUES_HPP #include #include #include #include "gtest/gtest.h" #include "memory/host_array_view.hpp" #include "spfft/config.h" namespace spfft { inline void check_c2c_space_domain(const HostArrayView3D>& realView, const HostArrayView3D>& fftwView, const SizeType planeOffset, const SizeType numLocalXYPlanes) { for (SizeType z = 0; z < numLocalXYPlanes; ++z) { for (SizeType x = 0; x < fftwView.dim_outer(); ++x) { for (SizeType y = 0; y < fftwView.dim_mid(); ++y) { ASSERT_NEAR(realView(z, y, x).real(), fftwView(x, y, z + planeOffset).real(), 1e-6); ASSERT_NEAR(realView(z, y, x).imag(), fftwView(x, y, z + planeOffset).imag(), 1e-6); } } } } inline void check_r2c_space_domain(const HostArrayView3D& realView, const HostArrayView3D>& fftwView, const SizeType planeOffset, const SizeType numLocalXYPlanes) { for (SizeType z = 0; z < numLocalXYPlanes; ++z) { for (SizeType x = 0; x < fftwView.dim_outer(); ++x) { for (SizeType y = 0; y < fftwView.dim_mid(); ++y) { ASSERT_NEAR(realView(z, y, x), fftwView(x, y, z + planeOffset).real(), 1e-6); } } } } inline void check_freq_domain(const std::vector>& freqValues, const HostArrayView3D>& fftwView, const std::vector& indices) { assert(indices.size() == freqValues.size() * 3); for (SizeType i = 0; i < freqValues.size(); ++i) { int x = indices[i * 3]; int y = indices[i * 3 + 1]; int z = indices[i * 3 + 2]; if (x < 0) x = fftwView.dim_outer() + x; if (y < 
0) y = fftwView.dim_mid() + y; if (z < 0) z = fftwView.dim_inner() + z; ASSERT_NEAR(freqValues[i].real(), fftwView(x, y, z).real(), 1e-6); ASSERT_NEAR(freqValues[i].imag(), fftwView(x, y, z).imag(), 1e-6); } } } // namespace spfft #endif SpFFT-1.1.0/tests/test_util/test_transform.hpp000066400000000000000000000257561457701740000214200ustar00rootroot00000000000000#ifndef SPFFT_TEST_TRANSFORM_HPP #define SPFFT_TEST_TRANSFORM_HPP #include #include #include #include #include #include #include #include "gtest/gtest.h" #include "memory/array_view_utility.hpp" #include "memory/host_array.hpp" #include "memory/host_array_view.hpp" #include "parameters/parameters.hpp" #include "spfft/grid.hpp" #include "spfft/transform.hpp" #include "test_util/generate_indices.hpp" #include "test_util/test_check_values.hpp" #include "util/common_types.hpp" #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) #include "gpu_util/gpu_fft_api.hpp" #include "gpu_util/gpu_transfer.hpp" #include "memory/gpu_array.hpp" #endif using namespace spfft; class TransformTest : public ::testing::TestWithParam< std::tuple> { protected: TransformTest() : dimX_(std::get<2>(GetParam())), dimY_(std::get<3>(GetParam())), dimZ_(std::get<4>(GetParam())), fftwArray_(dimX_ * dimY_ * dimZ_), fftwView_(create_3d_view(fftwArray_, 0, dimX_, dimY_, dimZ_)), centeredIndices_(std::get<5>(GetParam())) { // initialize ffw plans fftwPlanBackward_ = fftw_plan_dft_3d(dimX_, dimY_, dimZ_, (fftw_complex*)fftwArray_.data(), (fftw_complex*)fftwArray_.data(), FFTW_BACKWARD, FFTW_ESTIMATE); fftwPlanForward_ = fftw_plan_dft_3d(dimX_, dimY_, dimZ_, (fftw_complex*)fftwArray_.data(), (fftw_complex*)fftwArray_.data(), FFTW_FORWARD, FFTW_ESTIMATE); } inline auto test_backward_c2c(const std::vector& zStickDistribution, const std::vector& xyPlaneDistribution) -> void; inline auto test_forward_c2c(const std::vector& zStickDistribution, const std::vector& xyPlaneDistribution) -> void; inline auto test_r2c(const std::vector& xyPlaneDistribution) -> 
void; virtual auto comm_rank() -> SizeType { return 0; } virtual auto comm_size() -> SizeType { return 1; } virtual auto grid() -> Grid& = 0; ~TransformTest() override { if (fftwPlanBackward_) fftw_destroy_plan(fftwPlanBackward_); if (fftwPlanForward_) fftw_destroy_plan(fftwPlanForward_); fftwPlanBackward_ = nullptr; fftwPlanForward_ = nullptr; } int dimX_, dimY_, dimZ_; HostArray> fftwArray_; HostArrayView3D> fftwView_; fftw_plan fftwPlanBackward_ = nullptr; fftw_plan fftwPlanForward_ = nullptr; bool centeredIndices_; }; auto TransformTest::test_backward_c2c(const std::vector& zStickDistribution, const std::vector& xyPlaneDistribution) -> void { std::mt19937 randGen(42); std::uniform_real_distribution uniformRandDis(0.0, 1.0); auto valueIndicesPerRank = create_value_indices(randGen, zStickDistribution, 0.7, 0.7, dimX_, dimY_, dimZ_, false); const int numLocalXYPlanes = calculate_num_local_xy_planes(comm_rank(), dimZ_, xyPlaneDistribution); // assign values to fftw input for (const auto& valueIndices : valueIndicesPerRank) { for (std::size_t i = 0; i < valueIndices.size(); i += 3) { fftwView_(valueIndices[i], valueIndices[i + 1], valueIndices[i + 2]) = std::complex(uniformRandDis(randGen), uniformRandDis(randGen)); } } // extract local rank values std::vector> values(valueIndicesPerRank[comm_rank()].size() / 3); for (std::size_t i = 0; i < values.size(); ++i) { const auto x = valueIndicesPerRank[comm_rank()][i * 3]; const auto y = valueIndicesPerRank[comm_rank()][i * 3 + 1]; const auto z = valueIndicesPerRank[comm_rank()][i * 3 + 2]; values[i] = fftwView_(x, y, z); } if (centeredIndices_) { center_indices(dimX_, dimY_, dimZ_, valueIndicesPerRank); } auto transform = grid().create_transform( std::get<1>(GetParam()), SpfftTransformType::SPFFT_TRANS_C2C, dimX_, dimY_, dimZ_, numLocalXYPlanes, values.size(), SpfftIndexFormatType::SPFFT_INDEX_TRIPLETS, valueIndicesPerRank[comm_rank()].data()); HostArrayView3D> realView( reinterpret_cast*>( 
transform.space_domain_data(SpfftProcessingUnitType::SPFFT_PU_HOST)), numLocalXYPlanes, dimY_, dimX_, false); fftw_execute(fftwPlanBackward_); #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) if (std::get<1>(GetParam()) == SpfftProcessingUnitType::SPFFT_PU_GPU) { // copy frequency values to GPU GPUArray::type> valuesGPU(values.size()); copy_to_gpu(values, valuesGPU); // transform transform.backward(reinterpret_cast(valuesGPU.data()), SpfftProcessingUnitType::SPFFT_PU_GPU); // run twice to ensure memory is zeroed correctly transform.backward(reinterpret_cast(valuesGPU.data()), SpfftProcessingUnitType::SPFFT_PU_GPU); // use transform buffer to copy values GPUArrayView3D::type> realViewGPU( reinterpret_cast::type*>( transform.space_domain_data(SpfftProcessingUnitType::SPFFT_PU_GPU)), numLocalXYPlanes, dimY_, dimX_, false); copy_from_gpu(realViewGPU, realView); } #endif if (std::get<1>(GetParam()) == SpfftProcessingUnitType::SPFFT_PU_HOST) { transform.backward(reinterpret_cast(values.data()), SpfftProcessingUnitType::SPFFT_PU_HOST); // run twice to ensure memory is zeroed correctly transform.backward(reinterpret_cast(values.data()), SpfftProcessingUnitType::SPFFT_PU_HOST); } check_c2c_space_domain(realView, fftwView_, transform.local_z_offset(), numLocalXYPlanes); } auto TransformTest::test_forward_c2c(const std::vector& zStickDistribution, const std::vector& xyPlaneDistribution) -> void { std::mt19937 randGen(42); std::uniform_real_distribution uniformRandDis(0.0, 1.0); auto valueIndicesPerRank = create_value_indices(randGen, zStickDistribution, 0.7, 0.7, dimX_, dimY_, dimZ_, false); const int numLocalXYPlanes = calculate_num_local_xy_planes(comm_rank(), dimZ_, xyPlaneDistribution); // assign values to fftw input for (const auto& valueIndices : valueIndicesPerRank) { for (std::size_t i = 0; i < valueIndices.size(); i += 3) { fftwView_(valueIndices[i], valueIndices[i + 1], valueIndices[i + 2]) = std::complex(uniformRandDis(randGen), uniformRandDis(randGen)); } } 
std::vector> freqValues(valueIndicesPerRank[comm_rank()].size() / 3); if (centeredIndices_) { center_indices(dimX_, dimY_, dimZ_, valueIndicesPerRank); } auto transform = grid().create_transform( std::get<1>(GetParam()), SpfftTransformType::SPFFT_TRANS_C2C, dimX_, dimY_, dimZ_, numLocalXYPlanes, freqValues.size(), SpfftIndexFormatType::SPFFT_INDEX_TRIPLETS, valueIndicesPerRank[comm_rank()].data()); HostArrayView3D> realView( reinterpret_cast*>( transform.space_domain_data(SpfftProcessingUnitType::SPFFT_PU_HOST)), numLocalXYPlanes, dimY_, dimX_, false); fftw_execute(fftwPlanBackward_); // copy space domain values from fftw buffer const auto zOffset = transform.local_z_offset(); for (int z = 0; z < numLocalXYPlanes; ++z) { for (int y = 0; y < dimY_; ++y) { for (int x = 0; x < dimX_; ++x) { realView(z, y, x) = fftwView_(x, y, z + zOffset); } } } fftw_execute(fftwPlanForward_); #if defined(SPFFT_CUDA) || defined(SPFFT_ROCM) if (std::get<1>(GetParam()) == SpfftProcessingUnitType::SPFFT_PU_GPU) { // use transform buffer to copy values GPUArrayView3D::type> realViewGPU( reinterpret_cast::type*>( transform.space_domain_data(SpfftProcessingUnitType::SPFFT_PU_GPU)), numLocalXYPlanes, dimY_, dimX_, false); copy_to_gpu(realView, realViewGPU); GPUArray::type> freqValuesGPU(freqValues.size()); transform.forward(SpfftProcessingUnitType::SPFFT_PU_GPU, reinterpret_cast(freqValuesGPU.data())); copy_from_gpu(freqValuesGPU, freqValues); } #endif if (std::get<1>(GetParam()) == SpfftProcessingUnitType::SPFFT_PU_HOST) { transform.forward(SpfftProcessingUnitType::SPFFT_PU_HOST, reinterpret_cast(freqValues.data())); } check_freq_domain(freqValues, fftwView_, valueIndicesPerRank[comm_rank()]); } auto TransformTest::test_r2c(const std::vector& xyPlaneDistribution) -> void { std::mt19937 randGen(42); std::uniform_real_distribution uniformRandDis(0.0, 1.0); // create full set of global z-sticks (up to dimX_ / 2 + 1, due to symmetry) std::vector zStickDistribution(xyPlaneDistribution.size(), 
1.0); auto valueIndicesPerRank = create_value_indices(randGen, zStickDistribution, 1.0, 1.0, dimX_, dimY_, dimZ_, true); const int numLocalXYPlanes = calculate_num_local_xy_planes(comm_rank(), dimZ_, xyPlaneDistribution); // assign values to fftw input for (const auto& valueIndices : valueIndicesPerRank) { for (std::size_t i = 0; i < valueIndices.size(); i += 3) { fftwView_(valueIndices[i], valueIndices[i + 1], valueIndices[i + 2]) = std::complex(uniformRandDis(randGen), 0.0); } } std::vector> freqValues(valueIndicesPerRank[comm_rank()].size() / 3); if (centeredIndices_) { center_indices(dimX_, dimY_, dimZ_, valueIndicesPerRank); } auto transform = grid().create_transform( std::get<1>(GetParam()), SpfftTransformType::SPFFT_TRANS_R2C, dimX_, dimY_, dimZ_, numLocalXYPlanes, freqValues.size(), SpfftIndexFormatType::SPFFT_INDEX_TRIPLETS, valueIndicesPerRank[comm_rank()].data()); HostArrayView3D realView( transform.space_domain_data(SpfftProcessingUnitType::SPFFT_PU_HOST), numLocalXYPlanes, dimY_, dimX_, false); // copy space domain values from fftw buffer const auto zOffset = transform.local_z_offset(); for (int z = 0; z < numLocalXYPlanes; ++z) { for (int y = 0; y < dimY_; ++y) { for (int x = 0; x < dimX_; ++x) { realView(z, y, x) = fftwView_(x, y, z + zOffset).real(); } } } // check forward transform.forward(SpfftProcessingUnitType::SPFFT_PU_HOST, reinterpret_cast(freqValues.data())); fftw_execute(fftwPlanForward_); check_freq_domain(freqValues, fftwView_, valueIndicesPerRank[comm_rank()]); // check backward transform.backward(reinterpret_cast(freqValues.data()), SpfftProcessingUnitType::SPFFT_PU_HOST); fftw_execute(fftwPlanBackward_); check_r2c_space_domain(realView, fftwView_, transform.local_z_offset(), numLocalXYPlanes); } #endif