pax_global_header00006660000000000000000000000064132057765520014526gustar00rootroot0000000000000052 comment=562bd546f7f080801ffb79c9f86b2545ab36675b bart-0.4.02/000077500000000000000000000000001320577655200125415ustar00rootroot00000000000000bart-0.4.02/.gitignore.main000066400000000000000000000007031320577655200154540ustar00rootroot00000000000000 # autogenerated .gitignore files .gitignore # dependency files *.d # object files *.o # mac debug files *.dSYM # python compiled files *.pyc python/.ipynb_checkpoints/ # temporary files .*.swp *~ # local Makefile Makefile.local # version string src/misc/version.inc # noise simulations save/nsv/*.dat # ctags tags GTAGS GSYMS GRTAGS GPATH # autogenerated documentation doc/html doc/latex doc/dx doc/commands.txt # test files tests/test-* bart-0.4.02/.travis.yml000066400000000000000000000130551320577655200146560ustar00rootroot00000000000000language: c addons: apt: packages: &default_packages - libfftw3-dev - libblas-dev - libpng-dev matrix: include: - env: PARALLEL=1 os: linux compiler: gcc sudo: required dist: trusty addons: apt: packages: - *default_packages before_install: - sudo apt-get update -qq - sudo apt-get install -qq liblapacke-dev - env: os: linux compiler: gcc sudo: required dist: trusty addons: apt: packages: - *default_packages script: - make test - make utest before_install: - sudo apt-get update -qq - sudo apt-get install -qq liblapacke-dev - env: SLINK=1 os: linux compiler: gcc sudo: required dist: trusty addons: apt: packages: - *default_packages - gfortran before_install: - sudo apt-get update -qq - sudo apt-get install -qq liblapacke-dev liblapack-dev - env: OMP=0 os: linux compiler: clang sudo: required dist: trusty addons: apt: packages: - *default_packages script: - make test - make utest before_install: - sudo apt-get update -qq - sudo apt-get install -qq liblapacke-dev - env: OMP=1 os: linux compiler: gcc sudo: required dist: trusty addons: apt: packages: - *default_packages script: - make test - make 
utest before_install: - sudo apt-get update -qq - sudo apt-get install -qq liblapacke-dev - env: NOLAPACKE=1 os: linux compiler: gcc sudo: required dist: trusty addons: apt: packages: - *default_packages - gfortran before_install: - sudo apt-get update -qq - sudo apt-get install -qq liblapack-dev - env: CUDA=1 CUDA_BASE=/usr/local/cuda-7.5/ os: linux compiler: clang-3.5 sudo: required dist: trusty addons: apt: packages: - *default_packages before_install: - wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_7.5-18_amd64.deb - sudo dpkg -i cuda-repo-ubuntu1404_7.5-18_amd64.deb - sudo apt-get update -qq - sudo apt-get install -qq clang-3.5 - sudo apt-get install -qq --no-install-recommends cuda-drivers cuda-core-7.5 cuda-cudart-dev-7.5 cuda-cufft-dev-7.5 cuda-cublas-dev-7.5 - sudo apt-get install -qq liblapacke-dev - env: BUILD_NAME="CMake" os: linux compiler: gcc sudo: required dist: trusty addons: apt: packages: - *default_packages - gfortran before_install: - sudo apt-get update -qq - sudo apt-get install -qq liblapack-dev liblapacke-dev script: - wget http://www.netlib.org/blas/blast-forum/cblas.tgz - tar zxvf cblas.tgz && rm cblas.tgz - pushd CBLAS - | cat << EOF > Makefile.in SHELL = /bin/sh PLAT = LINUX CBLIB = ../lib/libcblas.so CFLAGS = -O3 -DADD_ -fPIC FFLAGS = -O3 -fPIC CC = gcc FC = gfortran LOADER = $(FC) ARCH = cc ARCHFLAGS = -shared -o RANLIB = echo EOF - cd src && make all && sudo cp -v ../lib/libcblas.so /usr/lib/ - popd - mkdir build && cd build - export FC=$(which gfortran) - CC=gcc CXX=g++ cmake -DLINALG_VENDOR=LAPACKE -DLAPACKE_DIR=/usr/lib -DUSE_MATLAB=OFF .. 
- make - env: BUILD_NAME="CMake + CUDA" CUDA_BASE=/usr/local/cuda-7.5/ os: linux compiler: clang-3.5 sudo: required dist: trusty addons: apt: packages: - *default_packages - gfortran before_install: - wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_7.5-18_amd64.deb - sudo dpkg -i cuda-repo-ubuntu1404_7.5-18_amd64.deb - sudo apt-get update -qq - sudo apt-get install -qq clang-3.5 - sudo apt-get install -qq --no-install-recommends cuda-drivers cuda-core-7.5 cuda-cudart-dev-7.5 cuda-cufft-dev-7.5 cuda-cublas-dev-7.5 - sudo apt-get install -qq liblapack-dev liblapacke-dev script: - wget http://www.netlib.org/blas/blast-forum/cblas.tgz - tar zxvf cblas.tgz && rm cblas.tgz - pushd CBLAS - | cat << EOF > Makefile.in SHELL = /bin/sh PLAT = LINUX CBLIB = ../lib/libcblas.so CFLAGS = -O3 -DADD_ -fPIC FFLAGS = -O3 -fPIC CC = gcc FC = gfortran LOADER = $(FC) ARCH = cc ARCHFLAGS = -shared -o RANLIB = echo EOF - cd src && make all && sudo cp -v ../lib/libcblas.so /usr/lib/ - popd - mkdir build && cd build - export FC=$(which gfortran) - CC=gcc CXX=g++ cmake -DLINALG_VENDOR=LAPACKE -DLAPACKE_DIR=/usr/lib -DUSE_MATLAB=OFF -DUSE_CUDA=ON -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_BASE} .. - make - env: MACPORTS=0 os: osx compiler: gcc-4.8 script: - make test # make utest before_install: - brew update - brew install fftw gcc48 homebrew/science/openblas script: - make bart - make all bart-0.4.02/ACKNOWLEDGEMENTS000066400000000000000000000007641320577655200150250ustar00rootroot00000000000000 Contributors: Marcus T. Alley Dara Bahri Joseph Y. Cheng Siddharth Iyer Miki Lustig Mark Murphy Frank Ong Jonathan Tamir Martin Uecker Shreyas S. 
Vasanawala Sana Vaziri Pat Virtue Tao Zhang Logo designed by: Michelle Tamir Different parts of this software have been written for projects funded by: American Heart Association Grant 12BGIA9660006 NIH Grant P41RR09784 and Grant R01EB009690 UC Discovery Grant 193037 Sloan Research Fellowship GE Healthcare A personal donation by David Donoho bart-0.4.02/CMakeLists.txt000066400000000000000000000405611320577655200153070ustar00rootroot00000000000000# Copyright 2015-2016. Hans J. Johnson # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. # \author Hans J. Johnson cmake_minimum_required(VERSION 2.8.7) project(bart C) enable_language(C) # http://stackoverflow.com/questions/24840030/forcing-c99-in-cmake-to-use-for-loop-initial-declaration macro(use_c99) if (CMAKE_VERSION VERSION_LESS "3.1") if (CMAKE_C_COMPILER_ID STREQUAL "GNU") set (CMAKE_C_FLAGS "--std=gnu99 ${CMAKE_C_FLAGS}") endif () else () set (CMAKE_C_STANDARD 99) endif () endmacro(use_c99) use_c99() set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake ${CMAKE_MODULE_PATH}) # --- Provide good defaults for searching for packages (i.e. 
ismrmrd) set(CMAKE_PREFIX_PATH "") if(CMAKE_PREFIX_PATH) list(APPEND CMAKE_PREFIX_PATH "/usr/local") endif() if(EXISTS $ENV{CMAKE_PREFIX_PATH}) list(APPEND CMAKE_PREFIX_PATH $ENV{CMAKE_PREFIX_PATH}) endif() if(EXISTS $ENV{ISMRMRD_HOME}) list(APPEND CMAKE_PREFIX_PATH $ENV{ISMRMRD_HOME}) endif() list(REMOVE_DUPLICATES CMAKE_PREFIX_PATH) ## -message(STATUS "Looking for packages in : ${CMAKE_PREFIX_PATH}") ## Options option(USE_CUDA "Provide support for CUDA processing" OFF) if(USE_CUDA) enable_language(CXX) find_package(CUDA) add_definitions(-DUSE_CUDA) CUDA_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_LIST_DIR}/src) # set(CUDA_NVCC_FLAGS "-DUSE_CUDA;-Xcompiler;-fPIC;-Xcompiler;-fopenmp;-O3;-arch=sm_20;-m64;-ccbin ${CMAKE_C_COMPILER}") set(CUDA_NVCC_FLAGS "-DUSE_CUDA;-Xcompiler;-fPIC;-Xcompiler;-fopenmp;-O3;-arch=sm_20;-ccbin ${CMAKE_C_COMPILER}") macro(bart_add_executable target_name) CUDA_ADD_EXECUTABLE(${target_name} ${ARGN}) CUDA_ADD_CUFFT_TO_TARGET(${target_name}) CUDA_ADD_CUBLAS_TO_TARGET(${target_name}) target_link_libraries(${target_name} ${CUDA_LIBRARIES}) endmacro(bart_add_executable) macro(bart_add_library target_name) CUDA_ADD_LIBRARY(${target_name} ${ARGN} STATIC) CUDA_ADD_CUFFT_TO_TARGET(${target_name}) CUDA_ADD_CUBLAS_TO_TARGET(${target_name}) target_link_libraries(${target_name} ${CUDA_LIBRARIES}) endmacro(bart_add_library) else() macro(bart_add_executable target_name) add_executable(${target_name} ${ARGN}) endmacro(bart_add_executable) macro(bart_add_library target_name) add_library(${target_name} ${ARGN}) endmacro(bart_add_library) endif(USE_CUDA) ##- TODO option(USE_ACML "Provide support for ACML processing" OFF) find_package(OpenMP) if (OPENMP_FOUND) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") endif() ##- TODO option(USE_SLINK "Provide SLINK support" OFF) # Set a default build type if none was specified if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) message(STATUS 
"Setting build type to 'Release' as none was specified.") set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) # Set the possible values of build type for cmake-gui set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo") endif() find_package(ISMRMRD QUIET) ## if you can find ISMRMRD by default, then default configuration is ON option(USE_ISMRMRD "Use external ISMRMRD package for reading/writing" ${ISMRMRD_FOUND}) if(USE_ISMRMRD) find_package(ISMRMRD REQUIRED) endif() ## Compiler flags -- TODO This could be better, see ITK it won't work on windows builds if(${CMAKE_BUILD_TYPE} MATCHES "Release") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math -O3") # -Wall -Wextra -Wmissing-prototypes") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -O3") # -Wall -Wextra") endif() ##============================================================== ## Switch based on the linear algebra optimized library to ## use. Note that the first library successfully found ## will be used ## ## -*-*- Try OpenBLAS first if(NOT LINALG_VENDOR OR LINALG_VENDOR MATCHES "OpenBLAS") include( ${CMAKE_CURRENT_LIST_DIR}/cmake/FindOpenBLAS.cmake ) if(OpenBLAS_FOUND) set(LINALG_VENDOR_FOUND TRUE) set(LINALG_VENDOR "OpenBLAS") set(LINALG_INCLUDE_DIRS ${OpenBLAS_INCLUDE_DIRS}) if(OpenBLAS_HAS_PARALLEL_LIBRARIES) # HACK!! 
set(LINALG_LIBRARIES ${OpenBLAS_PARALLEL_LIBRARIES}) set(LINALG_LIBRARIES ${OpenBLAS_LIBRARIES}) else() set(LINALG_LIBRARIES ${OpenBLAS_LIBRARIES}) endif() endif() endif() ## ## -*-*- Try ATLAS version next if(NOT LINALG_VENDOR OR LINALG_VENDOR MATCHES "ATLAS") include( ${CMAKE_CURRENT_LIST_DIR}/cmake/FindATLAS.cmake ) if(ATLAS_FOUND) set(LINALG_VENDOR_FOUND TRUE) set(LINALG_VENDOR "ATLAS") set(LINALG_INCLUDE_DIRS ${ATLAS_INCLUDE_DIRS}) set(LINALG_LIBRARIES ${ATLAS_LIBRARIES}) endif() endif() ## ## -*-*- Try Generic LAPACKE version Last if(NOT LINALG_VENDOR OR LINALG_VENDOR MATCHES "LAPACKE") #NOTE: By specifying Fortran here, linking to lapack becomes easier # See https://blog.kitware.com/fortran-for-cc-developers-made-easier-with-cmake/ enable_language(Fortran) ## Only very new versions of LAPACK (> 3.5.0) have built in support ## for cblas and lapacke. This method is not very robust to older ## versions of lapack that might be able to be supported. ## It is know to work local builds find_package(LAPACKE REQUIRED) if(LAPACKE_FOUND) set(LINALG_VENDOR_FOUND TRUE) set(LINALG_VENDOR "LAPACKE") set(LINALG_INCLUDE_DIRS ${LAPACKE_INCLUDE_DIRS}) set(LINALG_LIBRARIES ${LAPACKE_LIBRARIES}) endif() endif() ## ## -*-*- Finally, set include_directories -*-*-* if(NOT LINALG_VENDOR_FOUND) message(FATAL_ERROR "No valid linear algebra libraries found!") endif() include_directories(${LINALG_INCLUDE_DIRS}) ##====================================================================== set(USE_FFTWF ON) # Only find single precision fftw find_package(FFTW REQUIRED) message(STATUS "FFTWF_LIBRARIES: ${FFTWF_LIBRARIES}") find_package(PNG REQUIRED) add_definitions(${PNG_DEFINITIONS}) include_directories(${PNG_INCLUDE_DIRS}) execute_process(COMMAND ${CMAKE_CURRENT_LIST_DIR}/git-version.sh OUTPUT_VARIABLE BART_FULL_VERSION_STRING WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) string(STRIP ${BART_FULL_VERSION_STRING} BART_FULL_VERSION_STRING) ## Remove trailing whitespace (return characters) 
string(REGEX REPLACE ".*v([0-9]*)\\.([0-9]*)\\.([0-9]*)-.*" "\\1" BART_VERSION_MAJOR "${BART_FULL_VERSION_STRING}") string(REGEX REPLACE ".*v([0-9]*)\\.([0-9]*)\\.([0-9]*)-.*" "\\2" BART_VERSION_MINOR "${BART_FULL_VERSION_STRING}") string(REGEX REPLACE ".*v([0-9]*)\\.([0-9]*)\\.([0-9]*)-.*" "\\3" BART_VERSION_PATCH "${BART_FULL_VERSION_STRING}") message(STATUS "BART VERSION: ${BART_FULL_VERSION_STRING}: ${BART_VERSION_MAJOR} ${BART_VERSION_MINOR} ${BART_VERSION_PATCH}") file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/version.inc.in "VERSION(\@BART_FULL_VERSION_STRING\@)") configure_file(${CMAKE_CURRENT_BINARY_DIR}/version.inc.in ${CMAKE_CURRENT_BINARY_DIR}/version.inc @ONLY) include_directories(${CMAKE_CURRENT_BINARY_DIR}) ## ========== Process build_targets.mk set (ALLPROGS "") file(READ "${CMAKE_CURRENT_LIST_DIR}/build_targets.mk" bld_tgt_list_of_list) # Convert file contents into a CMake list (where each element in the list # is one line of the file) # string(REGEX REPLACE ";" "\\\\;" bld_tgt_list_of_list "${bld_tgt_list_of_list}") string(REGEX REPLACE "\n" ";" bld_tgt_list_of_list "${bld_tgt_list_of_list}") foreach(bld_tgt_line ${bld_tgt_list_of_list}) if( "${bld_tgt_line}" MATCHES "^[^#].*=.*") string(REGEX REPLACE "^ *([^=]*) *= *(.*) *" "\\1" BLD_KEY "${bld_tgt_line}") string(REGEX REPLACE "^ *([^=]*) *= *(.*) *" "\\2" BLD_VALUE "${bld_tgt_line}") string(REPLACE " " ";" ${BLD_KEY} "${BLD_VALUE}") ## Create a new variable called ${BLD_KEY} ## message(STATUS "KEY:${BLD_KEY}: VALUE:${${BLD_KEY}}:") list(APPEND ALLPROGS ${${BLD_KEY}}) endif() endforeach() ## BART Makefile depends heavily on pre-processor replacements ## on the command line rather than a library of common functionality ## that is used to compile code once and link many times. ## In this cmake build, it was choosen to generate separate files ## for separate build uses of each source file. 
## This convoluted mechansims is needed to avoid changing ## any build organizaiton so that the Makefile build system is ## not changed at all. A different organziation of the build ## proceedures could make this much less complicated. ## ## usage CONFIG_BY_REPLACEMENT( in.file out.file "at top string" match1 rep1 match2 rep2 .... matchN repN) macro(CONFIG_BY_REPLACEMENT INFILE OUTFILE PREFIX ) if(NOT EXISTS "${INFILE}") message(FATAL_ERROR "\n\nMISSING INPUT FILE \"${INFILE}\" for generating \"${OUTFILE}\"\n\n") endif() file(READ "${INFILE}" MAIN_TEMPLATE_STRING) set(all_repacement_pairs ${ARGN}) list(LENGTH all_repacement_pairs RP_LENGTH) while(NOT ${RP_LENGTH} LESS 2) list(GET all_repacement_pairs 0 INSTRING) list(REMOVE_AT all_repacement_pairs 0 ) list(LENGTH all_repacement_pairs RP_LENGTH) if(${RP_LENGTH} GREATER 0) list(GET all_repacement_pairs 0 OUTSTRING) list(REMOVE_AT all_repacement_pairs 0 ) list(LENGTH all_repacement_pairs RP_LENGTH) else() message(FATAL_ERROR "Replacement pairs unmatched: ${ARGN}") endif() string(REPLACE "${INSTRING}" "${OUTSTRING}" MAIN_TEMPLATE_STRING "${MAIN_TEMPLATE_STRING}") endwhile() string(REPLACE "__EOL__" ";" MAIN_TEMPLATE_STRING "${MAIN_TEMPLATE_STRING}") ## UglyFix set(MAIN_TEMPLATE_STRING "${PREFIX}\n${MAIN_TEMPLATE_STRING}") if(EXISTS "${OUTFILE}" ) file(READ "${OUTFILE}" PREVIOUS_CONTENT) else() set(PREVIOUS_CONTENT "") endif() string( COMPARE EQUAL "${PREVIOUS_CONTENT}" "${MAIN_TEMPLATE_STRING}" STRING_NO_CHANGE) if( NOT STRING_NO_CHANGE ) file(WRITE "${OUTFILE}" "${MAIN_TEMPLATE_STRING}") endif() endmacro() set(BART_SUPPORT_LIBS calib misc dfwavelet grecon iter linops lowrank nlops noir noncart num sake sense simu wavelet) if(USE_ISMRMRD) list(APPEND BART_SUPPORT_LIBS ismrm) link_directories(${ISMRMRD_LIBRARY_DIRS}) include_directories(${ISMRMRD_INCLUDE_DIRS}) endif() include_directories(src) set(bart_support_SRCS "") foreach(curr_lib ${BART_SUPPORT_LIBS}) file(GLOB ${curr_lib}_SRCS "src/${curr_lib}/*.c") list(APPEND 
bart_support_SRCS ${${curr_lib}_SRCS}) if(USE_CUDA) file(GLOB ${curr_lib}_SRCS "src/${curr_lib}/*.cu") list(APPEND bart_support_SRCS ${${curr_lib}_SRCS}) endif() endforeach() bart_add_library(bartsupport ${bart_support_SRCS}) target_link_libraries(bartsupport ${PNG_LIBRARIES} ${FFTWF_LIBRARIES} ${LINALG_LIBRARIES}) if(USE_ISMRMRD) target_link_libraries(bartsupport ${ISMRMRD_LIBRARIES}) endif() install(TARGETS bartsupport RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib/static) ## PASS #1: Build stand-alone programs ## Generate stand alone programs file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/StandAloneCode) foreach(curr_prog ${ALLPROGS}) CONFIG_BY_REPLACEMENT( ${CMAKE_CURRENT_LIST_DIR}/src/main.c ${CMAKE_CURRENT_BINARY_DIR}/StandAloneCode/${curr_prog}.c "/* Generated by cmake */" "main_real" "main_${curr_prog}") bart_add_executable(${curr_prog} ${CMAKE_CURRENT_BINARY_DIR}/StandAloneCode/${curr_prog}.c ${CMAKE_CURRENT_LIST_DIR}/src/${curr_prog}.c) target_link_libraries(${curr_prog} bartsupport ) install(TARGETS ${curr_prog} RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib/static) endforeach() ## More crafty file manipulation so that we maintain backward comaptibility ## with the Makefile ## Generate combined programs #============================================== set(EXTERN_LIST "\n\n/* Generated by cmake */\n") foreach(driver ${ALLPROGS}) set(EXTERN_LIST "${EXTERN_LIST}extern int main_${driver}(int argc, char* argv[])__EOL__\n") endforeach() file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/CombinedCode) ## Configure include header CONFIG_BY_REPLACEMENT( "${CMAKE_CURRENT_LIST_DIR}/src/main.h" "${CMAKE_CURRENT_BINARY_DIR}/CombinedCode/config_main.h" "/* Generated by cmake */" "MAP(DECLMAIN, MAIN_LIST)" "${EXTERN_LIST}" "misc/cppmap.h" "stdio.h") ## Replacement 3 no longer need this include include_directories(${CMAKE_CURRENT_BINARY_DIR}/CombinedCode) #============================================== set(DRIVER_LIST 
"\n\n/* Generated by cmake */\n") foreach(driver ${ALLPROGS}) set(DRIVER_LIST "${DRIVER_LIST}{ main_${driver}, \"${driver}\" },\n") endforeach() set(ALL_BART_SRCS "") foreach(curr_prog ${ALLPROGS}) CONFIG_BY_REPLACEMENT( "${CMAKE_CURRENT_LIST_DIR}/src/${curr_prog}.c" "${CMAKE_CURRENT_BINARY_DIR}/CombinedCode/${curr_prog}.c" "/* Generated by cmake */\n#include \"config_main.h\"" "MAP(DENTRY, MAIN_LIST)" "${DRIVER_LIST}") list(APPEND ALL_BART_SRCS "${CMAKE_CURRENT_BINARY_DIR}/CombinedCode/${curr_prog}.c") endforeach() foreach(curr_prog bart) CONFIG_BY_REPLACEMENT( "${CMAKE_CURRENT_LIST_DIR}/src/${curr_prog}.c" "${CMAKE_CURRENT_BINARY_DIR}/CombinedCode/${curr_prog}.c" "#include \"config_main.h\"" "MAP(DENTRY, MAIN_LIST)" "${DRIVER_LIST}" ## Replacement 1 "main_bart" "main" ## Replacement 2 "misc/cppmap.h" "stdio.h") ## Replacement 3 no longer need this include list(APPEND ALL_BART_SRCS "${CMAKE_CURRENT_BINARY_DIR}/CombinedCode/${curr_prog}.c") endforeach() bart_add_executable(bart ${ALL_BART_SRCS}) target_link_libraries(bart bartsupport) install(TARGETS bart RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib/static) #============================================== # TODO: Matlab code option(USE_MATLAB "Specify if the optional matlab programs should be built" ON) if(USE_MATLAB) find_package(Matlab REQUIRED) if(MATLAB_FOUND) message(STATUS "MATLAB LIBRARIES FOUND: ${MATLAB_LIBRARIES_DIR}") include_directories(${MATLAB_INCLUDE_DIR}) bart_add_executable(mat2cfl ${CMAKE_CURRENT_LIST_DIR}/src/mat2cfl.c) target_link_libraries(mat2cfl bartsupport ${MATLAB_MAT_LIBRARY} ${MATLAB_ENG_LIBRARY} ${MATLAB_MX_LIBRARY}) install(TARGETS mat2cfl RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib/static) endif() endif() #============================================== # Testing code include_directories(${CMAKE_CURRENT_LIST_DIR}/utests) set(UNIT_TEST_SRC utests/test_batchsvd.c utests/test_flpmath.c utests/test_pattern.c utests/test_splines.c) 
set(UTESTS "call_test_batch_svthresh_tall, call_test_batch_svthresh_wide, ") file(READ utests/utest.c TEST_DRIVER_TEMPLATE) string(REPLACE "UTESTS" "${UTESTS}" TEST_DRIVER_CODE "${TEST_DRIVER_TEMPLATE}") file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/utest.c "${TEST_DRIVER_CODE}") #configure_file(utests/utest.c.in ${CMAKE_CURRENT_BINARY_DIR}/utest.c @ONLY) bart_add_executable(utest ${CMAKE_CURRENT_BINARY_DIR}/utest.c ${UNIT_TEST_SRC}) target_link_libraries(utest bartsupport) #----------------------------------------------------------------------------- include(CTest) enable_testing() add_test(NAME BartUTest COMMAND $) set(TARBALL_VERSION "${BART_VERSION_MAJOR}.${BART_VERSION_MINOR}.${BART_VERSION_PATCH}") add_custom_target(tarball COMMAND git archive --prefix=bart-${TARBALL_VERSION}/ -o ${CMAKE_CURRENT_BINARY_DIR}/bart-${TARBALL_VERSION}.tar.gz v${TARBALL_VERSION} WORKING_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" COMMENT "BUILD TARBALL FOR BART WITH LATEST VERSION" ) add_custom_target(doxygen COMMAND ${CMAKE_CURRENT_LIST_DIR}/makedoc.sh WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} DEPENDS bart SOURCES makedoc.sh doxyconfig) add_custom_target(bart.syms COMMAND ${CMAKE_CURRENT_LIST_DIR}/rules/make_symbol_table.sh $ ${CMAKE_CURRENT_BINARY_DIR}/bart.syms WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS bart SOURCES makedoc.sh doxyconfig) add_custom_command(TARGET bart POST_BUILD COMMAND ${CMAKE_CURRENT_LIST_DIR}/rules/update_commands.sh $ ${CMAKE_CURRENT_LIST_DIR}/doc/commands.txt ${ALLPROGS} ) file(GLOB DOCS "${CMAKE_CURRENT_LIST_DIR}/doc/*.txt") list(APPEND ${CMAKE_CURRENT_LIST_DIR}/README) install( FILES ${DOCS} DESTINATION share/doc/bart/ ) bart-0.4.02/LICENSE000066400000000000000000000031471320577655200135530ustar00rootroot00000000000000Copyright (c) 2013-2017. The Regents of the University of California. Copyright (c) 2013-2017. BART Developer Team and Contributors. Copyright (c) 2012. Intel Coorperation. (src/lapacke/) All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. bart-0.4.02/Makefile000066400000000000000000000226101320577655200142020ustar00rootroot00000000000000# Copyright 2013-2015. The Regents of the University of California. # Copyright 2015-2016. Martin Uecker # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. 
# we have a two stage Makefile MAKESTAGE ?= 1 # silent make #MAKEFLAGS += --silent # clear out all implicit rules and variables MAKEFLAGS += -R # use for parallel make AR=./ar_lock.sh CUDA?=0 ACML?=0 OMP?=1 SLINK?=0 DEBUG?=0 FFTWTHREADS?=1 ISMRMRD?=0 DESTDIR ?= / PREFIX ?= usr/ BUILDTYPE = Linux UNAME = $(shell uname -s) NNAME = $(shell uname -n) MYLINK=ln ifeq ($(UNAME),Darwin) BUILDTYPE = MacOSX MYLINK = ln -s endif ifeq ($(BUILDTYPE), MacOSX) MACPORTS ?= 1 endif ifeq ($(BUILDTYPE), Linux) # as the defaults changed on most Linux distributions # explicitly specify non-deterministic archives to not break make ARFLAGS ?= rsU else ARFLAGS ?= rs endif ifeq ($(UNAME),Cygwin) BUILDTYPE = Cygwin NOLAPACKE ?= 1 endif ifeq ($(UNAME),CYGWIN_NT-10.0) BUILDTYPE = Cygwin NOLAPACKE ?= 1 endif # Paths here = $(realpath $(dir $(lastword $(MAKEFILE_LIST)))) root := $(here) srcdir = $(root)/src libdir = $(root)/lib bindir = $(root)/bin export TOOLBOX_PATH=$(root) # Automatic dependency generation DEPFILE = $(*D)/.$(*F).d DEPFLAG = -MMD -MF $(DEPFILE) ALLDEPS = $(shell find $(srcdir) utests -name ".*.d") # Compilation flags OPT = -O3 -ffast-math CPPFLAGS ?= -Wall -Wextra CFLAGS ?= $(OPT) -Wmissing-prototypes CXXFLAGS ?= $(OPT) ifeq ($(BUILDTYPE), MacOSX) CC ?= gcc-mp-4.7 else CC ?= gcc # for symbols in backtraces LDFLAGS += -rdynamic endif CXX ?= g++ # openblas ifneq ($(BUILDTYPE), MacOSX) BLAS_BASE ?= /usr/ else ifeq ($(MACPORTS),1) BLAS_BASE ?= /opt/local/ CPPFLAGS += -DUSE_MACPORTS endif BLAS_BASE ?= /usr/local/opt/openblas/ endif # cuda CUDA_BASE ?= /usr/local/ # acml ACML_BASE ?= /usr/local/acml/acml4.4.0/gfortran64_mp/ # fftw ifneq ($(BUILDTYPE), MacOSX) FFTW_BASE ?= /usr/ else FFTW_BASE ?= /opt/local/ endif # Matlab MATLAB_BASE ?= /usr/local/matlab/ # ISMRM ISMRM_BASE ?= /usr/local/ismrmrd/ # Main build targets are defined in build_targets.mk so that both CMake and Make can use the same definitions # set values for TBASE TFLP TNUM TRECO TCALIB TMRI TSIM TIO in 
build_targets.mk include build_targets.mk MODULES = -lnum -lmisc -lnum -lmisc MODULES_pics = -lgrecon -lsense -liter -llinops -lwavelet -llowrank -lnoncart MODULES_sqpics = -lsense -liter -llinops -lwavelet -llowrank -lnoncart MODULES_pocsense = -lsense -liter -llinops -lwavelet MODULES_nlinv = -lnoir -liter -lnlops -llinops MODULES_bpsense = -lsense -lnoncart -liter -llinops -lwavelet MODULES_itsense = -liter -llinops MODULES_ecalib = -lcalib MODULES_ecaltwo = -lcalib MODULES_caldir = -lcalib MODULES_walsh = -lcalib MODULES_calmat = -lcalib MODULES_cc = -lcalib MODULES_ccapply = -lcalib MODULES_estvar = -lcalib MODULES_nufft = -lnoncart -liter -llinops MODULES_rof = -liter -llinops MODULES_bench = -lwavelet -llinops MODULES_phantom = -lsimu MODULES_bart = -lbox -lgrecon -lsense -lnoir -liter -llinops -lwavelet -llowrank -lnoncart -lcalib -lsimu -lsake -ldfwavelet -lnlops MODULES_sake = -lsake MODULES_wave = -liter -lwavelet -llinops -lsense MODULES_threshold = -llowrank -liter -ldfwavelet -llinops -lwavelet MODULES_fakeksp = -lsense -llinops MODULES_lrmatrix = -llowrank -liter -llinops MODULES_estdims = -lnoncart -llinops MODULES_ismrmrd = -lismrm MODULES_wavelet = -llinops -lwavelet MAKEFILES = $(root)/Makefiles/Makefile.* -include Makefile.$(NNAME) -include Makefile.local -include $(MAKEFILES) ifeq ($(ISMRMRD),1) TMRI += ismrmrd MODULES_bart += -lismrm endif ifeq ($(NOLAPACKE),1) CPPFLAGS += -DNOLAPACKE MODULES += -llapacke endif XTARGETS += $(TBASE) $(TFLP) $(TNUM) $(TIO) $(TRECO) $(TCALIB) $(TMRI) $(TSIM) TARGETS = bart $(XTARGETS) ifeq ($(DEBUG),1) CPPFLAGS += -g CFLAGS += -g endif ifeq ($(PARALLEL),1) MAKEFLAGS += -j endif ifeq ($(MAKESTAGE),1) .PHONY: doc/commands.txt $(TARGETS) default all clean allclean distclean doc/commands.txt doxygen test utest gputest $(TARGETS): make MAKESTAGE=2 $(MAKECMDGOALS) else CPPFLAGS += $(DEPFLAG) -I$(srcdir)/ CFLAGS += -std=gnu11 -I$(srcdir)/ CXXFLAGS += -I$(srcdir)/ default: bart doc/commands.txt .gitignore -include 
$(ALLDEPS) # cuda NVCC = $(CUDA_BASE)/bin/nvcc ifeq ($(CUDA),1) CUDA_H := -I$(CUDA_BASE)/include CPPFLAGS += -DUSE_CUDA $(CUDA_H) ifeq ($(BUILDTYPE), MacOSX) CUDA_L := -L$(CUDA_BASE)/lib -lcufft -lcudart -lcublas -m64 -lstdc++ else CUDA_L := -L$(CUDA_BASE)/lib64 -lcufft -lcudart -lcublas -lstdc++ -Wl,-rpath $(CUDA_BASE)/lib64 endif else CUDA_H := CUDA_L := endif # sm_20 no longer supported in CUDA 9 GPUARCH_FLAGS ?= NVCCFLAGS = -DUSE_CUDA -Xcompiler -fPIC -Xcompiler -fopenmp -O3 $(GPUARCH_FLAGS) -I$(srcdir)/ -m64 -ccbin $(CC) #NVCCFLAGS = -Xcompiler -fPIC -Xcompiler -fopenmp -O3 -I$(srcdir)/ %.o: %.cu $(NVCC) $(NVCCFLAGS) -c $^ -o $@ $(NVCC) $(NVCCFLAGS) -M $^ -o $(DEPFILE) # OpenMP ifeq ($(OMP),1) CFLAGS += -fopenmp CXXFLAGS += -fopenmp else CFLAGS += -Wno-unknown-pragmas CXXFLAGS += -Wno-unknown-pragmas endif # BLAS/LAPACK ifeq ($(ACML),1) BLAS_H := -I$(ACML_BASE)/include BLAS_L := -L$(ACML_BASE)/lib -lgfortran -lacml_mp -Wl,-rpath $(ACML_BASE)/lib CPPFLAGS += -DUSE_ACML else BLAS_H := -I$(BLAS_BASE)/include ifeq ($(BUILDTYPE), MacOSX) BLAS_L := -L$(BLAS_BASE)/lib -lopenblas else ifeq ($(NOLAPACKE),1) BLAS_L := -L$(BLAS_BASE)/lib -llapack -lblas CPPFLAGS += -Isrc/lapacke else BLAS_L := -L$(BLAS_BASE)/lib -llapacke -lblas endif endif endif CPPFLAGS += $(FFTW_H) $(BLAS_H) # png PNG_L := -lpng ifeq ($(SLINK),1) PNG_L += -lz endif # fftw FFTW_H := -I$(FFTW_BASE)/include/ FFTW_L := -L$(FFTW_BASE)/lib -lfftw3f ifeq ($(FFTWTHREADS),1) FFTW_L += -lfftw3f_threads CPPFLAGS += -DFFTWTHREADS endif # Matlab MATLAB_H := -I$(MATLAB_BASE)/extern/include MATLAB_L := -Wl,-rpath $(MATLAB_BASE)/bin/glnxa64 -L$(MATLAB_BASE)/bin/glnxa64 -lmat -lmx -lm -lstdc++ # ISMRM ifeq ($(ISMRMRD),1) ISMRM_H := -I$(ISMRM_BASE)/include ISMRM_L := -L$(ISMRM_BASE)/lib -lismrmrd else ISMRM_H := ISMRM_L := endif # change for static linking ifeq ($(SLINK),1) # work around fortran problems with static linking LDFLAGS += -static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive 
-Wl,--allow-multiple-definition BLAS_L += -llapack -lblas -lgfortran -lquadmath endif # Modules .LIBPATTERNS := lib%.a vpath %.a lib DIRS = $(root)/rules/*.mk include $(DIRS) # sort BTARGETS after everything is included BTARGETS:=$(sort $(BTARGETS)) XTARGETS:=$(sort $(XTARGETS)) .gitignore: .gitignore.main Makefile* @echo '# AUTOGENERATED. DO NOT EDIT. (are you looking for .gitignore.main ?)' > .gitignore cat .gitignore.main >> .gitignore @echo $(patsubst %, /%, $(TARGETS) $(UTARGETS)) | tr ' ' '\n' >> .gitignore doc/commands.txt: bart ./rules/update_commands.sh ./bart doc/commands.txt $(XTARGETS) doxygen: makedoc.sh doxyconfig bart ./makedoc.sh all: .gitignore $(TARGETS) # special targets $(XTARGETS): CPPFLAGS += -DMAIN_LIST="$(XTARGETS:%=%,) ()" -include src/main.h bart: CPPFLAGS += -DMAIN_LIST="$(XTARGETS:%=%,) ()" -include src/main.h mat2cfl: $(srcdir)/mat2cfl.c -lnum -lmisc $(CC) $(CFLAGS) $(MATLAB_H) -omat2cfl $+ $(MATLAB_L) $(CUDA_L) # implicit rules %.o: %.c $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< %.o: %.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< ifeq ($(PARALLEL),1) (%): % $(AR) $(ARFLAGS) $@ $% else (%): % $(AR) $(ARFLAGS) $@ $% rm $% endif # we add the rm because intermediate files are not deleted # automatically for some reason # (but it produces errors for parallel builds for make all) .SECONDEXPANSION: $(TARGETS): % : src/main.c $(srcdir)/%.o $$(MODULES_%) $(MODULES) $(CC) $(LDFLAGS) $(CFLAGS) -Dmain_real=main_$@ -o $@ $+ $(FFTW_L) $(CUDA_L) $(BLAS_L) $(PNG_L) $(ISMRM_L) -lm # rm $(srcdir)/$@.o UTESTS=$(shell $(root)/utests/utests-collect.sh ./utests/$@.c) .SECONDEXPANSION: $(UTARGETS): % : utests/utest.c utests/%.o $$(MODULES_%) $(MODULES) $(CC) $(LDFLAGS) $(CFLAGS) -DUTESTS="$(UTESTS)" -o $@ $+ $(FFTW_L) $(CUDA_L) $(BLAS_L) -lm # linker script version - does not work on MacOS X # $(CC) $(LDFLAGS) -Wl,-Tutests/utests.ld $(CFLAGS) -o $@ $+ $(FFTW_L) $(CUDA_L) $(BLAS_L) -lm clean: rm -f `find $(srcdir) -name "*.o"` rm -f utests/*.o rm -f $(patsubst 
%, %, $(UTARGETS)) rm -f $(root)/lib/.*.lock allclean: clean rm -f $(libdir)/*.a $(ALLDEPS) rm -f $(patsubst %, %, $(TARGETS)) rm -f $(srcdir)/misc/version.inc rm -rf doc/dx rm -f doc/commands.txt distclean: allclean # automatic tests # system tests TOOLDIR=$(root) TESTS_TMP=$(root)/tests/tmp/$$$$/ TESTS_OUT=$(root)/tests/out/ include $(root)/tests/*.mk test: ${TESTS} gputest: ${TESTS_GPU} pythontest: ${TESTS_PYTHON} # unit tests # define space to faciliate running executables define \n endef utests-all: $(UTARGETS) $(patsubst %,$(\n)./%,$(UTARGETS)) utest: utests-all @echo ALL UNIT TESTS PASSED. endif # MAKESTAGE install: bart $(root)/doc/commands.txt install -d $(DESTDIR)/$(PREFIX)/bin/ install bart $(DESTDIR)/$(PREFIX)/bin/ install -d $(DESTDIR)/$(PREFIX)/share/doc/bart/ install $(root)/doc/*.txt $(root)/README $(DESTDIR)/$(PREFIX)/share/doc/bart/ install -d $(DESTDIR)/$(PREFIX)/lib/bart/commands/ # generate release tar balls (identical to github) %.tar.gz: git archive --prefix=bart-$(patsubst bart-%.tar.gz,%,$@)/ -o $@ v$(patsubst bart-%.tar.gz,%,$@) # symbol table bart.syms: bart rules/make_symbol_table.sh bart bart.syms bart-0.4.02/Makefiles/000077500000000000000000000000001320577655200144415ustar00rootroot00000000000000bart-0.4.02/Makefiles/README.md000066400000000000000000000017261320577655200157260ustar00rootroot00000000000000### Custom Makefiles Put custom Makefiles here, to be included in the standard Makefile. 
The build will automatically include all files in this directory matching the pattern `Makefile.*` Example custom Makefile for modifying build: ```bash ## Makefile.local # Makefile for my local build DEBUG = 1 # Parallel make PARALLEL ?= 1 # GPU CUDA=0 CC=clang OMP=0 # Paths FFTW_BASE := /opt/local/ MATLAB_BASE := /Applications/MATLAB_R2016a.app CUDA_BASE = /usr/local/cuda/ BLAS_BASE := /opt/local ``` Example Makefile and library rules for adding a custom program: ```bash ## Makefiles/Makefile.sum # Compile my custom program, src/sum.c, which relies on # my custom library, lib/libsum.a MODULES_sum = -lsum MODULES_bart += -lsum XTARGETS += sum ``` ```bash ### rules/sum.mk # Build my custom library with files under src/sum/ sumsrcs := $(wildcard $(srcdir)/sum/*.c) sumobjs := $(sumsrcs:.c=.o) .INTERMEDIATE: $(sumobjs) lib/libsum.a: libsum.a($(sumobjs)) ``` bart-0.4.02/README000066400000000000000000000264711320577655200134330ustar00rootroot00000000000000 0. License ========== See LICENSE file for licensing information. ------------------------------------------------------------------------------- The tools in this software implement various reconstruction algorithms for Magnetic Resonance Imaging. The software is intended for research use only and NOT FOR DIAGNOSTIC USE. It comes without any warranty (see LICENSE for details). Please cite the corresponding articles when using these tools. Some references can be found at the end of this file. The source code might provide more detailed references, e.g. for specific iterative algorithms. 1. Help ======= Please direct all questions or comments to the public mailing list: mrirecon@lists.eecs.berkeley.edu https://lists.eecs.berkeley.edu/sympa/info/mrirecon Note: This list has a public archive! Please do not send any confidential information. Updates and further information can be found here: http://mrirecon.github.io/bart/ 2. Installation =============== 2.1. 
Prerequisites ------------------ GCC compiler, the FFTW library, and optionally CUDA. (see recon/Makefile to turn options on or off) The software can be used in combination with Matlab or octave. There is limited support for reading Cartesian data encoded with the ISMRM Raw Data format when linking with the ISMRMRD library (http://ismrmrd.sourceforge.net/). In the following, the symbol '`$`' indicates a shell prompt. Do not type '`$`' when entering commands. ### 2.1.1. Linux The software tools in recon should run on any recent Linux distribution. To install the required libraries on Debian and Ubuntu run: $ sudo apt-get install gcc make libfftw3-dev liblapacke-dev libpng-dev libopenblas-dev (optional) $ sudo apt-get install octave (optional) install version 0.5.2 of the ISMRMRD library ### 2.1.2. Mac OS X Xcode is required and it is recommended to install a newer version of gcc (4.7 seems to work) from MacPorts (http://www.macports.org/). $ sudo port install fftw-3-single $ sudo port install gcc47 $ sudo port install libpng $ sudo port install openblas (optional) $ sudo port install octave (optional) install version 0.5.2 of the ISMRMRD library ### 2.1.3. Windows The recommended way to use BART on Windows is with the Windows Subsystem for Linux (WSL) which is available for Windows 10. BART should also work with Cygwin: https://www.cygwin.com/ Install Cygwin and select the following packages: Devel: gcc, make Math: fftw3, fftw3-doc, libfftw3-devel, libfftw3_3 Math: liblapack-devel, liblapack-doc, liblapack0 Libs: libpng, libpng-devel Then use the cygwin shell to compile BART as described below. An alternative to using the Windows Subsystem for Linux or Cygwin is a virtual machine with Linux installed. 2.2. 
Downloading and Compilation -------------------------------- If you are a git user, you can simply clone our public repository: $ git clone https://github.com/mrirecon/bart Otherwise, please download the latest version as a zip file from GitHub: http://github.com/mrirecon/bart/releases/latest and unpack it somewhere on your computer. Open a terminal window and enter the bart directory (the top-level directory with the Makefile in it). To build the reconstruction tools type: $ make If you have installed the ISMRMRD library version 0.5.2, you can also build the ISMRM raw data import tool: $ make ismrmrd 2.3. Getting Started -------------------- ### 2.3.1. Organization . main directory / built software tools Makefile Makefile matlab/ Matlab helper scripts python/ Python helper functions doc/ documentation rules/ more build-related files scripts/ various helper scripts and examples src/ source code src/calib source code for sensitivity calibration src/sense source code for SENSE or ESPIRiT reconstruction src/noir source code for nonlinear inversion src/sake source code for SAKE reconstruction src/wavelet source code for wavelets src/num base library with numerical functions src/iter iterative algorithms src/linops library of linear operators src/nlops library of nonlinear operators src/misc miscellaneous (e.g. I/O) src/ismrm support for ISMRM raw data format src/simu source code for simulation src/noncart source code for non-uniform FFT tests/ system tests utests/ unit tests lib/ built software libraries ### 2.3.2. Terminal When using the toolbox commands from a UNIX shell, it is recommended to set the TOOLBOX_PATH to the base directory and to add it to the PATH variable. You can do this by running the following command: $ . startup.sh Note: The dot or 'source' command is needed so that the variables are imported into the current shell. ### 2.3.3. 
Matlab You can set the TOOLBOX_PATH to the base directory and add it to the Matlab path by running the following command in the bart directory: >> startup (Note: The '>>' indicates the Matlab prompt. Do not type '>>' when entering commands.) You can use Matlab to read and visualize/process files. To write a data file 'xyz' from Matlab you can run: >> writecfl('xyz', A); Note that the name 'xyz' is used without filename extension. See below for more information about the file format used in BART. To read the data file 'xyz' back into Matlab use: >> A = readcfl('xyz'); To call a BART tool (e.g. ecalib) from Matlab, you can use the 'bart' command: >> sensitivities = bart('ecalib', kspace); Download and unpack the examples which demonstrate interoperability with Matlab. Go to the examples directory and run: >> examples ### 2.3.4. Python You can set the TOOLBOX_PATH to the base directory and start Python interactively as follows: $ python -i startup.py To avoid doing the above every time, it is recommended to update your PYTHONPATH environment. For example, in Linux, assuming your TOOLBOX_PATH is set, add the below line to your bashrc file. $ export PYTHONPATH="${TOOLBOX_PATH}/python:$PYTHONPATH" After doing so, we can simply import as needed. >>> from bart import bart >>> import cfl You can use Python to read and visualize/process files. To write a data file 'xyz' from Python you can run: >>> cfl.writecfl('xyz', A); Note that the name 'xyz' is used without filename extension. See below for more information about the file format used in BART. To read the data file 'xyz' back into Python use: >>> A = cfl.readcfl('xyz'); To call a BART tool (e.g. ecalib) from Python, you can use the 'bart' command: >>> sensitivities = bart(1, 'ecalib', kspace); The bart function expects the following signature: >>> OUTPUTS = bart(NARGOUT, COMMAND, ARGUMENTS, ...) To use BART in a script, please follow the steps in the startup.py file. 3. Data Format ============== 3.1. 
Generic ------------ The input and output datasets are each stored in a pair of files: one header (*.hdr) and one raw data (*.cfl). The header is a simple text readable file that describes the dimensions of the data. The raw data file is a binary file containing a single contiguous block of array data of dimensions described in the header stored in column-major order (first index is sequential). The raw data file is complex float (32 bit real + 32 bit imaginary, IEEE 754 binary32 little-endian). Convenience methods to read and write our data files using Matlab may be found in the matlab/ directory (readcfl.m and writecfl.m). Similar methods for Python may be found in the python/ directory (cfl.py). 3.2. Magnetic Resonance Imaging Data ------------------------------------ For MRI data and images, the dimensions are usually assigned in the following order: 0 readout 1 phase-encoding dimension 1 2 phase-encoding dimension 2 3 receive channels 4 ESPIRiT maps ... ... (more dimensions are defined in src/misc/mri.h) Undersampled data is stored with zeros in the unsampled positions. 3.3. Non-Cartesian Trajectories and Samples ------------------------------------------- The k-space coordinates for each sample are stored along dimension 0 which must have size equal to three. The unit of measurement is 1/FOV. Dimension 1 stores the samples along a single readout window while dimension 2 may be used to differentiate between different lines (e.g. radial spokes). Channel (3) and map (4) dimensions must not be used (i.e. have size one), while other dimensions can be used as for Cartesian data. Non-Cartesian samples are stored in a similar way as trajectories except that dimension 0 is not used. The channel dimension can be used for different receiver coils as usual. 4. Command-line Tools ===================== All tools operate on the simple file format given above. Indices and dimensions run from 0 to N-1. 
Sometimes a set of dimensions is given as a bitmask where the lowest bit corresponds to the 0th dimension. For example, an inverse Fourier transform of the first three dimensions can be performed with the following command: $ bart fft -i 7 kspace volume More information about each command can be found in 'doc/commands.txt'. 5. Information for Contributors =============================== Thank you for helping to improve BART! In order for us to be able to accept your contribution, it has to be released under the BSD license used by BART (see LICENSE file). By submitting patches to us it is understood that you agree to these terms and that you confirm that you hold all necessary rights yourself or have permission from the copyright holder. Please also add the name of the copyright holder and name and email of the author(s) to the copyright headers in all new or changed files. 6. Troubleshooting ================== 6.1. Installation Problems -------------------------- When problems occur after updating BART or changing build variables, it may help to clean the build environment and to recompile BART: $ make allclean $ make Make sure the PATH and TOOLBOX_PATH environment variables are set correctly. Sometimes, several versions of BART are installed and the wrong version is used accidentally. 6.2. Reporting Problems ----------------------- Please report problems to our mailing list and include the following information (as applicable): * The output of the 'version' command: $ bart version -V * The exact BART command-line that caused the problem. * The specific error message. * Information about the data files used when the problem occurred (please provide at least the dimensions of all input files). 6.3. Debugging -------------- See 'doc/debugging.txt' for details. 7. 
References ============= * Tamir JI, Ong F, Cheng JY, Uecker M, Lustig M, Generalized Magnetic Resonance Image Reconstruction using The Berkeley Advanced Reconstruction Toolbox, ISMRM Workshop on Data Sampling and Image Reconstruction, Sedona 2016 * Uecker M, Ong F, Tamir JI, Bahri D, Virtue P, Cheng JY, Zhang T, Lustig M, Berkeley Advanced Reconstruction Toolbox, Annual Meeting ISMRM, Toronto 2015 In: Proc Intl Soc Mag Reson Med 23:2486 * Uecker M, Virtue P, Ong F, Murphy MJ, Alley MT, Vasanawala SS, Lustig M, Software Toolbox and Programming Library for Compressed Sensing and Parallel Imaging, ISMRM Workshop on Data Sampling and Image Reconstruction, Sedona 2013 References related to implemented methods and algorithms can be found in the file 'doc/references.txt'. bart-0.4.02/README.md000066400000000000000000000023171320577655200140230ustar00rootroot00000000000000 BART: Toolbox for Computational Magnetic Resonance Imaging ========================================================== [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.592960.svg)](https://doi.org/10.5281/zenodo.592960) The Berkeley Advanced Reconstruction Toolbox (BART) is a free and open-source image-reconstruction framework for Computational Magnetic Resonance Imaging. The tools in this software implement various reconstruction algorithms for Magnetic Resonance Imaging. The software is intended for research use only and NOT FOR DIAGNOSTIC USE. It comes without any warranty (see LICENSE for details). For more information: https://mrirecon.github.io/bart/ Information for Contributors ---------------------------- Thank you for helping to improve BART! In order for us to be able to accept your contribution, it has to be released under the BSD license used by BART (see LICENSE file). By submitting patches to us it is understood that you agree to these terms and that you confirm that you hold all necessary rights yourself or have permission from the copyright holder. 
Please also add the name of the copyright holder and name and email of the author(s) to the copyright headers in all new or changed files. bart-0.4.02/ar_lock.sh000077500000000000000000000006411320577655200145130ustar00rootroot00000000000000#!/bin/bash set -e if command -v flock > /dev/null ; then flock `dirname $2`/.`basename $2`.lock -c "ar $*" exit 0 fi if command -v shlock > /dev/null ; then LOCK=/tmp/`basename $2`.lock trap 'rm -f ${LOCK} ; exit 1' 1 2 3 15 while true ; do if shlock -p $$ -f ${LOCK} ; then ar $* rm -rf ${LOCK} exit 0 else sleep 1 fi done fi echo "Error: no flock/shlock command!" exit 1 bart-0.4.02/build_targets.mk000066400000000000000000000011561320577655200157250ustar00rootroot00000000000000# Main build targets # TBASE=show slice crop resize join transpose squeeze flatten zeros ones flip circshift extract repmat bitmask reshape version delta copy casorati vec TFLP=scale invert conj fmac saxpy sdot spow cpyphs creal carg normalize cdf97 pattern nrmse mip avg cabs zexpj TNUM=fft fftmod fftshift noise bench threshold conv rss filter mandelbrot wavelet window var std TRECO=pics pocsense sqpics itsense nlinv nufft rof sake wave lrmatrix estdims estshift estdelay wavepsf TCALIB=ecalib ecaltwo caldir walsh cc ccapply calmat svd estvar whiten TMRI=homodyne poisson twixread fakeksp TSIM=phantom traj TIO=toimg bart-0.4.02/cmake/000077500000000000000000000000001320577655200136215ustar00rootroot00000000000000bart-0.4.02/cmake/FindATLAS.cmake000066400000000000000000000144341320577655200162760ustar00rootroot00000000000000#.rst: # FindATLAS # ------------- # # Find the ATLAS library # # Using ATLAS: # # :: # # [OPTIONAL] set(ATLAS_REQUIRE_THREADED [TRUE|FALSE]) to find threaded versions of the libraries # find_package(ATLAS REQUIRED) # include_directories(${ATLAS_INCLUDE_DIRS}) # add_executable(foo foo.cc) # target_link_libraries(foo ${ATLAS_LIBRARIES}) # -- OR -- # target_link_libraries(foo ${ATLAS_PARALLEL_LIBRARIES}) # # This module sets the following 
variables: # # :: # # ATLAS_FOUND - set to true if the library is found # ATLAS_INCLUDE_DIRS - list of required include directories # ATLAS_LIBRARIES - list of libraries to be linked # ATLAS_VERSION_MAJOR - major version number # ATLAS_VERSION_MINOR - minor version number # ATLAS_VERSION_PATCH - patch version number # ATLAS_VERSION_STRING - version number as a string (ex: "0.2.18") #============================================================================= # Copyright 2016 Hans J. Johnson # # Distributed under the OSI-approved BSD License (the "License") # # This software is distributed WITHOUT ANY WARRANTY; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. #============================================================================= # set(ATLAS_SEARCH_PATHS ${ATLAS_DIR} $ENV{ATLAS_DIR} $ENV{CMAKE_PREFIX_PATH} ${CMAKE_PREFIX_PATH} /usr /usr/local /usr/local/opt/openblas ## Mac Homebrew install path /opt/ATLAS ) set(CMAKE_PREFIX_PATH ${ATLAS_SEARCH_PATHS}) list(REMOVE_DUPLICATES CMAKE_PREFIX_PATH) ## First try to find ATLAS with NO_MODULE, ## As of 20160706 version 0.2.18 there is limited cmake support for ATLAS ## that is not as complete as this version, if found, use it ## to identify the ATLAS_VERSION_STRING and improve searching. 
find_package(ATLAS NO_MODULE QUIET)
if(ATLAS_VERSION)
  set(ATLAS_VERSION_STRING ${ATLAS_VERSION})
  unset(ATLAS_VERSION) # Use cmake conventional naming
endif()

##################################################################################################
### First search for headers
find_path(ATLAS_CBLAS_INCLUDE_DIR
  NAMES cblas.h
  PATHS ${ATLAS_SEARCH_PATHS}
  PATH_SUFFIXES include include/openblas)
find_path(ATLAS_LAPACKE_INCLUDE_DIR
  NAMES lapacke.h
  PATHS ${ATLAS_SEARCH_PATHS}
  PATH_SUFFIXES include)

##################################################################################################
# Library sub-directories probed below each search path.
set(PATH_SUFFIXES_LIST
  lib64/atlas-sse3 #openSUSE 13.2 (Harlequin)
  lib64/atlas-sse2 #openSUSE 13.2 (Harlequin)
  lib64/atlas-sse  #openSUSE 13.2 (Harlequin)
  lib64/atlas      #openSUSE 13.2 (Harlequin)
  lib64
  lib/atlas-sse3   #openSUSE 13.2 (Harlequin)
  lib/atlas-sse2   #openSUSE 13.2 (Harlequin)
  lib/atlas-sse    #openSUSE 13.2 (Harlequin)
  lib/atlas        #openSUSE 13.2 (Harlequin)
  lib
)

### Second, search for libraries
find_library(ATLAS_LIB
  NAMES atlas
  PATHS ${ATLAS_SEARCH_PATHS}
  PATH_SUFFIXES ${PATH_SUFFIXES_LIST})

# The threaded ATLAS builds prefix the library names with "pt".
set(ATLAS_THREAD_PREFIX "")
if(ATLAS_REQUIRE_THREADED)
  set(ATLAS_THREAD_PREFIX "pt")
endif()

# These lookups use ATLAS_SEARCH_PATHS: this module never defines
# LAPACKE_SEARCH_PATHS, so referencing it here expanded to an empty PATHS
# list unless FindLAPACKE happened to run first.
find_library(CBLAS_LIB
  NAMES ${ATLAS_THREAD_PREFIX}cblas
  PATHS ${ATLAS_SEARCH_PATHS}
  PATH_SUFFIXES ${PATH_SUFFIXES_LIST})
find_library(LAPACK_LIB
  NAMES ${ATLAS_THREAD_PREFIX}lapack
  PATHS ${ATLAS_SEARCH_PATHS}
  PATH_SUFFIXES ${PATH_SUFFIXES_LIST})
find_library(LAPACKE_LIB
  NAMES ${ATLAS_THREAD_PREFIX}lapacke
  PATHS ${ATLAS_SEARCH_PATHS}
  PATH_SUFFIXES ${PATH_SUFFIXES_LIST})
find_library(F77BLAS_LIB
  NAMES ${ATLAS_THREAD_PREFIX}f77blas
  PATHS ${ATLAS_SEARCH_PATHS}
  PATH_SUFFIXES ${PATH_SUFFIXES_LIST})

# ------------------------------------------------------------------------
# Extract version information
# ------------------------------------------------------------------------
# WARNING: We may not be able to determine the version of some ATLAS
set(ATLAS_VERSION_MAJOR 0)
set(ATLAS_VERSION_MINOR 0)
set(ATLAS_VERSION_PATCH 0)
if(ATLAS_VERSION_STRING)
  string(REGEX REPLACE "([0-9]+).([0-9]+).([0-9]+)" "\\1" ATLAS_VERSION_MAJOR "${ATLAS_VERSION_STRING}")
  string(REGEX REPLACE "([0-9]+).([0-9]+).([0-9]+)" "\\2" ATLAS_VERSION_MINOR "${ATLAS_VERSION_STRING}")
  string(REGEX REPLACE "([0-9]+).([0-9]+).([0-9]+)" "\\3" ATLAS_VERSION_PATCH "${ATLAS_VERSION_STRING}")
else()
  set(ATLAS_VERSION_STRING "ATLAS.UNKOWN.VERSION")
endif()

#======================
# Checks 'REQUIRED', 'QUIET' and versions.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ATLAS
  FOUND_VAR ATLAS_FOUND
  REQUIRED_VARS
    ATLAS_CBLAS_INCLUDE_DIR
    ATLAS_LAPACKE_INCLUDE_DIR
    LAPACKE_LIB
    LAPACK_LIB
    F77BLAS_LIB
    CBLAS_LIB
    ATLAS_LIB
  VERSION_VAR ATLAS_VERSION_STRING
)

if (ATLAS_FOUND)
  # Export both header locations; lapacke.h may live in a different
  # directory than cblas.h (the two are located by separate find_path calls).
  set(ATLAS_INCLUDE_DIRS ${ATLAS_CBLAS_INCLUDE_DIR} ${ATLAS_LAPACKE_INCLUDE_DIR})
  list(REMOVE_DUPLICATES ATLAS_INCLUDE_DIRS)
  if("${CMAKE_C_COMPILER_ID}" MATCHES ".*Clang.*" OR
     "${CMAKE_C_COMPILER_ID}" MATCHES ".*GNU.*" OR
     "${CMAKE_C_COMPILER_ID}" MATCHES ".*Intel.*"
     ) #NOT MSVC
    set(MATH_LIB m)
  endif()
  list(APPEND ATLAS_LIBRARIES ${LAPACKE_LIB} ${LAPACK_LIB} ${F77BLAS_LIB} ${CBLAS_LIB} ${ATLAS_LIB} ${MATH_LIB})
endif()

mark_as_advanced(
  ATLAS_FOUND
  ATLAS_INCLUDE_DIRS
  ATLAS_LIBRARIES
  ATLAS_VERSION_MAJOR
  ATLAS_VERSION_MINOR
  ATLAS_VERSION_PATCH
  ATLAS_VERSION_STRING
)

## For debugging
message(STATUS "ATLAS_FOUND :${ATLAS_FOUND}: - set to true if the library is found")
message(STATUS "ATLAS_INCLUDE_DIRS :${ATLAS_INCLUDE_DIRS}: - list of required include directories")
message(STATUS "ATLAS_LIBRARIES :${ATLAS_LIBRARIES}: - list of libraries to be linked")
message(STATUS "ATLAS_VERSION_MAJOR :${ATLAS_VERSION_MAJOR}: - major version number")
message(STATUS "ATLAS_VERSION_MINOR :${ATLAS_VERSION_MINOR}: - minor version number")
message(STATUS "ATLAS_VERSION_PATCH :${ATLAS_VERSION_PATCH}: - patch version number")
message(STATUS "ATLAS_VERSION_STRING :${ATLAS_VERSION_STRING}: - version number as a string")
bart-0.4.02/cmake/FindFFTW.cmake000066400000000000000000000034741320577655200162020ustar00rootroot00000000000000## FFTW can be compiled and subsequently linked against ## various data types. ## There is a single set of include files, and then muttiple libraries, ## One for each type. I.e. libfftw.a-->double, libfftwf.a-->float set(FFTW_INC_SEARCHPATH /sw/include /usr/include /usr/local/include /usr/include/fftw /usr/include/fftw3 /usr/local/include/fftw /usr/local/include/fftw3 ) find_path(FFTW_INCLUDE_PATH fftw3.h ${FFTW_INC_SEARCHPATH}) if(FFTW_INCLUDE_PATH) file(TO_CMAKE_PATH "${FFTW_INCLUDE_PATH}" FFTW_INCLUDE_PATH) set(FFTW_INCLUDE ${FFTW_INCLUDE_PATH}) endif() if(FFTW_INCLUDE) include_directories(${FFTW_INCLUDE}) endif() get_filename_component(FFTW_INSTALL_BASE_PATH ${FFTW_INCLUDE_PATH} PATH) set(FFTW_LIB_SEARCHPATH ${FFTW_INSTALL_BASE_PATH}/lib ${FFTW_INSTALL_BASE_PATH}/lib64 /usr/lib/fftw /usr/local/lib/fftw ) if(USE_FFTWD) mark_as_advanced(FFTWD_LIB) find_library(FFTWD_LIB fftw3 ${FFTW_LIB_SEARCHPATH}) #Double Precision Lib find_library(FFTWD_THREADS_LIB fftw3_threads ${FFTW_LIB_SEARCHPATH}) #Double Precision Lib only if compiled with threads support if(FFTWD_LIB) set(FFTWD_FOUND 1) get_filename_component(FFTW_LIBDIR ${FFTWD_LIB} PATH) if(FFTWD_THREADS_LIB) set(FFTWD_LIB ${FFTWD_LIB} ${FFTWD_THREADS_LIB} ) endif() endif() endif() if(USE_FFTWF) mark_as_advanced(FFTWF_LIB) find_library(FFTWF_LIB fftw3f ${FFTW_LIB_SEARCHPATH}) #Single Precision Lib find_library(FFTWF_THREADS_LIB fftw3f_threads ${FFTW_LIB_SEARCHPATH}) #Single Precision Lib only if compiled with threads support if(FFTWF_LIB) set(FFTWF_FOUND 1) get_filename_component(FFTW_LIBDIR ${FFTWF_LIB} PATH) if(FFTWF_THREADS_LIB) set(FFTWF_LIB ${FFTWF_LIB} ${FFTWF_THREADS_LIB} ) endif() endif() endif() set(FFTWD_LIBRARIES ${FFTWD_LIB}) set(FFTWF_LIBRARIES ${FFTWF_LIB}) bart-0.4.02/cmake/FindLAPACKE.cmake000066400000000000000000000171001320577655200164630ustar00rootroot00000000000000#.rst: # FindLAPACKE # 
------------- # # Find the LAPACKE library # # Using LAPACKE: # # :: # # find_package(LAPACKE REQUIRED) # include_directories(${LAPACKE_INCLUDE_DIRS}) # add_executable(foo foo.cc) # target_link_libraries(foo ${LAPACKE_LIBRARIES}) # # This module sets the following variables: # # :: # # LAPACKE_FOUND - set to true if the library is found # LAPACKE_INCLUDE_DIRS - list of required include directories # LAPACKE_LIBRARIES - list of libraries to be linked # LAPACKE_VERSION_MAJOR - major version number # LAPACKE_VERSION_MINOR - minor version number # LAPACKE_VERSION_PATCH - patch version number # LAPACKE_VERSION_STRING - version number as a string (ex: "0.2.18") #============================================================================= # Copyright 2016 Hans J. Johnson # # Distributed under the OSI-approved BSD License (the "License") # # This software is distributed WITHOUT ANY WARRANTY; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. #============================================================================= # set(LAPACKE_SEARCH_PATHS ${LAPACKE_DIR} $ENV{LAPACKE_DIR} $ENV{CMAKE_PREFIX_PATH} ${CMAKE_PREFIX_PATH} /usr /usr/local /usr/local/opt/lapack ## Mac Homebrew install path /opt/LAPACKE ) message(STATUS "LAPACKE_SEARCH_PATHS: ${LAPACKE_SEARCH_PATHS}") set(CMAKE_PREFIX_PATH ${LAPACKE_SEARCH_PATHS}) list(REMOVE_DUPLICATES CMAKE_PREFIX_PATH) ## First try to find LAPACKE with NO_MODULE, ## As of 20160706 version 0.2.18 there is limited cmake support for LAPACKE ## that is not as complete as this version, if found, use it ## to identify the LAPACKE_VERSION_STRING and improve searching. find_package(LAPACKE NO_MODULE QUIET) if(LAPACKE_FOUND) if(EXISTS ${LAPACKE_DIR}/lapacke-config-version.cmake) include(${LAPACKE_DIR}/lapacke-config-version.cmake) set(LAPACKE_VERSION_STRING ${PACKAGE_VERSION}) unset(PACKAGE_VERSION) # Use cmake conventional naming endif() find_package(LAPACK NO_MODULE QUIET) #Require matching versions here! 
find_package(BLAS NO_MODULE QUIET) #Require matching versions here! endif() ################################################################################################## ### First search for headers find_path(LAPACKE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${LAPACKE_SEARCH_PATHS} PATH_SUFFIXES include include/lapack) find_path(LAPACKE_LAPACKE_INCLUDE_DIR NAMES lapacke.h PATHS ${LAPACKE_SEARCH_PATHS} PATH_SUFFIXES include) ################################################################################################## ### Second, search for libraries set(PATH_SUFFIXES_LIST lib64 lib ) find_library(LAPACKE_LIB NAMES lapacke PATHS ${LAPACKE_SEARCH_PATHS} PATH_SUFFIXES ${PATH_SUFFIXES_LIST}) find_library(CBLAS_LIB NAMES cblas PATHS ${LAPACKE_SEARCH_PATHS} PATH_SUFFIXES ${PATH_SUFFIXES_LIST}) find_library(LAPACK_LIB NAMES lapack PATHS ${LAPACKE_SEARCH_PATHS} PATH_SUFFIXES ${PATH_SUFFIXES_LIST}) find_library(BLAS_LIB NAMES blas PATHS ${LAPACKE_SEARCH_PATHS} PATH_SUFFIXES ${PATH_SUFFIXES_LIST}) ## TODO: Get version components # ------------------------------------------------------------------------ # Extract version information # ------------------------------------------------------------------------ # WARNING: We may not be able to determine the version of some LAPACKE set(LAPACKE_VERSION_MAJOR 0) set(LAPACKE_VERSION_MINOR 0) set(LAPACKE_VERSION_PATCH 0) if(LAPACKE_VERSION_STRING) string(REGEX REPLACE "([0-9]+).([0-9]+).([0-9]+)" "\\1" LAPACKE_VERSION_MAJOR "${LAPACKE_VERSION_STRING}") string(REGEX REPLACE "([0-9]+).([0-9]+).([0-9]+)" "\\2" LAPACKE_VERSION_MINOR "${LAPACKE_VERSION_STRING}") string(REGEX REPLACE "([0-9]+).([0-9]+).([0-9]+)" "\\3" LAPACKE_VERSION_PATCH "${LAPACKE_VERSION_STRING}") endif() #====================== # Checks 'REQUIRED', 'QUIET' and versions. 
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LAPACKE
  FOUND_VAR LAPACKE_FOUND
  REQUIRED_VARS
    LAPACKE_CBLAS_INCLUDE_DIR
    LAPACKE_LAPACKE_INCLUDE_DIR
    LAPACKE_LIB
    LAPACK_LIB
    CBLAS_LIB
    BLAS_LIB
  VERSION_VAR LAPACKE_VERSION_STRING
)

if (LAPACKE_FOUND)
  # Export both header locations; lapacke.h may live in a different
  # directory than cblas.h (the two are located by separate find_path calls).
  set(LAPACKE_INCLUDE_DIRS ${LAPACKE_CBLAS_INCLUDE_DIR} ${LAPACKE_LAPACKE_INCLUDE_DIR})
  list(REMOVE_DUPLICATES LAPACKE_INCLUDE_DIRS)
  if("${CMAKE_C_COMPILER_ID}" MATCHES ".*Clang.*" OR
     "${CMAKE_C_COMPILER_ID}" MATCHES ".*GNU.*" OR
     "${CMAKE_C_COMPILER_ID}" MATCHES ".*Intel.*"
     ) #NOT MSVC
    set(MATH_LIB m)
  endif()
  list(APPEND LAPACKE_LIBRARIES ${LAPACKE_LIB} ${LAPACK_LIB} ${BLAS_LIB} ${CBLAS_LIB})

  # Check for a common combination, and find required gfortran support libraries
  # The if(1) switch selects the warning-only path; the automated detection
  # in the else() branch is kept for reference but is currently disabled.
  if(1)
    if("${CMAKE_C_COMPILER_ID}" MATCHES ".*Clang.*" AND "${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU")
      # message() concatenates its arguments without a separator, so each
      # fragment carries its own trailing space or newline.
      message(STATUS "\n\n WARNING: ${CMAKE_C_COMPILER} identified as ${CMAKE_C_COMPILER_ID}\n"
                     "AND: ${CMAKE_Fortran_COMPILER} identified as ${CMAKE_Fortran_COMPILER_ID}\n"
                     "\n"
                     "may be require special configurations. The most common is the need to "
                     "explicitly link C programs against the gfortran support library.")
    endif()
  else()
    ## This code automated code is hard to determine if it is robust in many different environments.
    # Check for a common combination, and find required gfortran support libraries
    if("${CMAKE_C_COMPILER_ID}" MATCHES ".*Clang.*" AND "${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU")
      include(FortranCInterface)
      FortranCInterface_VERIFY()
      if(NOT FortranCInterface_VERIFIED_C)
        message(FATAL_ERROR "C and fortran compilers are not compatible:\n${CMAKE_Fortran_COMPILER}:${CMAKE_C_COMPILER}")
      endif()

      execute_process(COMMAND ${CMAKE_Fortran_COMPILER} -print-file-name=libgfortran.a
                      OUTPUT_VARIABLE FORTRANSUPPORTLIB
                      ERROR_QUIET)
      # Quoted so that an empty result does not turn into a missing argument.
      string(STRIP "${FORTRANSUPPORTLIB}" FORTRANSUPPORTLIB)
      if(EXISTS "${FORTRANSUPPORTLIB}")
        list(APPEND LAPACKE_LIBRARIES ${FORTRANSUPPORTLIB})
        message(STATUS "Appending fortran support lib: ${FORTRANSUPPORTLIB}")
      else()
        message(FATAL_ERROR "COULD NOT FIND libgfortran.a support library:${FORTRANSUPPORTLIB}:")
      endif()
    endif()
  endif()
  list(APPEND LAPACKE_LIBRARIES ${MATH_LIB})
endif()

mark_as_advanced(
  LAPACKE_FOUND
  LAPACKE_INCLUDE_DIRS
  LAPACKE_LIBRARIES
  LAPACKE_VERSION_MAJOR
  LAPACKE_VERSION_MINOR
  LAPACKE_VERSION_PATCH
  LAPACKE_VERSION_STRING
)

## For debugging
message(STATUS "LAPACKE_FOUND :${LAPACKE_FOUND}: - set to true if the library is found")
message(STATUS "LAPACKE_INCLUDE_DIRS :${LAPACKE_INCLUDE_DIRS}: - list of required include directories")
message(STATUS "LAPACKE_LIBRARIES :${LAPACKE_LIBRARIES}: - list of libraries to be linked")
message(STATUS "LAPACKE_VERSION_MAJOR :${LAPACKE_VERSION_MAJOR}: - major version number")
message(STATUS "LAPACKE_VERSION_MINOR :${LAPACKE_VERSION_MINOR}: - minor version number")
message(STATUS "LAPACKE_VERSION_PATCH :${LAPACKE_VERSION_PATCH}: - patch version number")
message(STATUS "LAPACKE_VERSION_STRING :${LAPACKE_VERSION_STRING}: - version number as a string")
MATLAB_MEX_LIBRARY: path to libmex.lib
# MATLAB_MX_LIBRARY: path to libmx.lib
# MATLAB_MAT_LIBRARY: path to libmat.lib # added
# MATLAB_ENG_LIBRARY: path to libeng.lib
# MATLAB_ROOT: path to Matlab's root directory

# This file is part of Gerardus
#
# This is a derivative work of file FindMatlab.cmake released with
# CMake v2.8, because the original seems to be a bit outdated and
# doesn't work with my Windows XP and Visual Studio 10 install
#
# (Note that the original file does work for Ubuntu Natty)
#
# Author: Ramon Casero , Tom Doel
# Version: 0.2.3
# $Rev$
# $Date$
#
# The original file was copied from an Ubuntu Linux install
# /usr/share/cmake-2.8/Modules/FindMatlab.cmake

#=============================================================================
# Copyright 2005-2009 Kitware, Inc.
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)

set(MATLAB_FOUND 0)

if(WIN32)

  # Search for a version of Matlab available, starting from the most modern one to older versions.
  # A failed registry lookup yields "/registry", so the guard below keeps trying
  # older versions until one lookup succeeds (or MATLAB_ROOT was given by the user).
  foreach(MATVER "7.14" "7.11" "7.10" "7.9" "7.8" "7.7" "7.6" "7.5" "7.4")
    if((NOT DEFINED MATLAB_ROOT)
        OR ("${MATLAB_ROOT}" STREQUAL "")
        OR ("${MATLAB_ROOT}" STREQUAL "/registry"))
      get_filename_component(MATLAB_ROOT
        "[HKEY_LOCAL_MACHINE\\SOFTWARE\\MathWorks\\MATLAB\\${MATVER};MATLABROOT]"
        ABSOLUTE)
      set(MATLAB_VERSION ${MATVER})
    endif()
  endforeach()

  # Directory name depending on whether the Windows architecture is 32
  # bit or 64 bit.
  # CMAKE_SIZEOF_VOID_P can be undefined when this module runs before a
  # language has been enabled; only then fall back to 64 bit instead of
  # unconditionally overriding the value detected by CMake.
  if(NOT CMAKE_SIZEOF_VOID_P)
    set(CMAKE_SIZEOF_VOID_P 8)
  endif()
  if(CMAKE_SIZEOF_VOID_P MATCHES "4")
    set(WINDIR "win32")
  elseif(CMAKE_SIZEOF_VOID_P MATCHES "8")
    set(WINDIR "win64")
  else()
    message(FATAL_ERROR "CMAKE_SIZEOF_VOID_P (${CMAKE_SIZEOF_VOID_P}) doesn't indicate a valid platform")
  endif()

  # Import libraries on Windows are named libmex.lib, libmx.lib, ...
  # (the find_library() calls below append this extension to the name).
  set(LIBRARY_EXTENSION lib)

  # Folder where the MEX libraries are, depending of the Windows compiler
  if(${CMAKE_GENERATOR} MATCHES "Visual Studio 6")
    set(MATLAB_LIBRARIES_DIR "${MATLAB_ROOT}/extern/lib/${WINDIR}/microsoft/msvc60")
  elseif(${CMAKE_GENERATOR} MATCHES "Visual Studio 7")
    # Assume people are generally using Visual Studio 7.1,
    # if using 7.0 need to link to: ../extern/lib/${WINDIR}/microsoft/msvc70
    set(MATLAB_LIBRARIES_DIR "${MATLAB_ROOT}/extern/lib/${WINDIR}/microsoft/msvc71")
    # set(MATLAB_LIBRARIES_DIR "${MATLAB_ROOT}/extern/lib/${WINDIR}/microsoft/msvc70")
  elseif(${CMAKE_GENERATOR} MATCHES "Borland")
    # Assume people are generally using Borland 5.4,
    # if using 7.0 need to link to: ../extern/lib/${WINDIR}/microsoft/msvc70
    set(MATLAB_LIBRARIES_DIR "${MATLAB_ROOT}/extern/lib/${WINDIR}/microsoft/bcc54")
    # set(MATLAB_LIBRARIES_DIR "${MATLAB_ROOT}/extern/lib/${WINDIR}/microsoft/bcc50")
    # set(MATLAB_LIBRARIES_DIR "${MATLAB_ROOT}/extern/lib/${WINDIR}/microsoft/bcc51")
  elseif(${CMAKE_GENERATOR} MATCHES "Visual Studio")
    # If the compiler is Visual Studio, but not any of the specific
    # versions above, we try our luck with the microsoft directory
    set(MATLAB_LIBRARIES_DIR "${MATLAB_ROOT}/extern/lib/${WINDIR}/microsoft/")
  else()
    message(FATAL_ERROR "Generator not compatible: ${CMAKE_GENERATOR}")
  endif()

else()

  if((NOT DEFINED MATLAB_ROOT) OR ("${MATLAB_ROOT}" STREQUAL ""))

    if(APPLE)
      # If this is a Mac and the attempts to find MATLAB_ROOT have so far failed,
      # we look in the applications folder

      # Search for a version of Matlab available, starting from the most modern one to older versions
      foreach(MATVER "R2017b" "R2017a" "R2016b" "R2016a" "R2015b" "R2015a" "R2014b" "R2014a" "R2013b" "R2013a" "R2012b" "R2012a" "R2011b" "R2011a" "R2010b" "R2010a" "R2009b" "R2009a" "R2008b")
        if(EXISTS /Applications/MATLAB_${MATVER}.app)
          set(MATLAB_ROOT /Applications/MATLAB_${MATVER}.app)
          # Stop at the first (= newest) installation; without this the
          # loop would keep overwriting MATLAB_ROOT and end on the oldest one.
          break()
        endif()
      endforeach()
    endif()

    ## Search for matlab
    find_program(MATLAB_EXEC
      NAMES matlab
      HINTS ENV PATH
      PATHS ${MATLAB_ROOT}/bin
      DOC "The command line matlab program"
    )
    # Bail out before deriving any paths from a NOTFOUND value.
    if(NOT MATLAB_EXEC)
      message(FATAL_ERROR "Matlab not found")
    endif()

    # Resolve symlinks (e.g. /usr/local/bin/matlab) so that the root
    # directory can be derived from the real location of the executable.
    get_filename_component(MATLAB_EXEC "${MATLAB_EXEC}" REALPATH)
    get_filename_component(MATLAB_EXEC_DIR "${MATLAB_EXEC}" DIRECTORY)
    if(NOT MATLAB_ROOT)
      get_filename_component(MATLAB_ROOT "${MATLAB_EXEC_DIR}" DIRECTORY)
    endif()
  endif()

  # Check if this is a Mac
  if(APPLE)
    set(LIBRARY_EXTENSION dylib)
  else()
    set(LIBRARY_EXTENSION so)
  endif()

  find_program( MATLAB_MEX_PATH mex
    HINTS ENV PATH
    PATHS ${MATLAB_ROOT}/bin
    DOC "The mex program path"
  )

  find_program( MATLAB_MEXEXT_PATH mexext
    HINTS ENV PATH
    PATHS ${MATLAB_ROOT}/bin
    DOC "The mexext program path"
  )

  #Get default mex extentension
  execute_process(
    COMMAND ${MATLAB_MEXEXT_PATH}
    OUTPUT_STRIP_TRAILING_WHITESPACE
    OUTPUT_VARIABLE MATLAB_MEX_EXT
  )

  ## Remove the mex prefix to find the platform name
  # (the input is quoted so that an empty mexext output does not turn
  # string(REPLACE) into a call with too few arguments)
  if(APPLE)
    string(REPLACE "mex" "" MATLAB_PLATFORM_DIR "${MATLAB_MEX_EXT}")
  else()
    string(REPLACE "mex" "glnx" MATLAB_PLATFORM_DIR "${MATLAB_MEX_EXT}")
  endif()

  set(MATLAB_LIBRARIES_DIR ${MATLAB_ROOT}/bin/${MATLAB_PLATFORM_DIR})

endif()

# Get path to the MEX libraries
find_library(MATLAB_MEX_LIBRARY
  NAMES libmex.${LIBRARY_EXTENSION}
  PATHS ${MATLAB_LIBRARIES_DIR}
  NO_DEFAULT_PATH
)
find_library(MATLAB_MX_LIBRARY
  NAMES libmx.${LIBRARY_EXTENSION}
  PATHS ${MATLAB_LIBRARIES_DIR}
  NO_DEFAULT_PATH
)
find_library(MATLAB_MAT_LIBRARY
  NAMES libmat.${LIBRARY_EXTENSION}
  PATHS ${MATLAB_LIBRARIES_DIR}
  NO_DEFAULT_PATH
)
find_library(MATLAB_ENG_LIBRARY
  NAMES libeng.${LIBRARY_EXTENSION}
  PATHS ${MATLAB_LIBRARIES_DIR}
  NO_DEFAULT_PATH
)

# Get path to the include directory
find_path(MATLAB_INCLUDE_DIR
  NAMES "mex.h"
  PATHS "${MATLAB_ROOT}/extern/include"
)

# This is common to UNIX and Win32:
set(MATLAB_LIBRARIES
  ${MATLAB_MX_LIBRARY}
  ${MATLAB_MEX_LIBRARY}
  ${MATLAB_MAT_LIBRARY}
  ${MATLAB_ENG_LIBRARY}
)

# Every library is tested individually: if(MATLAB_LIBRARIES) on the list
# would only look at the "-NOTFOUND" suffix of its last element.
if(MATLAB_INCLUDE_DIR
    AND MATLAB_MX_LIBRARY
    AND MATLAB_MEX_LIBRARY
    AND MATLAB_MAT_LIBRARY
    AND MATLAB_ENG_LIBRARY)
  set(MATLAB_FOUND 1)
endif()

mark_as_advanced(
  MATLAB_LIBRARIES
  MATLAB_MX_LIBRARY
  MATLAB_MEX_LIBRARY
  MATLAB_MAT_LIBRARY
  MATLAB_ENG_LIBRARY
  MATLAB_INCLUDE_DIR
  MATLAB_FOUND
  MATLAB_ROOT
  MATLAB_MEX_PATH
  MATLAB_MEXEXT_PATH
  MATLAB_MEX_EXT
)

#####################
#####################
# Provide a macro to build the mex files from
# within CMake
#####################
# BuildMex.cmake
# \author Kent Williams norman-k-williams@uiowa.edu
# \author Hans J. Johnson hans-johnson@uiowa.edu
include(CMakeParseArguments)
include_directories(${MATLAB_INCLUDE_DIR})

#
# BuildMex -- arguments
# MEXNAME = root of mex library name
# TARGETDIR = location for the mex library files to be created
# SOURCE = list of source files
# LIBRARIES = libraries needed to link mex library
#
# Creates a shared library target named MEXNAME whose file name is
# "<MEXNAME>.<MATLAB_MEX_EXT>" (no "lib" prefix) in TARGETDIR and links it
# against LIBRARIES plus the Matlab mex, mx and eng libraries.
macro(BuildMex)
  set(oneValueArgs MEXNAME TARGETDIR)
  set(multiValueArgs SOURCE LIBRARIES)
  cmake_parse_arguments(BuildMex "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  # message("MEXNAME=${BuildMex_MEXNAME} SOURCE=${BuildMex_SOURCE} LIBRARIES=${BuildMex_LIBRARIES}")

  # The PROPERTIES keyword is mandatory (without it the remaining arguments
  # are taken as file names and nothing is set), and COMPILE_DEFINITIONS
  # takes the bare macro name without a "-D" prefix.
  set_source_files_properties(${BuildMex_SOURCE}
    PROPERTIES COMPILE_DEFINITIONS MATLAB_MEX_FILE
  )
  add_library(${BuildMex_MEXNAME} SHARED ${BuildMex_SOURCE})
  set_target_properties(${BuildMex_MEXNAME} PROPERTIES
    SUFFIX ".${MATLAB_MEX_EXT}"
    PREFIX ""
    RUNTIME_OUTPUT_DIRECTORY "${BuildMex_TARGETDIR}"
    ARCHIVE_OUTPUT_DIRECTORY "${BuildMex_TARGETDIR}"
    LIBRARY_OUTPUT_DIRECTORY "${BuildMex_TARGETDIR}"
  )
  target_link_libraries(${BuildMex_MEXNAME}
    ${BuildMex_LIBRARIES}
    ${MATLAB_MEX_LIBRARY}
    ${MATLAB_MX_LIBRARY}
    ${MATLAB_ENG_LIBRARY})
endmacro(BuildMex)
bart-0.4.02/cmake/FindOpenBLAS.cmake000066400000000000000000000147671320577655200170060ustar00rootroot00000000000000#.rst: # FindOpenBLAS # ------------- # # Find the OpenBLAS library # # Using OpenBLAS: # # :: # # find_package(OpenBLAS REQUIRED) # include_directories(${OpenBLAS_INCLUDE_DIRS}) #
add_executable(foo foo.cc) # target_link_libraries(foo ${OpenBLAS_LIBRARIES}) # -- OR -- # target_link_libraries(foo ${OpenBLAS_PARALLEL_LIBRARIES}) # # This module sets the following variables: # # :: # # OpenBLAS_FOUND - set to true if the library is found # OpenBLAS_INCLUDE_DIRS - list of required include directories # OpenBLAS_LIBRARIES - list of libraries to be linked # OpenBLAS_HAS_PARALLEL_LIBRARIES - determine if there are parallel libraries compiled # OpenBLAS_PARALLEL_LIBRARIES - list of libraries for parallel implementations # OpenBLAS_VERSION_MAJOR - major version number # OpenBLAS_VERSION_MINOR - minor version number # OpenBLAS_VERSION_PATCH - patch version number # OpenBLAS_VERSION_STRING - version number as a string (ex: "0.2.18") #============================================================================= # Copyright 2016 Hans J. Johnson # # Distributed under the OSI-approved BSD License (the "License") # # This software is distributed WITHOUT ANY WARRANTY; without even the # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. #============================================================================= # set(OpenBLAS_HAS_PARALLEL_LIBRARIES FALSE) set(OpenBLAS_SEARCH_PATHS ${OpenBLAS_DIR} $ENV{OpenBLAS_DIR} $ENV{CMAKE_PREFIX_PATH} ${CMAKE_PREFIX_PATH} /usr /usr/local /usr/local/opt/openblas ## Mac Homebrew install path /opt/OpenBLAS ) set(CMAKE_PREFIX_PATH ${OpenBLAS_SEARCH_PATHS}) list(REMOVE_DUPLICATES CMAKE_PREFIX_PATH) ## First try to find OpenBLAS with NO_MODULE, ## As of 20160706 version 0.2.18 there is limited cmake support for OpenBLAS ## that is not as complete as this version, if found, use it ## to identify the OpenBLAS_VERSION_STRING and improve searching. 
find_package(OpenBLAS NO_MODULE QUIET)
if(OpenBLAS_VERSION)
  set(OpenBLAS_VERSION_STRING ${OpenBLAS_VERSION})
  unset(OpenBLAS_VERSION) # Use cmake conventional naming
endif()

##################################################################################################
### First search for headers
find_path(OpenBLAS_CBLAS_INCLUDE_DIR
  NAMES cblas.h
  PATHS ${OpenBLAS_SEARCH_PATHS}
  PATH_SUFFIXES include include/openblas)
find_path(OpenBLAS_LAPACKE_INCLUDE_DIR
  NAMES lapacke.h
  PATHS ${OpenBLAS_SEARCH_PATHS}
  PATH_SUFFIXES include)

##################################################################################################
### Second, search for libraries
set(PATH_SUFFIXES_LIST
  lib64
  lib
)
find_library(OpenBLAS_LIB
  NAMES openblas
  PATHS ${OpenBLAS_SEARCH_PATHS}
  PATH_SUFFIXES ${PATH_SUFFIXES_LIST})
# Remember the directory of the serial library; it is searched first when
# looking for the parallel variant below.
if(EXISTS "${OpenBLAS_LIB}")
  get_filename_component(OpenBLAS_LIB_DIR "${OpenBLAS_LIB}" DIRECTORY)
endif()

## Find the named parallel version of openblas
# The parallel library carries the release in its file name
# (openblasp-r<version>), so a list of candidate versions is probed; the
# version reported by the config package (if any) is tried first.
set(OpenBLAS_SEARCH_VERSIONS ${OpenBLAS_VERSION_STRING} 0.2.19 0.2.18 0.2.17 0.2.16)
list(REMOVE_DUPLICATES OpenBLAS_SEARCH_VERSIONS)
foreach(checkVersion ${OpenBLAS_SEARCH_VERSIONS})
  find_library(OpenBLAS_PARALLEL_LIB
    NAMES openblasp-r${checkVersion}
    PATHS ${OpenBLAS_LIB_DIR} ${OpenBLAS_SEARCH_PATHS}
    PATH_SUFFIXES ${PATH_SUFFIXES_LIST}
  )
  if(EXISTS "${OpenBLAS_PARALLEL_LIB}")
    if(NOT OpenBLAS_VERSION_STRING)
      set(OpenBLAS_VERSION_STRING ${checkVersion})
    endif()
    set(OpenBLAS_HAS_PARALLEL_LIBRARIES ON)
    break()
  endif()
endforeach()

# ------------------------------------------------------------------------
# Extract version information
# ------------------------------------------------------------------------

# WARNING: We may not be able to determine the version of some OpenBLAS
set(OpenBLAS_VERSION_MAJOR 0)
set(OpenBLAS_VERSION_MINOR 0)
set(OpenBLAS_VERSION_PATCH 0)
if(OpenBLAS_VERSION_STRING)
  # Anchored match with escaped dots: the components are taken from the
  # capture groups, so trailing text (e.g. "0.2.18.dev") cannot leak into
  # them and a string that is not of the form X.Y.Z leaves the defaults.
  if(OpenBLAS_VERSION_STRING MATCHES "^([0-9]+)\\.([0-9]+)\\.([0-9]+)")
    set(OpenBLAS_VERSION_MAJOR "${CMAKE_MATCH_1}")
    set(OpenBLAS_VERSION_MINOR "${CMAKE_MATCH_2}")
    set(OpenBLAS_VERSION_PATCH "${CMAKE_MATCH_3}")
  endif()
endif()

#======================
# Checks 'REQUIRED', 'QUIET' and versions.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(OpenBLAS
  FOUND_VAR OpenBLAS_FOUND
  REQUIRED_VARS
    OpenBLAS_CBLAS_INCLUDE_DIR
    OpenBLAS_LAPACKE_INCLUDE_DIR
    OpenBLAS_LIB
  VERSION_VAR OpenBLAS_VERSION_STRING
)

if (OpenBLAS_FOUND)
  # cblas.h and lapacke.h may live in different directories
  # (e.g. include/openblas vs. include), so both are exported.
  set(OpenBLAS_INCLUDE_DIRS ${OpenBLAS_CBLAS_INCLUDE_DIR} ${OpenBLAS_LAPACKE_INCLUDE_DIR})
  list(REMOVE_DUPLICATES OpenBLAS_INCLUDE_DIRS)
  if("${CMAKE_C_COMPILER_ID}" MATCHES ".*Clang.*" OR
     "${CMAKE_C_COMPILER_ID}" MATCHES ".*GNU.*" OR
     "${CMAKE_C_COMPILER_ID}" MATCHES ".*Intel.*"
      ) #NOT MSVC
    set(MATH_LIB m)
  endif()
  list(APPEND OpenBLAS_LIBRARIES ${OpenBLAS_LIB} ${MATH_LIB})
  if(OpenBLAS_HAS_PARALLEL_LIBRARIES)
    list(APPEND OpenBLAS_PARALLEL_LIBRARIES ${OpenBLAS_PARALLEL_LIB})
  endif()
endif()

mark_as_advanced(
  OpenBLAS_FOUND
  OpenBLAS_INCLUDE_DIRS
  OpenBLAS_LIBRARIES
  OpenBLAS_HAS_PARALLEL_LIBRARIES
  OpenBLAS_PARALLEL_LIBRARIES
  OpenBLAS_VERSION_MAJOR
  OpenBLAS_VERSION_MINOR
  OpenBLAS_VERSION_PATCH
  OpenBLAS_VERSION_STRING
)

## For debugging
message(STATUS "OpenBLAS_FOUND :${OpenBLAS_FOUND}: - set to true if the library is found")
message(STATUS "OpenBLAS_INCLUDE_DIRS :${OpenBLAS_INCLUDE_DIRS}: - list of required include directories")
message(STATUS "OpenBLAS_LIBRARIES :${OpenBLAS_LIBRARIES}: - list of libraries to be linked")
message(STATUS "OpenBLAS_HAS_PARALLEL_LIBRARIES :${OpenBLAS_HAS_PARALLEL_LIBRARIES}: - determine if there are parallel libraries compiled")
message(STATUS "OpenBLAS_PARALLEL_LIBRARIES :${OpenBLAS_PARALLEL_LIBRARIES}: - list of libraries for parallel implementations")
message(STATUS "OpenBLAS_VERSION_MAJOR :${OpenBLAS_VERSION_MAJOR}: - major version number")
message(STATUS "OpenBLAS_VERSION_MINOR :${OpenBLAS_VERSION_MINOR}: - minor version number")
message(STATUS "OpenBLAS_VERSION_PATCH :${OpenBLAS_VERSION_PATCH}: - patch version number") message(STATUS "OpenBLAS_VERSION_STRING :${OpenBLAS_VERSION_STRING}: - version number as a string") bart-0.4.02/doc/000077500000000000000000000000001320577655200133065ustar00rootroot00000000000000bart-0.4.02/doc/bart.1000066400000000000000000000004661320577655200143260ustar00rootroot00000000000000.TH BART 1 .SH NAME bart - Berkeley Advanced Reconstruction Toolbox .SH SYNOPSIS .B bart .IR command [\fB\-h\fR] ... .SH DESCRIPTION .B bart invokes a command from the Berkeley Advanced Reconstruction Toolbox. .SH AUTHOR BART Developer Team and Contributors. .SH SEE ALSO .B https://mrirecon.github.io/bart/ bart-0.4.02/doc/building.txt000066400000000000000000000101171320577655200156440ustar00rootroot00000000000000 0. Introduction BART has a build system based on GNU Make. The build system offers many features for users and developers: BART can be built on different architectures, with different compilers, and with various optional features. This makes it easy to use BART in different environments, on a laptop, a multi-GPU system, a HPC cluster, or in the cloud. The build system also supports running system and unit tests. To make developing more fun, the makefile is optimized for extremely fast builds. Using parallel builds, BART can be built from scratch in about five seconds. After changing a single source code file it is usually possible to rebuild the binary in less than a second. This is accomplished by automatically maintaining dependencies between object files and incrementally updating the binaries from object stored in libraries. 1. Building BART 1.2. Main Build Targets 1.2.1. Default By default, the main 'bart' binary will be built with: make or make bart 1.2.1. 
Building Individual Commands Individual BART commands can be built as standalone binaries: make <command> All BART commands can be built with: make all Attention: If the TOOLBOX_PATH is set, the 'bart' tool will call the standalone tool and not the built-in tool. This can be used to selectively update individual tools, but can also cause confusion. 1.2.2. Testing System and unit tests can be built and run with: make test make utest 1.2.3. Cleaning Up To clean up the working directory, run: make clean To also remove all built commands, run: make allclean 1.3. Libraries As a side effect of building the main 'bart' tool, static libraries are generated in 'lib/'. 2. Local Configuration The build can be configured by setting or adding variables. 2.1. Makefile.local It is recommended to put these variables into a file called 'Makefile.local' in the main BART directory. This file is then automatically included. By having the local configuration in a separate file, local changes are not overwritten when BART is updated and do not cause conflicts when using a version control system. 2.2. Makefile.<NAME> It is also possible to put machine-specific configuration variables in a Makefile.<NAME> where <NAME> is the name of the machine as returned by 'uname -n'. 2.3 Custom Makefiles directory Additional Makefiles can be included by placing them in the Makefiles directory. All files matching the expansion Makefiles/Makefile.* are automatically included in the build. See Makefiles/README.md for example files. 3. Build Options 3.1. Adding New BART Commands # add new tool (src/foo.c) to list of targets XTARGETS += foo # dependencies for foo MODULES_foo += -llowrank 3.2. Build Flags 3.2.1. Silent Builds Silent builds can be activated with the following option: MAKEFLAGS += --silent 3.2.2. Parallel Build Parallel builds can be activated with the following option: PARALLEL=1 3.3. Optional Features Some BART features are optional, because they depend on other libraries or features which are not available everywhere. 3.3.1. 
CUDA Support for CUDA can be turned on. It may be necessary to also provide the base path for the CUDA installation. CUDA=1 CUDA_BASE=/usr/ 3.3.2. OpenMP OpenMP can be turned off for compilers which do not support it properly (e.g. clang): OMP=0 3.3.3. FFTW Threads It is possible to turn off FFTW threads if the library is not available: FFTWTHREADS=0 3.3.4. ISMRM Raw Data Format If the ISMRMRD library is installed, preliminary support for the ISMRM raw data format can be activated: ISMRMRD=1 3.4. Compiler 3.4.1. Different Compiler If different compilers or compiler versions are installed, it is possible to override the default compiler: CC = gcc-4.8 #CC = gcc-5 #CC = clang-3.5 3.4.2. Different CFLAGS Different CFLAGS can be set like this: CFLAGS= -g -O2 -ffast-math 3.4.3. Static Linking Static linking can be used to build binaries which do not depend on external shared libraries. This might be useful if BART is to be deployed on a different machine where it is difficult to install required dependencies. SLINK=1 bart-0.4.02/doc/cmake_building.txt000066400000000000000000000036761320577655200170200ustar00rootroot00000000000000OVERVIEW ========= BART depends upon the new lapacke.h and cblas.h interfaces for blas and lapack. These new 'C' interfaces are often not packaged robustly on many different platforms. Important fixes to the lapack distribution occurred at lapack-3.6.0 and for finding lapack libs robustly with the github patch version 7fb63b1cd386b099d7da6eeaafc3e7dce055a7d0. List of issues when using default lapack from distributions: 1) Homebrew provides lapacke.h, but not cblas.h 2) SUSE provides cblas.h from ATLAS, but lapacke.h from lapack distro 3) RHEL6 does not provide lapacke.h or cblas.h 4) RHEL7 does not provide lapacke.h or cblas.h Additionally, several 'Vendor' optimized versions provide enhanced lapack and blas interfaces with various levels of support for lapacke.h and cblas.h. 
export BLD_DIR=~/src export CC=gcc # or clang export CXX=g++ # or clang++ export FC=$(which gfortran) # Make sure that C, C++, and Fortran compilers are compatible with each other! LAPACKE (LAPACK + C interfaces) BUILD HINTS: ============================================ cd ${BLD_DIR} git clone https://github.com/Reference-LAPACK/lapack.git cd ${BLD_DIR}/lapack git checkout 7fb63b1cd386b099d7da6eeaafc3e7dce055a7d0 -b Fixed64BitLibFinding mkdir -p ${BLD_DIR}/lapack-bld cd ${BLD_DIR}/lapack-bld cmake -DCMAKE_Fortran_COMPILER:FILEPATH=${FC} -DCBLAS:BOOL=ON -DLAPACKE:BOOL=ON -DCMAKE_INSTALL_PREFIX:PATH=${BLD_DIR}/lapack-install ../lapack make -j4 make install MAC & Linux builds: ====== mkdir -p ${BLD_DIR}/bart-LAPACKE-bld cd ${BLD_DIR}/bart-LAPACKE-bld rm -rf *; CC=clang CXX=clang++ cmake -DCMAKE_Fortran_COMPILER:FILEPATH=${FC} -DLINALG_VENDOR=LAPACKE -DLAPACKE_DIR=${BLD_DIR}/lapack-install ../bart make -j23 -- OR -- mkdir -p ${BLD_DIR}/bart-OpenBLAS-bld cd ${BLD_DIR}/bart-OpenBLAS-bld rm -rf *; CC=clang CXX=clang++ cmake -DCMAKE_Fortran_COMPILER:FILEPATH=${FC} -DLINALG_VENDOR=OpenBLAS -DOpenBLAS_DIR=${BLD_DIR}/lapack-install ../bart make -j23 bart-0.4.02/doc/debugging.txt000066400000000000000000000015201320577655200160000ustar00rootroot00000000000000 Running a command in a debugger ------------------------------- This involves several (easy) steps: 1. Recompile BART with debugging information. Create a Makefile.local in the BART directory with the following line added: DEBUG=1 Then recompile with: make allclean make bart 2. Install the GNU debugger (gdb) 3. Run the failing BART command: gdb --args bart [ ...] ... 4. Then type 'run' to start the process. If it crashes, you are back in the debugger. You can also type CTRL-C to interrupt it at any time. In the debugger: You can type 'bt' to get a backtrace which is helpful to investigate a segmentation fault or similar. You can also call functions. 
For example, this can be used to save a multi-dimensional array from the debugger like this: (gdb) call dump_cfl("dbg_img", 16, dims, image) bart-0.4.02/doc/dimensions-and-strides.txt000066400000000000000000000037031320577655200204350ustar00rootroot00000000000000 BART is built around a library which defines many generic functions on multi-dimensional arrays. Most functions come in two flavours: A basic version (e.g. 'md_copy') which takes as input the dimensions for its arguments and an extended version (e.g. 'md_copy2') which also takes the strides for each argument. The basic versions assume strides for a column-major array which is contiguous in memory. A stride refers to the distance in memory between successive elements in an array. They are used to compute the position of an element in memory for a given multi-index by multiplying the index of each dimension with the corresponding stride and summing the results. For a regular array of complex floats continuously laid out in memory with dimensions 'dim[N]' the default strides are: str[0] = sizeof(complex float) str[1] = dim[0] * sizeof(complex float) str[2] = dim[0] * dim[1] * sizeof(complex float) ... There is one exception: For a dimension with length one, the corresponding stride is set to zero. These default strides are what the function 'md_calc_strides' computes and which are also used automatically whenever strides are not explicitly specified. Dimensions and strides can be manipulated to obtain different views of the same array without having to make a copy. For example, swapping dimensions and strides for two indices yields a transposed view of the array, an increased stride yields a view of a sub-array, a negative stride a view where the corresponding dimension is reversed (flipped), and a zero stride yields a view in which one dimension is replicated. Many basic functions such as 'md_transpose', 'md_flip', 'md_slice', 'md_copy_block' etc. 
are implemented in this way by setting up dimensions and strides and calling into the generic copy function 'md_copy2'. Even much more complicated operations can often be implemented in just a few lines of code. One example is building a Casorati matrix of overlapping blocks (see 'num/casorati.c'). bart-0.4.02/doc/fft.txt000066400000000000000000000042311320577655200146260ustar00rootroot00000000000000 Centered FFT ------------ The center is N / 2 for even N and (N - 1) / 2 for odd N when counting from zero. Instead of using fftshift/ifftshift we usually use fftmod/ifftmod. While fftshift/ifftshift involves a cyclic copy of memory, fftmod applies a linear phase which has a similar effect. We prefer fftmod because we can usually merge this phase with other operations to reduce computation time. Though similar, there are some subtle differences which one has to keep in mind. The correct use of fftshift/ifftshift for a centered forward/inverse FFT is the following: forward: 1. ifftshift 2. fft 3. fftshift inverse: 1. ifftshift 2. ifft 3. fftshift In contrast, the correct use of fftmod/ifftmod for a centered forward/inverse FFT is this: forward: 1. fftmod 2. fft 3. fftmod inverse: 1. ifftmod 2. ifft 3. ifftmod If \xi_N is the N-th root of unity with smallest positive complex argument, the uncentered forward DFT of length N is: \hat f(k) = \sum_{x=0}^{N-1} \xi_N^{-xk} f(x) Shifting the center from index 0 to new index c yields the formula for the centered forward DFT of length N: \hat f_c(k) = \sum_{x=0}^{N-1} \xi_N^{-(x-c)(k-c)} f_c(x) Note that this corresponds to shifts in different directions for input and output. Expanding the exponent yields: (x-c)(k-c) = xk - xc - ck + c^2 Thus, the centered FFT can be implemented by multiplication with a linear phase before and after calling the uncentered FFT: \hat f_c(k) = \xi_N^{(k-c/2)c} \sum_{x=0}^{N-1} \xi_N^{-xk} \xi_N^{(x-c/2)c} f_c(x) Observe that this is the same linear phase applied to the input and output. 
Note that we distributed the additional phase \xi_N^{-c^2} evenly to both terms. If N is a multiple of four, then c^2 = N (N/4) and the additional phase term vanishes. Then \xi_N^{kc} and \xi_N^{xc} are simply the alternating sequence of 1, -1, 1, -1, ... Because ifftmod applies the conjugate phase this implies that it is the same as fftmod in this special case. If N is a multiple of two but not of four, the additional phase is -pi. Evenly distributed this yields a phase of -pi/2 for each term, i.e. a factor of '-i' (i the imaginary number), so fftmod applies -i, +i, -i, +i, ... For N odd the phase is more complicated. bart-0.4.02/doc/references.txt000066400000000000000000000206531320577655200161760ustar00rootroot00000000000000 - BART - Uecker M, Ong F, Tamir JI, Bahri D, Virtue P, Cheng JY, Zhang T, Lustig M. Berkeley Advanced Reconstruction Toolbox. Annual Meeting ISMRM, Toronto 2015, In: Proc Intl Soc Mag Reson Med 23; 2486. Uecker M, Virtue P, Ong F, Murphy MJ, Alley MT, Vasanawala SS, Lustig M. Software Toolbox and Programming Library for Compressed Sensing and Parallel Imaging, ISMRM Workshop on Data Sampling and Image Reconstruction, Sedona 2013. - sensitivity-encoded parallel imaging - (commands: itsense, pocsense, bpsense, rsense, pics) Ra JB and Rim CY. Fast imaging using subencoding data sets from multiple detectors. Magn Reson Med 1993; 30:142-145. Pruessmann KP, Weiger M, Scheidegger MB, Boesiger P. SENSE: Sensitivity encoding for fast MRI. Magn Reson Med 1999; 42:952-962. Pruessmann KP, Weiger M, Boernert P, Boesiger P. Advances in sensitivity encoding with arbitrary k-space trajectories. Magn Reson Med 2001; 46:638-651. Samsonov AA, Kholmovski EG, Parker DL, Johnson CR. POCSENSE: POCS-based reconstruction for sensitivity encoded magnetic resonance imaging. Magn Reson Med 2004; 52:1397-1406. - implementation of the non-uniform FFT - (command: nufft, pics) O’Sullivan JD. A fast sinc function gridding algorithm for Fourier inversion in computer tomography. IEEE Trans Med Imaging 1985; 4:200-207. 
Jackson JI, Meyer CH, Nishimura DG, Macovski A. Selection of a convolution function for Fourier inversion using gridding. IEEE Trans Med Imaging 1991; 3:473-478. Wajer F and Pruessmann KP. Major speedup of reconstruction for sensitivity-encoding with arbitrary trajectories. Annual Meeting of the ISMRM, Glasgow 2001, In: Proc Intl Soc Mag Reson Med 9; 767. Ong F, Uecker M, Jiang W, Lustig M. Fast Non-Cartesian Reconstruction with Pruned Fast Fourier Transform. Annual Meeting ISMRM, Toronto 2015, In: Proc Intl Soc Mag Reson Med 23; 3639. - methods for sensitivity calibration - (commands: walsh, caldir, ecalib, ecaltwo) Walsh DO, Gmitro AF, Marcellin MW. Adaptive reconstruction of phased array MR imagery. Magn Reson Med 2000, 43:682-690. Griswold M, Walsh D, Heidemann R, Haase A, Jakob A. The Use of an Adaptive Reconstruction for Array Coil Sensitivity Mapping and Intensity Normalization Annual Meeting ISMRM, Honolulu 2002, In: Proc Intl Soc Mag Reson Med 10; 2410. McKenzie CA, Yeh EN, Ohliger MA, Price MD, Sodickson DK. Self-calibrating parallel imaging with automatic coil sensitivity extraction. Magn Reson Med 2002; 47:529-538. Uecker M, Virtue P, Vasanawala SS, Lustig M. ESPIRiT Reconstruction Using Soft SENSE. Annual Meeting ISMRM, Salt Lake City 2013, In: Proc Intl Soc Mag Reson Med 21; 127. Uecker M, Lai P, Murphy MJ, Virtue P, Elad M, Pauly JM, Vasanawala SS, Lustig M. ESPIRiT - An Eigenvalue Approach to Autocalibrating Parallel MRI: Where SENSE meets GRAPPA. Magn Reson Med 2014; 71:990-1001. - joint estimation: nonlinear inversion, calibrationless - (commands: nlinv, sake) Uecker M, Hohage T, Block KT, Frahm J. Image reconstruction by regularized nonlinear inversion-joint estimation of coil sensitivities and image content. Magn Reson Med 2008; 60:674-682. Bi Z, Uecker M, Jiang D, Lustig M, Ying K. Robust Low-rank Matrix Completion for sparse motion correction in auto calibration PI. 
Annual Meeting ISMRM, Salt Lake City 2013, In: Proc Intl Soc Mag Reson Med 21; 2584. Shin PJ, Larson PEZ, Ohliger MA, Elad M, Pauly JM, Vigneron DB, Lustig M. Calibrationless Parallel Imaging Reconstruction Based on Structured Low-Rank Matrix Completion. Magn Reson Med 2014; 72:959-970. - coil compression - (command: cc) Buehrer M, Pruessmann KP, Boesiger P, Kozerke S. Array compression for MRI with large coil arrays. Magn Reson Med 2007, 57:1131-1139. Huang F, Vijayakumar S, Li Y, Hertel S, Duensing GR. A software channel compression technique for faster reconstruction with many channels. Magn Reson Imaging 2008; 26:133-141. Zhang T, Pauly JM, Vasanawala SS, Lustig M. Coil compression for accelerated imaging with cartesian sampling. Magn Reson Med 2013; 69:571-582. Bahri D, Uecker M, Lustig M. ESPIRiT-Based Coil Compression for Cartesian Sampling, Annual Meeting ISMRM, Salt Lake City 2013, In: Proc Intl Soc Mag Reson Med 21; 2657. - compressed sensing MRI - (commands: pocsense, rsense, pics) Block KT, Uecker M, and Frahm J. Undersampled radial MRI with multiple coils. Iterative image reconstruction using a total variation constraint. Magn Reson Med 2007; 57:1086-1098. Lustig M, Donoho D and Pauly JM. Sparse MRI: The application of compressed sensing for rapid MR imaging. Magn Reson Med 2007; 58:1182-1195. Liu B, King K, Steckner M, Xie J, Sheng J, Ying L. Regularized sensitivity encoding (SENSE) reconstruction using Bregman iterations. Magn Reson Med 2009; 61:145-152. - sparsity transforms, variational penalties, regularization - (commands: cdf97, rof, lrmatrix, pocsense, rsense, pics) Rudin LI, Osher S, Fatemi E. Nonlinear total variation based noise removal algorithms, Physica D: Nonlinear Phenomena 1992; 60:259-268. Figueiredo MAT and Nowak RD. An EM algorithm for wavelet-based image restoration IEEE Trans Image Process 2003; 12:906-916. Ong F, Uecker M, Tariq U, Hsiao A, Alley MT, Vasanawala SS, Lustig M. 
Robust 4D Flow Denoising using Divergence-free Wavelet Transform, Magn Reson Med 2015; 73:828-842. Ong F, Lustig M. Beyond low rank + sparse: Multi-scale low rank matrix decomposition, preprint 2015; arXiv:1507.08751. - sampling schemes - (commands: traj, poisson) Winkelmann S, Schaeffter T, Koehler T, Eggers H, Doessel O. An optimal radial profile order based on the Golden Ratio for time-resolved MRI. IEEE Trans Med Imaging 2007; 26:68-76. Lustig M, Alley M, Vasanawala S, Donoho DL, Pauly JM. L1 SPIR-iT: Autocalibrating Parallel Imaging Compressed Sensing Annual Meeting ISMRM, Honolulu 2009, In: Proc Intl Soc Mag Reson Med 17; 379. - acceleration with graphical processing units - (commands: pocsense, rsense, nufft, pics) Uecker M, Zhang S, Frahm J. Nonlinear Inverse Reconstruction for Real-time MRI of the Human Heart Using Undersampled Radial FLASH. Magn Reson Med 2010; 63:1456-1462. Murphy M, Alley M, Demmel J, Keutzer K, Vasanawala S, Lustig M. Fast ℓ1-SPIRiT Compressed Sensing Parallel Imaging MRI: Scalable Parallel Implementation and Clinically Feasible Runtime. IEEE Trans Med Imaging 2012; 31:1250-1262. - numerical phantoms - (command: phantom) Shepp LA, Logan BF. The Fourier reconstruction of a head section. IEEE T Nucl Sci 1974; 21:21-43. Koay CG, Sarlls JE, Özarslan E. Three-Dimensional Analytical Magnetic Resonance Imaging Phantom in the Fourier Domain. Magn Reson Med 2007; 58:430-436. Guerquin-Kern M, Lejeune L, Pruessmann KP, M Unser M, Realistic Analytical Phantoms for Parallel Magnetic Resonance Imaging. IEEE Trans Med Imaging 2012; 31:626-636. - applications - Hollingsworth KG, Higgins DM, McCallum M, Ward L, Coombs A, Straub V. Investigating the quantitative fidelity of prospectively undersampled chemical shift imaging in muscular dystrophy with compressed sensing and parallel imaging reconstruction. Magn Reson Med 2014; 72: 1610-1619. Zhang T, Cheng JY, Potnick AG, Barth RA, Alley MT, Uecker M, Lustig M, Pauly JM, Vasanawala SS. 
Fast Pediatric 3D Free Breathing Abdominal Dynamic Contrast Enhanced MRI with a High Spatiotemporal Resolution, J Magn Reson Imaging 2015; 41:460-473. Addy NO, Ingle RR, Wu HH, Hu BS, Nishimura DG. High-resolution variable-density 3D cones coronary MRA. Magn Reson Med 2015; 74: 614-621. Cheng JY, Zhang T, Ruangwattanapaisarn N, Alley MT, Uecker M, Pauly JM, Lustig M, Vasanawala SS. Free-Breathing Pediatric MRI with Nonrigid Motion Correction and Acceleration, J Magn Reson Imaging 2015; 42:407-420. Athalye V, Lustig M, Uecker M. Parallel Magnetic Resonance Imaging as Approximation in a Reproducing Kernel Hilbert Space, Inverse Problems 2015; 31:045008. Mann LW, Higgins DM, Peters CN, Cassidy S, Hodson KK, Coombs A, Taylor R, Hollingsworth KG. Accelerating MR Imaging Liver Steatosis Measurement Using Combined Compressed Sensing and Parallel Imaging: A Quantitative Evaluation, Radiology 2016; 278:245-256. Cheng JY, Hanneman K, Zhang T, Alley MT, Lai P, Tamir JI, Uecker M, Pauly JM, Lustig M, Vasanawala SS. Comprehensive Motion-Compensated Highly-Accelerated 4D Flow MRI with Ferumoxytol Enhancement for Pediatric Congenital Heart Disease, J Magn Reson Imaging, Epub (2015) Tamir JI, Uecker M, Chen W, Lai P, Alley MT, Vasanawala SS, Lustig M. T2-Shuffling: Sharp, Multi-Contrast, Volumetric Fast Spin-Echo Imaging Magn Reson Med; in press (2015). bart-0.4.02/doc/style.txt000066400000000000000000000103631320577655200152120ustar00rootroot00000000000000 1. Language The main language is C11 with commonly used GNU extensions (gnu11) as supported by the GCC and clang compilers. 1.1. Standard types: The complex types are the standard types introduced with ISO C99. #include <complex.h> complex float complex double In headers we use _Complex without including the standard header for complex types for compatibility with C++. Similarly, we use the standard boolean type. #include <stdbool.h> bool x = true; 1.2. Arrays Large multi-dimensional arrays should use our md_* functions. 
Small arrays should use (variable-length) arrays to increase type-safety. Pointer arithmetic should be avoided. float kernel[N][3]; complex float* image = md_alloc(N, dims, CFL_SIZE); In headers, we use the __VLA(x) macro for compatibility with C++ when this is possible. 1.2. GNU Extensions: Some extensions are commonly supported by compilers and useful. statement expressions ({ }) __typeof__ const array parameters 1.3. Type safety void* and casts should be used only when necessary. Functions must have a prototype. Variable-length arrays are preferred over basic pointers. Macros can often be made type-safe, e.g. using the TYPE_CHECK macro. 2. Coding Style The coding style rules are meant as guidelines. It is OK to deviate from the guidelines in situations where it helps to make the code easier to understand. 2.1. Indentation Indentation using a single tab. A tab is considered 8 characters. White space errors (white space after the last visible character of a line) should be avoided. 2.2. Expressions There should be no space after the opening or before the closing bracket. There should be a single space before and after any operator except for prefix and postfix operators. Subexpressions should be enclosed in brackets and not rely on operator precedence for correct evaluation order. int i = (3 + x++) * 2; If there is a constant involved in a comparison the constant should be on the left side. if (0. == x) return false; The type of the controlling expression used in if statements or loops should be boolean. Pointers and integers should not be implicitly compared to NULL or zero. if (NULL == foo_ptr) foo_ptr = init_foo_ptr(); 2.3. Statement groups. Opening curly brace is on the next line for functions and on the same line for if, for, while, and switch statements. In the latter case there should be an empty line afterwards. 
In case only a single statement follows an if, for, or while statement, the statement block can be omitted - but for if-else pairs only it can be omitted on both sides. There should be no empty line before the closing bracket. if (0. == x) { ... } if (0. == x) y = 3; Statements should be grouped in logical blocks by a single empty line. In particular, declarations, definitions (with initialization) should be separated from other statements. Memory allocation and deallocation should usually be separated. void compute_bar(complex float* bar) { complex float* foo = md_alloc(); compute_foo(foo); compute_bar_from_foo(bar, foo); md_free(foo); } 2.4. Comments: Comments should be used to document the API, explain complicated code and algorithms, and give required background information. Comments are not meant to explain things a competent programmer should know by reading the code. Good: // gram-schmidt algorithm for (...) { for (..) { ... } } Bad: // initialize foo int foo = 3; // config struct struct foo_conf_s conf = ... // allocate memory complex float* x = md_alloc(...); 2.5. Variable and Function Names Function and variable names should be lower case and use '_' as separator of components. Names should be meaningful but not unnecessarily long. If possible, use self-explanatory variable names. Except for loop indices, where one-letter variable names can be used. float norm = calc_frobenius_norm(image) This is preferable to adding comments: // calculate frobenius norm float n = clc_frbn(i); On the other hand, for often used functions a short name is appropriate. For example, we use md_fmac() instead of multidim_fused_multiply_accumulate() Locally used loop indices can be single character letters: i, j, k bart-0.4.02/doxyconfig000066400000000000000000002342221320577655200146420ustar00rootroot00000000000000# Doxyfile 1.8.3.1 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. 
# # All text after a hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" "). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # http://www.gnu.org/software/libiconv for the list of possible encodings. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or sequence of words) that should # identify the project. Note that if you do not use Doxywizard you need # to put quotes around the project name if it contains spaces. PROJECT_NAME = "Berkeley Advanced Reconstruction Toolbox (BART)" # The PROJECT_NUMBER tag can be used to enter a project or revision number. # This could be handy for archiving the generated documentation or # if some version control system is used. #PROJECT_NUMBER = 0.3 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer # a quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = "" # With the PROJECT_LOGO tag one can specify an logo or icon that is # included in the documentation. The maximum height of the logo should not # exceed 55 pixels and the maximum width should not exceed 200 pixels. # Doxygen will copy the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. 
# If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. OUTPUT_DIRECTORY = doc/dx/ # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to JavaDoc). # Set to NO to disable this. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. 
REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. # If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = YES # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is # only done if one of the specified strings matches the left-hand part of # the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. Note that you specify absolute paths here, but also # relative paths, which will be relative from the directory where doxygen is # started. 
STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful if your file system # doesn't support long names like on DOS, Mac, or CD-ROM. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a JavaDoc-style # comment as the brief description. If set to NO, the JavaDoc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.) JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then Doxygen will # interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. Set this tag to YES if you prefer the old behaviour instead. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. 
INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding # "class=itcl::class" will allow you to use the command class in the # itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for # Java. For instance, namespaces will be presented as packages, qualified # scopes will look different, etc. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources only. Doxygen will then generate output that is more tailored for # Fortran. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. 
Doxygen will then generate output that is tailored for # VHDL. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, # and language is one of the parsers supported by doxygen: IDL, Java, # Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, # C++. For instance to make doxygen treat .inc files as Fortran files (default # is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note # that for custom extensions you also need to set FILE_PATTERNS otherwise the # files are not read by doxygen. EXTENSION_MAPPING = # If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all # comments according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you # can mix doxygen, HTML, and XML commands with Markdown formatting. # Disable only in case of backward compatibility issues. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented classes, # or namespaces to their corresponding documentation. Such a link can be # prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). 
This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES (the # default) will make doxygen replace the get and set methods by a property in # the documentation. This will only work if the methods are indeed getting or # setting a simple type. If this is not the case, or you want to show the # methods anyway, you should set this option to NO. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and # unions are shown inside the group in which they are included (e.g. using # @ingroup) instead of on a separate page (for HTML and Man pages) or # section (for LaTeX and RTF). 
INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and # unions with only public data fields will be shown inline in the documentation # of the scope in which they are defined (i.e. file, namespace, or group # documentation), provided this scope is documented. If set to NO (the default), # structs, classes, and unions are shown on a separate page (for HTML and Man # pages) or section (for LaTeX and RTF). INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically # be useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. TYPEDEF_HIDES_STRUCT = NO # The SYMBOL_CACHE_SIZE determines the size of the internal cache use to # determine which symbols to keep in memory and which to flush to disk. # When the cache is full, less often used symbols will be written to disk. # For small to medium size projects (<1000 input files) the default value is # probably good enough. For larger projects a too small cache size can cause # doxygen to be busy swapping symbols to and from disk most of the time # causing a significant performance penalty. # If the system has enough physical memory increasing the cache will improve the # performance by keeping more symbols in memory. Note that the value works on # a logarithmic scale so increasing the size by one will roughly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols. 
SYMBOL_CACHE_SIZE = 0 # Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be # set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given # their name and scope. Since this can be an expensive process and often the # same symbol appear multiple times in the code, doxygen keeps a cache of # pre-resolved symbols. If the cache is too small doxygen will become slower. # If the cache is too large, memory is wasted. The cache size is given by this # formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. # Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = NO # If the EXTRACT_PACKAGE tag is set to YES all members with package or internal # scope will be included in the documentation. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. 
# If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base # name of the file that contains the anonymous namespace. By default # anonymous namespaces are hidden. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. # If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. 
If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. If set to YES the scope will be hidden. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen # will list include files with double quotes in the documentation # rather than with sharp brackets. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen # will sort the (brief and detailed) documentation of class members so that # constructors and destructors are listed first. If set to NO (the default) # the constructors will appear in the respective orders defined by # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. 
SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the # hierarchy of group names into alphabetical order. If set to NO (the default) # the group names will appear in their defined order. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to # do proper type resolution of all parameters of a function it will reject a # match between the prototype and the implementation of a member function even # if there is only one candidate or it is obvious which candidate to choose # by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen # will still accept a match between prototype and implementation in such cases. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. 
GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if section-label ... \endif # and \cond section-label ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or macro consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and macros in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). The default is YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. # This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). The default is YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command <command> <input-file>, where <command> is the value of # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. 
FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. # You can optionally specify a file name after the option, if omitted # DoxygenLayout.xml will be used as the name of the layout file. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files # containing the references data. This must be a list of .bib files. The # .bib extension is automatically appended if omitted. Using this command # requires the bibtex tool to be installed. See also # http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style # of the bibliography can be controlled using LATEX_BIB_STYLE. To use this # feature you need bibtex and perl available in the search path. Do not use # file names with spaces, bibtex cannot handle them. CITE_BIB_FILES = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. If left blank NO is used. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. WARNINGS = YES # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members. If EXTRACT_ALL is set to YES then this flag will # automatically be disabled. 
WARN_IF_UNDOCUMENTED = YES # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # The WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. If left blank the output is written # to stderr. WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding. Doxygen uses libiconv (or the iconv built # into libc) for the transcoding. 
See http://www.gnu.org/software/libiconv for # the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh # *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py # *.f90 *.f *.for *.vhd *.vhdl FILE_PATTERNS = *.c *.cc *.cxx *.cpp *.c++ *.cu *.h *.py *.m # The RECURSIVE tag can be used to specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = src/spirit-1.0 src/spirit-2.0 src/butterfly # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. 
Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain images that are included in the documentation (see # the \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command <filter> <input-file>, where <filter> # is the value of the INPUT_FILTER tag, and <input-file> is the name of an # input file. Doxygen will then use the output that the filter program writes # to standard output. # If FILTER_PATTERNS is specified, this tag will be # ignored. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. # Doxygen will compare the file name with each pattern and apply the # filter if there is a match. # The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty or if # none of the patterns match the file name, INPUT_FILTER is applied.
FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERNS (if any) # and it is also possible to disable source filtering for a specific pattern # using *.ext= (so without naming a filter). This option only has effect when # FILTER_SOURCE_FILES is enabled. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page (index.html). # This can be useful if you have a project on for instance GitHub and want to reuse # the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C, C++ and Fortran comments will always remain visible. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented # functions referencing it will be listed.
REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. # Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen # built-in source browser. The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = YES # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. 
# The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. Note that when using a custom header you are responsible # for the proper inclusion of any scripts and style sheets that doxygen # needs, which is dependent on the configuration options used. # It is advised to generate a default header using "doxygen -w html # header.html footer.html stylesheet.css YourConfigFile" and then modify # that header. Note that the header is subject to change so you typically # have to redo this when upgrading to a newer version of doxygen or when # changing the value of configuration settings such as GENERATE_TREEVIEW! HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. 
It can be used to # fine-tune the look of the HTML output. If left blank doxygen will # generate a default style sheet. Note that it is recommended to use # HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this # tag will in the future become obsolete. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify an additional # user-defined cascading style sheet that is included after the standard # style sheets created by doxygen. Using this option one can overrule # certain style aspects. This is preferred over using HTML_STYLESHEET # since it does not replace the standard style sheet and is therefore more # robust against future updates. Doxygen will copy the style sheet file to # the output directory. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that # the files will be copied as-is; there are no commands or markers available. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. # Doxygen will adjust the colors in the style sheet and background images # according to this color. Hue is specified as an angle on a colorwheel, # see http://en.wikipedia.org/wiki/Hue for more information. # For instance the value 0 represents red, 60 is yellow, 120 is green, # 180 is cyan, 240 is blue, 300 purple, and 360 is red again. # The allowed range is 0 to 359. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of # the colors in the HTML output. For a value of 0 the output will use # grayscales only. A value of 255 will produce the most vivid colors.
HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to # the luminance component of the colors in the HTML output. Values below # 100 gradually make the output lighter, whereas values above 100 make # the output darker. The value divided by 100 is the actual gamma applied, # so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, # and 100 does not change the gamma. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting # this to NO can help when comparing the output of multiple runs. HTML_TIMESTAMP = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of # entries shown in the various tree structured indices initially; the user # can expand and collapse entries dynamically later on. Doxygen will expand # the tree to such a level that at most the specified number of entries are # visible (unless a fully collapsed tree already exceeds this amount). # So setting the number of entries 1 will produce a full collapsed tree by # default. 0 is a special value representing an infinite number of entries # and will result in a full expanded tree by default. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). # To create a documentation set, doxygen will generate a Makefile in the # HTML output directory. 
Running make will produce the docset in that # directory and running "make install" will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find # it at startup. # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. GENERATE_DOCSET = NO # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the # feed. A documentation feed provides an umbrella under which multiple # documentation sets from a single provider (such as a company or product suite) # can be grouped. DOCSET_FEEDNAME = "Doxygen generated docs" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely # identify the documentation publisher. This should be a reverse domain-name # style string, e.g. com.mycompany.MyDocSet.documentation. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. You # can add a path in front of the file if the result should not be # written to the html output directory. CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe).
If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO). GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING # is used to encode HtmlHelp index (hhk), content (hhc) and project file # content. CHM_INDEX_ENCODING = # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated # that can be used as input for Qt's qhelpgenerator to generate a # Qt Compressed Help (.qch) of the generated HTML documentation. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can # be used to specify the file name of the resulting .qch file. # The path specified is relative to the HTML output folder. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#namespace QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#virtual-folders QHP_VIRTUAL_FOLDER = doc # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to # add. 
For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters # (Qt Help Project / Custom Filters). QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's # filter section matches. For more information please see # http://doc.trolltech.com/qthelpproject.html#filter-attributes # (Qt Help Project / Filter Attributes). QHP_SECT_FILTER_ATTRS = # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can # be used to specify the location of Qt's qhelpgenerator. # If non-empty doxygen will try to run qhelpgenerator on the generated # .qhp file. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files # will be generated, which together with the HTML files, form an Eclipse help # plugin. To install this plugin and make it available under the help contents # menu in Eclipse, the contents of the directory containing the HTML and XML # files needs to be copied into the plugins directory of eclipse. The name of # the directory within the plugins directory should be the same as # the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before # the help appears. GENERATE_ECLIPSEHELP = NO # A unique identifier for the eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have # this name. ECLIPSE_DOC_ID = org.doxygen.Project # The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) # at top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. Since the tabs have the same information as the # navigation tree you can set this option to YES if you also set # GENERATE_TREEVIEW to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information.
# If the tag value is set to YES, a side panel will be generated # containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). # Windows users are probably better off using the HTML help feature. # Since the tree basically has the same information as the tab index you # could consider setting DISABLE_INDEX to YES when enabling this option. GENERATE_TREEVIEW = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values # (range [0,1..20]) that doxygen will group on one line in the generated HTML # documentation. Note that a value of 0 will completely suppress the enum # values from appearing in the overview section. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open # links to external symbols imported via tag files in a separate window. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of Latex formulas included # as images in the HTML documentation. The default is 10. Note that # when you change the font size after a successful doxygen run you need # to manually remove any form_*.png images from the HTML output directory # to force them to be regenerated. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are # not supported properly for IE 6.0, but are supported on all modern browsers. # Note that when changing this option you need to delete any form_*.png files # in the HTML output before the changes have effect.
FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax # (see http://www.mathjax.org) which uses client side Javascript for the # rendering instead of using prerendered bitmaps. Use this if you do not # have LaTeX installed or if you want formulas to look prettier in the HTML # output. When enabled you may also need to install MathJax separately and # configure the path to it using the MATHJAX_RELPATH option. USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. Supported types are HTML-CSS, NativeMML (i.e. MathML) and # SVG. The default value is HTML-CSS, which is slower, but has the best # compatibility. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the # HTML output directory using the MATHJAX_RELPATH option. The destination # directory should contain the MathJax.js script. For instance, if the mathjax # directory is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to # the MathJax Content Delivery Network so you can quickly see the result without # installing MathJax. # However, it is strongly recommended to install a local # copy of MathJax from http://www.mathjax.org before deployment. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension # names that should be enabled during MathJax rendering. MATHJAX_EXTENSIONS = # When the SEARCHENGINE tag is enabled doxygen will generate a search box # for the HTML output. The underlying search engine uses javascript # and DHTML and should work on any modern browser. Note that when using # HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets # (GENERATE_DOCSET) there is already a search function so this one should # typically be disabled.
For large projects the javascript based search engine # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. SEARCHENGINE = YES # When the SERVER_BASED_SEARCH tag is enabled the search engine will be # implemented using a web server instead of a web client using Javascript. # There are two flavours of web server based search depending on the # EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for # searching and an index file used by the script. When EXTERNAL_SEARCH is # enabled the indexing and searching needs to be provided by external tools. # See the manual for details. SERVER_BASED_SEARCH = NO # When EXTERNAL_SEARCH is enabled doxygen will no longer generate the PHP # script for searching. Instead the search results are written to an XML file # which needs to be processed by an external indexer. Doxygen will invoke an # external search engine pointed to by the SEARCHENGINE_URL option to obtain # the search results. Doxygen ships with an example indexer (doxyindexer) and # search engine (doxysearch.cgi) which are based on the open source search engine # library Xapian. See the manual for configuration details. EXTERNAL_SEARCH = NO # The SEARCHENGINE_URL should point to a search engine hosted by a web server # which will return the search results when EXTERNAL_SEARCH is enabled. # Doxygen ships with an example search engine (doxysearch) which is based on # the open source search engine library Xapian. See the manual for configuration # details. SEARCHENGINE_URL = # When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed # search data is written to a file for indexing by an external tool. With the # SEARCHDATA_FILE tag the name of this file can be specified. SEARCHDATA_FILE = searchdata.xml # When SERVER_BASED_SEARCH AND EXTERNAL_SEARCH are both enabled the # EXTERNAL_SEARCH_ID tag can be used as an identifier for the project.
This is # useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple # projects and redirect the results back to the right project. EXTERNAL_SEARCH_ID = # The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen # projects other than the one defined by this configuration file, but that are # all added to the same external search index. Each project needs to have a # unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id # to a relative location where the documentation can be found. # The format is: EXTRA_SEARCH_MAPPINGS = id1=loc1 id2=loc2 ... EXTRA_SEARCH_MAPPINGS = #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = YES # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. # Note that when enabling USE_PDFLATEX this option is only used for # generating bitmaps for formulas in the HTML output, but not in the # Makefile that is written to the output directory. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # LaTeX documents. This may be useful for small projects and may help to # save some trees in general.
COMPACT_LATEX = NO # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4 # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX # packages that should be included in the LaTeX output. EXTRA_PACKAGES = # The LATEX_HEADER tag can be used to specify a personal LaTeX header for # the generated latex document. The header should contain everything until # the first chapter. If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! LATEX_HEADER = # The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for # the generated latex document. The footer should contain everything after # the last chapter. If it is left blank doxygen will generate a # standard footer. Notice: only use this tag if you know what you are doing! LATEX_FOOTER = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = YES # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. # command to the generated LaTeX files. This will instruct LaTeX to keep # running if errors occur, instead of asking the user for help. # This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output.
LATEX_HIDE_INDICES = NO # If LATEX_SOURCE_CODE is set to YES then doxygen will include # source code with syntax highlighting in the LaTeX output. # Note that which sources are shown also depends on other settings # such as SOURCE_BROWSER. LATEX_SOURCE_CODE = NO # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See # http://en.wikipedia.org/wiki/BibTeX for more info. LATEX_BIB_STYLE = plain #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields. # Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load style sheet definitions from file. Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. 
# Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. XML_OUTPUT = xml # The XML_SCHEMA tag can be used to specify an XML schema, # which can be used by a validating XML parser to check the # syntax of the XML files. 
XML_SCHEMA = # The XML_DTD tag can be used to specify an XML DTD, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_DTD = # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an AutoGen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and LaTeX code to be able # to generate PDF and DVI output from the Perl module output. PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. # This is useful # if you want to understand what is going on. # On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. 
PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = NO # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files # pointed to by INCLUDE_PATH will be searched when a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor. INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). 
The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. PREDEFINED = # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition that # overrules the definition found in the source code. EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all references to function-like macros # that are alone on a line, have an all uppercase name, and do not end with a # semicolon, because these will confuse the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. For each # tag file the location of the external documentation should be added. The # format of a tag file without this location is as follows: # # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # # TAGFILES = file1=loc1 "file2 = loc2" ... # where "loc1" and "loc2" can be relative or absolute paths # or URLs. Note that each tag file must have a unique name (where the name does # NOT include the path). If a tag file is not located in the directory in which # doxygen is run, you must also specify the path to the tagfile here. TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. 
GENERATE_TAGFILE = # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option also works with HAVE_DOT disabled, but it is recommended to # install and use dot, since it yields more powerful graphs. CLASS_DIAGRAMS = YES # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the # documentation. The MSCGEN_PATH tag allows you to specify the directory where # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. 
The other options in this section # have no effect if this option is set to NO (the default) HAVE_DOT = NO # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is # allowed to run in parallel. When set to 0 (the default) doxygen will # base this on the number of processors available in the system. You can set it # explicitly to a value larger than 0 to get control over the balance # between CPU load and processing speed. DOT_NUM_THREADS = 0 # By default doxygen will use the Helvetica font for all dot files that # doxygen generates. When you want a differently looking font you can specify # the font name using DOT_FONTNAME. You need to make sure dot is able to find # the font, which can be done by putting it in a standard location or by setting # the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the # directory containing the font. DOT_FONTNAME = Helvetica # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. # The default size is 10pt. DOT_FONTSIZE = 10 # By default doxygen will tell dot to use the Helvetica font. # If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to # set the path where dot can find it. DOT_FONTPATH = # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the # CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. 
COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = YES # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. UML_LOOK = NO # If the UML_LOOK tag is enabled, the fields and methods are shown inside # the class node. If there are many fields or methods and many nodes the # graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS # threshold limits the number of items for each type to make the size more # manageable. Set this to 0 for no limit. Note that the threshold may be # exceeded by 50% before the limit is enforced. UML_LIMIT_NUM_FIELDS = 10 # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files. INCLUDE_GRAPH = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. 
CALL_GRAPH = NO # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will generate a graphical hierarchy of all classes instead of a textual one. GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are svg, png, jpg, or gif. # If left blank png will be used. If you choose svg you need to set # HTML_FILE_EXTENSION to xhtml in order to make the SVG files # visible in IE 9+ (other browsers do not have this requirement). DOT_IMAGE_FORMAT = png # If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to # enable generation of interactive SVG images that allow zooming and panning. # Note that this requires a modern browser other than Internet Explorer. # Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you # need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files # visible. Older versions of IE do not have SVG support. INTERACTIVE_SVG = NO # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). 
DOTFILE_DIRS = # The MSCFILE_DIRS tag can be used to specify one or more directories that # contain msc files that are included in the documentation (see the # \mscfile command). MSCFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. Note that if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lie further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is disabled by default, because dot on Windows does not # seem to support this out of the box. Warning: Depending on the platform used, # enabling this option may lead to badly anti-aliased labels on the edges of # a graph (i.e. they become hard to read). DOT_TRANSPARENT = NO # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default. 
DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. DOT_CLEANUP = YES bart-0.4.02/genctags000077500000000000000000000001251320577655200142600ustar00rootroot00000000000000#!/bin/bash ctags --langmap=c++:+.cu --extra=+f `find . -regex '.*\.[ch]u*' -print` bart-0.4.02/git-version.sh000077500000000000000000000004131320577655200153440ustar00rootroot00000000000000#!/bin/bash if test -d ${GIT_DIR:-.git} -o -f .git then git describe --match "v*" --dirty if [[ $? -eq 0 ]]; then git describe --match "v*" | cut -f1 -d'-' > version.txt else var=`cat version.txt` echo ${var}-dirty fi else cat version.txt fi bart-0.4.02/lib/000077500000000000000000000000001320577655200133075ustar00rootroot00000000000000bart-0.4.02/lib/.gitignore000066400000000000000000000001071320577655200152750ustar00rootroot00000000000000# Ignore everything in this directory * # Except this file !.gitignore bart-0.4.02/makedoc.sh000077500000000000000000000001271320577655200145030ustar00rootroot00000000000000#!/bin/bash ( cat doxyconfig ; echo "PROJECT_NUMBER=$(cat version.txt)" ) | doxygen - bart-0.4.02/matlab/000077500000000000000000000000001320577655200140015ustar00rootroot00000000000000bart-0.4.02/matlab/bart.m000066400000000000000000000036651320577655200151210ustar00rootroot00000000000000function [varargout] = bart(cmd, varargin); % BART Call BART command from Matlab. 
% [A, B] = bart('command', X, Y) call command with inputs X Y and outputs A B % % 2014-2016 Martin Uecker if nargin==0 || all(cmd==0) disp('Usage: bart '); return end bart_path = getenv('TOOLBOX_PATH'); if isempty(bart_path) if exist('/usr/local/bin/bart', 'file') bart_path = '/usr/local/bin'; elseif exist('/usr/bin/bart', 'file') bart_path = '/usr/bin'; else error('Environment variable TOOLBOX_PATH is not set.'); end end % clear the LD_LIBRARY_PATH environment variable (to work around % a bug in Matlab). if ismac==1 setenv('DYLD_LIBRARY_PATH', ''); else setenv('LD_LIBRARY_PATH', ''); end name = tempname; in = cell(1, nargin - 1); for i=1:nargin - 1, in{i} = strcat(name, 'in', num2str(i)); writecfl(in{i}, varargin{i}); end in_str = sprintf(' %s', in{:}); out = cell(1, nargout); for i=1:nargout, out{i} = strcat(name, 'out', num2str(i)); end out_str = sprintf(' %s', out{:}); if ispc % For cygwin use bash and modify paths ERR = system(['bash.exe --login -c ', ... strrep(bart_path, filesep, '/'), ... '"', '/bart ', strrep(cmd, filesep, '/'), ' ', ... strrep(in_str, filesep, '/'), ... ' ', strrep(out_str, filesep, '/'), '"']); else ERR = system([bart_path, '/bart ', cmd, ' ', in_str, ' ', out_str]); end for i=1:nargin - 1, if (exist(strcat(in{i}, '.cfl'),'file')) delete(strcat(in{i}, '.cfl')); end if (exist(strcat(in{i}, '.hdr'),'file')) delete(strcat(in{i}, '.hdr')); end end for i=1:nargout, if ERR==0 varargout{i} = readcfl(out{i}); end if (exist(strcat(out{i}, '.cfl'),'file')) delete(strcat(out{i}, '.cfl')); end if (exist(strcat(out{i}, '.hdr'),'file')) delete(strcat(out{i}, '.hdr')); end end if ERR~=0 error('command exited with an error'); end end bart-0.4.02/matlab/readcfl.m000066400000000000000000000020511320577655200155550ustar00rootroot00000000000000
function data = readcfl(filenameBase)
%READCFL Read complex data from file.
%   Read in reconstruction data stored in filenameBase.cfl (complex float)
%   based on dimensions stored in filenameBase.hdr.
%
%   data = READCFL(filenameBase) returns a single-precision complex array
%   whose size is the dimension vector found in the header file. An error
%   is raised if either file cannot be opened or the header holds no
%   dimension line.
%
%   Written to edit data with the Berkeley Advanced Reconstruction Toolbox (BART).
%
% Copyright 2016. CBClab, Maastricht University.
% 2016 Tim Loderhose (t.loderhose@student.maastrichtuniversity.nl)

    dims = readReconHeader(filenameBase);

    filename = strcat(filenameBase,'.cfl');
    fid = fopen(filename);

    % fopen signals failure with -1; report the file name instead of
    % letting fread fail on an invalid identifier.
    if fid == -1
        error('Cannot open file %s for reading.', filename);
    end

    % The file stores interleaved (real, imag) float32 pairs.
    data_r_i = fread(fid, prod([2 dims]), '*float32');
    fclose(fid);

    data_r_i = reshape(data_r_i, [2 dims]);
    data = complex(zeros(dims,'single'),0);
    data(:) = complex(data_r_i(1,:),data_r_i(2,:));

end

function dims = readReconHeader(filenameBase)
% Return the dimension vector stored in filenameBase.hdr.
    filename = strcat(filenameBase,'.hdr');
    fid = fopen(filename);

    if fid == -1
        error('Cannot open file %s for reading.', filename);
    end

    line = getNextLine(fid);
    fclose(fid);

    % getNextLine hands back fgetl's non-char EOF marker when the file has
    % no usable line.
    if ~ischar(line)
        error('No dimensions found in %s.', filename);
    end

    dims = str2num(line);

    if isempty(dims)
        error('Cannot parse dimensions in %s.', filename);
    end
end

function line = getNextLine(fid)
% Return the first line that is neither blank nor a '#' comment.
% At end of file the non-char value produced by fgetl is returned.
    line = fgetl(fid);
    while ischar(line) && (isempty(line) || line(1) == '#')
        line = fgetl(fid);
    end
end
bart-0.4.02/matlab/writecfl.m000066400000000000000000000021221320577655200157730ustar00rootroot00000000000000function writecfl(filenameBase,data) %WRITECFL Write complex data to file. % Writes reconstruction data to filenameBase.cfl (complex float) and its % dimensions to filenameBase.hdr. % % Written to edit data with the Berkeley Advanced Reconstruction Toolbox (BART). % % Copyright 2013. Joseph Y Cheng. % Copyright 2016. CBClab, Maastricht University. % 2012 Joseph Y Cheng (jycheng@mrsrl.stanford.edu). % 2016 Tim Loderhose (t.loderhose@student.maastrichtuniversity.nl). 
dims = size(data); writeReconHeader(filenameBase,dims); filename = strcat(filenameBase,'.cfl'); fid = fopen(filename,'w'); data = data(:); fwrite(fid,[real(data)'; imag(data)'],'float32'); fclose(fid); end function writeReconHeader(filenameBase,dims) filename = strcat(filenameBase,'.hdr'); fid = fopen(filename,'w'); fprintf(fid,'# Dimensions\n'); for N=1:length(dims) fprintf(fid,'%d ',dims(N)); end if length(dims) < 5 for N=1:(5-length(dims)) fprintf(fid,'1 '); end end fprintf(fid,'\n'); fclose(fid); end bart-0.4.02/python/000077500000000000000000000000001320577655200140625ustar00rootroot00000000000000bart-0.4.02/python/bart.py000066400000000000000000000035301320577655200153650ustar00rootroot00000000000000# Copyright 2016. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. # # Authors: # 2016 Siddharth Iyer import subprocess as sp import tempfile as tmp import cfl import os def bart(nargout, cmd, *args): if type(nargout) != int or nargout < 0: print("Usage: bart(, , )"); return None bart_path = os.environ['TOOLBOX_PATH'] + '/bart '; if not bart_path: if os.path.isfile('/usr/local/bin/bart'): bart_path = '/usr/local/bin' elif os.path.isfile('/usr/bin/bart'): bart_path = '/usr/bin' else: raise Exception('Environment variable TOOLBOX_PATH is not set.') name = tmp.NamedTemporaryFile().name nargin = len(args); infiles = [name + 'in' + str(idx) for idx in range(nargin)] in_str = ' '.join(infiles) for idx in range(nargin): cfl.writecfl(infiles[idx], args[idx]) outfiles = [name + 'out' + str(idx) for idx in range(nargout)] out_str = ' '.join(outfiles) #TODO: Windows option. 
ERR = os.system(bart_path + '/bart ' + cmd + ' ' + in_str + ' ' + out_str); for elm in infiles: if os.path.isfile(elm + '.cfl'): os.remove(elm + '.cfl') if os.path.isfile(elm + '.hdr'): os.remove(elm + '.hdr') output = [] for idx in range(nargout): elm = outfiles[idx] if not ERR: output.append(cfl.readcfl(elm)) if os.path.isfile(elm + '.cfl'): os.remove(elm + '.cfl') if os.path.isfile(elm + '.hdr'): os.remove(elm + '.hdr') if ERR: raise Exception("Command exited with an error.") if nargout == 1: output = output[0] return output bart-0.4.02/python/bartview.py000077500000000000000000000325061320577655200162700ustar00rootroot00000000000000#!/usr/bin/python # # Copyright 2015. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. # # Authors: # 2015 Frank Ong from __future__ import division import operator import numpy as np import sys import matplotlib import matplotlib.pyplot as plt from matplotlib.widgets import Slider, Button, RadioButtons from functools import partial import time import threading import os.path class DiscreteSlider(Slider): """A matplotlib slider widget with discrete steps.""" def __init__(self, *args, **kwargs): self.previous_val = kwargs['valinit'] Slider.__init__(self, *args, **kwargs) def set_val(self, val): discrete_val = round(val) xy = self.poly.xy xy[2] = discrete_val, 1 xy[3] = discrete_val, 0 self.poly.xy = xy self.valtext.set_text(self.valfmt % discrete_val) if self.drawon: self.ax.figure.canvas.draw() self.val = val if self.previous_val!=discrete_val: self.previous_val = discrete_val if not self.eventson: return for cid, func in self.observers.iteritems(): func(discrete_val) class BartView(object): def __init__(self, cflname): matplotlib.rcParams['toolbar'] = 'None' #matplotlib.rcParams['font.size'] = 6 # Read data self.cflname = sys.argv[1] self.im = self.readcfl(self.cflname) self.im_unsqueeze_shape = np.where( 
np.array(self.im.shape) > 1 )[0] self.im = self.im.squeeze() t1 = time.clock() # Reorder image self.Ndims = len( self.im.shape ) self.order = np.r_[:self.Ndims] self.im_ordered = self.im self.order3 = np.array([0,1,1]) # Slice image self.slice_num = np.zeros( self.Ndims, dtype = 'int' ); self.im_shape = self.im_ordered.shape self.im_slice = self.im_ordered[ (slice(None), slice(None)) + tuple(self.slice_num[2:]) ] # Create figure self.fig = plt.figure(facecolor='black', figsize=(9,6)) #self.fig = plt.figure(facecolor='black', figsize=(6,4)) self.fig.subplots_adjust( left=0.0 , bottom=0.0 , right=1.0 , top=1 - 0.25) self.fig.canvas.set_window_title(self.cflname) # Show image self.immax = np.max(abs(self.im)) self.l = plt.imshow( abs(self.im_slice) , cmap = "gray", vmin=0, vmax=self.immax) self.ax = plt.gca() self.asp = self.im_ordered.shape[1] / self.im_ordered.shape[0] self.aspect = 1 self.ax.set_aspect( 1 ) plt.axis('off') radios = [] buttons = [] sliders = [] # Create Radio Buttons for X Y dimensions dims = self.im_unsqueeze_shape[ self.order ].astype(str) for i in xrange(0,len(dims)): dims[i] = "Dim " + dims[i] oboxx_ax = plt.axes( [0, 1 - 0.03, 0.1, 0.03], axisbg = "gainsboro" ) oboxx_ax.set_xticks([]); oboxx_ax.set_yticks([]); orderx_ax = plt.axes( [0, 1 - 0.18, 0.1, 0.15], axisbg = 'gainsboro' ) orderx_radio = RadioButtons( orderx_ax, dims, activecolor = 'SteelBlue', active = 0 ) orderx_ax.text(0.5,1.05, 'Up/Down', horizontalalignment = 'center') radios.append( orderx_radio ) orderx_radio.on_clicked( self.update_orderx ) oboxy_ax = plt.axes( [0.1, 1 - 0.03, 0.1, 0.03], axisbg = "gainsboro" ) oboxy_ax.set_xticks([]); oboxy_ax.set_yticks([]); ordery_ax = plt.axes( [0.1, 1 - 0.18, 0.1, 0.15], axisbg = 'gainsboro' ) ordery_radio = RadioButtons( ordery_ax, dims, activecolor = 'SteelBlue', active = 1 ) ordery_ax.text(0.5,1.05, 'Left/Right', horizontalalignment = 'center') radios.append( ordery_radio ) ordery_radio.on_clicked( self.update_ordery ) # Create Radio 
buttons for mosaic self.mosaic_valid = False mbox_ax = plt.axes( [0.2, 1 - 0.03, 0.1, 0.03], axisbg = "gainsboro" ) mbox_ax.set_xticks([]); mbox_ax.set_yticks([]); mosaic_ax = plt.axes( [0.2, 1 - 0.18, 0.1, 0.15], axisbg = 'gainsboro' ) mosaic_radio = RadioButtons( mosaic_ax, dims, activecolor = 'SteelBlue', active = 1 ) mosaic_ax.text(0.5,1.05, 'Mosaic', horizontalalignment = 'center') radios.append( mosaic_radio ) mosaic_radio.on_clicked( self.update_mosaic ) # Create flip buttons self.flipx = 1; flipx_ax = plt.axes( [0.3, 1 - 0.09, 0.1, 0.09] ) flipx_button = Button( flipx_ax, 'Flip\nUp/Down', color='gainsboro' ) flipx_button.on_clicked(self.update_flipx); self.flipy = 1; flipy_ax = plt.axes( [0.3, 1 - 0.18, 0.1, 0.09] ) flipy_button = Button( flipy_ax, 'Flip\nLeft/Right', color='gainsboro' ) flipy_button.on_clicked(self.update_flipy); # Create Refresh buttons refresh_ax = plt.axes( [0.4, 1 - 0.09, 0.1, 0.09] ) refresh_button = Button( refresh_ax, 'Refresh', color='gainsboro' ) refresh_button.on_clicked(self.update_refresh); # Create Save button save_ax = plt.axes( [0.4, 1 - 0.18, 0.1, 0.09] ) save_button = Button( save_ax, 'Export to\nPNG', color='gainsboro' ) save_button.on_clicked(self.save); # Create dynamic refresh radio button #self.drefresh = threading.Event() #drefresh_ax = plt.axes( [0.4, 1 - 0.18, 0.1, 0.09] ) #drefresh_button = Button( drefresh_ax, 'Dynamic\nRefresh', color='gainsboro' ) #drefresh_button.on_clicked(self.update_drefresh); # Create Magnitude/phase radio button self.magnitude = True mag_ax = plt.axes( [0.5, 1 - 0.18, 0.1, 0.18], axisbg = 'gainsboro' ) mag_radio = RadioButtons( mag_ax, ('Mag','Phase') , activecolor = 'SteelBlue', active = 0 ) radios.append( mag_radio ) mag_radio.on_clicked( self.update_magnitude ) sbox_ax = plt.axes( [0.6, 1 - 0.18, 0.5, 0.18], axisbg='gainsboro') sbox_ax.set_xticks([]) sbox_ax.set_yticks([]) # Create aspect sliders aspect_ax = plt.axes( [0.65, 1 - 0.09 + 0.02, 0.1, 0.02], axisbg = 'white' ) aspect_slider 
= Slider( aspect_ax, "", 0.25, 4, valinit=1, color='SteelBlue') aspect_ax.text( 4 / 2,1.5, 'Aspect Ratio', horizontalalignment = 'center') sliders.append( aspect_slider ) aspect_slider.on_changed( self.update_aspect ) # Create contrast sliders self.vmin = 0 vmin_ax = plt.axes( [0.83, 1 - 0.09 + 0.02, 0.1, 0.02], axisbg = 'white' ) vmin_slider = Slider( vmin_ax, "", 0, 1, valinit=0, color='SteelBlue') vmin_ax.text(0.5,1.5, 'Contrast Min', horizontalalignment = 'center') sliders.append( vmin_slider ) vmin_slider.on_changed( self.update_vmin ) self.vmax = 1 vmax_ax = plt.axes( [0.83, 1 - 0.18 + 0.02, 0.1, 0.02], axisbg = 'white' ) vmax_slider = Slider( vmax_ax, "", 0, 1, valinit=1, color='SteelBlue') vmax_ax.text(0.5,1.5, 'Contrast Max', horizontalalignment = 'center') sliders.append( vmax_slider ) vmax_slider.on_changed( self.update_vmax ) # Create sliders for choosing slices box_ax = plt.axes( [0, 1 - 0.25, 1, 0.07], axisbg='gainsboro') box_ax.set_xticks([]) box_ax.set_yticks([]) slider_thick = 0.02 slider_start = 0.1 ax = [] for d in np.r_[:self.Ndims]: slice_ax = plt.axes( [0.01 + 1 / self.Ndims * d, 1 - 0.24, 0.8 / self.Ndims, slider_thick] , axisbg='white') slice_slider = DiscreteSlider( slice_ax, "", 0, self.im_shape[d]-1, valinit=self.slice_num[d],valfmt='%i', color='SteelBlue') slice_ax.text( (self.im_shape[d]-1)/2,1.5, 'Dim %d Slice' % self.im_unsqueeze_shape[d], horizontalalignment = 'center' ) sliders.append(slice_slider); slice_slider.on_changed( partial( self.update_slice, d ) ) plt.show() def readcfl(self, name): h = open(name + ".hdr", "r") h.readline() # skip l = h.readline() dims = [int(i) for i in l.split( )] n = reduce(operator.mul, dims, 1) h.close() return np.memmap( name + ".cfl", dtype = np.complex64, mode='r', shape=tuple(dims), order='F' ) def save( self, event ): extent = self.ax.get_window_extent().transformed(self.fig.dpi_scale_trans.inverted()) num = 0 fname = self.cflname + '_' + str(num) + '.png' while( os.path.isfile(fname) ): num += 1 
fname = self.cflname + '_' + str(num) + '.png' self.fig.savefig(fname, bbox_inches=extent) def update_flipx( self, event ): self.flipx *= -1 self.update_image() def update_flipy( self, event ): self.flipy *= -1 self.update_image() def update_refresh( self, event ): self.update_image() def dynamic_refresh( self ): while( self.drefresh.is_set() ): self.update_image() def update_drefresh( self, event ): if ( self.drefresh.is_set() ): self.drefresh.clear() else: self.drefresh.set() th = threading.Thread( target = self.dynamic_refresh ) th.start() def update_aspect( self, aspect ): self.aspect = aspect self.ax.set_aspect( self.asp * self.im_ordered.shape[0] / self.im_ordered.shape[1] * aspect ) def update_vmax( self, vmax ): self.vmax = max(self.vmin, vmax) self.l.set_clim( vmax = self.vmax * self.immax ); def update_vmin( self, vmin ): self.vmin = min(self.vmax,vmin) self.l.set_clim( vmin = self.vmin * self.immax ); def update_magnitude( self, l ): self.magnitude = ( l == 'Mag' ) if (self.magnitude): self.l.set_cmap('gray') else: self.l.set_cmap('hsv') self.update_image() def update_orderx( self, l ): l = int(l[4:]) self.order3[0] = np.where( self.im_unsqueeze_shape == l )[0] self.update_ordered_image() def update_ordery( self, l ): l = int(l[4:]) self.order3[1] = np.where( self.im_unsqueeze_shape == l )[0] self.update_ordered_image() def update_ordered_image(self): self.mosaic_valid = len( self.order3[:3] ) == len( set( self.order3[:3] ) ) self.order_valid = len( self.order3[:2] ) == len( set( self.order3[:2] ) ); if ( self.mosaic_valid ): self.order[:3] = self.order3[:3] order_remain = np.r_[:self.Ndims] for t in np.r_[:3]: order_remain = order_remain[ (order_remain != self.order[t] ) ] self.order[3:] = order_remain self.im_ordered = np.transpose( self.im, self.order ) self.ax.set_aspect( self.asp * self.im_ordered.shape[0] / self.im_ordered.shape[1] * self.aspect ) self.update_image() elif ( self.order_valid ): self.order[:2] = self.order3[:2] order_remain = 
np.r_[:self.Ndims] for t in np.r_[:2]: order_remain = order_remain[ (order_remain != self.order[t] ) ] self.order[2:] = order_remain self.im_ordered = np.transpose( self.im, self.order ) self.ax.set_aspect( self.asp * self.im_ordered.shape[0] / self.im_ordered.shape[1] * self.aspect ) self.update_image() def update_image( self ): self.immax = np.max(abs(self.im)) self.l.set_clim( vmin = self.vmin * self.immax , vmax = self.vmax * self.immax ); if ( self.mosaic_valid ): im_slice = self.im_ordered[ (slice(None,None,self.flipx), slice(None,None,self.flipy), slice(None)) + tuple(self.slice_num[self.order[3:]])] im_slice = self.mosaic( im_slice ) else: im_slice = self.im_ordered[ (slice(None,None,self.flipx), slice(None,None,self.flipy)) + tuple(self.slice_num[self.order[2:]]) ] if self.magnitude: self.l.set_data( abs(im_slice) ) else: self.l.set_data( (np.angle(im_slice) + np.pi) / (2 * np.pi) ) self.fig.canvas.draw() def update_slice( self, d, s ): self.slice_num[d] = int(round(s)) self.update_image() def mosaic( self, im ): im = im.squeeze() (x, y, z) = im.shape z2 = int( np.ceil( z ** 0.5 ) ) z = int( z2 ** 2 ) im = np.pad( im, [(0,0), (0,0), (0, z - im.shape[2] )], mode='constant') im = im.reshape( (x, y * z, 1), order = 'F' ) im = im.transpose( (1, 2, 0) ) im = im.reshape( (y * z2 , z2, x), order = 'F' ) im = im.transpose( (2, 1, 0) ) im = im.reshape( (x * z2, y * z2), order = 'F' ) return im def update_mosaic( self, l ): l = int(l[4:]) self.order3[2] = np.where( self.im_unsqueeze_shape == l )[0] self.update_ordered_image() if __name__ == "__main__": # Error if more than 1 argument if (len(sys.argv) != 2): print "BartView: multidimensional image viewer for cfl" print "Usage: bview cflname" exit() BartView( sys.argv[1] ) bart-0.4.02/python/bartview3.py000077500000000000000000000326101320577655200163470ustar00rootroot00000000000000#!/usr/bin/python3 # # Copyright 2017. Massachusetts Institute of Technology. # Copyright 2015. 
The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. # # Authors: # 2015 Frank Ong # 2017 Siddharth Iyer import operator import numpy as np import sys import matplotlib import matplotlib.pyplot as plt from matplotlib.widgets import Slider, Button, RadioButtons from functools import partial, reduce import time import threading import os.path class DiscreteSlider(Slider): """A matplotlib slider widget with discrete steps.""" def __init__(self, *args, **kwargs): self.previous_val = kwargs['valinit'] Slider.__init__(self, *args, **kwargs) def set_val(self, val): discrete_val = round(val) xy = self.poly.xy xy[2] = discrete_val, 1 xy[3] = discrete_val, 0 self.poly.xy = xy self.valtext.set_text(self.valfmt % discrete_val) if self.drawon: self.ax.figure.canvas.draw() self.val = val if self.previous_val!=discrete_val: self.previous_val = discrete_val if not self.eventson: return for cid, func in self.observers.items(): func(discrete_val) class BartView(object): def __init__(self, cflname): matplotlib.rcParams['toolbar'] = 'None' #matplotlib.rcParams['font.size'] = 6 # Read data self.cflname = sys.argv[1] self.im = self.readcfl(self.cflname) self.im_unsqueeze_shape = np.where( np.array(self.im.shape) > 1 )[0] self.im = self.im.squeeze() t1 = time.clock() # Reorder image self.Ndims = len( self.im.shape ) self.order = np.r_[:self.Ndims] self.im_ordered = self.im self.order3 = np.array([0,1,1]) # Slice image self.slice_num = np.zeros( self.Ndims, dtype = 'int' ); self.im_shape = self.im_ordered.shape self.im_slice = self.im_ordered[ (slice(None), slice(None)) + tuple(self.slice_num[2:]) ] # Create figure self.fig = plt.figure(facecolor='black', figsize=(9,6)) #self.fig = plt.figure(facecolor='black', figsize=(6,4)) self.fig.subplots_adjust( left=0.0 , bottom=0.0 , right=1.0 , top=1 - 0.25) self.fig.canvas.set_window_title(self.cflname) # Show image self.immax = 
np.max(abs(self.im)) self.l = plt.imshow( abs(self.im_slice) , cmap = "gray", vmin=0, vmax=self.immax) self.ax = plt.gca() self.asp = self.im_ordered.shape[1] / self.im_ordered.shape[0] self.aspect = 1 self.ax.set_aspect( 1 ) plt.axis('off') radios = [] buttons = [] sliders = [] # Create Radio Buttons for X Y dimensions dims = self.im_unsqueeze_shape[ self.order ].astype(str) for i in range(0,len(dims)): dims[i] = "Dim " + dims[i] oboxx_ax = plt.axes( [0, 1 - 0.03, 0.1, 0.03], axisbg = "gainsboro" ) oboxx_ax.set_xticks([]); oboxx_ax.set_yticks([]); orderx_ax = plt.axes( [0, 1 - 0.18, 0.1, 0.15], axisbg = 'gainsboro' ) orderx_radio = RadioButtons( orderx_ax, dims, activecolor = 'SteelBlue', active = 0 ) orderx_ax.text(0.5,1.05, 'Up/Down', horizontalalignment = 'center') radios.append( orderx_radio ) orderx_radio.on_clicked( self.update_orderx ) oboxy_ax = plt.axes( [0.1, 1 - 0.03, 0.1, 0.03], axisbg = "gainsboro" ) oboxy_ax.set_xticks([]); oboxy_ax.set_yticks([]); ordery_ax = plt.axes( [0.1, 1 - 0.18, 0.1, 0.15], axisbg = 'gainsboro' ) ordery_radio = RadioButtons( ordery_ax, dims, activecolor = 'SteelBlue', active = 1 ) ordery_ax.text(0.5,1.05, 'Left/Right', horizontalalignment = 'center') radios.append( ordery_radio ) ordery_radio.on_clicked( self.update_ordery ) # Create Radio buttons for mosaic self.mosaic_valid = False mbox_ax = plt.axes( [0.2, 1 - 0.03, 0.1, 0.03], axisbg = "gainsboro" ) mbox_ax.set_xticks([]); mbox_ax.set_yticks([]); mosaic_ax = plt.axes( [0.2, 1 - 0.18, 0.1, 0.15], axisbg = 'gainsboro' ) mosaic_radio = RadioButtons( mosaic_ax, dims, activecolor = 'SteelBlue', active = 1 ) mosaic_ax.text(0.5,1.05, 'Mosaic', horizontalalignment = 'center') radios.append( mosaic_radio ) mosaic_radio.on_clicked( self.update_mosaic ) # Create flip buttons self.flipx = 1; flipx_ax = plt.axes( [0.3, 1 - 0.09, 0.1, 0.09] ) flipx_button = Button( flipx_ax, 'Flip\nUp/Down', color='gainsboro' ) flipx_button.on_clicked(self.update_flipx); self.flipy = 1; flipy_ax = 
plt.axes( [0.3, 1 - 0.18, 0.1, 0.09] ) flipy_button = Button( flipy_ax, 'Flip\nLeft/Right', color='gainsboro' ) flipy_button.on_clicked(self.update_flipy); # Create Refresh buttons refresh_ax = plt.axes( [0.4, 1 - 0.09, 0.1, 0.09] ) refresh_button = Button( refresh_ax, 'Refresh', color='gainsboro' ) refresh_button.on_clicked(self.update_refresh); # Create Save button save_ax = plt.axes( [0.4, 1 - 0.18, 0.1, 0.09] ) save_button = Button( save_ax, 'Export to\nPNG', color='gainsboro' ) save_button.on_clicked(self.save); # Create dynamic refresh radio button #self.drefresh = threading.Event() #drefresh_ax = plt.axes( [0.4, 1 - 0.18, 0.1, 0.09] ) #drefresh_button = Button( drefresh_ax, 'Dynamic\nRefresh', color='gainsboro' ) #drefresh_button.on_clicked(self.update_drefresh); # Create Magnitude/phase radio button self.magnitude = True mag_ax = plt.axes( [0.5, 1 - 0.18, 0.1, 0.18], axisbg = 'gainsboro' ) mag_radio = RadioButtons( mag_ax, ('Mag','Phase') , activecolor = 'SteelBlue', active = 0 ) radios.append( mag_radio ) mag_radio.on_clicked( self.update_magnitude ) sbox_ax = plt.axes( [0.6, 1 - 0.18, 0.5, 0.18], axisbg='gainsboro') sbox_ax.set_xticks([]) sbox_ax.set_yticks([]) # Create aspect sliders aspect_ax = plt.axes( [0.65, 1 - 0.09 + 0.02, 0.1, 0.02], axisbg = 'white' ) aspect_slider = Slider( aspect_ax, "", 0.25, 4, valinit=1, color='SteelBlue') aspect_ax.text( 4 / 2,1.5, 'Aspect Ratio', horizontalalignment = 'center') sliders.append( aspect_slider ) aspect_slider.on_changed( self.update_aspect ) # Create contrast sliders self.vmin = 0 vmin_ax = plt.axes( [0.83, 1 - 0.09 + 0.02, 0.1, 0.02], axisbg = 'white' ) vmin_slider = Slider( vmin_ax, "", 0, 1, valinit=0, color='SteelBlue') vmin_ax.text(0.5,1.5, 'Contrast Min', horizontalalignment = 'center') sliders.append( vmin_slider ) vmin_slider.on_changed( self.update_vmin ) self.vmax = 1 vmax_ax = plt.axes( [0.83, 1 - 0.18 + 0.02, 0.1, 0.02], axisbg = 'white' ) vmax_slider = Slider( vmax_ax, "", 0, 1, valinit=1, 
color='SteelBlue') vmax_ax.text(0.5,1.5, 'Contrast Max', horizontalalignment = 'center') sliders.append( vmax_slider ) vmax_slider.on_changed( self.update_vmax ) # Create sliders for choosing slices box_ax = plt.axes( [0, 1 - 0.25, 1, 0.07], axisbg='gainsboro') box_ax.set_xticks([]) box_ax.set_yticks([]) slider_thick = 0.02 slider_start = 0.1 ax = [] for d in np.r_[:self.Ndims]: slice_ax = plt.axes( [0.01 + 1 / self.Ndims * d, 1 - 0.24, 0.8 / self.Ndims, slider_thick] , axisbg='white') slice_slider = DiscreteSlider( slice_ax, "", 0, self.im_shape[d]-1, valinit=self.slice_num[d],valfmt='%i', color='SteelBlue') slice_ax.text( (self.im_shape[d]-1)/2,1.5, 'Dim %d Slice' % self.im_unsqueeze_shape[d], horizontalalignment = 'center' ) sliders.append(slice_slider); slice_slider.on_changed( partial( self.update_slice, d ) ) plt.show() def readcfl(self, name): h = open(name + ".hdr", "r") h.readline() # skip l = h.readline() dims = [int(i) for i in l.split( )] n = reduce(operator.mul, dims, 1) h.close() return np.memmap( name + ".cfl", dtype = np.complex64, mode='r', shape=tuple(dims), order='F' ) def save( self, event ): extent = self.ax.get_window_extent().transformed(self.fig.dpi_scale_trans.inverted()) num = 0 fname = self.cflname + '_' + str(num) + '.png' while( os.path.isfile(fname) ): num += 1 fname = self.cflname + '_' + str(num) + '.png' self.fig.savefig(fname, bbox_inches=extent) def update_flipx( self, event ): self.flipx *= -1 self.update_image() def update_flipy( self, event ): self.flipy *= -1 self.update_image() def update_refresh( self, event ): self.update_image() def dynamic_refresh( self ): while( self.drefresh.is_set() ): self.update_image() def update_drefresh( self, event ): if ( self.drefresh.is_set() ): self.drefresh.clear() else: self.drefresh.set() th = threading.Thread( target = self.dynamic_refresh ) th.start() def update_aspect( self, aspect ): self.aspect = aspect self.ax.set_aspect( self.asp * self.im_ordered.shape[0] / self.im_ordered.shape[1] 
* aspect ) def update_vmax( self, vmax ): self.vmax = max(self.vmin, vmax) self.l.set_clim( vmax = self.vmax * self.immax ); def update_vmin( self, vmin ): self.vmin = min(self.vmax,vmin) self.l.set_clim( vmin = self.vmin * self.immax ); def update_magnitude( self, l ): self.magnitude = ( l == 'Mag' ) if (self.magnitude): self.l.set_cmap('gray') else: self.l.set_cmap('hsv') self.update_image() def update_orderx( self, l ): l = int(l[4:]) self.order3[0] = np.where( self.im_unsqueeze_shape == l )[0] self.update_ordered_image() def update_ordery( self, l ): l = int(l[4:]) self.order3[1] = np.where( self.im_unsqueeze_shape == l )[0] self.update_ordered_image() def update_ordered_image(self): self.mosaic_valid = len( self.order3[:3] ) == len( set( self.order3[:3] ) ) self.order_valid = len( self.order3[:2] ) == len( set( self.order3[:2] ) ); if ( self.mosaic_valid ): self.order[:3] = self.order3[:3] order_remain = np.r_[:self.Ndims] for t in np.r_[:3]: order_remain = order_remain[ (order_remain != self.order[t] ) ] self.order[3:] = order_remain self.im_ordered = np.transpose( self.im, self.order ) self.ax.set_aspect( self.asp * self.im_ordered.shape[0] / self.im_ordered.shape[1] * self.aspect ) self.update_image() elif ( self.order_valid ): self.order[:2] = self.order3[:2] order_remain = np.r_[:self.Ndims] for t in np.r_[:2]: order_remain = order_remain[ (order_remain != self.order[t] ) ] self.order[2:] = order_remain self.im_ordered = np.transpose( self.im, self.order ) self.ax.set_aspect( self.asp * self.im_ordered.shape[0] / self.im_ordered.shape[1] * self.aspect ) self.update_image() def update_image( self ): self.immax = np.max(abs(self.im)) self.l.set_clim( vmin = self.vmin * self.immax , vmax = self.vmax * self.immax ); if ( self.mosaic_valid ): im_slice = self.im_ordered[ (slice(None,None,self.flipx), slice(None,None,self.flipy), slice(None)) + tuple(self.slice_num[self.order[3:]])] im_slice = self.mosaic( im_slice ) else: im_slice = self.im_ordered[ 
(slice(None,None,self.flipx), slice(None,None,self.flipy)) + tuple(self.slice_num[self.order[2:]]) ] if self.magnitude: self.l.set_data( abs(im_slice) ) else: self.l.set_data( (np.angle(im_slice) + np.pi) / (2 * np.pi) ) self.fig.canvas.draw() def update_slice( self, d, s ): self.slice_num[d] = int(round(s)) self.update_image() def mosaic( self, im ): im = im.squeeze() (x, y, z) = im.shape z2 = int( np.ceil( z ** 0.5 ) ) z = int( z2 ** 2 ) im = np.pad( im, [(0,0), (0,0), (0, z - im.shape[2] )], mode='constant') im = im.reshape( (x, y * z, 1), order = 'F' ) im = im.transpose( (1, 2, 0) ) im = im.reshape( (y * z2 , z2, x), order = 'F' ) im = im.transpose( (2, 1, 0) ) im = im.reshape( (x * z2, y * z2), order = 'F' ) return im def update_mosaic( self, l ): l = int(l[4:]) self.order3[2] = np.where( self.im_unsqueeze_shape == l )[0] self.update_ordered_image() if __name__ == "__main__": # Error if more than 1 argument if (len(sys.argv) != 2): print("BartView: multidimensional image viewer for cfl") print("Usage: bview cflname") exit() BartView( sys.argv[1] ) bart-0.4.02/python/cfl.py000066400000000000000000000022031320577655200151750ustar00rootroot00000000000000# Copyright 2013-2015. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. 
# # Authors: # 2013 Martin Uecker # 2015 Jonathan Tamir import numpy as np def readcfl(name): # get dims from .hdr h = open(name + ".hdr", "r") h.readline() # skip l = h.readline() h.close() dims = [int(i) for i in l.split( )] # remove singleton dimensions from the end n = np.prod(dims) dims_prod = np.cumprod(dims) dims = dims[:np.searchsorted(dims_prod, n)+1] # load data and reshape into dims d = open(name + ".cfl", "r") a = np.fromfile(d, dtype=np.complex64, count=n); d.close() return a.reshape(dims, order='F') # column-major def writecfl(name, array): h = open(name + ".hdr", "w") h.write('# Dimensions\n') for i in (array.shape): h.write("%d " % i) h.write('\n') h.close() d = open(name + ".cfl", "w") array.T.astype(np.complex64).tofile(d) # tranpose for column-major order d.close() bart-0.4.02/rules/000077500000000000000000000000001320577655200136735ustar00rootroot00000000000000bart-0.4.02/rules/box.mk000066400000000000000000000002021320577655200150060ustar00rootroot00000000000000 boxsrcs := $(XTARGETS:%=src/%.c) boxobjs := $(boxsrcs:.c=.o) .INTERMEDIATE: $(boxobjs) lib/libbox.a: libbox.a($(boxobjs)) bart-0.4.02/rules/calib.mk000066400000000000000000000007061320577655200153010ustar00rootroot00000000000000# Copyright 2013. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. calibsrcs := $(wildcard $(srcdir)/calib/*.c) calibobjs := $(calibsrcs:.c=.o) ifeq ($(CUDA),1) calibcudasrcs += $(wildcard $(srcdir)/calib/*.cu) calibobjs += $(calibcudasrcs:.cu=.o) endif .INTERMEDIATE: $(calibobjs) lib/libcalib.a: libcalib.a($(calibobjs)) bart-0.4.02/rules/dfwavelet.mk000066400000000000000000000007661320577655200162160ustar00rootroot00000000000000# Copyright 2015. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. 
dfwaveletsrcs := $(wildcard $(srcdir)/dfwavelet/*.c) dfwaveletobjs := $(dfwaveletsrcs:.c=.o) dfwaveletcudasrcs := $(wildcard $(srcdir)/dfwavelet/*.cu) ifeq ($(CUDA),1) dfwaveletobjs += $(dfwaveletcudasrcs:.cu=.o) endif .INTERMEDIATE: $(dfwaveletobjs) lib/libdfwavelet.a: libdfwavelet.a($(dfwaveletobjs)) bart-0.4.02/rules/grecon.mk000066400000000000000000000005371320577655200155060ustar00rootroot00000000000000# Copyright 2013. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. greconsrcs := $(wildcard $(srcdir)/grecon/*.c) greconobjs := $(greconsrcs:.c=.o) .INTERMEDIATE: $(greconobjs) lib/libgrecon.a: libgrecon.a($(greconobjs)) bart-0.4.02/rules/ismrm.mk000066400000000000000000000005761320577655200153630ustar00rootroot00000000000000# Copyright 2013. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. ismrmsrcs := $(wildcard $(srcdir)/ismrm/*.c) ismrmobjs := $(ismrmsrcs:.c=.o) .INTERMEDIATE: $(ismrmobjs) lib/libismrm.a: libismrm.a($(ismrmobjs)) lib/libismrm.a: CPPFLAGS += $(ISMRM_H) bart-0.4.02/rules/iter.mk000066400000000000000000000006721320577655200151740ustar00rootroot00000000000000# Copyright 2014. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. 
itersrcs := $(wildcard $(srcdir)/iter/*.c) itercudasrcs := $(wildcard $(srcdir)/iter/*.cu) iterobjs := $(itersrcs:.c=.o) ifeq ($(CUDA),1) iterobjs += $(itercudasrcs:.cu=.o) endif .INTERMEDIATE: $(iterobjs) lib/libiter.a: libiter.a($(iterobjs)) bart-0.4.02/rules/lapacke.mk000066400000000000000000000002471320577655200156270ustar00rootroot00000000000000 lapackesrcs := $(wildcard $(srcdir)/lapacke/*.c) lapackeobjs := $(lapackesrcs:.c=.o) .INTERMEDIATE: $(lapackeobjs) lib/liblapacke.a: liblapacke.a($(lapackeobjs)) bart-0.4.02/rules/linops.mk000066400000000000000000000010251320577655200155260ustar00rootroot00000000000000# Copyright 2014. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. linopssrcs := $(wildcard $(srcdir)/linops/*.c) linopscudasrcs := $(wildcard $(srcdir)/linops/*.cu) linopsobjs := $(linopssrcs:.c=.o) ifeq ($(CUDA),1) linopsobjs += $(linopscudasrcs:.cu=.o) endif .INTERMEDIATE: $(linopsobjs) lib/liblinops.a: liblinops.a($(linopsobjs)) UTARGETS += test_linop_matrix MODULES_test_linop_matrix += -llinops bart-0.4.02/rules/lowrank.mk000066400000000000000000000007261320577655200157060ustar00rootroot00000000000000# Copyright 2015. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. 
lowranksrcs := $(wildcard $(srcdir)/lowrank/*.c) #lowranksrcs := $(wildcard $(srcdir)/lowrank/lr*.c) lowrankobjs := $(lowranksrcs:.c=.o) .INTERMEDIATE: $(lowrankobjs) lib/liblowrank.a: liblowrank.a($(lowrankobjs)) UTARGETS += test_batchsvd MODULES_test_batchsvd = -llowrank bart-0.4.02/rules/make_symbol_table.sh000077500000000000000000000001311320577655200176760ustar00rootroot00000000000000#!/bin/bash EXEC=$1 OUT=$2 nm --defined-only ${EXEC} | cut -c11-16,19- | sort > ${OUT} bart-0.4.02/rules/misc.mk000066400000000000000000000007621320577655200151640ustar00rootroot00000000000000# Copyright 2013. The Regents of the University of California. # Copyright 2015. Martin Uecker. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. miscsrcs := $(wildcard $(srcdir)/misc/*.c) miscobjs := $(miscsrcs:.c=.o) .INTERMEDIATE: $(miscobjs) lib/libmisc.a: libmisc.a($(miscobjs)) DOTHIS := $(shell $(root)/rules/update-version.sh) $(srcdir)/misc/version.o: $(srcdir)/misc/version.inc UTARGETS += test_pattern bart-0.4.02/rules/nlops.mk000066400000000000000000000003131320577655200153540ustar00rootroot00000000000000 nlopssrcs := $(wildcard $(srcdir)/nlops/*.c) nlopscudasrcs := $(wildcard $(srcdir)/nlops/*.cu) nlopsobjs := $(nlopssrcs:.c=.o) .INTERMEDIATE: $(nlopsobjs) lib/libnlops.a: libnlops.a($(nlopsobjs)) bart-0.4.02/rules/noir.mk000066400000000000000000000005171320577655200151760ustar00rootroot00000000000000# Copyright 2013. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. noirsrcs := $(wildcard $(srcdir)/noir/*.c) noirobjs := $(noirsrcs:.c=.o) .INTERMEDIATE: $(noirobjs) lib/libnoir.a: libnoir.a($(noirobjs)) bart-0.4.02/rules/noncart.mk000066400000000000000000000005461320577655200156750ustar00rootroot00000000000000# Copyright 2014. The Regents of the University of California. # All rights reserved. 
Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. noncartsrcs := $(wildcard $(srcdir)/noncart/*.c) noncartobjs := $(noncartsrcs:.c=.o) .INTERMEDIATE: $(noncartobjs) lib/libnoncart.a: libnoncart.a($(noncartobjs)) bart-0.4.02/rules/num.mk000066400000000000000000000010331320577655200150200ustar00rootroot00000000000000# Copyright 2013. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. numsrcs := $(wildcard $(srcdir)/num/*.c) numcudasrcs := $(wildcard $(srcdir)/num/*.cu) numobjs := $(numsrcs:.c=.o) ifeq ($(CUDA),1) numobjs += $(numcudasrcs:.cu=.o) endif .INTERMEDIATE: $(numobjs) lib/libnum.a: libnum.a($(numobjs)) UTARGETS += test_multind test_flpmath test_splines test_linalg test_polynom test_window UTARGETS += test_blas bart-0.4.02/rules/sake.mk000066400000000000000000000005161320577655200151510ustar00rootroot00000000000000# Copyright 2013. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. sakesrcs := $(wildcard $(srcdir)/sake/*.c) sakeobjs := $(sakesrcs:.c=.o) .INTERMEDIATE: $(sakeobjs) lib/libsake.a: libsake.a($(sakeobjs)) bart-0.4.02/rules/sense.mk000066400000000000000000000005271320577655200153450ustar00rootroot00000000000000# Copyright 2013. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. sensesrcs := $(wildcard $(srcdir)/sense/*.c) senseobjs := $(sensesrcs:.c=.o) .INTERMEDIATE: $(senseobjs) lib/libsense.a: libsense.a($(senseobjs)) bart-0.4.02/rules/simu.mk000066400000000000000000000006171320577655200152050ustar00rootroot00000000000000# Copyright 2013. The Regents of the University of California. # All rights reserved. 
Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. simusrcs := $(wildcard $(srcdir)/simu/*.c) simuobjs := $(simusrcs:.c=.o) .INTERMEDIATE: $(simuobjs) lib/libsimu.a: libsimu.a($(simuobjs)) UTARGETS += test_biot_savart MODULES_test_biot_savart += -lsimu bart-0.4.02/rules/update-if-changed.sh000077500000000000000000000000561320577655200175000ustar00rootroot00000000000000#!/bin/bash cmp -s $1 $2 || mv $1 $2 rm -f $1 bart-0.4.02/rules/update-version.sh000077500000000000000000000002211320577655200171720ustar00rootroot00000000000000#!/bin/bash echo 'VERSION('`./git-version.sh`')' > version.new ./rules/update-if-changed.sh version.new src/misc/version.inc rm -f version.new bart-0.4.02/rules/update_commands.sh000077500000000000000000000005771320577655200174060ustar00rootroot00000000000000#!/bin/bash set -e BINDIR=$(dirname $0) BART_EXE=$1 shift OUTPUT=$1 shift XTARGETS=$@ TEST_FILE_COMMANDS=$(mktemp) echo "AUTOGENERATED. DO NOT EDIT." > ${TEST_FILE_COMMANDS} for cmd in ${XTARGETS} ; do printf "\n\n--%s--\n\n" $cmd ; ${BART_EXE} $cmd -h ; done >> ${TEST_FILE_COMMANDS} ${BINDIR}/update-if-changed.sh ${TEST_FILE_COMMANDS} ${OUTPUT} rm -rf ${TEST_FILE_COMMANDS} bart-0.4.02/rules/wavelet.mk000066400000000000000000000004361320577655200156760ustar00rootroot00000000000000 waveletsrcs := $(wildcard $(srcdir)/wavelet/*.c) waveletobjs := $(waveletsrcs:.c=.o) waveletcudasrcs := $(wildcard $(srcdir)/wavelet/*.cu) ifeq ($(CUDA),1) waveletobjs += $(waveletcudasrcs:.cu=.o) endif .INTERMEDIATE: $(waveletobjs) lib/libwavelet.a: libwavelet.a($(waveletobjs)) bart-0.4.02/save/000077500000000000000000000000001320577655200134775ustar00rootroot00000000000000bart-0.4.02/save/nsv/000077500000000000000000000000001320577655200143055ustar00rootroot00000000000000bart-0.4.02/save/nsv/README.txt000066400000000000000000000000731320577655200160030ustar00rootroot00000000000000This folder is to save the simulations done by estvar/nsv. 
bart-0.4.02/scripts/000077500000000000000000000000001320577655200142305ustar00rootroot00000000000000bart-0.4.02/scripts/bart_completion.sh000066400000000000000000000014721320577655200177510ustar00rootroot00000000000000# bart parameter-completion function _bart() { local cur=${COMP_WORDS[$COMP_CWORD]} if [ $COMP_CWORD -eq 1 ] ; then local CMDS=$(bart | tail -n +2) COMPREPLY=($(compgen -W "$CMDS" -- "$cur")); else local bcmd=${COMP_WORDS[1]} case $cur in -*) COMPREPLY=($(bart ${bcmd} -h | grep -o -E "^${cur}\w*")) ;; *) case $bcmd in twixread) COMPREPLY=($(compgen -o plusdirs -f -X '!*.dat' -- ${cur})) ;; *) local CFLS=$(compgen -o plusdirs -f -X '!*.hdr' -- ${cur}) local COOS=$(compgen -o plusdirs -f -X '!*.coo' -- ${cur}); local RAS=$(compgen -o plusdirs -f -X '!*.ra' -- ${cur}); local suffix=".hdr" COMPREPLY=($(for i in ${CFLS} ${COOS} ${RAS}; do echo ${i%$suffix} ; done)) ;; esac ;; esac fi return 0 } complete -o filenames -F _bart bart ./bart bart-0.4.02/scripts/grasp.sh000066400000000000000000000125061320577655200157040ustar00rootroot00000000000000#!/bin/bash # Copyright 2015. The Regents of the University of California. # All rights reserved. Use of this source code is governed by # a BSD-style license which can be found in the LICENSE file. # # Authors: # 2015 Martin Uecker # # Compressed sensing parallel imaging reconstruction with temporal # total-variation regularization for Siemens radial VIBE sequence # with golden-angle sampling (GRASP). # set -e # default settings export SPOKES=21 export SKIP=0 export CALIB=400 export ITER=30 export REG=0.05 SCALE=0.6 LOGFILE=/dev/stdout MAXPROC=4 MAXTHREADS=4 title=$(cat <<- EOF (BART-)GRASP v0.3 (Berkeley Advanced Reconstruction Toolbox) --- EXPERIMENTAL --- FOR RESEARCH USE ONLY --- EOF ) helpstr=$(cat <<- EOF Compressed sensing parallel imaging reconstruction with temporal total-variation regularization for Siemens radial VIBE sequence with golden-angle sampling (GRASP). 
This script requires the Berkeley Advanced Reconstruction Toolbox version 0.2.09. (later versions may also work). -s spokes number of spokes per frame -r lambda regularization parameter -p maxproc max. number of slices processed in parallel -t maxthreads max. number of threads per slice -l logfile -h help EOF ) usage="Usage: $0 [-h] [-s spokes] [-r lambda] " echo "$title" echo while getopts "hl:s:p:t:r:" opt; do case $opt in s) SPOKES=$OPTARG ;; r) REG=$OPTARG ;; h) echo "$usage" echo echo "$helpstr" exit 0 ;; l) LOGFILE=$(readlink -f "$OPTARG") ;; p) MAXPROC=$OPTARG ;; t) MAXTHREADS=$OPTARG ;; \?) echo "$usage" >&2 exit 1 ;; esac done shift $((OPTIND - 1)) if [ $# -lt 2 ] ; then echo "$usage" >&2 exit 1 fi export PATH=$TOOLBOX_PATH:$PATH input=$(readlink -f "$1") output=$(readlink -f "$2") if [ ! -e $input ] ; then echo "Input file does not exist." >&2 echo "$usage" >&2 exit 1 fi if [ ! -e $TOOLBOX_PATH/bart ] ; then echo "\$TOOLBOX_PATH is not set correctly!" >&2 exit 1 fi #WORKDIR=$(mktemp -d) # Mac: http://unix.stackexchange.com/questions/30091/fix-or-alternative-for-mktemp-in-os-x WORKDIR=`mktemp -d 2>/dev/null || mktemp -d -t 'mytmpdir'` trap 'rm -rf "$WORKDIR"' EXIT cd $WORKDIR # start group for redirection of output to the logfile { # read TWIX file bart twixread -A $input grasp export READ=$(bart show -d0 grasp) export COILS=$(bart show -d3 grasp) export PHASES=$(($(bart show -d1 grasp) / $SPOKES)) export OMP_NUM_THREADS=$((MAXPROC * $MAXTHREADS)) # zero-pad #flip $(bitmask 2) grasp grasp2 #resize 2 64 grasp2 grasp #circshift 2 10 grasp grasp2 #fft -u $(bitmask 2) grasp2 grasp_hybrid #rm grasp.* grasp2.* # inverse FFT along 3rd dimension bart fft -i -u $(bart bitmask 2) grasp grasp_hybrid rm grasp.cfl grasp.hdr SLICES=$(bart show -d2 grasp_hybrid) # create trajectory with 400 spokes and 2x oversampling bart traj -G -x$READ -y$CALIB r bart scale $SCALE r rcalib # create trajectory with 2064 spokes and 2x oversampling bart traj -G -x$READ -y$(($SPOKES * 
$PHASES)) r bart scale $SCALE r r2 # split off time dimension into index 10 bart reshape $(bart bitmask 2 10) $SPOKES $PHASES r2 rfull # number of threads per slice export OMP_NUM_THREADS=$MAXTHREADS calib_slice() { # extract slice bart slice 2 $1 grasp_hybrid grasp1-$1 # extract first $CALIB spokes bart extract 1 $(($SKIP + 0)) $(($SKIP + $CALIB - 1)) grasp1-$1 grasp2-$1 # reshape dimensions bart reshape $(bart bitmask 0 1 2 3) 1 $READ $CALIB $COILS grasp2-$1 grasp3-$1 # apply inverse nufft to first $CALIB spokes bart nufft -i -t rcalib grasp3-$1 img-$1.coo } recon_slice() { # extract sensitivities for slice bart slice 2 $1 sens sens-$1 # extract spokes and split-off time dim bart extract 1 $(($SKIP + 0)) $(($SKIP + $SPOKES * $PHASES - 1)) grasp1-$1 grasp2-$1 bart reshape $(bart bitmask 1 2) $SPOKES $PHASES grasp2-$1 grasp1-$1 # move time dimensions to dim 10 and reshape bart transpose 2 10 grasp1-$1 grasp2-$1 bart reshape $(bart bitmask 0 1 2) 1 $READ $SPOKES grasp2-$1 grasp1-$1 rm grasp2-$1.cfl grasp2-$1.hdr # reconstruction with tv penality along dimension 10 # old (v0.2.08): # pics -S -d5 -lv -u10. -r$REG -R$(bitmask 10) -i$ITER -t rfull grasp1-$1 sens-$1 i-$1.coo # new (v0.2.09): bart pics -S -d5 -u10. 
-RT:$(bart bitmask 10):0:$REG -i$ITER -t rfull grasp1-$1 sens-$1 i-$1.coo # clean up temp files rm *-$1.cfl *-$1.hdr } export -f calib_slice export -f recon_slice # loop over slices seq -w 0 $(($SLICES - 1)) | xargs -I {} -P $MAXPROC bash -c "calib_slice {}" # transform back to k-space and compute sensitivities bart join 2 img-*.coo img bart fft -u $(bart bitmask 0 1 2) img ksp #ecalib -S -c0.8 -m1 -r20 ksp sens # transpose because we already support off-center calibration region # in dim 0 but here we might have it in 2 bart transpose 0 2 ksp ksp2 bart ecalib -S -c0.8 -m1 -r20 ksp2 sens2 bart transpose 0 2 sens2 sens # loop over slices seq -w 0 $(($SLICES - 1)) | xargs -I {} -P $MAXPROC bash -c "recon_slice {}" #echo 20 | xargs -i --max-procs=$MAXPROC bash -c "recon_slice {}" # join slices back together bart join 2 i-*.coo $output # generate dicoms #for s in $(seq -w 0 $(($SLICES - 1))) ; do # for p in $(seq -w 0 $(($PHASES - 1))) ; do # bart slice 10 $p i-$s.coo i-$p-$s.coo # bart toimg i-$p-$s.coo $output.series$p.slice$s.dcm # done #done } > $LOGFILE exit 0 bart-0.4.02/scripts/octview.m000077500000000000000000000002501320577655200160660ustar00rootroot00000000000000#! /usr/bin/octave -qf addpath(strcat(getenv("TOOLBOX_PATH"), "/matlab")); arg_list = argv(); data = squeeze(readcfl(arg_list{1})); imshow3(abs(data), []); pause; bart-0.4.02/scripts/profile.sh000066400000000000000000000022511320577655200162240ustar00rootroot00000000000000#!/bin/sh set -e usage="Usage: $0 " helpstr=$(cat <<- EOF Postprocess debugging output from BART to extract profiling information and to translate pointer values to symbol names. -h help EOF ) while getopts "h" opt; do case $opt in h) echo "$usage" echo echo "$helpstr" exit 0 ;; \?) echo "$usage" >&2 exit 1 ;; esac done shift $((OPTIND - 1)) if [ $# -lt 2 ] ; then echo "$usage" >&2 exit 1 fi in=$(readlink -f "$1") out=$(readlink -f "$2") if [ ! -e $input ] ; then echo "Input file does not exist." 
>&2 echo "$usage" >&2 exit 1 fi if [ ! -e $TOOLBOX_PATH/bart ] ; then echo "\$TOOLBOX_PATH is not set correctly!" >&2 exit 1 fi #WORKDIR=$(mktemp -d) # Mac: http://unix.stackexchange.com/questions/30091/fix-or-alternative-for-mktemp-in-os-x WORKDIR=`mktemp -d 2>/dev/null || mktemp -d -t 'mytmpdir'` trap 'rm -rf "$WORKDIR"' EXIT cd $WORKDIR nm --defined-only $TOOLBOX_PATH/bart | cut -c11-16,19- | sort > bart.syms cat $in | grep "^TRACE" \ | grep " 0x" \ | cut -c7-23,25-31,34- \ | sort -k3 \ | join -11 -23 bart.syms - \ | cut -c8- \ | sort -k2 > $out bart-0.4.02/scripts/rtnlinv.m000066400000000000000000000026441320577655200161100ustar00rootroot00000000000000% 2015, Martin Uecker % % Example script to use BART for the initial preprocessing % (gridding) which is required - but not included - in the % original Matlab RT-NLINV example. The example is for a % single frame, but this should also work in a similar way % for the RT-NLINV2 code which reconstructs a time-series % of images from highly undersampled data using temporal % regularization. 
% % Links to the Matlab code can be found here: % http://www.eecs.berkeley.edu/~uecker/toolbox.html % % References: % % Uecker M et al., Nonlinear Inverse Reconstruction for Real-time MRI % of the Human Heart Using Undersampled Radial FLASH, % MRM 63:1456-1462 (2010) % % Uecker M et al., Real-time magnetic resonance imaging at 20 ms % resolution, NMR in Biomedicine 23: 986-994 (2010) % % data set is included in the IRGNTV example A = load('radial_cardiac_25_projections.mat'); % re-format trajectory for BART t = zeros(3, 256, 25); t(1,:,:) = real(A.k) * 384.; t(2,:,:) = imag(A.k) * 384.; % use adjoint nufft to interpolate data onto Cartesia grid adj = bart('nufft -d384:384:1 -a ', t, reshape(A.rawdata, [1 256 25 12])); % compute point-spread function psf = bart('nufft -d384:384:1 -a ', t, ones(1, 256, 25)); % transform back to k-space adjk = bart('fft -u 7', adj); psfk = bart('fft -u 7', psf); % use nlinv from RT-NLINV (nlinv2) matlab package R = nlinv(squeeze(adjk), squeeze(psfk) * 1., 9, 'noncart'); bart-0.4.02/src/000077500000000000000000000000001320577655200133305ustar00rootroot00000000000000bart-0.4.02/src/avg.c000066400000000000000000000023541320577655200142550ustar00rootroot00000000000000/* Copyright 2014-2016. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2014 Frank Ong */ #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Calculates (weighted) average along dimensions specified by bitmask."; int main_avg(int argc, char* argv[argc]) { bool wavg = false; const struct opt_s opts[] = { OPT_SET('w', &wavg, "weighted average"), }; cmdline(&argc, argv, 3, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); int N = DIMS; unsigned int flags = atoi(argv[1]); long idims[N]; complex float* data = load_cfl(argv[2], N, idims); long odims[N]; md_select_dims(N, ~flags, odims, idims); complex float* out = create_cfl(argv[3], N, odims); (wavg ? md_zwavg : md_zavg)(N, idims, flags, out, data); unmap_cfl(N, idims, data); unmap_cfl(N, odims, out); exit(0); } bart-0.4.02/src/bart.c000066400000000000000000000036621320577655200144330ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014-2016 Martin Uecker */ #include #include #include #include #include #include #include #include "misc/misc.h" #include "misc/cppmap.h" #include "main.h" struct { int (*main_fun)(int argc, char* argv[]); const char* name; } dispatch_table[] = { #define DENTRY(x) { main_ ## x, # x }, MAP(DENTRY, MAIN_LIST) #undef DENTRY { NULL, NULL } }; static void usage(void) { printf("BART. 
Available commands are:"); for (int i = 0; NULL != dispatch_table[i].name; i++) { if (0 == i % 6) printf("\n"); printf("%-12s", dispatch_table[i].name); } printf("\n"); } int main_bart(int argc, char* argv[]) { char* bn = basename(argv[0]); if (0 == strcmp(bn, "bart")) { if (1 == argc) { usage(); exit(1); } const char* tpath[3] = { getenv("TOOLBOX_PATH"), "/usr/lib/bart/commands/", "/usr/local/lib/bart/commands/", }; for (unsigned int i = 0; i < ARRAY_SIZE(tpath); i++) { if (NULL == tpath[i]) continue; size_t len = strlen(tpath[i]) + strlen(argv[1]) + 2; char cmd[len]; size_t r = snprintf(cmd, len, "%s/%s", tpath[i], argv[1]); assert(r < len); if (-1 == execv(cmd, argv + 1)) { // only if it doesn't exist - try builtin if (ENOENT != errno) { perror("Executing bart command failed"); exit(1); } } else { assert(0); } } return main_bart(argc - 1, argv + 1); } for (int i = 0; NULL != dispatch_table[i].name; i++) { if (0 == strcmp(bn, dispatch_table[i].name)) return dispatch_table[i].main_fun(argc, argv); } fprintf(stderr, "Unknown bart command: \"%s\".\n", bn); exit(1); } bart-0.4.02/src/bbox.c000066400000000000000000000000601320577655200144220ustar00rootroot00000000000000 #define main_bart main_bbox #include "bart.c" bart-0.4.02/src/bench.c000066400000000000000000000265141320577655200145630ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2015-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2014-2017 Martin Uecker * 2014 Jonathan Tamir */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/rand.h" #include "num/init.h" #include "num/ops.h" #include "wavelet/wavthresh.h" #include "misc/debug.h" #include "misc/misc.h" #include "misc/mmio.h" #include "misc/opts.h" #define DIMS 8 static double bench_generic_copy(long dims[DIMS]) { long strs[DIMS]; md_calc_strides(DIMS, strs, dims, CFL_SIZE); md_calc_strides(DIMS, strs, dims, CFL_SIZE); complex float* x = md_alloc(DIMS, dims, CFL_SIZE); complex float* y = md_alloc(DIMS, dims, CFL_SIZE); md_gaussian_rand(DIMS, dims, x); double tic = timestamp(); md_copy2(DIMS, dims, strs, y, strs, x, CFL_SIZE); double toc = timestamp(); md_free(x); md_free(y); return toc - tic; } static double bench_generic_matrix_multiply(long dims[DIMS]) { long dimsX[DIMS]; long dimsY[DIMS]; long dimsZ[DIMS]; #if 1 md_select_dims(DIMS, 2 * 3 + 17, dimsX, dims); // 1 110 1 md_select_dims(DIMS, 2 * 6 + 17, dimsY, dims); // 1 011 1 md_select_dims(DIMS, 2 * 5 + 17, dimsZ, dims); // 1 101 1 #else md_select_dims(DIMS, 2 * 5 + 17, dimsZ, dims); // 1 101 1 md_select_dims(DIMS, 2 * 3 + 17, dimsY, dims); // 1 110 1 md_select_dims(DIMS, 2 * 6 + 17, dimsX, dims); // 1 011 1 #endif complex float* x = md_alloc(DIMS, dimsX, CFL_SIZE); complex float* y = md_alloc(DIMS, dimsY, CFL_SIZE); complex float* z = md_alloc(DIMS, dimsZ, CFL_SIZE); md_gaussian_rand(DIMS, dimsX, x); md_gaussian_rand(DIMS, dimsY, y); double tic = timestamp(); md_ztenmul(DIMS, dimsZ, z, dimsX, x, dimsY, y); double toc = timestamp(); md_free(x); md_free(y); md_free(z); return toc - tic; } static double bench_generic_add(long dims[DIMS], unsigned int flags, bool forloop) { long dimsX[DIMS]; long dimsY[DIMS]; long dimsC[DIMS]; md_select_dims(DIMS, flags, dimsX, dims); md_select_dims(DIMS, ~flags, dimsC, dims); md_select_dims(DIMS, ~0u, dimsY, dims); long strsX[DIMS]; long strsY[DIMS]; md_calc_strides(DIMS, strsX, 
dimsX, CFL_SIZE); md_calc_strides(DIMS, strsY, dimsY, CFL_SIZE); complex float* x = md_alloc(DIMS, dimsX, CFL_SIZE); complex float* y = md_alloc(DIMS, dimsY, CFL_SIZE); md_gaussian_rand(DIMS, dimsX, x); md_gaussian_rand(DIMS, dimsY, y); long L = md_calc_size(DIMS, dimsC); long T = md_calc_size(DIMS, dimsX); double tic = timestamp(); if (forloop) { for (long i = 0; i < L; i++) { for (long j = 0; j < T; j++) y[i + j * L] += x[j]; } } else { md_zaxpy2(DIMS, dims, strsY, y, 1., strsX, x); } double toc = timestamp(); md_free(x); md_free(y); return toc - tic; } static double bench_generic_sum(long dims[DIMS], unsigned int flags, bool forloop) { long dimsX[DIMS]; long dimsY[DIMS]; long dimsC[DIMS]; md_select_dims(DIMS, ~0u, dimsX, dims); md_select_dims(DIMS, flags, dimsY, dims); md_select_dims(DIMS, ~flags, dimsC, dims); long strsX[DIMS]; long strsY[DIMS]; md_calc_strides(DIMS, strsX, dimsX, CFL_SIZE); md_calc_strides(DIMS, strsY, dimsY, CFL_SIZE); complex float* x = md_alloc(DIMS, dimsX, CFL_SIZE); complex float* y = md_alloc(DIMS, dimsY, CFL_SIZE); md_gaussian_rand(DIMS, dimsX, x); md_clear(DIMS, dimsY, y, CFL_SIZE); long L = md_calc_size(DIMS, dimsC); long T = md_calc_size(DIMS, dimsY); double tic = timestamp(); if (forloop) { for (long i = 0; i < L; i++) { for (long j = 0; j < T; j++) y[j] = y[j] + x[i + j * L]; } } else { md_zaxpy2(DIMS, dims, strsY, y, 1., strsX, x); } double toc = timestamp(); md_free(x); md_free(y); return toc - tic; } static double bench_copy1(long scale) { long dims[DIMS] = { 1, 128 * scale, 128 * scale, 1, 1, 16, 1, 16 }; return bench_generic_copy(dims); } static double bench_copy2(long scale) { long dims[DIMS] = { 262144 * scale, 16, 1, 1, 1, 1, 1, 1 }; return bench_generic_copy(dims); } static double bench_matrix_mult(long scale) { long dims[DIMS] = { 1, 256 * scale, 256 * scale, 256 * scale, 1, 1, 1, 1 }; return bench_generic_matrix_multiply(dims); } static double bench_batch_matmul1(long scale) { long dims[DIMS] = { 30000 * scale, 8, 8, 8, 
1, 1, 1, 1 }; return bench_generic_matrix_multiply(dims); } static double bench_batch_matmul2(long scale) { long dims[DIMS] = { 1, 8, 8, 8, 30000 * scale, 1, 1, 1 }; return bench_generic_matrix_multiply(dims); } static double bench_tall_matmul1(long scale) { long dims[DIMS] = { 1, 8, 8, 100000 * scale, 1, 1, 1, 1 }; return bench_generic_matrix_multiply(dims); } static double bench_tall_matmul2(long scale) { long dims[DIMS] = { 1, 100000 * scale, 8, 8, 1, 1, 1, 1 }; return bench_generic_matrix_multiply(dims); } static double bench_add(long scale) { long dims[DIMS] = { 65536 * scale, 1, 50 * scale, 1, 1, 1, 1, 1 }; return bench_generic_add(dims, MD_BIT(2), false); } static double bench_addf(long scale) { long dims[DIMS] = { 65536 * scale, 1, 50 * scale, 1, 1, 1, 1, 1 }; return bench_generic_add(dims, MD_BIT(2), true); } static double bench_add2(long scale) { long dims[DIMS] = { 50 * scale, 1, 65536 * scale, 1, 1, 1, 1, 1 }; return bench_generic_add(dims, MD_BIT(0), false); } static double bench_sum2(long scale) { long dims[DIMS] = { 50 * scale, 1, 65536 * scale, 1, 1, 1, 1, 1 }; return bench_generic_sum(dims, MD_BIT(0), false); } static double bench_sum(long scale) { long dims[DIMS] = { 65536 * scale, 1, 50 * scale, 1, 1, 1, 1, 1 }; return bench_generic_sum(dims, MD_BIT(2), false); } static double bench_sumf(long scale) { long dims[DIMS] = { 65536 * scale, 1, 50 * scale, 1, 1, 1, 1, 1 }; return bench_generic_sum(dims, MD_BIT(2), true); } static double bench_transpose(long scale) { long dims[DIMS] = { 2000 * scale, 2000 * scale, 1, 1, 1, 1, 1, 1 }; complex float* x = md_alloc(DIMS, dims, CFL_SIZE); complex float* y = md_alloc(DIMS, dims, CFL_SIZE); md_gaussian_rand(DIMS, dims, x); md_clear(DIMS, dims, y, CFL_SIZE); double tic = timestamp(); md_transpose(DIMS, 0, 1, dims, y, dims, x, CFL_SIZE); double toc = timestamp(); md_free(x); md_free(y); return toc - tic; } static double bench_resize(long scale) { long dimsX[DIMS] = { 2000 * scale, 1000 * scale, 1, 1, 1, 1, 1, 1 
}; long dimsY[DIMS] = { 1000 * scale, 2000 * scale, 1, 1, 1, 1, 1, 1 }; complex float* x = md_alloc(DIMS, dimsX, CFL_SIZE); complex float* y = md_alloc(DIMS, dimsY, CFL_SIZE); md_gaussian_rand(DIMS, dimsX, x); md_clear(DIMS, dimsY, y, CFL_SIZE); double tic = timestamp(); md_resize(DIMS, dimsY, y, dimsX, x, CFL_SIZE); double toc = timestamp(); md_free(x); md_free(y); return toc - tic; } static double bench_norm(int s, long scale) { long dims[DIMS] = { 256 * scale, 256 * scale, 1, 16, 1, 1, 1, 1 }; #if 0 complex float* x = md_alloc_gpu(DIMS, dims, CFL_SIZE); complex float* y = md_alloc_gpu(DIMS, dims, CFL_SIZE); #else complex float* x = md_alloc(DIMS, dims, CFL_SIZE); complex float* y = md_alloc(DIMS, dims, CFL_SIZE); #endif md_gaussian_rand(DIMS, dims, x); md_gaussian_rand(DIMS, dims, y); double tic = timestamp(); switch (s) { case 0: md_zscalar(DIMS, dims, x, y); break; case 1: md_zscalar_real(DIMS, dims, x, y); break; case 2: md_znorm(DIMS, dims, x); break; case 3: md_z1norm(DIMS, dims, x); break; } double toc = timestamp(); md_free(x); md_free(y); return toc - tic; } static double bench_zscalar(long scale) { return bench_norm(0, scale); } static double bench_zscalar_real(long scale) { return bench_norm(1, scale); } static double bench_znorm(long scale) { return bench_norm(2, scale); } static double bench_zl1norm(long scale) { return bench_norm(3, scale); } static double bench_wavelet(long scale) { long dims[DIMS] = { 1, 256 * scale, 256 * scale, 1, 16, 1, 1, 1 }; long minsize[DIMS] = { [0 ... 
DIMS - 1] = 1 }; minsize[0] = MIN(dims[0], 16); minsize[1] = MIN(dims[1], 16); minsize[2] = MIN(dims[2], 16); const struct operator_p_s* p = prox_wavelet_thresh_create(DIMS, dims, 6, 0u, minsize, 1.1, true); complex float* x = md_alloc(DIMS, dims, CFL_SIZE); md_gaussian_rand(DIMS, dims, x); double tic = timestamp(); operator_p_apply(p, 0.98, DIMS, dims, x, DIMS, dims, x); double toc = timestamp(); md_free(x); operator_p_free(p); return toc - tic; } enum bench_indices { REPETITION_IND, SCALE_IND, THREADS_IND, TESTS_IND, BENCH_DIMS }; typedef double (*bench_fun)(long scale); static void do_test(const long dims[BENCH_DIMS], complex float* out, long scale, bench_fun fun, const char* str) { printf("%30.30s |", str); int N = dims[REPETITION_IND]; double sum = 0.; double min = 1.E10; double max = 0.; for (int i = 0; i < N; i++) { double dt = fun(scale); sum += dt; min = MIN(dt, min); max = MAX(dt, max); printf(" %3.4f", (float)dt); fflush(stdout); assert(0 == REPETITION_IND); out[i] = dt; } printf(" | Avg: %3.4f Max: %3.4f Min: %3.4f\n", (float)(sum / N), max, min); } const struct benchmark_s { bench_fun fun; const char* str; } benchmarks[] = { { bench_add, "add (md_zaxpy)" }, { bench_add2, "add (md_zaxpy), contiguous" }, { bench_addf, "add (for loop)" }, { bench_sum, "sum (md_zaxpy)" }, { bench_sum2, "sum (md_zaxpy), contiguous" }, { bench_sumf, "sum (for loop)" }, { bench_transpose, "complex transpose" }, { bench_resize, "complex resize" }, { bench_matrix_mult, "complex matrix multiply" }, { bench_batch_matmul1, "batch matrix multiply 1" }, { bench_batch_matmul2, "batch matrix multiply 2" }, { bench_tall_matmul1, "tall matrix multiply 1" }, { bench_tall_matmul2, "tall matrix multiply 2" }, { bench_zscalar, "complex dot product" }, { bench_zscalar, "complex dot product" }, { bench_zscalar_real, "real complex dot product" }, { bench_znorm, "l2 norm" }, { bench_zl1norm, "l1 norm" }, { bench_copy1, "copy 1" }, { bench_copy2, "copy 2" }, { bench_wavelet, "wavelet soft 
thresh" }, }; static const char usage_str[] = "[]"; static const char help_str[] = "Performs a series of micro-benchmarks."; int main_bench(int argc, char* argv[]) { bool threads = false; bool scaling = false; const struct opt_s opts[] = { OPT_SET('T', &threads, "varying number of threads"), OPT_SET('S', &scaling, "varying problem size"), }; cmdline(&argc, argv, 0, 1, usage_str, help_str, ARRAY_SIZE(opts), opts); long dims[BENCH_DIMS] = MD_INIT_ARRAY(BENCH_DIMS, 1); long strs[BENCH_DIMS]; long pos[BENCH_DIMS] = { 0 }; dims[REPETITION_IND] = 5; dims[THREADS_IND] = threads ? 8 : 1; dims[SCALE_IND] = scaling ? 5 : 1; dims[TESTS_IND] = sizeof(benchmarks) / sizeof(benchmarks[0]); md_calc_strides(BENCH_DIMS, strs, dims, CFL_SIZE); bool outp = (2 == argc); complex float* out = (outp ? create_cfl : anon_cfl)(outp ? argv[1] : "", BENCH_DIMS, dims); num_init(); do { if (threads) { num_set_num_threads(pos[THREADS_IND] + 1); debug_printf(DP_INFO, "%02d threads. ", pos[THREADS_IND] + 1); } do_test(dims, &MD_ACCESS(BENCH_DIMS, strs, pos, out), pos[SCALE_IND] + 1, benchmarks[pos[TESTS_IND]].fun, benchmarks[pos[TESTS_IND]].str); } while (md_next(BENCH_DIMS, dims, ~MD_BIT(REPETITION_IND), pos)); unmap_cfl(BENCH_DIMS, dims, out); exit(0); } bart-0.4.02/src/bitmask.c000066400000000000000000000023611320577655200151300ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014-2016 Martin Uecker */ #include #include #include #include "num/multind.h" #include "misc/misc.h" #include "misc/opts.h" static const char usage_str[] = "-b | ... 
"; static const char help_str[] = "Convert between a bitmask and set of dimensions."; int main_bitmask(int argc, char* argv[]) { bool inverse = false; long flags = 0; const struct opt_s opts[] = { OPT_SET('b', &inverse, "dimensions from bitmask"), }; cmdline(&argc, argv, 0, 1000, usage_str, help_str, ARRAY_SIZE(opts), opts); if ((2 != argc) && inverse) error("exactly one argument needed.\n"); if (!inverse) { for (int i = 1; i < argc; i++) { int d = atoi(argv[i]); assert(d >= 0); flags = MD_SET(flags, d); } printf("%ld\n", flags); } else { int i = 0; flags = atoi(argv[1]); while (flags) { if (flags & 1) printf("%d ", i); flags >>= 1; i++; } printf("\n"); } exit(0); } bart-0.4.02/src/cabs.c000066400000000000000000000016311320577655200144050ustar00rootroot00000000000000/* Copyright 2016. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Siddharth Iyer */ #include #include #include #include #include "num/flpmath.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Absolute value of array (||).\n"; int main_cabs(int argc, char* argv[]) { mini_cmdline(&argc, argv, 2, usage_str, help_str); long dims[DIMS]; complex float* idata = load_cfl(argv[1], DIMS, dims); complex float* odata = create_cfl(argv[2], DIMS, dims); md_zabs(DIMS, dims, odata, idata); unmap_cfl(DIMS, dims, idata); unmap_cfl(DIMS, dims, odata); exit(0); } bart-0.4.02/src/caldir.c000066400000000000000000000031401320577655200147300ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012 Martin Uecker * uecker@eecs.berkeley.edu */ #include #include #include #include #include #include "misc/mmio.h" #include "misc/misc.h" #include "misc/mri.h" #include "num/multind.h" #include "num/fft.h" #include "calib/direct.h" static const char usage_str[] = "cal_size "; static const char help_str[] = "Estimates coil sensitivities from the k-space center using\n" "a direct method (McKenzie et al.). The size of the fully-sampled\n" "calibration region is automatically determined but limited by\n" "{cal_size} (e.g. in the readout direction).\n"; int main_caldir(int argc, char* argv[]) { mini_cmdline(&argc, argv, 3, usage_str, help_str); long dims[DIMS]; complex float* in_data = load_cfl(argv[2], DIMS, dims); int calsize_ro = atoi(argv[1]); long calsize[3] = { calsize_ro, calsize_ro, calsize_ro }; assert((dims[0] == 1) || (calsize_ro < dims[0])); assert(1 == dims[4]); complex float* out_data = create_cfl(argv[3], DIMS, dims); long caldims[DIMS]; complex float* cal_data = extract_calib(caldims, calsize, dims, in_data, false); printf("Calibration region %ldx%ldx%ld\n", caldims[0], caldims[1], caldims[2]); direct_calib(dims, out_data, caldims, cal_data); printf("Done.\n"); md_free(cal_data); unmap_cfl(DIMS, dims, (void*)out_data); unmap_cfl(DIMS, dims, (void*)in_data); exit(0); } bart-0.4.02/src/calib/000077500000000000000000000000001320577655200144025ustar00rootroot00000000000000bart-0.4.02/src/calib/calib.c000066400000000000000000000514551320577655200156320ustar00rootroot00000000000000/* Copyright 2013-2016. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2016 Martin Uecker * 2013 Dara Bahri * 2015-2016 Siddharth Iyer * * * Uecker M, Lai P, Murphy MJ, Virtue P, Elad M, Pauly JM, Vasanawala SS, Lustig M. 
* ESPIRiT - An Eigenvalue Approach to Autocalibrating Parallel MRI: Where SENSE * meets GRAPPA. Magn Reson Med, 71:990-1001 (2014) * * Iyer S, Ong F, Lustig M. * Towards A Parameter Free ESPIRiT: Soft-Weighting For Robust Coil Sensitivity Estimation. * Presented in the session: "New Frontiers In Image Reconstruction" at ISMRM 2016. * http://www.ismrm.org/16/program_files/O86.htm * */ #include #include #include #include #include "num/multind.h" #include "num/fft.h" #include "num/flpmath.h" #include "num/linalg.h" #include "num/lapack.h" #include "num/casorati.h" #include "num/rand.h" #include "misc/misc.h" #include "misc/mri.h" #include "misc/resize.h" #include "misc/debug.h" #include "misc/utils.h" #include "calib/calmat.h" #include "calib/cc.h" #include "calib/softweight.h" #include "calib.h" #ifdef USE_CUDA #include "calib/calibcu.h" #endif #if 0 #define CALMAT_SVD #endif #if 0 #define FLIP #endif #ifndef M_PI #define M_PI 3.14159265358979323846 #endif static void eigen_herm3(int M, int N, float val[M], complex float matrix[N][N]) // ordering might be different to herm2 { complex float mout[M][N]; for (int li = 0; li < N; li++) for (int lj = 0; lj < li; lj++) matrix[lj][li] = conj(matrix[li][lj]); //mat_identity(M, N, mout); orthiter(M, N, 30, val, mout, matrix); for (int i = 0; i < M; i++) for (int j = 0; j < N; j++) matrix[i][j] = mout[i][j]; } static float scurve(float x) { if (x <= -1.) return 0.; if (x >= 1.) return 1.; return 0.5 * (1. + 2. * x / (1. + powf(x, 2.))); } static float crop_weight_function(float crth, float val) { return scurve((sqrtf(val) - crth) / (1. - crth)); } static float crop_thresh_function(float crth, float val) { return (val <= crth) ? 0. 
: 1.; } typedef float (*weight_function)(float crth, float val); static void crop_weight(const long dims[DIMS], complex float* ptr, weight_function fun, float crth, const complex float* map) { long xx = dims[0]; long yy = dims[1]; long zz = dims[2]; long cc = dims[3]; long mm = dims[4]; assert(DIMS > 5); assert(1 == md_calc_size(DIMS - 5, dims + 5)); for (long m = 0; m < mm; m++) { #pragma omp parallel for for (long k = 0; k < zz; k++) { for (long i = 0; i < yy; i++) { for (long j = 0; j < xx; j++) { float val = cabsf(map[((m * zz + k) * yy + i) * xx + j]); for (long c = 0; c < cc; c++) ptr[(((m * cc + c) * zz + k) * yy + i) * xx + j] *= fun(crth, val); } } } } } void crop_sens(const long dims[DIMS], complex float* ptr, bool soft, float crth, const complex float* map) { crop_weight(dims, ptr, soft ? crop_weight_function : crop_thresh_function, crth, map); } /** * sure_crop - This determines the crop-threshold to use as described in the talk: "Towards A Parameter * Free ESPIRiT: Soft-Weighting For Robust Coil Sensitivity Estimation". This was given at the * session: "New Frontiers In Image Reconstruction" at ISMRM 2016. * * Parameters: * var - Estimated variance in data. * evec_dims - The eigenvector dimensions. * evec_data - The eigenvectors. * eptr - The eigenvalues. * calreg_dims - Dimension of the calibration region. * calreg - Calibration data. */ static float sure_crop(float var, const long evec_dims[5], complex float* evec_data, complex float* eptr, const long calreg_dims[4], const complex float* calreg) { // Must be in ascending order. 
float start = 0.7; float delta = 0.01; long num_cvals = (long) (((1 - delta - start)/delta) + 1); long num_maps = evec_dims[4]; // Construct low-resolution image long im_dims[5]; md_select_dims(5, 15, im_dims, evec_dims); complex float* im = md_alloc(5, im_dims, CFL_SIZE); md_resize_center(5, im_dims, im, calreg_dims, calreg, CFL_SIZE); ifftuc(5, im_dims, FFT_FLAGS, im, im); // Temporary vector for crop dimensions long cropdims[5]; md_select_dims(5, 15, cropdims, calreg_dims); cropdims[4] = num_maps; // Eigenvectors (M) complex float* M = md_alloc(5, evec_dims, CFL_SIZE); md_copy(5, evec_dims, M, evec_data, CFL_SIZE); // Temporary eigenvector holder to hold low resolution maps complex float* LM = md_alloc(5, evec_dims, CFL_SIZE); // Temporary holder for projection calreg complex float* TC = md_alloc(5, calreg_dims, CFL_SIZE); // Temporary holder to hold low resolution calib maps complex float* CM = md_alloc(5, cropdims, CFL_SIZE); // Eigenvalues (W) long W_dims[5]; md_select_dims(5, 23, W_dims, evec_dims); complex float* W = md_alloc(5, W_dims, CFL_SIZE); md_copy(5, W_dims, W, eptr, CFL_SIZE); // Place holder for the inner product result complex float* ip = md_alloc(5, W_dims, CFL_SIZE); // Place holder for the projection result complex float* proj = md_alloc(5, im_dims, CFL_SIZE); // Place holder for divergence term long div_dims[5] = MD_INIT_ARRAY(5, 1); complex float* div = md_alloc(5, div_dims, CFL_SIZE); // Calculating strides. 
long str1_ip[5]; long str2_ip[5]; long stro_ip[5]; md_calc_strides(5, str1_ip, im_dims, CFL_SIZE); md_calc_strides(5, str2_ip, evec_dims, CFL_SIZE); md_calc_strides(5, stro_ip, W_dims, CFL_SIZE); long str1_proj[5]; long str2_proj[5]; long stro_proj[5]; md_calc_strides(5, str1_proj, W_dims, CFL_SIZE); md_calc_strides(5, str2_proj, evec_dims, CFL_SIZE); md_calc_strides(5, stro_proj, im_dims, CFL_SIZE); long str1_div[5]; long str2_div[5]; long stro_div[5]; md_calc_strides(5, str1_div, evec_dims, CFL_SIZE); md_calc_strides(5, str2_div, evec_dims, CFL_SIZE); md_calc_strides(5, stro_div, div_dims, CFL_SIZE); long tdims_ip[5]; long tdims_proj[5]; for (unsigned int i = 0; i < 5; i++) { assert((im_dims[i] == evec_dims[i]) || (1 == im_dims[i]) || (1 == evec_dims[i])); assert(( W_dims[i] == evec_dims[i]) || (1 == W_dims[i]) || (1 == evec_dims[i])); tdims_ip[i] = (1 == im_dims[i]) ? evec_dims[i] : im_dims[i]; tdims_proj[i] = (1 == W_dims[i]) ? evec_dims[i] : W_dims[i]; } // Starting parameter sweep with SURE. float minMSE = 0; float estMSE = 0; float optVal = 0; float c = 0; for (int idx = 0; idx < num_cvals; idx++) { estMSE = 0; *div = 0; md_clear(5, W_dims, ip, CFL_SIZE); md_clear(5, im_dims, proj, CFL_SIZE); md_clear(5, evec_dims, LM, CFL_SIZE); md_clear(5, calreg_dims, TC, CFL_SIZE); c = start + idx * delta; // Cropping crop_weight(evec_dims, M, crop_thresh_function, c, W); // Projection (stored in proj) md_zfmacc2(5, tdims_ip, stro_ip, ip, str1_ip, im, str2_ip, M); md_zfmac2 (5, tdims_proj, stro_proj, proj, str1_proj, ip, str2_proj, M); // Construct low resolution projection image. 
fftuc(5, im_dims, FFT_FLAGS, proj, proj); md_resize_center(5, calreg_dims, TC, im_dims, proj, CFL_SIZE); md_resize_center(5, im_dims, proj, calreg_dims, TC, CFL_SIZE); ifftuc(5, im_dims, FFT_FLAGS, proj, proj); for (int jdx = 0; jdx < md_calc_size(5, im_dims); jdx++) estMSE += powf(cabsf(im[jdx] - proj[jdx]), 2); // Construct low-resolution maps fftuc(5, evec_dims, FFT_FLAGS, LM, M); md_resize_center(5, cropdims, CM, evec_dims, LM, CFL_SIZE); md_resize_center(5, evec_dims, LM, cropdims, CM, CFL_SIZE); ifftuc(5, evec_dims, FFT_FLAGS, LM, LM); // Calculating SURE divergence using low resolution maps md_zfmacc2(5, evec_dims, stro_div, div, str1_div, LM, str2_div, LM); estMSE += 2 * var * creal(*div); if ((0 == idx) || (estMSE < minMSE)) { optVal = c; minMSE = estMSE; } } md_free(im); md_free(TC); md_free(CM); md_free(M); md_free(LM); md_free(W); md_free(ip); md_free(proj); md_free(div); // Smudge factor is to soften by little bit to improve robustness. This is to account for // the sweeped thresholds possibly having a too large a step size between them and for any // other inconsistencies. 
float smudge = 0.99; debug_printf(DP_DEBUG1, "Calculated c: %.3f\n", optVal); debug_printf(DP_DEBUG1, "Smudge: %.3f\n", smudge); return smudge * optVal; } void calone(const struct ecalib_conf* conf, const long cov_dims[4], complex float* imgcov, unsigned int SN, float svals[SN], const long calreg_dims[DIMS], const complex float* data) { assert(1 == md_calc_size(DIMS - 5, calreg_dims + 5)); #if 1 long nskerns_dims[5]; complex float* nskerns; compute_kernels(conf, nskerns_dims, &nskerns, SN, svals, calreg_dims, data); #else long channels = calreg_dims[3]; long kx = conf->kdims[0]; long ky = conf->kdims[1]; long kz = conf->kdims[2]; long nskerns_dims[5] = { kx, ky, kz, channels, 0 }; long N = md_calc_size(4, nskerns_dims); assert(N > 0); nskerns_dims[4] = N; complex float* nskerns = md_alloc(5, nskerns_dims, CFL_SIZE); long nr_kernels = channels; nskerns_dims[4] = channels; spirit_kernel(nskerns_dims, nskerns, calreg_dims, data); #endif compute_imgcov(cov_dims, imgcov, nskerns_dims, nskerns); md_free(nskerns); } /* calculate point-wise maps * */ void eigenmaps(const long out_dims[DIMS], complex float* optr, complex float* eptr, const complex float* imgcov2, const long msk_dims[3], const bool* msk, bool orthiter, bool ecal_usegpu) { #ifdef USE_CUDA if (ecal_usegpu) { //FIXME cuda version should be able to return sensitivities for a subset of image-space points assert(!msk); eigenmapscu(out_dims, optr, eptr, imgcov2); return; } #else assert(!ecal_usegpu); #endif long channels = out_dims[3]; long maps = out_dims[4]; assert(DIMS >= 5); assert(1 == md_calc_size(DIMS - 5, out_dims + 5)); assert(maps <= channels); long xx = out_dims[0]; long yy = out_dims[1]; long zz = out_dims[2]; float scale = 1.; // for some reason, not if (msk_dims) { assert(msk_dims[0] == xx); assert(msk_dims[1] == yy); assert(msk_dims[2] == zz); } md_clear(5, out_dims, optr, CFL_SIZE); #pragma omp parallel for collapse(3) for (long k = 0; k < zz; k++) { for (long j = 0; j < yy; j++) { for (long i = 0; 
i < xx; i++) { if (!msk || msk[i + xx * (j + yy * k)]) { float val[channels]; complex float cov[channels][channels]; complex float tmp[channels * (channels + 1) / 2]; for (long l = 0; l < channels * (channels + 1) / 2; l++) tmp[l] = imgcov2[((l * zz + k) * yy + j) * xx + i] / scale; unpack_tri_matrix(channels, cov, tmp); if (orthiter) eigen_herm3(maps, channels, val, cov); else lapack_eig(channels, val, cov); for (long u = 0; u < maps; u++) { long ru = (orthiter ? maps : channels) - 1 - u; for (long v = 0; v < channels; v++) optr[((((u * channels + v) * zz + k) * yy + j) * xx + i)] = cov[ru][v]; if (NULL != eptr) eptr[((u * zz + k) * yy + j) * xx + i] = val[ru]; } } } } } } void caltwo(const struct ecalib_conf* conf, const long out_dims[DIMS], complex float* out_data, complex float* emaps, const long in_dims[4], complex float* in_data, const long msk_dims[3], const bool* msk) { long xx = out_dims[0]; long yy = out_dims[1]; long zz = out_dims[2]; long xh = in_dims[0]; long yh = in_dims[1]; long zh = in_dims[2]; long channels = out_dims[3]; long cosize = channels * (channels + 1) / 2; assert(DIMS >= 5); assert(1 == md_calc_size(DIMS - 5, out_dims + 5)); assert(in_dims[3] == cosize); long cov_dims[4] = { xh, yh, zh, cosize }; long covbig_dims[4] = { xx, yy, zz, cosize }; assert(((xx == 1) && (xh == 1)) || (xx >= xh)); assert(((yy == 1) && (yh == 1)) || (yy >= yh)); assert(((zz == 1) && (zh == 1)) || (zz >= zh)); assert((1 == xh) || (0 == xh % 2)); assert((1 == yh) || (0 == yh % 2)); assert((1 == zh) || (0 == zh % 2)); complex float* imgcov2 = md_alloc(4, covbig_dims, CFL_SIZE); debug_printf(DP_DEBUG1, "Resize...\n"); sinc_zeropad(4, covbig_dims, imgcov2, cov_dims, in_data); debug_printf(DP_DEBUG1, "Point-wise eigen-decomposition...\n"); eigenmaps(out_dims, out_data, emaps, imgcov2, msk_dims, msk, conf->orthiter, conf->usegpu); md_free(imgcov2); } void calone_dims(const struct ecalib_conf* conf, long cov_dims[4], long channels) { long kx = conf->kdims[0]; long ky = 
conf->kdims[1]; long kz = conf->kdims[2]; cov_dims[0] = (1 == kx) ? 1 : (2 * kx); cov_dims[1] = (1 == ky) ? 1 : (2 * ky); cov_dims[2] = (1 == kz) ? 1 : (2 * kz); cov_dims[3] = channels * (channels + 1) / 2; } const struct ecalib_conf ecalib_defaults = { { 6, 6, 6 }, 0.001, -1, -1., false, false, 0.8, true, false, -1., false, true, -1., false}; void calib2(const struct ecalib_conf* conf, const long out_dims[DIMS], complex float* out_data, complex float* eptr, unsigned int SN, float svals[SN], const long calreg_dims[DIMS], const complex float* data, const long msk_dims[3], const bool* msk) { long channels = calreg_dims[3]; long maps = out_dims[4]; assert(calreg_dims[3] == out_dims[3]); assert(maps <= channels); assert(1 == md_calc_size(DIMS - 5, out_dims + 5)); assert(1 == md_calc_size(DIMS - 5, calreg_dims + 5)); complex float rot[channels][channels]; if (conf->rotphase) { long scc_dims[DIMS] = MD_INIT_ARRAY(DIMS, 1); scc_dims[COIL_DIM] = channels; scc_dims[MAPS_DIM] = channels; scc(scc_dims, &rot[0][0], calreg_dims, data); } else { for (unsigned int i = 0; i < channels; i++) for (unsigned int j = 0; j < channels; j++) rot[i][j] = (i == j) ? 1. : 0.; } long cov_dims[4]; calone_dims(conf, cov_dims, channels); complex float* imgcov = md_alloc(4, cov_dims, CFL_SIZE); calone(conf, cov_dims, imgcov, SN, svals, calreg_dims, data); caltwo(conf, out_dims, out_data, eptr, cov_dims, imgcov, msk_dims, msk); /* Intensity and phase normalization similar as proposed * for adaptive combine (Walsh's method) in * Griswold et al., ISMRM 10:2410 (2002) */ if (conf->intensity) { debug_printf(DP_DEBUG1, "Normalize...\n"); /* I think the reason this works is because inhomogeneity usually * comes from only a few coil elements which are close. The l1-norm * is more resilient against such outliers. -- Martin */ normalizel1(DIMS, COIL_FLAG, out_dims, out_data); md_zsmul(DIMS, out_dims, out_data, out_data, sqrtf((float)channels)); } float c = (conf->crop > 0) ? 
conf->crop : sure_crop(conf->var, out_dims, out_data, eptr, calreg_dims, data); debug_printf(DP_DEBUG1, "Crop maps... (c = %.2f)\n", c); crop_sens(out_dims, out_data, conf->softcrop, c, eptr); debug_printf(DP_DEBUG1, "Fix phase...\n"); // rotate the the phase with respect to the first principle component fixphase2(DIMS, out_dims, COIL_DIM, rot[0], out_data, out_data); md_free(imgcov); } void calib(const struct ecalib_conf* conf, const long out_dims[DIMS], complex float* out_data, complex float* eptr, unsigned int SN, float svals[SN], const long calreg_dims[DIMS], const complex float* data) { calib2(conf, out_dims, out_data, eptr, SN, svals, calreg_dims, data, NULL, NULL); } static void perturb(const long dims[2], complex float* vecs, float amt) { complex float* noise = md_alloc(2, dims, CFL_SIZE); md_gaussian_rand(2, dims, noise); for (long j = 0; j < dims[1]; j++) { float nrm = md_znorm(1, dims, noise + j * dims[0]); complex float val = amt / nrm; md_zsmul(1, dims, noise + j * dims[0], noise + j * dims[0], val); } md_zadd(2, dims, vecs, vecs, noise); for (long j = 0; j < dims[1]; j++) { float nrm = md_znorm(1, dims, vecs + j * dims[0]); complex float val = 1 / nrm; md_zsmul(1, dims, vecs + j * dims[0], vecs + j * dims[0], val); } md_free(noise); } static int number_of_kernels(const struct ecalib_conf* conf, unsigned int N, const float val[N]) { unsigned int n = 0; if (-1 != conf->numsv) { n = conf->numsv; assert(-1. == conf->percentsv); assert(-1. == conf->threshold); } else if (conf->percentsv != -1.) { n = (unsigned int)(N * conf->percentsv / 100.); assert(-1 == conf->numsv); assert(-1. == conf->threshold); } else { assert(-1 == conf->numsv); assert(-1. == conf->percentsv); for (unsigned int i = 0; i < N; i++) { if (val[i] / val[0] > sqrtf(conf->threshold)) n++; } } if (val[0] <= 0.) error("No signal.\n"); debug_printf(DP_DEBUG1, "Using %d/%ld kernels (%.2f%%, last SV: %f%s).\n", n, N, (float)n / (float)N * 100., (n > 0) ? 
(val[n - 1] / val[0]) : 1., conf->weighting ? ", weighted" : ""); float tr = 0.; for (unsigned int i = 0; i < N; i++) { tr += powf(val[i], 2.); debug_printf(DP_DEBUG3, "SVALS %f (%f)\n", val[i], val[i] / val[0]); } debug_printf(DP_DEBUG3, "\nTRACE: %f (%f)\n", tr, tr / (float)N); assert(n <= N); return n; } void compute_kernels(const struct ecalib_conf* conf, long nskerns_dims[5], complex float** nskerns_ptr, unsigned int SN, float val[SN], const long caldims[DIMS], const complex float* caldata) { assert(1 == md_calc_size(DIMS - 5, caldims + 5)); nskerns_dims[0] = conf->kdims[0]; nskerns_dims[1] = conf->kdims[1]; nskerns_dims[2] = conf->kdims[2]; nskerns_dims[3] = caldims[3]; long N = md_calc_size(4, nskerns_dims); assert(N > 0); nskerns_dims[4] = N; complex float* nskerns = md_alloc(5, nskerns_dims, CFL_SIZE); *nskerns_ptr = nskerns; PTR_ALLOC(complex float[N][N], vec); assert(NULL != val); assert(SN == N); debug_printf(DP_DEBUG1, "Build calibration matrix and SVD...\n"); #ifdef CALMAT_SVD calmat_svd(conf->kdims, N, *vec, val, caldims, caldata); if (conf->weighting) soft_weight_singular_vectors(N, conf->var, conf->kdims, caldims, val, val); for (int i = 0; i < N; i++) for (int j = 0; j < N; j++) #ifndef FLIP nskerns[i * N + j] = ((*vec)[j][i]) * (conf->weighting ? val[i] : 1.); #else nskerns[i * N + j] = ((*vec)[j][N - 1 - i]) * (conf->weighting ? val[N - 1 - i] : 1.); #endif #else covariance_function(conf->kdims, N, *vec, caldims, caldata); debug_printf(DP_DEBUG1, "Eigen decomposition... (size: %ld)\n", N); // we could apply Nystroem method here to speed it up float tmp_val[N]; lapack_eig(N, tmp_val, *vec); // reverse and square root, test for smaller null to avoid NaNs for (int i = 0; i < N; i++) val[i] = (tmp_val[N - 1 - i] < 0.) ? 0. 
: sqrtf(tmp_val[N - 1 - i]); if (conf->weighting) soft_weight_singular_vectors(N, conf-> var, conf->kdims, caldims, val, val); for (int i = 0; i < N; i++) for (int j = 0; j < N; j++) #ifndef FLIP nskerns[i * N + j] = (*vec)[N - 1 - i][j] * (conf->weighting ? val[i] : 1.); // flip #else nskerns[i * N + j] = (*vec)[i][j] * (conf->weighting ? val[N - 1 - i] : 1.); // flip #endif #endif if (conf->perturb > 0.) { long dims[2] = { N, N }; perturb(dims, nskerns, conf->perturb); } #ifndef FLIP nskerns_dims[4] = number_of_kernels(conf, N, val); #else nskerns_dims[4] = N - number_of_kernels(conf, N, val); #endif PTR_FREE(vec); } void compute_imgcov(const long cov_dims[4], complex float* imgcov, const long nskerns_dims[5], const complex float* nskerns) { debug_printf(DP_DEBUG1, "Zeropad...\n"); long xh = cov_dims[0]; long yh = cov_dims[1]; long zh = cov_dims[2]; long kx = nskerns_dims[0]; long ky = nskerns_dims[1]; long kz = nskerns_dims[2]; long channels = nskerns_dims[3]; long nr_kernels = nskerns_dims[4]; long imgkern_dims[5] = { xh, yh, zh, channels, nr_kernels }; complex float* imgkern1 = md_alloc(5, imgkern_dims, CFL_SIZE); complex float* imgkern2 = md_alloc(5, imgkern_dims, CFL_SIZE); md_resize_center(5, imgkern_dims, imgkern1, nskerns_dims, nskerns, CFL_SIZE); // resort array debug_printf(DP_DEBUG1, "FFT (juggling)...\n"); long istr[5]; long mstr[5]; long idim[5] = { xh, yh, zh, channels, nr_kernels }; long mdim[5] = { nr_kernels, channels, xh, yh, zh }; md_calc_strides(5, istr, idim, CFL_SIZE); md_calc_strides(5, mstr, mdim, CFL_SIZE); long m2str[5] = { mstr[2], mstr[3], mstr[4], mstr[1], mstr[0] }; ifftmod(5, imgkern_dims, FFT_FLAGS, imgkern1, imgkern1); ifft2(5, imgkern_dims, FFT_FLAGS, m2str, imgkern2, istr, imgkern1); float scalesq = (kx * ky * kz) * (xh * yh * zh); // second part for FFT scaling md_free(imgkern1); debug_printf(DP_DEBUG1, "Calculate Gram matrix...\n"); int cosize = channels * (channels + 1) / 2; assert(cov_dims[3] == cosize); #pragma omp parallel 
for collapse(3) for (int k = 0; k < zh; k++) { for (int j = 0; j < yh; j++) { for (int i = 0; i < xh; i++) { complex float gram[cosize]; gram_matrix2(channels, gram, nr_kernels, (const complex float (*)[nr_kernels])(imgkern2 + ((k * yh + j) * xh + i) * (channels * nr_kernels))); #ifdef FLIP // add (scaled) identity matrix for (int i = 0, l = 0; i < channels; i++) for (int j = 0; j <= i; j++, l++) gram[l] = ((i == j) ? (kx * ky * kz) : 0.) - gram[l]; #endif for (int l = 0; l < cosize; l++) imgcov[(((l * zh) + k) * yh + j) * xh + i] = gram[l] / scalesq; } } } md_free(imgkern2); } bart-0.4.02/src/calib/calib.h000066400000000000000000000044121320577655200156260ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifndef __CALIB_H #define __CALIB_H #include "misc/cppwrap.h" #include "misc/mri.h" struct ecalib_conf { long kdims[3]; float threshold; int numsv; float percentsv; _Bool weighting; _Bool softcrop; float crop; _Bool orthiter; _Bool usegpu; float perturb; _Bool intensity; _Bool rotphase; float var; _Bool automate; }; extern const struct ecalib_conf ecalib_defaults; extern void calib(const struct ecalib_conf* conf, const long out_dims[DIMS], _Complex float* out_data, _Complex float* eptr, unsigned int SN, float svals[__VLA2(SN)], const long calreg_dims[DIMS], const _Complex float* calreg_data); extern void calib2(const struct ecalib_conf* conf, const long out_dims[DIMS], _Complex float* out_data, _Complex float* eptr, unsigned int SN, float svals[__VLA2(SN)], const long calreg_dims[DIMS], const _Complex float* data, const long msk_dims[3], const _Bool* msk); extern void eigenmaps(const long out_dims[DIMS], _Complex float* out_data, _Complex float* eptr, const _Complex float* imgcov, const long msk_dims[3], const _Bool* msk, _Bool orthiter, _Bool usegpu); extern void crop_sens(const long dims[DIMS], 
_Complex float* ptr, bool soft, float crth, const _Complex float* map); extern void calone_dims(const struct ecalib_conf* conf, long cov_dims[4], long channels); extern void calone(const struct ecalib_conf* conf, const long cov_dims[4], _Complex float* cov, unsigned int SN, float svals[__VLA2(SN)], const long calreg_dims[DIMS], const _Complex float* cal_data); extern void caltwo(const struct ecalib_conf* conf, const long out_dims[DIMS], _Complex float* out_data, _Complex float* emaps, const long in_dims[4], _Complex float* in_data, const long msk_dims[3], const _Bool* msk); extern void compute_imgcov(const long cov_dims[4], _Complex float* imgcov, const long nskerns_dims[5], const _Complex float* nskerns); extern void compute_kernels(const struct ecalib_conf* conf, long nskerns_dims[5], _Complex float** nskerns_ptr, unsigned int SN, float svals[__VLA2(SN)], const long caldims[DIMS], const _Complex float* caldata); #include "misc/cppwrap.h" #endif // __CALIB_H bart-0.4.02/src/calib/calibcu.cu000066400000000000000000000162211320577655200163370ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012 Dara Bahri * 2013 Martin Uecker */ #include #include #include #include #include #include #include #include #include #include #include "misc/mri.h" #include "num/multind.h" #include "num/flpmath.h" #include "calibcu.h" #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) #if 0 // pass &matrix[0][0] void eigen_hermcu(int N, float* eigenval, complex float* matrix) { culaInitialize(); assert(culaCheev('V', 'U', N, (culaFloatComplex*) matrix, N, (culaFloat*) eigenval) == culaNoError); culaShutdown(); } #endif static __device__ __host__ inline cuFloatComplex cuFloatComplexScale(cuFloatComplex a, float s) { cuFloatComplex c; c.x = s * a.x; c.y = s * a.y; return c; } static __device__ void gram_schmidtcu(int M, int N, cuFloatComplex* evals, cuFloatComplex* vecs) { cuFloatComplex val1; cuFloatComplex val2; for (int i = M-1; i >= 0; i--) { val1 = vecs[threadIdx.y + i*N]; __syncthreads(); for (int j = i+1; j <= M-1; j++) { val2 = vecs[threadIdx.y + j*N]; __syncthreads(); vecs[threadIdx.y + i*N] = cuCmulf(val1, cuConjf(val2)); __syncthreads(); if (threadIdx.y == 0) { cuFloatComplex tmp = make_cuFloatComplex(0.,0.); for (int k = 0; k < N; k++) tmp = cuCaddf(tmp, vecs[k + i*N]); vecs[i*N] = cuFloatComplexScale(tmp, -1.); } __syncthreads(); val1 = cuCaddf(val1, cuCmulf(val2, vecs[i*N])); __syncthreads(); } vecs[threadIdx.y + i*N] = cuCmulf(val1, cuConjf(val1)); __syncthreads(); if (threadIdx.y == 0) { cuFloatComplex tmp = make_cuFloatComplex(0.,0.); for (int k = 0; k < N; k++) tmp = cuCaddf(tmp, vecs[k + i*N]); evals[i] = make_cuFloatComplex(sqrt(cuCrealf(tmp)),0.); } __syncthreads(); vecs[threadIdx.y + i*N] = cuFloatComplexScale(val1, 1./cuCrealf(evals[i])); } } static __device__ inline void mat_mulcu(int M, int N, cuFloatComplex* A, cuFloatComplex* B, cuFloatComplex* C, int offset) { cuFloatComplex tmp; for (int i = 0; i < M; i++) { tmp = make_cuFloatComplex(0.,0.); for (int j = 0; j < N; j++) tmp = cuCaddf(tmp, cuCmulf(B[j + i*N], 
C[offset*N*N + threadIdx.y + j*N])); A[threadIdx.y + i*N] = tmp; } } static __global__ void eigenmapscu_kern(cuFloatComplex* in_filled, cuFloatComplex* in, cuFloatComplex* out, cuFloatComplex* vals, int iter, int x, int y, int z, int N, int M) { const int offset = blockIdx.x * blockDim.x + threadIdx.x; if (offset > x*y*z-1) return; extern __shared__ cuFloatComplex sdata[]; cuFloatComplex *tmp1, *tmp2, *evals; tmp1 = sdata + threadIdx.x * (2*M*N + M); tmp2 = tmp1 + M*N; evals = tmp2 + M*N; if (threadIdx.y == 0) { int l = 0; for (int i = 0; i < N; i++) { for (int j = 0; j <= i; j++) in_filled[offset*N*N + i*N + j] = in[offset + (l++)*x*y*z]; for (int j = 0; j < i; j++) in_filled[offset*N*N + j*N + i] = cuConjf(in_filled[offset*N*N + i*N + j]); } } __syncthreads(); for (int i = 0; i < M; i++) tmp1[threadIdx.y + i*N] = (threadIdx.y == i) ? make_cuFloatComplex(1.,0.) : make_cuFloatComplex(0.,0.); __syncthreads(); for (int i = 0; i < iter; i++) { for (int j = 0; j < M; j++) tmp2[threadIdx.y + j*N] = tmp1[threadIdx.y + j*N]; __syncthreads(); mat_mulcu(M, N, tmp1, tmp2, in_filled, offset); __syncthreads(); gram_schmidtcu(M, N, evals, tmp1); __syncthreads(); } for (int i = 0; i < M; i++) out[offset + (i*N + threadIdx.y)*x*y*z] = tmp1[N * (M-1-i) + threadIdx.y]; if (threadIdx.y == 0) if (vals) for (int i = 0; i < M; i++) vals[offset + i*x*y*z] = evals[M-1-i]; } void eigenmapscu(const long dims[5], _Complex float* optr, _Complex float* eptr, const _Complex float* imgcov2) { const int iter = 30; const int x = (int) dims[0]; const int y = (int) dims[1]; const int z = (int) dims[2]; const int N = (int) dims[3]; const int M = (int) dims[4]; assert(M <= N); long imgcov2_dims[5]; md_select_dims(5, ~(COIL_FLAG|MAPS_FLAG), imgcov2_dims, dims); imgcov2_dims[3] = N * (N + 1) / 2; long eptr_dims[5]; md_select_dims(5, ~COIL_FLAG, eptr_dims, dims); long imgcov2_df_dims[5]; md_select_dims(5, ~(COIL_FLAG|MAPS_FLAG), imgcov2_df_dims, dims); imgcov2_df_dims[3] = N * N; printf("CUDA Pointwise 
Eigendecomposition...\n"); cuFloatComplex* optr_device = (cuFloatComplex*)md_alloc_gpu(5, dims, sizeof(cuFloatComplex)); cuFloatComplex* imgcov2_device = (cuFloatComplex*)md_alloc_gpu(5, imgcov2_dims, sizeof(cuFloatComplex)); cuFloatComplex* imgcov2_device_filled = (cuFloatComplex*)md_alloc_gpu(5, imgcov2_df_dims, sizeof(cuFloatComplex)); cuFloatComplex* eptr_device = (cuFloatComplex*)md_alloc_gpu(5, eptr_dims, sizeof(cuFloatComplex)); md_copy(5, imgcov2_dims, imgcov2_device, imgcov2, sizeof(cuFloatComplex)); struct cudaDeviceProp mycudaDeviceProperties; cudaGetDeviceProperties(&mycudaDeviceProperties, 0); const int maxSharedMemPerBlock = mycudaDeviceProperties.sharedMemPerBlock; const int maxThreadsPerBlock = mycudaDeviceProperties.maxThreadsPerBlock; const int memPerPoint = (2*M*N + M) * sizeof(cuFloatComplex); int pointsPerBlock = MIN(maxThreadsPerBlock/N, maxSharedMemPerBlock/memPerPoint); const int maxRegsPerBlock = mycudaDeviceProperties.regsPerBlock; const int maxCmemPerBlock = mycudaDeviceProperties.totalConstMem; // determined by --ptxas-options="-v". 
cmem is constant mem used for 1) kernel args, 2) user defined constants, 3) compiler-generated constants const int regsPerThread = 36; const int cmemPerThread = 108; pointsPerBlock = MIN(pointsPerBlock, maxRegsPerBlock / (N * regsPerThread)); pointsPerBlock = MIN(pointsPerBlock, maxCmemPerBlock / (N * cmemPerThread)); assert(pointsPerBlock > 0); dim3 threads(pointsPerBlock, N, 1); int numBlocks = (x*y*z + (pointsPerBlock-1)) / pointsPerBlock; dim3 blocks(numBlocks); // if numBlocks > ~65,000, need to distribute over x, y, z dims size_t sharedMem = memPerPoint * pointsPerBlock; eigenmapscu_kern<<>>(imgcov2_device_filled, imgcov2_device, optr_device, eptr_device, iter, x, y, z, N, M); cudaThreadSynchronize(); cudaError_t error = cudaGetLastError(); if (error != cudaSuccess) { fprintf(stderr, "ERROR: %s\n", cudaGetErrorString(error)); abort(); } md_copy(5, dims, optr, optr_device, sizeof(_Complex float)); md_copy(5, eptr_dims, eptr, eptr_device, sizeof(_Complex float)); md_free(imgcov2_device); md_free(imgcov2_device_filled); md_free(optr_device); md_free(eptr_device); } bart-0.4.02/src/calib/calibcu.h000066400000000000000000000005631320577655200161610ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include "misc/cppwrap.h" extern void eigenmapscu(const long dims[5], _Complex float* optr, _Complex float* eptr, const _Complex float* imgcov2); #include "misc/cppwrap.h" bart-0.4.02/src/calib/calmat.c000066400000000000000000000130341320577655200160100ustar00rootroot00000000000000/* Copyright 2013-2015 The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012-2015 Martin Uecker */ #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/casorati.h" #include "num/lapack.h" #include "num/linalg.h" #include "misc/misc.h" #include "misc/mri.h" #include "misc/debug.h" #include "calmat.h" /** * Compute basic calibration matrix */ complex float* calibration_matrix(long calmat_dims[2], const long kdims[3], const long calreg_dims[4], const complex float* data) { long kernel_dims[4]; md_copy_dims(3, kernel_dims, kdims); kernel_dims[3] = calreg_dims[3]; casorati_dims(4, calmat_dims, kernel_dims, calreg_dims); complex float* cm = md_alloc_sameplace(2, calmat_dims, CFL_SIZE, data); long calreg_strs[4]; md_calc_strides(4, calreg_strs, calreg_dims, CFL_SIZE); casorati_matrix(4, kernel_dims, calmat_dims, cm, calreg_dims, calreg_strs, data); return cm; } /** * Compute calibration matrix - but mask out a specified patch shape */ complex float* calibration_matrix_mask(long calmat_dims[2], const long kdims[3], const complex float* msk, const long calreg_dims[4], const complex float* data) { complex float* tmp = calibration_matrix(calmat_dims, kdims, calreg_dims, data); if (NULL == msk) return tmp; long msk_dims[2]; md_select_dims(2, ~MD_BIT(0), msk_dims, calmat_dims); assert(md_calc_size(3, kdims) * calreg_dims[3] == md_calc_size(2, msk_dims)); long msk_strs[2]; md_calc_strides(2, msk_strs, msk_dims, CFL_SIZE); // mask out un-sampled samples... 
long calmat_strs[2]; md_calc_strides(2, calmat_strs, calmat_dims, CFL_SIZE); md_zmul2(2, calmat_dims, calmat_strs, tmp, calmat_strs, tmp, msk_strs, msk); return tmp; } /** * Compute pattern matrix - mask out a specified patch shape */ static complex float* pattern_matrix(long pcm_dims[2], const long kdims[3], const complex float* mask, const long calreg_dims[4], const complex float* data) { // estimate pattern long pat_dims[4]; md_select_dims(4, ~MD_BIT(COIL_DIM), pat_dims, calreg_dims); complex float* pattern = md_alloc_sameplace(4, pat_dims, CFL_SIZE, data); estimate_pattern(4, calreg_dims, COIL_FLAG, pattern, data); // compute calibration matrix of pattern complex float* pm = calibration_matrix_mask(pcm_dims, kdims, mask, pat_dims, pattern); md_free(pattern); return pm; } complex float* calibration_matrix_mask2(long calmat_dims[2], const long kdims[3], const complex float* mask, const long calreg_dims[4], const complex float* data) { long pcm_dims[2]; complex float* pm = pattern_matrix(pcm_dims, kdims, mask, calreg_dims, data); long pcm_strs[2]; md_calc_strides(2, pcm_strs, pcm_dims, CFL_SIZE); // number of samples for each patch long msk_dims[2]; md_select_dims(2, ~MD_BIT(1), msk_dims, pcm_dims); long msk_strs[2]; md_calc_strides(2, msk_strs, msk_dims, CFL_SIZE); complex float* msk = md_alloc(2, msk_dims, CFL_SIZE); md_clear(2, msk_dims, msk, CFL_SIZE); md_zfmacc2(2, pcm_dims, msk_strs, msk, pcm_strs, pm, pcm_strs, pm); md_free(pm); // fully sampled? md_zcmp2(2, msk_dims, msk_strs, msk, msk_strs, msk, (long[2]){ 0, 0 }, &(complex float){ /* pcm_dims[1] */ 15 }); // FIXME debug_printf(DP_DEBUG1, "%ld/%ld fully-sampled patches.\n", (long)pow(md_znorm(2, msk_dims, msk), 2.), pcm_dims[0]); complex float* tmp = calibration_matrix_mask(calmat_dims, kdims, mask, calreg_dims, data); // mask out incompletely sampled patches... 
long calmat_strs[2]; md_calc_strides(2, calmat_strs, calmat_dims, CFL_SIZE); md_zmul2(2, calmat_dims, calmat_strs, tmp, calmat_strs, tmp, msk_strs, msk); return tmp; } #if 0 static void circular_patch_mask(const long kdims[3], unsigned int channels, complex float mask[channels * md_calc_size(3, kdims)]) { long kpos[3] = { 0 }; long kcen[3]; for (unsigned int i = 0; i < 3; i++) kcen[i] = (1 == kdims[i]) ? 0 : (kdims[i] - 1) / 2; do { float dist = 0.; for (unsigned int i = 0; i < 3; i++) dist += (float)labs(kpos[i] - kcen[i]) / (float)kdims[i]; for (unsigned int c = 0; c < channels; c++) mask[((c * kdims[2] + kpos[2]) * kdims[1] + kpos[1]) * kdims[0] + kpos[0]] = (dist <= 0.5) ? 1 : 0; } while (md_next(3, kdims, 1 | 2 | 4, kpos)); } #endif void covariance_function(const long kdims[3], unsigned int N, complex float cov[N][N], const long calreg_dims[4], const complex float* data) { long calmat_dims[2]; #if 1 complex float* cm = calibration_matrix(calmat_dims, kdims, calreg_dims, data); #else long channels = calreg_dims[3]; complex float msk[channels * md_calc_size(3, kdims)]; circular_patch_mask(kdims, channels, msk); complex float* cm = calibration_matrix_mask2(calmat_dims, kdims, msk, calreg_dims, data); #endif unsigned int L = calmat_dims[0]; assert(N == calmat_dims[1]); gram_matrix(N, cov, L, MD_CAST_ARRAY2(const complex float, 2, calmat_dims, cm, 0, 1)); md_free(cm); } void calmat_svd(const long kdims[3], unsigned int N, complex float cov[N][N], float* S, const long calreg_dims[4], const complex float* data) { long calmat_dims[2]; complex float* cm = calibration_matrix(calmat_dims, kdims, calreg_dims, data); unsigned int L = calmat_dims[0]; assert(N == calmat_dims[1]); PTR_ALLOC(complex float[L][L], U); // initialize to zero in case L < N not all written to for (unsigned int i = 0; i < N; i++) S[i] = 0.; lapack_svd_econ(L, N, *U, cov, S, MD_CAST_ARRAY2(complex float, 2, calmat_dims, cm, 0, 1)); PTR_FREE(U); md_free(cm); } 
bart-0.4.02/src/calib/calmat.h000066400000000000000000000015531320577655200160200ustar00rootroot00000000000000 #include "misc/cppwrap.h" extern complex float* calibration_matrix(long calmat_dims[2], const long kdims[3], const long calreg_dims[4], const complex float* data); extern complex float* calibration_matrix_mask(long calmat_dims[2], const long kdims[3], const complex float* mask, const long calreg_dims[4], const complex float* data); extern complex float* calibration_matrix_mask2(long calmat_dims[2], const long kdims[3], const complex float* msk, const long calreg_dims[4], const complex float* data); #ifndef __cplusplus extern void covariance_function(const long kdims[3], unsigned int N, complex float cov[static N][N], const long calreg_dims[4], const complex float* data); extern void calmat_svd(const long kdims[3], unsigned int N, complex float cov[static N][N], float* S, const long calreg_dims[4], const complex float* data); #endif #include "misc/cppwrap.h" bart-0.4.02/src/calib/cc.c000066400000000000000000000142531320577655200151400ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2014 Martin Uecker * 2013 Dara Bahri * * * Huang F, Vijayakumar S, Li Y, Hertel S, Duensing GR. A software channel * compression technique for faster reconstruction with many channels. * Magn Reson Imaging 2008; 26:133-141. * * Buehrer M, Pruessmann KP, Boesiger P, Kozerke S. Array compression for MRI * with large coil arrays. Magn Reson Med 2007, 57: 1131–1139. * * Zhang T, Pauly JM, Vasanawala SS, Lustig M. Coil compression for * accelerated imaging with cartesian sampling. Magn Reson Med 2013; * 69:571-582. * * Bahri D, Uecker M, Lustig M. 
ESPIRiT-Based Coil Compression for * Cartesian Sampling, Annual Meeting ISMRM, Salt Lake City 2013, * In: Proc Intl Soc Mag Reson Med 21:2657 */ #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/lapack.h" #include "num/linalg.h" #include "misc/debug.h" #include "misc/mri.h" #include "calib/calib.h" #include "cc.h" void scc(const long out_dims[DIMS], complex float* out_data, const long caldims[DIMS], const complex float* cal_data) { int channels = caldims[COIL_DIM]; assert(1 == md_calc_size(3, out_dims)); assert(out_dims[COIL_DIM] == channels); assert(out_dims[MAPS_DIM] == channels); complex float tmp[channels][channels]; size_t csize = md_calc_size(3, caldims); gram_matrix(channels, tmp, csize, (const complex float (*)[csize])cal_data); float vals[channels]; lapack_eig(channels, vals, tmp); md_flip(DIMS, out_dims, MAPS_FLAG, out_data, tmp, CFL_SIZE); debug_printf(DP_DEBUG1, "Energy:"); float sum = 0.; for (int i = 0; i < channels; i++) sum += vals[i]; for (int i = 0; i < channels; i++) debug_printf(DP_DEBUG1, " %.3f", vals[channels - 1 - i] / sum); debug_printf(DP_DEBUG1, "\n"); } static void align1(int M, int N, complex float out[M][N], const complex float in1[M][N], const complex float in2[M][N]) { assert(M <= N); #if 1 complex float in1T[N][M]; complex float C[M][M]; complex float U[M][M]; complex float VH[M][M]; float S[M]; complex float P[M][M]; mat_adjoint(M, N, in1T, in1); // A_{x-1}^H mat_mul(M, N, M, C, in2, in1T); // C = A_{x} A_{x-1}^H // VH and U are switched here because SVD uses column-major arrays lapack_svd(M, M, VH, U, S, C); // U S V^H = C mat_mul(M, M, M, C, U, VH); // U V^H mat_adjoint(M, M, P, C); // P_x = V U^H mat_mul(M, M, N, out, P, in2); // A_{x} <- P_x A_{x} #else mat_copy(M, N, out, in2); #endif } static void align_ro2(const long dims[DIMS], int start, int end, complex float* odata, const complex float* idata) { int dir = (start < end) ? 
1 : -1; long tmp_dims[DIMS]; md_select_dims(DIMS, ~READ_FLAG, tmp_dims, dims); complex float* tmp1 = md_alloc(DIMS, tmp_dims, CFL_SIZE); complex float* tmp2 = md_alloc(DIMS, tmp_dims, CFL_SIZE); complex float* tmp3 = md_alloc(DIMS, tmp_dims, CFL_SIZE); md_copy_block(DIMS, (long[DIMS]){ [READ_DIM] = start }, tmp_dims, tmp1, dims, idata, CFL_SIZE); if (dir) md_copy_block(DIMS, (long[DIMS]){ [READ_DIM] = start }, dims, odata, tmp_dims, tmp1, CFL_SIZE); for (int i = start; i != end - dir; i += dir) { md_copy_block(DIMS, (long[DIMS]){ [READ_DIM] = i + dir }, tmp_dims, tmp2, dims, idata, CFL_SIZE); align1(tmp_dims[MAPS_DIM], tmp_dims[COIL_DIM], MD_CAST_ARRAY2( complex float, DIMS, tmp_dims, tmp3, COIL_DIM, MAPS_DIM), MD_CAST_ARRAY2(const complex float, DIMS, tmp_dims, tmp1, COIL_DIM, MAPS_DIM), MD_CAST_ARRAY2(const complex float, DIMS, tmp_dims, tmp2, COIL_DIM, MAPS_DIM)); md_copy(DIMS, tmp_dims, tmp1, tmp3, CFL_SIZE); md_copy_block(DIMS, (long[DIMS]){ [READ_DIM] = i + dir }, dims, odata, tmp_dims, tmp3, CFL_SIZE); } md_free(tmp1); md_free(tmp2); md_free(tmp3); } void align_ro(const long dims[DIMS], complex float* odata, const complex float* idata) { int ro = dims[READ_DIM]; assert(ro > 1); #if 1 align_ro2(dims, 0, ro, odata, idata); #else #pragma omp parallel sections { #pragma omp section align_ro2(dims, ro / 2, ro, odata, idata); #pragma omp section align_ro2(dims, ro / 2, -1, odata, idata); } #endif } void gcc(const long out_dims[DIMS], complex float* out_data, const long caldims[DIMS], const complex float* cal_data) { int ro = out_dims[READ_DIM]; // zero pad calibration region along readout and FFT long tmp_dims[DIMS]; md_copy_dims(DIMS, tmp_dims, caldims); tmp_dims[READ_DIM] = ro; complex float* tmp = md_alloc(DIMS, tmp_dims, CFL_SIZE); md_resize_center(DIMS, tmp_dims, tmp, caldims, cal_data, CFL_SIZE); ifftuc(DIMS, tmp_dims, READ_FLAG, tmp, tmp); // apply scc at each readout location long tmp2_dims[DIMS]; md_select_dims(DIMS, ~READ_FLAG, tmp2_dims, tmp_dims); long 
out2_dims[DIMS]; md_select_dims(DIMS, ~READ_FLAG, out2_dims, out_dims); #pragma omp parallel for for (int i = 0; i < ro; i++) { complex float* tmp2 = md_alloc(DIMS, tmp2_dims, CFL_SIZE); complex float* out2 = md_alloc(DIMS, out2_dims, CFL_SIZE); long pos[DIMS] = { [READ_DIM] = i }; md_copy_block(DIMS, pos, tmp2_dims, tmp2, tmp_dims, tmp, CFL_SIZE); scc(out2_dims, out2, tmp2_dims, tmp2); md_copy_block(DIMS, pos, out_dims, out_data, out2_dims, out2, CFL_SIZE); md_free(out2); md_free(tmp2); } } void ecc(const long out_dims[DIMS], complex float* out_data, const long caldims[DIMS], const complex float* cal_data) { int channels = caldims[COIL_DIM]; assert(1 == out_dims[PHS1_DIM]); assert(1 == out_dims[PHS2_DIM]); assert(out_dims[COIL_DIM] == channels); assert(out_dims[MAPS_DIM] == channels); struct ecalib_conf conf = ecalib_defaults; conf.threshold = 0.001; conf.crop = 0.; conf.kdims[0] = 6; conf.kdims[1] = 1; conf.kdims[2] = 1; // conf.numsv = L; conf.weighting = false; conf.orthiter = false; conf.perturb = 0.; long map_dims[DIMS]; md_select_dims(DIMS, ~MAPS_FLAG, map_dims, out_dims); complex float* emaps = md_alloc(DIMS, map_dims, CFL_SIZE); int K = conf.kdims[0] * caldims[COIL_DIM]; float svals[K]; calib(&conf, out_dims, out_data, emaps, K, svals, caldims, cal_data); md_free(emaps); } bart-0.4.02/src/calib/cc.h000066400000000000000000000007621320577655200151450ustar00rootroot00000000000000 #include "misc/mri.h" extern void scc(const long out_dims[DIMS], complex float* out_data, const long caldims[DIMS], const complex float* cal_data); extern void gcc(const long out_dims[DIMS], complex float* out_data, const long caldims[DIMS], const complex float* cal_data); extern void ecc(const long out_dims[DIMS], complex float* out_data, const long caldims[DIMS], const complex float* cal_data); extern void align_ro(const long dims[DIMS], complex float* odata, const complex float* idata); 
bart-0.4.02/src/calib/direct.c000066400000000000000000000037251320577655200160270ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012 Martin Uecker * * * McKenzie CA, Yeh EN, Ohliger MA, Price MD, Sodickson DK. Self-calibrating parallel * imaging with automatic coil sensitivity extraction. Magn Reson Med 2002; 47:529–538. */ #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/specfun.h" #include "misc/mri.h" #include "misc/misc.h" #include "direct.h" static double kaiser(double beta, int M, int n) { if (M == 1) return 1.; if (fabs((double)n / (double)M - 0.5) >= 0.5) return 0.; return bessel_i0(beta * sqrt(1. - pow(2. * (double)n / (double)M - 1., 2.))) / bessel_i0(beta); } void direct_calib(const long dims[5], complex float* sens, const long caldims[5], const complex float* data) { complex float* tmp = md_alloc(5, caldims, CFL_SIZE); assert(1 == caldims[4]); assert(1 == dims[4]); md_copy(5, caldims, tmp, data, CFL_SIZE); // apply Kaiser-Bessel Window beta=4 for (int z = 0; z < caldims[2]; z++) for (int y = 0; y < caldims[1]; y++) for (int x = 0; x < caldims[0]; x++) for (int c = 0; c < caldims[3]; c++) tmp[((c * caldims[2] + z) * caldims[1] + y) * caldims[0] + x] *= kaiser(4., caldims[2], z) * kaiser(4., caldims[1], y) * kaiser(4., caldims[0], x); md_resize_center(5, dims, sens, caldims, tmp, CFL_SIZE); ifftc(5, dims, 7, sens, sens); long dims1[5]; md_select_dims(5, ~MD_BIT(COIL_DIM), dims1, dims); complex float* img = md_alloc(5, dims1, CFL_SIZE); md_zrss(5, dims, COIL_FLAG, img, sens); #if 1 long T = md_calc_size(5, dims1); for (int i = 0; i < T; i++) for (int j = 0; j < dims[COIL_DIM]; j++) sens[j * T + i] *= (cabs(img[i]) == 0.) ? 0. : (1. 
/ cabs(img[i])); #endif md_free(img); } bart-0.4.02/src/calib/direct.h000066400000000000000000000004761320577655200160340ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ extern void direct_calib(const long dims[5], _Complex float* sens, const long caldims[5], const _Complex float* data); bart-0.4.02/src/calib/estvar.c000066400000000000000000000213011320577655200160470ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2015 Siddharth Iyer */ #include #include #include #include #include #include #include #include "num/rand.h" #include "num/multind.h" #include "num/lapack.h" #include "misc/debug.h" #include "misc/misc.h" #include "calib/calib.h" #include "calib/calmat.h" #include "estvar.h" /** * noise_calreg - This uses the dimension of the calibration * region to create a new "calibration region" * with each entry being iid standard normal * (or Gaussian) samples. * * Parameters: * T - Product of all the dimensions of the calibration * region. * ncalreg - Pointer to store the new calibration region to. */ static void noise_calreg(long T, complex float* ncalreg) { float spike = 1; float stdev = 1.f/sqrtf(2.f); for (long idx = 0; idx < T; idx++) { if (spike >= uniform_rand()) { ncalreg[idx] = stdev * gaussian_rand(); } } } /** * file_name - This returns the name of file to read or write the simulated noise singular values to. * * Parameters: * kernel_dims - kernel dimensions. * calreg_dims - calibration region dimensions. 
*/ static char* file_name(const long kernel_dims[3], const long calreg_dims[4]) { char PATH[] = "/save/nsv/"; char KERNEL[] = "KERNEL_"; char CALREG[] = "CALREG_"; char DAT[] = ".dat"; int space[] = {strlen(TOOLBOX_PATH), strlen(PATH), strlen(KERNEL), floor(log10(kernel_dims[0])) + 2, floor(log10(kernel_dims[1])) + 2, floor(log10(kernel_dims[2])) + 2, strlen(CALREG), floor(log10(calreg_dims[0])) + 2, floor(log10(calreg_dims[1])) + 2, floor(log10(calreg_dims[2])) + 2, floor(log10(calreg_dims[3])) + 1, strlen(DAT) + 1}; size_t total = 0; for (size_t idx = 0; idx < sizeof(space)/sizeof(int); idx ++) { total += space[idx]; } char* name = calloc(total, sizeof(char)); assert(NULL != name); sprintf(name, "%s%s%s%ldx%ldx%ld_%s%ldx%ldx%ldx%ld%s", TOOLBOX_PATH, PATH, KERNEL, kernel_dims[0], kernel_dims[1], kernel_dims[2], CALREG, calreg_dims[0], calreg_dims[1], calreg_dims[2], calreg_dims[3], DAT); return name; } /** * load_noise_sv - This loads the noise singular values if * previously simulated. * * Parameters: * kernel_dims[3] - kernel dimensions * calreg_dims[3] - calibration region dimensions * L - Number of elements in E. * E - Load simulated noise singular values to. */ static int load_noise_sv(const long kernel_dims[3], const long calreg_dims[4], long L, float* E) { char* name = file_name(kernel_dims, calreg_dims); FILE* fp = fopen(name, "rb"); if (!fp) { free(name); return 0; } int c = fread(E, sizeof(float), L, fp); assert(c == L); free(name); fclose(fp); return 1; } /** * save_noise_sv - This saves the simulated noise singular * values to use it again should the same * parameters are encountered again. * * Parameters: * kernel_dims[3] - kernel dimensions * calreg_dims[4] - calibration region dimensions * L - Number of elements in E. * E - Load simulated noise singular values to. 
*/ static void save_noise_sv(const long kernel_dims[3], const long calreg_dims[4], long L, float* E) { char* name = file_name(kernel_dims, calreg_dims); FILE* fp = fopen(name, "wb"); if (!fp) { free(name); return; } fwrite(E, sizeof(float), L, fp); free(name); fclose(fp); } /** * nsv - This takes the singular value * decomposition of the Hankel matrix * constructed from a noise-only * calibration region. The noise is * distributed as zero-mean unit-variance * Gaussian noise. * * Parameters: * kernel_dims - The dimensions of the window that sweeps through the * calibration matrix. * calreg_dims - The calibration region dimensions. * L - The number of singular values. * E - Array to save noise singular values to. * num_iters - The number of iterations in order to get a better * estimate of the noise singular values. */ static void nsv(const long kernel_dims[3], const long calreg_dims[4], long L, float* E, long num_iters) { if (NULL != getenv("TOOLBOX_PATH") && 1 == load_noise_sv(kernel_dims, calreg_dims, L, E)) { return; } debug_printf(DP_DEBUG1, "NOTE: Running simulations to figure out noise singular values.\n"); debug_printf(DP_DEBUG1, " The simulation results are saved if TOOLBOX_PATH is set.\n"); long N = kernel_dims[0] * kernel_dims[1] * kernel_dims[2] * calreg_dims[3]; float tmpE[N]; long T = md_calc_size(4, calreg_dims) * sizeof(complex float); complex float ncalreg[T]; noise_calreg(T, ncalreg); PTR_ALLOC(complex float[N][N], vec); covariance_function(kernel_dims, N, *vec, calreg_dims, ncalreg); lapack_eig(N, tmpE, *vec); for (int idx = 0; idx < L; idx ++) E[idx] = sqrtf(tmpE[N-idx-1]); for (long idx = 0; idx < num_iters - 1; idx ++) { noise_calreg(T, ncalreg); covariance_function(kernel_dims, N, *vec, calreg_dims, ncalreg); lapack_eig(N, tmpE, *vec); for (long jdx = 0; jdx < L; jdx ++) { E[jdx] += sqrtf(tmpE[N-jdx-1]); } } for (long idx = 0; idx < L; idx++) { E[idx] /= num_iters; } if (NULL != getenv("TOOLBOX_PATH")) save_noise_sv(kernel_dims, calreg_dims, L, 
E); PTR_FREE(vec); } /** * estimate_noise_variance - This function estimates the variance * of noise present in the calibration * region by fitting the last s^th singular * values of the noise simulation to the * Calibration matrix's singular values. * * Parameters: * L - This is the number of singular values, or the length * of S and E. * S - This is the singular values obtained from the singular * value decomposition of the Hankel matrix constructed * from the calibration data. * E - This is the noise singular values as constructed by * function: standard_normal_noise_sv */ static float estimate_noise_variance(long L, const float* S, const float* E) { float t = 0.f; float c = 0.f; // Counter to avoid zero singular values. long s = 4; // We fit the last one s^th singular values. int num = L/s; int start = L - num; for (long idx = 0; idx < num; idx ++) { if (isnan(S[start + idx]) || S[start + idx] <= 0 || isnan(E[start + idx]) || E[start + idx] <= 0) { break; } t += ((float)S[start + idx])/((float)E[start + idx]); c += 1.f; } return ((t * t)/(c * c))/1.21; //Scaling down since it works well in practice. } extern float estvar_sv(long L, const float S[L], const long kernel_dims[3], const long calreg_dims[4]) { float E[L]; nsv(kernel_dims, calreg_dims, L, E, 10); // Number of iterations set to 5. return estimate_noise_variance(L, S, E); } extern float estvar_calreg(const long kernel_dims[3], const long calreg_dims[4], const complex float* calreg) { // Calibration/Hankel matrix dimension. long calmat_dims[2] = {(calreg_dims[0] - kernel_dims[0] + 1) * (calreg_dims[1] - kernel_dims[1] + 1) * (calreg_dims[2] - kernel_dims[2] + 1), calreg_dims[3] * kernel_dims[0] * kernel_dims[1] * kernel_dims[2]}; long L = calmat_dims[0] > calmat_dims[1] ? calmat_dims[1] : calmat_dims[0]; long N = calmat_dims[1]; //Number of columns. 
float tmpE[N]; float S[L]; PTR_ALLOC(complex float[N][N], vec); covariance_function(kernel_dims, N, *vec, calreg_dims, calreg); lapack_eig(N, tmpE, *vec); for (int idx = 0; idx < L; idx ++) S[idx] = sqrtf(tmpE[N-idx-1]); return estvar_sv(L, S, kernel_dims, calreg_dims); } extern float estvar_kspace(long N, const long kernel_dims[3], const long calib_size[3], const long kspace_dims[N], const complex float* kspace) { long calreg_dims[N]; complex float* calreg = NULL; calreg = extract_calib(calreg_dims, calib_size, kspace_dims, kspace, false); float variance = estvar_calreg(kernel_dims, calreg_dims, calreg); md_free(calreg); return variance; } bart-0.4.02/src/calib/estvar.h000066400000000000000000000032161320577655200160610ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2015 Siddharth Iyer */ #ifndef __ESTVAR_H #define __ESTVAR_H #define TOOLBOX_PATH getenv("TOOLBOX_PATH") /** * estvar_sv - This estimates the variance of noise present in the * calibration region using the singular values of the * calibration matrix. * * Parameters: * L - Number of singular values. * S - Array of singular values. * kernel_dims - Kernel dimensions. * calreg_dims - Calibration region dimensions. */ extern float estvar_sv(long L, const float S[L], const long kernel_dims[3], const long calreg_dims[4]); /** * estvar_calreg - This estimates the variance of noise present in the * calibration region. * * Parameters: * kernel_dims - Kernel dimensions. * calreg_dims - Calibration region dimensions. * calreg - Calibration region. */ extern float estvar_calreg(const long kernel_dims[3], const long calreg_dims[4], const complex float* calreg); /** * estvar_kspace - This estimates the variance of noise present in kspace data. * * Parameters: * N - Total number of dimensions in a CFL file. 
* kernel_dims - Kernel dimensions. * calib_size - Size of the calibration region. * kspace_dims - Dimensions of input data. * kspace - Input kspace data. */ extern float estvar_kspace(long N, const long kernel_dims[3], const long calib_size[3], const long kspace_dims[N], const complex float* kspace); #endif bart-0.4.02/src/calib/softweight.c000066400000000000000000000064411320577655200167360ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2015-2016 Siddharth Iyer * * Iyer S, Ong F, Lustig M. * Towards a Parameter­Free ESPIRiT: Soft­Weighting for Robust Coil Sensitivity Estimation * In Proceedings of ISMRM 2016. * * Candès E, Long C, Trzasko J. * Unbiased Risk Estimates for Singular Value Thresholding and Spectral Estimators. * IEEE Transactions on Signal Processing 61, no. 19 (2013): 4643­657. * */ #include #include #include #include #include #include #include "num/rand.h" #include "num/multind.h" #include "num/lapack.h" #include "misc/debug.h" #include "calib/estvar.h" #include "softweight.h" /* * divergence - Calculates the divergence of the spectral estimator for use in SURE as * proposed by Candès et al. * * Parameters: * N - Number of singular values. * S - Array of singular values. * calmat_dims - Dimension of the calibration matrix. * lambda - Soft-threshold to test. */ static float divergence(long N, const float S[N], const long calmat_dims[2], float lambda) { int idx, jdx; float div = 0; float abs_diff_bw_calmat_dims = labs(calmat_dims[0] - calmat_dims[1]); float s, s1, s2, t; for (idx = 0; idx < N; idx ++) { s = S[idx]; if (s == 0) continue; t = 1 - lambda/s; s1 = (s > lambda ? 1: 0) + 2 * abs_diff_bw_calmat_dims * (t > 0? t: 0); s2 = 0; for (jdx = 0; jdx < N; jdx++) { if (idx == jdx) continue; t = s - lambda; s2 += s * (t > 0? 
t: 0)/(s * s - S[jdx] * S[jdx]); } div += s1 + 4 * s2; } return div; } extern void soft_weight_singular_vectors(long N, float variance, const long kernel_dims[3], const long calreg_dims[4], const float S[N], float W[N]) { int idx = 0, jdx = 0; float t; long calmat_dims[2] = {(calreg_dims[0] - kernel_dims[0] + 1) * (calreg_dims[1] - kernel_dims[1] + 1) * (calreg_dims[2] - kernel_dims[2] + 1), kernel_dims[0] * kernel_dims[1] * kernel_dims[2] * calreg_dims[3]}; float Y = calmat_dims[0] * calmat_dims[1] * variance; float G = 0; for (jdx = 0; jdx < N; jdx++) { G += S[jdx] * S[jdx]; } debug_printf(DP_DEBUG1, "Using estimated variance: : %f\n", variance); float lambda = S[N-1]; float testMSE = 0; float testLambda = 0; float MSE = -Y + G + variance * divergence(N, S, calmat_dims, lambda); for (idx = 1; idx < N; idx++) { G = 0; testLambda = S[N-idx-1]; for (jdx = 0; jdx < N; jdx++) { t = S[jdx]; G += (t < testLambda? t * t : testLambda * testLambda); } testMSE = -Y + G + variance * divergence(N, S, calmat_dims, testLambda); if (testMSE < MSE) { MSE = testMSE; lambda = testLambda; } } debug_printf(DP_DEBUG1, "Soft threshold (Lambda): %f\n", lambda); for (int idx = 0; idx < N; idx++) { t = (S[idx] > 0) ?(S[idx] - lambda)/S[idx] : 0; W[idx] = (t > 0)? t : 0; } } bart-0.4.02/src/calib/softweight.h000066400000000000000000000026231320577655200167410ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2015-2016 Siddharth Iyer * * Iyer S, Ong F, Lustig M. * Towards A Parameter­Free ESPIRiT: Soft­Weighting For Robust Coil Sensitivity Estimation. * In Proceedings Of ISMRM 2016. * * Candès E, Long C, Trzasko J. * Unbiased Risk Estimates for Singular Value Thresholding and Spectral Estimators. * IEEE Transactions on Signal Processing 61, no. 19 (2013): 4643­657. 
* */ #ifndef _SOFT_WEIGHT_H_ #define _SOFT_WEIGHT_H_ /** * soft_weight_singular_vectors - This returns weights for the singular vectors derived from the * soft-thresholding operator proposed by Candès et al., as seen * in "Towards a Parameter­Free ESPIRiT: Soft­Weighting for * Robust Coil Sensitivity Estimation." * * Parameters: * N - Number of singular values. * kernel_dims - Dimension of kernel. * calreg_dims - Calibration region dimensions. * S - Array of singular values. * W - Array to store weights to. */ extern void soft_weight_singular_vectors(long N, float var, const long kernel_dims[3], const long calreg_dims[4], const float S[N], float W[N]); #endif bart-0.4.02/src/calib/walsh.c000066400000000000000000000046401320577655200156700ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013 Martin Uecker */ #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/linalg.h" #include "misc/mri.h" #include "misc/misc.h" #include "misc/debug.h" #include "calib/calmat.h" #include "walsh.h" void walsh(const long bsize[3], const long dims[DIMS], complex float* sens, const long caldims[DIMS], const complex float* data) { assert(1 == caldims[MAPS_DIM]); assert(1 == dims[MAPS_DIM]); int channels = caldims[COIL_DIM]; int cosize = channels * (channels + 1) / 2; assert(dims[COIL_DIM] == cosize); long dims1[DIMS]; md_copy_dims(DIMS, dims1, dims); dims1[COIL_DIM] = channels; long kdims[4]; kdims[0] = MIN(bsize[0], dims[0]); kdims[1] = MIN(bsize[1], dims[1]); kdims[2] = MIN(bsize[2], dims[2]); md_resize_center(DIMS, dims1, sens, caldims, data, CFL_SIZE); ifftc(DIMS, dims1, FFT_FLAGS, sens, sens); long odims[DIMS]; md_copy_dims(DIMS, odims, dims1); for (int i = 0; i < 3; i++) odims[i] = dims[i] + kdims[i] - 1; complex float* tmp = md_alloc(DIMS, 
odims, CFL_SIZE); #if 0 md_resizec(DIMS, odims, tmp, dims1, sens, CFL_SIZE); #else long cen[DIMS] = { 0 }; for (int i = 0; i < 3; i++) cen[i] = (odims[i] - dims[i] + 1) / 2; complex float* tmp1 = md_alloc(DIMS, odims, CFL_SIZE); md_circ_ext(DIMS, odims, tmp1, dims1, sens, CFL_SIZE); // md_resize(DIMS, odims, tmp1, dims1, sens, CFL_SIZE); md_circ_shift(DIMS, odims, cen, tmp, tmp1, CFL_SIZE); md_free(tmp1); #endif long calmat_dims[2]; complex float* cm = calibration_matrix(calmat_dims, kdims, odims, tmp); md_free(tmp); int xh = dims[0]; int yh = dims[1]; int zh = dims[2]; int pixels = calmat_dims[1] / channels; #pragma omp parallel for for (int k = 0; k < zh; k++) { complex float in[channels][pixels]; complex float cov[cosize]; for (int j = 0; j < yh; j++) { for (int i = 0; i < xh; i++) { for (int c = 0; c < channels; c++) for (int p = 0; p < pixels; p++) in[c][p] = cm[((((c * pixels + p) * zh) + k) * yh + j) * xh + i]; gram_matrix2(channels, cov, pixels, in); for (int l = 0; l < cosize; l++) sens[(((l * zh) + k) * yh + j) * xh + i] = cov[l]; } } } } bart-0.4.02/src/calib/walsh.h000066400000000000000000000005501320577655200156710ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include "misc/mri.h" extern void walsh(const long bsize[3], const long dims[DIMS], _Complex float* sens, const long caldims[DIMS], const _Complex float* data); bart-0.4.02/src/calmat.c000066400000000000000000000044741320577655200147460ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012-2016 Martin Uecker */ #include #include #include "misc/mmio.h" #include "misc/mri.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/opts.h" #include "num/flpmath.h" #include "num/multind.h" #include "calib/calmat.h" static const char usage_str[] = " "; static const char help_str[] = "Compute calibration matrix."; int main_calmat(int argc, char* argv[]) { long calsize[3] = { 24, 24, 24 }; long kdims[3] = { 5, 5, 5 }; bool calcen = false; const struct opt_s opts[] = { OPT_VEC3('k', &kdims, "ksize", "kernel size"), OPT_VEC3('K', &kdims, "", "()"), OPT_VEC3('r', &calsize, "cal_size", "Limits the size of the calibration region."), OPT_VEC3('R', &calsize, "", "()"), OPT_SET('C', &calcen, "()"), }; cmdline(&argc, argv, 2, 2, usage_str, help_str, ARRAY_SIZE(opts), opts); int N = DIMS; long ksp_dims[N]; complex float* in_data = load_cfl(argv[1], N, ksp_dims); assert(1 == ksp_dims[MAPS_DIM]); long cal_dims[N]; complex float* cal_data = NULL; if (!calcen) { cal_data = extract_calib(cal_dims, calsize, ksp_dims, in_data, false); } else { for (int i = 0; i < 3; i++) cal_dims[i] = (calsize[i] < ksp_dims[i]) ? 
calsize[i] : ksp_dims[i]; for (int i = 3; i < N; i++) cal_dims[i] = ksp_dims[i]; cal_data = md_alloc(N, cal_dims, CFL_SIZE); md_resize_center(N, cal_dims, cal_data, ksp_dims, in_data, CFL_SIZE); } for (int i = 0; i < 3; i++) if (1 == ksp_dims[i]) kdims[i] = 1; for (unsigned int i = 0; i < 3; i++) if ((1 == cal_dims[i]) && (1 != ksp_dims[i])) error("Calibration region not found!\n"); // FIXME: we should scale the data unmap_cfl(N, ksp_dims, in_data); long calmat_dims[N]; md_singleton_dims(N, calmat_dims); complex float* cm = calibration_matrix(calmat_dims, kdims, cal_dims, cal_data); md_free(cal_data); complex float* out_data = create_cfl(argv[2], N, calmat_dims); md_copy(N, calmat_dims, out_data, cm, CFL_SIZE); md_free(cm); unmap_cfl(N, calmat_dims, out_data); exit(0); } bart-0.4.02/src/carg.c000066400000000000000000000017031320577655200144110ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Martin Uecker */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Argument (phase angle).\n"; int main_carg(int argc, char* argv[]) { mini_cmdline(&argc, argv, 2, usage_str, help_str); num_init(); long dims[DIMS]; complex float* in_data = load_cfl(argv[1], DIMS, dims); complex float* out_data = create_cfl(argv[2], DIMS, dims); md_zarg(DIMS, dims, out_data, in_data); unmap_cfl(DIMS, dims, out_data); unmap_cfl(DIMS, dims, in_data); exit(0); } bart-0.4.02/src/casorati.c000066400000000000000000000031621320577655200153030ustar00rootroot00000000000000/* Copyright 2017. The Regents of the University of California. * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Jon Tamir */ #include #include #include #include #include "num/multind.h" #include "num/casorati.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif static const char usage_str[] = "dim1 kern1 dim2 kern2 ... dimn kernn "; static const char help_str[] = "Casorati matrix with kernel (kern1, ..., kernn) along dimensions (dim1, ..., dimn).\n"; int main_casorati(int argc, char* argv[]) { cmdline(&argc, argv, 4, 100, usage_str, help_str, 0, NULL); num_init(); int count = argc - 3; assert((count > 0) && (count % 2 == 0)); long idims[DIMS]; long kdims[DIMS]; long odims[2]; complex float* idata = load_cfl(argv[argc - 2], DIMS, idims); md_copy_dims(DIMS, kdims, idims); for (int i = 0; i < count; i += 2) { unsigned int kdim = atoi(argv[i + 1]); unsigned int ksize = atoi(argv[i + 2]); assert(kdim < DIMS); assert(ksize >= 1); kdims[kdim] = ksize; } casorati_dims(DIMS, odims, kdims, idims); complex float* odata = create_cfl(argv[argc - 1], 2, odims); long istrs[DIMS]; md_calc_strides(DIMS, istrs, idims, CFL_SIZE); casorati_matrix(DIMS, kdims, odims, odata, idims, istrs, idata); unmap_cfl(DIMS, idims, idata); unmap_cfl(2, odims, odata); exit(0); } bart-0.4.02/src/cc.c000066400000000000000000000077731320577655200140770ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * Copyright 2015,2017 Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012-2017 Martin Uecker */ #include #include #include #include "misc/mmio.h" #include "misc/mri.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/opts.h" #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/init.h" #include "calib/cc.h" static const char usage_str[] = " |"; static const char help_str[] = "Performs coil compression."; int main_cc(int argc, char* argv[]) { long calsize[3] = { 24, 24, 24 }; bool proj = true; long P = -1; bool all = false; enum cc_type { SCC, GCC, ECC } cc_type = SCC; const struct opt_s opts[] = { OPT_LONG('p', &P, "N", "perform compression to N virtual channels"), OPT_CLEAR('M', &proj, "output compression matrix"), OPT_VEC3('r', &calsize, "S", "size of calibration region"), OPT_VEC3('R', &calsize, "", "(size of calibration region)"), OPT_SET('A', &all, "use all data to compute coefficients"), OPT_SELECT('S', enum cc_type, &cc_type, SCC, "type: SVD"), OPT_SELECT('G', enum cc_type, &cc_type, GCC, "type: Geometric"), OPT_SELECT('E', enum cc_type, &cc_type, ECC, "type: ESPIRiT"), }; cmdline(&argc, argv, 2, 2, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); if (-1 == P) { debug_printf(DP_WARN, "Use -M to output compression matrix.\n"); proj = false; } long in_dims[DIMS]; complex float* in_data = load_cfl(argv[1], DIMS, in_dims); assert(1 == in_dims[MAPS_DIM]); long channels = in_dims[COIL_DIM]; if (0 == P) P = channels; long out_dims[DIMS] = MD_INIT_ARRAY(DIMS, 1); out_dims[COIL_DIM] = channels; out_dims[MAPS_DIM] = channels; out_dims[READ_DIM] = (SCC == cc_type) ? 1 : in_dims[READ_DIM]; complex float* out_data = (proj ? 
anon_cfl : create_cfl)(argv[2], DIMS, out_dims); long caldims[DIMS]; complex float* cal_data = NULL; if (all) { md_copy_dims(DIMS, caldims, in_dims); cal_data = in_data; } else { cal_data = extract_calib(caldims, calsize, in_dims, in_data, false); } if (ECC == cc_type) debug_printf(DP_WARN, "Warning: ECC depends on a parameter choice rule for optimal results which is not implemented.\n"); switch (cc_type) { case SCC: scc(out_dims, out_data, caldims, cal_data); break; case GCC: gcc(out_dims, out_data, caldims, cal_data); break; case ECC: ecc(out_dims, out_data, caldims, cal_data); break; } if (!all) md_free(cal_data); if (proj) { debug_printf(DP_DEBUG1, "Compressing to %ld virtual coils...\n", P); long trans_dims[DIMS]; md_copy_dims(DIMS, trans_dims, in_dims); trans_dims[COIL_DIM] = P; complex float* trans_data = create_cfl(argv[2], DIMS, trans_dims); long fake_trans_dims[DIMS]; md_select_dims(DIMS, ~COIL_FLAG, fake_trans_dims, in_dims); fake_trans_dims[MAPS_DIM] = P; long out2_dims[DIMS]; md_copy_dims(DIMS, out2_dims, out_dims); out2_dims[MAPS_DIM] = P; if (SCC != cc_type) { complex float* in2_data = anon_cfl(NULL, DIMS, in_dims); ifftuc(DIMS, in_dims, READ_FLAG, in2_data, in_data); unmap_cfl(DIMS, in_dims, in_data); in_data = in2_data; complex float* out2 = anon_cfl(NULL, DIMS, out2_dims); align_ro(out2_dims, out2, out_data); unmap_cfl(DIMS, out_dims, out_data); out_data = out2; } md_zmatmulc(DIMS, fake_trans_dims, trans_data, out2_dims, out_data, in_dims, in_data); if (SCC != cc_type) { fftuc(DIMS, trans_dims, READ_FLAG, trans_data, trans_data); unmap_cfl(DIMS, out2_dims, out_data); } else { unmap_cfl(DIMS, out_dims, out_data); } unmap_cfl(DIMS, trans_dims, trans_data); unmap_cfl(DIMS, in_dims, in_data); } else { unmap_cfl(DIMS, in_dims, in_data); unmap_cfl(DIMS, out_dims, out_data); } printf("Done.\n"); exit(0); } bart-0.4.02/src/ccapply.c000066400000000000000000000063471320577655200151410ustar00rootroot00000000000000/* Copyright 2016-2017. 
The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016-2017 Jonathan Tamir */ #include #include #include #include "misc/mmio.h" #include "misc/mri.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/opts.h" #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/init.h" #include "calib/cc.h" static const char usage_str[] = " "; static const char help_str[] = "Apply coil compression forward/inverse operation."; int main_ccapply(int argc, char* argv[]) { bool forward = true; bool do_fft = true; long P = -1; enum cc_type { SCC, GCC, ECC } cc_type = SCC; const struct opt_s opts[] = { OPT_LONG('p', &P, "N", "perform compression to N virtual channels"), OPT_CLEAR('u', &forward, "apply inverse operation"), OPT_CLEAR('t', &do_fft, "don't apply FFT in readout"), OPT_SELECT('S', enum cc_type, &cc_type, SCC, "type: SVD"), OPT_SELECT('G', enum cc_type, &cc_type, GCC, "type: Geometric"), OPT_SELECT('E', enum cc_type, &cc_type, ECC, "type: ESPIRiT"), }; cmdline(&argc, argv, 3, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); long in_dims[DIMS]; long cc_dims[DIMS]; complex float* in_data = load_cfl(argv[1], DIMS, in_dims); complex float* cc_data = load_cfl(argv[2], DIMS, cc_dims); assert(1 == in_dims[MAPS_DIM]); const long channels = cc_dims[COIL_DIM]; if (-1 == P) P = in_dims[COIL_DIM]; assert(cc_dims[MAPS_DIM] >= P && in_dims[COIL_DIM] >= P); long out_dims[DIMS] = MD_INIT_ARRAY(DIMS, 1); md_select_dims(DIMS, ~COIL_FLAG, out_dims, in_dims); out_dims[COIL_DIM] = forward ? 
P : channels; complex float* out_data = create_cfl(argv[3], DIMS, out_dims); // transpose for the matrix multiplication long trp_dims[DIMS]; if (forward) { debug_printf(DP_DEBUG1, "Compressing to %ld virtual coils...\n", P); md_transpose_dims(DIMS, COIL_DIM, MAPS_DIM, trp_dims, out_dims); trp_dims[MAPS_DIM] = out_dims[COIL_DIM]; } else { debug_printf(DP_DEBUG1, "Uncompressing channels...\n"); md_transpose_dims(DIMS, COIL_DIM, MAPS_DIM, trp_dims, in_dims); } long cc2_dims[DIMS]; md_select_dims(DIMS, ~MAPS_FLAG, cc2_dims, cc_dims); cc2_dims[MAPS_DIM] = P; if (SCC != cc_type) { if (do_fft) { complex float* in2_data = anon_cfl(NULL, DIMS, in_dims); ifftuc(DIMS, in_dims, READ_FLAG, in2_data, in_data); unmap_cfl(DIMS, in_dims, in_data); in_data = in2_data; } complex float* cc2_data = anon_cfl(NULL, DIMS, cc2_dims); align_ro(cc2_dims, cc2_data, cc_data); unmap_cfl(DIMS, cc_dims, cc_data); cc_data = cc2_data; } if (forward) md_zmatmulc(DIMS, trp_dims, out_data, cc2_dims, cc_data, in_dims, in_data); else md_zmatmul(DIMS, out_dims, out_data, cc2_dims, cc_data, trp_dims, in_data); if (SCC != cc_type) { if (do_fft) fftuc(DIMS, out_dims, READ_FLAG, out_data, out_data); unmap_cfl(DIMS, cc2_dims, cc_data); } else { unmap_cfl(DIMS, cc_dims, cc_data); } unmap_cfl(DIMS, in_dims, in_data); unmap_cfl(DIMS, out_dims, out_data); printf("Done.\n"); exit(0); } bart-0.4.02/src/cdf97.c000066400000000000000000000026231320577655200144130ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012-2016 Martin Uecker */ #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/wavelet.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/io.h" #include "misc/opts.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "bitmask "; static const char help_str[] = "Perform a wavelet (cdf97) transform.\n"; int main_cdf97(int argc, char* argv[]) { bool inv = false; const struct opt_s opts[] = { OPT_SET('i', &inv, "inverse"), }; cmdline(&argc, argv, 3, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); unsigned int flags = atoi(argv[1]); long dims[DIMS]; complex float* idata = load_cfl(argv[2], DIMS, dims); complex float* odata = create_cfl(argv[3], DIMS, dims); md_copy(DIMS, dims, odata, idata, CFL_SIZE); unmap_cfl(DIMS, dims, idata); if (inv) { md_iresortz(DIMS, dims, flags, odata); md_icdf97z(DIMS, dims, flags, odata); } else { md_cdf97z(DIMS, dims, flags, odata); md_resortz(DIMS, dims, flags, odata); } unmap_cfl(DIMS, dims, odata); exit(0); } bart-0.4.02/src/circshift.c000066400000000000000000000023261320577655200154550ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012 Martin Uecker */ #include #include #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "dim shift "; static const char help_str[] = "Perform circular shift along {dim} by {shift} elements.\n"; int main_circshift(int argc, char* argv[]) { mini_cmdline(&argc, argv, 4, usage_str, help_str); num_init(); const int N = DIMS; long dims[N]; int dim = atoi(argv[1]); int shift = atoi(argv[2]); assert((0 <= dim) && (dim < N)); long center[N]; memset(center, 0, N * sizeof(long)); center[dim] = shift; complex float* idata = load_cfl(argv[3], N, dims); complex float* odata = create_cfl(argv[4], N, dims); md_circ_shift(N, dims, center, odata, idata, sizeof(complex float)); unmap_cfl(N, dims, idata); unmap_cfl(N, dims, odata); exit(0); } bart-0.4.02/src/conj.c000066400000000000000000000016711320577655200144320ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * 2012 Martin Uecker */ #include #include #include #include #include #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Compute complex conjugate.\n"; int main_conj(int argc, char* argv[]) { mini_cmdline(&argc, argv, 2, usage_str, help_str); num_init(); const int N = 16; long dims[N]; complex float* idata = load_cfl(argv[1], N, dims); complex float* odata = create_cfl(argv[2], N, dims); md_zconj(N, dims, odata, idata); unmap_cfl(N, dims, idata); unmap_cfl(N, dims, odata); exit(0); } bart-0.4.02/src/conv.c000066400000000000000000000024071320577655200144440ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. 
* Copyright 2015. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014-2015 Martin Uecker */ #include #include #include "num/multind.h" #include "num/conv.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "bitmask "; static const char help_str[] = "Performs a convolution along selected dimensions."; int main_conv(int argc, char* argv[]) { cmdline(&argc, argv, 4, 4, usage_str, help_str, 0, NULL); num_init(); unsigned int flags = atoi(argv[1]); unsigned int N = DIMS; long dims[N]; const complex float* in = load_cfl(argv[2], N, dims); long krn_dims[N]; const complex float* krn = load_cfl(argv[3], N, krn_dims); complex float* out = create_cfl(argv[4], N, dims); struct conv_plan* plan = conv_plan(N, flags, CONV_CYCLIC, CONV_SYMMETRIC, dims, dims, krn_dims, krn); conv_exec(plan, out, in); conv_free(plan); unmap_cfl(N, dims, out); unmap_cfl(N, krn_dims, krn); unmap_cfl(N, dims, in); exit(0); } bart-0.4.02/src/copy.c000066400000000000000000000032061320577655200144470ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Martin Uecker */ #include #include #include "num/multind.h" #include "num/init.h" #include "misc/resize.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif static const char usage_str[] = "dim1 pos1 ... 
dimn posn "; static const char help_str[] = "Copy an array to a given position in the output file (which must exist)."; int main_copy(int argc, char* argv[]) { const struct opt_s opts[] = { }; cmdline(&argc, argv, 4, 1000, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); unsigned int N = DIMS; int count = argc - 3; assert((count > 0) && (count % 2 == 0)); long in_dims[N]; long out_dims[N]; void* in_data = load_cfl(argv[argc - 2], N, in_dims); void* out_data = load_cfl(argv[argc - 1], N, out_dims); // reload unmap_cfl(N, out_dims, out_data); out_data = create_cfl(argv[argc - 1], N, out_dims); long position[N]; for (unsigned int i = 0; i < N; i++) position[i] = 0; for (int i = 0; i < count; i += 2) { unsigned int dim = atoi(argv[i + 1]); long pos = atol(argv[i + 2]); assert(dim < N); assert((0 <= pos) && (pos < out_dims[dim])); position[dim] = pos; } md_copy_block(N, position, out_dims, out_data, in_dims, in_data, CFL_SIZE); unmap_cfl(N, in_dims, in_data); unmap_cfl(N, out_dims, out_data); exit(0); } bart-0.4.02/src/cpyphs.c000066400000000000000000000017531320577655200150100ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2014 Martin Uecker */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Real value.\n"; int main_creal(int argc, char* argv[]) { mini_cmdline(&argc, argv, 2, usage_str, help_str); num_init(); long dims[DIMS]; complex float* in_data = load_cfl(argv[1], DIMS, dims); complex float* out_data = create_cfl(argv[2], DIMS, dims); md_zreal(DIMS, dims, out_data, in_data); unmap_cfl(DIMS, dims, out_data); unmap_cfl(DIMS, dims, in_data); exit(0); } bart-0.4.02/src/crop.c000066400000000000000000000024111320577655200144350ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012 Martin Uecker */ #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "dimension size "; static const char help_str[] = "Extracts a sub-array corresponding to the central part of {size} along {dimension}\n"; int main_crop(int argc, char* argv[]) { mini_cmdline(&argc, argv, 4, usage_str, help_str); num_init(); int N = DIMS; long in_dims[N]; long out_dims[N]; complex float* in_data = load_cfl(argv[3], N, in_dims); int dim = atoi(argv[1]); int count = atoi(argv[2]); assert(dim < N); assert(count >= 1); for (int i = 0; i < N; i++) out_dims[i] = in_dims[i]; out_dims[dim] = count; complex float* out_data = create_cfl(argv[4], N, out_dims); md_resize_center(N, out_dims, out_data, in_dims, in_data, sizeof(complex float)); unmap_cfl(N, in_dims, in_data); unmap_cfl(N, out_dims, out_data); exit(0); } bart-0.4.02/src/delta.c000066400000000000000000000021031320577655200145610ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/io.h" #include "misc/misc.h" static const char usage_str[] = "dims flags size out"; static const char help_str[] = "Kronecker delta.\n"; int main_delta(int argc, char* argv[]) { mini_cmdline(&argc, argv, 4, usage_str, help_str); num_init(); int N = atoi(argv[1]); long len = atoi(argv[3]); unsigned int flags = atoi(argv[2]); assert(N >= 0); long dims[N]; for (int i = 0; i < N; i++) dims[i] = MD_IS_SET(flags, i) ? len : 1; complex float* x = create_cfl(argv[4], N, dims); md_clear(N, dims, x, CFL_SIZE); md_fill_diag(N, dims, flags, x, &(complex float){ 1. 
}, CFL_SIZE); unmap_cfl(N, dims, x); exit(0); } bart-0.4.02/src/dfwavelet/000077500000000000000000000000001320577655200153115ustar00rootroot00000000000000bart-0.4.02/src/dfwavelet/dfwavelet.c000066400000000000000000001171211320577655200174410ustar00rootroot00000000000000/* * Copyright 2013-2015 The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013 Frank Ong * 2013 Martin Uecker, Pat Virtue, and Mark Murphy * * * Ong F, Uecker M, Tariq U, Hsiao A, Alley MT, Vasanawala SS, Lustig M. * Robust 4D Flow Denoising using Divergence-free Wavelet Transform, * Magn Reson Med 2015; 73: 828-842. */ #define _GNU_SOURCE #include #include #include #include #include "num/multind.h" #include "misc/misc.h" #include "dfwavelet.h" #include "dfwavelet_impl.h" #ifdef USE_CUDA #include "dfwavelet_kernels.h" #endif #define str_eq(s1,s2) (!strcmp ((s1),(s2))) /******** Header *********/ static void dffwt3_cpu(struct dfwavelet_plan_s* plan, data_t* out_wcdf1,data_t* out_wcdf2,data_t* out_wcn, data_t* in_vx,data_t* in_vy,data_t* in_vz); static void dfiwt3_cpu(struct dfwavelet_plan_s* plan, data_t* out_vx,data_t* out_vy,data_t* out_vz, data_t* in_wcdf1,data_t* in_wcdf2,data_t* in_wcn); static void dfsoftthresh_cpu(struct dfwavelet_plan_s* plan,scalar_t dfthresh, scalar_t nthresh, data_t* out_wcdf1,data_t* out_wcdf2,data_t* out_wcn); static void dfwavthresh3_cpu(struct dfwavelet_plan_s* plan,scalar_t dfthresh, scalar_t nthresh,data_t* out_vx,data_t* out_vy,data_t* out_vz,data_t* in_vx,data_t* in_vy,data_t* in_vz); void dflincomb_cpu(struct dfwavelet_plan_s* plan,data_t* wc1,data_t* wc2,data_t* wc3); void dfunlincomb_cpu(struct dfwavelet_plan_s* plan,data_t* wc1,data_t* wc2,data_t* wc3); static void fwt3_cpu(struct dfwavelet_plan_s* plan, data_t* out, data_t* in,int dir); static void iwt3_cpu(struct dfwavelet_plan_s* plan, data_t* out, data_t* in,int dir); static 
void circshift_cpu(struct dfwavelet_plan_s* plan, data_t *data); static void circunshift_cpu(struct dfwavelet_plan_s* plan, data_t *data); static void conv_down_3d(data_t *out, data_t *in, int size1, int skip1, int size2, int skip2, int size3, int skip3, scalar_t *filter, int filterLen); static void conv_up_3d(data_t *out, data_t *in, int size1, int skip1, int size2, int skip2, int size3, int skip3, scalar_t *filter, int filterLen); static void mult(data_t* in,scalar_t scalar,int maxInd); static void create_numLevels(struct dfwavelet_plan_s* plan); static void create_wavelet_sizes(struct dfwavelet_plan_s* plan); static void create_wavelet_filters(struct dfwavelet_plan_s* plan); static void get_noise_amp (struct dfwavelet_plan_s* plan); struct dfwavelet_plan_s* prepare_dfwavelet_plan(int numdims, long* imSize, long* minSize, data_t* res,int use_gpu) { struct dfwavelet_plan_s* plan = (struct dfwavelet_plan_s*) malloc(sizeof(struct dfwavelet_plan_s)); plan->use_gpu = use_gpu; plan->numdims = numdims; plan->imSize = (long*) malloc(sizeof(long)*numdims); plan->minSize = (long*) malloc(sizeof(long)*numdims); plan->res = (data_t*) malloc(sizeof(data_t)*numdims); plan->percentZero = -1; plan->noiseAmp = NULL; // Get imSize, numPixel, numdims plan->numPixel = 1; int i; for (i = 0; i < numdims; i++) { plan->imSize[i] = imSize[i]; plan->numPixel *= imSize[i]; plan->minSize[i] = minSize[i]; plan->res[i] = res[i]; } create_wavelet_filters(plan); create_numLevels(plan); create_wavelet_sizes(plan); plan->randShift = (int*) malloc(sizeof(int)*plan->numdims); memset(plan->randShift,0,sizeof(int)*plan->numdims); get_noise_amp(plan); return plan; } void dfwavelet_forward(struct dfwavelet_plan_s* plan, data_t* out_wcdf1, data_t* out_wcdf2, data_t* out_wcn, data_t* in_vx, data_t* in_vy, data_t* in_vz) { if(plan->use_gpu==0) dffwt3_cpu(plan,out_wcdf1,out_wcdf2,out_wcn,in_vx,in_vy,in_vz); #ifdef USE_CUDA if(plan->use_gpu==1) dffwt3_gpu(plan,out_wcdf1,out_wcdf2,out_wcn,in_vx,in_vy,in_vz); 
if(plan->use_gpu==2) dffwt3_gpuHost(plan,out_wcdf1,out_wcdf2,out_wcn,in_vx,in_vy,in_vz); #endif } void dfwavelet_inverse(struct dfwavelet_plan_s* plan, data_t* out_vx,data_t* out_vy,data_t* out_vz, data_t* in_wcdf1,data_t* in_wcdf2,data_t* in_wcn) { if(plan->use_gpu==0) dfiwt3_cpu(plan,out_vx,out_vy,out_vz,in_wcdf1,in_wcdf2,in_wcn); #ifdef USE_CUDA if(plan->use_gpu==1) dfiwt3_gpu(plan,out_vx,out_vy,out_vz,in_wcdf1,in_wcdf2,in_wcn); if(plan->use_gpu==2) dfiwt3_gpuHost(plan,out_vx,out_vy,out_vz,in_wcdf1,in_wcdf2,in_wcn); #endif } void dfsoft_thresh(struct dfwavelet_plan_s* plan, scalar_t dfthresh, scalar_t nthresh,data_t* wcdf1,data_t* wcdf2, data_t* wcn) { if(plan->use_gpu==0) dfsoftthresh_cpu(plan,dfthresh,nthresh,wcdf1,wcdf2,wcn); #ifdef USE_CUDA if(plan->use_gpu==1) dfsoftthresh_gpu(plan,dfthresh,nthresh,wcdf1,wcdf2,wcn); if(plan->use_gpu==2) dfsoftthresh_gpuHost(plan,dfthresh,nthresh,wcdf1,wcdf2,wcn); #endif } void dfwavelet_thresh(struct dfwavelet_plan_s* plan, scalar_t dfthresh, scalar_t nthresh,data_t* out_vx, data_t* out_vy, data_t* out_vz, data_t* in_vx,data_t* in_vy, data_t* in_vz) { if(plan->use_gpu==0) dfwavthresh3_cpu(plan,dfthresh,nthresh,out_vx,out_vy,out_vz, in_vx,in_vy,in_vz); #ifdef USE_CUDA if(plan->use_gpu==1) dfwavthresh3_gpu(plan,dfthresh,nthresh, out_vx,out_vy,out_vz, in_vx,in_vy,in_vz); if(plan->use_gpu==2) dfwavthresh3_gpuHost(plan,dfthresh,nthresh,out_vx,out_vy,out_vz,in_vx,in_vy,in_vz); #endif } static int dfrand_lim(unsigned int* state, int limit) { int divisor = RAND_MAX/(limit+1); int retval = 0; do { retval = rand_r(state) / divisor; } while (retval > limit); return retval; } void dfwavelet_new_randshift (struct dfwavelet_plan_s* plan) { int i; int maxShift = 1 << (plan->numLevels); for(i = 0; i < plan->numdims; i++) { // Generate random shift value between 0 and maxShift plan->randShift[i] = dfrand_lim(&plan->state, maxShift); } } void dfwavelet_clear_randshift (struct dfwavelet_plan_s* plan) { memset(plan->randShift, 0, 
plan->numdims*sizeof(int)); } void dfwavelet_free(struct dfwavelet_plan_s* plan) { free(plan->imSize); free(plan->minSize); free(plan->lod0); free(plan->lod1); free(plan->res); free(plan->waveSizes); free(plan->randShift); if (plan->noiseAmp!=NULL) free(plan->noiseAmp); free(plan); } ////////////// Private Functions ////////////// void dffwt3_cpu(struct dfwavelet_plan_s* plan, data_t* out_wcdf1,data_t* out_wcdf2,data_t* out_wcn, data_t* in_vx,data_t* in_vy,data_t* in_vz) { fwt3_cpu(plan,out_wcdf1,in_vx,0); fwt3_cpu(plan,out_wcdf2,in_vy,1); fwt3_cpu(plan,out_wcn,in_vz,2); mult(out_wcdf1,1/plan->res[0],plan->numCoeff); mult(out_wcdf2,1/plan->res[1],plan->numCoeff); mult(out_wcn,1/plan->res[2],plan->numCoeff); dflincomb_cpu(plan,out_wcdf1,out_wcdf2,out_wcn); } void dfiwt3_cpu(struct dfwavelet_plan_s* plan, data_t* out_vx,data_t* out_vy,data_t* out_vz, data_t* in_wcdf1,data_t* in_wcdf2,data_t* in_wcn) { dfunlincomb_cpu(plan,in_wcdf1,in_wcdf2,in_wcn); mult(in_wcdf1,plan->res[0],plan->numCoeff); mult(in_wcdf2,plan->res[1],plan->numCoeff); mult(in_wcn,plan->res[2],plan->numCoeff); iwt3_cpu(plan,out_vx,in_wcdf1,0); iwt3_cpu(plan,out_vy,in_wcdf2,1); iwt3_cpu(plan,out_vz,in_wcn,2); } void dfsoftthresh_cpu(struct dfwavelet_plan_s* plan,scalar_t dfthresh, scalar_t nthresh, data_t* wcdf1,data_t* wcdf2,data_t* wcn) { data_t* HxLyLz1 = wcdf1 + plan->waveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; data_t* HxLyLz2 = wcdf2 + plan->waveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; data_t* HxLyLz3 = wcn + plan->waveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; int l; for (l = 1; l <= plan->numLevels; ++l){ HxLyLz1 += 7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; HxLyLz2 += 7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; HxLyLz3 += 7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; } int dxNext = plan->waveSizes[0 + 3*plan->numLevels]; int dyNext = plan->waveSizes[1 + 3*plan->numLevels]; 
int dzNext = plan->waveSizes[2 + 3*plan->numLevels]; int blockSize = dxNext*dyNext*dzNext; int naInd = 0; for (l = plan->numLevels; l >= 1; --l) { dxNext = plan->waveSizes[0 + 3*l]; dyNext = plan->waveSizes[1 + 3*l]; dzNext = plan->waveSizes[2 + 3*l]; blockSize = dxNext*dyNext*dzNext; HxLyLz1 = HxLyLz1 - 7*blockSize; HxLyLz2 = HxLyLz2 - 7*blockSize; HxLyLz3 = HxLyLz3 - 7*blockSize; int bandInd; for (bandInd=0; bandInd<7*3;bandInd++) { data_t *subband; scalar_t lambda; if (bandInd<7) { subband = HxLyLz1 + bandInd*blockSize; lambda = dfthresh * plan->noiseAmp[naInd]; } else if (bandInd<14) { subband = HxLyLz2 + (bandInd-7)*blockSize; lambda = dfthresh * plan->noiseAmp[naInd]; } else { subband = HxLyLz3 + (bandInd-14)*blockSize; lambda = nthresh * plan->noiseAmp[naInd]; } // SoftThresh float const eps = 1.1921e-7f; #pragma omp parallel for for(int i = 0; i < blockSize; i++) { scalar_t norm = cabs(subband[i]); scalar_t red = norm - lambda; red = 0.5f*(red + fabs(red)); red = red / (norm + eps); subband[i] = red * subband[i]; } naInd++; } } } void dfwavthresh3_cpu(struct dfwavelet_plan_s* plan,scalar_t dfthresh, scalar_t nthresh,data_t* out_vx,data_t* out_vy,data_t* out_vz,data_t* in_vx,data_t* in_vy,data_t* in_vz) { data_t *wcdf1,*wcdf2,*wcn; wcdf1 = (data_t*) malloc(sizeof(data_t)*plan->numCoeff); wcdf2 = (data_t*) malloc(sizeof(data_t)*plan->numCoeff); wcn = (data_t*) malloc(sizeof(data_t)*plan->numCoeff); dffwt3_cpu(plan, wcdf1,wcdf2,wcn,in_vx,in_vy,in_vz); dfsoftthresh_cpu(plan,dfthresh,nthresh,wcdf1,wcdf2,wcn); dfiwt3_cpu(plan,out_vx,out_vy,out_vz,wcdf1,wcdf2,wcn); free(wcdf1); free(wcdf2); free(wcn); } void dflincomb_cpu(struct dfwavelet_plan_s* plan,data_t* wc1,data_t* wc2,data_t* wc3) { data_t* HxLyLz1 = wc1 + plan->waveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; data_t* HxLyLz2 = wc2 + plan->waveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; data_t* HxLyLz3 = wc3 + plan->waveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; int l; for (l = 1; l <= 
plan->numLevels; ++l){ HxLyLz1 += 7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; HxLyLz2 += 7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; HxLyLz3 += 7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; } int dxNext = plan->waveSizes[0 + 3*plan->numLevels]; int dyNext = plan->waveSizes[1 + 3*plan->numLevels]; int dzNext = plan->waveSizes[2 + 3*plan->numLevels]; int blockSize = dxNext*dyNext*dzNext; int i,j,k; for (l = plan->numLevels; l >= 1; --l) { dxNext = plan->waveSizes[0 + 3*l]; dyNext = plan->waveSizes[1 + 3*l]; dzNext = plan->waveSizes[2 + 3*l]; blockSize = dxNext*dyNext*dzNext; HxLyLz1 = HxLyLz1 - 7*blockSize; HxLyLz2 = HxLyLz2 - 7*blockSize; HxLyLz3 = HxLyLz3 - 7*blockSize; data_t* LxHyLz1 = HxLyLz1 + blockSize; data_t* HxHyLz1 = LxHyLz1 + blockSize; data_t* LxLyHz1 = HxHyLz1 + blockSize; data_t* HxLyHz1 = LxLyHz1 + blockSize; data_t* LxHyHz1 = HxLyHz1 + blockSize; data_t* HxHyHz1 = LxHyHz1 + blockSize; data_t* LxHyLz2 = HxLyLz2 + blockSize; data_t* HxHyLz2 = LxHyLz2 + blockSize; data_t* LxLyHz2 = HxHyLz2 + blockSize; data_t* HxLyHz2 = LxLyHz2 + blockSize; data_t* LxHyHz2 = HxLyHz2 + blockSize; data_t* HxHyHz2 = LxHyHz2 + blockSize; data_t* LxHyLz3 = HxLyLz3 + blockSize; data_t* HxHyLz3 = LxHyLz3 + blockSize; data_t* LxLyHz3 = HxHyLz3 + blockSize; data_t* HxLyHz3 = LxLyHz3 + blockSize; data_t* LxHyHz3 = HxLyHz3 + blockSize; data_t* HxHyHz3 = LxHyHz3 + blockSize; #pragma omp parallel for private(i,j,k) for (k=0;kwaveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; data_t* HxLyLz2 = wc2 + plan->waveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; data_t* HxLyLz3 = wc3 + plan->waveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; int l; for (l = 1; l <= plan->numLevels; ++l){ HxLyLz1 += 7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; HxLyLz2 += 7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; HxLyLz3 += 
7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; } int dxNext = plan->waveSizes[0 + 3*plan->numLevels]; int dyNext = plan->waveSizes[1 + 3*plan->numLevels]; int dzNext = plan->waveSizes[2 + 3*plan->numLevels]; int blockSize = dxNext*dyNext*dzNext; int i,j,k; for (l = plan->numLevels; l >= 1; --l) { dxNext = plan->waveSizes[0 + 3*l]; dyNext = plan->waveSizes[1 + 3*l]; dzNext = plan->waveSizes[2 + 3*l]; blockSize = dxNext*dyNext*dzNext; HxLyLz1 = HxLyLz1 - 7*blockSize; HxLyLz2 = HxLyLz2 - 7*blockSize; HxLyLz3 = HxLyLz3 - 7*blockSize; data_t* LxHyLz1 = HxLyLz1 + blockSize; data_t* HxHyLz1 = LxHyLz1 + blockSize; data_t* LxLyHz1 = HxHyLz1 + blockSize; data_t* HxLyHz1 = LxLyHz1 + blockSize; data_t* LxHyHz1 = HxLyHz1 + blockSize; data_t* HxHyHz1 = LxHyHz1 + blockSize; data_t* LxHyLz2 = HxLyLz2 + blockSize; data_t* HxHyLz2 = LxHyLz2 + blockSize; data_t* LxLyHz2 = HxHyLz2 + blockSize; data_t* HxLyHz2 = LxLyHz2 + blockSize; data_t* LxHyHz2 = HxLyHz2 + blockSize; data_t* HxHyHz2 = LxHyHz2 + blockSize; data_t* LxHyLz3 = HxLyLz3 + blockSize; data_t* HxHyLz3 = LxHyLz3 + blockSize; data_t* LxLyHz3 = HxHyLz3 + blockSize; data_t* HxLyHz3 = LxLyHz3 + blockSize; data_t* LxHyHz3 = HxLyHz3 + blockSize; data_t* HxHyHz3 = LxHyHz3 + blockSize; #pragma omp parallel for private(i,j,k) for (k=0;kwaveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; int l; for (l = 1; l <= plan->numLevels; ++l){ HxLyLz += 7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; } int dx = plan->imSize[0]; int dy = plan->imSize[1]; int dz = plan->imSize[2]; int dxNext = plan->waveSizes[0 + 3*plan->numLevels]; int dyNext = plan->waveSizes[1 + 3*plan->numLevels]; int dzNext = plan->waveSizes[2 + 3*plan->numLevels]; int blockSize = dxNext*dyNext*dzNext; data_t* LxLyLz = (data_t*) malloc(sizeof(data_t)*blockSize); data_t* tempz = (data_t*) malloc(sizeof(data_t)*dx*dy*dzNext); data_t* tempyz = (data_t*) malloc(sizeof(data_t)*dx*dyNext*dzNext); data_t* tempxyz = 
(data_t*) malloc(sizeof(data_t)*blockSize); // Assign Filters scalar_t *lodx,*lody,*lodz,*hidx,*hidy,*hidz; lodx = plan->lod0; lody = plan->lod0; lodz = plan->lod0; hidx = plan->hid0; hidy = plan->hid0; hidz = plan->hid0; if (dir==0) { lodx = plan->lod1; hidx = plan->hid1; } if (dir==1) { lody = plan->lod1; hidy = plan->hid1; } if (dir==2) { lodz = plan->lod1; hidz = plan->hid1; } for (l = plan->numLevels; l >= 1; --l) { dxNext = plan->waveSizes[0 + 3*l]; dyNext = plan->waveSizes[1 + 3*l]; dzNext = plan->waveSizes[2 + 3*l]; blockSize = dxNext*dyNext*dzNext; HxLyLz = HxLyLz - 7*blockSize; data_t* LxHyLz = HxLyLz + blockSize; data_t* HxHyLz = LxHyLz + blockSize; data_t* LxLyHz = HxHyLz + blockSize; data_t* HxLyHz = LxLyHz + blockSize; data_t* LxHyHz = HxLyHz + blockSize; data_t* HxHyHz = LxHyHz + blockSize; int dxy = dx*dy; int newdz = (dz + plan->filterLen-1) / 2; int newdy = (dy + plan->filterLen-1) / 2; int newdxy = dx*newdy; // Lz conv_down_3d(tempz, inImage, dz, dxy, dx, 1, dy, dx, lodz,plan->filterLen); // LyLz conv_down_3d(tempyz, tempz, dy, dx, dx, 1, newdz, dxy, lody,plan->filterLen); conv_down_3d(LxLyLz, tempyz, dx, 1, newdy, dx, newdz, newdxy, lodx,plan->filterLen); conv_down_3d(HxLyLz, tempyz, dx, 1, newdy, dx, newdz, newdxy, hidx,plan->filterLen); // HyLz conv_down_3d(tempyz, tempz, dy, dx, dx, 1, newdz, dxy, hidy,plan->filterLen); conv_down_3d(LxHyLz, tempyz, dx, 1, newdy, dx, newdz, newdxy, lodx,plan->filterLen); conv_down_3d(HxHyLz, tempyz, dx, 1, newdy, dx, newdz, newdxy, hidx,plan->filterLen); // Hz conv_down_3d(tempz, inImage, dz, dxy, dx, 1, dy, dx, hidz,plan->filterLen); // LyHz conv_down_3d(tempyz, tempz, dy, dx, dx, 1, newdz, dxy, lody,plan->filterLen); conv_down_3d(LxLyHz, tempyz, dx, 1, newdy, dx, newdz, newdxy, lodx,plan->filterLen); conv_down_3d(HxLyHz, tempyz, dx, 1, newdy, dx, newdz, newdxy, hidx,plan->filterLen); // HyHz conv_down_3d(tempyz, tempz, dy, dx, dx, 1, newdz, dxy, hidy,plan->filterLen); conv_down_3d(LxHyHz, tempyz, dx, 1, 
newdy, dx, newdz, newdxy, lodx,plan->filterLen); conv_down_3d(HxHyHz, tempyz, dx, 1, newdy, dx, newdz, newdxy, hidx,plan->filterLen); memcpy(tempxyz, LxLyLz, blockSize*sizeof(data_t)); inImage = tempxyz; dx = dxNext; dy = dyNext; dz = dzNext; } // Final LxLyLz memcpy(coeff, inImage, plan->waveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]*sizeof(data_t)); free(LxLyLz); free(tempz); free(tempyz); free(tempxyz); circunshift_cpu(plan,origInImage); } void iwt3_cpu(struct dfwavelet_plan_s* plan, data_t* outImage, data_t* coeff,int dir) { // Workspace dimensions int dxWork = plan->waveSizes[0 + 3*plan->numLevels]*2-1 + plan->filterLen-1; int dyWork = plan->waveSizes[1 + 3*plan->numLevels]*2-1 + plan->filterLen-1; int dzWork = plan->waveSizes[2 + 3*plan->numLevels]*2-1 + plan->filterLen-1; int dyWork2 = plan->waveSizes[1 + 3*(plan->numLevels-1)]*2-1 + plan->filterLen-1; int dzWork2 = plan->waveSizes[2 + 3*(plan->numLevels-1)]*2-1 + plan->filterLen-1; // Workspace data_t* tempyz = (data_t*) malloc(sizeof(data_t)*dxWork*dyWork2*dzWork2); data_t* tempz = (data_t*) malloc(sizeof(data_t)*dxWork*dyWork*dzWork2); data_t* tempFull = (data_t*) malloc(sizeof(data_t)*dxWork*dyWork*dzWork); int dx = plan->waveSizes[0]; int dy = plan->waveSizes[1]; int dz = plan->waveSizes[2]; // Assign Filters scalar_t *lorx,*lory,*lorz,*hirx,*hiry,*hirz; lorx = plan->lor0; lory = plan->lor0; lorz = plan->lor0; hirx = plan->hir0; hiry = plan->hir0; hirz = plan->hir0; if (dir==0) { lorx = plan->lor1; hirx = plan->hir1; } if (dir==1) { lory = plan->lor1; hiry = plan->hir1; } if (dir==2) { lorz = plan->lor1; hirz = plan->hir1; } memcpy(outImage, coeff, dx*dy*dz*sizeof(data_t)); data_t* HxLyLz = coeff + dx*dy*dz; int level; for (level = 1; level < plan->numLevels+1; ++level) { dx = plan->waveSizes[0 + 3*level]; dy = plan->waveSizes[1 + 3*level]; dz = plan->waveSizes[2 + 3*level]; int blockSize = dx*dy*dz; data_t* LxHyLz = HxLyLz + blockSize; data_t* HxHyLz = LxHyLz + blockSize; data_t* LxLyHz = HxHyLz + 
blockSize; data_t* HxLyHz = LxLyHz + blockSize; data_t* LxHyHz = HxLyHz + blockSize; data_t* HxHyHz = LxHyHz + blockSize; data_t* LxLyLz = outImage; int newdx = 2*dx-1 + plan->filterLen-1; int newdy = 2*dy-1 + plan->filterLen-1; int newdz = 2*dz-1 + plan->filterLen-1; int dxy = dx*dy; int newdxy = newdx*dy; int newnewdxy = newdx*newdy; memset(tempFull, 0, newnewdxy*newdz*sizeof(data_t)); memset(tempz, 0, newnewdxy*dz*sizeof(data_t)); memset(tempyz, 0, newdxy*dz*sizeof(data_t)); conv_up_3d(tempyz, LxLyLz, dx, 1, dy, dx, dz, dxy, lorx,plan->filterLen); conv_up_3d(tempyz, HxLyLz, dx, 1, dy, dx, dz, dxy, hirx,plan->filterLen); conv_up_3d(tempz, tempyz, dy, newdx, newdx, 1, dz, newdxy, lory,plan->filterLen); memset(tempyz, 0, newdxy*dz*sizeof(data_t)); conv_up_3d(tempyz, LxHyLz, dx, 1, dy, dx, dz, dxy, lorx,plan->filterLen); conv_up_3d(tempyz, HxHyLz, dx, 1, dy, dx, dz, dxy, hirx,plan->filterLen); conv_up_3d(tempz, tempyz, dy, newdx, newdx, 1, dz, newdxy, hiry,plan->filterLen); conv_up_3d(tempFull, tempz, dz, newnewdxy, newdx, 1, newdy, newdx, lorz,plan->filterLen); memset(tempz, 0, newnewdxy*dz*sizeof(data_t)); memset(tempyz, 0, newdxy*dz*sizeof(data_t)); conv_up_3d(tempyz, LxLyHz, dx, 1, dy, dx, dz, dxy, lorx,plan->filterLen); conv_up_3d(tempyz, HxLyHz, dx, 1, dy, dx, dz, dxy, hirx,plan->filterLen); conv_up_3d(tempz, tempyz, dy, newdx, newdx, 1, dz, newdxy, lory,plan->filterLen); memset(tempyz, 0, newdxy*dz*sizeof(data_t)); conv_up_3d(tempyz, LxHyHz, dx, 1, dy, dx, dz, dxy, lorx,plan->filterLen); conv_up_3d(tempyz, HxHyHz, dx, 1, dy, dx, dz, dxy, hirx,plan->filterLen); conv_up_3d(tempz, tempyz, dy, newdx, newdx, 1, dz, newdxy, hiry,plan->filterLen); conv_up_3d(tempFull, tempz, dz, newnewdxy, newdx, 1, newdy, newdx, hirz,plan->filterLen); // Crop center of workspace int dxNext = plan->waveSizes[0+3*(level+1)]; int dyNext = plan->waveSizes[1+3*(level+1)]; int dzNext = plan->waveSizes[2+3*(level+1)]; int dxyNext = dxNext*dyNext; dxWork = (2*dx-1 + plan->filterLen-1); 
dyWork = (2*dy-1 + plan->filterLen-1);
/* The statements down to the closing brace after circunshift_cpu(plan,outImage)
 * are the tail of iwt3_cpu(), which begins on an earlier source line. */
dzWork = (2*dz-1 + plan->filterLen-1);
int dxyWork = dxWork*dyWork;
int xOffset = (int) ((dxWork - dxNext) / 2.0);
int yOffset = (int) ((dyWork - dyNext) / 2.0);
int zOffset = (int) ((dzWork - dzNext) / 2.0);
int k,j;
/* copy the centred dxNext x dyNext x dzNext region of the workspace, row by row */
for (k = 0; k < dzNext; ++k){
  for (j = 0; j < dyNext; ++j){
    memcpy(outImage+j*dxNext + k*dxyNext, tempFull+xOffset + (yOffset+j)*dxWork + (zOffset+k)*dxyWork, dxNext*sizeof(data_t));
  }
}
HxLyLz += 7*blockSize;
}
free(tempyz);
free(tempz);
free(tempFull);
circunshift_cpu(plan,outImage);
}

/**
 * Apply the plan's shift to data (plan->numPixel elements) in place.
 *
 * If plan->randshift is set, a new random shift is drawn first. Nothing
 * else happens when every entry of plan->randShift is zero.
 *
 * Otherwise the element at linear index idx is moved to
 * (idx + r0 + r1*dx [+ r2*dx*dy]) modulo the number of elements. This is
 * one rotation of the flattened array, not a separate wrap-around per axis.
 *
 * Only numdims == 2 and numdims == 3 are handled; for other values the
 * data is copied and left unchanged.
 */
void circshift_cpu(struct dfwavelet_plan_s* plan, data_t *data) {
  if (plan->randshift)
    dfwavelet_new_randshift(plan);
  // Return if no shifts
  int zeroShift = 1;
  int i;
  for (i = 0; i< plan->numdims; i++)
  {
    zeroShift &= (plan->randShift[i]==0);
  }
  if(zeroShift) {
    return;
  }
  // Copy data
  data_t* dataCopy = malloc(sizeof(data_t)*plan->numPixel);
  memcpy(dataCopy, data, plan->numPixel*sizeof(data_t));
  if (plan->numdims==2)
  {
    int dx,dy,r0,r1,j,i,index,indexShifted;
    dx = plan->imSize[0];
    dy = plan->imSize[1];
    r0 = plan->randShift[0];
    r1 = plan->randShift[1];
#pragma omp parallel for private(index, j, i,indexShifted)
    for(j = 0; j < dy; j++) {
      for(i = 0; i < dx; i++) {
        index = i+j*dx;
        /* adding dx*dy before the second % keeps the result non-negative */
        indexShifted = (((i+r0) + (j+r1)*dx)%(dx*dy)+dx*dy)%(dx*dy);
        data[indexShifted] = dataCopy[index];
      }
    }
  }
  if (plan->numdims==3)
  {
    int dx,dy,dz,r0,r1,r2,k,j,i,index,indexShifted;
    dx = plan->imSize[0];
    dy = plan->imSize[1];
    dz = plan->imSize[2];
    r0 = plan->randShift[0];
    r1 = plan->randShift[1];
    r2 = plan->randShift[2];
#pragma omp parallel for private(index, k, j, i,indexShifted)
    for (k = 0; k < dz; k++) {
      for(j = 0; j < dy; j++) {
        for(i = 0; i < dx; i++) {
          index = i+j*dx+k*dx*dy;
          indexShifted = ((i+r0 + (j+r1)*dx + (k+r2)*dx*dy)%(dx*dy*dz)+(dx*dy*dz))%(dx*dy*dz);
          data[indexShifted] = dataCopy[index];
        }
      }
    }
  }
  /* NOTE(review): this barrier is not enclosed by a parallel region of this
   * function; the parallel-for loops above have already completed here. */
#pragma omp barrier
  free(dataCopy);
}

/**
 * Undo circshift_cpu() for the shift currently stored in plan->randShift:
 * element idx receives the value found at the shifted index. No new shift
 * is drawn. Nothing happens when every shift entry is zero.
 */
void circunshift_cpu(struct dfwavelet_plan_s* plan, data_t *data) {
  // Return if no shifts
  int zeroShift = 1;
  int i;
  for (i = 0; i< plan->numdims; i++)
  {
    zeroShift &= (plan->randShift[i]==0);
  }
  if(zeroShift) {
    return;
  }
  // Copy data
  data_t* dataCopy = malloc(sizeof(data_t)*plan->numPixel);
  memcpy(dataCopy, data, plan->numPixel*sizeof(data_t));
  if (plan->numdims==2)
  {
    int dx,dy,r0,r1,j,i,index,indexShifted;
    dx = plan->imSize[0];
    dy = plan->imSize[1];
    r0 = plan->randShift[0];
    r1 = plan->randShift[1];
#pragma omp parallel for private(index, j, i,indexShifted)
    for(j = 0; j < dy; j++) {
      for(i = 0; i < dx; i++) {
        index = i+j*dx;
        indexShifted = (((i+r0) + (j+r1)*dx)%(dx*dy)+dx*dy)%(dx*dy);
        data[index] = dataCopy[indexShifted];
      }
    }
  }
  if (plan->numdims==3)
  {
    int dx,dy,dz,r0,r1,r2,k,j,i,index,indexShifted;
    dx = plan->imSize[0];
    dy = plan->imSize[1];
    dz = plan->imSize[2];
    r0 = plan->randShift[0];
    r1 = plan->randShift[1];
    r2 = plan->randShift[2];
#pragma omp parallel for private(index, k, j, i,indexShifted)
    for (k = 0; k < dz; k++) {
      for(j = 0; j < dy; j++) {
        for(i = 0; i < dx; i++) {
          index = i+j*dx+k*dx*dy;
          indexShifted = ((i+r0 + (j+r1)*dx + (k+r2)*dx*dy)%(dx*dy*dz)+(dx*dy*dz))%(dx*dy*dz);
          data[index] = dataCopy[indexShifted];
        }
      }
    }
  }
  free(dataCopy);
}

/********** Helper Function *********/

/**
 * Filter along dimension 1 and keep every second sample.
 *
 * The volume is addressed as in[i1*skip1 + i2*skip2 + i3*skip3]. Each of
 * the size2*size3 lines along dimension 1 is processed independently.
 * A line of size1 samples yields (size1 + filterLen-1)/2 output samples.
 * Input positions outside [0, size1) are mirrored back into the line
 * (-1 maps to 0, size1 maps to size1-1). The filter taps are read in
 * reverse order.
 *
 * In the output, skip1 is reused. A stride larger than skip1 is rescaled
 * by outSize1/size1 to account for the shorter dimension; other strides
 * are kept. Every output sample is overwritten.
 */
void conv_down_3d(data_t *out, data_t *in, int size1, int skip1, int size2, int skip2, int size3, int skip3, scalar_t *filter, int filterLen)
{
  int outSize1 = (size1 + filterLen-1) / 2;
  // Adjust out skip 2 and 3 if needed
  int outSkip2;
  if(skip2 > skip1) {
    outSkip2 = outSize1*skip2/size1;
  }
  else {
    outSkip2 = skip2;
  }
  int outSkip3;
  if(skip3 > skip1) {
    outSkip3 = outSize1*skip3/size1;
  }
  else {
    outSkip3 = skip3;
  }
  int i32;
#pragma omp parallel for
  for (i32 = 0; i32 < size2*size3; ++i32)
  {
    int i2 = i32 % size2;
    int i3 = i32 / size2;
    int i1;
    for (i1 = 0; i1 < outSize1; ++i1)
    {
      out[i3*outSkip3 + i2*outSkip2 + i1*skip1] = 0.0f;
      int k;
      for (k = 0; k < filterLen; ++k)
      {
        int out_i1 = 2*i1+1 - (filterLen-1) + k;
        /* mirror positions that fall off either end of the line */
        if (out_i1 < 0)
          out_i1 = -out_i1-1;
        if (out_i1 >= size1)
          out_i1 = size1-1 - (out_i1-size1);
        out[i3*outSkip3 + i2*outSkip2 + i1*skip1] += in[i3*skip3 + i2*skip2 + out_i1*skip1] * filter[filterLen-1-k];
      }
    }
  }
}

/**
 * Upsample by two along dimension 1 and filter; counterpart of conv_down_3d().
 *
 * A line of size1 samples yields 2*size1-1 + filterLen-1 output samples.
 * Results are ADDED to out, so the caller has to clear out beforehand.
 * Several calls may accumulate into the same buffer.
 * Output strides are derived as in conv_down_3d().
 */
void conv_up_3d(data_t *out, data_t *in, int size1, int skip1, int size2, int skip2, int size3, int skip3, scalar_t *filter, int filterLen)
{
  int outSize1 = 2*size1-1 + filterLen-1;
  // Adjust out skip 2 and 3 if needed
  int outSkip2;
  if(skip2 > skip1) {
    outSkip2 = outSize1*skip2/size1;
  }
  else {
    outSkip2 = skip2;
  }
  int outSkip3;
  if(skip3 > skip1) {
    outSkip3 = outSize1*skip3/size1;
  }
  else {
    outSkip3 = skip3;
  }
  int i32;
#pragma omp parallel for
  for (i32 = 0; i32 < size2*size3; ++i32)
  {
    int i2 = i32 % size2;
    int i3 = i32 / size2;
    int i1;
    for (i1 = 0; i1 < outSize1; ++i1) {
      int k;
      /* start k at the parity that makes i1 - (filterLen-1) + k even, so
       * only taps that line up with an input sample are visited */
      for (k = (i1 - (filterLen-1)) & 1; k < filterLen; k += 2){
        int in_i1 = (i1 - (filterLen-1) + k) >> 1;
        if (in_i1 >= 0 && in_i1 < size1)
          out[i3*outSkip3 + i2*outSkip2 + i1*skip1] += in[i3*skip3 + i2*skip2 + in_i1*skip1] * filter[filterLen-1-k];
      }
    }
  }
}

/* NOTE(review): this copy of the file has lost the text between "for(i=0; i"
 * and "numdims;" below, i.e. the rest of mult() and the beginning of the
 * next function. The remaining tokens are kept exactly as found; restore
 * the passage from a pristine copy of dfwavelet.c. */
void mult(data_t* in,scalar_t scale,int numMax)
{
  int i;
  for(i=0; inumdims;
  int filterLen = plan->filterLen;
  int bandSize, l, minSize;
  plan->numLevels = 10000000;
  int d;
  for (d = 0; d < numdims; d++) {
    bandSize = plan->imSize[d];
    minSize = plan->minSize[d];
    l = 0;
    while (bandSize > minSize) {
      ++l;
      bandSize = (bandSize + filterLen - 1) / 2;
    }
    l--;
    plan->numLevels = (l < plan->numLevels) ?
l : plan->numLevels; } } void create_wavelet_sizes(struct dfwavelet_plan_s* plan) { int numdims = plan->numdims; int filterLen = plan->filterLen; int numLevels = plan->numLevels; int numSubCoef; plan->waveSizes = (long*) malloc(sizeof(long)*numdims*(numLevels+2)); // Get number of subband per level, (3 for 2d, 7 for 3d) // Set the last bandSize to be imSize int d,l; int numSubband = 1; for (d = 0; dwaveSizes[d + numdims*(numLevels+1)] = plan->imSize[d]; numSubband <<= 1; } numSubband--; // Get numCoeff and waveSizes // Each bandSize[l] is (bandSize[l+1] + filterLen - 1)/2 plan->numCoeff = 0; for (l = plan->numLevels; l >= 1; --l) { numSubCoef = 1; for (d = 0; d < numdims; d++) { plan->waveSizes[d + numdims*l] = (plan->waveSizes[d + numdims*(l+1)] + filterLen - 1) / 2; numSubCoef *= plan->waveSizes[d + numdims*l]; } plan->numCoeff += numSubband*numSubCoef; if (l==1) plan->numCoarse = numSubCoef; } numSubCoef = 1; for (d = 0; d < numdims; d++) { plan->waveSizes[d] = plan->waveSizes[numdims+d]; numSubCoef *= plan->waveSizes[d]; } plan->numCoeff += numSubCoef; } /* All filter coefficients are obtained from http://wavelets.pybytes.com/ */ void create_wavelet_filters(struct dfwavelet_plan_s* plan) { int filterLen = 0; scalar_t* filter1, *filter2; filterLen = 6; // CDF 2.2 and CDF 3.1 Wavelet scalar_t cdf22[] = { 0.0,-0.17677669529663689,0.35355339059327379,1.0606601717798214,0.35355339059327379,-0.17677669529663689, 0.0,0.35355339059327379,-0.70710678118654757,0.35355339059327379,0.0,0.0, 0.0,0.35355339059327379,0.70710678118654757,0.35355339059327379,0.0,0.0, 0.0,0.17677669529663689,0.35355339059327379,-1.0606601717798214,0.35355339059327379,0.17677669529663689 }; scalar_t cdf31[] = { 0.0,-0.35355339059327379,1.0606601717798214,1.0606601717798214,-0.35355339059327379,0.0 , 0.0,-0.17677669529663689,0.53033008588991071,-0.53033008588991071,0.17677669529663689,0.0, 0.0,0.17677669529663689,0.53033008588991071,0.53033008588991071,0.17677669529663689,0.0, 
0.0,-0.35355339059327379,-1.0606601717798214,1.0606601717798214,0.35355339059327379,0.0 }; filter1 = cdf22; filter2 = cdf31; // Allocate filters contiguously (for convenience) plan->filterLen = filterLen; plan->lod0 = (scalar_t*) malloc(sizeof(scalar_t) * 4 * filterLen); memcpy(plan->lod0, filter1, 4*filterLen*sizeof(scalar_t)); plan->lod1 = (scalar_t*) malloc(sizeof(scalar_t) * 4 * filterLen); memcpy(plan->lod1, filter2, 4*filterLen*sizeof(scalar_t)); plan->hid0 = plan->lod0 + 1*filterLen; plan->lor0 = plan->lod0 + 2*filterLen; plan->hir0 = plan->lod0 + 3*filterLen; plan->hid1 = plan->lod1 + 1*filterLen; plan->lor1 = plan->lod1 + 2*filterLen; plan->hir1 = plan->lod1 + 3*filterLen; } #ifndef M_PI #define M_PI 3.14159265358979323846 #endif static data_t drand() /* uniform distribution, (0..1] */ { return (rand()+1.0)/(RAND_MAX+1.0); } static void random_normal(data_t* in,int length) /* normal distribution, centered on 0, std dev 1 */ { int i; for (i=0;inoiseAmp==NULL) { // Generate Gaussian w/ mean=0, std=1 data data_t* vx,*vy,*vz; data_t* wcdf1,*wcdf2,*wcn; vx = (data_t*) malloc(sizeof(data_t)*plan->numPixel); vy = (data_t*) malloc(sizeof(data_t)*plan->numPixel); vz = (data_t*) malloc(sizeof(data_t)*plan->numPixel); random_normal(vx,plan->numPixel); random_normal(vy,plan->numPixel); random_normal(vz,plan->numPixel); wcdf1 = (data_t*) malloc(sizeof(data_t)*plan->numCoeff); wcdf2 = (data_t*) malloc(sizeof(data_t)*plan->numCoeff); wcn = (data_t*) malloc(sizeof(data_t)*plan->numCoeff); // Get Wavelet Coefficients int temp_use_gpu = plan->use_gpu; if (plan->use_gpu==1) plan->use_gpu = 2; dfwavelet_forward(plan,wcdf1,wcdf2,wcn,vx,vy,vz); plan->use_gpu = temp_use_gpu; // Get Noise Amp for each subband data_t* HxLyLz1 = wcdf1 + plan->waveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; data_t* HxLyLz2 = wcdf2 + plan->waveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; data_t* HxLyLz3 = wcn + plan->waveSizes[0]*plan->waveSizes[1]*plan->waveSizes[2]; int l; for (l = 1; l <= 
plan->numLevels; ++l){ HxLyLz1 += 7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; HxLyLz2 += 7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; HxLyLz3 += 7*plan->waveSizes[0 + 3*l]*plan->waveSizes[1 + 3*l]*plan->waveSizes[2 + 3*l]; } int numBand = 7*plan->numLevels*3; plan->noiseAmp = (scalar_t*) malloc(sizeof(scalar_t)*numBand); int naInd = 0; for (l = plan->numLevels; l >= 1; --l) { int dxNext = plan->waveSizes[0 + 3*l]; int dyNext = plan->waveSizes[1 + 3*l]; int dzNext = plan->waveSizes[2 + 3*l]; int blockSize = dxNext*dyNext*dzNext; HxLyLz1 = HxLyLz1 - 7*blockSize; HxLyLz2 = HxLyLz2 - 7*blockSize; HxLyLz3 = HxLyLz3 - 7*blockSize; int bandInd; //#pragma omp parallel for private(bandInd) for (bandInd=0; bandInd<7*3;bandInd++) { data_t *subband; if (bandInd<7) { subband = HxLyLz1 + bandInd*blockSize; } else if (bandInd<14) { subband = HxLyLz2 + (bandInd-7)*blockSize; } else { subband = HxLyLz3 + (bandInd-14)*blockSize; } data_t sig = 0; data_t mean = 0; data_t mean_old; int i; for (i=0; inoiseAmp[naInd] = sig; naInd++; } } free(vx); free(vy); free(vz); free(wcdf1); free(wcdf2); free(wcn); } } bart-0.4.02/src/dfwavelet/dfwavelet.h000066400000000000000000000030161320577655200174430ustar00rootroot00000000000000/* * Copyright 2013-2015 The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
 */

/* Public interface of the divergence-free wavelet transform (dfwavelet.c). */
#ifndef __DFWAVELET_H
#define __DFWAVELET_H

#include "misc/cppwrap.h"

/* Opaque here; the members are declared in dfwavelet_impl.h. */
struct dfwavelet_plan_s;

/*
 * Create a plan. The first numdims entries of imSize, minSize_tr and res
 * are copied into the plan. use_gpu is stored and selects the
 * implementation used by the functions below (0: CPU).
 * Release the result with dfwavelet_free().
 */
extern struct dfwavelet_plan_s* prepare_dfwavelet_plan(int numdims, long* imSize, long* minSize_tr, _Complex float* res, int use_gpu);

/* Forward transform: components in_vx/in_vy/in_vz -> coefficient arrays out_wcdf1/out_wcdf2/out_wcn. */
extern void dfwavelet_forward(struct dfwavelet_plan_s* plan, _Complex float* out_wcdf1, _Complex float* out_wcdf2, _Complex float* out_wcn, _Complex float* in_vx, _Complex float* in_vy, _Complex float* in_vz);

/*
 * Inverse transform: coefficient arrays -> components out_vx/out_vy/out_vz.
 * The CPU implementation rescales the input coefficient arrays in place.
 */
extern void dfwavelet_inverse(struct dfwavelet_plan_s* plan, _Complex float* out_vx, _Complex float* out_vy, _Complex float* out_vz, _Complex float* in_wcdf1, _Complex float* in_wcdf2, _Complex float* in_wcn);

/*
 * Soft-threshold the coefficient arrays in place. dfthresh is applied to
 * wcdf1 and wcdf2, nthresh to wcn; both are scaled by the plan's
 * per-subband noise amplification.
 */
extern void dfsoft_thresh(struct dfwavelet_plan_s* plan, float dfthresh, float nthresh, _Complex float* wcdf1, _Complex float* wcdf2, _Complex float* wcn);

/* Forward transform, soft-threshold and inverse transform in one call. */
extern void dfwavelet_thresh(struct dfwavelet_plan_s* plan, float dfthresh, float nthresh,_Complex float* out_vx, _Complex float* out_vy, _Complex float* out_vz, _Complex float* in_vx,_Complex float* in_vy, _Complex float* in_vz);

/* Draw a new random shift for every dimension of the plan. */
extern void dfwavelet_new_randshift(struct dfwavelet_plan_s* plan);

/* Reset the shift of every dimension to zero. */
extern void dfwavelet_clear_randshift(struct dfwavelet_plan_s* plan);

/* Release the plan and the arrays it owns. */
extern void dfwavelet_free(struct dfwavelet_plan_s* plan);

/* NOTE(review): no definition of print_plan() is visible in the part of dfwavelet.c reviewed here - confirm it exists. */
extern void print_plan(struct dfwavelet_plan_s* plan);

#include "misc/cppwrap.h"

#endif // __DFWAVELET_H
bart-0.4.02/src/dfwavelet/dfwavelet_impl.h000066400000000000000000000027041320577655200204670ustar00rootroot00000000000000
/*
 * Copyright 2013-2015 The Regents of the University of California.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
 */

/**
 * data_t is the type for input/output data, can be float/double or _Complex float/double.
 */
typedef _Complex float data_t;

/**
 * scalar_t is the type for filters and scalars, can be float/double
 */
typedef float scalar_t;

/**
 * Wavelet Plan struct
 *
 * @param use_gpu     0 selects the CPU implementation; 1 and 2 select the GPU variants (USE_CUDA builds only)
 * @param imSize      Input Image Size (numdims entries)
 * @param numPixel    Number of image pixels (product of imSize)
 * @param numCoeff    Number of wavelet coefficients
 * @param res         Resolution per dimension
 * @param minSize     Minimum size for the scaling subband
 * @param waveSizes   Contains all wavelet subband sizes
 * @param numLevels   Number of decomposition levels
 * @param randShift   Current shift per dimension
 *
 */
struct dfwavelet_plan_s {
  int use_gpu;
  unsigned int state; // generator state handed to rand_r() when a random shift is drawn
  int numdims;
  long* imSize; // Input Image Size
  long numPixel; // Number of image pixels
  long numCoeff; // Number of wavelet coefficients
  data_t* res; // Resolution
  scalar_t* noiseAmp; // Noise amplification for each subband
  scalar_t percentZero; // set to -1 by prepare_dfwavelet_plan()
  long* minSize; // Minimum size for the scaling subband
  long numCoarse; // number of coefficients of one level-1 subband
  long* waveSizes; // Contains all wavelet subband sizes
  int numLevels;
  int* randShift; // shift per dimension; all zero means no shift
  _Bool randshift; // if set, circshift_cpu() draws a new random shift each time it runs
  // Filter parameters
  int filterLen;
  // lod0/hid0/lor0/hir0 are four consecutive filterLen-long filters inside
  // the single allocation starting at lod0. The forward transform uses
  // lod/hid, the inverse uses lor/hir.
  scalar_t* lod0;
  scalar_t* hid0;
  scalar_t* lor0;
  scalar_t* hir0;
  // second filter set, laid out the same way inside the allocation starting at lod1
  scalar_t* lod1;
  scalar_t* hid1;
  scalar_t* lor1;
  scalar_t* hir1;
};
bart-0.4.02/src/dfwavelet/dfwavelet_kernels.cu000066400000000000000000001735141320577655200213610ustar00rootroot00000000000000
/*
 * Copyright 2013-2015 The Regents of the University of California.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
* * Authors: * 2013 Frank Ong, Martin Uecker, Pat Virtue, and Mark Murphy * frankong@berkeley.edu */ #include #include #include #include #include #include #include #include "num/multind.h" #include "dfwavelet_kernels.h" #include "dfwavelet_impl.h" # define _hdev_ __host__ __device__ // _data_t is the interal representation of data_t in CUDA // Must be float2/double2 for data_t=Complex float/double or float/double for data_t=float/double typedef float2 _data_t; // Float2 Operators inline _hdev_ float2 operator+ (float2 z1, float2 z2) { return make_float2 (z1.x + z2.x, z1.y + z2.y); } inline _hdev_ float2 operator- (float2 z1, float2 z2) { return make_float2 (z1.x - z2.x, z1.y - z2.y); } inline _hdev_ float2 operator* (float2 z1, float2 z2) { return make_float2 (z1.x*z2.x - z1.y*z2.y, z1.x*z2.y + z1.y*z2.x); } inline _hdev_ float2 operator* (float2 z1, float alpha) { return make_float2 (z1.x*alpha, z1.y*alpha); } inline _hdev_ float2 operator* (float alpha,float2 z1) { return make_float2 (z1.x*alpha, z1.y*alpha); } inline _hdev_ float2 operator/ (float alpha,float2 z1) { return make_float2 (1.f/z1.x, 1.f/z1.y); } inline _hdev_ void operator+= (float2 &z1, float2 z2) { z1.x += z2.x; z1.y += z2.y; } inline _hdev_ float abs(float2 z1) { return sqrt(z1.x*z1.x + z1.y*z1.y); } // Double2 Operators inline _hdev_ double2 operator+ (double2 z1, double2 z2) { return make_double2 (z1.x + z2.x, z1.y + z2.y); } inline _hdev_ double2 operator- (double2 z1, double2 z2) { return make_double2 (z1.x - z2.x, z1.y - z2.y); } inline _hdev_ double2 operator* (double2 z1, double2 z2) { return make_double2 (z1.x*z2.x - z1.y*z2.y, z1.x*z2.y + z1.y*z2.x); } inline _hdev_ double2 operator* (double2 z1, double alpha) { return make_double2 (z1.x*alpha, z1.y*alpha); } inline _hdev_ double2 operator* (double alpha,double2 z1) { return make_double2 (z1.x*alpha, z1.y*alpha); } inline _hdev_ double2 operator/ (double alpha,double2 z1) { return make_double2 (1.f/z1.x, 1.f/z1.y); } inline _hdev_ void 
operator+= (double2 &z1, double2 z2) { z1.x += z2.x; z1.y += z2.y; } inline _hdev_ double abs(double2 z1) { return sqrt(z1.x*z1.x + z1.y*z1.y); } /********** Macros ************/ #define cuda(Call) do { \ cudaError_t err = cuda ## Call ; \ if (err != cudaSuccess){ \ fprintf(stderr, "%s\n", cudaGetErrorString(err)); \ throw; \ } \ } while(0) #define cuda_sync() do{ \ cuda (ThreadSynchronize()); \ cuda (GetLastError()); \ } while(0) /********** Macros ************/ #define cuda(Call) do { \ cudaError_t err = cuda ## Call ; \ if (err != cudaSuccess){ \ fprintf(stderr, "%s\n", cudaGetErrorString(err)); \ throw; \ } \ } while(0) #define cuda_sync() do{ \ cuda (ThreadSynchronize()); \ cuda (GetLastError()); \ } while(0) // ############################################################################ // Headers // ############################################################################ static __global__ void cu_fwt3df_col(_data_t *Lx,_data_t *Hx,_data_t *in,int dx,int dy,int dz,int dxNext,int dyNext,int dzNext,scalar_t *lod,scalar_t *hid,int filterLen); static __global__ void cu_fwt3df_row(_data_t *Ly,_data_t *Hy,_data_t *in,int dx,int dy,int dz,int dxNext,int dyNext,int dzNext,scalar_t *lod,scalar_t *hid,int filterLen); static __global__ void cu_fwt3df_dep(_data_t *Lz,_data_t *Hz,_data_t *in,int dx,int dy,int dz,int dxNext,int dyNext,int dzNext,scalar_t *lod,scalar_t *hid,int filterLen); static __global__ void cu_iwt3df_dep(_data_t *out,_data_t *Lz,_data_t *Hz,int dx,int dy,int dz,int dxNext,int dyNext,int dzNext,int xOffset,int yOffset,int zOffset,scalar_t *lod,scalar_t *hid,int filterLen); static __global__ void cu_iwt3df_row(_data_t *out,_data_t *Ly,_data_t *Hy,int dx,int dy,int dz,int dxNext,int dyNext,int dzNext,int xOffset,int yOffset,int zOffset,scalar_t *lod,scalar_t *hid,int filterLen); static __global__ void cu_iwt3df_col(_data_t *out,_data_t *Lx,_data_t *Hx,int dx,int dy,int dz,int dxNext,int dyNext,int dzNext,int xOffset,int yOffset,int zOffset,scalar_t 
*lod,scalar_t *hid,int filterLen); static __global__ void cu_fwt3df_LC1(_data_t *HxLyLz_df1,_data_t *HxLyLz_df2,_data_t *HxLyLz_n,_data_t *LxHyLz_df1,_data_t *LxHyLz_df2,_data_t *LxHyLz_n,_data_t *LxLyHz_df1,_data_t *LxLyHz_df2,_data_t *LxLyHz_n,int dxNext, int dyNext, int dzNext); static __global__ void cu_fwt3df_LC2(_data_t* HxHyLz_df1,_data_t* HxHyLz_df2,_data_t* HxHyLz_n,_data_t* HxLyHz_df1,_data_t* HxLyHz_df2,_data_t* HxLyHz_n,_data_t* LxHyHz_df1,_data_t* LxHyHz_df2,_data_t* LxHyHz_n,int dxNext, int dyNext, int dzNext); static __global__ void cu_fwt3df_LC1_diff(_data_t *HxLyLz_df1,_data_t *HxLyLz_df2,_data_t *HxLyLz_n,_data_t *LxHyLz_df1,_data_t *LxHyLz_df2,_data_t *LxHyLz_n,_data_t *LxLyHz_df1,_data_t *LxLyHz_df2,_data_t *LxLyHz_n,int dxNext, int dyNext, int dzNext); static __global__ void cu_fwt3df_LC2_diff(_data_t* HxHyLz_df1,_data_t* HxHyLz_df2,_data_t* HxHyLz_n,_data_t* HxLyHz_df1,_data_t* HxLyHz_df2,_data_t* HxLyHz_n,_data_t* LxHyHz_df1,_data_t* LxHyHz_df2,_data_t* LxHyHz_n,int dxNext, int dyNext, int dzNext); static __global__ void cu_fwt3df_LC3(_data_t* HxHyHz_df1,_data_t* HxHyHz_df2,_data_t* HxHyHz_n,int dxNext, int dyNext, int dzNext); static __global__ void cu_iwt3df_LC1(_data_t *HxLyLz_df1,_data_t *HxLyLz_df2,_data_t *HxLyLz_n,_data_t *LxHyLz_df1,_data_t *LxHyLz_df2,_data_t *LxHyLz_n,_data_t *LxLyHz_df1,_data_t *LxLyHz_df2,_data_t *LxLyHz_n,int dx, int dy, int dz); static __global__ void cu_iwt3df_LC2(_data_t* HxHyLz_df1,_data_t* HxHyLz_df2,_data_t* HxHyLz_n,_data_t* HxLyHz_df1,_data_t* HxLyHz_df2,_data_t* HxLyHz_n,_data_t* LxHyHz_df1,_data_t* LxHyHz_df2,_data_t* LxHyHz_n,int dx, int dy, int dz); static __global__ void cu_iwt3df_LC1_diff(_data_t *HxLyLz_df1,_data_t *HxLyLz_df2,_data_t *HxLyLz_n,_data_t *LxHyLz_df1,_data_t *LxHyLz_df2,_data_t *LxHyLz_n,_data_t *LxLyHz_df1,_data_t *LxLyHz_df2,_data_t *LxLyHz_n,int dx, int dy, int dz); static __global__ void cu_iwt3df_LC2_diff(_data_t* HxHyLz_df1,_data_t* HxHyLz_df2,_data_t* HxHyLz_n,_data_t* 
HxLyHz_df1,_data_t* HxLyHz_df2,_data_t* HxLyHz_n,_data_t* LxHyHz_df1,_data_t* LxHyHz_df2,_data_t* LxHyHz_n,int dx, int dy, int dz); static __global__ void cu_iwt3df_LC3(_data_t* HxHyHz_df1,_data_t* HxHyHz_df2,_data_t* HxHyHz_n,int dx, int dy, int dz); static __global__ void cu_mult(_data_t* in, _data_t mult, int maxInd); static __global__ void cu_soft_thresh (_data_t* in, scalar_t thresh, int numMax); static __global__ void cu_circshift(_data_t* data, _data_t* dataCopy, int dx, int dy, int dz, int shift1, int shift2, int shift3); static __global__ void cu_circunshift(_data_t* data, _data_t* dataCopy, int dx, int dy, int dz, int shift1, int shift2, int shift3); extern "C" void dffwt3_gpuHost(struct dfwavelet_plan_s* plan, data_t* out_wcdf1,data_t* out_wcdf2,data_t* out_wcn, data_t* in_vx,data_t* in_vy,data_t* in_vz) { assert(plan->use_gpu==2); data_t* dev_wcdf1,*dev_wcdf2,*dev_wcn,*dev_vx,*dev_vy,*dev_vz; cuda(Malloc( (void**)&dev_vx, plan->numPixel*sizeof(data_t) )); cuda(Malloc( (void**)&dev_vy, plan->numPixel*sizeof(data_t) )); cuda(Malloc( (void**)&dev_vz, plan->numPixel*sizeof(data_t) )); cuda(Memcpy( dev_vx, in_vx, plan->numPixel*sizeof(data_t), cudaMemcpyHostToDevice )); cuda(Memcpy( dev_vy, in_vy, plan->numPixel*sizeof(data_t), cudaMemcpyHostToDevice )); cuda(Memcpy( dev_vz, in_vz, plan->numPixel*sizeof(data_t), cudaMemcpyHostToDevice )); cuda(Malloc( (void**)&dev_wcdf1, plan->numCoeff*sizeof(data_t) )); cuda(Malloc( (void**)&dev_wcdf2, plan->numCoeff*sizeof(data_t) )); cuda(Malloc( (void**)&dev_wcn, plan->numCoeff*sizeof(data_t) )); dffwt3_gpu(plan,dev_wcdf1,dev_wcdf2,dev_wcn,dev_vx,dev_vy,dev_vz); cuda(Memcpy( out_wcdf1, dev_wcdf1, plan->numCoeff*sizeof(data_t), cudaMemcpyDeviceToHost )); cuda(Memcpy( out_wcdf2, dev_wcdf2, plan->numCoeff*sizeof(data_t), cudaMemcpyDeviceToHost )); cuda(Memcpy( out_wcn, dev_wcn, plan->numCoeff*sizeof(data_t), cudaMemcpyDeviceToHost )); cuda(Free( dev_wcdf1 )); cuda(Free( dev_wcdf2 )); cuda(Free( dev_wcn )); cuda(Free( dev_vx 
));
	cuda(Free( dev_vy ));
	cuda(Free( dev_vz ));
}

/**
 * Wrapper around dfiwt3_gpu for data living in host memory.
 *
 * Allocates device buffers, uploads the three coefficient arrays, runs
 * dfiwt3_gpu, downloads the three result arrays and releases every
 * device buffer again.
 *
 * @param plan      wavelet plan; plan->use_gpu must be 2 (asserted)
 * @param out_vx    host output, plan->numPixel elements
 * @param out_vy    host output, plan->numPixel elements
 * @param out_vz    host output, plan->numPixel elements
 * @param in_wcdf1  host input, plan->numCoeff elements
 * @param in_wcdf2  host input, plan->numCoeff elements
 * @param in_wcn    host input, plan->numCoeff elements
 */
extern "C" void dfiwt3_gpuHost(struct dfwavelet_plan_s* plan, data_t* out_vx,data_t* out_vy,data_t* out_vz, data_t* in_wcdf1,data_t* in_wcdf2,data_t* in_wcn)
{
	assert(plan->use_gpu==2);

	// Host-side arrays, grouped so that each stage is one loop.
	data_t* hostCoeff[3] = { in_wcdf1, in_wcdf2, in_wcn };
	data_t* hostImage[3] = { out_vx, out_vy, out_vz };
	data_t* devCoeff[3];
	data_t* devImage[3];

	// Upload the coefficients.
	for (int c = 0; c < 3; c++) {
		cuda(Malloc( (void**)&devCoeff[c], plan->numCoeff*sizeof(data_t) ));
	}
	for (int c = 0; c < 3; c++) {
		cuda(Memcpy( devCoeff[c], hostCoeff[c], plan->numCoeff*sizeof(data_t), cudaMemcpyHostToDevice ));
	}

	// Device space for the reconstructed components.
	for (int c = 0; c < 3; c++) {
		cuda(Malloc( (void**)&devImage[c], plan->numPixel*sizeof(data_t) ));
	}

	dfiwt3_gpu(plan,devImage[0],devImage[1],devImage[2],devCoeff[0],devCoeff[1],devCoeff[2]);

	// Download the result.
	for (int c = 0; c < 3; c++) {
		cuda(Memcpy( hostImage[c], devImage[c], plan->numPixel*sizeof(data_t), cudaMemcpyDeviceToHost ));
	}

	// Release in the order coefficients first, images second.
	for (int c = 0; c < 3; c++) {
		cuda(Free( devCoeff[c] ));
	}
	for (int c = 0; c < 3; c++) {
		cuda(Free( devImage[c] ));
	}
}

extern "C" void dfsoftthresh_gpuHost(struct dfwavelet_plan_s* plan,scalar_t dfthresh, scalar_t nthresh, data_t* out_wcdf1,data_t* out_wcdf2,data_t* out_wcn)
{
	assert(plan->use_gpu==2);
	data_t* dev_wcdf1,*dev_wcdf2,*dev_wcn;
	cuda(Malloc( (void**)&dev_wcdf1, plan->numCoeff*sizeof(data_t) ));
	cuda(Malloc( (void**)&dev_wcdf2, plan->numCoeff*sizeof(data_t) ));
	cuda(Malloc( (void**)&dev_wcn, plan->numCoeff*sizeof(data_t) ));
	cuda(Memcpy( dev_wcdf1, out_wcdf1, plan->numCoeff*sizeof(data_t), cudaMemcpyHostToDevice ));
	cuda(Memcpy( dev_wcdf2, out_wcdf2,
plan->numCoeff*sizeof(data_t), cudaMemcpyHostToDevice ));
	cuda(Memcpy( dev_wcn, out_wcn, plan->numCoeff*sizeof(data_t), cudaMemcpyHostToDevice ));
	dfsoftthresh_gpu(plan,dfthresh,nthresh,dev_wcdf1,dev_wcdf2,dev_wcn);
	cuda(Memcpy( out_wcdf1, dev_wcdf1, plan->numCoeff*sizeof(data_t), cudaMemcpyDeviceToHost ));
	cuda(Memcpy( out_wcdf2, dev_wcdf2, plan->numCoeff*sizeof(data_t), cudaMemcpyDeviceToHost ));
	cuda(Memcpy( out_wcn, dev_wcn, plan->numCoeff*sizeof(data_t), cudaMemcpyDeviceToHost ));
	cuda(Free( dev_wcdf1 ));
	cuda(Free( dev_wcdf2 ));
	cuda(Free( dev_wcn ));
}

/**
 * Wrapper around dfwavthresh3_gpu for data living in host memory.
 *
 * Uploads the three input arrays, runs dfwavthresh3_gpu in place on the
 * device copies (the same device buffers are passed as input and output),
 * and downloads them into the output arrays.
 *
 * @param plan      wavelet plan; plan->use_gpu must be 2 (asserted)
 * @param dfthresh  threshold forwarded to dfwavthresh3_gpu
 * @param nthresh   threshold forwarded to dfwavthresh3_gpu
 * @param out_vx    host output, plan->numPixel elements
 * @param out_vy    host output, plan->numPixel elements
 * @param out_vz    host output, plan->numPixel elements
 * @param in_vx     host input, plan->numPixel elements
 * @param in_vy     host input, plan->numPixel elements
 * @param in_vz     host input, plan->numPixel elements
 */
extern "C" void dfwavthresh3_gpuHost(struct dfwavelet_plan_s* plan, scalar_t dfthresh,scalar_t nthresh,data_t* out_vx,data_t* out_vy,data_t* out_vz, data_t* in_vx,data_t* in_vy,data_t* in_vz)
{
	assert(plan->use_gpu==2);

	data_t* hostIn[3] = { in_vx, in_vy, in_vz };
	data_t* hostOut[3] = { out_vx, out_vy, out_vz };
	data_t* devBuf[3];

	for (int c = 0; c < 3; c++) {
		cuda(Malloc( (void**)&devBuf[c], plan->numPixel*sizeof(data_t) ));
	}
	for (int c = 0; c < 3; c++) {
		cuda(Memcpy( devBuf[c], hostIn[c], plan->numPixel*sizeof(data_t), cudaMemcpyHostToDevice ));
	}

	// In place: the device buffers serve as both output and input.
	dfwavthresh3_gpu(plan,dfthresh,nthresh,devBuf[0],devBuf[1],devBuf[2],devBuf[0],devBuf[1],devBuf[2]);

	for (int c = 0; c < 3; c++) {
		cuda(Memcpy( hostOut[c], devBuf[c], plan->numPixel*sizeof(data_t), cudaMemcpyDeviceToHost ));
	}
	for (int c = 0; c < 3; c++) {
		cuda(Free( devBuf[c] ));
	}
}

extern "C" void dffwt3_gpu(struct dfwavelet_plan_s* plan, data_t* out_wcdf1,data_t* out_wcdf2,data_t* out_wcn, data_t* in_vx,data_t* in_vy,data_t* in_vz)
{
	circshift_gpu(plan,in_vx);
	circshift_gpu(plan,in_vy);
	circshift_gpu(plan,in_vz);
	long numCoeff, filterLen,*waveSizes;
	numCoeff = plan->numCoeff;
	waveSizes =
plan->waveSizes; filterLen = plan->filterLen; int numLevels = plan->numLevels; // Cast from generic data_t to device compatible _data_t _data_t* dev_wcdf1 = (_data_t*) out_wcdf1; _data_t* dev_wcdf2 = (_data_t*) out_wcdf2; _data_t* dev_wcn = (_data_t*) out_wcn; _data_t* dev_in_vx = (_data_t*) in_vx; _data_t* dev_in_vy = (_data_t*) in_vy; _data_t* dev_in_vz = (_data_t*) in_vz; _data_t* res = (_data_t*) plan->res; _data_t* dev_temp1,*dev_temp2; cuda(Malloc( (void**)&dev_temp1, numCoeff*sizeof(_data_t) )); cuda(Malloc( (void**)&dev_temp2, numCoeff*sizeof(_data_t) )); // Get dimensions int dx = plan->imSize[0]; int dy = plan->imSize[1]; int dz = plan->imSize[2]; int dxNext = waveSizes[0 + 3*numLevels]; int dyNext = waveSizes[1 + 3*numLevels]; int dzNext = waveSizes[2 + 3*numLevels]; int blockSize = dxNext*dyNext*dzNext; // allocate device memory and copy filters to device scalar_t *dev_filters; cuda(Malloc( (void**)&dev_filters, 4*plan->filterLen*sizeof(scalar_t) )); scalar_t *dev_lod0 = dev_filters + 0*plan->filterLen; scalar_t *dev_hid0 = dev_filters + 1*plan->filterLen; scalar_t *dev_lod1 = dev_filters + 2*plan->filterLen; scalar_t *dev_hid1 = dev_filters + 3*plan->filterLen; cuda(Memcpy( dev_lod0, plan->lod0, 2*plan->filterLen*sizeof(scalar_t), cudaMemcpyHostToDevice )); cuda(Memcpy( dev_lod1, plan->lod1, 2*plan->filterLen*sizeof(scalar_t), cudaMemcpyHostToDevice )); // Initialize variables and Pointers for FWT int const SHMEM_SIZE = 16384; int const T = 512; int mem, K; dim3 numBlocks, numThreads; // Temp Pointers _data_t *dev_tempLx,*dev_tempHx; dev_tempLx = dev_temp1; dev_tempHx = dev_tempLx + numCoeff/2; _data_t *dev_tempLxLy,*dev_tempHxLy,*dev_tempLxHy,*dev_tempHxHy; dev_tempLxLy = dev_temp2; dev_tempHxLy = dev_tempLxLy + numCoeff/4; dev_tempLxHy = dev_tempHxLy + numCoeff/4; dev_tempHxHy = dev_tempLxHy + numCoeff/4; // wcdf1 Pointers _data_t 
*dev_LxLyLz_df1,*dev_HxLyLz_df1,*dev_LxHyLz_df1,*dev_HxHyLz_df1,*dev_LxLyHz_df1,*dev_HxLyHz_df1,*dev_LxHyHz_df1,*dev_HxHyHz_df1,*dev_current_vx; dev_LxLyLz_df1 = dev_wcdf1; dev_HxLyLz_df1 = dev_LxLyLz_df1 + waveSizes[0]*waveSizes[1]*waveSizes[2]; for (int l = 1; l <= numLevels; ++l){ dev_HxLyLz_df1 += 7*waveSizes[0 + 3*l]*waveSizes[1 + 3*l]*waveSizes[2 + 3*l]; } dev_current_vx = dev_in_vx; // wcdf2 Pointers _data_t *dev_LxLyLz_df2,*dev_HxLyLz_df2,*dev_LxHyLz_df2,*dev_HxHyLz_df2,*dev_LxLyHz_df2,*dev_HxLyHz_df2,*dev_LxHyHz_df2,*dev_HxHyHz_df2,*dev_current_vy; dev_LxLyLz_df2 = dev_wcdf2; dev_HxLyLz_df2 = dev_LxLyLz_df2 + waveSizes[0]*waveSizes[1]*waveSizes[2]; for (int l = 1; l <= numLevels; ++l){ dev_HxLyLz_df2 += 7*waveSizes[0 + 3*l]*waveSizes[1 + 3*l]*waveSizes[2 + 3*l]; } dev_current_vy = dev_in_vy; // wcn Pointers _data_t *dev_LxLyLz_n,*dev_HxLyLz_n,*dev_LxHyLz_n,*dev_HxHyLz_n,*dev_LxLyHz_n,*dev_HxLyHz_n,*dev_LxHyHz_n,*dev_HxHyHz_n,*dev_current_vz; dev_LxLyLz_n = dev_wcn; dev_HxLyLz_n = dev_LxLyLz_n + waveSizes[0]*waveSizes[1]*waveSizes[2]; for (int l = 1; l <= numLevels; ++l){ dev_HxLyLz_n += 7*waveSizes[0 + 3*l]*waveSizes[1 + 3*l]*waveSizes[2 + 3*l]; } dev_current_vz = dev_in_vz; //*****************Loop through levels**************** for (int l = numLevels; l >= 1; --l) { dxNext = waveSizes[0 + 3*l]; dyNext = waveSizes[1 + 3*l]; dzNext = waveSizes[2 + 3*l]; blockSize = dxNext*dyNext*dzNext; // Update Pointers // df1 dev_HxLyLz_df1 = dev_HxLyLz_df1 - 7*blockSize; dev_LxHyLz_df1 = dev_HxLyLz_df1 + blockSize; dev_HxHyLz_df1 = dev_LxHyLz_df1 + blockSize; dev_LxLyHz_df1 = dev_HxHyLz_df1 + blockSize; dev_HxLyHz_df1 = dev_LxLyHz_df1 + blockSize; dev_LxHyHz_df1 = dev_HxLyHz_df1 + blockSize; dev_HxHyHz_df1 = dev_LxHyHz_df1 + blockSize; // df2 dev_HxLyLz_df2 = dev_HxLyLz_df2 - 7*blockSize; dev_LxHyLz_df2 = dev_HxLyLz_df2 + blockSize; dev_HxHyLz_df2 = dev_LxHyLz_df2 + blockSize; dev_LxLyHz_df2 = dev_HxHyLz_df2 + blockSize; dev_HxLyHz_df2 = dev_LxLyHz_df2 + blockSize; 
dev_LxHyHz_df2 = dev_HxLyHz_df2 + blockSize; dev_HxHyHz_df2 = dev_LxHyHz_df2 + blockSize; // n dev_HxLyLz_n = dev_HxLyLz_n - 7*blockSize; dev_LxHyLz_n = dev_HxLyLz_n + blockSize; dev_HxHyLz_n = dev_LxHyLz_n + blockSize; dev_LxLyHz_n = dev_HxHyLz_n + blockSize; dev_HxLyHz_n = dev_LxLyHz_n + blockSize; dev_LxHyHz_n = dev_HxLyHz_n + blockSize; dev_HxHyHz_n = dev_LxHyHz_n + blockSize; //************WCVX*********** // FWT Columns K = (SHMEM_SIZE-16)/(dx*sizeof(_data_t)); numBlocks = dim3(1,(dy+K-1)/K,dz); numThreads = dim3(T/K,K,1); mem = K*dx*sizeof(_data_t); cu_fwt3df_col <<< numBlocks,numThreads,mem >>>(dev_tempLx,dev_tempHx,dev_current_vx,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod1,dev_hid1,filterLen); cuda_sync(); // FWT Rows K = (SHMEM_SIZE-16)/(dy*sizeof(_data_t)); numBlocks = dim3(((dxNext)+K-1)/K,1,dz); numThreads = dim3(K,T/K,1); mem = K*dy*sizeof(_data_t); cu_fwt3df_row <<< numBlocks,numThreads,mem >>>(dev_tempLxLy,dev_tempLxHy,dev_tempLx,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cu_fwt3df_row <<< numBlocks,numThreads,mem >>>(dev_tempHxLy,dev_tempHxHy,dev_tempHx,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cuda_sync(); // FWT Depths K = (SHMEM_SIZE-16)/(dz*sizeof(_data_t)); numBlocks = dim3(((dxNext)+K-1)/K,dyNext,1); numThreads = dim3(K,1,T/K); mem = K*dz*sizeof(_data_t); cu_fwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_LxLyLz_df1,dev_LxLyHz_df1,dev_tempLxLy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cu_fwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_LxHyLz_df1,dev_LxHyHz_df1,dev_tempLxHy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cu_fwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_HxLyLz_df1,dev_HxLyHz_df1,dev_tempHxLy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cu_fwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_HxHyLz_df1,dev_HxHyHz_df1,dev_tempHxHy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cuda_sync(); //************WCVY*********** // FWT Columns K = 
(SHMEM_SIZE-16)/(dx*sizeof(_data_t)); numBlocks = dim3(1,(dy+K-1)/K,dz); numThreads = dim3(T/K,K,1); mem = K*dx*sizeof(_data_t); cu_fwt3df_col <<< numBlocks,numThreads,mem >>>(dev_tempLx,dev_tempHx,dev_current_vy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cuda_sync(); // FWT Rows K = (SHMEM_SIZE-16)/(dy*sizeof(_data_t)); numBlocks = dim3(((dxNext)+K-1)/K,1,dz); numThreads = dim3(K,T/K,1); mem = K*dy*sizeof(_data_t); cu_fwt3df_row <<< numBlocks,numThreads,mem >>>(dev_tempLxLy,dev_tempLxHy,dev_tempLx,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod1,dev_hid1,filterLen); cu_fwt3df_row <<< numBlocks,numThreads,mem >>>(dev_tempHxLy,dev_tempHxHy,dev_tempHx,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod1,dev_hid1,filterLen); cuda_sync(); // FWT Depths K = (SHMEM_SIZE-16)/(dz*sizeof(_data_t)); numBlocks = dim3(((dxNext)+K-1)/K,dyNext,1); numThreads = dim3(K,1,T/K); mem = K*dz*sizeof(_data_t); cu_fwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_LxLyLz_df2,dev_LxLyHz_df2,dev_tempLxLy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cu_fwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_LxHyLz_df2,dev_LxHyHz_df2,dev_tempLxHy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cu_fwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_HxLyLz_df2,dev_HxLyHz_df2,dev_tempHxLy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cu_fwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_HxHyLz_df2,dev_HxHyHz_df2,dev_tempHxHy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cuda_sync(); //************WCVZ*********** // FWT Columns K = (SHMEM_SIZE-16)/(dx*sizeof(_data_t)); numBlocks = dim3(1,(dy+K-1)/K,dz); numThreads = dim3(T/K,K,1); mem = K*dx*sizeof(_data_t); cu_fwt3df_col <<< numBlocks,numThreads,mem >>>(dev_tempLx,dev_tempHx,dev_current_vz,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cuda_sync(); // FWT Rows K = (SHMEM_SIZE-16)/(dy*sizeof(_data_t)); numBlocks = dim3(((dxNext)+K-1)/K,1,dz); numThreads = dim3(K,T/K,1); mem = K*dy*sizeof(_data_t); 
cu_fwt3df_row <<< numBlocks,numThreads,mem >>>(dev_tempLxLy,dev_tempLxHy,dev_tempLx,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cu_fwt3df_row <<< numBlocks,numThreads,mem >>>(dev_tempHxLy,dev_tempHxHy,dev_tempHx,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod0,dev_hid0,filterLen); cuda_sync(); // FWT Depths K = (SHMEM_SIZE-16)/(dz*sizeof(_data_t)); numBlocks = dim3(((dxNext)+K-1)/K,dyNext,1); numThreads = dim3(K,1,T/K); mem = K*dz*sizeof(_data_t); cu_fwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_LxLyLz_n,dev_LxLyHz_n,dev_tempLxLy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod1,dev_hid1,filterLen); cu_fwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_LxHyLz_n,dev_LxHyHz_n,dev_tempLxHy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod1,dev_hid1,filterLen); cu_fwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_HxLyLz_n,dev_HxLyHz_n,dev_tempHxLy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod1,dev_hid1,filterLen); cu_fwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_HxHyLz_n,dev_HxHyHz_n,dev_tempHxHy,dx,dy,dz,dxNext,dyNext,dzNext,dev_lod1,dev_hid1,filterLen); cuda_sync(); //******* Multi ****** int maxInd = 7*blockSize; numThreads = T; numBlocks = (maxInd+numThreads.x-1)/numThreads.x; cu_mult <<< numBlocks, numThreads >>> (dev_HxLyLz_df1,1.f/res[0],maxInd); cu_mult <<< numBlocks, numThreads >>> (dev_HxLyLz_df2,1.f/res[1],maxInd); cu_mult <<< numBlocks, numThreads >>> (dev_HxLyLz_n,1.f/res[2],maxInd); cuda_sync(); //*******Linear Combination****** int t1 = min(dxNext,T); int t2 = T/t1; numBlocks = dim3( (dxNext+t1-1)/t1, (dyNext+t2-1)/t2, dzNext); numThreads = dim3(t1,t2,1); cu_fwt3df_LC1 <<< numBlocks,numThreads >>> (dev_HxLyLz_df1,dev_HxLyLz_df2,dev_HxLyLz_n,dev_LxHyLz_df1,dev_LxHyLz_df2,dev_LxHyLz_n,dev_LxLyHz_df1,dev_LxLyHz_df2,dev_LxLyHz_n,dxNext,dyNext,dzNext); cu_fwt3df_LC2 <<< numBlocks,numThreads >>> (dev_HxHyLz_df1,dev_HxHyLz_df2,dev_HxHyLz_n,dev_HxLyHz_df1,dev_HxLyHz_df2,dev_HxLyHz_n,dev_LxHyHz_df1,dev_LxHyHz_df2,dev_LxHyHz_n,dxNext,dyNext,dzNext); cu_fwt3df_LC3 <<< numBlocks,numThreads 
>>> (dev_HxHyHz_df1,dev_HxHyHz_df2,dev_HxHyHz_n,dxNext,dyNext,dzNext); cuda_sync(); cu_fwt3df_LC1_diff <<< numBlocks,numThreads >>> (dev_HxLyLz_df1,dev_HxLyLz_df2,dev_HxLyLz_n,dev_LxHyLz_df1,dev_LxHyLz_df2,dev_LxHyLz_n,dev_LxLyHz_df1,dev_LxLyHz_df2,dev_LxLyHz_n,dxNext,dyNext,dzNext); cu_fwt3df_LC2_diff <<< numBlocks,numThreads >>> (dev_HxHyLz_df1,dev_HxHyLz_df2,dev_HxHyLz_n,dev_HxLyHz_df1,dev_HxLyHz_df2,dev_HxLyHz_n,dev_LxHyHz_df1,dev_LxHyHz_df2,dev_LxHyHz_n,dxNext,dyNext,dzNext); cuda_sync(); dev_current_vx = dev_wcdf1; dev_current_vy = dev_wcdf2; dev_current_vz = dev_wcn; dx = dxNext; dy = dyNext; dz = dzNext; } cuda(Free( dev_filters )); cuda(Free( dev_temp1 )); cuda(Free( dev_temp2 )); circunshift_gpu(plan,in_vx); circunshift_gpu(plan,in_vy); circunshift_gpu(plan,in_vz); } extern "C" void dfiwt3_gpu(struct dfwavelet_plan_s* plan, data_t* out_vx,data_t* out_vy,data_t* out_vz, data_t* in_wcdf1,data_t* in_wcdf2,data_t* in_wcn) { long numCoeff, filterLen,*waveSizes; numCoeff = plan->numCoeff; waveSizes = plan->waveSizes; filterLen = plan->filterLen; int numLevels = plan->numLevels; // Cast from generic data_t to device compatible _data_t _data_t* dev_out_vx = (_data_t*)out_vx; _data_t* dev_out_vy = (_data_t*)out_vy; _data_t* dev_out_vz = (_data_t*)out_vz; _data_t* dev_wcdf1 = (_data_t*)in_wcdf1; _data_t* dev_wcdf2 = (_data_t*)in_wcdf2; _data_t* dev_wcn = (_data_t*)in_wcn; _data_t* res = (_data_t*) plan->res; _data_t* dev_temp1, *dev_temp2; cuda(Malloc( (void**)&dev_temp1, numCoeff*sizeof(_data_t) )); cuda(Malloc( (void**)&dev_temp2, numCoeff*sizeof(_data_t)) ); // allocate device memory scalar_t *dev_filters; cuda(Malloc( (void**)&dev_filters, 4*(plan->filterLen)*sizeof(scalar_t) )); scalar_t *dev_lor0 = dev_filters + 0*plan->filterLen; scalar_t *dev_hir0 = dev_filters + 1*plan->filterLen; scalar_t *dev_lor1 = dev_filters + 2*plan->filterLen; scalar_t *dev_hir1 = dev_filters + 3*plan->filterLen; cuda(Memcpy( dev_lor0, plan->lor0, 2*plan->filterLen*sizeof(scalar_t), 
cudaMemcpyHostToDevice )); cuda(Memcpy( dev_lor1, plan->lor1, 2*plan->filterLen*sizeof(scalar_t), cudaMemcpyHostToDevice )); // Workspace dimensions int dxWork = waveSizes[0 + 3*numLevels]*2-1 + filterLen-1; int dyWork = waveSizes[1 + 3*numLevels]*2-1 + filterLen-1; int dzWork = waveSizes[2 + 3*numLevels]*2-1 + filterLen-1; // Initialize variables and pointers for IWT int const SHMEM_SIZE = 16384; int const T = 512; int mem,K; dim3 numBlocks, numThreads; int dx = waveSizes[0]; int dy = waveSizes[1]; int dz = waveSizes[2]; // Temp Pointers _data_t *dev_tempLxLy,*dev_tempHxLy,*dev_tempLxHy,*dev_tempHxHy; dev_tempLxLy = dev_temp1; dev_tempHxLy = dev_tempLxLy + numCoeff/4; dev_tempLxHy = dev_tempHxLy + numCoeff/4; dev_tempHxHy = dev_tempLxHy + numCoeff/4; _data_t *dev_tempLx,*dev_tempHx; dev_tempLx = dev_temp2; dev_tempHx = dev_tempLx + numCoeff/2; // wcdf1 Pointers _data_t *dev_LxLyLz_df1,*dev_HxLyLz_df1,*dev_LxHyLz_df1,*dev_HxHyLz_df1,*dev_LxLyHz_df1,*dev_HxLyHz_df1,*dev_LxHyHz_df1,*dev_HxHyHz_df1,*dev_current_vx; dev_LxLyLz_df1 = dev_wcdf1; dev_HxLyLz_df1 = dev_LxLyLz_df1 + dx*dy*dz; dev_current_vx = dev_LxLyLz_df1; // wcdf2 Pointers _data_t *dev_LxLyLz_df2,*dev_HxLyLz_df2,*dev_LxHyLz_df2,*dev_HxHyLz_df2,*dev_LxLyHz_df2,*dev_HxLyHz_df2,*dev_LxHyHz_df2,*dev_HxHyHz_df2,*dev_current_vy; dev_LxLyLz_df2 = dev_wcdf2; dev_HxLyLz_df2 = dev_LxLyLz_df2 + dx*dy*dz; dev_current_vy = dev_LxLyLz_df2; // wcn Pointers _data_t *dev_LxLyLz_n,*dev_HxLyLz_n,*dev_LxHyLz_n,*dev_HxHyLz_n,*dev_LxLyHz_n,*dev_HxLyHz_n,*dev_LxHyHz_n,*dev_HxHyHz_n,*dev_current_vz; dev_LxLyLz_n = dev_wcn; dev_HxLyLz_n = dev_LxLyLz_n + dx*dy*dz; dev_current_vz = dev_LxLyLz_n; for (int level = 1; level < numLevels+1; ++level) { dx = waveSizes[0 + 3*level]; dy = waveSizes[1 + 3*level]; dz = waveSizes[2 + 3*level]; int blockSize = dx*dy*dz; int dxNext = waveSizes[0+3*(level+1)]; int dyNext = waveSizes[1+3*(level+1)]; int dzNext = waveSizes[2+3*(level+1)]; // Calclate Offset dxWork = (2*dx-1 + filterLen-1); dyWork = 
(2*dy-1 + filterLen-1); dzWork = (2*dz-1 + filterLen-1); int xOffset = (int) floor((dxWork - dxNext) / 2.0); int yOffset = (int) floor((dyWork - dyNext) / 2.0); int zOffset = (int) floor((dzWork - dzNext) / 2.0); // Update Pointers // df1 dev_LxHyLz_df1 = dev_HxLyLz_df1 + blockSize; dev_HxHyLz_df1 = dev_LxHyLz_df1 + blockSize; dev_LxLyHz_df1 = dev_HxHyLz_df1 + blockSize; dev_HxLyHz_df1 = dev_LxLyHz_df1 + blockSize; dev_LxHyHz_df1 = dev_HxLyHz_df1 + blockSize; dev_HxHyHz_df1 = dev_LxHyHz_df1 + blockSize; // df2 dev_LxHyLz_df2 = dev_HxLyLz_df2 + blockSize; dev_HxHyLz_df2 = dev_LxHyLz_df2 + blockSize; dev_LxLyHz_df2 = dev_HxHyLz_df2 + blockSize; dev_HxLyHz_df2 = dev_LxLyHz_df2 + blockSize; dev_LxHyHz_df2 = dev_HxLyHz_df2 + blockSize; dev_HxHyHz_df2 = dev_LxHyHz_df2 + blockSize; // n dev_LxHyLz_n = dev_HxLyLz_n + blockSize; dev_HxHyLz_n = dev_LxHyLz_n + blockSize; dev_LxLyHz_n = dev_HxHyLz_n + blockSize; dev_HxLyHz_n = dev_LxLyHz_n + blockSize; dev_LxHyHz_n = dev_HxLyHz_n + blockSize; dev_HxHyHz_n = dev_LxHyHz_n + blockSize; //*******Linear Combination****** int t1 = min(dxNext,T); int t2 = T/t1; numBlocks = dim3( (dx+t1-1)/t1, (dy+t2-1)/t2, dz); numThreads = dim3(t1,t2,1); cu_iwt3df_LC1 <<< numBlocks,numThreads >>> (dev_HxLyLz_df1,dev_HxLyLz_df2,dev_HxLyLz_n,dev_LxHyLz_df1,dev_LxHyLz_df2,dev_LxHyLz_n,dev_LxLyHz_df1,dev_LxLyHz_df2,dev_LxLyHz_n,dx,dy,dz); cu_iwt3df_LC2 <<< numBlocks,numThreads >>> (dev_HxHyLz_df1,dev_HxHyLz_df2,dev_HxHyLz_n,dev_HxLyHz_df1,dev_HxLyHz_df2,dev_HxLyHz_n,dev_LxHyHz_df1,dev_LxHyHz_df2,dev_LxHyHz_n,dx,dy,dz); cu_iwt3df_LC3 <<< numBlocks,numThreads >>> (dev_HxHyHz_df1,dev_HxHyHz_df2,dev_HxHyHz_n,dx,dy,dz); cuda_sync(); cu_iwt3df_LC1_diff <<< numBlocks,numThreads >>> (dev_HxLyLz_df1,dev_HxLyLz_df2,dev_HxLyLz_n,dev_LxHyLz_df1,dev_LxHyLz_df2,dev_LxHyLz_n,dev_LxLyHz_df1,dev_LxLyHz_df2,dev_LxLyHz_n,dx,dy,dz); cu_iwt3df_LC2_diff <<< numBlocks,numThreads >>> 
(dev_HxHyLz_df1,dev_HxHyLz_df2,dev_HxHyLz_n,dev_HxLyHz_df1,dev_HxLyHz_df2,dev_HxLyHz_n,dev_LxHyHz_df1,dev_LxHyHz_df2,dev_LxHyHz_n,dx,dy,dz); cuda_sync(); //******* Multi ****** int maxInd = 7*blockSize; numThreads = T; numBlocks = (maxInd+numThreads.x-1)/numThreads.x; cu_mult <<< numBlocks, numThreads >>> (dev_HxLyLz_df1,res[0],maxInd); cu_mult <<< numBlocks, numThreads >>> (dev_HxLyLz_df2,res[1],maxInd); cu_mult <<< numBlocks, numThreads >>> (dev_HxLyLz_n,res[2],maxInd); cuda_sync(); //************WCX************ // Update Pointers if (level==numLevels) dev_current_vx = dev_out_vx; // IWT Depths K = (SHMEM_SIZE-16)/(2*dz*sizeof(_data_t)); numBlocks = dim3((dx+K-1)/K,dy,1); numThreads = dim3(K,1,(T/K)); mem = K*2*dz*sizeof(_data_t); cu_iwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_tempLxLy,dev_LxLyLz_df1,dev_LxLyHz_df1,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,filterLen); cu_iwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_tempHxLy,dev_HxLyLz_df1,dev_HxLyHz_df1,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,filterLen); cu_iwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_tempLxHy,dev_LxHyLz_df1,dev_LxHyHz_df1,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,filterLen); cu_iwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_tempHxHy,dev_HxHyLz_df1,dev_HxHyHz_df1,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,filterLen); cuda_sync(); // IWT Rows K = (SHMEM_SIZE-16)/(2*dy*sizeof(_data_t)); numBlocks = dim3((dx+K-1)/K,1,dzNext); numThreads = dim3(K,(T/K),1); mem = K*2*dy*sizeof(_data_t); cu_iwt3df_row <<< numBlocks,numThreads,mem >>>(dev_tempLx,dev_tempLxLy,dev_tempLxHy,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,plan->filterLen); cu_iwt3df_row <<< numBlocks,numThreads,mem >>>(dev_tempHx,dev_tempHxLy,dev_tempHxHy,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,plan->filterLen); cuda_sync(); // IWT Columns K = 
(SHMEM_SIZE-16)/(2*dx*sizeof(_data_t)); numBlocks = dim3(1,(dyNext+K-1)/K,dzNext); numThreads = dim3((T/K),K,1); mem = K*2*dx*sizeof(_data_t); cu_iwt3df_col <<< numBlocks,numThreads,mem >>>(dev_current_vx,dev_tempLx,dev_tempHx,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor1,dev_hir1,plan->filterLen); cuda_sync(); //************WCY************ // Update Pointers if (level==numLevels) dev_current_vy = dev_out_vy; // IWT Depths K = (SHMEM_SIZE-16)/(2*dz*sizeof(_data_t)); numBlocks = dim3((dx+K-1)/K,dy,1); numThreads = dim3(K,1,(T/K)); mem = K*2*dz*sizeof(_data_t); cu_iwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_tempLxLy,dev_LxLyLz_df2,dev_LxLyHz_df2,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,filterLen); cu_iwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_tempHxLy,dev_HxLyLz_df2,dev_HxLyHz_df2,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,filterLen); cu_iwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_tempLxHy,dev_LxHyLz_df2,dev_LxHyHz_df2,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,filterLen); cu_iwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_tempHxHy,dev_HxHyLz_df2,dev_HxHyHz_df2,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,filterLen); cuda_sync(); // IWT Rows K = (SHMEM_SIZE-16)/(2*dy*sizeof(_data_t)); numBlocks = dim3((dx+K-1)/K,1,dzNext); numThreads = dim3(K,(T/K),1); mem = K*2*dy*sizeof(_data_t); cu_iwt3df_row <<< numBlocks,numThreads,mem >>>(dev_tempLx,dev_tempLxLy,dev_tempLxHy,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor1,dev_hir1,plan->filterLen); cu_iwt3df_row <<< numBlocks,numThreads,mem >>>(dev_tempHx,dev_tempHxLy,dev_tempHxHy,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor1,dev_hir1,plan->filterLen); cuda_sync(); // IWT Columns K = (SHMEM_SIZE-16)/(2*dx*sizeof(_data_t)); numBlocks = dim3(1,(dyNext+K-1)/K,dzNext); numThreads = dim3((T/K),K,1); mem = K*2*dx*sizeof(_data_t); cu_iwt3df_col <<< 
numBlocks,numThreads,mem >>>(dev_current_vy,dev_tempLx,dev_tempHx,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,plan->filterLen); cuda_sync(); //************WCZ************ // Update Pointers if (level==numLevels) dev_current_vz = dev_out_vz; // IWT Depths K = (SHMEM_SIZE-16)/(2*dz*sizeof(_data_t)); numBlocks = dim3((dx+K-1)/K,dy,1); numThreads = dim3(K,1,(T/K)); mem = K*2*dz*sizeof(_data_t); cu_iwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_tempLxLy,dev_LxLyLz_n,dev_LxLyHz_n,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor1,dev_hir1,filterLen); cu_iwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_tempHxLy,dev_HxLyLz_n,dev_HxLyHz_n,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor1,dev_hir1,filterLen); cu_iwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_tempLxHy,dev_LxHyLz_n,dev_LxHyHz_n,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor1,dev_hir1,filterLen); cu_iwt3df_dep <<< numBlocks,numThreads,mem >>>(dev_tempHxHy,dev_HxHyLz_n,dev_HxHyHz_n,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor1,dev_hir1,filterLen); cuda_sync(); // IWT Rows K = (SHMEM_SIZE-16)/(2*dy*sizeof(_data_t)); numBlocks = dim3((dx+K-1)/K,1,dzNext); numThreads = dim3(K,(T/K),1); mem = K*2*dy*sizeof(_data_t); cu_iwt3df_row <<< numBlocks,numThreads,mem >>>(dev_tempLx,dev_tempLxLy,dev_tempLxHy,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,plan->filterLen); cu_iwt3df_row <<< numBlocks,numThreads,mem >>>(dev_tempHx,dev_tempHxLy,dev_tempHxHy,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,plan->filterLen); cuda_sync(); // IWT Columns K = (SHMEM_SIZE-16)/(2*dx*sizeof(_data_t)); numBlocks = dim3(1,(dyNext+K-1)/K,dzNext); numThreads = dim3((T/K),K,1); mem = K*2*dx*sizeof(_data_t); cu_iwt3df_col <<< numBlocks,numThreads,mem >>>(dev_current_vz,dev_tempLx,dev_tempHx,dx,dy,dz,dxNext,dyNext,dzNext,xOffset,yOffset,zOffset,dev_lor0,dev_hir0,plan->filterLen); cuda_sync(); dev_HxLyLz_df1 += 
7*blockSize;
		dev_HxLyLz_df2 += 7*blockSize;
		dev_HxLyLz_n += 7*blockSize;
	}
	cuda(Free( dev_filters ));
	cuda(Free( dev_temp1 ));
	cuda(Free( dev_temp2 ));
	circunshift_gpu(plan,out_vx);
	circunshift_gpu(plan,out_vy);
	circunshift_gpu(plan,out_vz);
}

/**
 * Draw an integer from [0, limit] with rand().
 *
 * The range of rand() is cut into limit+1 buckets of equal width; draws that
 * land in the incomplete bucket at the top are rejected and repeated.
 *
 * @param limit inclusive upper bound
 * @return a value in [0, limit]; 0 when limit is negative
 */
int rand_lim(int limit)
{
	// Empty range: the loop below could divide by zero (limit == -1) or never terminate.
	if (limit < 0)
		return 0;

	// limit+1 buckets do not fit into [0, RAND_MAX]: the bucket width would be 0
	// (division by zero). Every value of rand() is already within [0, limit].
	if (limit >= RAND_MAX)
		return rand();

	int divisor = RAND_MAX/(limit+1);
	int retval;
	do {
		retval = rand() / divisor;
	} while (retval > limit);
	return retval;
}

/**
 * Store a new random shift in plan->randShift[i] for each of the
 * plan->numdims dimensions.
 *
 * The shift for a dimension is drawn from [0, maxShift], where
 * maxShift = 2^(log2dim - numLevels) capped at 8 and log2dim is the smallest
 * exponent >= 1 with 2^log2dim >= imSize[i].
 *
 * @param plan wavelet plan; reads numdims, imSize, numLevels, writes randShift
 */
void dfwavelet_new_randshift_gpu (struct dfwavelet_plan_s* plan)
{
	// One value has always been drawn and discarded here; keep doing so in
	// order not to change the sequence of shifts produced for a given seed.
	(void) rand();

	for (int i = 0; i < plan->numdims; i++) {

		// Determine maximum shift value for this dimension
		int log2dim = 1;
		while ((1L << log2dim) < plan->imSize[i]) {
			log2dim++;
		}

		int shiftBits = log2dim - plan->numLevels;
		int maxShift;
		if (shiftBits < 0) {
			// More levels than the dimension supports: shifting by a
			// negative count is undefined, so do not shift at all.
			maxShift = 0;
		} else if (shiftBits >= 3) {
			// 1 << shiftBits would be >= 8, which is the cap.
			maxShift = 8;
		} else {
			maxShift = 1 << shiftBits;
		}

		// Generate random shift value between 0 and maxShift
		plan->randShift[i] = rand_lim(maxShift);
	}
}

/**
 * Forward transform (dffwt3_gpu), soft-thresholding (dfsoftthresh_gpu) and
 * inverse transform (dfiwt3_gpu) in one call.
 *
 * Three temporary coefficient buffers of plan->numCoeff elements are
 * allocated with cudaMalloc for the duration of the call. The data pointers
 * are handed to the *_gpu routines unchanged (presumably device pointers --
 * dfwavthresh3_gpuHost passes device buffers, and the same buffers for input
 * and output).
 *
 * @param plan      wavelet plan
 * @param dfthresh  threshold forwarded to dfsoftthresh_gpu
 * @param nthresh   threshold forwarded to dfsoftthresh_gpu
 * @param out_vx    output of dfiwt3_gpu
 * @param out_vy    output of dfiwt3_gpu
 * @param out_vz    output of dfiwt3_gpu
 * @param in_vx     input of dffwt3_gpu
 * @param in_vy     input of dffwt3_gpu
 * @param in_vz     input of dffwt3_gpu
 */
extern "C" void dfwavthresh3_gpu(struct dfwavelet_plan_s* plan,scalar_t dfthresh, scalar_t nthresh,data_t* out_vx,data_t* out_vy,data_t* out_vz,data_t* in_vx,data_t* in_vy,data_t* in_vz)
{
	data_t* dev_wcdf1,*dev_wcdf2,*dev_wcn;
	cuda(Malloc( (void**)&dev_wcdf1, plan->numCoeff*sizeof(_data_t) ));
	cuda(Malloc( (void**)&dev_wcdf2, plan->numCoeff*sizeof(_data_t) ));
	cuda(Malloc( (void**)&dev_wcn, plan->numCoeff*sizeof(_data_t) ));

	dffwt3_gpu(plan,dev_wcdf1,dev_wcdf2,dev_wcn,in_vx,in_vy,in_vz);
	dfsoftthresh_gpu(plan,dfthresh,nthresh,dev_wcdf1,dev_wcdf2,dev_wcn);
	dfiwt3_gpu(plan,out_vx,out_vy,out_vz,dev_wcdf1,dev_wcdf2,dev_wcn);

	cuda(Free( dev_wcdf1 ));
	cuda(Free( dev_wcdf2 ));
	cuda(Free( dev_wcn ));
}

extern "C" void dfsoftthresh_gpu(struct dfwavelet_plan_s* plan,scalar_t dfthresh, scalar_t nthresh, data_t* out_wcdf1,data_t* out_wcdf2,data_t* out_wcn)
{
	assert(plan->use_gpu==1||plan->use_gpu==2);
	_data_t* dev_wcdf1,*dev_wcdf2,*dev_wcn;
	dev_wcdf1 = (_data_t*) out_wcdf1;
	dev_wcdf2 = (_data_t*) out_wcdf2;
	dev_wcn = (_data_t*) out_wcn;
	int numMax;
	int const T = 512;
	dim3 numBlocks, numThreads;
	numMax = plan->numCoeff-plan->numCoarse;
	numBlocks = dim3((numMax+T-1)/T,1,1);
	numThreads =
dim3(T,1,1); cu_soft_thresh <<< numBlocks,numThreads>>> (dev_wcdf1+plan->numCoarse,dfthresh,numMax); cu_soft_thresh <<< numBlocks,numThreads>>> (dev_wcdf2+plan->numCoarse,dfthresh,numMax); cu_soft_thresh <<< numBlocks,numThreads>>> (dev_wcn+plan->numCoarse,nthresh,numMax); } /********** Aux functions **********/ extern "C" void circshift_gpu(struct dfwavelet_plan_s* plan, data_t* data_c) { // Return if no shifts int zeroShift = 1; int i; for (i = 0; i< plan->numdims; i++) { zeroShift &= (plan->randShift[i]==0); } if(zeroShift) { return; } _data_t* data = (_data_t*) data_c; // Copy data _data_t* dataCopy; cuda(Malloc((void**)&dataCopy, plan->numPixel*sizeof(_data_t))); cuda(Memcpy(dataCopy, data, plan->numPixel*sizeof(_data_t), cudaMemcpyDeviceToDevice)); int T = 512; if (plan->numdims==2) { int dx,dy,r0,r1; dx = plan->imSize[0]; dy = plan->imSize[1]; r0 = plan->randShift[0]; r1 = plan->randShift[1]; cu_circshift <<< (plan->numPixel+T-1)/T, T>>>(data,dataCopy,dx,dy,1,r0,r1,0); } else if (plan->numdims==3) { int dx,dy,dz,r0,r1,r2; dx = plan->imSize[0]; dy = plan->imSize[1]; dz = plan->imSize[2]; r0 = plan->randShift[0]; r1 = plan->randShift[1]; r2 = plan->randShift[2]; cu_circshift <<< (plan->numPixel+T-1)/T, T>>>(data,dataCopy,dx,dy,dz,r0,r1,r2); } cuda(Free(dataCopy)); } extern "C" void circunshift_gpu(struct dfwavelet_plan_s* plan, data_t* data_c) { // Return if no shifts int zeroShift = 1; int i; for (i = 0; i< plan->numdims; i++) { zeroShift &= (plan->randShift[i]==0); } if(zeroShift) { return; } _data_t* data = (_data_t*) data_c; // Copy data _data_t* dataCopy; cuda(Malloc((void**)&dataCopy, plan->numPixel*sizeof(_data_t))); cuda(Memcpy(dataCopy, data, plan->numPixel*sizeof(_data_t), cudaMemcpyDeviceToDevice)); int T = 512; if (plan->numdims==2) { int dx,dy,r0,r1; dx = plan->imSize[0]; dy = plan->imSize[1]; r0 = plan->randShift[0]; r1 = plan->randShift[1]; cu_circunshift <<< (plan->numPixel+T-1)/T, T>>>(data,dataCopy,dx,dy,1,r0,r1,0); } else if 
(plan->numdims==3) { int dx,dy,dz,r0,r1,r2; dx = plan->imSize[0]; dy = plan->imSize[1]; dz = plan->imSize[2]; r0 = plan->randShift[0]; r1 = plan->randShift[1]; r2 = plan->randShift[2]; cu_circunshift <<< (plan->numPixel+T-1)/T, T>>>(data,dataCopy,dx,dy,dz,r0,r1,r2); } cuda(Free(dataCopy)); } // ############################################################################ // CUDA function of fwt column convolution // Loads data to scratchpad (shared memory) and convolve w/ low pass and high pass // Output: Lx, Hx // Input: in, dx, dy, dz, dxNext, lod, hid, filterLen // ############################################################################ extern "C" __global__ void cu_fwt3df_col(_data_t *Lx,_data_t *Hx,_data_t *in,int dx,int dy,int dz,int dxNext,int dyNext,int dzNext,scalar_t *lod,scalar_t *hid,int filterLen) { extern __shared__ _data_t cols []; int ti = threadIdx.x; int tj = threadIdx.y; int j = blockIdx.y*blockDim.y+threadIdx.y; int k = blockIdx.z*blockDim.z+threadIdx.z; if (j>=dy) { return; } // Load Input to Temp Array for (int i = ti; i < dx; i += blockDim.x){ cols[i + tj*dx] = in[i + j*dx + k*dx*dy]; } __syncthreads(); // Low-Pass and High-Pass Downsample int ind, lessThan, greaThan; for (int i = ti; i < dxNext; i += blockDim.x){ _data_t y = cols[0]-cols[0]; _data_t z = cols[0]-cols[0]; #pragma unroll for (int f = 0; f < filterLen; f++){ ind = 2*i+1 - (filterLen-1)+f; lessThan = (int) (ind<0); greaThan = (int) (ind>=dx); ind = -1*lessThan+ind*(-2*lessThan+1); ind = (2*dx-1)*greaThan+ind*(-2*greaThan+1); y += cols[ind + tj*dx] * lod[filterLen-1-f]; z += cols[ind + tj*dx] * hid[filterLen-1-f]; } Lx[i + j*dxNext + k*dxNext*dy] = y; Hx[i + j*dxNext + k*dxNext*dy] = z; } } // ############################################################################ // CUDA function of fwt row convolution. 
// Assumes fwt_col() has already been called
// Loads data to scratchpad (shared memory) and convolve w/ low pass and high pass
// Output: LxLy, LxHy / HxLy, HxHy
// Input: Lx/Hx, dx, dy, dxNext, dyNext, lod, hid, filterLen
//
// A block stages blockDim.x columns of dy samples each, so the launch must
// provide blockDim.x*dy*sizeof(_data_t) bytes of dynamic shared memory.
// Taps that fall outside [0, dy) read the mirrored sample.
// ############################################################################
extern "C" __global__ void cu_fwt3df_row(_data_t *Ly,_data_t *Hy,_data_t *in,int dx,int dy,int dz,int dxNext,int dyNext,int dzNext,scalar_t *lod,scalar_t *hid,int filterLen)
{
  extern __shared__ _data_t rows [];
  const int width = blockDim.x;
  const int lane = threadIdx.x;
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;

  if (i>=dxNext)
    return;

  // Stage this thread's column of the input slab.
  for (int j = threadIdx.y; j < dy; j += blockDim.y)
    rows[lane + j*width] = in[i + j*dxNext + k*dxNext*dy];
  __syncthreads();

  // Low-Pass and High Pass Downsample
  for (int j = threadIdx.y; j < dyNext; j += blockDim.y) {
    // start both accumulators at zero of type _data_t
    _data_t lo = rows[0]-rows[0];
    _data_t hi = rows[0]-rows[0];
    #pragma unroll
    for (int f = 0; f < filterLen; f++) {
      const int tap = filterLen-1-f;
      int src = 2*j+1 - (filterLen-1)+f;
      // mirror indices that run off either end of the column
      if (src < 0)
        src = -1 - src;
      else if (src >= dy)
        src = (2*dy-1) - src;
      lo += rows[lane + src*width] * lod[tap];
      hi += rows[lane + src*width] * hid[tap];
    }
    const int dst = i + j*dxNext + k*dxNext*dyNext;
    Ly[dst] = lo;
    Hy[dst] = hi;
  }
}

// ############################################################################
// CUDA function of fwt depth convolution.
// Assumes fwt_row() has already been called
// Loads data to scratchpad (shared memory) and convolve w/ low pass and high pass
// Output: LxLy, LxHy / HxLy, HxHy
// Input: Lx/Hx, dx, dy, dxNext, dyNext, lod, hid, filterLen
//
// A block stages blockDim.x depth lines of dz samples each, so the launch
// must provide blockDim.x*dz*sizeof(_data_t) bytes of dynamic shared memory.
// Taps that fall outside [0, dz) read the mirrored sample.
// ############################################################################
extern "C" __global__ void cu_fwt3df_dep(_data_t *Lz,_data_t *Hz,_data_t *in,int dx,int dy,int dz,int dxNext,int dyNext,int dzNext,scalar_t *lod,scalar_t *hid,int filterLen)
{
  extern __shared__ _data_t deps [];
  const int width = blockDim.x;
  const int lane = threadIdx.x;
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;

  if (i>=dxNext)
    return;

  // Stage this thread's depth line.
  for (int k = threadIdx.z; k < dz; k += blockDim.z)
    deps[lane + k*width] = in[i + j*dxNext + k*dxNext*dyNext];
  __syncthreads();

  // Low-Pass and High Pass Downsample
  for (int k = threadIdx.z; k < dzNext; k += blockDim.z) {
    // start both accumulators at zero of type _data_t
    _data_t lo = deps[0]-deps[0];
    _data_t hi = deps[0]-deps[0];
    #pragma unroll
    for (int f = 0; f < filterLen; f++) {
      const int tap = filterLen-1-f;
      int src = 2*k+1 - (filterLen-1)+f;
      // mirror indices that run off either end of the line
      if (src < 0)
        src = -1 - src;
      else if (src >= dz)
        src = (2*dz-1) - src;
      lo += deps[lane + src*width] * lod[tap];
      hi += deps[lane + src*width] * hid[tap];
    }
    const int dst = i + j*dxNext + k*dxNext*dyNext;
    Lz[dst] = lo;
    Hz[dst] = hi;
  }
}

// Forward linear combination for the HLL, LHL and LLH subbands.
// One thread per coefficient (i,j,k); the three channels of each subband are
// permuted and the third is replaced by a weighted sum. The 0.25 weights
// are dropped on the i==0 / j==0 / k==0 faces.
extern "C" __global__ void cu_fwt3df_LC1(_data_t *HxLyLz_df1,_data_t *HxLyLz_df2,_data_t *HxLyLz_n,_data_t *LxHyLz_df1,_data_t *LxHyLz_df2,_data_t *LxHyLz_n,_data_t *LxLyHz_df1,_data_t *LxLyHz_df2,_data_t *LxLyHz_n,int dxNext, int dyNext, int dzNext)
{
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;

  if ((i>=dxNext)||(j>=dyNext)||(k>=dzNext))
    return;

  const int idx = i+j*dxNext+k*dxNext*dyNext;
  const scalar_t xGreatZero = i>0;
  const scalar_t yGreatZero = j>0;
  const scalar_t zGreatZero = k>0;
  _data_t x,y,z;

  //HLL
  x = HxLyLz_df1[idx];
  y = HxLyLz_df2[idx];
  z = HxLyLz_n[idx];
  HxLyLz_df1[idx] = y;
  HxLyLz_df2[idx] = z;
  HxLyLz_n[idx] = x + yGreatZero*0.25f*y + zGreatZero*0.25f*z;

  //LHL (df1 keeps its value)
  x = LxHyLz_df1[idx];
  y = LxHyLz_df2[idx];
  z = LxHyLz_n[idx];
  LxHyLz_df2[idx] = z;
  LxHyLz_n[idx] = y + xGreatZero*0.25f*x + zGreatZero*0.25f*z;

  //LLH
  x = LxLyHz_df1[idx];
  y = LxLyHz_df2[idx];
  z = LxLyHz_n[idx];
  LxLyHz_df1[idx] = y;
  LxLyHz_df2[idx] = x;
  LxLyHz_n[idx] = z + yGreatZero*0.25*y + xGreatZero*0.25*x;
}

// Neighbour term of the forward HLL/LHL/LLH combination: subtracts 0.25 times
// the df values of the preceding element along each of two axes from the
// n channel. Elements on the lower faces use zero for the missing neighbour.
extern "C" __global__ void cu_fwt3df_LC1_diff(_data_t *HxLyLz_df1,_data_t *HxLyLz_df2,_data_t *HxLyLz_n,_data_t *LxHyLz_df1,_data_t *LxHyLz_df2,_data_t *LxHyLz_n,_data_t *LxLyHz_df1,_data_t *LxLyHz_df2,_data_t *LxLyHz_n,int dxNext, int dyNext, int dzNext)
{
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;
  const _data_t zero = make_float2(0.f,0.f);

  if ((i>=dxNext)||(j>=dyNext)||(k>=dzNext))
    return;

  const int idx = i+j*dxNext+k*dxNext*dyNext;
  const int prevX = (i-1)+j*dxNext+k*dxNext*dyNext;
  const int prevY = i+(j-1)*dxNext+k*dxNext*dyNext;
  const int prevZ = i+j*dxNext+(k-1)*dxNext*dyNext;
  _data_t x,y,z;

  //HLL
  y = (j>0) ? HxLyLz_df1[prevY] : zero;
  z = (k>0) ? HxLyLz_df2[prevZ] : zero;
  HxLyLz_n[idx] += -0.25*y - 0.25*z;

  //LHL
  x = (i>0) ? LxHyLz_df1[prevX] : zero;
  z = (k>0) ? LxHyLz_df2[prevZ] : zero;
  LxHyLz_n[idx] += -0.25*x - 0.25*z;

  //LLH
  y = (j>0) ? LxLyHz_df1[prevY] : zero;
  x = (i>0) ? LxLyHz_df2[prevX] : zero;
  LxLyHz_n[idx] += -0.25*y - 0.25*x;
}

// Forward linear combination for the HHL, HLH and LHH subbands.
// One thread per coefficient; two channels are replaced by half their
// difference / half their sum, the third is passed through with a 0.125
// weighted contribution that is dropped on the matching lower face.
extern "C" __global__ void cu_fwt3df_LC2(_data_t* HxHyLz_df1,_data_t* HxHyLz_df2,_data_t* HxHyLz_n,_data_t* HxLyHz_df1,_data_t* HxLyHz_df2,_data_t* HxLyHz_n,_data_t* LxHyHz_df1,_data_t* LxHyHz_df2,_data_t* LxHyHz_n,int dxNext, int dyNext, int dzNext)
{
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;

  if ((i>=dxNext)||(j>=dyNext)||(k>=dzNext))
    return;

  const int idx = i+j*dxNext+k*dxNext*dyNext;
  const scalar_t xGreatZero = i>0;
  const scalar_t yGreatZero = j>0;
  const scalar_t zGreatZero = k>0;
  _data_t x,y,z;

  //HHL
  x = HxHyLz_df1[idx];
  y = HxHyLz_df2[idx];
  z = HxHyLz_n[idx];
  HxHyLz_df1[idx] = 0.5*(x-y);
  HxHyLz_df2[idx] = z;
  HxHyLz_n[idx] = 0.5*(x+y) + zGreatZero*0.125*z;

  //HLH
  x = HxLyHz_df1[idx];
  y = HxLyHz_df2[idx];
  z = HxLyHz_n[idx];
  HxLyHz_df1[idx] = 0.5*(z-x);
  HxLyHz_df2[idx] = y;
  HxLyHz_n[idx] = 0.5*(z+x) + yGreatZero*0.125*y;

  //LHH
  x = LxHyHz_df1[idx];
  y = LxHyHz_df2[idx];
  z = LxHyHz_n[idx];
  LxHyHz_df1[idx] = 0.5*(y-z);
  LxHyHz_df2[idx] = x;
  LxHyHz_n[idx] = 0.5*(y+z) + xGreatZero*0.125*x;
}

// Neighbour term of the forward HHL/HLH/LHH combination: subtracts 0.125
// times the df2 value of the preceding element along one axis from the
// n channel. Elements on the lower face use zero instead.
extern "C" __global__ void cu_fwt3df_LC2_diff(_data_t* HxHyLz_df1,_data_t* HxHyLz_df2,_data_t* HxHyLz_n,_data_t* HxLyHz_df1,_data_t* HxLyHz_df2,_data_t* HxLyHz_n,_data_t* LxHyHz_df1,_data_t* LxHyHz_df2,_data_t* LxHyHz_n,int dxNext, int dyNext, int dzNext)
{
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;
  const _data_t zero = make_float2(0.f,0.f);

  if ((i>=dxNext)||(j>=dyNext)||(k>=dzNext))
    return;

  const int idx = i+j*dxNext+k*dxNext*dyNext;
  _data_t x,y,z;

  //HHL
  z = (k>0) ? HxHyLz_df2[i+j*dxNext+(k-1)*dxNext*dyNext] : zero;
  HxHyLz_n[idx] += -0.125*z;

  //HLH
  y = (j>0) ? HxLyHz_df2[i+(j-1)*dxNext+k*dxNext*dyNext] : zero;
  HxLyHz_n[idx] += -0.125*y;

  //LHH
  x = (i>0) ? LxHyHz_df2[(i-1)+j*dxNext+k*dxNext*dyNext] : zero;
  LxHyHz_n[idx] += -0.125*x;
}

// Forward linear combination for the HHH subband: each coefficient triple
// (x,y,z) becomes ((-2x+y+z)/3, (2y-x-z)/3, (x+y+z)/3).
extern "C" __global__ void cu_fwt3df_LC3(_data_t* HxHyHz_df1,_data_t* HxHyHz_df2,_data_t* HxHyHz_n,int dxNext, int dyNext, int dzNext)
{
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;

  if ((i>=dxNext)||(j>=dyNext)||(k>=dzNext))
    return;

  //HHH
  const int idx = i+j*dxNext+k*dxNext*dyNext;
  const _data_t x = HxHyHz_df1[idx];
  const _data_t y = HxHyHz_df2[idx];
  const _data_t z = HxHyHz_n[idx];
  HxHyHz_df1[idx] = 1.0/3.0*(-2.0*x+y+z);
  HxHyHz_df2[idx] = 1.0/3.0*(2*y-x-z);
  HxHyHz_n[idx] = 1.0/3.0*(x+y+z);
}

// ############################################################################
// CUDA function of iwt depth convolution.
// Loads data to scratchpad (shared memory) and convolve w/ low pass and high pass
// Scratchpad size: K x 2*dy
// Output: Lz/Hz
// Input: LxLy,LxHy / HxLy, HxHy, dx, dy, dxNext, dyNext,xOffset, yOffset,lod, hid, filterLen
//
// As written, the kernel stores dz low-pass samples followed by dz high-pass
// samples per thread column, i.e. it indexes blockDim.x*2*dz shared elements.
// Only every second filter tap contributes to an output sample; taps whose
// source index falls outside [0, dz) are skipped.
// ############################################################################
extern "C" __global__ void cu_iwt3df_dep(_data_t *out, _data_t *Lz, _data_t *Hz, int dx, int dy,int dz,int dxNext, int dyNext, int dzNext,int xOffset, int yOffset,int zOffset,scalar_t *lod, scalar_t *hid, int filterLen)
{
  extern __shared__ _data_t deps [];
  const int width = blockDim.x;
  const int lane = threadIdx.x;
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;

  if (i>=dx)
    return;

  // Stage the low-pass half, then the high-pass half, of this depth line.
  for (int k = threadIdx.z; k < dz; k += blockDim.z) {
    const int src = i + j*dx + k*dx*dy;
    deps[lane + k*width] = Lz[src];
    deps[lane + (k+dz)*width] = Hz[src];
  }
  __syncthreads();

  // Low-Pass and High Pass Downsample
  for (int k = threadIdx.z+zOffset; k < dzNext+zOffset; k += blockDim.z) {
    // start the accumulator at zero of type _data_t
    _data_t acc = deps[0]-deps[0];
    const int first = k-(filterLen-1);
    #pragma unroll
    for (int f = first % 2; f < filterLen; f+=2) {
      const int ind = (first+f)>>1;
      if ((ind < 0) || (ind >= dz))
        continue;
      acc += deps[lane + ind*width] * lod[filterLen-1-f];
      acc += deps[lane + (ind+dz)*width] * hid[filterLen-1-f];
    }
    out[i + j*dx + (k-zOffset)*dx*dy] = acc;
  }
}

// ############################################################################
// CUDA function of iwt row convolution. Assumes fwt_col() has already been called.
// Loads data to scratchpad (shared memory) and convolve w/ low pass and high pass
// Scratchpad size: K x 2*dy
// Output: Lx/Hx
// Input: LxLy,LxHy / HxLy, HxHy, dx, dy, dxNext, dyNext,xOffset, yOffset,lod, hid, filterLen
//
// Only every second filter tap contributes to an output sample; taps whose
// source index falls outside [0, dy) are skipped.
// ############################################################################
extern "C" __global__ void cu_iwt3df_row(_data_t *out, _data_t *Ly, _data_t *Hy, int dx, int dy,int dz,int dxNext, int dyNext,int dzNext,int xOffset, int yOffset, int zOffset,scalar_t *lod, scalar_t *hid, int filterLen)
{
  extern __shared__ _data_t rows [];
  const int width = blockDim.x;
  const int lane = threadIdx.x;
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;

  if (i>=dx)
    return;

  // Stage the low-pass half, then the high-pass half, of this column.
  for (int j = threadIdx.y; j < dy; j += blockDim.y) {
    const int src = i + j*dx + k*dx*dy;
    rows[lane + j*width] = Ly[src];
    rows[lane + (j+dy)*width] = Hy[src];
  }
  __syncthreads();

  // Low-Pass and High Pass Downsample
  for (int j = threadIdx.y+yOffset; j < dyNext+yOffset; j += blockDim.y) {
    // start the accumulator at zero of type _data_t
    _data_t acc = rows[0]-rows[0];
    const int first = j-(filterLen-1);
    #pragma unroll
    for (int f = first % 2; f < filterLen; f+=2) {
      const int ind = (first+f)>>1;
      if ((ind < 0) || (ind >= dy))
        continue;
      acc += rows[lane + ind*width] * lod[filterLen-1-f];
      acc += rows[lane + (ind+dy)*width] * hid[filterLen-1-f];
    }
    out[i + (j-yOffset)*dx + k*dx*dyNext] = acc;
  }
}

// ############################################################################
// CUDA function of iwt column convolution
// Loads data to scratchpad (shared memory) and convolve w/ low pass and high pass
// Scratchpad size: 2*dx x K
// Output: out
// Input: Lx, Hx, dx, dy, dxNext, dyNext, lod, hid, filterLen
// ############################################################################
extern "C" __global__ void cu_iwt3df_col(_data_t *out, _data_t *Lx, _data_t *Hx, int dx, int dy,int dz,int dxNext, int dyNext, int dzNext,int xOffset, int yOffset, int zOffset,scalar_t *lod, scalar_t *hid, int filterLen)
{
  extern __shared__ _data_t cols [];
  const int ti = threadIdx.x;
  const int tj = threadIdx.y;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;

  if (j>=dyNext)
    return;

  const int dx2 = 2*dx; // one staged line = dx low-pass + dx high-pass samples

  // Load Input to Temp Array
  for (int i = ti; i < dx; i += blockDim.x) {
    const int src = i + j*dx + k*dx*dyNext;
    cols[i + tj*dx2] = Lx[src];
    cols[dx+i + tj*dx2] = Hx[src];
  }
  __syncthreads();

  // Low-Pass and High Pass Downsample
  for (int i = ti+xOffset; i < dxNext+xOffset; i += blockDim.x) {
    // start the accumulator at zero of type _data_t
    _data_t acc = cols[0]-cols[0];
    const int first = i-(filterLen-1);
    #pragma unroll
    for (int f = first % 2; f < filterLen; f+=2) {
      const int ind = (first+f)>>1;
      if (ind < 0 || ind >= dx)
        continue;
      acc += cols[ind + tj*dx2] * lod[filterLen-1-f];
      acc += cols[dx+ind + tj*dx2] * hid[filterLen-1-f];
    }
    out[(i-xOffset) + j*dxNext + k*dxNext*dyNext] = acc;
  }
}

// Inverse linear combination for the HLL, LHL and LLH subbands.
// One thread per coefficient; channels are permuted and one channel is
// rebuilt as n minus 0.25-weighted terms (dropped on the lower faces).
extern "C" __global__ void cu_iwt3df_LC1 (_data_t *HxLyLz_df1,_data_t *HxLyLz_df2,_data_t *HxLyLz_n,_data_t *LxHyLz_df1,_data_t *LxHyLz_df2,_data_t *LxHyLz_n,_data_t *LxLyHz_df1,_data_t *LxLyHz_df2,_data_t *LxLyHz_n,int dx, int dy, int dz)
{
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;

  if ((i>=dx)||(j>=dy)||(k>=dz))
    return;

  const int idx = i+j*dx+k*dx*dy;
  const scalar_t xGreatZero = i>0;
  const scalar_t yGreatZero = j>0;
  const scalar_t zGreatZero = k>0;
  _data_t df1,df2,n;

  //HLL
  df1 = HxLyLz_df1[idx];
  df2 = HxLyLz_df2[idx];
  n = HxLyLz_n[idx];
  HxLyLz_df2[idx] = df1;
  HxLyLz_n[idx] = df2;
  HxLyLz_df1[idx] = n - yGreatZero*0.25*df1 - zGreatZero*0.25*df2;

  //LHL (df1 keeps its value)
  df1 = LxHyLz_df1[idx];
  df2 = LxHyLz_df2[idx];
  n = LxHyLz_n[idx];
  LxHyLz_n[idx] = df2;
  LxHyLz_df2[idx] = n - xGreatZero*0.25*df1 - zGreatZero*0.25*df2;

  //LLH
  df1 = LxLyHz_df1[idx];
  df2 = LxLyHz_df2[idx];
  n = LxLyHz_n[idx];
  LxLyHz_df1[idx] = df2;
  LxLyHz_df2[idx] = df1;
  LxLyHz_n[idx] = n - yGreatZero*0.25*df1 - xGreatZero*0.25*df2;
}

// Neighbour term of the inverse HLL/LHL/LLH combination: adds 0.25 times
// the value of the preceding element along each of two axes. Elements on
// the lower faces use zero for the missing neighbour.
extern "C" __global__ void cu_iwt3df_LC1_diff (_data_t *HxLyLz_df1,_data_t *HxLyLz_df2,_data_t *HxLyLz_n,_data_t *LxHyLz_df1,_data_t *LxHyLz_df2,_data_t *LxHyLz_n,_data_t *LxLyHz_df1,_data_t *LxLyHz_df2,_data_t *LxLyHz_n,int dx, int dy, int dz)
{
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;
  const _data_t zero = make_float2(0.f,0.f);

  if ((i>=dx)||(j>=dy)||(k>=dz))
    return;

  const int idx = i+j*dx+k*dx*dy;
  const int prevX = (i-1)+j*dx+k*dx*dy;
  const int prevY = i+(j-1)*dx+k*dx*dy;
  const int prevZ = i+j*dx+(k-1)*dx*dy;
  _data_t x,y,z;

  //HLL
  y = (j>0) ? HxLyLz_df2[prevY] : zero;
  z = (k>0) ? HxLyLz_n[prevZ] : zero;
  HxLyLz_df1[idx] += 0.25*y + 0.25*z;

  //LHL
  x = (i>0) ? LxHyLz_df1[prevX] : zero;
  z = (k>0) ? LxHyLz_n[prevZ] : zero;
  LxHyLz_df2[idx] += 0.25*x + 0.25*z;

  //LLH
  y = (j>0) ? LxLyHz_df2[prevY] : zero;
  x = (i>0) ? LxLyHz_df1[prevX] : zero;
  LxLyHz_n[idx] += 0.25*y + 0.25*x;
}

// Inverse linear combination for the HHL, HLH and LHH subbands.
// One thread per coefficient; two channels are rebuilt as (df1+n) and
// (n-df1) minus a 0.125-weighted df2 term that is dropped on the matching
// lower face, and df2 is moved to (or stays in) the remaining channel.
extern "C" __global__ void cu_iwt3df_LC2 (_data_t* HxHyLz_df1,_data_t* HxHyLz_df2,_data_t* HxHyLz_n,_data_t* HxLyHz_df1,_data_t* HxLyHz_df2,_data_t* HxLyHz_n,_data_t* LxHyHz_df1,_data_t* LxHyHz_df2,_data_t* LxHyHz_n,int dx, int dy, int dz)
{
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;

  if ((i>=dx)||(j>=dy)||(k>=dz))
    return;

  const int idx = i+j*dx+k*dx*dy;
  const scalar_t xGreatZero = i>0;
  const scalar_t yGreatZero = j>0;
  const scalar_t zGreatZero = k>0;
  _data_t df1,df2,n;

  //HHL
  df1 = HxHyLz_df1[idx];
  df2 = HxHyLz_df2[idx];
  n = HxHyLz_n[idx];
  HxHyLz_n[idx] = df2;
  HxHyLz_df1[idx] = df1+n-zGreatZero*0.125*df2;
  HxHyLz_df2[idx] = n-df1-zGreatZero*0.125*df2;

  //HLH
  df1 = HxLyHz_df1[idx];
  df2 = HxLyHz_df2[idx];
  n = HxLyHz_n[idx];
  HxLyHz_df2[idx] = df2;
  HxLyHz_n[idx] = df1+n-yGreatZero*0.125*df2;
  HxLyHz_df1[idx] = n-df1-yGreatZero*0.125*df2;

  //LHH
  df1 = LxHyHz_df1[idx];
  df2 = LxHyHz_df2[idx];
  n = LxHyHz_n[idx];
  LxHyHz_df1[idx] = df2;
  LxHyHz_df2[idx] = df1+n-xGreatZero*0.125*df2;
  LxHyHz_n[idx] = n-df1-xGreatZero*0.125*df2;
}

// Neighbour term of the inverse HHL/HLH/LHH combination: adds 0.125 times
// the value of the preceding element along one axis to two channels.
// Elements on the lower face use zero instead.
extern "C" __global__ void cu_iwt3df_LC2_diff (_data_t* HxHyLz_df1,_data_t* HxHyLz_df2,_data_t* HxHyLz_n,_data_t* HxLyHz_df1,_data_t* HxLyHz_df2,_data_t* HxLyHz_n,_data_t* LxHyHz_df1,_data_t* LxHyHz_df2,_data_t* LxHyHz_n,int dx, int dy, int dz)
{
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;
  const _data_t zero = make_float2(0.f,0.f);

  if ((i>=dx)||(j>=dy)||(k>=dz))
    return;

  const int idx = i+j*dx+k*dx*dy;
  _data_t x,y,z;

  //HHL
  z = (k>0) ? HxHyLz_n[i+j*dx+(k-1)*dx*dy] : zero;
  HxHyLz_df1[idx] += 0.125*z;
  HxHyLz_df2[idx] += 0.125*z;

  //HLH
  y = (j>0) ? HxLyHz_df2[i+(j-1)*dx+k*dx*dy] : zero;
  HxLyHz_df1[idx] += 0.125*y;
  HxLyHz_n[idx] += 0.125*y;

  //LHH
  x = (i>0) ? LxHyHz_df1[(i-1)+j*dx+k*dx*dy] : zero;
  LxHyHz_df2[idx] += 0.125*x;
  LxHyHz_n[idx] += 0.125*x;
}

// Inverse linear combination for the HHH subband: each coefficient triple
// (df1,df2,n) becomes (n-df1, df2+n, df1-df2+n).
extern "C" __global__ void cu_iwt3df_LC3 (_data_t* HxHyHz_df1,_data_t* HxHyHz_df2,_data_t* HxHyHz_n,int dx, int dy, int dz)
{
  const int i = blockIdx.x*blockDim.x+threadIdx.x;
  const int j = blockIdx.y*blockDim.y+threadIdx.y;
  const int k = blockIdx.z*blockDim.z+threadIdx.z;

  if ((i>=dx)||(j>=dy)||(k>=dz))
    return;

  //HHH
  const int idx = i+j*dx+k*dx*dy;
  const _data_t df1 = HxHyHz_df1[idx];
  const _data_t df2 = HxHyHz_df2[idx];
  const _data_t n = HxHyHz_n[idx];
  HxHyHz_df1[idx] = n-df1;
  HxHyHz_df2[idx] = df2+n;
  HxHyHz_n[idx] = df1-df2+n;
}

// Scale in[0..maxInd] by `mult` in place, one thread per element.
// NOTE(review): the guard treats maxInd as an inclusive last index; if
// callers pass an element count this touches one element too many - confirm.
extern "C" __global__ void cu_mult(_data_t* in, _data_t mult, int maxInd)
{
  int ind = blockIdx.x*blockDim.x+threadIdx.x;
  if (ind > maxInd) {
    return;
  }
  in[ind] = in[ind]*mult;
}

// NOTE(review): `in` is never read and the expression below reduces to
// i + (i - i)*mult, so `out` is effectively left unchanged. Presumably
// in[ind] was meant to take part - confirm intent before changing.
// The guard treats maxInd as an inclusive last index, as in cu_mult.
extern "C" __global__ void cu_add_mult(_data_t* out, _data_t* in, _data_t mult, int maxInd)
{
  int ind = blockIdx.x*blockDim.x+threadIdx.x;
  if (ind > maxInd) {
    return;
  }
  _data_t i = out[ind];
  out[ind] = i+(out[ind]-i)*mult;
}

// Soft-threshold in[0..numMax-1] in place, one thread per element:
// entries whose magnitude exceeds `thresh` are shrunk by `thresh` towards
// zero, all others are set to zero.
// numMax is an element count, so threads with i >= numMax must not touch
// memory (the launch rounds the thread count up to a multiple of the
// block size).
__global__ void cu_soft_thresh (_data_t* in, scalar_t thresh, int numMax)
{
  int const i = threadIdx.x + blockDim.x*blockIdx.x;
  if (i>=numMax)
    return;

  scalar_t norm = abs(in[i]);
  scalar_t red = norm - thresh;
  // red > 0 implies norm > thresh, so the division is only reached with norm != 0 for thresh >= 0
  in[i] = (red > 0.f) ? ((red / norm) * (in[i])) : in[i]-in[i];
}

// Scatter dataCopy into data with a circular offset of
// shift1 + shift2*dx + shift3*dx*dy on the flattened index (modulo dx*dy*dz).
__global__ void cu_circshift(_data_t* data, _data_t* dataCopy, int dx, int dy, int dz,int shift1, int shift2,int shift3)
{
  int index = blockIdx.x*blockDim.x + threadIdx.x;
  if (index >= dx*dy*dz) {
    return;
  }
  int indexShifted = (index+shift1+shift2*dx+shift3*dx*dy)%(dx*dy*dz);
  data[indexShifted] = dataCopy[index];
}

// Inverse of cu_circshift: gather from the shifted position of dataCopy
// back to the unshifted position of data.
__global__ void cu_circunshift(_data_t* data, _data_t* dataCopy, int dx, int dy, int dz,int shift1, int shift2,int shift3)
{
  int index = blockIdx.x*blockDim.x + threadIdx.x;
  if (index >= dx*dy*dz) {
    return;
  }
  int indexShifted = (index+shift1+shift2*dx+shift3*dx*dy)%(dx*dy*dz);
  data[index] = dataCopy[indexShifted];
}
bart-0.4.02/src/dfwavelet/dfwavelet_kernels.h000066400000000000000000000041331320577655200211670ustar00rootroot00000000000000/*
 * Copyright 2013-2015 The Regents of the University of California.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
*/ #ifdef __cplusplus extern "C" { #endif struct dfwavelet_plan_s; /* GPU Host Functions */ extern void dffwt3_gpuHost(struct dfwavelet_plan_s* plan, _Complex float* out_wcdf1,_Complex float* out_wcdf2,_Complex float* out_wcn, _Complex float* in_vx,_Complex float* in_vy,_Complex float* in_vz); extern void dfiwt3_gpuHost(struct dfwavelet_plan_s* plan, _Complex float* out_vx,_Complex float* out_vy,_Complex float* out_vz, _Complex float* in_wcdf1,_Complex float* in_wcdf2,_Complex float* in_wcn); extern void dfsoftthresh_gpuHost(struct dfwavelet_plan_s* plan,float dfthresh, float nthresh, _Complex float* out_wcdf1,_Complex float* out_wcdf2,_Complex float* out_wcn); extern void dfwavthresh3_gpuHost(struct dfwavelet_plan_s* plan,float dfthresh, float nthresh,_Complex float* out_vx,_Complex float* out_vy,_Complex float* out_vz,_Complex float* in_vx,_Complex float* in_vy,_Complex float* in_vz); /* GPU Functions */ extern void dffwt3_gpu(struct dfwavelet_plan_s* plan, _Complex float* out_wcdf1,_Complex float* out_wcdf2,_Complex float* out_wcn, _Complex float* in_vx,_Complex float* in_vy,_Complex float* in_vz); extern void dfiwt3_gpu(struct dfwavelet_plan_s* plan, _Complex float* out_vx,_Complex float* out_vy,_Complex float* out_vz, _Complex float* in_wcdf1,_Complex float* in_wcdf2,_Complex float* in_wcn); extern void dfsoftthresh_gpu(struct dfwavelet_plan_s* plan,float dfthresh, float nthresh, _Complex float* out_wcdf1,_Complex float* out_wcdf2,_Complex float* out_wcn); extern void dfwavthresh3_gpu(struct dfwavelet_plan_s* plan,float dfthresh, float nthresh,_Complex float* out_vx,_Complex float* out_vy,_Complex float* out_vz,_Complex float* in_vx,_Complex float* in_vy,_Complex float* in_vz); extern void circshift_gpu(struct dfwavelet_plan_s* plan, _Complex float* data); extern void circunshift_gpu(struct dfwavelet_plan_s* plan, _Complex float* data); #ifdef __cplusplus } #endif 
bart-0.4.02/src/dfwavelet/prox_dfwavelet.c000066400000000000000000000323601320577655200205120ustar00rootroot00000000000000/* * Copyright 2013-2015 The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2015 Frank Ong * 2016 Martin Uecker * * Ong F, Uecker M, Tariq U, Hsiao A, Alley MT, Vasanawala SS, Lustig M. * Robust 4D Flow Denoising using Divergence-free Wavelet Transform, * Magn Reson Med 2015; 73: 828-842. */ #define _GNU_SOURCE #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/ops.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/mri.h" #include "linops/linop.h" #include "linops/waveop.h" #include "iter/thresh.h" #include "dfwavelet.h" #include "prox_dfwavelet.h" static void prox_dfwavelet_del(const operator_data_t* _data); static void prox_dfwavelet_thresh(const operator_data_t* _data, float thresh, complex float* out, const complex float* in); static struct prox_dfwavelet_data* prepare_prox_dfwavelet_data(const long im_dims[DIMS], const long min_size[3], const complex float res[3], unsigned int flow_dim, float lambda, bool use_gpu); static void prox_4pt_dfwavelet_del(const operator_data_t* _data); static void prox_4pt_dfwavelet_thresh(const operator_data_t* _data, float thresh, complex float* out, const complex float* in); static struct prox_4pt_dfwavelet_data* prepare_prox_4pt_dfwavelet_data(const long im_dims[DIMS], const long min_size[3], const complex float res[3], unsigned int flow_dim, float lambda, bool use_gpu); struct prox_dfwavelet_data { INTERFACE(operator_data_t); bool use_gpu; unsigned int slice_flag; unsigned int flow_dim; float lambda; long im_dims[DIMS]; long tim_dims[DIMS]; long im_strs[DIMS]; complex float* vx; complex float* vy; complex float* vz; struct dfwavelet_plan_s* plan; }; static 
DEF_TYPEID(prox_dfwavelet_data); const struct operator_p_s* prox_dfwavelet_create(const long im_dims[DIMS], const long min_size[3], const complex float res[3], unsigned int flow_dim, float lambda, bool use_gpu) { struct prox_dfwavelet_data* data = prepare_prox_dfwavelet_data(im_dims, min_size, res, flow_dim, lambda, use_gpu); return operator_p_create(DIMS, im_dims, DIMS, im_dims, CAST_UP(data), prox_dfwavelet_thresh, prox_dfwavelet_del); } struct prox_dfwavelet_data* prepare_prox_dfwavelet_data(const long im_dims[DIMS], const long min_size[3], const complex float res[3], unsigned int flow_dim, float lambda, bool use_gpu) { // get dimension PTR_ALLOC(struct prox_dfwavelet_data, data); SET_TYPEID(prox_dfwavelet_data, data); md_copy_dims(DIMS, data->im_dims, im_dims); md_select_dims(DIMS, FFT_FLAGS, data->tim_dims, im_dims); md_calc_strides(DIMS, data->im_strs, im_dims, CFL_SIZE); // initialize temp #ifdef USE_CUDA if (use_gpu) { data->vx = md_alloc_gpu(DIMS, data->tim_dims, CFL_SIZE); data->vy = md_alloc_gpu(DIMS, data->tim_dims, CFL_SIZE); data->vz = md_alloc_gpu(DIMS, data->tim_dims, CFL_SIZE); } else #endif { data->vx = md_alloc(DIMS, data->tim_dims, CFL_SIZE); data->vy = md_alloc(DIMS, data->tim_dims, CFL_SIZE); data->vz = md_alloc(DIMS, data->tim_dims, CFL_SIZE); } data->flow_dim = flow_dim; data->slice_flag = ~FFT_FLAGS; data->lambda = lambda; data->plan = prepare_dfwavelet_plan(3, data->tim_dims, (long*) min_size, (complex float*) res, use_gpu); return PTR_PASS(data); } static void prox_dfwavelet_del(const operator_data_t* _data) { struct prox_dfwavelet_data* data = CAST_DOWN(prox_dfwavelet_data, _data); md_free(data->vx); md_free(data->vy); md_free(data->vz); dfwavelet_free(data->plan); free(data); } static void prox_dfwavelet_thresh(const operator_data_t* _data, float thresh, complex float* out, const complex float* in) { struct prox_dfwavelet_data* data = CAST_DOWN(prox_dfwavelet_data, _data); bool done = false; long pos[DIMS]; md_set_dims(DIMS, pos, 0); 
while (!done) { // copy vx, vy, vz md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->vx, in, CFL_SIZE); pos[data->flow_dim]++; md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->vy, in, CFL_SIZE); pos[data->flow_dim]++; md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->vz, in, CFL_SIZE); pos[data->flow_dim]=0; // threshold dfwavelet_thresh(data->plan, thresh * data->lambda, thresh* data->lambda, data->vx, data->vy, data->vz, data->vx, data->vy, data->vz); // copy vx, vy, vz md_copy_block(DIMS, pos, data->im_dims, out, data->tim_dims, data->vx, CFL_SIZE); pos[data->flow_dim]++; md_copy_block(DIMS, pos, data->im_dims, out, data->tim_dims, data->vy, CFL_SIZE); pos[data->flow_dim]++; md_copy_block(DIMS, pos, data->im_dims, out, data->tim_dims, data->vz, CFL_SIZE); pos[data->flow_dim]=0; // increment pos long carryon = 1; for (unsigned int i = 0; i < DIMS; i++) { if (MD_IS_SET(data->slice_flag & ~MD_BIT(data->flow_dim), i)) { pos[i] += carryon; if (pos[i] < data->im_dims[i]) { carryon = 0; break; } else { carryon = 1; pos[i] = 0; } } } done = carryon; } } struct prox_4pt_dfwavelet_data { INTERFACE(operator_data_t); bool use_gpu; unsigned int slice_flag; unsigned int flow_dim; float lambda; long im_dims[DIMS]; long tim_dims[DIMS]; long im_strs[DIMS]; complex float* vx; complex float* vy; complex float* vz; complex float* ph0; complex float* pc0; complex float* pc1; complex float* pc2; complex float* pc3; struct dfwavelet_plan_s* plan; const struct linop_s* w_op; const struct operator_p_s* wthresh_op; }; static DEF_TYPEID(prox_4pt_dfwavelet_data); const struct operator_p_s* prox_4pt_dfwavelet_create(const long im_dims[DIMS], const long min_size[3], const complex float res[3], unsigned int flow_dim, float lambda, bool use_gpu) { struct prox_4pt_dfwavelet_data* data = prepare_prox_4pt_dfwavelet_data(im_dims, min_size, res, flow_dim, lambda, use_gpu); return operator_p_create(DIMS, im_dims, DIMS, im_dims, CAST_UP(data), prox_4pt_dfwavelet_thresh, 
prox_4pt_dfwavelet_del); } struct prox_4pt_dfwavelet_data* prepare_prox_4pt_dfwavelet_data(const long im_dims[DIMS], const long min_size[3], const complex float res[3], unsigned int flow_dim, float lambda, bool use_gpu) { PTR_ALLOC(struct prox_4pt_dfwavelet_data, data); SET_TYPEID(prox_4pt_dfwavelet_data, data); md_copy_dims(DIMS, data->im_dims, im_dims); md_select_dims(DIMS, FFT_FLAGS, data->tim_dims, im_dims); md_calc_strides(DIMS, data->im_strs, im_dims, CFL_SIZE); assert(4 == im_dims[flow_dim]); // initialize temp #ifdef USE_CUDA if (use_gpu) { data->vx = md_alloc_gpu(DIMS, data->tim_dims, CFL_SIZE); data->vy = md_alloc_gpu(DIMS, data->tim_dims, CFL_SIZE); data->vz = md_alloc_gpu(DIMS, data->tim_dims, CFL_SIZE); data->ph0 = md_alloc_gpu(DIMS, data->tim_dims, CFL_SIZE); data->pc0 = md_alloc_gpu(DIMS, data->tim_dims, CFL_SIZE); data->pc1 = md_alloc_gpu(DIMS, data->tim_dims, CFL_SIZE); data->pc2 = md_alloc_gpu(DIMS, data->tim_dims, CFL_SIZE); data->pc3 = md_alloc_gpu(DIMS, data->tim_dims, CFL_SIZE); } else #endif { data->vx = md_alloc(DIMS, data->tim_dims, CFL_SIZE); data->vy = md_alloc(DIMS, data->tim_dims, CFL_SIZE); data->vz = md_alloc(DIMS, data->tim_dims, CFL_SIZE); data->ph0 = md_alloc(DIMS, data->tim_dims, CFL_SIZE); data->pc0 = md_alloc(DIMS, data->tim_dims, CFL_SIZE); data->pc1 = md_alloc(DIMS, data->tim_dims, CFL_SIZE); data->pc2 = md_alloc(DIMS, data->tim_dims, CFL_SIZE); data->pc3 = md_alloc(DIMS, data->tim_dims, CFL_SIZE); } data->flow_dim = flow_dim; data->slice_flag = ~FFT_FLAGS; data->lambda = lambda; data->plan = prepare_dfwavelet_plan(3, data->tim_dims, (long*) min_size, (complex float*) res, use_gpu); long strs[DIMS]; md_calc_strides(DIMS, strs, data->tim_dims, CFL_SIZE); data->w_op = linop_wavelet_create(DIMS, FFT_FLAGS, data->tim_dims, strs, min_size, false); data->wthresh_op = prox_unithresh_create(DIMS, data->w_op, lambda, MD_BIT(data->flow_dim), use_gpu); return PTR_PASS(data); } static void prox_4pt_dfwavelet_del(const operator_data_t* 
_data) { struct prox_4pt_dfwavelet_data* data = CAST_DOWN(prox_4pt_dfwavelet_data, _data); md_free(data->vx); md_free(data->vy); md_free(data->vz); md_free(data->ph0); md_free(data->pc0); md_free(data->pc1); md_free(data->pc2); md_free(data->pc3); dfwavelet_free(data->plan); operator_p_free(data->wthresh_op); linop_free(data->w_op); free(data); } static void prox_4pt_dfwavelet_thresh(const operator_data_t* _data, float thresh, complex float* out, const complex float* in) { struct prox_4pt_dfwavelet_data* data = CAST_DOWN(prox_4pt_dfwavelet_data, _data); bool done = false; long pos[DIMS]; md_set_dims(DIMS, pos, 0); while (!done) { // copy pc md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->pc0, in, CFL_SIZE); pos[data->flow_dim]++; md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->pc1, in, CFL_SIZE); pos[data->flow_dim]++; md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->pc2, in, CFL_SIZE); pos[data->flow_dim]++; md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->pc3, in, CFL_SIZE); pos[data->flow_dim] = 0; // pc to velocity // TODO: Make gpu for (int i = 0; i < md_calc_size(DIMS, data->tim_dims); i++) { data->vx[i] = (data->pc1[i] - data->pc0[i]) / 2; data->vy[i] = (data->pc2[i] - data->pc1[i]) / 2; data->vz[i] = (data->pc3[i] - data->pc2[i]) / 2; data->ph0[i] = (data->pc0[i] + data->pc3[i]) / 2; } // threshold dfwavelet_thresh(data->plan, thresh * data->lambda, thresh* data->lambda, data->vx, data->vy, data->vz, data->vx, data->vy, data->vz); operator_p_apply(data->wthresh_op, thresh, DIMS, data->tim_dims, data->ph0, DIMS, data->tim_dims, data->ph0); // velocity to pc for (int i = 0; i < md_calc_size(DIMS, data->tim_dims ); i++) { data->pc0[i] = (- data->vx[i] - data->vy[i] - data->vz[i] + data->ph0[i]); data->pc1[i] = (+ data->vx[i] - data->vy[i] - data->vz[i] + data->ph0[i]); data->pc2[i] = (+ data->vx[i] + data->vy[i] - data->vz[i] + data->ph0[i]); data->pc3[i] = (+ data->vx[i] + data->vy[i] + data->vz[i] + data->ph0[i]); } 
// copy pc md_copy_block(DIMS, pos, data->im_dims, out, data->tim_dims, data->pc0, CFL_SIZE); pos[data->flow_dim]++; md_copy_block(DIMS, pos, data->im_dims, out, data->tim_dims, data->pc1, CFL_SIZE); pos[data->flow_dim]++; md_copy_block( DIMS, pos, data->im_dims, out, data->tim_dims, data->pc2, CFL_SIZE ); pos[data->flow_dim]++; md_copy_block( DIMS, pos, data->im_dims, out, data->tim_dims, data->pc3, CFL_SIZE ); pos[data->flow_dim] = 0; // increment pos long carryon = 1; for(unsigned int i = 0; i < DIMS; i++) { if (MD_IS_SET(data->slice_flag & ~MD_BIT(data->flow_dim), i)) { pos[i] += carryon; if (pos[i] < data->im_dims[i]) { carryon = 0; break; } else { carryon = 1; pos[i] = 0; } } } done = carryon; } } bart-0.4.02/src/dfwavelet/prox_dfwavelet.h000066400000000000000000000006521320577655200205160ustar00rootroot00000000000000 #include "misc/mri.h" #include "num/ops.h" extern const struct operator_p_s* prox_dfwavelet_create(const long im_dims[DIMS], const long min_size[3], const complex float res[3], unsigned int flow_dim, float lambda, _Bool use_gpu); extern const struct operator_p_s* prox_4pt_dfwavelet_create(const long im_dims[DIMS], const long min_size[3], const complex float res[3], unsigned int flow_dim, float lambda, _Bool use_gpu); bart-0.4.02/src/ecalib.c000066400000000000000000000133421320577655200147160ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * Copyright 2015-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012-2017 Martin Uecker * 2013 Dara Bahri * 2015 Siddharth Iyer */ #include #include #include #include #include "misc/mmio.h" #include "misc/mri.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/opts.h" #include "num/multind.h" #include "num/fft.h" #include "num/init.h" #include "calib/calib.h" #include "calib/estvar.h" #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif static const char usage_str[] = " []"; static const char help_str[] = "Estimate coil sensitivities using ESPIRiT calibration.\n" "Optionally outputs the eigenvalue maps."; int main_ecalib(int argc, char* argv[]) { long calsize[3] = { 24, 24, 24 }; int maps = 2; bool one = false; bool calcen = false; bool print_svals = false; struct ecalib_conf conf = ecalib_defaults; const struct opt_s opts[] = { OPT_FLOAT('t', &conf.threshold, "threshold", "This determined the size of the null-space."), OPT_FLOAT('c', &conf.crop, "crop_value", "Crop the sensitivities if the eigenvalue is smaller than {crop_value}."), OPT_VEC3('k', &conf.kdims, "ksize", "kernel size"), OPT_VEC3('K', &conf.kdims, "", "()"), OPT_VEC3('r', &calsize, "cal_size", "Limits the size of the calibration region."), OPT_VEC3('R', &calsize, "", "()"), OPT_INT('m', &maps, "maps", "Number of maps to compute."), OPT_SET('S', &conf.softcrop, "create maps with smooth transitions (Soft-SENSE)."), OPT_SET('W', &conf.weighting, "soft-weighting of the singular vectors."), OPT_SET('I', &conf.intensity, "intensity correction"), OPT_SET('1', &one, "perform only first part of the calibration"), OPT_CLEAR('O', &conf.orthiter, "()"), OPT_FLOAT('b', &conf.perturb, "", "()"), OPT_SET('V', &print_svals, "()"), OPT_SET('C', &calcen, "()"), OPT_SET('g', &conf.usegpu, "()"), OPT_FLOAT('p', &conf.percentsv, "", "()"), OPT_INT('n', &conf.numsv, "", "()"), OPT_FLOAT('v', &conf.var, "variance", "Variance of noise in data."), OPT_SET('a', &conf.automate, "Automatically pick thresholds."), }; cmdline(&argc, argv, 2, 3, usage_str, 
help_str, ARRAY_SIZE(opts), opts); if (-1. != conf.percentsv) conf.threshold = -1.; if (-1 != conf.numsv) conf.threshold = -1.; if (conf.automate) { conf.crop = -1.; conf.weighting = true; } if (conf.weighting) { conf.numsv = -1.; conf.threshold = 0; conf.percentsv = -1.; conf.orthiter = false; } int N = DIMS; long ksp_dims[N]; complex float* in_data = load_cfl(argv[1], N, ksp_dims); // assert((kdims[0] < calsize_ro) && (kdims[1] < calsize_ro) && (kdims[2] < calsize_ro)); // assert((ksp_dims[0] == 1) || (calsize_ro < ksp_dims[0])); if (1 != ksp_dims[MAPS_DIM]) error("MAPS dimension is not of size one.\n"); long cal_dims[N]; complex float* cal_data = NULL; if (!calcen) { #ifdef USE_CC_EXTRACT_CALIB cal_data = cc_extract_calib(cal_dims, calsize, ksp_dims, in_data); #else cal_data = extract_calib(cal_dims, calsize, ksp_dims, in_data, false); #endif } else { for (int i = 0; i < 3; i++) cal_dims[i] = (calsize[i] < ksp_dims[i]) ? calsize[i] : ksp_dims[i]; for (int i = 3; i < N; i++) cal_dims[i] = ksp_dims[i]; cal_data = md_alloc(5, cal_dims, CFL_SIZE); md_resize_center(5, cal_dims, cal_data, ksp_dims, in_data, CFL_SIZE); } for (int i = 0; i < 3; i++) if (1 == ksp_dims[i]) conf.kdims[i] = 1; long channels = cal_dims[3]; unsigned int K = conf.kdims[0] * conf.kdims[1] * conf.kdims[2] * channels; float svals[K]; for (unsigned int i = 0; i < 3; i++) if ((1 == cal_dims[i]) && (1 != ksp_dims[i])) error("Calibration region not found!\n"); // To reproduce old results turn off rotation of phase. // conf.rotphase = false; // FIXME: we should scale the data (conf.usegpu ? 
num_init_gpu : num_init)(); if ((conf.var < 0) && (conf.weighting || (conf.crop < 0))) conf.var = estvar_calreg(conf.kdims, cal_dims, cal_data); if (one) { #if 0 long maps = out_dims[4]; assert(caldims[3] == out_dims[3]); assert(maps <= channels); #endif long cov_dims[4]; calone_dims(&conf, cov_dims, channels); complex float* imgcov = md_alloc(4, cov_dims, CFL_SIZE); calone(&conf, cov_dims, imgcov, K, svals, cal_dims, cal_data); complex float* out = create_cfl(argv[2], 4, cov_dims); md_copy(4, cov_dims, out, imgcov, CFL_SIZE); unmap_cfl(4, cov_dims, out); // caltwo(crthr, out_dims, out_data, emaps, cov_dims, imgcov, NULL, NULL); md_free(imgcov); } else { long out_dims[N]; long map_dims[N]; for (int i = 0; i < N; i++) { out_dims[i] = 1; map_dims[i] = 1; if ((i < 3) && (1 < conf.kdims[i])) { out_dims[i] = ksp_dims[i]; map_dims[i] = ksp_dims[i]; } } assert(maps <= ksp_dims[COIL_DIM]); out_dims[COIL_DIM] = ksp_dims[COIL_DIM]; out_dims[MAPS_DIM] = maps; map_dims[COIL_DIM] = 1; map_dims[MAPS_DIM] = maps; const char* emaps_file = NULL; if (4 == argc) emaps_file = argv[3]; complex float* out_data = create_cfl(argv[2], N, out_dims); complex float* emaps = (emaps_file ? create_cfl : anon_cfl)(emaps_file, N, map_dims); calib(&conf, out_dims, out_data, emaps, K, svals, cal_dims, cal_data); unmap_cfl(N, out_dims, out_data); unmap_cfl(N, map_dims, emaps); } if (print_svals) { for (unsigned int i = 0; i < K; i++) printf("SVALS %d %f\n", i, svals[i]); } printf("Done.\n"); unmap_cfl(N, ksp_dims, in_data); md_free(cal_data); exit(0); } bart-0.4.02/src/ecaltwo.c000066400000000000000000000055341320577655200151410ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012-2016 Martin Uecker */ #include #include #include "num/multind.h" #include "num/fft.h" #include "calib/calib.h" #include "misc/misc.h" #include "misc/mmio.h" #include "misc/mri.h" #include "misc/utils.h" #include "misc/debug.h" #include "misc/opts.h" #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif static const char usage_str[] = "x y z []"; static const char help_str[] = "Second part of ESPIRiT calibration.\n" "Optionally outputs the eigenvalue maps."; int main_ecaltwo(int argc, char* argv[]) { long maps = 2; // channels; struct ecalib_conf conf = ecalib_defaults; const struct opt_s opts[] = { OPT_FLOAT('c', &conf.crop, "crop_value", "Crop the sensitivities if the eigenvalue is smaller than {crop_value}."), OPT_LONG('m', &maps, "maps", "Number of maps to compute."), OPT_SET('S', &conf.softcrop, "()"), OPT_CLEAR('O', &conf.orthiter, "()"), OPT_SET('g', &conf.usegpu, "()"), }; cmdline(&argc, argv, 5, 6, usage_str, help_str, ARRAY_SIZE(opts), opts); long in_dims[DIMS]; complex float* in_data = load_cfl(argv[4], DIMS, in_dims); int channels = 0; while (in_dims[3] != (channels * (channels + 1) / 2)) channels++; debug_printf(DP_INFO, "Channels: %d\n", channels); assert(maps <= channels); long out_dims[DIMS] = { [0 ... DIMS - 1] = 1 }; long map_dims[DIMS] = { [0 ... 
DIMS - 1] = 1 }; out_dims[0] = atoi(argv[1]); out_dims[1] = atoi(argv[2]); out_dims[2] = atoi(argv[3]); out_dims[3] = channels; out_dims[4] = maps; assert((out_dims[0] >= in_dims[0])); assert((out_dims[1] >= in_dims[1])); assert((out_dims[2] >= in_dims[2])); for (int i = 0; i < 3; i++) map_dims[i] = out_dims[i]; map_dims[3] = 1; map_dims[4] = maps; complex float* out_data = create_cfl(argv[5], DIMS, out_dims); complex float* emaps; if (7 == argc) emaps = create_cfl(argv[6], DIMS, map_dims); else emaps = md_alloc(DIMS, map_dims, CFL_SIZE); caltwo(&conf, out_dims, out_data, emaps, in_dims, in_data, NULL, NULL); if (conf.intensity) { debug_printf(DP_DEBUG1, "Normalize...\n"); normalizel1(DIMS, COIL_FLAG, out_dims, out_data); } debug_printf(DP_DEBUG1, "Crop maps... (%.2f)\n", conf.crop); crop_sens(out_dims, out_data, conf.softcrop, conf.crop, emaps); debug_printf(DP_DEBUG1, "Fix phase...\n"); fixphase(DIMS, out_dims, COIL_DIM, out_data, out_data); debug_printf(DP_INFO, "Done.\n"); unmap_cfl(DIMS, in_dims, in_data); unmap_cfl(DIMS, out_dims, out_data); if (7 == argc) unmap_cfl(DIMS, map_dims, emaps); else md_free(emaps); exit(0); } bart-0.4.02/src/estdelay.c000066400000000000000000000064721320577655200153170ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Martin Uecker * * * Kai Tobias Block and Martin Uecker, Simple Method for Adaptive * Gradient-Delay Compensation in Radial MRI, Annual Meeting ISMRM, * Montreal 2011, In Proc. Intl. Soc. Mag. Reson. 
Med 19: 2816 (2011) * * Amir Moussavi, Markus Untenberger, Martin Uecker, and Jens Frahm, * Correction of gradient-induced phase errors in radial MRI, * Magnetic Resonance in Medicine, 71:308-312 (2014) */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/qform.h" #include "misc/subpixel.h" #include "misc/misc.h" #include "misc/mmio.h" #include "misc/mri.h" #ifndef DIMS #define DIMS 16 #endif static void radial_self_delays(unsigned int N, float shifts[N], const float phi[N], const long dims[DIMS], const complex float* in) { unsigned int d = 2; unsigned int flags = (1 << d); assert(N == dims[d]); long dims1[DIMS]; md_select_dims(DIMS, ~flags, dims1, dims); complex float* tmp1 = md_alloc(DIMS, dims1, CFL_SIZE); complex float* tmp2 = md_alloc(DIMS, dims1, CFL_SIZE); long pos[DIMS] = { 0 }; for (unsigned int i = 0; i < dims[d]; i++) { pos[d] = i; md_copy_block(DIMS, pos, dims1, tmp1, dims, in, CFL_SIZE); // find opposing spoke float mdelta = 0.; int mindex = 0; for (unsigned int j = 0; j < dims[d]; j++) { float delta = cabsf(cexpf(1.i * phi[j]) - cexpf(1.i * phi[i])); if (mdelta <= delta) { mdelta = delta; mindex = j; } } pos[d] = mindex; md_copy_block(DIMS, pos, dims1, tmp2, dims, in, CFL_SIZE); unsigned int d2 = 1; float rshifts[DIMS]; md_flip(DIMS, dims1, MD_BIT(d2), tmp2, tmp2, CFL_SIZE); // could be done by iFFT in est_subpixel_shift est_subpixel_shift(DIMS, rshifts, dims1, MD_BIT(d2), tmp2, tmp1); float mshift = rshifts[d2] / 2.; // mdelta shifts[i] = mshift; } md_free(tmp1); md_free(tmp2); } static const char usage_str[] = " "; static const char help_str[] = "Estimate gradient delays from radial data."; int main_estdelay(int argc, char* argv[]) { mini_cmdline(&argc, argv, 2, usage_str, help_str); long tdims[DIMS]; const complex float* traj = load_cfl(argv[1], DIMS, tdims); long tdims1[DIMS]; md_select_dims(DIMS, ~MD_BIT(1), tdims1, tdims); complex float* traj1 = md_alloc(DIMS, tdims1, CFL_SIZE); 
md_slice(DIMS, MD_BIT(1), (long[DIMS]){ 0 }, tdims, traj1, traj, CFL_SIZE); unsigned int N = tdims[2]; float angles[N]; for (unsigned int i = 0; i < N; i++) angles[i] = M_PI + atan2f(crealf(traj1[3 * i + 0]), crealf(traj1[3 * i + 1])); unmap_cfl(DIMS, tdims, traj); long dims[DIMS]; const complex float* in = load_cfl(argv[2], DIMS, dims); // FIXME: more checks assert(dims[1] == tdims[1]); assert(dims[2] == tdims[2]); float delays[N]; radial_self_delays(N, delays, angles, dims, in); /* We allow an arbitrary quadratic form to account for * non-physical coordinate systems. * Moussavi et al., MRM 71:308-312 (2014) */ float qf[3]; fit_quadratic_form(qf, N, angles, delays); printf("%f:%f:%f\n", qf[0], qf[1], qf[2]); unmap_cfl(DIMS, dims, in); exit(0); } bart-0.4.02/src/estdims.c000066400000000000000000000020741320577655200151470ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2015 Frank Ong */ #include #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/io.h" #include "misc/misc.h" #include "noncart/nufft.h" static const char usage_str[] = ""; static const char help_str[] = "Estimate image dimension from non-Cartesian trajectory.\n" "Assume trajectory scaled to -DIM/2 to DIM/2 (ie dk=1/FOV=1)\n"; int main_estdims(int argc, char* argv[]) { mini_cmdline(&argc, argv, 1, usage_str, help_str); num_init(); int N = 16; long traj_dims[N]; complex float* traj = load_cfl(argv[1], N, traj_dims); long im_dims[N]; estimate_im_dims(N, im_dims, traj_dims, traj); printf("%ld %ld %ld\n", im_dims[0], im_dims[1], im_dims[2]); unmap_cfl(N, traj_dims, traj); exit(0); } bart-0.4.02/src/estshift.c000066400000000000000000000023011320577655200153210ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Martin Uecker */ #include #include #include #include "num/multind.h" #include "misc/misc.h" #include "misc/mmio.h" #include "misc/subpixel.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "flags "; static const char help_str[] = "Estimate sub-pixel shift."; int main_estshift(int argc, char* argv[]) { mini_cmdline(&argc, argv, 3, usage_str, help_str); unsigned int flags = atoi(argv[1]); long dims1[DIMS]; long dims2[DIMS]; const complex float* in1 = load_cfl(argv[2], DIMS, dims1); const complex float* in2 = load_cfl(argv[3], DIMS, dims2); assert(md_check_compat(DIMS, ~0u, dims1, dims2)); float shifts[DIMS]; est_subpixel_shift(DIMS, shifts, dims1, flags, in1, in2); printf("Shifts:"); for (unsigned int i = 0; i < DIMS; i++) { if (!MD_IS_SET(flags, i)) continue; printf("\t%f", shifts[i]); } printf("\n"); unmap_cfl(DIMS, dims1, in1); unmap_cfl(DIMS, dims2, in2); exit(0); } bart-0.4.02/src/estvar.c000066400000000000000000000033241320577655200150020ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2015 Siddharth Iyer * 2015-2016 Martin Uecker */ #include #include #include #include "misc/mmio.h" #include "misc/mri.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/opts.h" #include "num/flpmath.h" #include "num/multind.h" #include "num/init.h" #include "misc/debug.h" #include "calib/estvar.h" static const char usage_str[] = ""; static const char help_str[] = "Estimate the noise variance assuming white Gaussian noise."; int main_estvar(int argc, char* argv[]) { long calsize_dims[3] = { 24, 24, 24}; long kernel_dims[3] = { 6, 6, 6}; const struct opt_s opts[] = { OPT_VEC3('k', &kernel_dims, "ksize", "kernel size"), OPT_VEC3('K', &kernel_dims, "", "()"), OPT_VEC3('r', &calsize_dims, "cal_size", "Limits the size of the calibration region."), OPT_VEC3('R', &calsize_dims, "", "()"), }; cmdline(&argc, argv, 1, 1, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); int N = DIMS; long kspace_dims[N]; complex float* kspace = load_cfl(argv[1], N, kspace_dims); for (int idx = 0; idx < 3; idx++) { kernel_dims[idx] = (kspace_dims[idx] == 1) ? 1 : kernel_dims[idx]; calsize_dims[idx] = (kspace_dims[idx] == 1) ? 1 : calsize_dims[idx]; } float variance = estvar_kspace(N, kernel_dims, calsize_dims, kspace_dims, kspace); unmap_cfl(N, kspace_dims, kspace); printf("Estimated noise variance: %f\n", variance); exit(0); } bart-0.4.02/src/extract.c000066400000000000000000000026551320577655200151560ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012 Martin Uecker */ #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #define DIMS 16 static const char usage_str[] = "dimension start end "; static const char help_str[] = "Extracts a sub-array along {dim} from index {start} to (not including) {end}.\n"; int main_extract(int argc, char* argv[]) { mini_cmdline(&argc, argv, 5, usage_str, help_str); num_init(); long in_dims[DIMS]; long out_dims[DIMS]; complex float* in_data = load_cfl(argv[4], DIMS, in_dims); int dim = atoi(argv[1]); int start = atoi(argv[2]); int end = atoi(argv[3]); assert((0 <= dim) && (dim < DIMS)); assert(start >= 0); assert(start < end); assert(end <= in_dims[dim]); for (int i = 0; i < DIMS; i++) out_dims[i] = in_dims[i]; out_dims[dim] = end - start; complex float* out_data = create_cfl(argv[5], DIMS, out_dims); long pos2[DIMS] = { [0 ... DIMS - 1] = 0 }; pos2[dim] = start; md_copy_block(DIMS, pos2, out_dims, out_data, in_dims, in_data, sizeof(complex float)); unmap_cfl(DIMS, in_dims, in_data); unmap_cfl(DIMS, out_dims, out_data); exit(0); } bart-0.4.02/src/fakeksp.c000066400000000000000000000050741320577655200151260ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012-2016 Martin Uecker */ #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/init.h" #include "sense/recon.h" #include "sense/optcom.h" #include "misc/mri.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #include "misc/debug.h" static const char usage_str[] = " "; static const char help_str[] = "Recreate k-space from image and sensitivities."; int main_fakeksp(int argc, char* argv[]) { bool rplksp = false; const struct opt_s opts[] = { OPT_SET('r', &rplksp, "replace measured samples with original values"), }; cmdline(&argc, argv, 4, 4, usage_str, help_str, ARRAY_SIZE(opts), opts); const int N = DIMS; long ksp_dims[N]; long dims[N]; long img_dims[N]; complex float* kspace_data = load_cfl(argv[2], N, ksp_dims); complex float* sens_maps = load_cfl(argv[3], N, dims); complex float* image = load_cfl(argv[1], N, img_dims); for (int i = 0; i < 4; i++) if (ksp_dims[i] != dims[i]) error("Dimensions of kspace and sensitivities do not match!\n"); assert(1 == ksp_dims[MAPS_DIM]); assert(1 == img_dims[COIL_DIM]); assert(img_dims[MAPS_DIM] == dims[MAPS_DIM]); num_init(); long dims1[N]; md_select_dims(N, ~(COIL_FLAG|MAPS_FLAG), dims1, dims); long dims2[N]; md_copy_dims(DIMS, dims2, img_dims); dims2[COIL_DIM] = dims[COIL_DIM]; dims2[MAPS_DIM] = dims[MAPS_DIM]; #if 0 float scaling = estimate_scaling(ksp_dims, NULL, kspace_data); printf("Scaling: %f\n", scaling); md_zsmul(N, ksp_dims, kspace_data, kspace_data, 1. / scaling); #endif complex float* out = create_cfl(argv[4], N, ksp_dims); fftmod(N, ksp_dims, FFT_FLAGS, kspace_data, kspace_data); fftmod(N, dims, FFT_FLAGS, sens_maps, sens_maps); if (rplksp) { debug_printf(DP_INFO, "Replace kspace\n"); replace_kspace(dims2, out, kspace_data, sens_maps, image); // this overwrites kspace_data (FIXME: think not!) 
} else { debug_printf(DP_INFO, "Simulate kspace\n"); fake_kspace(dims2, out, sens_maps, image); } #if 0 md_zsmul(N, ksp_dims, out, out, scaling); #endif fftmod(N, ksp_dims, FFT_FLAGS, out, out); unmap_cfl(N, ksp_dims, kspace_data); unmap_cfl(N, dims, sens_maps); unmap_cfl(N, img_dims, image); unmap_cfl(N, ksp_dims, out); exit(0); } bart-0.4.02/src/fft.c000066400000000000000000000027571320577655200142660ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2016 Martin Uecker */ #include #include #include #include "num/multind.h" #include "num/fft.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/opts.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "bitmask "; static const char help_str[] = "Performs a fast Fourier transform (FFT) along selected dimensions."; int main_fft(int argc, char* argv[]) { bool unitary = false; bool inv = false; bool center = true; const struct opt_s opts[] = { OPT_SET('u', &unitary, "unitary"), OPT_SET('i', &inv, "inverse"), OPT_CLEAR('n', ¢er, "un-centered"), }; cmdline(&argc, argv, 3, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); long dims[DIMS]; complex float* idata = load_cfl(argv[2], DIMS, dims); complex float* data = create_cfl(argv[3], DIMS, dims); unsigned long flags = labs(atol(argv[1])); md_copy(DIMS, dims, data, idata, sizeof(complex float)); unmap_cfl(DIMS, dims, idata); if (unitary) fftscale(DIMS, dims, flags, data, data); (inv ? (center ? ifftc : ifft) : (center ? fftc : fft))(DIMS, dims, flags, data, data); unmap_cfl(DIMS, dims, data); exit(0); } bart-0.4.02/src/fftmod.c000066400000000000000000000024571320577655200147630ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * Copyright 2016. 
Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012, 2016 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/fft.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "bitmask "; static const char help_str[] = "Apply 1 -1 modulation along dimensions selected by the {bitmask}.\n"; int main_fftmod(int argc, char* argv[]) { bool inv = false; const struct opt_s opts[] = { OPT_SET('b', &inv, "(deprecated)"), OPT_SET('i', &inv, "\tinverse"), }; cmdline(&argc, argv, 3, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); unsigned long flags = labs(atol(argv[1])); int N = DIMS; long dims[N]; complex float* idata = load_cfl(argv[2], N, dims); complex float* odata = create_cfl(argv[3], N, dims); (inv ? ifftmod : fftmod)(N, dims, flags, odata, idata); unmap_cfl(N, dims, idata); unmap_cfl(N, dims, odata); exit(0); } bart-0.4.02/src/fftshift.c000066400000000000000000000021441320577655200153120ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012, 2015 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/fft.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "bitmask "; static const char help_str[] = "Apply fftshift along dimensions selected by the {bitmask}.\n"; int main_fftshift(int argc, char* argv[]) { bool b = mini_cmdline_bool(&argc, argv, 'b', 3, usage_str, help_str); num_init(); unsigned long flags = labs(atol(argv[1])); int N = DIMS; long dims[N]; complex float* idata = load_cfl(argv[2], N, dims); complex float* odata = create_cfl(argv[3], N, dims); (b ? ifftshift : fftshift)(N, dims, flags, odata, idata); unmap_cfl(N, dims, idata); unmap_cfl(N, dims, odata); exit(0); } bart-0.4.02/src/filter.c000066400000000000000000000034071320577655200147650ustar00rootroot00000000000000/* Copyright 2015. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2015 Martin Uecker */ #include #include #include "num/multind.h" #include "num/casorati.h" #include "num/filter.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif static const char usage_str[] = " "; static const char help_str[] = "Apply filter.\n"; int main_filter(int argc, char* argv[]) { int len = -1; int dim = -1; const struct opt_s opts[] = { OPT_INT('m', &dim, "dim", "median filter along dimension dim"), OPT_INT('l', &len, "len", "length of filter"), }; cmdline(&argc, argv, 2, 2, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); long in_dims[DIMS]; complex float* in_data = load_cfl(argv[1], DIMS, in_dims); assert(dim >= 0); assert(dim < DIMS); assert(len > 0); assert(len <= in_dims[dim]); long tmp_dims[DIMS + 1]; md_copy_dims(DIMS, tmp_dims, in_dims); tmp_dims[DIMS] = 1; long tmp2_strs[DIMS + 1]; md_calc_strides(DIMS + 1, tmp2_strs, tmp_dims, CFL_SIZE); long tmp_strs[DIMS + 1]; md_calc_strides(DIMS, tmp_strs, tmp_dims, CFL_SIZE); tmp_dims[DIMS] = len; tmp_dims[dim] = in_dims[dim] - len + 1; tmp_strs[DIMS] = tmp_strs[dim]; long out_dims[DIMS]; md_copy_dims(DIMS, out_dims, tmp_dims); complex float* out_data = create_cfl(argv[2], DIMS, out_dims); md_medianz2(DIMS + 1, DIMS, tmp_dims, tmp2_strs, out_data, tmp_strs, in_data); unmap_cfl(DIMS, in_dims, in_data); unmap_cfl(DIMS, out_dims, out_data); exit(0); } bart-0.4.02/src/flatten.c000066400000000000000000000020631320577655200151320ustar00rootroot00000000000000/* Copyright 2016. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2016 Jon Tamir */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Flatten array to one dimension.\n"; int main_flatten(int argc, char* argv[]) { mini_cmdline(&argc, argv, 2, usage_str, help_str); num_init(); long idims[DIMS]; complex float* idata = load_cfl(argv[1], DIMS, idims); long odims[DIMS] = MD_INIT_ARRAY(DIMS, 1); odims[0] = md_calc_size(DIMS, idims); complex float* odata = create_cfl(argv[2], DIMS, odims); md_copy(DIMS, idims, odata, idata, CFL_SIZE); unmap_cfl(DIMS, idims, idata); unmap_cfl(DIMS, odims, odata); exit(0); } bart-0.4.02/src/flip.c000066400000000000000000000020531320577655200144260ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "bitmask "; static const char help_str[] = "Flip (reverse) dimensions specified by the {bitmask}.\n"; int main_flip(int argc, char* argv[]) { mini_cmdline(&argc, argv, 3, usage_str, help_str); num_init(); int N = DIMS; long dims[N]; complex float* idata = load_cfl(argv[2], N, dims); complex float* odata = create_cfl(argv[3], N, dims); unsigned long flags = atoi(argv[1]); md_flip(N, dims, flags, odata, idata, sizeof(complex float)); unmap_cfl(N, dims, idata); unmap_cfl(N, dims, odata); exit(0); } bart-0.4.02/src/fmac.c000066400000000000000000000042461320577655200144100ustar00rootroot00000000000000/* Copyright 2013, 2016. The Regents of the University of California. * Copyright 2015. 
Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012, 2015 Martin Uecker * 2016 Jon Tamir */ #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " [] "; static const char help_str[] = "Multiply and and accumulate in .\n" "If is not specified, assume all-ones."; int main_fmac(int argc, char* argv[]) { bool clear = true; bool conj = false; long squash = 0; const struct opt_s opts[] = { OPT_CLEAR('A', &clear, "add to existing output (instead of overwriting)"), OPT_SET('C', &conj, "conjugate input2"), OPT_LONG('s', &squash, "b", "squash dimensions selected by bitmask b"), }; cmdline(&argc, argv, 2, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); int num_args = argc - 1; num_init(); int N = DIMS; long dims1[N]; long dims2[N]; complex float* data1 = load_cfl(argv[1], N, dims1); complex float* data2 = NULL; if (3 == num_args) data2 = load_cfl(argv[2], N, dims2); else { md_singleton_dims(N, dims2); data2 = md_alloc(N, dims2, CFL_SIZE); md_zfill(N, dims2, data2, 1.); } long dims[N]; md_merge_dims(N, dims, dims1, dims2); long dimso[N]; md_select_dims(N, ~squash, dimso, dims); complex float* out = create_cfl((3 == num_args) ? argv[3] : argv[2], N, dimso); if (clear) { md_clear(N, dimso, out, CFL_SIZE); } long str1[N]; long str2[N]; long stro[N]; md_calc_strides(N, str1, dims1, CFL_SIZE); md_calc_strides(N, str2, dims2, CFL_SIZE); md_calc_strides(N, stro, dimso, CFL_SIZE); (conj ? 
md_zfmacc2 : md_zfmac2)(N, dims, stro, out, str1, data1, str2, data2); unmap_cfl(N, dims1, data1); unmap_cfl(N, dimso, out); if (3 == num_args) unmap_cfl(N, dims2, data2); else md_free(data2); exit(0); } bart-0.4.02/src/grecon/000077500000000000000000000000001320577655200146055ustar00rootroot00000000000000bart-0.4.02/src/grecon/optreg.c000066400000000000000000000331701320577655200162550ustar00rootroot00000000000000/* Copyright 2015-2017. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2015-2016 Martin Uecker * 2015-2016 Frank Ong * 2015-2017 Jon Tamir * */ #include #include #include #include #include #include "num/multind.h" #include "num/iovec.h" #include "num/ops.h" #include "iter/prox.h" #include "iter/thresh.h" #include "linops/linop.h" #include "linops/someops.h" #include "linops/grad.h" #include "linops/sum.h" #include "linops/waveop.h" #include "wavelet/wavthresh.h" #include "lowrank/lrthresh.h" #include "misc/mri.h" #include "misc/utils.h" #include "misc/opts.h" #include "misc/debug.h" #include "optreg.h" #define CFL_SIZE sizeof(complex float) void help_reg(void) { printf( "Generalized regularization options (experimental)\n\n" "-R :A:B:C\t is regularization type (single letter),\n" "\t\tA is transform flags, B is joint threshold flags,\n" "\t\tand C is regularization value. Specify any number\n" "\t\tof regularization terms.\n\n" "-R Q:C \tl2-norm in image domain\n" "-R I:B:C \tl1-norm in image domain\n" "-R W:A:B:C\tl1-wavelet\n" "-R N:A:B:C\tNormalized Iterative Hard Thresholding (NIHT), image domain\n" "\t\tC is an integer percentage, i.e. 
from 0-100\n" "-R H:A:B:C\tNIHT, wavelet domain\n" "-R T:A:B:C\ttotal variation\n" "-R T:7:0:.01\t3D isotropic total variation with 0.01 regularization.\n" "-R L:7:7:.02\tLocally low rank with spatial decimation and 0.02 regularization.\n" "-R M:7:7:.03\tMulti-scale low rank with spatial decimation and 0.03 regularization.\n" ); } bool opt_reg(void* ptr, char c, const char* optarg) { struct opt_reg_s* p = ptr; struct reg_s* regs = p->regs; const int r = p->r; const float lambda = p->lambda; assert(r < NUM_REGS); char rt[5]; switch (c) { case 'R': { // first get transform type int ret = sscanf(optarg, "%4[^:]", rt); assert(1 == ret); // next switch based on transform type if (strcmp(rt, "W") == 0) { regs[r].xform = L1WAV; int ret = sscanf(optarg, "%*[^:]:%d:%d:%f", ®s[r].xflags, ®s[r].jflags, ®s[r].lambda); assert(3 == ret); } else if (strcmp(rt, "H") == 0) { regs[r].xform = NIHTWAV; int ret = sscanf(optarg, "%*[^:]:%d:%d:%d", ®s[r].xflags, ®s[r].jflags, ®s[r].k); assert(3 == ret); p->algo = NIHT; } else if (strcmp(rt, "N") == 0) { regs[r].xform = NIHTIM; int ret = sscanf(optarg, "%*[^:]:%d:%d:%d", ®s[r].xflags, ®s[r].jflags, ®s[r].k); assert(3 == ret); p->algo = NIHT; } else if (strcmp(rt, "L") == 0) { regs[r].xform = LLR; int ret = sscanf(optarg, "%*[^:]:%d:%d:%f", ®s[r].xflags, ®s[r].jflags, ®s[r].lambda); assert(3 == ret); } else if (strcmp(rt, "M") == 0) { regs[r].xform = regs[0].xform; regs[r].xflags = regs[0].xflags; regs[r].jflags = regs[0].jflags; regs[r].lambda = regs[0].lambda; regs[0].xform = MLR; int ret = sscanf(optarg, "%*[^:]:%d:%d:%f", ®s[0].xflags, ®s[0].jflags, ®s[0].lambda); assert(3 == ret); } else if (strcmp(rt, "T") == 0) { regs[r].xform = TV; int ret = sscanf(optarg, "%*[^:]:%d:%d:%f", ®s[r].xflags, ®s[r].jflags, ®s[r].lambda); assert(3 == ret); p->algo = ADMM; } else if (strcmp(rt, "P") == 0) { regs[r].xform = LAPLACE; int ret = sscanf(optarg, "%*[^:]:%d:%d:%f", ®s[r].xflags, ®s[r].jflags, ®s[r].lambda); assert(3 == ret); } else if 
(strcmp(rt, "R1") == 0) { regs[r].xform = IMAGL1; int ret = sscanf(optarg, "%*[^:]:%d:%f", ®s[r].jflags, ®s[r].lambda); assert(2 == ret); regs[r].xflags = 0u; p->algo = ADMM; } else if (strcmp(rt, "R2") == 0) { regs[r].xform = IMAGL2; int ret = sscanf(optarg, "%*[^:]:%d:%f", ®s[r].jflags, ®s[r].lambda); assert(2 == ret); regs[r].xflags = 0u; p->algo = ADMM; } else if (strcmp(rt, "I") == 0) { regs[r].xform = L1IMG; int ret = sscanf(optarg, "%*[^:]:%d:%f", ®s[r].jflags, ®s[r].lambda); assert(2 == ret); regs[r].xflags = 0u; } else if (strcmp(rt, "Q") == 0) { regs[r].xform = L2IMG; int ret = sscanf(optarg, "%*[^:]:%f", ®s[r].lambda); assert(1 == ret); regs[r].xflags = 0u; regs[r].jflags = 0u; } else if (strcmp(rt, "F") == 0) { regs[r].xform = FTL1; int ret = sscanf(optarg, "%*[^:]:%d:%d:%f", ®s[r].xflags, ®s[r].jflags, ®s[r].lambda); assert(3 == ret); } else if (strcmp(rt, "h") == 0) { help_reg(); exit(0); } else { error("Unrecognized regularization type: \"%s\" (-Rh for help).\n", rt); } p->r++; break; } case 'l': assert(r < NUM_REGS); regs[r].lambda = lambda; regs[r].xflags = 0u; regs[r].jflags = 0u; if (0 == strcmp("1", optarg)) { regs[r].xform = L1WAV; regs[r].xflags = 7u; } else if (0 == strcmp("2", optarg)) { regs[r].xform = L2IMG; } else { error("Unknown regularization type.\n"); } p->lambda = -1.; p->r++; break; } return false; } bool opt_reg_init(struct opt_reg_s* ropts) { ropts->r = 0; ropts->algo = CG; ropts->lambda = -1; ropts->k = 0; return false; } void opt_bpursuit_configure(struct opt_reg_s* ropts, const struct operator_p_s* prox_ops[NUM_REGS], const struct linop_s* trafos[NUM_REGS], const struct linop_s* model_op, const complex float* data, const float eps) { int nr_penalties = ropts->r; assert(NUM_REGS > nr_penalties); const struct iovec_s* iov = linop_codomain(model_op); prox_ops[nr_penalties] = prox_l2ball_create(iov->N, iov->dims, eps, data); trafos[nr_penalties] = linop_clone(model_op); ropts->r++; } void opt_reg_configure(unsigned int N, const 
long img_dims[N], struct opt_reg_s* ropts, const struct operator_p_s* prox_ops[NUM_REGS], const struct linop_s* trafos[NUM_REGS], unsigned int llr_blk, bool randshift, bool use_gpu) { float lambda = ropts->lambda; if (-1. == lambda) lambda = 0.; // if no penalities specified but regularization // parameter is given, add a l2 penalty struct reg_s* regs = ropts->regs; if ((0 == ropts->r) && (lambda > 0.)) { regs[0].xform = L2IMG; regs[0].xflags = 0u; regs[0].jflags = 0u; regs[0].lambda = lambda; ropts->r = 1; } int nr_penalties = ropts->r; long blkdims[MAX_LEV][DIMS]; int levels; for (int nr = 0; nr < nr_penalties; nr++) { // fix up regularization parameter if (-1. == regs[nr].lambda) regs[nr].lambda = lambda; switch (regs[nr].xform) { case L1WAV: { debug_printf(DP_INFO, "l1-wavelet regularization: %f\n", regs[nr].lambda); long minsize[DIMS] = { [0 ... DIMS - 1] = 1 }; minsize[0] = MIN(img_dims[0], 16); minsize[1] = MIN(img_dims[1], 16); minsize[2] = MIN(img_dims[2], 16); unsigned int wflags = 0; for (unsigned int i = 0; i < DIMS; i++) { if ((1 < img_dims[i]) && MD_IS_SET(regs[nr].xflags, i)) { wflags = MD_SET(wflags, i); minsize[i] = MIN(img_dims[i], 16); } } trafos[nr] = linop_identity_create(DIMS, img_dims); prox_ops[nr] = prox_wavelet_thresh_create(DIMS, img_dims, wflags, regs[nr].jflags, minsize, regs[nr].lambda, randshift); break; } case NIHTWAV: { debug_printf(DP_INFO, "NIHT with wavelets regularization: k = %d%% of total elements in each wavelet transform\n", regs[nr].k); if (use_gpu){ debug_printf(DP_WARN, "GPU operation is not currently implemented for NIHT.\nContinuing with CPU.\n"); use_gpu = false; // not implemented, TODO: implement NIHT with gpu } long img_strs[N]; md_calc_strides(N, img_strs, img_dims, CFL_SIZE); long minsize[DIMS] = { [0 ... 
DIMS - 1] = 1 }; minsize[0] = MIN(img_dims[0], 16); minsize[1] = MIN(img_dims[1], 16); minsize[2] = MIN(img_dims[2], 16); unsigned int wflags = 0; unsigned int wxdim = 0; for (unsigned int i = 0; i < DIMS; i++) { if ((1 < img_dims[i]) && MD_IS_SET(regs[nr].xflags, i)) { wflags = MD_SET(wflags, i); minsize[i] = MIN(img_dims[i], 16); wxdim += 1; } } trafos[nr] = linop_wavelet_create(N, wflags, img_dims, img_strs, minsize, randshift); long wav_dims[DIMS]; md_copy_dims(DIMS, wav_dims, linop_codomain(trafos[nr])->dims); unsigned int K = (md_calc_size(wxdim, wav_dims) / 100) * regs[nr].k; debug_printf(DP_DEBUG3, "\nK = %d elements will be thresholded per wavelet transform\n", K); debug_printf(DP_DEBUG3, "Total wavelet dimensions: \n["); for (unsigned int i = 0; i < DIMS; i++) debug_printf(DP_DEBUG3,"%d ", wav_dims[i]); debug_printf(DP_DEBUG3, "]\n"); prox_ops[nr] = prox_niht_thresh_create(N, wav_dims, K, regs[nr].jflags, use_gpu ); break; } case NIHTIM: { debug_printf(DP_INFO, "NIHT regularization in the image domain: k = %d%% of total elements in image vector\n", regs[nr].k); if (use_gpu){ debug_printf(DP_WARN, "GPU operation is not currently implemented for NIHT.\nContinuing with CPU.\n"); use_gpu = false; // not implemented, TODO: implement NIHT with gpu } long thresh_dims[N]; md_select_dims(N, regs[nr].xflags, thresh_dims, img_dims); unsigned int K = (md_calc_size(N, thresh_dims) / 100) * regs[nr].k; debug_printf(DP_INFO, "k = %d%%, actual K = %d\n", regs[nr].k, K); prox_ops[nr] = prox_niht_thresh_create(N, img_dims, K, regs[nr].jflags, use_gpu ); debug_printf(DP_INFO, "NIHTIM initialization complete\n"); break; } case TV: debug_printf(DP_INFO, "TV regularization: %f\n", regs[nr].lambda); trafos[nr] = linop_grad_create(DIMS, img_dims, regs[nr].xflags); prox_ops[nr] = prox_thresh_create(DIMS + 1, linop_codomain(trafos[nr])->dims, regs[nr].lambda, regs[nr].jflags | MD_BIT(DIMS), use_gpu); break; case LAPLACE: debug_printf(DP_INFO, "L1-Laplace regularization: %f\n", 
regs[nr].lambda); long krn_dims[DIMS] = { [0 ... DIMS - 1] = 1 }; for (unsigned int i = 0; i < DIMS; i++) if (MD_IS_SET(regs[nr].xflags, i)) krn_dims[i] = 3; complex float krn[] = { // laplace filter -1., -2., -1., -2., 12., -2., -1., -2., -1., }; assert(9 == md_calc_size(DIMS, krn_dims)); trafos[nr] = linop_conv_create(DIMS, regs[nr].xflags, CONV_TRUNCATED, CONV_SYMMETRIC, img_dims, img_dims, krn_dims, krn); prox_ops[nr] = prox_thresh_create(DIMS, linop_codomain(trafos[nr])->dims, regs[nr].lambda, regs[nr].jflags, use_gpu); break; case LLR: debug_printf(DP_INFO, "lowrank regularization: %f\n", regs[nr].lambda); // add locally lowrank penalty levels = llr_blkdims(blkdims, regs[nr].jflags, img_dims, llr_blk); assert(1 == levels); assert(levels == img_dims[LEVEL_DIM]); for(int l = 0; l < levels; l++) #if 0 blkdims[l][MAPS_DIM] = img_dims[MAPS_DIM]; #else blkdims[l][MAPS_DIM] = 1; #endif int remove_mean = 0; trafos[nr] = linop_identity_create(DIMS, img_dims); prox_ops[nr] = lrthresh_create(img_dims, randshift, regs[nr].xflags, (const long (*)[DIMS])blkdims, regs[nr].lambda, false, remove_mean, use_gpu); break; case MLR: #if 0 // FIXME: multiscale low rank changes the output image dimensions // and requires the forward linear operator. This should be decoupled... 
debug_printf(DP_INFO, "multi-scale lowrank regularization: %f\n", regs[nr].lambda); levels = multilr_blkdims(blkdims, regs[nr].jflags, img_dims, 8, 1); img_dims[LEVEL_DIM] = levels; max_dims[LEVEL_DIM] = levels; for(int l = 0; l < levels; l++) blkdims[l][MAPS_DIM] = 1; trafos[nr] = linop_identity_create(DIMS, img_dims); prox_ops[nr] = lrthresh_create(img_dims, randshift, regs[nr].xflags, (const long (*)[DIMS])blkdims, regs[nr].lambda, false, 0, use_gpu); const struct linop_s* decom_op = sum_create( img_dims, use_gpu ); const struct linop_s* tmp_op = forward_op; forward_op = linop_chain(decom_op, forward_op); linop_free(decom_op); linop_free(tmp_op); #else debug_printf(DP_WARN, "multi-scale lowrank regularization not yet supported: %f\n", regs[nr].lambda); #endif break; case IMAGL1: debug_printf(DP_INFO, "l1 regularization of imaginary part: %f\n", regs[nr].lambda); trafos[nr] = linop_rdiag_create(DIMS, img_dims, 0, &(complex float){ 1.i }); prox_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu); break; case IMAGL2: debug_printf(DP_INFO, "l2 regularization of imaginary part: %f\n", regs[nr].lambda); trafos[nr] = linop_rdiag_create(DIMS, img_dims, 0, &(complex float){ 1.i }); prox_ops[nr] = prox_leastsquares_create(DIMS, img_dims, regs[nr].lambda, NULL); break; case L1IMG: debug_printf(DP_INFO, "l1 regularization: %f\n", regs[nr].lambda); trafos[nr] = linop_identity_create(DIMS, img_dims); prox_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu); break; case L2IMG: debug_printf(DP_INFO, "l2 regularization: %f\n", regs[nr].lambda); trafos[nr] = linop_identity_create(DIMS, img_dims); prox_ops[nr] = prox_leastsquares_create(DIMS, img_dims, regs[nr].lambda, NULL); break; case FTL1: debug_printf(DP_INFO, "l1 regularization of Fourier transform: %f\n", regs[nr].lambda); trafos[nr] = linop_fft_create(DIMS, img_dims, regs[nr].xflags); prox_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, 
regs[nr].jflags, use_gpu); break; } } } void opt_reg_free(struct opt_reg_s* ropts, const struct operator_p_s* prox_ops[NUM_REGS], const struct linop_s* trafos[NUM_REGS]) { int nr_penalties = ropts->r; for (int nr = 0; nr < nr_penalties; nr++) { operator_p_free(prox_ops[nr]); linop_free(trafos[nr]); } } bart-0.4.02/src/grecon/optreg.h000066400000000000000000000027201320577655200162570ustar00rootroot00000000000000/* Copyright 2014-2017. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifndef __OPTREG_H #define __OPTREG_H #include "misc/cppwrap.h" #define NUM_REGS 10 struct operator_p_s; struct linop_s; enum algo_t { CG, IST, FISTA, ADMM, NIHT, PRIDU }; struct reg_s { enum { L1WAV, NIHTWAV, NIHTIM, TV, LLR, MLR, IMAGL1, IMAGL2, L1IMG, L2IMG, FTL1, LAPLACE } xform; unsigned int xflags; unsigned int jflags; float lambda; unsigned int k; }; struct opt_reg_s { float lambda; enum algo_t algo; struct reg_s regs[NUM_REGS]; unsigned int r; unsigned int k; }; extern _Bool opt_reg_init(struct opt_reg_s* ropts); extern void opt_bpursuit_configure(struct opt_reg_s* ropts, const struct operator_p_s* prox_ops[NUM_REGS], const struct linop_s* trafos[NUM_REGS], const struct linop_s* model_op, const _Complex float* data, const float eps); extern void opt_reg_configure(unsigned int N, const long img_dims[__VLA(N)], struct opt_reg_s* ropts, const struct operator_p_s* prox_ops[NUM_REGS], const struct linop_s* trafos[NUM_REGS], unsigned int llr_blk, _Bool randshift, _Bool use_gpu); extern void opt_reg_free(struct opt_reg_s* ropts, const struct operator_p_s* prox_ops[NUM_REGS], const struct linop_s* trafos[NUM_REGS]); extern _Bool opt_reg(void* ptr, char c, const char* optarg); extern void help_reg(void); #include "misc/cppwrap.h" #endif bart-0.4.02/src/homodyne.c000066400000000000000000000114761320577655200153270ustar00rootroot00000000000000/* Copyright 2014-2015. 
/**
 * Weight of the homodyne filter at one k-space position.
 *
 * @param N     k-space dimension
 * @param frac  fraction of acquired k-space
 * @param alpha offset of the ramp, between 0 and 1
 * @param clear if true, positions at or beyond the ramp get weight 0
 *              instead of 1
 * @param p     k-space position
 * @return the filter weight
 *
 * For frac <= 0.5 the weight is 1 everywhere. Otherwise, with
 * start = N * (1 - frac) and end = N * frac:
 *   p < start          -> 2
 *   start <= p < end   -> 2 * (alpha - 1) / (end - start) * (p - end) + alpha
 *   p >= end           -> clear ? 0 : 1
 */
static float homodyne_filter(long N, float frac, float alpha, bool clear, long p)
{
	if (frac <= 0.5)
		return 1.;

	const float ramp_lo = N * (1 - frac);
	const float ramp_hi = N * frac;

	float weight;

	if (p < ramp_lo) {

		weight = 2.;

	} else if (p < ramp_hi) {

		// linear ramp between the two plateaus
		weight = 2 * (alpha - 1) / (ramp_hi - ramp_lo) * (p - ramp_hi) + alpha;

	} else {

		weight = clear ? 0. : 1.;
	}

	return weight;
}
alpha=0 is a full ramp, alpha=1 is a horizontal line"), OPT_SET('I', &image, "Input is in image domain"), OPT_SET('C', &clear, "Clear unacquired portion of kspace"), OPT_STRING('P', &phase_ref, "phase_ref>", "Use as phase reference"), }; cmdline(&argc, argv, 4, 4, usage_str, help_str, ARRAY_SIZE(opts), opts); const int N = DIMS; long dims[N]; complex float* idata = load_cfl(argv[3], N, dims); complex float* data = create_cfl(argv[4], N, dims); int pfdim = atoi(argv[1]); float frac = atof(argv[2]); assert((0 <= pfdim) && (pfdim < N)); assert(frac > 0.); if (image) { complex float* ksp_in = md_alloc(N, dims, CFL_SIZE); fftuc(N, dims, FFT_FLAGS, ksp_in, idata); md_copy(N, dims, idata, ksp_in, CFL_SIZE); md_free(ksp_in); } long strs[N]; md_calc_strides(N, strs, dims, CFL_SIZE); struct wdata wdata; wdata.frac = frac; wdata.pfdim = pfdim; md_select_dims(N, MD_BIT(pfdim), wdata.wdims, dims); md_calc_strides(N, wdata.wstrs, wdata.wdims, CFL_SIZE); wdata.weights = md_alloc(N, wdata.wdims, CFL_SIZE); wdata.alpha = alpha; wdata.clear = clear; md_loop(N, wdata.wdims, &wdata, comp_weights); long pstrs[N]; long pdims[N]; complex float* phase = NULL; if (NULL == phase_ref) { phase = estimate_phase(wdata, FFT_FLAGS, N, dims, idata); md_copy_dims(N, pdims, dims); } else phase = load_cfl(phase_ref, N, pdims); md_calc_strides(N, pstrs, pdims, CFL_SIZE); homodyne(wdata, FFT_FLAGS, N, dims, strs, data, idata, pstrs, phase); md_free(wdata.weights); if (NULL == phase_ref) md_free(phase); else { unmap_cfl(N, pdims, phase); free((void*)phase_ref); } unmap_cfl(N, dims, idata); unmap_cfl(N, dims, data); exit(0); } bart-0.4.02/src/invert.c000066400000000000000000000021021320577655200147760ustar00rootroot00000000000000/* Copyright 2016. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2016 Jon Tamir */ #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Invert array (1 / ). The output is set to zero in case of divide by zero.\n"; int main_invert(int argc, char* argv[]) { mini_cmdline(&argc, argv, 2, usage_str, help_str); num_init(); long dims[DIMS]; complex float* idata = load_cfl(argv[1], DIMS, dims); complex float* odata = create_cfl(argv[2], DIMS, dims); #pragma omp parallel for for (long i = 0; i < md_calc_size(DIMS, dims); i++) odata[i] = idata[i] == 0 ? 0. : 1. / idata[i]; unmap_cfl(DIMS, dims, idata); unmap_cfl(DIMS, dims, odata); exit(0); } bart-0.4.02/src/ismrm/000077500000000000000000000000001320577655200144575ustar00rootroot00000000000000bart-0.4.02/src/ismrm/read.c000066400000000000000000000053371320577655200155460ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012, 2016 Martin Uecker */ #include #include #include #include "ismrmrd/ismrmrd.h" #include "ismrmrd/dataset.h" #include "misc/misc.h" #include "misc/mri.h" #include "misc/debug.h" #include "num/multind.h" #include "num/flpmath.h" #include "read.h" // FIXME: does not deal correctly with repetitions (and others stuff) int ismrm_read(const char* datafile, long dims[DIMS], complex float* buf) { ISMRMRD_Dataset d; ismrmrd_init_dataset(&d, datafile, "/dataset"); ismrmrd_open_dataset(&d, false); assert(DIMS > 5); unsigned int number_of_acquisitions = ismrmrd_get_number_of_acquisitions(&d); long pos[DIMS]; long channels = -1; long slices = 0; long samples = -1; for (unsigned int i = 0; i < DIMS; i++) pos[i] = 0; long strs[DIMS]; long adc_dims[DIMS]; long adc_strs[DIMS]; if (NULL == buf) { md_singleton_dims(DIMS, dims); } else { md_calc_strides(DIMS, strs, dims, CFL_SIZE); md_select_dims(DIMS, READ_FLAG|COIL_FLAG, adc_dims, dims); md_calc_strides(DIMS, adc_strs, adc_dims, CFL_SIZE); } ISMRMRD_Acquisition acq; for (unsigned int i = 0; i < number_of_acquisitions; i++) { ismrmrd_init_acquisition(&acq); ismrmrd_read_acquisition(&d, i, &acq); if (acq.head.flags & (1 << (ISMRMRD_ACQ_IS_NOISE_MEASUREMENT - 1))) continue; if (-1 == channels) { channels = acq.head.available_channels; samples = acq.head.number_of_samples; } pos[1] = acq.head.idx.kspace_encode_step_1; pos[2] = acq.head.idx.kspace_encode_step_2; pos[4] = slices; // acq.head.idx.slice; if (buf != NULL) { assert(pos[1] < dims[1]); assert(pos[2] < dims[2]); assert(pos[4] < dims[4]); assert(dims[0] == acq.head.number_of_samples); assert(dims[3] == acq.head.active_channels); assert(dims[3] == acq.head.available_channels); debug_printf(DP_DEBUG3, ":/%ld %ld/%ld %ld/%ld :/%ld %ld/%ld %d\n", dims[0], pos[1], dims[1], pos[2], dims[2], dims[3], pos[4], dims[4], number_of_acquisitions); md_copy_block2(DIMS, pos, dims, strs, buf, adc_dims, adc_strs, acq.data, CFL_SIZE); } else { dims[1] = MAX(dims[1], pos[1] + 1); 
dims[2] = MAX(dims[2], pos[2] + 1); } if (acq.head.flags & (1 << (ISMRMRD_ACQ_LAST_IN_SLICE - 1))) slices++; // ismrmrd_free_acquisition(&acq); } if (NULL == buf) { dims[0] = samples; dims[3] = channels; dims[4] = slices; } else { assert(dims[3] == channels); assert(dims[4] == slices); } // printf("Done.\n"); return 0; } bart-0.4.02/src/ismrm/read.h000066400000000000000000000002611320577655200155420ustar00rootroot00000000000000 #include "misc/mri.h" #ifdef __cplusplus extern "C" { #endif extern int ismrm_read(const char* datafile, long dims[DIMS], _Complex float* buf); #ifdef __cplusplus } #endif bart-0.4.02/src/ismrmrd.c000066400000000000000000000024471320577655200151600ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012 Martin Uecker */ #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "misc/misc.h" #include "misc/mmio.h" #include "misc/mri.h" #include "ismrm/read.h" static const char usage_str[] = " "; static const char help_str[] = "Import ISMRM raw data files.\n"; int main_ismrmrd(int argc, char* argv[]) { mini_cmdline(argc, argv, 2, usage_str, help_str); long dims[DIMS]; printf("Reading headers... "); fflush(stdout); if (-1 == ismrm_read(argv[1], dims, NULL)) { fprintf(stderr, "Reading headers failed.\n"); exit(1); } printf("done.\n"); printf("Dimensions:"); unsigned int i; for (i = 0; i < DIMS; i++) printf(" %ld", dims[i]); printf("\n"); complex float* out = create_cfl(argv[2], DIMS, dims); md_clear(DIMS, dims, out, CFL_SIZE); printf("Reading data... 
"); fflush(stdout); if (-1 == ismrm_read(argv[1], dims, out)) { fprintf(stderr, "Reading data failed.\n"); exit(1); } printf("done.\n"); unmap_cfl(DIMS, dims, out); exit(0); } bart-0.4.02/src/iter/000077500000000000000000000000001320577655200142735ustar00rootroot00000000000000bart-0.4.02/src/iter/admm.c000066400000000000000000000253361320577655200153660ustar00rootroot00000000000000/* Copyright 2014-2016. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014-2017 Martin Uecker * 2014-2016 Jonathan Tamir * * * * Glowinski R and Marroco A. Sur l'approximation, par elements finis * d'ordre un, et la resolution, par penalisation-dualite d'une classe * de problemes de Dirichlet non lineaires. ESAIM: Mathematical * Modelling and Numerical Analysis - Modelisation Mathematique * et Analyse Numerique 9.R2: 41-76 (1975) * * Daniel Gabay and Bertrand Mercier. * A dual algorithm for the solution of nonlinear variational problems * via finite element approximation * Computers & Mathematics with Applications, 2:17-40 (1976) * * * Afonso MA, Bioucas-Dias JM, Figueiredo M. An Augmented Lagrangian Approach to * the Constrained Optimization Formulation of Imaging Inverse Problems, * IEEE Trans Image Process, 20:681-695 (2011) * * Boyd S, Parikh N, Chu E, Peleato B, Eckstein J. 
Distributed Optimization and * Statistical Learning via the Alternating Direction Method of Multipliers, * Foundations and Trends in Machine Learning, 3:1-122 (2011) * */ #include #include #include #include "num/ops.h" #include "misc/debug.h" #include "misc/misc.h" #include "misc/types.h" #include "iter/italgos.h" #include "iter/vec.h" #include "iter/monitor.h" #include "admm.h" DEF_TYPEID(admm_history_s); struct admm_normaleq_data { INTERFACE(iter_op_data); long N; unsigned int num_funs; struct admm_op* ops; float rho; const struct vec_iter_s* vops; unsigned int nr_invokes; struct iter_op_s Aop; }; static DEF_TYPEID(admm_normaleq_data); static void admm_normaleq(iter_op_data* _data, float* dst, const float* src) { struct admm_normaleq_data* data = CAST_DOWN(admm_normaleq_data, _data); float* tmp = data->vops->allocate(data->N); data->vops->clear(data->N, dst); for (unsigned int i = 0; i < data->num_funs; i++) { iter_op_call(data->ops[i].normal, tmp, src); if (NULL != data->Aop.fun) data->vops->axpy(data->N, dst, data->rho, tmp); else data->vops->add(data->N, dst, dst, tmp); } data->nr_invokes++; if (NULL != data->Aop.fun) { iter_op_call(data->Aop, tmp, src); data->vops->add(data->N, dst, dst, tmp); } data->vops->del(tmp); } struct cg_xupdate_s { INTERFACE(iter_op_data); unsigned int N; const struct vec_iter_s* vops; unsigned int maxitercg; float cg_eps; struct admm_normaleq_data* ndata; struct iter_monitor_s* monitor; }; static DEF_TYPEID(cg_xupdate_s); static void cg_xupdate(iter_op_data* _data, float rho, float* x, const float* rhs) { struct cg_xupdate_s* data = CAST_DOWN(cg_xupdate_s, _data); assert(data->ndata->rho == rho); data->ndata->nr_invokes--; // undo counting in admm float eps = data->vops->norm(data->N, rhs); // data->vops->clear(data->N, x); if (0. 
== eps) // x should have been initialized already return; conjgrad(data->maxitercg, 0., data->cg_eps * eps, data->N, data->vops, (struct iter_op_s){ admm_normaleq, CAST_UP(data->ndata) }, x, rhs, data->monitor); data->ndata->nr_invokes--; // subtract one for initialization in conjgrad } static long sum_long_array(unsigned int N, const long a[N]) { return ((0 == N) ? 0 : (a[0] + sum_long_array(N - 1, a + 1))); } /* * ADMM (ADMM-2 from Afonso et al.) * * Solves min_x 0.5 || y - Ax ||_2^2 + sum_i f_i(G_i x - b_i), where the f_i are * arbitrary convex functions. If Aop is NULL, solves min_x sum_i f_i(G_i x - b_i) * * Each iteration requires solving the proximal of f_i, as well as applying * G_i, G_i^H, and G_i^H G_i, all which must be provided in admm_plan_s. * The b_i are offsets (biases) that should also be provided in admm_plan_s. */ void admm(const struct admm_plan_s* plan, unsigned int D, const long z_dims[D], long N, float* x, const float* x_adj, const struct vec_iter_s* vops, struct iter_op_s Aop, struct iter_monitor_s* monitor) { unsigned int num_funs = D; float* rhs = vops->allocate(N); float* s = vops->allocate(N); float* GH_usum = NULL; float* z[num_funs ?:1]; float* u[num_funs ?:1]; float* r[num_funs ?:1]; for (unsigned int j = 0; j < num_funs; j++) { z[j] = vops->allocate(z_dims[j]); u[j] = vops->allocate(z_dims[j]); r[j] = vops->allocate(z_dims[j]); } if (!plan->fast) GH_usum = vops->allocate(N); float rho = plan->rho; struct admm_normaleq_data ndata = { .INTERFACE.TYPEID = &TYPEID(admm_normaleq_data), .N = N, .num_funs = num_funs, .ops = plan->ops, .Aop = Aop, .rho = 1., .vops = vops, .nr_invokes = 0, }; struct iter_op_p_s xupdate = plan->xupdate; struct cg_xupdate_s cg_xupdate_data = { .INTERFACE.TYPEID = &TYPEID(cg_xupdate_s), .N = N, .vops = vops, .maxitercg = plan->maxitercg, .cg_eps = plan->cg_eps, .ndata = &ndata, .monitor = monitor, }; if (NULL == xupdate.fun) xupdate = (struct iter_op_p_s){ cg_xupdate, CAST_UP(&cg_xupdate_data) }; // hogwild int 
hw_K = 1; int hw_k = 0; const float* biases[num_funs ?:1]; for (unsigned int j = 0; j < num_funs; j++) biases[j] = (NULL != plan->biases) ? plan->biases[j] : NULL; // compute norm of biases -- for eps_primal double n3 = 0.; if (!plan->fast) { for (unsigned int j = 0; j < num_funs; j++) if (biases[j] != NULL) n3 += pow(vops->norm(z_dims[j], biases[j]), 2.); } if (plan->do_warmstart) { for (unsigned int j = 0; j < num_funs; j++) { // initialize for j'th function update float* Gjx_plus_uj = vops->allocate(z_dims[j]); iter_op_call(plan->ops[j].forward, Gjx_plus_uj, x); // Gj(x) if (NULL != biases[j]) vops->sub(z_dims[j], Gjx_plus_uj, Gjx_plus_uj, biases[j]); if (0. == rho) vops->copy(z_dims[j], z[j], Gjx_plus_uj); else iter_op_p_call(plan->prox_ops[j], 1. / rho, z[j], Gjx_plus_uj); vops->sub(z_dims[j], u[j], Gjx_plus_uj, z[j]); vops->del(Gjx_plus_uj); } } else { for (unsigned int j = 0; j < num_funs; j++) { vops->clear(z_dims[j], z[j]); vops->clear(z_dims[j], u[j]); } } for (unsigned int i = 0; i < plan->maxiter; i++) { iter_monitor(monitor, vops, x); // update x vops->clear(N, rhs); for (unsigned int j = 0; j < num_funs; j++) { vops->sub(z_dims[j], r[j], z[j], u[j]); if (NULL != biases[j]) vops->add(z_dims[j], r[j], r[j], biases[j]); iter_op_call(plan->ops[j].adjoint, s, r[j]); vops->add(N, rhs, rhs, s); } if (NULL != Aop.fun) { vops->xpay(N, rho, rhs, x_adj); } ndata.rho = rho; iter_op_p_call(xupdate, rho, x, rhs); ndata.nr_invokes++; double n1 = 0.; if (!plan->fast) { vops->clear(N, GH_usum); vops->clear(N, s); for (unsigned int j = 0; j < num_funs; j++) vops->clear(z_dims[j], r[j]); } // z_j prox for (unsigned int j = 0; j < num_funs; j++) { // initialize for j'th function update float* Gjx_plus_uj = vops->allocate(z_dims[j]); float* zj_old = vops->allocate(z_dims[j]); iter_op_call(plan->ops[j].forward, Gjx_plus_uj, x); // Gj(x) // over-relaxation: Gjx_hat = alpha * Gj(x) + (1 - alpha) * (zj_old + bj) if (!plan->fast) { vops->copy(z_dims[j], zj_old, z[j]); 
vops->copy(z_dims[j], r[j], Gjx_plus_uj); // rj = Gj(x) n1 += pow(vops->norm(z_dims[j], r[j]), 2.); vops->smul(z_dims[j], plan->alpha, Gjx_plus_uj, Gjx_plus_uj); vops->axpy(z_dims[j], Gjx_plus_uj, (1. - plan->alpha), z[j]); if (NULL != biases[j]) vops->axpy(z_dims[j], Gjx_plus_uj, (1. - plan->alpha), biases[j]); } vops->add(z_dims[j], Gjx_plus_uj, Gjx_plus_uj, u[j]); // Gj(x) + uj if (NULL != biases[j]) vops->sub(z_dims[j], Gjx_plus_uj, Gjx_plus_uj, biases[j]); // Gj(x) - bj + uj if (0. == rho) vops->copy(z_dims[j], z[j], Gjx_plus_uj); else iter_op_p_call(plan->prox_ops[j], 1. / rho, z[j], Gjx_plus_uj); vops->sub(z_dims[j], u[j], Gjx_plus_uj, z[j]); vops->del(Gjx_plus_uj); if (!plan->fast) { // rj = rj - zj - bj = Gj(x) - zj - bj vops->sub(z_dims[j], r[j], r[j], z[j]); if (NULL != biases[j]) vops->sub(z_dims[j], r[j], r[j], biases[j]); // add next term to s: s = s + Gj^H (zj - zj_old) vops->sub(z_dims[j], zj_old, z[j], zj_old); iter_op_call(plan->ops[j].adjoint, rhs, zj_old); vops->add(N, s, s, rhs); // GH_usum += G_j^H uj (for updating eps_dual) iter_op_call(plan->ops[j].adjoint, rhs, u[j]); vops->add(N, GH_usum, GH_usum, rhs); } vops->del(zj_old); } float s_norm = 0.; float r_norm = 0.; if (plan->dynamic_rho || !plan->fast) { s_norm = rho * vops->norm(N, s); r_norm = 0.; for (unsigned int j = 0; j < num_funs; j++) r_norm += pow(vops->norm(z_dims[j], r[j]), 2.); r_norm = sqrt(r_norm); } if (!plan->fast) { double n2 = 0.; for (unsigned int j = 0; j < num_funs; j++) n2 += pow(vops->norm(z_dims[j], z[j]), 2.); double n = MAX(MAX(n1, n2), n3); long M = sum_long_array(num_funs, z_dims); float eps_pri = plan->ABSTOL * sqrt(M) + plan->RELTOL * sqrt(n); float eps_dual = plan->ABSTOL * sqrt(N) + plan->RELTOL * rho * vops->norm(N, GH_usum); struct admm_history_s history; history.s_norm = s_norm; history.r_norm = r_norm; history.eps_pri = eps_pri; history.eps_dual = eps_dual; history.rho = rho; history.numiter = i; history.nr_invokes = ndata.nr_invokes; iter_history(monitor, 
CAST_UP(&history)); if (0 == i) debug_printf(DP_DEBUG2, "%3s\t%3s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\n", "iter", "cgiter", "rho", "r norm", "eps pri", "s norm", "eps dual", "obj", "relMSE"); debug_printf(DP_DEBUG2, "%3d\t%3d\t%10.4f\t%10.4f\t%10.4f\t%10.4f\t%10.4f\t%10.4f\t%10.4f\n", history.numiter, history.nr_invokes, history.rho, history.r_norm, history.eps_pri, history.s_norm, history.eps_dual, (NULL == monitor) ? -1. : monitor->obj, (NULL == monitor) ? -1. : monitor->err); if ( (ndata.nr_invokes > plan->maxiter) || ( (r_norm < eps_pri) && (s_norm < eps_dual))) break; } else { debug_printf(DP_DEBUG3, "### ITER: %d (%d)\n", i, ndata.nr_invokes); if (ndata.nr_invokes > plan->maxiter) break; } float sc = 1.; assert(!(plan->dynamic_rho && plan->hogwild)); if (plan->dynamic_rho) { if (r_norm > plan->mu * s_norm) sc = plan->tau; else if (s_norm > plan->mu * r_norm) sc = 1. / plan->tau; } if (plan->hogwild) { hw_k++; if (hw_k == hw_K) { hw_k = 0; hw_K *= 2; sc = 2.; } } if (1. != sc) { rho = rho * sc; for (unsigned int j = 0; j < num_funs; j++) vops->smul(z_dims[j], 1. / sc, u[j], u[j]); } } // cleanup vops->del(rhs); vops->del(s); if (!plan->fast) vops->del(GH_usum); } bart-0.4.02/src/iter/admm.h000066400000000000000000000056001320577655200153630ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #ifndef __ADMM_H #define __ADMM_H #include "misc/cppwrap.h" #include "misc/types.h" #include "iter/monitor.h" #include "iter/italgos.h" struct vec_iter_s; /** * Operators attached to one ADMM term G_i (presumably G_i, G_i^H and G_i^H G_i -- TODO confirm against the callers) */ struct admm_op { struct iter_op_s forward; struct iter_op_s adjoint; struct iter_op_s normal; }; /** * Parameters for ADMM version 1 * * @param maxiter maximum number of iterations (gradient evaluations) before terminating * @param maxitercg maximum number of conjugate gradient iterations for updating x * @param cg_eps presumably the tolerance of the conjugate gradient x-update -- TODO confirm in the implementation * @param num_funs number of convex functions in objective, excluding data consistency * * @param do_warmstart do not zero out primal and dual variables before starting * @param dynamic_rho update rho according to mu/tau rule * @param hogwild update rho according to Hogwild rule (increase exponentially); must not be combined with dynamic_rho * @param fast skip the residual/tolerance bookkeeping and the history reporting; only the maxiter limit ends the iteration * * @param ABSTOL used for early stopping condition * @param RELTOL used for early stopping condition * * @param rho -- augmented lagrangian penalty parameter * @param alpha -- over-relaxation parameter between (0, 2) * * @param tau -- multiply/divide rho by tau if residuals are more than mu times apart * @param mu -- multiply/divide rho by tau if residuals are more than mu times apart * * @param ops array of operators, G_i (size is num_funs) * @param prox_ops array of proximal functions (size is num_funs) * @param biases array of biases/offsets (size is num_funs); individual entries may be NULL * @param xupdate presumably an optional replacement for the x-update step -- TODO confirm in the implementation */ struct admm_plan_s { unsigned int maxiter; unsigned int maxitercg; float cg_eps; _Bool do_warmstart; _Bool dynamic_rho; _Bool hogwild; _Bool fast; double ABSTOL; double RELTOL; float rho; float alpha; float tau; float mu; unsigned int num_funs; struct iter_op_p_s* prox_ops; struct admm_op* ops; const float* const* biases; struct iter_op_p_s xupdate; }; /** * ADMM state of a single iteration, as handed to the monitor * * @param numiter index of the current iteration * @param nr_invokes invocation counter that is compared against maxiter * @param r_norm primal residual norm of the current iteration * @param s_norm dual residual norm of the 
current iteration * @param eps_pri primal epsilon of the current iteration * @param eps_dual dual epsilon of the current iteration * @param rho penalty parameter rho used in the current iteration */ struct admm_history_s { INTERFACE(iter_history_t); unsigned int numiter; unsigned int nr_invokes; double r_norm; double s_norm; double eps_pri; double eps_dual; float rho; }; extern DEF_TYPEID(admm_history_s); /** * ADMM solver entry point * * @param plan algorithm parameters, see struct admm_plan_s * @param D number of entries in z_dims * @param z_dims lengths of the split variables z_j * @param N length of x * @param monitor receives one admm_history_s per iteration unless plan->fast is set */ extern void admm(const struct admm_plan_s* plan, unsigned int D, const long z_dims[__VLA(D)], long N, float* x, const float* x_adj, const struct vec_iter_s* vops, struct iter_op_s Aop, struct iter_monitor_s* monitor); #include "misc/cppwrap.h" #endif // __ADMM_H bart-0.4.02/src/iter/italgos.c000066400000000000000000000356261320577655200161150ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2017 Martin Uecker * 2013-2014 Frank Ong * 2013-2014,2017 Jon Tamir * * * * Landweber L. An iteration formula for Fredholm integral equations of the * first kind. Amer. J. Math. 1951; 73, 615-624. * * Nesterov Y. A method of solving a convex programming problem with * convergence rate O (1/k2). Soviet Mathematics Doklady 1983; 27(2):372-376 * * Bakushinsky AB. Iterative methods for nonlinear operator equations without * regularity. New approach. In Dokl. Russian Acad. Sci 1993; 330:282-284. * * Daubechies I, Defrise M, De Mol C. An iterative thresholding algorithm for * linear inverse problems with a sparsity constraint. * Comm Pure Appl Math 2004; 57:1413-1457. * * Beck A, Teboulle M. A fast iterative shrinkage-thresholding algorithm for * linear inverse problems. SIAM Journal on Imaging Sciences 2.1 2009; 183-202. * * Chambolle A, Pock, T. A First-Order Primal-Dual Algorithm for Convex Problems * with Applications to Imaging. J. Math. Imaging Vis. 2011; 40, 120-145. 
* */ #include #include #include "misc/misc.h" #include "misc/debug.h" #include "iter/vec.h" #include "iter/monitor.h" #include "italgos.h" extern inline void iter_op_call(struct iter_op_s op, float* dst, const float* src); extern inline void iter_op_p_call(struct iter_op_p_s op, float rho, float* dst, const float* src); /** * ravine step * (Nesterov 1983) */ static void ravine(const struct vec_iter_s* vops, long N, float* ftp, float* xa, float* xb) { float ft = *ftp; float tfo = ft; ft = (1.f + sqrtf(1.f + 4.f * ft * ft)) / 2.f; *ftp = ft; vops->swap(N, xa, xb); vops->axpy(N, xa, (1.f - tfo) / ft - 1.f, xa); vops->axpy(N, xa, (tfo - 1.f) / ft + 1.f, xb); } void landweber_sym(unsigned int maxiter, float epsilon, float alpha, long N, const struct vec_iter_s* vops, struct iter_op_s op, float* x, const float* b, struct iter_monitor_s* monitor) { float* r = vops->allocate(N); double rsnot = vops->norm(N, b); for (unsigned int i = 0; i < maxiter; i++) { iter_monitor(monitor, vops, x); iter_op_call(op, r, x); // r = A x vops->xpay(N, -1., r, b); // r = b - r = b - A x double rsnew = vops->norm(N, r); debug_printf(DP_DEBUG3, "#%d: %f\n", i, rsnew / rsnot); if (rsnew < epsilon) break; vops->axpy(N, x, alpha, r); } vops->del(r); } /** * Store information about iterative algorithm. * Used to flexibly modify behavior, e.g. continuation * * @param rsnew current residual * @param rsnot initial residual * @param iter current iteration * @param maxiter maximum iteration */ struct iter_data { double rsnew; double rsnot; unsigned int iter; const unsigned int maxiter; }; /** * Continuation for regularization. Returns fraction to scale regularization parameter * * @param itrdata state of iterative algorithm * @param delta scaling of regularization in the final iteration (1. means don't scale, 0. 
means scale to zero)
 *
 * The returned scale decays geometrically with the iteration number:
 * scale = delta^(iter / maxiter), i.e. 1 at iteration 0.
 */
static float ist_continuation(struct iter_data* itrdata, const float delta)
{
	// The first iteration is always unscaled. Returning here also avoids
	// 0 * -inf = NaN for delta == 0 (logf(0.) is -inf), which would
	// otherwise propagate into the iterate.
	if (0 == itrdata->iter)
		return 1.f;

	float a = logf(delta) / (float)itrdata->maxiter;

	return expf(a * itrdata->iter);
}


/**
 * Iterative Soft Thresholding
 *
 * @param maxiter maximum number of iterations
 * @param epsilon stop criterion
 * @param tau (step size) weighting on the residual term, A^H (b - Ax)
 * @param continuation scaling reached at the final iteration, passed as delta to ist_continuation (1. disables continuation)
 * @param hogwild if set, tau is halved after 10, then 20, then 40, ... further iterations
 * @param N size of input, x
 * @param vops vector ops definition
 * @param op linear operator, e.g. A
 * @param thresh threshold function, e.g. complex soft threshold
 * @param x initial estimate
 * @param b observations
 * @param monitor compute objective value, errors, etc. 
*/ void ist(unsigned int maxiter, float epsilon, float tau, float continuation, bool hogwild, long N, const struct vec_iter_s* vops, struct iter_op_s op, struct iter_op_p_s thresh, float* x, const float* b, struct iter_monitor_s* monitor) { struct iter_data itrdata = { .rsnew = 1., .rsnot = 1., .iter = 0, .maxiter = maxiter, }; float* r = vops->allocate(N); itrdata.rsnot = vops->norm(N, b); float ls_old = 1.; float lambda_scale = 1.; int hogwild_k = 0; int hogwild_K = 10; for (itrdata.iter = 0; itrdata.iter < maxiter; itrdata.iter++) { iter_monitor(monitor, vops, x); ls_old = lambda_scale; lambda_scale = ist_continuation(&itrdata, continuation); if (lambda_scale != ls_old) debug_printf(DP_DEBUG3, "##lambda_scale = %f\n", lambda_scale); iter_op_p_call(thresh, tau, x, x); iter_op_call(op, r, x); // r = A x vops->xpay(N, -1., r, b); // r = b - r = b - A x itrdata.rsnew = vops->norm(N, r); debug_printf(DP_DEBUG3, "#It %03d: %f \n", itrdata.iter, itrdata.rsnew / itrdata.rsnot); if (itrdata.rsnew < epsilon) break; vops->axpy(N, x, tau * lambda_scale, r); if (hogwild) hogwild_k++; if (hogwild_k == hogwild_K) { hogwild_K *= 2; hogwild_k = 0; tau /= 2; } } debug_printf(DP_DEBUG3, "\n"); vops->del(r); } /** * Iterative Soft Thresholding/FISTA to solve min || b - Ax ||_2 + lambda || T x ||_1 * * @param maxiter maximum number of iterations * @param epsilon stop criterion * @param tau (step size) weighting on the residual term, A^H (b - Ax) * @param lambda_start initial regularization weighting * @param lambda_end final regularization weighting (for continuation) * @param N size of input, x * @param vops vector ops definition * @param op linear operator, e.g. A * @param thresh threshold function, e.g. 
complex soft threshold * @param x initial estimate * @param b observations */ void fista(unsigned int maxiter, float epsilon, float tau, float continuation, bool hogwild, long N, const struct vec_iter_s* vops, struct iter_op_s op, struct iter_op_p_s thresh, float* x, const float* b, struct iter_monitor_s* monitor) { struct iter_data itrdata = { .rsnew = 1., .rsnot = 1., .iter = 0, .maxiter = maxiter, }; float* r = vops->allocate(N); float* o = vops->allocate(N); float ra = 1.; vops->copy(N, o, x); itrdata.rsnot = vops->norm(N, b); float ls_old = 1.; float lambda_scale = 1.; int hogwild_k = 0; int hogwild_K = 10; for (itrdata.iter = 0; itrdata.iter < maxiter; itrdata.iter++) { iter_monitor(monitor, vops, x); ls_old = lambda_scale; lambda_scale = ist_continuation(&itrdata, continuation); if (lambda_scale != ls_old) debug_printf(DP_DEBUG3, "##lambda_scale = %f\n", lambda_scale); iter_op_p_call(thresh, lambda_scale * tau, x, x); ravine(vops, N, &ra, x, o); // FISTA iter_op_call(op, r, x); // r = A x vops->xpay(N, -1., r, b); // r = b - r = b - A x itrdata.rsnew = vops->norm(N, r); debug_printf(DP_DEBUG3, "#It %03d: %f \n", itrdata.iter, itrdata.rsnew / itrdata.rsnot); if (itrdata.rsnew < epsilon) break; vops->axpy(N, x, tau, r); if (hogwild) hogwild_k++; if (hogwild_k == hogwild_K) { hogwild_K *= 2; hogwild_k = 0; tau /= 2; } } debug_printf(DP_DEBUG3, "\n"); vops->del(o); vops->del(r); } /** * Landweber L. An iteration formula for Fredholm integral equations of the * first kind. Amer. J. Math. 1951; 73, 615-624. 
*/ void landweber(unsigned int maxiter, float epsilon, float alpha, long N, long M, const struct vec_iter_s* vops, struct iter_op_s op, struct iter_op_s adj, float* x, const float* b, struct iter_monitor_s* monitor) { float* r = vops->allocate(M); float* p = vops->allocate(N); double rsnot = vops->norm(M, b); for (unsigned int i = 0; i < maxiter; i++) { iter_monitor(monitor, vops, x); iter_op_call(op, r, x); // r = A x vops->xpay(M, -1., r, b); // r = b - r = b - A x double rsnew = vops->norm(M, r); debug_printf(DP_DEBUG3, "#%d: %f\n", i, rsnew / rsnot); if (rsnew < epsilon) break; iter_op_call(adj, p, r); vops->axpy(N, x, alpha, p); } vops->del(r); vops->del(p); } /** * Conjugate Gradient Descent to solve Ax = b for symmetric A * * @param maxiter maximum number of iterations * @param regularization parameter * @param epsilon stop criterion * @param N size of input, x * @param vops vector ops definition * @param linop linear operator, i.e. A * @param x initial estimate * @param b observations */ float conjgrad(unsigned int maxiter, float l2lambda, float epsilon, long N, const struct vec_iter_s* vops, struct iter_op_s linop, float* x, const float* b, struct iter_monitor_s* monitor) { float* r = vops->allocate(N); float* p = vops->allocate(N); float* Ap = vops->allocate(N); // The first calculation of the residual might not // be necessary in some cases... iter_op_call(linop, r, x); // r = A x vops->axpy(N, r, l2lambda, x); vops->xpay(N, -1., r, b); // r = b - r = b - A x vops->copy(N, p, r); // p = r float rsnot = (float)pow(vops->norm(N, r), 2.); float rsold = rsnot; float rsnew = rsnot; float eps_squared = pow(epsilon, 2.); if (0. == rsold) { debug_printf(DP_DEBUG3, "CG: early out\n"); goto cleanup; } for (unsigned int i = 0; i < maxiter; i++) { iter_monitor(monitor, vops, x); debug_printf(DP_DEBUG3, "#%d: %f\n", i, (double)sqrtf(rsnew)); iter_op_call(linop, Ap, p); // Ap = A p vops->axpy(N, Ap, l2lambda, p); float pAp = (float)vops->dot(N, p, Ap); if (0. 
== pAp) break; float alpha = rsold / pAp; vops->axpy(N, x, +alpha, p); vops->axpy(N, r, -alpha, Ap); rsnew = (float)pow(vops->norm(N, r), 2.); float beta = rsnew / rsold; rsold = rsnew; if (rsnew <= eps_squared) { //debug_printf(DP_DEBUG3, "%d ", i); break; } vops->xpay(N, beta, p, r); // p = beta * p + r } cleanup: vops->del(Ap); vops->del(p); vops->del(r); return sqrtf(rsnew); } /** * Iteratively Regularized Gauss-Newton Method * (Bakushinsky 1993) * * y = F(x) = F x0 + DF dx + ... * * IRGNM: DF^H ((y - F x_0) + DF (xn - x0)) = ( DF^H DF + alpha ) (dx + xn - x0) * DF^H ((y - F x_0)) - alpha (xn - x0) = ( DF^H DF + alpha) dx */ void irgnm(unsigned int iter, float alpha, float redu, long N, long M, const struct vec_iter_s* vops, struct iter_op_s op, struct iter_op_s adj, struct iter_op_p_s inv, float* x, const float* xref, const float* y) { float* r = vops->allocate(M); float* p = vops->allocate(N); float* h = vops->allocate(N); for (unsigned int i = 0; i < iter; i++) { // printf("#--------\n"); iter_op_call(op, r, x); // r = F x vops->xpay(M, -1., r, y); // r = y - F x debug_printf(DP_DEBUG3, "Res: %f\n", vops->norm(M, r)); iter_op_call(adj, p, r); if (NULL != xref) vops->axpy(N, p, +alpha, xref); vops->axpy(N, p, -alpha, x); iter_op_p_call(inv, alpha, h, p); vops->axpy(N, x, 1., h); alpha /= redu; } vops->del(h); vops->del(p); vops->del(r); } /** * Projection onto Convex Sets * * minimize 0 subject to: x in C_1, x in C_2, ..., x in C_D, * where the C_i are convex sets */ void pocs(unsigned int maxiter, unsigned int D, struct iter_op_p_s proj_ops[static D], const struct vec_iter_s* vops, long N, float* x, struct iter_monitor_s* monitor) { UNUSED(N); UNUSED(vops); for (unsigned int i = 0; i < maxiter; i++) { debug_printf(DP_DEBUG3, "#Iter %d\n", i); iter_monitor(monitor, vops, x); for (unsigned int j = 0; j < D; j++) iter_op_p_call(proj_ops[j], 1., x, x); // use temporary memory here? 
} } /** * Power iteration */ double power(unsigned int maxiter, long N, const struct vec_iter_s* vops, struct iter_op_s op, float* u) { double s = vops->norm(N, u); vops->smul(N, 1. / s, u, u); for (unsigned int i = 0; i < maxiter; i++) { iter_op_call(op, u, u); // r = A x s = vops->norm(N, u); vops->smul(N, 1. / s, u, u); } return s; } /** * Chambolle Pock First Order Primal Dual algorithm. Solves min_x F(Ax) + G(x) * * @param maxiter maximum number of iterations * @param epsilon stop criterion * @param tau primal step size * @param sigma dual step size * @param decay decay rate * @param theta convex combination rate * @param N size of input, x * @param M size of transformed input, Ax * @param vops vector ops definition * @param op_forw forward operator, A * @param op_adj adjoint operator, AH * @param prox1 proximal function of F, e.g. prox_l2ball * @param prox2 proximal function of G, e.g. prox_wavelet_thresh * @param x initial estimate * @param monitor callback function */ void chambolle_pock(unsigned int maxiter, float epsilon, float tau, float sigma, float theta, float decay, long N, long M, const struct vec_iter_s* vops, struct iter_op_s op_forw, struct iter_op_s op_adj, struct iter_op_p_s prox1, struct iter_op_p_s prox2, float* x, struct iter_monitor_s* monitor) { float* x_avg = vops->allocate(N); float* x_old = vops->allocate(N); float* x_new = vops->allocate(N); float* u_old = vops->allocate(M); float* u = vops->allocate(M); float* u_new = vops->allocate(M); vops->copy(N, x_old, x); vops->copy(N, x_new, x); vops->copy(N, x_avg, x); vops->clear(M, u); vops->clear(M, u_new); vops->clear(M, u_old); for (unsigned int i = 0; i < maxiter; i++) { float lambda = (float)pow(decay, i); /* update u * u0 = u * p = u + sigma * A(x) * u = p - sigma * prox1(p / sigma, 1 / sigma) * u = lambda * u + (1 - lambda) * u0 */ iter_op_call(op_forw, u_old, x_avg); vops->axpy(M, u_old, 1. / sigma, u); // (u + sigma * A(x)) / sigma iter_op_p_call(prox1, 1. 
/ sigma, u_new, u_old); vops->axpbz(M, u_new, -1. * sigma, u_new, sigma, u_old); vops->copy(M, u_old, u); vops->axpbz(M, u, lambda, u_new, 1. - lambda, u_old); /* update x * x0 = x * q = x0 - tau * AH(u) * x = prox2(q, tau) * x = lambda * x + (1 - lambda * x0) */ vops->copy(N, x_old, x); iter_op_call(op_adj, x_new, u); vops->axpy(N, x, -1. * tau, x_new); iter_op_p_call(prox2, tau, x_new, x); vops->axpbz(N, x, lambda, x_new, 1. - lambda, x_old); /* update x_avg * a_avg = x + theta * (x - x0) */ vops->axpbz(N, x_avg, 1 + theta, x, -1. * theta, x_old); // residual vops->sub(N, x_old, x, x_old); vops->sub(M, u_old, u, u_old); float res1 = vops->norm(N, x_old) / sigma; float res2 = vops->norm(M, u_old) / tau; iter_monitor(monitor, vops, x); debug_printf(DP_DEBUG3, "#It %03d: %f %f \n", i, res1, res2); if (epsilon > (res1 + res2)) break; #if 0 // buggy if (res1 < 100 * res2) { sigma /= 2; tau *= 2; } else if (res2 > 100 * res1) { sigma *= 2; tau /= 2; } #endif } debug_printf(DP_DEBUG3, "\n"); vops->del(x_avg); vops->del(x_old); vops->del(x_new); vops->del(u_old); vops->del(u); vops->del(u_new); } bart-0.4.02/src/iter/italgos.h000066400000000000000000000060321320577655200161070ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #ifndef __ITALGOS_H #define __ITALGOS_H #include "misc/cppwrap.h" #ifndef NUM_INTERNAL // #warning "Use of private interfaces" #endif #include "misc/types.h" struct vec_iter_s; #ifndef ITER_OP_DATA_S #define ITER_OP_DATA_S typedef struct iter_op_data_s { TYPEID* TYPEID; } iter_op_data; #endif /* Callback types: apply an operator to src and store the result in dst; the _p_ variant receives an additional scalar rho. */ typedef void (*iter_op_fun_t)(iter_op_data* data, float* dst, const float* src); typedef void (*iter_op_p_fun_t)(iter_op_data* data, float rho, float* dst, const float* src); /* A callback bundled with the data pointer that is passed back to it as first argument. */ struct iter_op_s { iter_op_fun_t fun; iter_op_data* data; }; struct iter_op_p_s { iter_op_p_fun_t fun; iter_op_data* data; }; /* Invoke the callback stored in op with its own data pointer. */ inline void iter_op_call(struct iter_op_s op, float* dst, const float* src) { op.fun(op.data, dst, src); } inline void iter_op_p_call(struct iter_op_p_s op, float rho, float* dst, const float* src) { op.fun(op.data, rho, dst, src); } struct iter_monitor_s; float conjgrad(unsigned int maxiter, float l2lambda, float epsilon, long N, const struct vec_iter_s* vops, struct iter_op_s linop, float* x, const float* b, struct iter_monitor_s* monitor); void landweber(unsigned int maxiter, float epsilon, float alpha, long N, long M, const struct vec_iter_s* vops, struct iter_op_s op, struct iter_op_s adj, float* x, const float* b, struct iter_monitor_s* monitor); void landweber_sym(unsigned int maxiter, float epsilon, float alpha, long N, const struct vec_iter_s* vops, struct iter_op_s op, float* x, const float* b, struct iter_monitor_s* monitor); void ist(unsigned int maxiter, float epsilon, float tau, float continuation, _Bool hogwild, long N, const struct vec_iter_s* vops, struct iter_op_s op, struct iter_op_p_s thresh, float* x, const float* b, struct iter_monitor_s* monitor); void fista(unsigned int maxiter, float epsilon, float tau, float continuation, _Bool hogwild, long N, const struct vec_iter_s* vops, struct iter_op_s op, struct iter_op_p_s thresh, float* x, const float* b, struct iter_monitor_s* monitor); void irgnm(unsigned int iter, float alpha, float redu, long N, long M, 
const struct vec_iter_s* vops, struct iter_op_s op, struct iter_op_s adj, struct iter_op_p_s inv, float* x, const float* x0, const float* y); void pocs(unsigned int maxiter, unsigned int D, struct iter_op_p_s proj_ops[__VLA(D)], const struct vec_iter_s* vops, long N, float* x, struct iter_monitor_s* monitor); /* power(): u is normalised in place; the return value is the norm of the last iterate before normalisation. */ double power(unsigned int maxiter, long N, const struct vec_iter_s* vops, struct iter_op_s op, float* u); void chambolle_pock(unsigned int maxiter, float epsilon, float tau, float sigma, float theta, float decay, long N, long M, const struct vec_iter_s* vops, struct iter_op_s op_forw, struct iter_op_s op_adj, struct iter_op_p_s thresh1, struct iter_op_p_s thresh2, float* x, struct iter_monitor_s* monitor); #include "misc/cppwrap.h" #endif // __ITALGOS_H bart-0.4.02/src/iter/iter.c000066400000000000000000000124311320577655200154030ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * Copyright 2017. University of Oxford. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012, 2014 Martin Uecker * 2014, 2017 Jon Tamir * 2014 Frank Ong * 2017 Sofia Dimoudi */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/ops.h" #include "linops/linop.h" #include "linops/someops.h" #include "iter/italgos.h" #include "iter/prox.h" #include "iter/admm.h" #include "iter/iter2.h" #include "iter/vec.h" #include "misc/debug.h" #include "misc/misc.h" #include "iter.h" DEF_TYPEID(iter_conjgrad_conf); DEF_TYPEID(iter_landweber_conf); DEF_TYPEID(iter_ist_conf); DEF_TYPEID(iter_fista_conf); DEF_TYPEID(iter_chambolle_pock_conf); DEF_TYPEID(iter_pocs_conf); DEF_TYPEID(iter_admm_conf); DEF_TYPEID(iter_niht_conf); DEF_TYPEID(iter_call_s); const struct iter_conjgrad_conf iter_conjgrad_defaults = { .INTERFACE.TYPEID = &TYPEID(iter_conjgrad_conf), .maxiter = 50, .l2lambda = 0., .tol = 0., }; const struct iter_landweber_conf iter_landweber_defaults = { .INTERFACE.TYPEID = &TYPEID(iter_landweber_conf), .maxiter = 50, .step = 0.95, .tol = 0., }; const struct iter_ist_conf iter_ist_defaults = { .INTERFACE.TYPEID = &TYPEID(iter_ist_conf), .maxiter = 50, .step = 0.95, .continuation = 1., .hogwild = false, .tol = 0., }; const struct iter_fista_conf iter_fista_defaults = { .INTERFACE.TYPEID = &TYPEID(iter_fista_conf), .maxiter = 50, .step = 0.95, .continuation = 1., .hogwild = false, .tol = 0., }; const struct iter_admm_conf iter_admm_defaults = { .INTERFACE.TYPEID = &TYPEID(iter_admm_conf), .maxiter = 50, .maxitercg = 10, .cg_eps = 1.E-3, .do_warmstart = false, .dynamic_rho = false, .hogwild = false, .fast = false, .ABSTOL = 1.E-4, .RELTOL = 1.E-3, .rho = 0.5, .alpha = 1.6, .tau = 2., .mu = 3, }; const struct iter_pocs_conf iter_pocs_defaults = { .INTERFACE.TYPEID = &TYPEID(iter_pocs_conf), .maxiter = 50, }; const struct iter_niht_conf iter_niht_defaults = { .INTERFACE.TYPEID = &TYPEID(iter_niht_conf), .maxiter = 50, .tol = 1e-8, .do_warmstart = false, }; const struct iter_chambolle_pock_conf 
iter_chambolle_pock_defaults = { .INTERFACE.TYPEID = &TYPEID(iter_chambolle_pock_conf), .maxiter = 50, .tol = 1e-8, .theta = 1., .tau = 1., .sigma = 1., .decay = 1., .fast = false, }; typedef void (*thresh_fun_t)(void* data, float lambda, float* dst, const float* src); static bool checkeps(float eps) { if (0. == eps) { debug_printf(DP_WARN, "Warning: data empty\n"); return true; } if (!isnormal(eps)) { debug_printf(DP_WARN, "Warning: data corrupted\n"); return true; } return false; } void iter_conjgrad(iter_conf* _conf, const struct operator_s* normaleq_op, const struct operator_p_s* thresh_prox, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { assert(NULL == thresh_prox); iter2_conjgrad(_conf, normaleq_op, 0, NULL, NULL, NULL, NULL, size, image, image_adj, monitor); } void iter_landweber(iter_conf* _conf, const struct operator_s* normaleq_op, const struct operator_p_s* thresh_prox, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { struct iter_landweber_conf* conf = CAST_DOWN(iter_landweber_conf, _conf); float eps = md_norm(1, MD_DIMS(size), image_adj); if (checkeps(eps)) goto cleanup; assert(NULL == thresh_prox); landweber_sym(conf->maxiter, 1.E-3 * eps, conf->step, size, select_vecops(image_adj), OPERATOR2ITOP(normaleq_op), image, image_adj, monitor); cleanup: ; } void iter_ist(iter_conf* _conf, const struct operator_s* normaleq_op, const struct operator_p_s* thresh_prox, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { iter2_ist(_conf, normaleq_op, 1, &thresh_prox, NULL, NULL, NULL, size, image, image_adj, monitor); } void iter_fista(iter_conf* _conf, const struct operator_s* normaleq_op, const struct operator_p_s* thresh_prox, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { iter2_fista(_conf, normaleq_op, 1, &thresh_prox, NULL, NULL, NULL, size, image, image_adj, monitor); } void iter_admm(iter_conf* _conf, const struct 
operator_s* normaleq_op, const struct operator_p_s* thresh_prox, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { const struct linop_s* eye[1] = { linop_identity_create(1, MD_DIMS(size / 2)) }; // using complex float identity operator... divide size by 2 iter2_admm(_conf, normaleq_op, 1, &thresh_prox, eye, NULL, NULL, size, image, image_adj, monitor); linop_free(eye[0]); } void iter_call_iter2(iter_conf* _conf, const struct operator_s* normaleq_op, const struct operator_p_s* thresh_prox, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { struct iter2_call_s* it = CAST_DOWN(iter2_call_s, _conf); it->fun(it->_conf, normaleq_op, (NULL == thresh_prox) ? 1 : 0, &thresh_prox, NULL, NULL, NULL, size, image, image_adj, monitor); } bart-0.4.02/src/iter/iter.h000066400000000000000000000056201320577655200154120ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #ifndef __ITER_H #define __ITER_H struct operator_s; struct operator_p_s; #include "misc/types.h" #ifndef ITER_CONF_S #define ITER_CONF_S typedef struct iter_conf_s { TYPEID* TYPEID; } iter_conf; #endif struct iter_monitor_s; /* Common signature of the solver entry points declared at the end of this header (iter_conjgrad, iter_landweber, iter_ist, iter_fista, iter_admm, iter_call_iter2). */ typedef void italgo_fun_f(iter_conf* conf, const struct operator_s* normaleq_op, const struct operator_p_s* thresh_prox, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor); typedef italgo_fun_f* italgo_fun_t; /* Per-algorithm configuration structs; each one embeds iter_conf through INTERFACE(). */ struct iter_conjgrad_conf { INTERFACE(iter_conf); unsigned int maxiter; float l2lambda; float tol; }; extern DEF_TYPEID(iter_conjgrad_conf); struct iter_landweber_conf { INTERFACE(iter_conf); unsigned int maxiter; float step; float tol; }; extern DEF_TYPEID(iter_landweber_conf); struct iter_ist_conf { INTERFACE(iter_conf); unsigned int maxiter; float step; float continuation; _Bool hogwild; float tol; }; extern DEF_TYPEID(iter_ist_conf); struct iter_fista_conf { INTERFACE(iter_conf); unsigned int maxiter; float step; float continuation; _Bool hogwild; float tol; }; extern DEF_TYPEID(iter_fista_conf); struct iter_chambolle_pock_conf { INTERFACE(iter_conf); unsigned int maxiter; float tau; float sigma; float theta; float decay; float tol; _Bool fast; }; extern DEF_TYPEID(iter_chambolle_pock_conf); struct iter_admm_conf { INTERFACE(iter_conf); unsigned int maxiter; unsigned int maxitercg; float rho; _Bool do_warmstart; _Bool dynamic_rho; _Bool hogwild; double ABSTOL; double RELTOL; float alpha; float tau; float mu; float cg_eps; _Bool fast; }; extern DEF_TYPEID(iter_admm_conf); struct iter_pocs_conf { INTERFACE(iter_conf); unsigned int maxiter; }; extern DEF_TYPEID(iter_pocs_conf); struct iter_niht_conf { INTERFACE(iter_conf); unsigned int maxiter; float tol; _Bool do_warmstart; }; extern DEF_TYPEID(iter_niht_conf); extern const struct iter_conjgrad_conf iter_conjgrad_defaults; extern const struct iter_landweber_conf iter_landweber_defaults; extern const struct iter_ist_conf iter_ist_defaults; extern const 
struct iter_fista_conf iter_fista_defaults; extern const struct iter_admm_conf iter_admm_defaults; extern const struct iter_pocs_conf iter_pocs_defaults; extern const struct iter_niht_conf iter_niht_defaults; extern const struct iter_chambolle_pock_conf iter_chambolle_pock_defaults; italgo_fun_f iter_conjgrad; italgo_fun_f iter_landweber; italgo_fun_f iter_ist; italgo_fun_f iter_fista; italgo_fun_f iter_admm; // use with iter2_call_s from iter2.h as _conf italgo_fun_f iter_call_iter2; struct iter_call_s { INTERFACE(iter_conf); italgo_fun_t fun; iter_conf* _conf; }; extern DEF_TYPEID(iter_call_s); #endif bart-0.4.02/src/iter/iter2.c000066400000000000000000000230311320577655200154630ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * Copyright 2017. University of Oxford. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2017 Martin Uecker * 2014, 2017 Jon Tamir * 2017 Sofia Dimoudi */ #include #include #include #include #include #include "misc/misc.h" #include "misc/types.h" #include "misc/debug.h" #include "num/multind.h" #include "num/flpmath.h" #include "num/iovec.h" #include "num/ops.h" #include "linops/linop.h" #include "iter/italgos.h" #include "iter/iter.h" #include "iter/prox.h" #include "iter/admm.h" #include "iter/vec.h" #include "iter/niht.h" #include "iter2.h" /* iter_op_fun_t adapter: applies the operator wrapped in _data to src and stores the result in dst; the float buffers are passed on as complex float. */ void operator_iter(iter_op_data* _data, float* dst, const float* src) { struct iter_op_op* data = CAST_DOWN(iter_op_op, _data); operator_apply_unchecked(data->op, (complex float*)dst, (const complex float*)src); } /* iter_op_p_fun_t adapter: same as operator_iter, with the scalar rho handed on to the wrapped operator. */ void operator_p_iter(iter_op_data* _data, float rho, float* dst, const float* src) { struct iter_op_p_op* data = CAST_DOWN(iter_op_p_op, _data); operator_p_apply_unchecked(data->op, rho, (complex float*)dst, (const complex float*)src); } DEF_TYPEID(iter_op_op); DEF_TYPEID(iter_op_p_op); DEF_TYPEID(iter2_call_s); 
static bool checkeps(float eps) { if (0. == eps) { debug_printf(DP_WARN, "Warning: data empty\n"); return true; } if (!isnormal(eps)) { debug_printf(DP_WARN, "Warning: data corrupted\n"); return true; } return false; } void iter2_conjgrad(iter_conf* _conf, const struct operator_s* normaleq_op, unsigned int D, const struct operator_p_s* prox_ops[D], const struct linop_s* ops[D], const float* biases[D], const struct operator_p_s* xupdate_op, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { assert(0 == D); assert(NULL == prox_ops); assert(NULL == ops); assert(NULL == biases); UNUSED(xupdate_op); struct iter_conjgrad_conf* conf = CAST_DOWN(iter_conjgrad_conf, _conf); float eps = md_norm(1, MD_DIMS(size), image_adj); if (checkeps(eps)) goto cleanup; conjgrad(conf->maxiter, conf->l2lambda, eps * conf->tol, size, select_vecops(image_adj), OPERATOR2ITOP(normaleq_op), image, image_adj, monitor); cleanup: ; } void iter2_ist(iter_conf* _conf, const struct operator_s* normaleq_op, unsigned int D, const struct operator_p_s* prox_ops[D], const struct linop_s* ops[D], const float* biases[D], const struct operator_p_s* xupdate_op, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { assert(D == 1); assert(NULL != prox_ops[0]); assert(NULL == biases); #if 0 assert(NULL == ops); #else UNUSED(ops); #endif UNUSED(xupdate_op); struct iter_ist_conf* conf = CAST_DOWN(iter_ist_conf, _conf); float eps = md_norm(1, MD_DIMS(size), image_adj); if (checkeps(eps)) goto cleanup; assert((conf->continuation >= 0.) 
&& (conf->continuation <= 1.)); ist(conf->maxiter, eps * conf->tol, conf->step, conf->continuation, conf->hogwild, size, select_vecops(image_adj), OPERATOR2ITOP(normaleq_op), OPERATOR_P2ITOP(prox_ops[0]), image, image_adj, monitor); cleanup: ; } void iter2_fista(iter_conf* _conf, const struct operator_s* normaleq_op, unsigned int D, const struct operator_p_s* prox_ops[D], const struct linop_s* ops[D], const float* biases[D], const struct operator_p_s* xupdate_op, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { assert(D == 1); assert(NULL == biases); #if 0 assert(NULL == ops); #else UNUSED(ops); #endif UNUSED(xupdate_op); struct iter_fista_conf* conf = CAST_DOWN(iter_fista_conf, _conf); float eps = md_norm(1, MD_DIMS(size), image_adj); if (checkeps(eps)) goto cleanup; assert((conf->continuation >= 0.) && (conf->continuation <= 1.)); fista(conf->maxiter, eps * conf->tol, conf->step, conf->continuation, conf->hogwild, size, select_vecops(image_adj), OPERATOR2ITOP(normaleq_op), OPERATOR_P2ITOP(prox_ops[0]), image, image_adj, monitor); cleanup: ; } /* Chambolle Pock Primal Dual algorithm. Solves G(x) + F(Ax) * Assumes that G is in prox_ops[0], F is in prox_ops[1], A is in ops[1] */ void iter2_chambolle_pock(iter_conf* _conf, const struct operator_s* normaleq_op, unsigned int D, const struct operator_p_s* prox_ops[D], const struct linop_s* ops[D], const float* biases[D], const struct operator_p_s* xupdate_op, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { assert(D == 2); assert(NULL == biases); assert(NULL == normaleq_op); UNUSED(xupdate_op); UNUSED(image_adj); struct iter_chambolle_pock_conf* conf = CAST_DOWN(iter_chambolle_pock_conf, _conf); const struct iovec_s* iv = linop_domain(ops[1]); const struct iovec_s* ov = linop_codomain(ops[1]); assert((long)md_calc_size(iv->N, iv->dims) * 2 == size); // FIXME: sensible way to check for corrupt data? 
#if 0 float eps = md_norm(1, MD_DIMS(size), image_adj); if (checkeps(eps)) goto cleanup; #else float eps = 1.; #endif chambolle_pock(conf->maxiter, eps * conf->tol, conf->tau, conf->sigma, conf->theta, conf->decay, 2 * md_calc_size(iv->N, iv->dims), 2 * md_calc_size(ov->N, ov->dims), select_vecops(image), OPERATOR2ITOP(ops[1]->forward), OPERATOR2ITOP(ops[1]->adjoint), OPERATOR_P2ITOP(prox_ops[1]), OPERATOR_P2ITOP(prox_ops[0]), image, monitor); //cleanup: //; } void iter2_admm(iter_conf* _conf, const struct operator_s* normaleq_op, unsigned int D, const struct operator_p_s* prox_ops[D], const struct linop_s* ops[D], const float* biases[D], const struct operator_p_s* xupdate_op, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { struct iter_admm_conf* conf = CAST_DOWN(iter_admm_conf, _conf); struct admm_plan_s admm_plan = { .maxiter = conf->maxiter, .maxitercg = conf->maxitercg, .cg_eps = conf->cg_eps, .rho = conf->rho, .num_funs = D, .do_warmstart = conf->do_warmstart, .dynamic_rho = conf->dynamic_rho, .hogwild = conf->hogwild, .ABSTOL = conf->ABSTOL, .RELTOL = conf->RELTOL, .alpha = conf->alpha, .tau = conf->tau, .mu = conf->mu, .fast = conf->fast, .biases = biases, }; struct admm_op a_ops[D ?:1]; struct iter_op_p_s a_prox_ops[D ?:1]; for (unsigned int i = 0; i < D; i++) { a_ops[i].forward = OPERATOR2ITOP(ops[i]->forward), a_ops[i].normal = OPERATOR2ITOP(ops[i]->normal); a_ops[i].adjoint = OPERATOR2ITOP(ops[i]->adjoint); a_prox_ops[i] = OPERATOR_P2ITOP(prox_ops[i]); } admm_plan.ops = a_ops; admm_plan.prox_ops = a_prox_ops; admm_plan.xupdate = OPERATOR_P2ITOP(xupdate_op); long z_dims[D ?: 1]; for (unsigned int i = 0; i < D; i++) z_dims[i] = 2 * md_calc_size(linop_codomain(ops[i])->N, linop_codomain(ops[i])->dims); if (NULL != image_adj) { float eps = md_norm(1, MD_DIMS(size), image_adj); if (checkeps(eps)) goto cleanup; } admm(&admm_plan, admm_plan.num_funs, z_dims, size, (float*)image, image_adj, select_vecops(image), 
OPERATOR2ITOP(normaleq_op), monitor); cleanup: ; } void iter2_pocs(iter_conf* _conf, const struct operator_s* normaleq_op, unsigned int D, const struct operator_p_s* prox_ops[D], const struct linop_s* ops[D], const float* biases[D], const struct operator_p_s* xupdate_op, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { struct iter_pocs_conf* conf = CAST_DOWN(iter_pocs_conf, _conf); assert(NULL == normaleq_op); assert(NULL == ops); assert(NULL == biases); assert(NULL == image_adj); UNUSED(xupdate_op); UNUSED(image_adj); struct iter_op_p_s proj_ops[D]; for (unsigned int i = 0; i < D; i++) proj_ops[i] = OPERATOR_P2ITOP(prox_ops[i]); pocs(conf->maxiter, D, proj_ops, select_vecops(image), size, image, monitor); } void iter2_niht(iter_conf* _conf, const struct operator_s* normaleq_op, unsigned int D, const struct operator_p_s* prox_ops[D], const struct linop_s* ops[D], const float* biases[D], const struct operator_p_s* xupdate_op, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { UNUSED(xupdate_op); UNUSED(biases); assert(D == 1); struct iter_niht_conf* conf = CAST_DOWN(iter_niht_conf, _conf); struct niht_conf_s niht_conf = { .maxiter = conf->maxiter, .N = size, .trans = 0, .do_warmstart = conf->do_warmstart, }; struct niht_transop trans; if (NULL != ops){ trans.forward = OPERATOR2ITOP(ops[0]->forward); trans.adjoint = OPERATOR2ITOP(ops[0]->adjoint); trans.N = 2 * md_calc_size(linop_codomain(ops[0])->N, linop_codomain(ops[0])->dims); niht_conf.trans = 1; } float eps = md_norm(1, MD_DIMS(size), image_adj); if (checkeps(eps)) goto cleanup; niht_conf.epsilon = eps * conf->tol; niht(&niht_conf, &trans, select_vecops(image_adj), OPERATOR2ITOP(normaleq_op), OPERATOR_P2ITOP(prox_ops[0]), image, image_adj, monitor); cleanup: ; } void iter2_call_iter(iter_conf* _conf, const struct operator_s* normaleq_op, unsigned int D, const struct operator_p_s* prox_ops[D], const struct linop_s* ops[D], const float* biases[D], 
const struct operator_p_s* xupdate_op, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor) { assert(D <= 1); assert(NULL == ops); assert(NULL == biases); UNUSED(xupdate_op); struct iter_call_s* it = CAST_DOWN(iter_call_s, _conf); it->fun(it->_conf, normaleq_op, (1 == D) ? prox_ops[0] : NULL, size, image, image_adj, monitor); } bart-0.4.02/src/iter/iter2.h000066400000000000000000000045211320577655200154730ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifndef __ITER2_H #define __ITER2_H #include "misc/cppwrap.h" #include "misc/types.h" struct linop_s; struct operator_s; struct operator_p_s; #ifndef ITER_OP_DATA_S #define ITER_OP_DATA_S typedef struct iter_op_data_s { TYPEID* TYPEID; } iter_op_data; #endif struct iter_op_op { INTERFACE(iter_op_data); const struct operator_s* op; }; extern DEF_TYPEID(iter_op_op); struct iter_op_p_op { INTERFACE(iter_op_data); const struct operator_p_s* op; }; extern DEF_TYPEID(iter_op_p_op); extern void operator_iter(iter_op_data* data, float* dst, const float* src); extern void operator_p_iter(iter_op_data* data, float rho, float* dst, const float* src); // the temporay copy is needed if used in loops #define STRUCT_TMP_COPY(x) ({ __typeof(x) __foo = (x); __typeof(__foo)* __foo2 = alloca(sizeof(__foo)); *__foo2 = __foo; __foo2; }) #define OPERATOR2ITOP(op) (struct iter_op_s){ (NULL == op) ? NULL : operator_iter, CAST_UP(STRUCT_TMP_COPY(((struct iter_op_op){ { &TYPEID(iter_op_op) }, op }))) } #define OPERATOR_P2ITOP(op) (struct iter_op_p_s){ (NULL == op) ? 
NULL : operator_p_iter, CAST_UP(STRUCT_TMP_COPY(((struct iter_op_p_op){ { &TYPEID(iter_op_p_op) }, op }))) } #ifndef ITER_CONF_S #define ITER_CONF_S typedef struct iter_conf_s { TYPEID* TYPEID; } iter_conf; #endif struct iter_monitor_s; typedef void (italgo_fun2_f)(iter_conf* conf, const struct operator_s* normaleq_op, unsigned int D, const struct operator_p_s* prox_ops[__VLA2(D)], const struct linop_s* ops[__VLA2(D)], const float* biases[__VLA2(D)], const struct operator_p_s* xupdate_op, long size, float* image, const float* image_adj, struct iter_monitor_s* monitor); typedef italgo_fun2_f* italgo_fun2_t; italgo_fun2_f iter2_conjgrad; italgo_fun2_f iter2_ist; italgo_fun2_f iter2_fista; italgo_fun2_f iter2_chambolle_pock; italgo_fun2_f iter2_admm; italgo_fun2_f iter2_pocs; italgo_fun2_f iter2_niht; // use with iter_call_s from iter.h as _conf italgo_fun2_f iter2_call_iter; struct iter2_call_s { INTERFACE(iter_conf); italgo_fun2_t fun; iter_conf* _conf; }; extern DEF_TYPEID(iter2_call_s); #include "misc/cppwrap.h" #endif bart-0.4.02/src/iter/iter3.c000066400000000000000000000064101320577655200154660ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012-2017 Martin Uecker */ #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "misc/types.h" #include "misc/misc.h" #include "iter/italgos.h" #include "iter/vec.h" #include "iter3.h" DEF_TYPEID(iter3_irgnm_conf); DEF_TYPEID(iter3_landweber_conf); const struct iter3_irgnm_conf iter3_irgnm_defaults = { .INTERFACE.TYPEID = &TYPEID(iter3_irgnm_conf), .iter = 8, .alpha = 1., .redu = 2., .cgiter = 100, .cgtol = 0.1, .nlinv_legacy = false, }; struct irgnm_s { INTERFACE(iter_op_data); struct iter_op_s frw; struct iter_op_s der; struct iter_op_s adj; float* tmp; long size; int cgiter; float cgtol; bool nlinv_legacy; }; DEF_TYPEID(irgnm_s); static void normal(iter_op_data* _data, float* dst, const float* src) { struct irgnm_s* data = CAST_DOWN(irgnm_s, _data); iter_op_call(data->der, data->tmp, src); iter_op_call(data->adj, dst, data->tmp); } static void inverse(iter_op_data* _data, float alpha, float* dst, const float* src) { struct irgnm_s* data = CAST_DOWN(irgnm_s, _data); md_clear(1, MD_DIMS(data->size), dst, FL_SIZE); float eps = data->cgtol * md_norm(1, MD_DIMS(data->size), src); /* The original (Matlab) nlinv implementation uses * "sqrt(rsnew) < 0.01 * rsnot" as termination condition. 
*/ if (data->nlinv_legacy) eps = powf(eps, 2.); conjgrad(data->cgiter, alpha, eps, data->size, select_vecops(src), (struct iter_op_s){ normal, CAST_UP(data) }, dst, src, NULL); } static void forward(iter_op_data* _data, float* dst, const float* src) { struct irgnm_s* data = CAST_DOWN(irgnm_s, _data); iter_op_call(data->frw, dst, src); } static void adjoint(iter_op_data* _data, float* dst, const float* src) { struct irgnm_s* data = CAST_DOWN(irgnm_s, _data); iter_op_call(data->adj, dst, src); } void iter3_irgnm(iter3_conf* _conf, struct iter_op_s frw, struct iter_op_s der, struct iter_op_s adj, long N, float* dst, const float* ref, long M, const float* src) { struct iter3_irgnm_conf* conf = CAST_DOWN(iter3_irgnm_conf, _conf); float* tmp = md_alloc_sameplace(1, MD_DIMS(M), FL_SIZE, src); struct irgnm_s data = { { &TYPEID(irgnm_s) }, frw, der, adj, tmp, N, conf->cgiter, conf->cgtol, conf->nlinv_legacy }; irgnm(conf->iter, conf->alpha, conf->redu, N, M, select_vecops(src), (struct iter_op_s){ forward, CAST_UP(&data) }, (struct iter_op_s){ adjoint, CAST_UP(&data) }, (struct iter_op_p_s){ inverse, CAST_UP(&data) }, dst, ref, src); md_free(tmp); } void iter3_landweber(iter3_conf* _conf, struct iter_op_s frw, struct iter_op_s der, struct iter_op_s adj, long N, float* dst, const float* ref, long M, const float* src) { struct iter3_landweber_conf* conf = CAST_DOWN(iter3_landweber_conf, _conf); assert(NULL == der.fun); assert(NULL == ref); float* tmp = md_alloc_sameplace(1, MD_DIMS(N), FL_SIZE, src); landweber(conf->iter, conf->epsilon, conf->alpha, N, M, select_vecops(src), frw, adj, dst, src, NULL); md_free(tmp); } bart-0.4.02/src/iter/iter3.h000066400000000000000000000020251320577655200154710ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #include "misc/types.h" typedef struct iter3_conf_s { TYPEID* TYPEID; } iter3_conf; struct iter_op_s; typedef void iter3_fun_f(iter3_conf* _conf, struct iter_op_s frw, struct iter_op_s der, struct iter_op_s adj, long N, float* dst, const float* ref, long M, const float* src); struct iter3_irgnm_conf { INTERFACE(iter3_conf); int iter; float alpha; float redu; int cgiter; float cgtol; _Bool nlinv_legacy; }; extern DEF_TYPEID(iter3_irgnm_conf); iter3_fun_f iter3_irgnm; struct iter3_landweber_conf { INTERFACE(iter3_conf); int iter; float alpha; float epsilon; }; extern DEF_TYPEID(iter3_landweber_conf); iter3_fun_f iter3_landweber; extern const struct iter3_irgnm_conf iter3_irgnm_defaults; // extern const struct iter3_landweber_conf iter3_landweber_defaults; bart-0.4.02/src/iter/iter4.c000066400000000000000000000037131320577655200154720ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #include "num/ops.h" #include "nlops/nlop.h" #include "misc/misc.h" #include "misc/types.h" #include "iter/italgos.h" #include "iter/iter3.h" #include "iter/iter4.h" struct iter4_nlop_s { INTERFACE(iter_op_data); struct nlop_s nlop; }; DEF_TYPEID(iter4_nlop_s); static void nlop_for_iter(iter_op_data* _o, float* _dst, const float* _src) { const struct iter4_nlop_s* nlop = CAST_DOWN(iter4_nlop_s, _o); operator_apply_unchecked(nlop->nlop.op, (complex float*)_dst, (const complex float*)_src); } static void nlop_der_iter(iter_op_data* _o, float* _dst, const float* _src) { const struct iter4_nlop_s* nlop = CAST_DOWN(iter4_nlop_s, _o); linop_forward_unchecked(nlop->nlop.derivative, (complex float*)_dst, (const complex float*)_src); } static void nlop_adj_iter(iter_op_data* _o, float* _dst, const float* _src) { const struct iter4_nlop_s* nlop = CAST_DOWN(iter4_nlop_s, _o); linop_adjoint_unchecked(nlop->nlop.derivative, (complex float*)_dst, (const complex float*)_src); } void iter4_irgnm(iter3_conf* _conf, struct nlop_s* nlop, long N, float* dst, const float* ref, long M, const float* src) { struct iter4_nlop_s data = { { &TYPEID(iter4_nlop_s) }, *nlop }; iter3_irgnm(_conf, (struct iter_op_s){ nlop_for_iter, CAST_UP(&data) }, (struct iter_op_s){ nlop_der_iter, CAST_UP(&data) }, (struct iter_op_s){ nlop_adj_iter, CAST_UP(&data) }, N, dst, ref, M, src); } void iter4_landweber(iter3_conf* _conf, struct nlop_s* nlop, long N, float* dst, const float* ref, long M, const float* src) { struct iter4_nlop_s data = { { &TYPEID(iter4_nlop_s) }, *nlop }; iter3_landweber(_conf, (struct iter_op_s){ nlop_for_iter, CAST_UP(&data) }, (struct iter_op_s){ nlop_der_iter, CAST_UP(&data) }, (struct iter_op_s){ nlop_adj_iter, CAST_UP(&data) }, N, dst, ref, M, src); } bart-0.4.02/src/iter/iter4.h000066400000000000000000000010701320577655200154710ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include "misc/types.h" struct iter3_conf_s; struct iter_op_s; struct nlop_s; typedef void iter4_fun_f(iter3_conf* _conf, struct nlop_s* nlop, long N, float* dst, const float* ref, long M, const float* src); iter4_fun_f iter4_irgnm; iter4_fun_f iter4_landweber; extern const struct iter3_irgnm_conf iter3_irgnm_defaults; // extern const struct iter3_landweber_conf iter3_landweber_defaults; bart-0.4.02/src/iter/itop.c000066400000000000000000000066761320577655200154310ustar00rootroot00000000000000/* Copyright 2017. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Martin Uecker * 2017 Jon Tamir */ #include #include "misc/misc.h" #include "misc/types.h" #include "num/multind.h" #include "num/ops.h" #include "num/iovec.h" #include "linops/linop.h" #include "iter/iter2.h" #include "itop.h" struct itop_s { INTERFACE(operator_data_t); italgo_fun2_t italgo; iter_conf* iconf; struct iter_monitor_s* monitor; const struct operator_s* op; unsigned int num_funs; long size; const struct iovec_s* iov; const float* init; const struct operator_p_s** prox_funs; const struct linop_s** prox_linops; }; static DEF_TYPEID(itop_s); static void itop_apply(const operator_data_t* _data, unsigned int N, void* args[static N]) { assert(2 == N); const struct itop_s* data = CAST_DOWN(itop_s, _data); if (NULL == data->init) { md_clear(1, MD_DIMS(data->size), args[0], sizeof(float)); } else { md_copy(data->iov->N, data->iov->dims, args[0], data->init, data->iov->size); } data->italgo(data->iconf, data->op, data->num_funs, data->prox_funs, data->prox_linops, NULL, NULL, data->size, args[0], args[1], data->monitor); } static void itop_del(const operator_data_t* _data) { const struct itop_s* data = CAST_DOWN(itop_s, _data); 
iovec_free(data->iov); operator_free(data->op); if (NULL != data->init) md_free(data->init); if (NULL != data->prox_funs) { for (unsigned int i = 0; i < data->num_funs; i++) operator_p_free(data->prox_funs[i]); xfree(data->prox_funs); } if (NULL != data->prox_linops) { for (unsigned int i = 0; i < data->num_funs; i++) linop_free(data->prox_linops[i]); xfree(data->prox_linops); } xfree(data); } const struct operator_s* itop_create( italgo_fun2_t italgo, iter_conf* iconf, const float* init, const struct operator_s* op, unsigned int num_funs, const struct operator_p_s* prox_funs[num_funs], const struct linop_s* prox_linops[num_funs], struct iter_monitor_s* monitor) { PTR_ALLOC(struct itop_s, data); SET_TYPEID(itop_s, data); const struct iovec_s* iov; if (NULL == op) { assert(0 < num_funs); iov = linop_domain(prox_linops[0]); } else iov = operator_domain(op); data->iconf = iconf; data->italgo = italgo; data->monitor = monitor; data->op = (NULL == op) ? NULL : operator_ref(op); data->num_funs = num_funs; data->size = 2 * md_calc_size(iov->N, iov->dims); // FIXME: do not assume complex data->prox_funs = NULL; data->prox_linops = NULL; data->init = NULL; data->iov = iovec_create(iov->N, iov->dims, iov->size); if (NULL != init) { float* init2 = md_alloc(iov->N, iov->dims, iov->size); md_copy(iov->N, iov->dims, init2, init, iov->size); data->init = init2; } if (NULL != prox_funs) { data->prox_funs = *TYPE_ALLOC(const struct operator_p_s*[num_funs]); for (unsigned int i = 0; i < num_funs; i++) data->prox_funs[i] = operator_p_ref(prox_funs[i]); } if (NULL != prox_linops) { data->prox_linops = *TYPE_ALLOC(const struct linop_s*[num_funs]); for (unsigned int i = 0; i < num_funs; i++) data->prox_linops[i] = linop_clone(prox_linops[i]); } return operator_create(iov->N, iov->dims, iov->N, iov->dims, CAST_UP(PTR_PASS(data)), itop_apply, itop_del); } bart-0.4.02/src/iter/itop.h000066400000000000000000000006411320577655200154200ustar00rootroot00000000000000 #include "iter/iter2.h" 
struct operator_s; struct operator_p_s; struct linop_s; struct iter_monitor_s; const struct operator_s* itop_create( italgo_fun2_t italgo, iter_conf* iconf, const float* init, const struct operator_s* model_op, unsigned int num_funs, const struct operator_p_s* prox_funs[num_funs], const struct linop_s* prox_linops[num_funs], struct iter_monitor_s* monitor); bart-0.4.02/src/iter/lad.c000066400000000000000000000115671320577655200152110ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * 2012-2016 Martin Uecker * 2014 Frank Ong */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/ops.h" #include "num/iovec.h" #include "linops/linop.h" #include "misc/debug.h" #include "misc/misc.h" #include "iter/iter.h" #include "iter/lsqr.h" #include "lad.h" const struct lad_conf lad_defaults = { 5, 0.1, ~0u, &lsqr_defaults }; /** * Perform iterative, regularized least-absolute derivation reconstruction. */ void lad2( unsigned int N, const struct lad_conf* conf, italgo_fun2_t italgo, iter_conf* iconf, const struct linop_s* model_op, unsigned int num_funs, const struct operator_p_s* prox_funs[num_funs], const struct linop_s* prox_linops[num_funs], const long x_dims[static N], complex float* x, const long y_dims[static N], const complex float* y) { long w_dims[N]; md_select_dims(N, conf->wflags, w_dims, y_dims); complex float* weights = md_alloc_sameplace(N, w_dims, CFL_SIZE, y); complex float* tmp2 = md_alloc_sameplace(N, y_dims, CFL_SIZE, y); // use iterative reweigted least-squares // ADMM may be a better choice though... 
for (int i = 0; i < conf->rwiter; i++) { // recompute weights linop_forward(model_op, N, y_dims, tmp2, N, x_dims, x); md_zsub(N, y_dims, tmp2, tmp2, y); md_zrss(N, y_dims, ~(conf->wflags), weights, tmp2); for (long l = 0; l < md_calc_size(N, w_dims); l++) if (weights[l] != 0.) weights[l] = 1. / sqrtf(MAX(conf->gamma, cabsf(weights[l]))); // solve weighted least-squares wlsqr2(N, conf->lsqr_conf, italgo, iconf, model_op, 1, prox_funs, prox_linops, x_dims, x, y_dims, y, w_dims, weights, NULL); } md_free(tmp2); md_free(weights); } /** * Perform iterative, regularized least-absolute derivation reconstruction. */ void lad( unsigned int N, const struct lad_conf* conf, italgo_fun_t italgo, iter_conf* iconf, const struct linop_s* model_op, const struct operator_p_s* prox_funs, const long x_dims[static N], complex float* x, const long y_dims[static N], const complex float* y) { lad2(N, conf, iter2_call_iter, CAST_UP(&((struct iter_call_s){ { &TYPEID(iter_call_s) }, italgo, iconf })), model_op, (NULL != prox_funs) ? 
1 : 0, &prox_funs, NULL, x_dims, x, y_dims, y); } struct lad_s { operator_data_t base; const struct lad_conf* conf; italgo_fun2_t italgo; iter_conf* iconf; const struct linop_s* model_op; unsigned int num_funs; const struct operator_p_s** prox_funs; const struct linop_s** prox_linops; }; static void lad_apply(const operator_data_t* _data, unsigned int N, void* args[static N]) { assert(2 == N); const struct lad_s* data = CONTAINER_OF(_data, const struct lad_s, base); const struct iovec_s* dom_iov = operator_domain(data->model_op->forward); const struct iovec_s* cod_iov = operator_codomain(data->model_op->forward); lad2(dom_iov->N, data->conf, data->italgo, data->iconf, data->model_op, data->num_funs, data->prox_funs, data->prox_linops, cod_iov->dims, args[0], dom_iov->dims, args[1]); } static void lad_del(const operator_data_t* _data) { const struct lad_s* data = CONTAINER_OF(_data, const struct lad_s, base); linop_free(data->model_op); if (NULL != data->prox_funs) { for (unsigned int i = 0; i < data->num_funs; i++) operator_p_free(data->prox_funs[i]); xfree(data->prox_funs); } if (NULL != data->prox_linops) { for (unsigned int i = 0; i < data->num_funs; i++) linop_free(data->prox_linops[i]); xfree(data->prox_linops); } xfree(data); } const struct operator_s* lad2_create(const struct lad_conf* conf, italgo_fun2_t italgo, iter_conf* iconf, const float* init, const struct linop_s* model_op, unsigned int num_funs, const struct operator_p_s* prox_funs[num_funs], const struct linop_s* prox_linops[num_funs]) { PTR_ALLOC(struct lad_s, data); const struct iovec_s* dom_iov = operator_domain(model_op->forward); const struct iovec_s* cod_iov = operator_codomain(model_op->forward); assert(cod_iov->N == dom_iov->N); // this should be relaxed data->conf = conf; data->italgo = italgo; data->iconf = iconf; data->model_op = linop_clone(model_op); data->num_funs = num_funs; data->prox_funs = *TYPE_ALLOC(const struct operator_p_s*[num_funs]); data->prox_linops = *TYPE_ALLOC(const 
struct linop_s*[num_funs]); assert(NULL == init); for (unsigned int i = 0; i < num_funs; i++) { data->prox_funs[i] = operator_p_ref(prox_funs[i]); data->prox_linops[i] = linop_clone(prox_linops[i]); } return operator_create(cod_iov->N, cod_iov->dims, dom_iov->N, dom_iov->dims, &PTR_PASS(data)->base, lad_apply, lad_del); } bart-0.4.02/src/iter/lad.h000066400000000000000000000020261320577655200152040ustar00rootroot00000000000000 struct lsqr_conf; struct lad_conf { int rwiter; float gamma; unsigned int wflags; const struct lsqr_conf* lsqr_conf; }; extern void lad(unsigned int N, const struct lad_conf* conf, italgo_fun_t italgo, iter_conf* iconf, const struct linop_s* model_op, const struct operator_p_s* thresh_op, const long x_dims[N], complex float* x, const long y_dims[N], const complex float* y); extern void lad2(unsigned int N, const struct lad_conf* conf, italgo_fun2_t italgo, iter_conf* iconf, const struct linop_s* model_op, unsigned int num_funs, const struct operator_p_s* thresh_op[num_funs], const struct linop_s* thresh_funs[num_funs], const long x_dims[N], complex float* x, const long y_dims[N], const complex float* y); extern const struct operator_s* lad2_create(const struct lad_conf* conf, italgo_fun2_t italgo, iter_conf* iconf, const float* init, const struct linop_s* model_op, unsigned int num_funs, const struct operator_p_s* thresh_op[num_funs], const struct linop_s* thresh_funs[num_funs]); bart-0.4.02/src/iter/lsqr.c000066400000000000000000000166121320577655200154260ustar00rootroot00000000000000/* Copyright 2014,2017. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * 2012-2017 Martin Uecker * 2014 Frank Ong * 2014,2017 Jon Tamir */ #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/ops.h" #include "num/iovec.h" #include "linops/linop.h" #include "linops/someops.h" #include "misc/debug.h" #include "misc/types.h" #include "misc/misc.h" #include "iter/iter.h" #include "iter/iter2.h" #include "iter/itop.h" #include "lsqr.h" const struct lsqr_conf lsqr_defaults = { .lambda = 0., .it_gpu = false }; struct lsqr_data { INTERFACE(operator_data_t); float l2_lambda; long size; const struct linop_s* model_op; }; static DEF_TYPEID(lsqr_data); static void normaleq_l2_apply(const operator_data_t* _data, unsigned int N, void* args[static N]) { const struct lsqr_data* data = CAST_DOWN(lsqr_data, _data); assert(2 == N); linop_normal_unchecked(data->model_op, args[0], args[1]); md_axpy(1, MD_DIMS(data->size), args[0], data->l2_lambda, args[1]); } static void normaleq_del(const operator_data_t* _data) { const struct lsqr_data* data = CAST_DOWN(lsqr_data, _data); linop_free(data->model_op); xfree(data); } /** * Operator for iterative, multi-regularized least-squares reconstruction */ const struct operator_s* lsqr2_create(const struct lsqr_conf* conf, italgo_fun2_t italgo, iter_conf* iconf, const float* init, const struct linop_s* model_op, const struct operator_s* precond_op, unsigned int num_funs, const struct operator_p_s* prox_funs[num_funs], const struct linop_s* prox_linops[num_funs], struct iter_monitor_s* monitor) { PTR_ALLOC(struct lsqr_data, data); SET_TYPEID(lsqr_data, data); const struct iovec_s* iov = NULL; if (NULL == model_op) { assert(0 < num_funs); iov = linop_domain(prox_linops[0]); data->model_op = NULL; } else { iov = operator_domain(model_op->forward); data->model_op = linop_clone(model_op); } data->l2_lambda = conf->lambda; data->size = 2 * md_calc_size(iov->N, iov->dims); // FIXME: assume complex const struct operator_s* normaleq_op = NULL; const struct operator_s* adjoint = NULL; if 
(NULL != model_op) { normaleq_op = operator_create(iov->N, iov->dims, iov->N, iov->dims, CAST_UP(PTR_PASS(data)), normaleq_l2_apply, normaleq_del); adjoint = operator_ref(model_op->adjoint); } else { PTR_FREE(data); } if (NULL != precond_op) { const struct operator_s* tmp; tmp = normaleq_op; normaleq_op = operator_chain(normaleq_op, precond_op); operator_free(tmp); tmp = adjoint; adjoint = operator_chain(adjoint, precond_op); operator_free(tmp); } const struct operator_s* itop_op = itop_create(italgo, iconf, init, normaleq_op, num_funs, prox_funs, prox_linops, monitor); if (conf->it_gpu) { debug_printf(DP_DEBUG1, "lsqr: add GPU wrapper\n"); itop_op = operator_gpu_wrapper(itop_op); } const struct operator_s* lsqr_op; if (NULL != adjoint) lsqr_op = operator_chain(adjoint, itop_op); else lsqr_op = operator_ref(itop_op); operator_free(normaleq_op); operator_free(itop_op); operator_free(adjoint); return lsqr_op; } /** * Perform iterative, multi-regularized least-squares reconstruction */ void lsqr2(unsigned int N, const struct lsqr_conf* conf, italgo_fun2_t italgo, iter_conf* iconf, const struct linop_s* model_op, unsigned int num_funs, const struct operator_p_s* prox_funs[num_funs], const struct linop_s* prox_linops[num_funs], const long x_dims[static N], complex float* x, const long y_dims[static N], const complex float* y, const struct operator_s* precond_op, struct iter_monitor_s* monitor) { // nicer, but is still missing some features const struct operator_s* op = lsqr2_create(conf, italgo, iconf, NULL, model_op, precond_op, num_funs, prox_funs, prox_linops, monitor); operator_apply(op, N, x_dims, x, N, y_dims, y); operator_free(op); } /** * Perform iterative, regularized least-squares reconstruction. 
*/ void lsqr(unsigned int N, const struct lsqr_conf* conf, italgo_fun_t italgo, iter_conf* iconf, const struct linop_s* model_op, const struct operator_p_s* thresh_op, const long x_dims[static N], complex float* x, const long y_dims[static N], const complex float* y, const struct operator_s* precond_op) { lsqr2(N, conf, iter2_call_iter, CAST_UP(&((struct iter_call_s){ { &TYPEID(iter_call_s) }, italgo, iconf })), model_op, (NULL != thresh_op) ? 1 : 0, &thresh_op, NULL, x_dims, x, y_dims, y, precond_op, NULL); } const struct operator_s* wlsqr2_create( const struct lsqr_conf* conf, italgo_fun2_t italgo, iter_conf* iconf, const float* init, const struct linop_s* model_op, const struct linop_s* weights, const struct operator_s* precond_op, unsigned int num_funs, const struct operator_p_s* prox_funs[num_funs], const struct linop_s* prox_linops[num_funs], struct iter_monitor_s* monitor) { struct linop_s* op = linop_chain(model_op, weights); const struct operator_s* lsqr_op = lsqr2_create(conf, italgo, iconf, init, op, precond_op, num_funs, prox_funs, prox_linops, monitor); const struct operator_s* wlsqr_op = operator_chain(weights->forward, lsqr_op); operator_free(lsqr_op); linop_free(op); return wlsqr_op; } void wlsqr2(unsigned int N, const struct lsqr_conf* conf, italgo_fun2_t italgo, iter_conf* iconf, const struct linop_s* model_op, unsigned int num_funs, const struct operator_p_s* prox_funs[num_funs], const struct linop_s* prox_linops[num_funs], const long x_dims[static N], complex float* x, const long y_dims[static N], const complex float* y, const long w_dims[static N], const complex float* w, const struct operator_s* precond_op) { unsigned int flags = 0; for (unsigned int i = 0; i < N; i++) if (1 < w_dims[i]) flags = MD_SET(flags, i); struct linop_s* weights = linop_cdiag_create(N, y_dims, flags, w); #if 1 struct linop_s* op = linop_chain(model_op, weights); complex float* wy = md_alloc_sameplace(N, y_dims, CFL_SIZE, y); linop_forward(weights, N, y_dims, wy, N, 
y_dims, y); lsqr2(N, conf, italgo, iconf, op, num_funs, prox_funs, prox_linops, x_dims, x, y_dims, wy, precond_op, NULL); md_free(wy); linop_free(op); #else const struct operator_s* op = wlsqr2_create(conf, italgo, iconf, model_op, weights, precond_op, num_funs, prox_funs, prox_linops); operator_apply(op, N, x_dims, x, N, y_dims, y); #endif linop_free(weights); } // A^H W W A - A^H W W y void wlsqr(unsigned int N, const struct lsqr_conf* conf, italgo_fun_t italgo, iter_conf* iconf, const struct linop_s* model_op, const struct operator_p_s* thresh_op, const long x_dims[static N], complex float* x, const long y_dims[static N], const complex float* y, const long w_dims[static N], const complex float* w, const struct operator_s* precond_op) { wlsqr2(N, conf, iter2_call_iter, CAST_UP(&((struct iter_call_s){ { &TYPEID(iter_call_s) }, italgo, iconf })), model_op, (NULL != thresh_op) ? 1 : 0, &thresh_op, NULL, x_dims, x, y_dims, y, w_dims, w, precond_op); } bart-0.4.02/src/iter/lsqr.h000066400000000000000000000060471320577655200154340ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #ifndef __LSQR_H #define __LSQR_H 1 #include "iter/iter.h" #include "iter/iter2.h" #include "misc/cppwrap.h" struct operator_s; struct operator_p_s; struct lsqr_conf { float lambda; _Bool it_gpu; }; struct iter_monitor_s; extern const struct lsqr_conf lsqr_defaults; extern const struct operator_s* lsqr2_create(const struct lsqr_conf* conf, italgo_fun2_t italgo, iter_conf* iconf, const float* init, const struct linop_s* model_op, const struct operator_s* precond_op, unsigned int num_funs, const struct operator_p_s* prox_funs[__VLA2(num_funs)], const struct linop_s* prox_linops[__VLA2(num_funs)], struct iter_monitor_s* monitor); extern const struct operator_s* wlsqr2_create( const struct lsqr_conf* conf, italgo_fun2_t italgo, iter_conf* iconf, const float* init, const struct linop_s* model_op, const struct linop_s* weights, const struct operator_s* precond_op, unsigned int num_funs, const struct operator_p_s* prox_funs[__VLA2(num_funs)], const struct linop_s* prox_linops[__VLA2(num_funs)], struct iter_monitor_s* monitor); extern void lsqr( unsigned int N, const struct lsqr_conf* conf, italgo_fun_t italgo, iter_conf* iconf, const struct linop_s* model_op, const struct operator_p_s* thresh_op, const long x_dims[__VLA(N)], _Complex float* x, const long y_dims[__VLA(N)], const _Complex float* y, const struct operator_s* precond_op); extern void wlsqr( unsigned int N, const struct lsqr_conf* conf, italgo_fun_t italgo, iter_conf* iconf, const struct linop_s* model_op, const struct operator_p_s* thresh_op, const long x_dims[__VLA(N)], _Complex float* x, const long y_dims[__VLA(N)], const _Complex float* y, const long w_dims[__VLA(N)], const _Complex float* w, const struct operator_s* precond_op); extern void lsqr2( unsigned int N, const struct lsqr_conf* conf, italgo_fun2_t italgo, iter_conf* iconf, const struct linop_s* model_op, unsigned int num_funs, const struct operator_p_s* prox_funs[__VLA2(num_funs)], const struct linop_s* prox_linops[__VLA2(num_funs)], const long 
x_dims[__VLA(N)], _Complex float* x, const long y_dims[__VLA(N)], const _Complex float* y, const struct operator_s* precond_op, struct iter_monitor_s* monitor); extern void wlsqr2( unsigned int N, const struct lsqr_conf* conf, italgo_fun2_t italgo, iter_conf* iconf, const struct linop_s* model_op, unsigned int num_funs, const struct operator_p_s* prox_funs[__VLA2(num_funs)], const struct linop_s* prox_linops[__VLA2(num_funs)], const long x_dims[__VLA(N)], complex float* x, const long y_dims[__VLA(N)], const complex float* y, const long w_dims[__VLA(N)], const complex float* w, const struct operator_s* precond_op); #include "misc/cppwrap.h" #endif bart-0.4.02/src/iter/misc.c000066400000000000000000000020101320577655200153630ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014 Frank Ong * 2015 Martin Uecker */ #include "num/multind.h" #include "num/ops.h" #include "num/iovec.h" #include "num/rand.h" #include "misc/misc.h" #include "iter/italgos.h" #include "iter/iter2.h" #include "iter/vec.h" #include "misc.h" double iter_power(unsigned int maxiter, const struct operator_s* normaleq_op, long size, float* u) { return power(maxiter, size, select_vecops(u), OPERATOR2ITOP(normaleq_op), u); } double estimate_maxeigenval(const struct operator_s* op) { const struct iovec_s* io = operator_domain(op); long size = md_calc_size(io->N, io->dims); void* x = md_alloc(io->N, io->dims, io->size); md_gaussian_rand(io->N, io->dims, x); double max_eval = iter_power(30, op, 2 * size, x); md_free(x); return max_eval; } bart-0.4.02/src/iter/misc.h000066400000000000000000000006111320577655200153750ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ struct operator_s; extern double iter_power(unsigned int maxiter, const struct operator_s* normaleq_op, long size, float* u); extern double estimate_maxeigenval(const struct operator_s* op); bart-0.4.02/src/iter/monitor.c000066400000000000000000000042621320577655200161320ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Martin Uecker */ #include #include "misc/debug.h" #include "misc/types.h" #include "misc/misc.h" #include "iter/vec.h" #include "monitor.h" void iter_monitor(struct iter_monitor_s* monitor, const struct vec_iter_s* ops, const float* x) { if ((NULL != monitor) && (NULL != monitor->fun)) monitor->fun(monitor, ops, x); } void iter_history(struct iter_monitor_s* monitor, const struct iter_history_s* hist) { if ((NULL != monitor) && (NULL != monitor->record)) monitor->record(monitor, hist); } struct monitor_default_s { INTERFACE(iter_monitor_t); long N; const float* image_truth; double it_norm; void* data; float (*objective)(const void* data, const float* x); }; static DEF_TYPEID(monitor_default_s); static void monitor_default_fun(struct iter_monitor_s* _data, const struct vec_iter_s* vops, const float* x) { struct monitor_default_s* data = CAST_DOWN(monitor_default_s, _data); double err = -1.; double obj = -1.; long N = data->N; if (NULL != data->image_truth) { if (-1. 
== data->it_norm) data->it_norm = vops->norm(N, data->image_truth); float* x_err = vops->allocate(N); vops->sub(N, x_err, data->image_truth, x); err = vops->norm(N, x_err) / data->it_norm; vops->del(x_err); } if (NULL != data->objective) obj = data->objective(data->data, x); debug_printf(DP_DEBUG4, "Objective: %f, Error: %f\n", obj, err); data->INTERFACE.obj = obj; data->INTERFACE.err = err; } struct iter_monitor_s* create_monitor(long N, const float* image_truth, void* data, float (*objective)(const void* data, const float* x)) { PTR_ALLOC(struct monitor_default_s, monitor); SET_TYPEID(monitor_default_s, monitor); monitor->N = N; monitor->image_truth = image_truth; monitor->it_norm = -1.; monitor->data = data; monitor->objective = objective; monitor->INTERFACE.fun = monitor_default_fun; monitor->INTERFACE.record = NULL; monitor->INTERFACE.obj = -1.; monitor->INTERFACE.err = -1.; return CAST_UP(PTR_PASS(monitor)); } bart-0.4.02/src/iter/monitor.h000066400000000000000000000017261320577655200161410ustar00rootroot00000000000000 #ifndef __ITER_MONITOR_H #define __ITER_MONITOR_H struct iter_monitor_s; struct vec_iter_s; struct typeid_s; struct iter_history_s { const struct typeid_s* TYPEID; }; typedef struct iter_history_s iter_history_t; typedef void (*iter_monitor_fun_t)(struct iter_monitor_s* data, const struct vec_iter_s* ops, const float* x); typedef void (*iter_history_fun_t)(struct iter_monitor_s* data, const struct iter_history_s*); struct iter_monitor_s { const struct typeid_s* TYPEID; iter_monitor_fun_t fun; iter_history_fun_t record; double obj; double err; }; typedef struct iter_monitor_s iter_monitor_t; extern void iter_monitor(struct iter_monitor_s* monitor, const struct vec_iter_s* ops, const float* x); extern void iter_history(struct iter_monitor_s* monitor, const struct iter_history_s*); extern struct iter_monitor_s* create_monitor(long N, const float* image_truth, void* data, float (*object)(const void* data, const float* x)); #endif // 
__ITER_MONITOR_H bart-0.4.02/src/iter/niht.c000066400000000000000000000175331320577655200154120ustar00rootroot00000000000000/* Copyright 2014-2016. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * Copyright 2017. University of Oxford. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Sofia Dimoudi * * * * Blumensath T, Davies ME. Normalized iterative hard thresholding: Guaranteed * stability and performance. IEEE Journal of selected topics in signal * processing. 2010 Apr;4(2):298-309. * * Blanchard JD, Tanner J. Performance comparisons of greedy algorithms in * compressed sensing. Numerical Linear Algebra with Applications. * 2015 Mar 1;22(2):254-82. * */ #include #include #include #include #include "num/ops.h" #include "misc/debug.h" #include "misc/misc.h" #include "misc/types.h" #include "iter/italgos.h" #include "iter/vec.h" #include "iter/monitor.h" #include "niht.h" static void niht_imdom(const struct niht_conf_s* conf, const struct vec_iter_s* vops, struct iter_op_s op, struct iter_op_p_s thresh, float* x, const float* b, struct iter_monitor_s* monitor) { double rsnew = 1.; // current residual double rsnot = 1.; // initial residual double rsold = 1.; // previous residual float mu = 1.; // step size unsigned int ic = 0; // iteration counter for criterion 3 unsigned int iter = 0; long N = conf->N; float* r = vops->allocate(N); float* g = vops->allocate(N); // negative gradient of ||y - Ax||^2 with non-zero support float* m = vops->allocate(N); // non-zero support mask rsnot = vops->norm(N, b); // initial residual norm rsold = rsnot; // create initial support if (!conf->do_warmstart){ //x_0 = 0, take support from b iter_op_p_call(thresh, 1.0, m, b); vops->zmul(N/2, (complex float*)x, (complex float*)b, (complex float*)m); } else{ // x_0 has an initial value, take support from x iter_op_p_call(thresh, 1.0, m, x); vops->zmul(N/2, 
(complex float*)x, (complex float*)x, (complex float*)m); } for (iter = 0; iter < conf->maxiter; iter++) { iter_monitor(monitor, vops, x); iter_op_call(op, r, x); // r = A x vops->xpay(N, -1., r, b); // r = b - r = b - A x. // calculate step size. // 1. apply support x->g vops->zmul(N/2, (complex float*)g, (complex float*)r, (complex float*)m); //mu = ||g_n||^2 / ||A g_n||^2 double num = vops->dot(N, g, g); iter_op_call(op, g, g); double den = vops->dot(N, g, g); mu = num / den; //step size rsnew = vops->norm(N, r); debug_printf(DP_DEBUG3, "\n#It %03d relative residual r / r_0: %f \n", iter, rsnew / rsnot); // Stopping criteria: Blanchard and Tanner 2015 // TODO: select appropriate epsilon and other criteria values if (rsnew < conf->epsilon) // residual is small break; if (rsnew > 100.0 * rsnot){ // algorithm is diverging r_l > 100*r_0 break; } if (fabs(rsnew - rsold) <= 1.0E-06f){ // no significant change in residual debug_printf(DP_INFO, "\n*** rsnew - rsold = %f **\n", fabs(rsnew - rsold) ); ic++; if (15 == ic) // in 16 iterations. Normally 1e-06 break; // more appropriate for noisy measurements } // where convergence will occur with larger residual vops->axpy(N, x, mu, r); // update solution: xk+1 = xk + mu rk+1 iter_op_p_call(thresh, 1.0, m, x); // apply thresholding Hs(xk+1) vops->zmul(N/2, (complex float*)x, (complex float*)x, (complex float*)m); rsold = rsnew; // keep residual for comparison } debug_printf(DP_DEBUG3, "\n"); debug_printf(DP_DEBUG2, "\n#absolute residual: %f\n", rsnew); debug_printf(DP_DEBUG2, "\n#relative signal residual: %f\n\n", rsnew / rsnot); vops->del(r); vops->del(g); vops->del(m); } /** * Normalised Iterative Hard Thresholding/NIHT to solve min || b - Ax ||_2 s.t. || T x ||_0 <= k * using an adaptive step size with the iteration: x_n+1 = H_k (x_n + mu_n(A^T (y - A x_n)) * where H_k(x) = support(x) the hard thresholding operator, keeps the k largest elements of x * mu_n the adaptive step size. 
* * @param conf configuration params, eg maxiter, epsilon * @param trans linear transform operator, eg wavelets * @param vops vector ops definition * @param op linear operator, e.g. A * @param thresh NIHT threshold function * @param x initial estimate * @param b observations * @param monitor compute objective value, errors, etc. */ void niht(const struct niht_conf_s* conf, const struct niht_transop* trans, const struct vec_iter_s* vops, struct iter_op_s op, struct iter_op_p_s thresh, float* x, const float* b, struct iter_monitor_s* monitor) { if (0 == conf->trans){ // do NIHT in image domain niht_imdom(conf, vops, op, thresh, x, b, monitor); return; } double rsnew = 1.; // current residual double rsnot = 1.; // initial residual double rsold = 1.; // previous residual float mu = 1.; // step size unsigned int ic = 0; // iteration counter for criterion 3 unsigned int iter = 0; long N = conf->N; long WN = trans->N; float* r = vops->allocate(N); float* g = vops->allocate(N); // negative gradient of ||y - Ax||^2 with non-zero support float* wg = vops->allocate(WN); // wavelet transform of g float* wx = vops->allocate(WN); // wavelet transform of x float* wm = vops->allocate(WN); // nonzero support mask with wavelet dimensions rsnot = vops->norm(N, b); // initial residual norm rsold = rsnot; // create initial support if (!conf->do_warmstart){ //x_0 = 0, take support from b iter_op_call(trans->forward, wx, b); iter_op_p_call(thresh, 1.0, wm, wx); //produce mask by thresholding vops->zmul(WN/2, (complex float*)wx, (complex float*)wx, (complex float*)wm); // apply mask } else { // x_0 has an initial value, take support from x iter_op_call(trans->forward, wx, x); iter_op_p_call(thresh, 1.0, wm, wx); vops->zmul(WN/2, (complex float*)wx, (complex float*)wx, (complex float*)wm); } iter_op_call(trans->adjoint, x, wx); for (iter = 0; iter < conf->maxiter; iter++) { iter_monitor(monitor, vops, x); iter_op_call(op, r, x); // r = A x vops->xpay(N, -1., r, b); // r = b - r = b - A x. 
// calculate step size. // 1. apply support x->g iter_op_call(trans->forward, wg, r); vops->zmul(WN/2, (complex float*)wg, (complex float*)wg, (complex float*)wm); iter_op_call(trans->adjoint, g, wg); // 2. mu = ||g_n||^2 / ||A g_n||^2 double num = vops->dot(N, g, g); iter_op_call(op, g, g); double den = vops->dot(N, g, g); mu = num / den; debug_printf(DP_DEBUG3, "\n#step size: %f\n", mu); rsnew = vops->norm(N, r); debug_printf(DP_DEBUG3, "\n#It %03d relative residual r / r_0: %f \n", iter, rsnew / rsnot); // Stopping criteria: Blanchard and Tanner 2015 if (rsnew < conf->epsilon) // residual is small break; if (rsnew > 100.0 * rsnot) // algorithm is diverging r_l > 100*r_0 break; if (fabs(rsnew - rsold) <= 1.0E-06f) { // no significant change in residual debug_printf(DP_INFO, "\n*** rsnew - rsold = %f **\n", fabs(rsnew - rsold)); ic++; if (15 == ic) // in 16 iterations. Normally 1e-06 break; // more appropriate for noisy measurements } // where convergence will occur with larger residual vops->axpy(N, x, mu, r); // update solution: xk+1 = xk + mu rk+1 iter_op_call(trans->forward, wx, x); iter_op_p_call(thresh, 1.0, wm, wx); // apply thresholding Hs(xk+1) vops->zmul(WN/2, (complex float*)wx, (complex float*)wx, (complex float*)wm); iter_op_call(trans->adjoint, x, wx); rsold = rsnew; // keep residual for comparison } debug_printf(DP_DEBUG3, "\n"); debug_printf(DP_DEBUG2, "\n#absolute residual: %f\n", rsnew); debug_printf(DP_DEBUG2, "\n#relative signal residual: %f\n\n", rsnew / rsnot); vops->del(r); vops->del(g); vops->del(wg); vops->del(wx); vops->del(wm); } bart-0.4.02/src/iter/niht.h000066400000000000000000000023201320577655200154030ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2017. University of Oxford. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #ifndef __NIHT_H #define __NIHT_H /** * struct containing linear transform operator for NIHT (e.g. wavelets) * * @param forward the forward transform operator * @param adjoint the adjoint transform operator * @param N length of transform vector in floats */ struct niht_transop { struct iter_op_s forward; struct iter_op_s adjoint; long N; }; /** * Store information about NIHT algorithm configuration. * * @param maxiter maximum iteration * @param epsilon stopping criterion * @param N length of image vector in floats * @param trans flag for use of transform (0/1) * @param do_warmstart flag for initial x vector */ struct niht_conf_s { unsigned int maxiter; float epsilon; long N; int trans; _Bool do_warmstart; }; void niht(const struct niht_conf_s* conf, const struct niht_transop* trans, const struct vec_iter_s* vops, struct iter_op_s op, struct iter_op_p_s thresh, float* x, const float* b, struct iter_monitor_s* monitor); #endif // __NIHT_H bart-0.4.02/src/iter/prox.c000066400000000000000000000402041320577655200154270ustar00rootroot00000000000000/* Copyright 2014-2017. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014-2017 Jon Tamir * 2016 Martin Uecker */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/ops.h" #include "num/iovec.h" #ifdef USE_CUDA #include "num/gpuops.h" #endif #include "linops/linop.h" #include "iter/iter.h" #include "misc/misc.h" #include "prox.h" /** * Proximal function of f is defined as * (prox_f)(z) = arg min_x 0.5 || z - x ||_2^2 + f(x) * * (prox_{mu f})(z) = arg min_x 0.5 || z - x ||_2^2 + mu f(x) */ /** * Data for computing prox_normaleq_fun: * Proximal function for f(z) = 0.5 || y - A z ||_2^2. 
* * @param op operator that applies A^H A * @param cgconf conf file for conjugate gradient iter interface * @param adj A^H y * @param size size of z */ struct prox_normaleq_data { INTERFACE(operator_data_t); const struct linop_s* op; void* cgconf; float* adj; long size; }; static DEF_TYPEID(prox_normaleq_data); /** * Proximal function for f(z) = 0.5 || y - A z ||_2^2. * Solution is (A^H A + (1/mu) I)z = A^H y + (1/mu)(x_plus_u) * * @param prox_data should be of type prox_normaleq_data * @param mu proximal penalty * @param z output * @param x_plus_u input */ static void prox_normaleq_fun(const operator_data_t* prox_data, float mu, float* z, const float* x_plus_u) { struct prox_normaleq_data* pdata = CAST_DOWN(prox_normaleq_data, prox_data); if (0 == mu) { md_copy(1, MD_DIMS(pdata->size), z, x_plus_u, FL_SIZE); } else { float rho = 1. / mu; float* b = md_alloc_sameplace(1, MD_DIMS(pdata->size), FL_SIZE, x_plus_u); md_copy(1, MD_DIMS(pdata->size), b, pdata->adj, FL_SIZE); md_axpy(1, MD_DIMS(pdata->size), b, rho, x_plus_u); if (NULL == pdata->op->norm_inv) { struct iter_conjgrad_conf* cg_conf = pdata->cgconf; cg_conf->l2lambda = rho; iter_conjgrad(CAST_UP(cg_conf), pdata->op->normal, NULL, pdata->size, z, (float*)b, NULL); } else { linop_norm_inv_unchecked(pdata->op, rho, (complex float*)z, (const complex float*)b); } md_free(b); } } static void prox_normaleq_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { prox_normaleq_fun(_data, mu, (float*)dst, (const float*)src); } static void prox_normaleq_del(const operator_data_t* _data) { struct prox_normaleq_data* pdata = CAST_DOWN(prox_normaleq_data, _data); free(pdata->cgconf); md_free(pdata->adj); free(pdata); } const struct operator_p_s* prox_normaleq_create(const struct linop_s* op, const complex float* y) { PTR_ALLOC(struct prox_normaleq_data, pdata); SET_TYPEID(prox_normaleq_data, pdata); PTR_ALLOC(struct iter_conjgrad_conf, cgconf); *cgconf = iter_conjgrad_defaults; 
cgconf->maxiter = 10; cgconf->l2lambda = 0; pdata->cgconf = PTR_PASS(cgconf); pdata->op = op; pdata->size = 2 * md_calc_size(linop_domain(op)->N, linop_domain(op)->dims); pdata->adj = md_alloc_sameplace(1, &(pdata->size), FL_SIZE, y); linop_adjoint_unchecked(op, (complex float*)pdata->adj, y); return operator_p_create(linop_domain(op)->N, linop_domain(op)->dims, linop_domain(op)->N, linop_domain(op)->dims, CAST_UP(PTR_PASS(pdata)), prox_normaleq_apply, prox_normaleq_del); } /** * Data for computing prox_leastsquares_fun: * Proximal function for f(z) = lambda / 2 || y - z ||_2^2. * * @param y * @param lambda regularization * @param size size of z */ struct prox_leastsquares_data { INTERFACE(operator_data_t); const float* y; float lambda; long size; }; static DEF_TYPEID(prox_leastsquares_data); /** * Proximal function for f(z) = lambda / 2 || y - z ||_2^2. * Solution is z = (mu * lambda * y + x_plus_u) / (mu * lambda + 1) * * @param prox_data should be of type prox_leastsquares_data * @param mu proximal penalty * @param z output * @param x_plus_u input */ static void prox_leastsquares_fun(const operator_data_t* prox_data, float mu, float* z, const float* x_plus_u) { struct prox_leastsquares_data* pdata = CAST_DOWN(prox_leastsquares_data, prox_data); md_copy(1, MD_DIMS(pdata->size), z, x_plus_u, FL_SIZE); if (0 != mu) { if (NULL != pdata->y) md_axpy(1, MD_DIMS(pdata->size), z, pdata->lambda * mu, pdata->y); md_smul(1, MD_DIMS(pdata->size), z, z, 1. 
/ (mu * pdata->lambda + 1)); } } static void prox_leastsquares_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { prox_leastsquares_fun(_data, mu, (float*)dst, (const float*)src); } static void prox_leastsquares_del(const operator_data_t* _data) { xfree(CAST_DOWN(prox_leastsquares_data, _data)); } const struct operator_p_s* prox_leastsquares_create(unsigned int N, const long dims[N], float lambda, const complex float* y) { PTR_ALLOC(struct prox_leastsquares_data, pdata); SET_TYPEID(prox_leastsquares_data, pdata); pdata->y = (const float*)y; pdata->lambda = lambda; pdata->size = md_calc_size(N, dims) * 2; return operator_p_create(N, dims, N, dims, CAST_UP(PTR_PASS(pdata)), prox_leastsquares_apply, prox_leastsquares_del); } /** * Data for computing prox_l2norm_fun: * Proximal function for f(z) = lambda || z ||_2. * * @param lambda regularization * @param size size of z */ struct prox_l2norm_data { INTERFACE(operator_data_t); float lambda; long size; }; static DEF_TYPEID(prox_l2norm_data); /** * Proximal function for f(z) = lambda || z ||_2. * Solution is z = ( 1 - lambda * mu / norm(z) )_+ * z, * i.e. block soft thresholding * * @param prox_data should be of type prox_l2norm_data * @param mu proximal penalty * @param z output * @param x_plus_u input */ static void prox_l2norm_fun(const operator_data_t* prox_data, float mu, float* z, const float* x_plus_u) { struct prox_l2norm_data* pdata = CAST_DOWN(prox_l2norm_data, prox_data); md_clear(1, MD_DIMS(pdata->size), z, FL_SIZE); double q1 = md_norm(1, MD_DIMS(pdata->size), x_plus_u); if (q1 != 0) { double q2 = 1 - pdata->lambda * mu / q1; if (q2 > 0.) 
md_smul(1, MD_DIMS(pdata->size), z, x_plus_u, q2); } } static void prox_l2norm_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { prox_l2norm_fun(_data, mu, (float*)dst, (const float*)src); } static void prox_l2norm_del(const operator_data_t* _data) { xfree(CAST_DOWN(prox_l2norm_data, _data)); } const struct operator_p_s* prox_l2norm_create(unsigned int N, const long dims[N], float lambda) { PTR_ALLOC(struct prox_l2norm_data, pdata); SET_TYPEID(prox_l2norm_data, pdata); pdata->lambda = lambda; pdata->size = md_calc_size(N, dims) * 2; return operator_p_create(N, dims, N, dims, CAST_UP(PTR_PASS(pdata)), prox_l2norm_apply, prox_l2norm_del); } /** * Data for computing prox_l2ball_fun: * Proximal function for f(z) = Ind{ || y - z ||_2 < eps } * * @param y y * @param eps * @param size size of z */ struct prox_l2ball_data { INTERFACE(operator_data_t); float* y; float eps; long size; #ifdef USE_CUDA const float* gpu_y; #endif }; static DEF_TYPEID(prox_l2ball_data); #ifdef USE_CUDA static const float* get_y(const struct prox_l2ball_data* data, bool gpu) { const float* y = data->y; if (gpu) { if (NULL == data->gpu_y) ((struct prox_l2ball_data*)data)->gpu_y = md_gpu_move(1, MD_DIMS(data->size), data->y, FL_SIZE); y = data->gpu_y; } return y; } #endif /** * Proximal function for f(z) = Ind{ || y - z ||_2 < eps } * Solution is y + (x - y) * q, where q = eps / norm(x - y) if norm(x - y) > eps, 1 o.w. 
* * @param prox_data should be of type prox_l2ball_data * @param mu proximal penalty * @param z output * @param x_plus_u input */ static void prox_l2ball_fun(const operator_data_t* prox_data, float mu, float* z, const float* x_plus_u) { UNUSED(mu); struct prox_l2ball_data* pdata = CAST_DOWN(prox_l2ball_data, prox_data); #ifdef USE_CUDA const float* y = get_y(pdata, cuda_ondevice(x_plus_u)); #else const float* y = pdata->y; #endif if (NULL != y) md_sub(1, MD_DIMS(pdata->size), z, x_plus_u, y); else md_copy(1, MD_DIMS(pdata->size), z, x_plus_u, FL_SIZE); float q1 = md_norm(1, MD_DIMS(pdata->size), z); if (q1 > pdata->eps) md_smul(1, MD_DIMS(pdata->size), z, z, pdata->eps / q1); if (NULL != y) md_add(1, MD_DIMS(pdata->size), z, z, y); } static void prox_l2ball_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { prox_l2ball_fun(_data, mu, (float*)dst, (const float*)src); } static void prox_l2ball_del(const operator_data_t* _data) { struct prox_l2ball_data* data = CAST_DOWN(prox_l2ball_data, _data); #ifdef USE_CUDA if (NULL != data->gpu_y) { md_free((void*)data->gpu_y); } #endif xfree(data); } const struct operator_p_s* prox_l2ball_create(unsigned int N, const long dims[N], float eps, const complex float* y) { PTR_ALLOC(struct prox_l2ball_data, pdata); SET_TYPEID(prox_l2ball_data, pdata); pdata->y = (float*)y; pdata->eps = eps; pdata->size = md_calc_size(N, dims) * 2; #ifdef USE_CUDA pdata->gpu_y = NULL; #endif return operator_p_create(N, dims, N, dims, CAST_UP(PTR_PASS(pdata)), prox_l2ball_apply, prox_l2ball_del); } #if 0 /** * Data for computing prox_thresh_fun: * Proximal function for f(z) = lambda || z ||_1 * * @param thresh function to apply SoftThresh * @param data data used by thresh function * @param lambda regularization */ struct prox_thresh_data { void (*thresh)(void* _data, float lambda, float* _dst, const float* _src); void* data; float lambda; }; /** * Proximal function for f(z) = lambda || z ||_1 * Solution is z = 
SoftThresh(x_plus_u, lambda * mu) * * @param prox_data should be of type prox_thresh_data */ void prox_thresh_fun(void* prox_data, float mu, float* z, const float* x_plus_u) { struct prox_thresh_data* pdata = (struct prox_thresh_data*)prox_data; pdata->thresh(pdata->data, pdata->lambda * mu, z, x_plus_u); } static void prox_thresh_apply(const void* _data, float mu, complex float* dst, const complex float* src) { prox_thresh_fun((void*)_data, mu, (float*)dst, (const float*)src); } static void prox_thresh_del(const void* _data) { free((void*)_data); } const struct operator_p_s* prox_thresh_create(unsigned int N, const long dims[N], float lambda, void (*thresh)(void* _data, float lambda, float* _dst, const float* _src), void* data) { PTR_ALLOC(struct prox_thresh_data, pdata); pdata->thresh = thresh; pdata->lambda = lambda; pdata->data = data; return operator_p_create(N, dims, dims, PTR_PASS(pdata), prox_thresh_apply, prox_thresh_del); } #endif /** * Data for computing prox_zero_fun: * Proximal function for f(z) = 0 * * @param size size of z */ struct prox_zero_data { INTERFACE(operator_data_t); long size; }; static DEF_TYPEID(prox_zero_data); /** * Proximal function for f(z) = 0 * Solution is z = x_plus_u * * @param prox_data should be of type prox_zero_data * @param mu proximal penalty * @param z output * @param x_plus_u input */ static void prox_zero_fun(const operator_data_t* prox_data, float mu, float* z, const float* x_plus_u) { UNUSED(mu); struct prox_zero_data* pdata = CAST_DOWN(prox_zero_data, prox_data); md_copy(1, MD_DIMS(pdata->size), z, x_plus_u, FL_SIZE); } static void prox_zero_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { prox_zero_fun(_data, mu, (float*)dst, (const float*)src); } static void prox_zero_del(const operator_data_t* _data) { xfree(CAST_DOWN(prox_zero_data, _data)); } const struct operator_p_s* prox_zero_create(unsigned int N, const long dims[N]) { PTR_ALLOC(struct prox_zero_data, pdata); 
SET_TYPEID(prox_zero_data, pdata); pdata->size = md_calc_size(N, dims) * 2; return operator_p_create(N, dims, N, dims, CAST_UP(PTR_PASS(pdata)), prox_zero_apply, prox_zero_del); } /** * Data for computing prox_lineq_fun: * Proximal function for f(z) = 1{ A z = y } * Assumes AA^T = I * Solution is z = x - A^T A x + A^T y * * @param op linop A * @param adj A^H y * @param tmp tmp */ struct prox_lineq_data { INTERFACE(operator_data_t); const struct linop_s* op; complex float* adj; complex float* tmp; }; static DEF_TYPEID(prox_lineq_data); static void prox_lineq_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { UNUSED(mu); struct prox_lineq_data* pdata = CAST_DOWN(prox_lineq_data, _data); const struct linop_s* op = pdata->op; linop_normal(op, linop_domain(op)->N, linop_domain(op)->dims, pdata->tmp, src); md_zsub(linop_domain(op)->N, linop_domain(op)->dims, dst, src, pdata->tmp); md_zadd(linop_domain(op)->N, linop_domain(op)->dims, dst, dst, pdata->adj); } static void prox_lineq_del(const operator_data_t* _data) { struct prox_lineq_data* pdata = CAST_DOWN(prox_lineq_data, _data); md_free(pdata->adj); md_free(pdata->tmp); free(pdata); } const struct operator_p_s* prox_lineq_create(const struct linop_s* op, const complex float* y) { PTR_ALLOC(struct prox_lineq_data, pdata); unsigned int N = linop_domain(op)->N; const long* dims = linop_domain(op)->dims; pdata->op = op; pdata->adj = md_alloc_sameplace(N, dims, CFL_SIZE, y); linop_adjoint(op, N, dims, pdata->adj, N, linop_codomain(op)->dims, y); pdata->tmp = md_alloc_sameplace(N, dims, CFL_SIZE, y); return operator_p_create(N, dims, N, dims, CAST_UP(PTR_PASS(pdata)), prox_lineq_apply, prox_lineq_del); } /** * Data for computing prox_ineq_fun: * Proximal function for f(z) = 1{ z <= b } * and f(z) = 1{ z >= b } * * @param b b * @param size size of z */ struct prox_ineq_data { INTERFACE(operator_data_t); const float* b; long size; bool positive; }; static DEF_TYPEID(prox_ineq_data); 
static void prox_ineq_fun(const operator_data_t* _data, float mu, float* dst, const float* src) { UNUSED(mu); struct prox_ineq_data* pdata = CAST_DOWN(prox_ineq_data, _data); if (NULL == pdata->b) (pdata->positive ? md_smax : md_smin)(1, MD_DIMS(pdata->size), dst, src, 0.); else (pdata->positive ? md_max : md_min)(1, MD_DIMS(pdata->size), dst, src, pdata->b); } static void prox_ineq_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { prox_ineq_fun(_data, mu, (float*)dst, (const float*)src); } static void prox_ineq_del(const operator_data_t* _data) { xfree(CAST_DOWN(prox_ineq_data, _data)); } static const struct operator_p_s* prox_ineq_create(unsigned int N, const long dims[N], const complex float* b, bool positive) { PTR_ALLOC(struct prox_ineq_data, pdata); SET_TYPEID(prox_ineq_data, pdata); pdata->size = md_calc_size(N, dims) * 2; pdata->b = (const float*)b; pdata->positive = positive; return operator_p_create(N, dims, N, dims, CAST_UP(PTR_PASS(pdata)), prox_ineq_apply, prox_ineq_del); } /* * Proximal function for less than or equal to: * f(z) = 1{z <= b} */ const struct operator_p_s* prox_lesseq_create(unsigned int N, const long dims[N], const complex float* b) { return prox_ineq_create(N, dims, b, false); } /* * Proximal function for greater than or equal to: * f(z) = 1{z >= b} */ const struct operator_p_s* prox_greq_create(unsigned int N, const long dims[N], const complex float* b) { return prox_ineq_create(N, dims, b, true); } /* * Proximal function for nonnegative orthant * f(z) = 1{z >= 0} */ const struct operator_p_s* prox_nonneg_create(unsigned int N, const long dims[N]) { return prox_ineq_create(N, dims, NULL, true); } struct prox_rvc_data { INTERFACE(operator_data_t); long size; }; static DEF_TYPEID(prox_rvc_data); static void prox_rvc_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { UNUSED(mu); struct prox_rvc_data* pdata = CAST_DOWN(prox_rvc_data, _data); md_zreal(1, 
MD_DIMS(pdata->size), dst, src);
}

/* Operator destructor: frees the prox_rvc_data container. */
static void prox_rvc_del(const operator_data_t* _data)
{
	xfree(CAST_DOWN(prox_rvc_data, _data));
}

/*
 * Proximal function for real-value constraint
 *
 * size is the number of complex elements (no factor 2 here, unlike the
 * float-based prox operators above), because the apply callback works on
 * the complex array directly via md_zreal.
 */
const struct operator_p_s* prox_rvc_create(unsigned int N, const long dims[N])
{
	PTR_ALLOC(struct prox_rvc_data, pdata);
	SET_TYPEID(prox_rvc_data, pdata);

	pdata->size = md_calc_size(N, dims);

	return operator_p_create(N, dims, N, dims, CAST_UP(PTR_PASS(pdata)), prox_rvc_apply, prox_rvc_del);
}
bart-0.4.02/src/iter/prox.h000066400000000000000000000026731320577655200154440ustar00rootroot00000000000000/* Copyright 2014-2016. The Regents of the University of California.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
 */

#ifndef __PROX_H
#define __PROX_H

#include "misc/cppwrap.h"

struct operator_p_s;
struct linop_s;

/* Constructors for proximal operators; each returns a new operator_p_s. */
extern const struct operator_p_s* prox_normaleq_create(const struct linop_s* op, const _Complex float* y);
extern const struct operator_p_s* prox_leastsquares_create(unsigned int N, const long dims[__VLA(N)], float lambda, const _Complex float* y);
extern const struct operator_p_s* prox_l2norm_create(unsigned int N, const long dims[__VLA(N)], float lambda);
extern const struct operator_p_s* prox_l2ball_create(unsigned int N, const long dims[__VLA(N)], float eps, const _Complex float* center);
extern const struct operator_p_s* prox_zero_create(unsigned int N, const long dims[__VLA(N)]);
extern const struct operator_p_s* prox_lineq_create(const struct linop_s* op, const _Complex float* y);
extern const struct operator_p_s* prox_lesseq_create(unsigned int N, const long dims[__VLA(N)], const _Complex float* b);
extern const struct operator_p_s* prox_greq_create(unsigned int N, const long dims[__VLA(N)], const _Complex float* b);
extern const struct operator_p_s* prox_rvc_create(unsigned int N, const long dims[__VLA(N)]);
extern const struct operator_p_s* prox_nonneg_create(unsigned int N, const long
dims[__VLA(N)]);

#include "misc/cppwrap.h"

#endif
bart-0.4.02/src/iter/thresh.c000066400000000000000000000167621320577655200157500ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California.
 * Copyright 2016. Martin Uecker.
 * Copyright 2017. University of Oxford.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
 *
 * Authors:
 * 2013-2014 Jonathan Tamir
 * 2013,2016 Martin Uecker
 * 2017 Sofia Dimoudi
 */

#include
#include
#include

#include "num/multind.h"
#include "num/flpmath.h"
#include "num/ops.h"
#include "num/iovec.h"

#include "linops/linop.h"

#include "misc/misc.h"
#include "misc/debug.h"

#include "thresh.h"


/**
 * Contains parameters for soft and hard threshold functions
 *
 * @param lambda threshold parameter (soft thresholding)
 * @param k number of elements to keep (hard thresholding)
 * @param D number of dimensions
 * @param dim dimensions of input
 * @param str strides of input
 * @param tmp_norm temporary storage for norm computation
 * @param flags bitmask for joint thresholding
 * @param unitary_op linear operator if using unitary soft thresholding
 */
struct thresh_s {

	INTERFACE(operator_data_t);

	float lambda; //for soft thresholding
	unsigned int k; // for hard thresholding

	int D;

	long* dim;
	long* str;

	complex float* tmp_norm;

	unsigned int flags;

	const struct linop_s* unitary_op;
};

static DEF_TYPEID(thresh_s);


/*
 * Soft-thresholds iptr into optr with threshold lambda * mu.
 * For mu == 0 the input is copied through unchanged.
 */
static void softthresh_apply(const operator_data_t* _data, float mu, complex float* optr, const complex float* iptr)
{
	const struct thresh_s* data = CAST_DOWN(thresh_s, _data);

	if (0. == mu)
		md_copy(data->D, data->dim, optr, iptr, CFL_SIZE);
	else
		md_zsoftthresh_core2(data->D, data->dim, data->lambda * mu, data->flags, data->tmp_norm, data->str, optr, data->str, iptr);
}


/*
 * Soft-thresholding in the transform domain of data->unitary_op.
 * For mu == 0 the input is copied through unchanged.
 */
static void unisoftthresh_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src)
{
	const struct thresh_s* data = CAST_DOWN(thresh_s, _data);

	if (0.
== mu)
		md_copy(data->D, data->dim, dst, src, CFL_SIZE);
	else {

		const long* transform_dims = linop_codomain(data->unitary_op)->dims;
		const long* transform_strs = linop_codomain(data->unitary_op)->strs;

		/* per-call scratch buffer in the transform domain, same place as dst */
		complex float* tmp = md_alloc_sameplace(data->D, transform_dims, CFL_SIZE, dst);

		/* forward transform, threshold in place, then adjoint back into dst */
		linop_forward(data->unitary_op, data->D, transform_dims, tmp, data->D, data->dim, src);
		md_zsoftthresh_core2(data->D, transform_dims, data->lambda * mu, data->flags, data->tmp_norm, transform_strs, tmp, transform_strs, tmp);
		linop_adjoint(data->unitary_op, data->D, data->dim, dst, data->D, transform_dims, tmp);

		md_free(tmp);
	}
}

/*
 * Hard-thresholding callback; mu is ignored and data->k is used.
 */
static void hardthresh_apply(const operator_data_t* _data, float mu, complex float* optr, const complex float* iptr)
{
	UNUSED(mu);
	const struct thresh_s* data = CAST_DOWN(thresh_s, _data);

	//only producing the support mask
	md_zhardthresh_mask2(data->D, data->dim, data->k, data->flags, data->tmp_norm, data->str, optr, data->str, iptr);
}

/* Operator destructor: frees dim, str, tmp_norm and the container; unitary_op is not freed. */
static void thresh_del(const operator_data_t* _data)
{
	const struct thresh_s* data = CAST_DOWN(thresh_s, _data);

	xfree(data->dim);
	xfree(data->str);
	md_free(data->tmp_norm);
	xfree(data);
}


/**
 * Proximal operator for l1-norm: f(x) = lambda || x ||_1
 * y = ST(x, lambda)
 * 1) computes resid = MAX( (norm(x) - lambda)/norm(x), 0 )
 * 2) multiplies y = resid * x
 *
 * @param D number of dimensions
 * @param dim dimensions of x
 * @param lambda threshold parameter
 * @param flags bitmask for joint soft-thresholding
 * @param gpu true if using gpu, false if using cpu
 */
const struct operator_p_s* prox_thresh_create(unsigned int D, const long dim[D], const float lambda, const unsigned long flags, bool gpu)
{
	PTR_ALLOC(struct thresh_s, data);
	SET_TYPEID(thresh_s, data);

	data->lambda = lambda;
	data->D = D;
	data->flags = flags;
	data->unitary_op = NULL;

	/* private copy of the dimensions, released in thresh_del */
	data->dim = *TYPE_ALLOC(long[D]);
	md_copy_dims(D, data->dim, dim);

	// norm dimensions are the flagged input dimensions
	long norm_dim[D];
	md_select_dims(D, ~flags, norm_dim, data->dim);

	data->str =
*TYPE_ALLOC(long[D]);
	md_calc_strides(D, data->str, data->dim, CFL_SIZE);

	/* scratch buffer for the norms; on the GPU only when built with USE_CUDA */
#ifdef USE_CUDA
	data->tmp_norm = (gpu ? md_alloc_gpu : md_alloc)(D, norm_dim, CFL_SIZE);
#else
	assert(!gpu);
	data->tmp_norm = md_alloc(D, norm_dim, CFL_SIZE);
#endif

	return operator_p_create(D, dim, D, dim, CAST_UP(PTR_PASS(data)), softthresh_apply, thresh_del);
}


/**
 * Proximal operator for l1-norm with unitary transform: f(x) = lambda || T x ||_1
 *
 * The operator's dimensions are taken from the domain of unitary_op;
 * unitary_op is stored by pointer and is not freed by thresh_del.
 *
 * @param D number of dimensions
 * @param dim dimensions of x
 * @param lambda threshold parameter
 * @param unitary_op unitary linear operator
 * @param flags bitmask for joint soft-thresholding
 * @param gpu true if using gpu, false if using cpu
 */
extern const struct operator_p_s* prox_unithresh_create(unsigned int D, const struct linop_s* unitary_op, const float lambda, const unsigned long flags, bool gpu)
{
	PTR_ALLOC(struct thresh_s, data);
	SET_TYPEID(thresh_s, data);

	data->lambda = lambda;
	data->D = D;
	data->flags = flags;
	data->unitary_op = unitary_op;

	const long* dims = linop_domain(unitary_op)->dims;

	data->dim = *TYPE_ALLOC(long[D]);
	md_copy_dims(D, data->dim, dims);

	data->str = *TYPE_ALLOC(long[D]);
	md_calc_strides(D, data->str, data->dim, CFL_SIZE);

	// norm dimensions are the flagged transform dimensions
	// FIXME should use linop_codomain(unitary_op)->N
	long norm_dim[D];
	md_select_dims(D, ~flags, norm_dim, linop_codomain(unitary_op)->dims);

#ifdef USE_CUDA
	data->tmp_norm = (gpu ? md_alloc_gpu : md_alloc)(D, norm_dim, CFL_SIZE);
#else
	assert(!gpu);
	data->tmp_norm = md_alloc(D, norm_dim, CFL_SIZE);
#endif

	return operator_p_create(D, dims, D, dims, CAST_UP(PTR_PASS(data)), unisoftthresh_apply, thresh_del);
}


/**
 * Thresholding operator for l0-norm: f(x) = || x ||_0 <= k, as used in NIHT algorithm.
 * y = HT(x, k) (hard thresholding, ie keeping the k largest elements).
*
 * @param D number of dimensions
 * @param dim dimensions of x
 * @param k threshold parameter (non-zero elements to keep)
 * @param flags bitmask for joint thresholding
 * @param gpu true if using gpu, false if using cpu
 */
const struct operator_p_s* prox_niht_thresh_create(unsigned int D, const long dim[D], const unsigned int k, const unsigned long flags, bool gpu)
{
	PTR_ALLOC(struct thresh_s, data);
	SET_TYPEID(thresh_s, data);

	/* lambda is not used by hardthresh_apply; it is only zero-initialized */
	data->lambda = 0.;
	data->k = k;
	data->D = D;
	data->flags = flags;
	data->unitary_op = NULL;

	data->dim = *TYPE_ALLOC(long[D]);
	md_copy_dims(D, data->dim, dim);

	// norm dimensions are the flagged input dimensions
	long norm_dim[D];
	md_select_dims(D, ~flags, norm_dim, data->dim);

	data->str = *TYPE_ALLOC(long[D]);
	md_calc_strides(D, data->str, data->dim, CFL_SIZE);

#ifdef USE_CUDA
	data->tmp_norm = (gpu ? md_alloc_gpu : md_alloc)(D, norm_dim, CFL_SIZE);
#else
	assert(!gpu);
	data->tmp_norm = md_alloc(D, norm_dim, CFL_SIZE);
#endif

	return operator_p_create(D, dim, D, dim, CAST_UP(PTR_PASS(data)), hardthresh_apply, thresh_del);
}


/* Frees a threshold operator; thin wrapper around operator_p_free. */
void thresh_free(const struct operator_p_s* o)
{
	operator_p_free(o);
}


/**
 * Change the threshold parameter of the soft threshold function
 *
 * @param o soft threshold prox operator
 * @param lambda new threshold parameter
 */
void set_thresh_lambda(const struct operator_p_s* o, const float lambda)
{
	struct thresh_s* data = CAST_DOWN(thresh_s, operator_p_get_data(o));
	data->lambda = lambda;
}


/**
 * Returns the regularization parameter of the soft threshold function
 *
 * @param o soft threshold prox operator
 */
float get_thresh_lambda(const struct operator_p_s* o)
{
	struct thresh_s* data = CAST_DOWN(thresh_s, operator_p_get_data(o));
	return data->lambda;
}
bart-0.4.02/src/iter/thresh.h000066400000000000000000000020471320577655200157440ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California.
 * Copyright 2017. University of Oxford.
 * All rights reserved.
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifndef __THRESH #define __THRESH #include "misc/cppwrap.h" struct operator_p_s; extern const struct operator_p_s* prox_thresh_create(unsigned int D, const long dim[__VLA(D)], const float lambda, const unsigned long flags, _Bool gpu); extern const struct operator_p_s* prox_niht_thresh_create(unsigned int D, const long dim[D], const unsigned int k, const unsigned long flags, _Bool gpu); extern void thresh_free(const struct operator_p_s* data); extern void set_thresh_lambda(const struct operator_p_s* o, const float lambda); extern float get_thresh_lambda(const struct operator_p_s* o); struct linop_s; extern const struct operator_p_s* prox_unithresh_create(unsigned int D, const struct linop_s* unitary_op, const float lambda, const unsigned long flags, _Bool gpu); #include "misc/cppwrap.h" #endif bart-0.4.02/src/iter/vec.c000066400000000000000000000004741320577655200152210ustar00rootroot00000000000000 #include "num/vecops.h" #include "num/gpuops.h" #include "misc/misc.h" #include "vec.h" // defined in vecops.c and gpuops.c const struct vec_iter_s* select_vecops(const float* x) { #ifdef USE_CUDA return cuda_ondevice(x) ? 
&gpu_iter_ops : &cpu_iter_ops;
#else
	UNUSED(x);
	return &cpu_iter_ops;
#endif
}
bart-0.4.02/src/iter/vec.h000066400000000000000000000020271320577655200152220ustar00rootroot00000000000000

#ifndef __ITER_VEC_H
#define __ITER_VEC_H

/*
 * Table of function pointers for operations on float vectors of length N.
 * Instances are cpu_iter_ops and (with USE_CUDA) gpu_iter_ops; use
 * select_vecops() to pick the one matching a given buffer.
 * NOTE(review): the exact semantics of each entry (e.g. xpay vs. axpy) are
 * defined by the implementations in vecops.c / gpuops.c, not visible here.
 */
struct vec_iter_s {

	float* (*allocate)(long N);
	void (*del)(float* x);
	void (*clear)(long N, float* x);
	void (*copy)(long N, float* a, const float* x);
	void (*swap)(long N, float* a, float* x);

	double (*norm)(long N, const float* x);
	double (*dot)(long N, const float* x, const float* y);

	void (*sub)(long N, float* a, const float* x, const float* y);
	void (*add)(long N, float* a, const float* x, const float* y);

	void (*smul)(long N, float alpha, float* a, const float* x);
	void (*xpay)(long N, float alpha, float* a, const float* x);
	void (*axpy)(long N, float* a, float alpha, const float* x);
	void (*axpbz)(long N, float* out, const float a, const float* x, const float b, const float* z);

	void (*zmul)(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2);
};

#ifdef USE_CUDA
extern const struct vec_iter_s gpu_iter_ops;
#endif
extern const struct vec_iter_s cpu_iter_ops;

extern const struct vec_iter_s* select_vecops(const float* x);

#endif
bart-0.4.02/src/itsense.c000066400000000000000000000117361320577655200151560ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California.
 * Copyright 2016. Martin Uecker.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
*
 * Authors:
 * 2012-2016 Martin Uecker
 *
 *
 * Basic iterative sense reconstruction
 */

#include
#include
#include
#include
#include
#include

#include "num/multind.h"
#include "num/flpmath.h"
#include "num/fft.h"
#include "num/init.h"
#include "num/ops.h"

#include "iter/iter.h"

#include "misc/misc.h"
#include "misc/types.h"
#include "misc/mmio.h"
#include "misc/mri.h"
#include "misc/types.h"



/*
 * State shared by the forward/adjoint SENSE operators: dimensions and
 * strides of the sensitivities, images, k-space data and sampling mask,
 * plus a scratch buffer with the k-space dimensions and the l2
 * regularization weight alpha.
 */
struct sense_data {

	INTERFACE(operator_data_t);

	long sens_dims[DIMS];
	long sens_strs[DIMS];

	long imgs_dims[DIMS];
	long imgs_strs[DIMS];

	long data_dims[DIMS];
	long data_strs[DIMS];

	long mask_dims[DIMS];
	long mask_strs[DIMS];

	const complex float* sens;
	const complex float* pattern;
	complex float* tmp;

	float alpha;
};

static DEF_TYPEID(sense_data);


/*
 * Forward operator: multiply-accumulate images with the sensitivities into
 * out, apply the centered, scaled FFT, then multiply by the pattern.
 */
static void sense_forward(const struct sense_data* data, complex float* out, const complex float* imgs)
{
	md_clear(DIMS, data->data_dims, out, CFL_SIZE);
	md_zfmac2(DIMS, data->sens_dims, data->data_strs, out, data->sens_strs, data->sens, data->imgs_strs, imgs);

	fftc(DIMS, data->data_dims, FFT_FLAGS, out, out);
	fftscale(DIMS, data->data_dims, FFT_FLAGS, out, out);

	md_zmul2(DIMS, data->data_dims, data->data_strs, out, data->data_strs, out, data->mask_strs, data->pattern);
}


/*
 * Adjoint operator: multiply by the conjugate pattern into data->tmp,
 * apply the centered, scaled inverse FFT, then accumulate with the
 * conjugate sensitivities into imgs. Uses data->tmp as scratch; `out`
 * (the k-space input) may alias data->tmp, as in sense_normal.
 */
static void sense_adjoint(const struct sense_data* data, complex float* imgs, const complex float* out)
{
	md_zmulc2(DIMS, data->data_dims, data->data_strs, data->tmp, data->data_strs, out, data->mask_strs, data->pattern);

	ifftc(DIMS, data->data_dims, FFT_FLAGS, data->tmp, data->tmp);
	fftscale(DIMS, data->data_dims, FFT_FLAGS, data->tmp, data->tmp);

	md_clear(DIMS, data->imgs_dims, imgs, CFL_SIZE);
	md_zfmacc2(DIMS, data->sens_dims, data->imgs_strs, imgs, data->data_strs, data->tmp, data->sens_strs, data->sens);
}


/*
 * Normal operator (adjoint after forward) in the generic operator
 * callback form: args[0] is the output, args[1] the input.
 */
static void sense_normal(const operator_data_t* _data, unsigned int N, void* args[N])
{
	const struct sense_data* data = CAST_DOWN(sense_data, _data);

	assert(2 == N);
	float* out = args[0];
	const float* in = args[1];

	sense_forward(data, data->tmp, (const complex
float*)in); sense_adjoint(data, (complex float*)out, data->tmp); } static void sense_reco(struct sense_data* data, complex float* imgs, const complex float* kspace) { complex float* adj = md_alloc(DIMS, data->imgs_dims, CFL_SIZE); md_clear(DIMS, data->imgs_dims, imgs, CFL_SIZE); sense_adjoint(data, adj, kspace); long size = md_calc_size(DIMS, data->imgs_dims); const struct operator_s* op = operator_create(DIMS, data->imgs_dims, DIMS, data->imgs_dims, CAST_UP(data), sense_normal, NULL); struct iter_conjgrad_conf conf = iter_conjgrad_defaults; conf.maxiter = 100; conf.l2lambda = data->alpha; conf.tol = 1.E-3; iter_conjgrad(CAST_UP(&conf), op, NULL, size, (float*)imgs, (const float*)adj, NULL); operator_free(op); md_free(adj); } static bool check_dimensions(struct sense_data* data) { bool ok = true; for (int i = 0; i < 3; i++) { ok &= (data->mask_dims[i] == data->sens_dims[i]); ok &= (data->data_dims[i] == data->sens_dims[i]); ok &= (data->imgs_dims[i] == data->sens_dims[i]); } ok &= (data->data_dims[COIL_DIM] == data->sens_dims[COIL_DIM]); ok &= (data->imgs_dims[MAPS_DIM] == data->sens_dims[MAPS_DIM]); ok &= (1 == data->data_dims[MAPS_DIM]); ok &= (1 == data->mask_dims[COIL_DIM]); ok &= (1 == data->mask_dims[MAPS_DIM]); ok &= (1 == data->imgs_dims[COIL_DIM]); return ok; } static const char usage_str[] = "alpha "; static const char help_str[] = "A simplified implementation of iterative sense reconstruction\n" "with l2-regularization.\n"; int main_itsense(int argc, char* argv[]) { mini_cmdline(&argc, argv, 5, usage_str, help_str); struct sense_data data; SET_TYPEID(sense_data, &data); data.alpha = atof(argv[1]); complex float* kspace = load_cfl(argv[3], DIMS, data.data_dims); data.sens = load_cfl(argv[2], DIMS, data.sens_dims); data.pattern = load_cfl(argv[4], DIMS, data.mask_dims); // 1 2 4 8 md_select_dims(DIMS, ~COIL_FLAG, data.imgs_dims, data.sens_dims); assert(check_dimensions(&data)); complex float* image = create_cfl(argv[5], DIMS, data.imgs_dims); 
md_calc_strides(DIMS, data.sens_strs, data.sens_dims, CFL_SIZE); md_calc_strides(DIMS, data.imgs_strs, data.imgs_dims, CFL_SIZE); md_calc_strides(DIMS, data.data_strs, data.data_dims, CFL_SIZE); md_calc_strides(DIMS, data.mask_strs, data.mask_dims, CFL_SIZE); data.tmp = md_alloc(DIMS, data.data_dims, CFL_SIZE); num_init(); sense_reco(&data, image, kspace); unmap_cfl(DIMS, data.imgs_dims, image); unmap_cfl(DIMS, data.mask_dims, data.pattern); unmap_cfl(DIMS, data.sens_dims, data.sens); unmap_cfl(DIMS, data.data_dims, data.sens); md_free(data.tmp); exit(0); } bart-0.4.02/src/join.c000066400000000000000000000051231320577655200144340ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * Copyright 2015. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013, 2015 Martin Uecker * 2015 Jonathan Tamir */ #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/debug.h" #include "misc/misc.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif static const char usage_str[] = "dimension ... "; static const char help_str[] = "Join input files along {dimensions}. 
All other dimensions must have the same size.\n" "\t Example 1: join 0 slice_001 slice_002 slice_003 full_data\n" "\t Example 2: join 0 `seq -f \"slice_%%03g\" 0 255` full_data\n"; int main_join(int argc, char* argv[]) { bool append = false; const struct opt_s opts[] = { OPT_SET('a', &append, "append - only works for cfl files!"), }; cmdline(&argc, argv, 3, 1000, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); int N = DIMS; int dim = atoi(argv[1]); assert(dim < N); int count = argc - 3; if (append) { count += 1; // FIXME: check for cfl file } long in_dims[count][N]; long offsets[count]; complex float* idata[count]; long sum = 0; // figure out size of output for (int l = 0, i = 0; i < count; i++) { const char* name = NULL; if (append && (i == 0)) { name = argv[argc - 1]; } else { name = argv[2 + l++]; } debug_printf(DP_DEBUG1, "loading %s\n", name); idata[i] = load_cfl(name, N, in_dims[i]); offsets[i] = sum; sum += in_dims[i][dim]; for (int j = 0; j < N; j++) assert((dim == j) || (in_dims[0][j] == in_dims[i][j])); } long out_dims[N]; for (int i = 0; i < N; i++) out_dims[i] = in_dims[0][i]; out_dims[dim] = sum; complex float* out_data = create_cfl(argv[argc - 1], N, out_dims); long ostr[N]; md_calc_strides(N, ostr, out_dims, CFL_SIZE); #pragma omp parallel for for (int i = 0; i < count; i++) { if (!(append && (0 == i))) { long pos[N]; md_singleton_strides(N, pos); pos[dim] = offsets[i]; long istr[N]; md_calc_strides(N, istr, in_dims[i], CFL_SIZE); md_copy_block(N, pos, out_dims, out_data, in_dims[i], idata[i], CFL_SIZE); } unmap_cfl(N, in_dims[i], idata[i]); debug_printf(DP_DEBUG1, "done copying file %d\n", i); } unmap_cfl(N, out_dims, out_data); exit(0); } bart-0.4.02/src/lapacke/000077500000000000000000000000001320577655200147305ustar00rootroot00000000000000bart-0.4.02/src/lapacke/LICENSE000066400000000000000000000030431320577655200157350ustar00rootroot00000000000000 Copyright (c) 2012, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. bart-0.4.02/src/lapacke/README000066400000000000000000000004151320577655200156100ustar00rootroot00000000000000 This is a small subset of LAPACKE needed for BART extracted from the Debian package of lapack with version 3.5.0-4. It is provided here as a workaround for systems without LAPACKE. Add the flag NOLAPACKE=1 to Makefile.local if LAPACKE is not available on the system. 
bart-0.4.02/src/lapacke/lapacke.h000066400000000000000000040464111320577655200165130ustar00rootroot00000000000000/***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
****************************************************************************** * Contents: Native C interface to LAPACK * Author: Intel Corporation * Generated November, 2011 *****************************************************************************/ #ifndef _LAPACKE_H_ #define _LAPACKE_H_ /* * Turn on HAVE_LAPACK_CONFIG_H to redefine C-LAPACK datatypes */ #ifdef HAVE_LAPACK_CONFIG_H #include "lapacke_config.h" #endif #include #ifndef lapack_int #define lapack_int int #endif #ifndef lapack_logical #define lapack_logical lapack_int #endif /* Complex types are structures equivalent to the * Fortran complex types COMPLEX(4) and COMPLEX(8). * * One can also redefine the types with his own types * for example by including in the code definitions like * * #define lapack_complex_float std::complex * #define lapack_complex_double std::complex * * or define these types in the command line: * * -Dlapack_complex_float="std::complex" * -Dlapack_complex_double="std::complex" */ #ifndef LAPACK_COMPLEX_CUSTOM /* Complex type (single precision) */ #ifndef lapack_complex_float #include #define lapack_complex_float float _Complex #endif #ifndef lapack_complex_float_real #define lapack_complex_float_real(z) (creal(z)) #endif #ifndef lapack_complex_float_imag #define lapack_complex_float_imag(z) (cimag(z)) #endif lapack_complex_float lapack_make_complex_float( float re, float im ); /* Complex type (double precision) */ #ifndef lapack_complex_double #include #define lapack_complex_double double _Complex #endif #ifndef lapack_complex_double_real #define lapack_complex_double_real(z) (creal(z)) #endif #ifndef lapack_complex_double_imag #define lapack_complex_double_imag(z) (cimag(z)) #endif lapack_complex_double lapack_make_complex_double( double re, double im ); #endif #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ #ifndef LAPACKE_malloc #define LAPACKE_malloc( size ) malloc( size ) #endif #ifndef LAPACKE_free #define LAPACKE_free( p ) free( p ) #endif #define LAPACK_C2INT( 
x ) (lapack_int)(*((float*)&x )) #define LAPACK_Z2INT( x ) (lapack_int)(*((double*)&x )) #define LAPACK_ROW_MAJOR 101 #define LAPACK_COL_MAJOR 102 #define LAPACK_WORK_MEMORY_ERROR -1010 #define LAPACK_TRANSPOSE_MEMORY_ERROR -1011 /* Callback logical functions of one, two, or three arguments are used * to select eigenvalues to sort to the top left of the Schur form. * The value is selected if function returns TRUE (non-zero). */ typedef lapack_logical (*LAPACK_S_SELECT2) ( const float*, const float* ); typedef lapack_logical (*LAPACK_S_SELECT3) ( const float*, const float*, const float* ); typedef lapack_logical (*LAPACK_D_SELECT2) ( const double*, const double* ); typedef lapack_logical (*LAPACK_D_SELECT3) ( const double*, const double*, const double* ); typedef lapack_logical (*LAPACK_C_SELECT1) ( const lapack_complex_float* ); typedef lapack_logical (*LAPACK_C_SELECT2) ( const lapack_complex_float*, const lapack_complex_float* ); typedef lapack_logical (*LAPACK_Z_SELECT1) ( const lapack_complex_double* ); typedef lapack_logical (*LAPACK_Z_SELECT2) ( const lapack_complex_double*, const lapack_complex_double* ); #include "lapacke_mangling.h" #define LAPACK_lsame LAPACK_GLOBAL(lsame,LSAME) lapack_logical LAPACK_lsame( char* ca, char* cb, lapack_int lca, lapack_int lcb ); /* C-LAPACK function prototypes */ lapack_int LAPACKE_sbdsdc( int matrix_order, char uplo, char compq, lapack_int n, float* d, float* e, float* u, lapack_int ldu, float* vt, lapack_int ldvt, float* q, lapack_int* iq ); lapack_int LAPACKE_dbdsdc( int matrix_order, char uplo, char compq, lapack_int n, double* d, double* e, double* u, lapack_int ldu, double* vt, lapack_int ldvt, double* q, lapack_int* iq ); lapack_int LAPACKE_sbdsqr( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, float* d, float* e, float* vt, lapack_int ldvt, float* u, lapack_int ldu, float* c, lapack_int ldc ); lapack_int LAPACKE_dbdsqr( int matrix_order, char uplo, lapack_int n, 
lapack_int ncvt, lapack_int nru, lapack_int ncc, double* d, double* e, double* vt, lapack_int ldvt, double* u, lapack_int ldu, double* c, lapack_int ldc ); lapack_int LAPACKE_cbdsqr( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, float* d, float* e, lapack_complex_float* vt, lapack_int ldvt, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zbdsqr( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, double* d, double* e, lapack_complex_double* vt, lapack_int ldvt, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_sdisna( char job, lapack_int m, lapack_int n, const float* d, float* sep ); lapack_int LAPACKE_ddisna( char job, lapack_int m, lapack_int n, const double* d, double* sep ); lapack_int LAPACKE_sgbbrd( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, float* ab, lapack_int ldab, float* d, float* e, float* q, lapack_int ldq, float* pt, lapack_int ldpt, float* c, lapack_int ldc ); lapack_int LAPACKE_dgbbrd( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, double* ab, lapack_int ldab, double* d, double* e, double* q, lapack_int ldq, double* pt, lapack_int ldpt, double* c, lapack_int ldc ); lapack_int LAPACKE_cgbbrd( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, lapack_complex_float* ab, lapack_int ldab, float* d, float* e, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* pt, lapack_int ldpt, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zgbbrd( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, lapack_complex_double* ab, lapack_int ldab, double* d, double* e, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* pt, 
lapack_int ldpt, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_sgbcon( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const float* ab, lapack_int ldab, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_dgbcon( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const double* ab, lapack_int ldab, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_cgbcon( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float* ab, lapack_int ldab, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_zgbcon( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double* ab, lapack_int ldab, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_sgbequ( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgbequ( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgbequ( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgbequ( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgbequb( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgbequb( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int 
ku, const double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgbequb( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgbequb( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgbrfs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const float* ab, lapack_int ldab, const float* afb, lapack_int ldafb, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dgbrfs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const double* ab, lapack_int ldab, const double* afb, lapack_int ldafb, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cgbrfs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* afb, lapack_int ldafb, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zgbrfs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* afb, lapack_int ldafb, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_sgbrfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int 
nrhs, const float* ab, lapack_int ldab, const float* afb, lapack_int ldafb, const lapack_int* ipiv, const float* r, const float* c, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dgbrfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const double* ab, lapack_int ldab, const double* afb, lapack_int ldafb, const lapack_int* ipiv, const double* r, const double* c, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_cgbrfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* afb, lapack_int ldafb, const lapack_int* ipiv, const float* r, const float* c, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zgbrfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* afb, lapack_int ldafb, const lapack_int* ipiv, const double* r, const double* c, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_sgbsv( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, float* ab, lapack_int ldab, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int 
LAPACKE_dgbsv( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, double* ab, lapack_int ldab, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgbsv( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgbsv( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgbsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, float* ab, lapack_int ldab, float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* rpivot ); lapack_int LAPACKE_dgbsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, double* ab, lapack_int ldab, double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* rpivot ); lapack_int LAPACKE_cgbsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* rpivot ); lapack_int LAPACKE_zgbsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, 
lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* rpivot ); lapack_int LAPACKE_sgbsvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, float* ab, lapack_int ldab, float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dgbsvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, double* ab, lapack_int ldab, double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_cgbsvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zgbsvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, 
double* params ); lapack_int LAPACKE_sgbtrf( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, float* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_dgbtrf( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, double* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_cgbtrf( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, lapack_complex_float* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_zgbtrf( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, lapack_complex_double* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_sgbtrs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const float* ab, lapack_int ldab, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgbtrs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const double* ab, lapack_int ldab, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgbtrs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgbtrs( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgebak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* scale, lapack_int m, float* v, lapack_int ldv ); lapack_int LAPACKE_dgebak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* scale, lapack_int m, double* v, lapack_int ldv ); lapack_int LAPACKE_cgebak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, 
const float* scale, lapack_int m, lapack_complex_float* v, lapack_int ldv ); lapack_int LAPACKE_zgebak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* scale, lapack_int m, lapack_complex_double* v, lapack_int ldv ); lapack_int LAPACKE_sgebal( int matrix_order, char job, lapack_int n, float* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, float* scale ); lapack_int LAPACKE_dgebal( int matrix_order, char job, lapack_int n, double* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, double* scale ); lapack_int LAPACKE_cgebal( int matrix_order, char job, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, float* scale ); lapack_int LAPACKE_zgebal( int matrix_order, char job, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, double* scale ); lapack_int LAPACKE_sgebrd( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* d, float* e, float* tauq, float* taup ); lapack_int LAPACKE_dgebrd( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* d, double* e, double* tauq, double* taup ); lapack_int LAPACKE_cgebrd( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* d, float* e, lapack_complex_float* tauq, lapack_complex_float* taup ); lapack_int LAPACKE_zgebrd( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* d, double* e, lapack_complex_double* tauq, lapack_complex_double* taup ); lapack_int LAPACKE_sgecon( int matrix_order, char norm, lapack_int n, const float* a, lapack_int lda, float anorm, float* rcond ); lapack_int LAPACKE_dgecon( int matrix_order, char norm, lapack_int n, const double* a, lapack_int lda, double anorm, double* rcond ); lapack_int LAPACKE_cgecon( int matrix_order, char norm, lapack_int n, const lapack_complex_float* a, lapack_int lda, float anorm, float* rcond ); lapack_int 
LAPACKE_zgecon( int matrix_order, char norm, lapack_int n, const lapack_complex_double* a, lapack_int lda, double anorm, double* rcond ); lapack_int LAPACKE_sgeequ( int matrix_order, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgeequ( int matrix_order, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgeequ( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgeequ( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgeequb( int matrix_order, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgeequb( int matrix_order, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgeequb( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgeequb( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgees( int matrix_order, char jobvs, char sort, LAPACK_S_SELECT2 select, lapack_int n, float* a, lapack_int lda, lapack_int* sdim, float* wr, float* wi, float* vs, lapack_int ldvs ); lapack_int LAPACKE_dgees( int matrix_order, char jobvs, char sort, LAPACK_D_SELECT2 select, lapack_int n, double* a, lapack_int lda, lapack_int* sdim, double* wr, double* wi, double* vs, lapack_int ldvs ); 
lapack_int LAPACKE_cgees( int matrix_order, char jobvs, char sort, LAPACK_C_SELECT1 select, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* sdim, lapack_complex_float* w, lapack_complex_float* vs, lapack_int ldvs ); lapack_int LAPACKE_zgees( int matrix_order, char jobvs, char sort, LAPACK_Z_SELECT1 select, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* sdim, lapack_complex_double* w, lapack_complex_double* vs, lapack_int ldvs ); lapack_int LAPACKE_sgeesx( int matrix_order, char jobvs, char sort, LAPACK_S_SELECT2 select, char sense, lapack_int n, float* a, lapack_int lda, lapack_int* sdim, float* wr, float* wi, float* vs, lapack_int ldvs, float* rconde, float* rcondv ); lapack_int LAPACKE_dgeesx( int matrix_order, char jobvs, char sort, LAPACK_D_SELECT2 select, char sense, lapack_int n, double* a, lapack_int lda, lapack_int* sdim, double* wr, double* wi, double* vs, lapack_int ldvs, double* rconde, double* rcondv ); lapack_int LAPACKE_cgeesx( int matrix_order, char jobvs, char sort, LAPACK_C_SELECT1 select, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* sdim, lapack_complex_float* w, lapack_complex_float* vs, lapack_int ldvs, float* rconde, float* rcondv ); lapack_int LAPACKE_zgeesx( int matrix_order, char jobvs, char sort, LAPACK_Z_SELECT1 select, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* sdim, lapack_complex_double* w, lapack_complex_double* vs, lapack_int ldvs, double* rconde, double* rcondv ); lapack_int LAPACKE_sgeev( int matrix_order, char jobvl, char jobvr, lapack_int n, float* a, lapack_int lda, float* wr, float* wi, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr ); lapack_int LAPACKE_dgeev( int matrix_order, char jobvl, char jobvr, lapack_int n, double* a, lapack_int lda, double* wr, double* wi, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr ); lapack_int LAPACKE_cgeev( int matrix_order, char jobvl, char jobvr, lapack_int n, 
lapack_complex_float* a, lapack_int lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr ); lapack_int LAPACKE_zgeev( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* w, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr ); lapack_int LAPACKE_sgeevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, float* a, lapack_int lda, float* wr, float* wi, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* scale, float* abnrm, float* rconde, float* rcondv ); lapack_int LAPACKE_dgeevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, double* a, lapack_int lda, double* wr, double* wi, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* scale, double* abnrm, double* rconde, double* rcondv ); lapack_int LAPACKE_cgeevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* scale, float* abnrm, float* rconde, float* rcondv ); lapack_int LAPACKE_zgeevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* w, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* scale, double* abnrm, double* rconde, double* rcondv ); lapack_int LAPACKE_sgehrd( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgehrd( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, double* a, lapack_int lda, double* tau ); 
lapack_int LAPACKE_cgehrd( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgehrd( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgejsv( int matrix_order, char joba, char jobu, char jobv, char jobr, char jobt, char jobp, lapack_int m, lapack_int n, float* a, lapack_int lda, float* sva, float* u, lapack_int ldu, float* v, lapack_int ldv, float* stat, lapack_int* istat ); lapack_int LAPACKE_dgejsv( int matrix_order, char joba, char jobu, char jobv, char jobr, char jobt, char jobp, lapack_int m, lapack_int n, double* a, lapack_int lda, double* sva, double* u, lapack_int ldu, double* v, lapack_int ldv, double* stat, lapack_int* istat ); lapack_int LAPACKE_sgelq2( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgelq2( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgelq2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgelq2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgelqf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgelqf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgelqf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgelqf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgels( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, float* a, 
lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dgels( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_cgels( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgels( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgelsd( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank ); lapack_int LAPACKE_dgelsd( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank ); lapack_int LAPACKE_cgelsd( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank ); lapack_int LAPACKE_zgelsd( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank ); lapack_int LAPACKE_sgelss( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank ); lapack_int LAPACKE_dgelss( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank ); lapack_int LAPACKE_cgelss( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank ); lapack_int LAPACKE_zgelss( 
int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank ); lapack_int LAPACKE_sgelsy( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* jpvt, float rcond, lapack_int* rank ); lapack_int LAPACKE_dgelsy( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* jpvt, double rcond, lapack_int* rank ); lapack_int LAPACKE_cgelsy( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* jpvt, float rcond, lapack_int* rank ); lapack_int LAPACKE_zgelsy( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* jpvt, double rcond, lapack_int* rank ); lapack_int LAPACKE_sgeqlf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgeqlf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgeqlf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgeqlf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgeqp3( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* jpvt, float* tau ); lapack_int LAPACKE_dgeqp3( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* jpvt, double* tau ); lapack_int LAPACKE_cgeqp3( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* jpvt, lapack_complex_float* tau ); lapack_int LAPACKE_zgeqp3( 
int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* jpvt, lapack_complex_double* tau ); lapack_int LAPACKE_sgeqpf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* jpvt, float* tau ); lapack_int LAPACKE_dgeqpf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* jpvt, double* tau ); lapack_int LAPACKE_cgeqpf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* jpvt, lapack_complex_float* tau ); lapack_int LAPACKE_zgeqpf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* jpvt, lapack_complex_double* tau ); lapack_int LAPACKE_sgeqr2( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgeqr2( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgeqr2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgeqr2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgeqrf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgeqrf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgeqrf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgeqrf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgeqrfp( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgeqrfp( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int 
LAPACKE_cgeqrfp( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgeqrfp( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgerfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dgerfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cgerfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zgerfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_sgerfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* r, const float* c, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dgerfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const double* a, 
lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* r, const double* c, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_cgerfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const float* r, const float* c, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zgerfsx( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const double* r, const double* c, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_sgerqf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dgerqf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_cgerqf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_zgerqf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_sgesdd( int matrix_order, char jobz, lapack_int m, lapack_int n, float* a, lapack_int lda, float* s, float* u, lapack_int ldu, float* vt, lapack_int ldvt ); lapack_int LAPACKE_dgesdd( int matrix_order, 
char jobz, lapack_int m, lapack_int n, double* a, lapack_int lda, double* s, double* u, lapack_int ldu, double* vt, lapack_int ldvt ); lapack_int LAPACKE_cgesdd( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* vt, lapack_int ldvt ); lapack_int LAPACKE_zgesdd( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* s, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* vt, lapack_int ldvt ); lapack_int LAPACKE_sgesv( int matrix_order, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgesv( int matrix_order, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgesv( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgesv( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_dsgesv( int matrix_order, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb, double* x, lapack_int ldx, lapack_int* iter ); lapack_int LAPACKE_zcgesv( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, lapack_int* iter ); lapack_int LAPACKE_sgesvd( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, float* a, lapack_int lda, float* s, float* u, lapack_int ldu, float* vt, lapack_int ldvt, float* superb ); lapack_int LAPACKE_dgesvd( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, double* a, lapack_int lda, 
double* s, double* u, lapack_int ldu, double* vt, lapack_int ldvt, double* superb ); lapack_int LAPACKE_cgesvd( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* vt, lapack_int ldvt, float* superb ); lapack_int LAPACKE_zgesvd( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* s, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* vt, lapack_int ldvt, double* superb ); lapack_int LAPACKE_sgesvj( int matrix_order, char joba, char jobu, char jobv, lapack_int m, lapack_int n, float* a, lapack_int lda, float* sva, lapack_int mv, float* v, lapack_int ldv, float* stat ); lapack_int LAPACKE_dgesvj( int matrix_order, char joba, char jobu, char jobv, lapack_int m, lapack_int n, double* a, lapack_int lda, double* sva, lapack_int mv, double* v, lapack_int ldv, double* stat ); lapack_int LAPACKE_sgesvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* rpivot ); lapack_int LAPACKE_dgesvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* rpivot ); lapack_int LAPACKE_cgesvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* rpivot ); 
lapack_int LAPACKE_zgesvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* rpivot ); lapack_int LAPACKE_sgesvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dgesvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_cgesvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zgesvxx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* 
rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_sgetf2( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_dgetf2( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_cgetf2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_zgetf2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_sgetrf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_dgetrf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_cgetrf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_zgetrf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_sgetri( int matrix_order, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_dgetri( int matrix_order, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_cgetri( int matrix_order, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_zgetri( int matrix_order, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_sgetrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgetrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int 
LAPACKE_cgetrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgetrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sggbak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* lscale, const float* rscale, lapack_int m, float* v, lapack_int ldv ); lapack_int LAPACKE_dggbak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* lscale, const double* rscale, lapack_int m, double* v, lapack_int ldv ); lapack_int LAPACKE_cggbak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* lscale, const float* rscale, lapack_int m, lapack_complex_float* v, lapack_int ldv ); lapack_int LAPACKE_zggbak( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* lscale, const double* rscale, lapack_int m, lapack_complex_double* v, lapack_int ldv ); lapack_int LAPACKE_sggbal( int matrix_order, char job, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale ); lapack_int LAPACKE_dggbal( int matrix_order, char job, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale ); lapack_int LAPACKE_cggbal( int matrix_order, char job, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale ); lapack_int LAPACKE_zggbal( int matrix_order, char job, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, 
double* lscale, double* rscale ); lapack_int LAPACKE_sgges( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_S_SELECT3 selctg, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* sdim, float* alphar, float* alphai, float* beta, float* vsl, lapack_int ldvsl, float* vsr, lapack_int ldvsr ); lapack_int LAPACKE_dgges( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_D_SELECT3 selctg, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* sdim, double* alphar, double* alphai, double* beta, double* vsl, lapack_int ldvsl, double* vsr, lapack_int ldvsr ); lapack_int LAPACKE_cgges( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_C_SELECT2 selctg, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* sdim, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vsl, lapack_int ldvsl, lapack_complex_float* vsr, lapack_int ldvsr ); lapack_int LAPACKE_zgges( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_Z_SELECT2 selctg, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* sdim, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vsl, lapack_int ldvsl, lapack_complex_double* vsr, lapack_int ldvsr ); lapack_int LAPACKE_sggesx( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_S_SELECT3 selctg, char sense, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* sdim, float* alphar, float* alphai, float* beta, float* vsl, lapack_int ldvsl, float* vsr, lapack_int ldvsr, float* rconde, float* rcondv ); lapack_int LAPACKE_dggesx( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_D_SELECT3 selctg, char sense, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* sdim, double* alphar, double* alphai, double* beta, double* vsl, lapack_int ldvsl, 
double* vsr, lapack_int ldvsr, double* rconde, double* rcondv ); lapack_int LAPACKE_cggesx( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_C_SELECT2 selctg, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* sdim, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vsl, lapack_int ldvsl, lapack_complex_float* vsr, lapack_int ldvsr, float* rconde, float* rcondv ); lapack_int LAPACKE_zggesx( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_Z_SELECT2 selctg, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* sdim, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vsl, lapack_int ldvsl, lapack_complex_double* vsr, lapack_int ldvsr, double* rconde, double* rcondv ); lapack_int LAPACKE_sggev( int matrix_order, char jobvl, char jobvr, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* alphar, float* alphai, float* beta, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr ); lapack_int LAPACKE_dggev( int matrix_order, char jobvl, char jobvr, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* alphar, double* alphai, double* beta, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr ); lapack_int LAPACKE_cggev( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr ); lapack_int LAPACKE_zggev( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr ); 
lapack_int LAPACKE_sggevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* alphar, float* alphai, float* beta, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* abnrm, float* bbnrm, float* rconde, float* rcondv ); lapack_int LAPACKE_dggevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* alphar, double* alphai, double* beta, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* abnrm, double* bbnrm, double* rconde, double* rcondv ); lapack_int LAPACKE_cggevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* abnrm, float* bbnrm, float* rconde, float* rcondv ); lapack_int LAPACKE_zggevx( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* abnrm, double* bbnrm, double* rconde, double* rcondv ); lapack_int LAPACKE_sggglm( int matrix_order, lapack_int n, lapack_int m, lapack_int p, float* a, lapack_int lda, float* b, lapack_int ldb, float* d, float* x, float* y ); lapack_int LAPACKE_dggglm( int matrix_order, lapack_int n, lapack_int m, lapack_int p, double* a, lapack_int lda, double* 
b, lapack_int ldb, double* d, double* x, double* y ); lapack_int LAPACKE_cggglm( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* d, lapack_complex_float* x, lapack_complex_float* y ); lapack_int LAPACKE_zggglm( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* d, lapack_complex_double* x, lapack_complex_double* y ); lapack_int LAPACKE_sgghrd( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, float* a, lapack_int lda, float* b, lapack_int ldb, float* q, lapack_int ldq, float* z, lapack_int ldz ); lapack_int LAPACKE_dgghrd( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, double* a, lapack_int lda, double* b, lapack_int ldb, double* q, lapack_int ldq, double* z, lapack_int ldz ); lapack_int LAPACKE_cgghrd( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zgghrd( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_sgglse( int matrix_order, lapack_int m, lapack_int n, lapack_int p, float* a, lapack_int lda, float* b, lapack_int ldb, float* c, float* d, float* x ); lapack_int LAPACKE_dgglse( int matrix_order, lapack_int m, lapack_int n, lapack_int p, double* a, lapack_int lda, double* b, lapack_int ldb, double* c, double* d, double* x ); lapack_int LAPACKE_cgglse( int matrix_order, lapack_int m, lapack_int n, lapack_int p, 
lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* c, lapack_complex_float* d, lapack_complex_float* x ); lapack_int LAPACKE_zgglse( int matrix_order, lapack_int m, lapack_int n, lapack_int p, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* c, lapack_complex_double* d, lapack_complex_double* x ); lapack_int LAPACKE_sggqrf( int matrix_order, lapack_int n, lapack_int m, lapack_int p, float* a, lapack_int lda, float* taua, float* b, lapack_int ldb, float* taub ); lapack_int LAPACKE_dggqrf( int matrix_order, lapack_int n, lapack_int m, lapack_int p, double* a, lapack_int lda, double* taua, double* b, lapack_int ldb, double* taub ); lapack_int LAPACKE_cggqrf( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_float* a, lapack_int lda, lapack_complex_float* taua, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* taub ); lapack_int LAPACKE_zggqrf( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_double* a, lapack_int lda, lapack_complex_double* taua, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* taub ); lapack_int LAPACKE_sggrqf( int matrix_order, lapack_int m, lapack_int p, lapack_int n, float* a, lapack_int lda, float* taua, float* b, lapack_int ldb, float* taub ); lapack_int LAPACKE_dggrqf( int matrix_order, lapack_int m, lapack_int p, lapack_int n, double* a, lapack_int lda, double* taua, double* b, lapack_int ldb, double* taub ); lapack_int LAPACKE_cggrqf( int matrix_order, lapack_int m, lapack_int p, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* taua, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* taub ); lapack_int LAPACKE_zggrqf( int matrix_order, lapack_int m, lapack_int p, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* taua, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* taub 
); lapack_int LAPACKE_sggsvd( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, float* a, lapack_int lda, float* b, lapack_int ldb, float* alpha, float* beta, float* u, lapack_int ldu, float* v, lapack_int ldv, float* q, lapack_int ldq, lapack_int* iwork ); lapack_int LAPACKE_dggsvd( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, double* a, lapack_int lda, double* b, lapack_int ldb, double* alpha, double* beta, double* u, lapack_int ldu, double* v, lapack_int ldv, double* q, lapack_int ldq, lapack_int* iwork ); lapack_int LAPACKE_cggsvd( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* alpha, float* beta, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* v, lapack_int ldv, lapack_complex_float* q, lapack_int ldq, lapack_int* iwork ); lapack_int LAPACKE_zggsvd( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* alpha, double* beta, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_complex_double* q, lapack_int ldq, lapack_int* iwork ); lapack_int LAPACKE_sggsvp( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float tola, float tolb, lapack_int* k, lapack_int* l, float* u, lapack_int ldu, float* v, lapack_int ldv, float* q, lapack_int ldq ); lapack_int LAPACKE_dggsvp( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double tola, double tolb, lapack_int* k, 
lapack_int* l, double* u, lapack_int ldu, double* v, lapack_int ldv, double* q, lapack_int ldq ); lapack_int LAPACKE_cggsvp( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float tola, float tolb, lapack_int* k, lapack_int* l, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* v, lapack_int ldv, lapack_complex_float* q, lapack_int ldq ); lapack_int LAPACKE_zggsvp( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double tola, double tolb, lapack_int* k, lapack_int* l, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_complex_double* q, lapack_int ldq ); lapack_int LAPACKE_sgtcon( char norm, lapack_int n, const float* dl, const float* d, const float* du, const float* du2, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_dgtcon( char norm, lapack_int n, const double* dl, const double* d, const double* du, const double* du2, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_cgtcon( char norm, lapack_int n, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* du2, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_zgtcon( char norm, lapack_int n, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* du2, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_sgtrfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* dl, const float* d, const float* du, const float* dlf, const float* df, const float* duf, const float* du2, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, 
float* berr ); lapack_int LAPACKE_dgtrfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* dl, const double* d, const double* du, const double* dlf, const double* df, const double* duf, const double* du2, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cgtrfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* dlf, const lapack_complex_float* df, const lapack_complex_float* duf, const lapack_complex_float* du2, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zgtrfs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* dlf, const lapack_complex_double* df, const lapack_complex_double* duf, const lapack_complex_double* du2, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_sgtsv( int matrix_order, lapack_int n, lapack_int nrhs, float* dl, float* d, float* du, float* b, lapack_int ldb ); lapack_int LAPACKE_dgtsv( int matrix_order, lapack_int n, lapack_int nrhs, double* dl, double* d, double* du, double* b, lapack_int ldb ); lapack_int LAPACKE_cgtsv( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_float* dl, lapack_complex_float* d, lapack_complex_float* du, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgtsv( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_double* dl, lapack_complex_double* d, lapack_complex_double* du, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgtsvx( int matrix_order, char fact, char trans, 
lapack_int n, lapack_int nrhs, const float* dl, const float* d, const float* du, float* dlf, float* df, float* duf, float* du2, lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_dgtsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const double* dl, const double* d, const double* du, double* dlf, double* df, double* duf, double* du2, lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_cgtsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, lapack_complex_float* dlf, lapack_complex_float* df, lapack_complex_float* duf, lapack_complex_float* du2, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zgtsvx( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, lapack_complex_double* dlf, lapack_complex_double* df, lapack_complex_double* duf, lapack_complex_double* du2, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_sgttrf( lapack_int n, float* dl, float* d, float* du, float* du2, lapack_int* ipiv ); lapack_int LAPACKE_dgttrf( lapack_int n, double* dl, double* d, double* du, double* du2, lapack_int* ipiv ); lapack_int LAPACKE_cgttrf( lapack_int n, lapack_complex_float* dl, lapack_complex_float* d, lapack_complex_float* du, lapack_complex_float* du2, lapack_int* ipiv ); lapack_int LAPACKE_zgttrf( lapack_int n, lapack_complex_double* dl, lapack_complex_double* d, lapack_complex_double* du, 
lapack_complex_double* du2, lapack_int* ipiv ); lapack_int LAPACKE_sgttrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* dl, const float* d, const float* du, const float* du2, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgttrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* dl, const double* d, const double* du, const double* du2, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgttrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* du2, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgttrs( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* du2, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chbev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhbev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chbevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhbevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chbevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* q, lapack_int 
ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_zhbevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_chbgst( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* bb, lapack_int ldbb, lapack_complex_float* x, lapack_int ldx ); lapack_int LAPACKE_zhbgst( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* bb, lapack_int ldbb, lapack_complex_double* x, lapack_int ldx ); lapack_int LAPACKE_chbgv( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* bb, lapack_int ldbb, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhbgv( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* bb, lapack_int ldbb, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chbgvd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* bb, lapack_int ldbb, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhbgvd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* bb, lapack_int ldbb, double* w, lapack_complex_double* z, lapack_int 
ldz ); lapack_int LAPACKE_chbgvx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* bb, lapack_int ldbb, lapack_complex_float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_zhbgvx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* bb, lapack_int ldbb, lapack_complex_double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_chbtrd( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, float* d, float* e, lapack_complex_float* q, lapack_int ldq ); lapack_int LAPACKE_zhbtrd( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, double* d, double* e, lapack_complex_double* q, lapack_int ldq ); lapack_int LAPACKE_checon( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_zhecon( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_cheequb( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_zheequb( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_cheev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* a, 
lapack_int lda, float* w ); lapack_int LAPACKE_zheev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double* w ); lapack_int LAPACKE_cheevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float* w ); lapack_int LAPACKE_zheevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double* w ); lapack_int LAPACKE_cheevr( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* isuppz ); lapack_int LAPACKE_zheevr( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* isuppz ); lapack_int LAPACKE_cheevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_zheevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_chegst( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhegst( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chegv( int matrix_order, lapack_int 
itype, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* w ); lapack_int LAPACKE_zhegv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* w ); lapack_int LAPACKE_chegvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* w ); lapack_int LAPACKE_zhegvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* w ); lapack_int LAPACKE_chegvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_zhegvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_cherfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zherfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, 
lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cherfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const float* s, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zherfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const double* s, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_chesv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhesv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chesvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zhesvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int 
ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_chesvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zhesvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_chetrd( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float* d, float* e, lapack_complex_float* tau ); lapack_int LAPACKE_zhetrd( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double* d, double* e, lapack_complex_double* tau ); lapack_int LAPACKE_chetrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_zhetrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_chetri( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_zhetri( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_chetrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, 
lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhetrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chfrk( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, float alpha, const lapack_complex_float* a, lapack_int lda, float beta, lapack_complex_float* c ); lapack_int LAPACKE_zhfrk( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, double alpha, const lapack_complex_double* a, lapack_int lda, double beta, lapack_complex_double* c ); lapack_int LAPACKE_shgeqz( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, float* h, lapack_int ldh, float* t, lapack_int ldt, float* alphar, float* alphai, float* beta, float* q, lapack_int ldq, float* z, lapack_int ldz ); lapack_int LAPACKE_dhgeqz( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, double* h, lapack_int ldh, double* t, lapack_int ldt, double* alphar, double* alphai, double* beta, double* q, lapack_int ldq, double* z, lapack_int ldz ); lapack_int LAPACKE_chgeqz( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* h, lapack_int ldh, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhgeqz( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* h, lapack_int ldh, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chpcon( int matrix_order, char uplo, lapack_int n, const 
lapack_complex_float* ap, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_zhpcon( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_chpev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhpev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chpevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhpevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chpevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* ap, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_zhpevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* ap, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_chpgst( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_float* ap, const lapack_complex_float* bp ); lapack_int LAPACKE_zhpgst( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_double* ap, const lapack_complex_double* bp ); lapack_int LAPACKE_chpgv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, lapack_complex_float* bp, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhpgv( int matrix_order, lapack_int itype, char 
jobz, char uplo, lapack_int n, lapack_complex_double* ap, lapack_complex_double* bp, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chpgvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, lapack_complex_float* bp, float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhpgvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, lapack_complex_double* bp, double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_chpgvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* ap, lapack_complex_float* bp, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_zhpgvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* ap, lapack_complex_double* bp, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_chprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zhprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_chpsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhpsv( int matrix_order, char 
uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chpsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* afp, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zhpsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* afp, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_chptrd( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, float* d, float* e, lapack_complex_float* tau ); lapack_int LAPACKE_zhptrd( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, double* d, double* e, lapack_complex_double* tau ); lapack_int LAPACKE_chptrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, lapack_int* ipiv ); lapack_int LAPACKE_zhptrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, lapack_int* ipiv ); lapack_int LAPACKE_chptri( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, const lapack_int* ipiv ); lapack_int LAPACKE_zhptri( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, const lapack_int* ipiv ); lapack_int LAPACKE_chptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_shsein( int matrix_order, char job, char eigsrc, char initv, lapack_logical* select, 
lapack_int n, const float* h, lapack_int ldh, float* wr, const float* wi, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_dhsein( int matrix_order, char job, char eigsrc, char initv, lapack_logical* select, lapack_int n, const double* h, lapack_int ldh, double* wr, const double* wi, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_chsein( int matrix_order, char job, char eigsrc, char initv, const lapack_logical* select, lapack_int n, const lapack_complex_float* h, lapack_int ldh, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_zhsein( int matrix_order, char job, char eigsrc, char initv, const lapack_logical* select, lapack_int n, const lapack_complex_double* h, lapack_int ldh, lapack_complex_double* w, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_shseqr( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, float* h, lapack_int ldh, float* wr, float* wi, float* z, lapack_int ldz ); lapack_int LAPACKE_dhseqr( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, double* h, lapack_int ldh, double* wr, double* wi, double* z, lapack_int ldz ); lapack_int LAPACKE_chseqr( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* h, lapack_int ldh, lapack_complex_float* w, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zhseqr( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* h, lapack_int ldh, 
lapack_complex_double* w, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_clacgv( lapack_int n, lapack_complex_float* x, lapack_int incx ); lapack_int LAPACKE_zlacgv( lapack_int n, lapack_complex_double* x, lapack_int incx ); lapack_int LAPACKE_slacn2( lapack_int n, float* v, float* x, lapack_int* isgn, float* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_dlacn2( lapack_int n, double* v, double* x, lapack_int* isgn, double* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_clacn2( lapack_int n, lapack_complex_float* v, lapack_complex_float* x, float* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_zlacn2( lapack_int n, lapack_complex_double* v, lapack_complex_double* x, double* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_slacpy( int matrix_order, char uplo, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dlacpy( int matrix_order, char uplo, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_clacpy( int matrix_order, char uplo, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zlacpy( int matrix_order, char uplo, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_clacp2( int matrix_order, char uplo, lapack_int m, lapack_int n, const float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zlacp2( int matrix_order, char uplo, lapack_int m, lapack_int n, const double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_zlag2c( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_float* sa, lapack_int ldsa ); lapack_int LAPACKE_slag2d( int matrix_order, lapack_int m, lapack_int n, const 
float* sa, lapack_int ldsa, double* a, lapack_int lda ); lapack_int LAPACKE_dlag2s( int matrix_order, lapack_int m, lapack_int n, const double* a, lapack_int lda, float* sa, lapack_int ldsa ); lapack_int LAPACKE_clag2z( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* sa, lapack_int ldsa, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_slagge( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* d, float* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_dlagge( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* d, double* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_clagge( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* d, lapack_complex_float* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_zlagge( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* d, lapack_complex_double* a, lapack_int lda, lapack_int* iseed ); float LAPACKE_slamch( char cmach ); double LAPACKE_dlamch( char cmach ); float LAPACKE_slange( int matrix_order, char norm, lapack_int m, lapack_int n, const float* a, lapack_int lda ); double LAPACKE_dlange( int matrix_order, char norm, lapack_int m, lapack_int n, const double* a, lapack_int lda ); float LAPACKE_clange( int matrix_order, char norm, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda ); double LAPACKE_zlange( int matrix_order, char norm, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda ); float LAPACKE_clanhe( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda ); double LAPACKE_zlanhe( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda ); float LAPACKE_slansy( int matrix_order, char norm, char uplo, lapack_int n, const float* a, lapack_int lda ); double 
LAPACKE_dlansy( int matrix_order, char norm, char uplo, lapack_int n, const double* a, lapack_int lda ); float LAPACKE_clansy( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda ); double LAPACKE_zlansy( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda ); float LAPACKE_slantr( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const float* a, lapack_int lda ); double LAPACKE_dlantr( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const double* a, lapack_int lda ); float LAPACKE_clantr( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda ); double LAPACKE_zlantr( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_slarfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* c, lapack_int ldc ); lapack_int LAPACKE_dlarfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* c, lapack_int ldc ); lapack_int LAPACKE_clarfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zlarfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_slarfg( lapack_int n, float* alpha, 
float* x, lapack_int incx, float* tau ); lapack_int LAPACKE_dlarfg( lapack_int n, double* alpha, double* x, lapack_int incx, double* tau ); lapack_int LAPACKE_clarfg( lapack_int n, lapack_complex_float* alpha, lapack_complex_float* x, lapack_int incx, lapack_complex_float* tau ); lapack_int LAPACKE_zlarfg( lapack_int n, lapack_complex_double* alpha, lapack_complex_double* x, lapack_int incx, lapack_complex_double* tau ); lapack_int LAPACKE_slarft( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const float* v, lapack_int ldv, const float* tau, float* t, lapack_int ldt ); lapack_int LAPACKE_dlarft( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const double* v, lapack_int ldv, const double* tau, double* t, lapack_int ldt ); lapack_int LAPACKE_clarft( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* tau, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zlarft( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* tau, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_slarfx( int matrix_order, char side, lapack_int m, lapack_int n, const float* v, float tau, float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_dlarfx( int matrix_order, char side, lapack_int m, lapack_int n, const double* v, double tau, double* c, lapack_int ldc, double* work ); lapack_int LAPACKE_clarfx( int matrix_order, char side, lapack_int m, lapack_int n, const lapack_complex_float* v, lapack_complex_float tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work ); lapack_int LAPACKE_zlarfx( int matrix_order, char side, lapack_int m, lapack_int n, const lapack_complex_double* v, lapack_complex_double tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work ); lapack_int LAPACKE_slarnv( 
lapack_int idist, lapack_int* iseed, lapack_int n, float* x ); lapack_int LAPACKE_dlarnv( lapack_int idist, lapack_int* iseed, lapack_int n, double* x ); lapack_int LAPACKE_clarnv( lapack_int idist, lapack_int* iseed, lapack_int n, lapack_complex_float* x ); lapack_int LAPACKE_zlarnv( lapack_int idist, lapack_int* iseed, lapack_int n, lapack_complex_double* x ); lapack_int LAPACKE_slaset( int matrix_order, char uplo, lapack_int m, lapack_int n, float alpha, float beta, float* a, lapack_int lda ); lapack_int LAPACKE_dlaset( int matrix_order, char uplo, lapack_int m, lapack_int n, double alpha, double beta, double* a, lapack_int lda ); lapack_int LAPACKE_claset( int matrix_order, char uplo, lapack_int m, lapack_int n, lapack_complex_float alpha, lapack_complex_float beta, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zlaset( int matrix_order, char uplo, lapack_int m, lapack_int n, lapack_complex_double alpha, lapack_complex_double beta, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_slasrt( char id, lapack_int n, float* d ); lapack_int LAPACKE_dlasrt( char id, lapack_int n, double* d ); lapack_int LAPACKE_slaswp( int matrix_order, lapack_int n, float* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_dlaswp( int matrix_order, lapack_int n, double* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_claswp( int matrix_order, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_zlaswp( int matrix_order, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_slatms( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, float* d, lapack_int mode, float cond, float dmax, lapack_int kl, lapack_int ku, char 
pack, float* a, lapack_int lda ); lapack_int LAPACKE_dlatms( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, double* d, lapack_int mode, double cond, double dmax, lapack_int kl, lapack_int ku, char pack, double* a, lapack_int lda ); lapack_int LAPACKE_clatms( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, float* d, lapack_int mode, float cond, float dmax, lapack_int kl, lapack_int ku, char pack, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zlatms( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, double* d, lapack_int mode, double cond, double dmax, lapack_int kl, lapack_int ku, char pack, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_slauum( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dlauum( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_clauum( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zlauum( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_sopgtr( int matrix_order, char uplo, lapack_int n, const float* ap, const float* tau, float* q, lapack_int ldq ); lapack_int LAPACKE_dopgtr( int matrix_order, char uplo, lapack_int n, const double* ap, const double* tau, double* q, lapack_int ldq ); lapack_int LAPACKE_sopmtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const float* ap, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dopmtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const double* ap, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sorgbr( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorgbr( int matrix_order, 
char vect, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sorghr( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorghr( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sorglq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorglq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sorgql( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorgql( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sorgqr( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorgqr( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sorgrq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorgrq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sorgtr( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const float* tau ); lapack_int LAPACKE_dorgtr( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const double* tau ); lapack_int LAPACKE_sormbr( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormbr( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int 
k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sormhr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormhr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sormlq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormlq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sormql( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormql( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sormqr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormqr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sormrq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormrq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, 
lapack_int ldc ); lapack_int LAPACKE_sormrz( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormrz( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_sormtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc ); lapack_int LAPACKE_dormtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc ); lapack_int LAPACKE_spbcon( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab, float anorm, float* rcond ); lapack_int LAPACKE_dpbcon( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab, double anorm, double* rcond ); lapack_int LAPACKE_cpbcon( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab, float anorm, float* rcond ); lapack_int LAPACKE_zpbcon( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab, double anorm, double* rcond ); lapack_int LAPACKE_spbequ( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab, float* s, float* scond, float* amax ); lapack_int LAPACKE_dpbequ( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab, double* s, double* scond, double* amax ); lapack_int LAPACKE_cpbequ( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab, float* s, float* scond, float* amax ); lapack_int LAPACKE_zpbequ( int matrix_order, char uplo, lapack_int n, lapack_int 
kd, const lapack_complex_double* ab, lapack_int ldab, double* s, double* scond, double* amax ); lapack_int LAPACKE_spbrfs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, const float* afb, lapack_int ldafb, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dpbrfs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, const double* afb, lapack_int ldafb, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cpbrfs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* afb, lapack_int ldafb, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zpbrfs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* afb, lapack_int ldafb, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_spbstf( int matrix_order, char uplo, lapack_int n, lapack_int kb, float* bb, lapack_int ldbb ); lapack_int LAPACKE_dpbstf( int matrix_order, char uplo, lapack_int n, lapack_int kb, double* bb, lapack_int ldbb ); lapack_int LAPACKE_cpbstf( int matrix_order, char uplo, lapack_int n, lapack_int kb, lapack_complex_float* bb, lapack_int ldbb ); lapack_int LAPACKE_zpbstf( int matrix_order, char uplo, lapack_int n, lapack_int kb, lapack_complex_double* bb, lapack_int ldbb ); lapack_int LAPACKE_spbsv( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, float* ab, lapack_int ldab, float* b, lapack_int ldb ); lapack_int LAPACKE_dpbsv( int matrix_order, char uplo, lapack_int n, lapack_int kd, 
lapack_int nrhs, double* ab, lapack_int ldab, double* b, lapack_int ldb ); lapack_int LAPACKE_cpbsv( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpbsv( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_spbsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, float* ab, lapack_int ldab, float* afb, lapack_int ldafb, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_dpbsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, double* ab, lapack_int ldab, double* afb, lapack_int ldafb, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_cpbsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* afb, lapack_int ldafb, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zpbsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* afb, lapack_int ldafb, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_spbtrf( int matrix_order, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab ); lapack_int LAPACKE_dpbtrf( int matrix_order, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab ); lapack_int LAPACKE_cpbtrf( 
int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab ); lapack_int LAPACKE_zpbtrf( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab ); lapack_int LAPACKE_spbtrs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, float* b, lapack_int ldb ); lapack_int LAPACKE_dpbtrs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, double* b, lapack_int ldb ); lapack_int LAPACKE_cpbtrs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpbtrs( int matrix_order, char uplo, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_spftrf( int matrix_order, char transr, char uplo, lapack_int n, float* a ); lapack_int LAPACKE_dpftrf( int matrix_order, char transr, char uplo, lapack_int n, double* a ); lapack_int LAPACKE_cpftrf( int matrix_order, char transr, char uplo, lapack_int n, lapack_complex_float* a ); lapack_int LAPACKE_zpftrf( int matrix_order, char transr, char uplo, lapack_int n, lapack_complex_double* a ); lapack_int LAPACKE_spftri( int matrix_order, char transr, char uplo, lapack_int n, float* a ); lapack_int LAPACKE_dpftri( int matrix_order, char transr, char uplo, lapack_int n, double* a ); lapack_int LAPACKE_cpftri( int matrix_order, char transr, char uplo, lapack_int n, lapack_complex_float* a ); lapack_int LAPACKE_zpftri( int matrix_order, char transr, char uplo, lapack_int n, lapack_complex_double* a ); lapack_int LAPACKE_spftrs( int matrix_order, char transr, char uplo, lapack_int n, lapack_int nrhs, const float* a, float* b, lapack_int ldb ); lapack_int LAPACKE_dpftrs( int matrix_order, char transr, char uplo, 
lapack_int n, lapack_int nrhs, const double* a, double* b, lapack_int ldb ); lapack_int LAPACKE_cpftrs( int matrix_order, char transr, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpftrs( int matrix_order, char transr, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_spocon( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, float anorm, float* rcond ); lapack_int LAPACKE_dpocon( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, double anorm, double* rcond ); lapack_int LAPACKE_cpocon( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float anorm, float* rcond ); lapack_int LAPACKE_zpocon( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double anorm, double* rcond ); lapack_int LAPACKE_spoequ( int matrix_order, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_dpoequ( int matrix_order, lapack_int n, const double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_cpoequ( int matrix_order, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_zpoequ( int matrix_order, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_spoequb( int matrix_order, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_dpoequb( int matrix_order, lapack_int n, const double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_cpoequb( int matrix_order, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_zpoequb( int 
matrix_order, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_sporfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dporfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cporfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zporfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_sporfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const float* s, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dporfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const double* s, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_cporfsx( int 
matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const float* s, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zporfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const double* s, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_sposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_cposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_dsposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* x, lapack_int ldx, lapack_int* iter ); lapack_int LAPACKE_zcposv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, lapack_int* iter ); lapack_int LAPACKE_sposvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* a, 
lapack_int lda, float* af, lapack_int ldaf, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_dposvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_cposvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zposvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_sposvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dposvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_cposvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int 
lda, lapack_complex_float* af, lapack_int ldaf, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zposvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_spotrf( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dpotrf( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_cpotrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zpotrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_spotri( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dpotri( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_cpotri( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zpotri( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_spotrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dpotrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_cpotrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const 
lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpotrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sppcon( int matrix_order, char uplo, lapack_int n, const float* ap, float anorm, float* rcond ); lapack_int LAPACKE_dppcon( int matrix_order, char uplo, lapack_int n, const double* ap, double anorm, double* rcond ); lapack_int LAPACKE_cppcon( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, float anorm, float* rcond ); lapack_int LAPACKE_zppcon( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, double anorm, double* rcond ); lapack_int LAPACKE_sppequ( int matrix_order, char uplo, lapack_int n, const float* ap, float* s, float* scond, float* amax ); lapack_int LAPACKE_dppequ( int matrix_order, char uplo, lapack_int n, const double* ap, double* s, double* scond, double* amax ); lapack_int LAPACKE_cppequ( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, float* s, float* scond, float* amax ); lapack_int LAPACKE_zppequ( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, double* s, double* scond, double* amax ); lapack_int LAPACKE_spprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, const float* afp, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dpprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, const double* afp, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cpprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); 
lapack_int LAPACKE_zpprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_sppsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* ap, float* b, lapack_int ldb ); lapack_int LAPACKE_dppsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* ap, double* b, lapack_int ldb ); lapack_int LAPACKE_cppsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zppsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sppsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* ap, float* afp, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_dppsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* ap, double* afp, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_cppsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_complex_float* afp, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zppsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_complex_double* afp, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_spptrf( int matrix_order, char uplo, lapack_int n, 
float* ap ); lapack_int LAPACKE_dpptrf( int matrix_order, char uplo, lapack_int n, double* ap ); lapack_int LAPACKE_cpptrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap ); lapack_int LAPACKE_zpptrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap ); lapack_int LAPACKE_spptri( int matrix_order, char uplo, lapack_int n, float* ap ); lapack_int LAPACKE_dpptri( int matrix_order, char uplo, lapack_int n, double* ap ); lapack_int LAPACKE_cpptri( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap ); lapack_int LAPACKE_zpptri( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap ); lapack_int LAPACKE_spptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, float* b, lapack_int ldb ); lapack_int LAPACKE_dpptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, double* b, lapack_int ldb ); lapack_int LAPACKE_cpptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_spstrf( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, lapack_int* piv, lapack_int* rank, float tol ); lapack_int LAPACKE_dpstrf( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, lapack_int* piv, lapack_int* rank, double tol ); lapack_int LAPACKE_cpstrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* piv, lapack_int* rank, float tol ); lapack_int LAPACKE_zpstrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* piv, lapack_int* rank, double tol ); lapack_int LAPACKE_sptcon( lapack_int n, const float* d, const float* e, float anorm, float* rcond ); lapack_int LAPACKE_dptcon( lapack_int 
n, const double* d, const double* e, double anorm, double* rcond ); lapack_int LAPACKE_cptcon( lapack_int n, const float* d, const lapack_complex_float* e, float anorm, float* rcond ); lapack_int LAPACKE_zptcon( lapack_int n, const double* d, const lapack_complex_double* e, double anorm, double* rcond ); lapack_int LAPACKE_spteqr( int matrix_order, char compz, lapack_int n, float* d, float* e, float* z, lapack_int ldz ); lapack_int LAPACKE_dpteqr( int matrix_order, char compz, lapack_int n, double* d, double* e, double* z, lapack_int ldz ); lapack_int LAPACKE_cpteqr( int matrix_order, char compz, lapack_int n, float* d, float* e, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zpteqr( int matrix_order, char compz, lapack_int n, double* d, double* e, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_sptrfs( int matrix_order, lapack_int n, lapack_int nrhs, const float* d, const float* e, const float* df, const float* ef, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dptrfs( int matrix_order, lapack_int n, lapack_int nrhs, const double* d, const double* e, const double* df, const double* ef, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_cptrfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* d, const lapack_complex_float* e, const float* df, const lapack_complex_float* ef, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zptrfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* d, const lapack_complex_double* e, const double* df, const lapack_complex_double* ef, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_sptsv( int matrix_order, lapack_int n, lapack_int nrhs, float* d, float* e, float* b, 
lapack_int ldb ); lapack_int LAPACKE_dptsv( int matrix_order, lapack_int n, lapack_int nrhs, double* d, double* e, double* b, lapack_int ldb ); lapack_int LAPACKE_cptsv( int matrix_order, lapack_int n, lapack_int nrhs, float* d, lapack_complex_float* e, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zptsv( int matrix_order, lapack_int n, lapack_int nrhs, double* d, lapack_complex_double* e, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sptsvx( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const float* d, const float* e, float* df, float* ef, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_dptsvx( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const double* d, const double* e, double* df, double* ef, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_cptsvx( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const float* d, const lapack_complex_float* e, float* df, lapack_complex_float* ef, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zptsvx( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const double* d, const lapack_complex_double* e, double* df, lapack_complex_double* ef, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_spttrf( lapack_int n, float* d, float* e ); lapack_int LAPACKE_dpttrf( lapack_int n, double* d, double* e ); lapack_int LAPACKE_cpttrf( lapack_int n, float* d, lapack_complex_float* e ); lapack_int LAPACKE_zpttrf( lapack_int n, double* d, lapack_complex_double* e ); lapack_int LAPACKE_spttrs( int matrix_order, lapack_int n, lapack_int nrhs, const float* d, const float* e, float* b, lapack_int ldb ); lapack_int 
LAPACKE_dpttrs( int matrix_order, lapack_int n, lapack_int nrhs, const double* d, const double* e, double* b, lapack_int ldb ); lapack_int LAPACKE_cpttrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* d, const lapack_complex_float* e, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpttrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* d, const lapack_complex_double* e, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_ssbev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* w, float* z, lapack_int ldz ); lapack_int LAPACKE_dsbev( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* w, double* z, lapack_int ldz ); lapack_int LAPACKE_ssbevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* w, float* z, lapack_int ldz ); lapack_int LAPACKE_dsbevd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* w, double* z, lapack_int ldz ); lapack_int LAPACKE_ssbevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_dsbevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_ssbgst( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, const float* bb, lapack_int ldbb, float* x, lapack_int ldx ); lapack_int LAPACKE_dsbgst( int matrix_order, char vect, 
char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, const double* bb, lapack_int ldbb, double* x, lapack_int ldx ); lapack_int LAPACKE_ssbgv( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, float* bb, lapack_int ldbb, float* w, float* z, lapack_int ldz ); lapack_int LAPACKE_dsbgv( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, double* bb, lapack_int ldbb, double* w, double* z, lapack_int ldz ); lapack_int LAPACKE_ssbgvd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, float* bb, lapack_int ldbb, float* w, float* z, lapack_int ldz ); lapack_int LAPACKE_dsbgvd( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, double* bb, lapack_int ldbb, double* w, double* z, lapack_int ldz ); lapack_int LAPACKE_ssbgvx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, float* bb, lapack_int ldbb, float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_dsbgvx( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, double* bb, lapack_int ldbb, double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_ssbtrd( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* d, float* e, float* q, lapack_int ldq ); lapack_int LAPACKE_dsbtrd( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* d, double* e, double* q, 
lapack_int ldq ); lapack_int LAPACKE_ssfrk( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, float alpha, const float* a, lapack_int lda, float beta, float* c ); lapack_int LAPACKE_dsfrk( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, double alpha, const double* a, lapack_int lda, double beta, double* c ); lapack_int LAPACKE_sspcon( int matrix_order, char uplo, lapack_int n, const float* ap, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_dspcon( int matrix_order, char uplo, lapack_int n, const double* ap, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_cspcon( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_zspcon( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_sspev( int matrix_order, char jobz, char uplo, lapack_int n, float* ap, float* w, float* z, lapack_int ldz ); lapack_int LAPACKE_dspev( int matrix_order, char jobz, char uplo, lapack_int n, double* ap, double* w, double* z, lapack_int ldz ); lapack_int LAPACKE_sspevd( int matrix_order, char jobz, char uplo, lapack_int n, float* ap, float* w, float* z, lapack_int ldz ); lapack_int LAPACKE_dspevd( int matrix_order, char jobz, char uplo, lapack_int n, double* ap, double* w, double* z, lapack_int ldz ); lapack_int LAPACKE_sspevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, float* ap, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_dspevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, double* ap, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail ); lapack_int 
LAPACKE_sspgst( int matrix_order, lapack_int itype, char uplo, lapack_int n, float* ap, const float* bp ); lapack_int LAPACKE_dspgst( int matrix_order, lapack_int itype, char uplo, lapack_int n, double* ap, const double* bp ); lapack_int LAPACKE_sspgv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* ap, float* bp, float* w, float* z, lapack_int ldz ); lapack_int LAPACKE_dspgv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* ap, double* bp, double* w, double* z, lapack_int ldz ); lapack_int LAPACKE_sspgvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* ap, float* bp, float* w, float* z, lapack_int ldz ); lapack_int LAPACKE_dspgvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* ap, double* bp, double* w, double* z, lapack_int ldz ); lapack_int LAPACKE_sspgvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, float* ap, float* bp, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_dspgvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, double* ap, double* bp, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_ssprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, const float* afp, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dsprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, const double* afp, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_csprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* 
ap, const lapack_complex_float* afp, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zsprfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_sspsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* ap, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dspsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* ap, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cspsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zspsv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sspsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const float* ap, float* afp, lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_dspsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const double* ap, double* afp, lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_cspsvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* afp, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zspsvx( int matrix_order, char fact, char uplo, lapack_int n, 
lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* afp, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_ssptrd( int matrix_order, char uplo, lapack_int n, float* ap, float* d, float* e, float* tau ); lapack_int LAPACKE_dsptrd( int matrix_order, char uplo, lapack_int n, double* ap, double* d, double* e, double* tau ); lapack_int LAPACKE_ssptrf( int matrix_order, char uplo, lapack_int n, float* ap, lapack_int* ipiv ); lapack_int LAPACKE_dsptrf( int matrix_order, char uplo, lapack_int n, double* ap, lapack_int* ipiv ); lapack_int LAPACKE_csptrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, lapack_int* ipiv ); lapack_int LAPACKE_zsptrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, lapack_int* ipiv ); lapack_int LAPACKE_ssptri( int matrix_order, char uplo, lapack_int n, float* ap, const lapack_int* ipiv ); lapack_int LAPACKE_dsptri( int matrix_order, char uplo, lapack_int n, double* ap, const lapack_int* ipiv ); lapack_int LAPACKE_csptri( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, const lapack_int* ipiv ); lapack_int LAPACKE_zsptri( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, const lapack_int* ipiv ); lapack_int LAPACKE_ssptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dsptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_csptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zsptrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_int* 
ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sstebz( char range, char order, lapack_int n, float vl, float vu, lapack_int il, lapack_int iu, float abstol, const float* d, const float* e, lapack_int* m, lapack_int* nsplit, float* w, lapack_int* iblock, lapack_int* isplit ); lapack_int LAPACKE_dstebz( char range, char order, lapack_int n, double vl, double vu, lapack_int il, lapack_int iu, double abstol, const double* d, const double* e, lapack_int* m, lapack_int* nsplit, double* w, lapack_int* iblock, lapack_int* isplit ); lapack_int LAPACKE_sstedc( int matrix_order, char compz, lapack_int n, float* d, float* e, float* z, lapack_int ldz ); lapack_int LAPACKE_dstedc( int matrix_order, char compz, lapack_int n, double* d, double* e, double* z, lapack_int ldz ); lapack_int LAPACKE_cstedc( int matrix_order, char compz, lapack_int n, float* d, float* e, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zstedc( int matrix_order, char compz, lapack_int n, double* d, double* e, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_sstegr( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* isuppz ); lapack_int LAPACKE_dstegr( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* isuppz ); lapack_int LAPACKE_cstegr( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* isuppz ); lapack_int LAPACKE_zstegr( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, 
lapack_complex_double* z, lapack_int ldz, lapack_int* isuppz ); lapack_int LAPACKE_sstein( int matrix_order, lapack_int n, const float* d, const float* e, lapack_int m, const float* w, const lapack_int* iblock, const lapack_int* isplit, float* z, lapack_int ldz, lapack_int* ifailv ); lapack_int LAPACKE_dstein( int matrix_order, lapack_int n, const double* d, const double* e, lapack_int m, const double* w, const lapack_int* iblock, const lapack_int* isplit, double* z, lapack_int ldz, lapack_int* ifailv ); lapack_int LAPACKE_cstein( int matrix_order, lapack_int n, const float* d, const float* e, lapack_int m, const float* w, const lapack_int* iblock, const lapack_int* isplit, lapack_complex_float* z, lapack_int ldz, lapack_int* ifailv ); lapack_int LAPACKE_zstein( int matrix_order, lapack_int n, const double* d, const double* e, lapack_int m, const double* w, const lapack_int* iblock, const lapack_int* isplit, lapack_complex_double* z, lapack_int ldz, lapack_int* ifailv ); lapack_int LAPACKE_sstemr( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac ); lapack_int LAPACKE_dstemr( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac ); lapack_int LAPACKE_cstemr( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac ); lapack_int LAPACKE_zstemr( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, lapack_int* m, double* w, 
lapack_complex_double* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac ); lapack_int LAPACKE_ssteqr( int matrix_order, char compz, lapack_int n, float* d, float* e, float* z, lapack_int ldz ); lapack_int LAPACKE_dsteqr( int matrix_order, char compz, lapack_int n, double* d, double* e, double* z, lapack_int ldz ); lapack_int LAPACKE_csteqr( int matrix_order, char compz, lapack_int n, float* d, float* e, lapack_complex_float* z, lapack_int ldz ); lapack_int LAPACKE_zsteqr( int matrix_order, char compz, lapack_int n, double* d, double* e, lapack_complex_double* z, lapack_int ldz ); lapack_int LAPACKE_ssterf( lapack_int n, float* d, float* e ); lapack_int LAPACKE_dsterf( lapack_int n, double* d, double* e ); lapack_int LAPACKE_sstev( int matrix_order, char jobz, lapack_int n, float* d, float* e, float* z, lapack_int ldz ); lapack_int LAPACKE_dstev( int matrix_order, char jobz, lapack_int n, double* d, double* e, double* z, lapack_int ldz ); lapack_int LAPACKE_sstevd( int matrix_order, char jobz, lapack_int n, float* d, float* e, float* z, lapack_int ldz ); lapack_int LAPACKE_dstevd( int matrix_order, char jobz, lapack_int n, double* d, double* e, double* z, lapack_int ldz ); lapack_int LAPACKE_sstevr( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* isuppz ); lapack_int LAPACKE_dstevr( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* isuppz ); lapack_int LAPACKE_sstevx( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_dstevx( int matrix_order, char jobz, 
char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_ssycon( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_dsycon( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_csycon( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, float anorm, float* rcond ); lapack_int LAPACKE_zsycon( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, double anorm, double* rcond ); lapack_int LAPACKE_ssyequb( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_dsyequb( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_csyequb( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_zsyequb( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_ssyev( int matrix_order, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* w ); lapack_int LAPACKE_dsyev( int matrix_order, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* w ); lapack_int LAPACKE_ssyevd( int matrix_order, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* w ); lapack_int LAPACKE_dsyevd( int matrix_order, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* w ); lapack_int LAPACKE_ssyevr( int matrix_order, char jobz, char range, 
char uplo, lapack_int n, float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* isuppz ); lapack_int LAPACKE_dsyevr( int matrix_order, char jobz, char range, char uplo, lapack_int n, double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* isuppz ); lapack_int LAPACKE_ssyevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_dsyevx( int matrix_order, char jobz, char range, char uplo, lapack_int n, double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_ssygst( int matrix_order, lapack_int itype, char uplo, lapack_int n, float* a, lapack_int lda, const float* b, lapack_int ldb ); lapack_int LAPACKE_dsygst( int matrix_order, lapack_int itype, char uplo, lapack_int n, double* a, lapack_int lda, const double* b, lapack_int ldb ); lapack_int LAPACKE_ssygv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* w ); lapack_int LAPACKE_dsygv( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* w ); lapack_int LAPACKE_ssygvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* w ); lapack_int LAPACKE_dsygvd( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* w ); lapack_int LAPACKE_ssygvx( int matrix_order, lapack_int itype, char jobz, char 
range, char uplo, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_dsygvx( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* ifail ); lapack_int LAPACKE_ssyrfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dsyrfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_csyrfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_zsyrfs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_ssyrfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* s, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* 
berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dsyrfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* s, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_csyrfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const float* s, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zsyrfsx( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const double* s, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_ssysv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dsysv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_csysv( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zsysv( int matrix_order, char uplo, 
lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_ssysvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_dsysvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_csysvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr ); lapack_int LAPACKE_zsysvx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr ); lapack_int LAPACKE_ssysvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_dsysvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* s, double* b, lapack_int ldb, double* x, 
lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_csysvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params ); lapack_int LAPACKE_zsysvxx( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params ); lapack_int LAPACKE_ssytrd( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, float* d, float* e, float* tau ); lapack_int LAPACKE_dsytrd( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, double* d, double* e, double* tau ); lapack_int LAPACKE_ssytrf( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_dsytrf( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_csytrf( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_zsytrf( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv ); lapack_int LAPACKE_ssytri( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_dsytri( int 
matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_csytri( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_zsytri( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_ssytrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dsytrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_csytrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zsytrs( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_stbcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab, float* rcond ); lapack_int LAPACKE_dtbcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab, double* rcond ); lapack_int LAPACKE_ctbcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab, float* rcond ); lapack_int LAPACKE_ztbcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab, double* rcond ); lapack_int LAPACKE_stbrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, const float* b, lapack_int ldb, const float* x, lapack_int ldx, 
float* ferr, float* berr ); lapack_int LAPACKE_dtbrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, const double* b, lapack_int ldb, const double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_ctbrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_ztbrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_stbtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, float* b, lapack_int ldb ); lapack_int LAPACKE_dtbtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, double* b, lapack_int ldb ); lapack_int LAPACKE_ctbtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztbtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_stfsm( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, float alpha, const float* a, float* b, lapack_int ldb ); lapack_int LAPACKE_dtfsm( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, 
lapack_int n, double alpha, const double* a, double* b, lapack_int ldb ); lapack_int LAPACKE_ctfsm( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, lapack_complex_float alpha, const lapack_complex_float* a, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztfsm( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, lapack_complex_double alpha, const lapack_complex_double* a, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_stftri( int matrix_order, char transr, char uplo, char diag, lapack_int n, float* a ); lapack_int LAPACKE_dtftri( int matrix_order, char transr, char uplo, char diag, lapack_int n, double* a ); lapack_int LAPACKE_ctftri( int matrix_order, char transr, char uplo, char diag, lapack_int n, lapack_complex_float* a ); lapack_int LAPACKE_ztftri( int matrix_order, char transr, char uplo, char diag, lapack_int n, lapack_complex_double* a ); lapack_int LAPACKE_stfttp( int matrix_order, char transr, char uplo, lapack_int n, const float* arf, float* ap ); lapack_int LAPACKE_dtfttp( int matrix_order, char transr, char uplo, lapack_int n, const double* arf, double* ap ); lapack_int LAPACKE_ctfttp( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* arf, lapack_complex_float* ap ); lapack_int LAPACKE_ztfttp( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* arf, lapack_complex_double* ap ); lapack_int LAPACKE_stfttr( int matrix_order, char transr, char uplo, lapack_int n, const float* arf, float* a, lapack_int lda ); lapack_int LAPACKE_dtfttr( int matrix_order, char transr, char uplo, lapack_int n, const double* arf, double* a, lapack_int lda ); lapack_int LAPACKE_ctfttr( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* arf, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_ztfttr( int matrix_order, char transr, char 
uplo, lapack_int n, const lapack_complex_double* arf, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_stgevc( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const float* s, lapack_int lds, const float* p, lapack_int ldp, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_dtgevc( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const double* s, lapack_int lds, const double* p, lapack_int ldp, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ctgevc( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_float* s, lapack_int lds, const lapack_complex_float* p, lapack_int ldp, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ztgevc( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_double* s, lapack_int lds, const lapack_complex_double* p, lapack_int ldp, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_stgexc( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* q, lapack_int ldq, float* z, lapack_int ldz, lapack_int* ifst, lapack_int* ilst ); lapack_int LAPACKE_dtgexc( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* q, lapack_int ldq, double* z, lapack_int ldz, lapack_int* ifst, lapack_int* ilst ); lapack_int LAPACKE_ctgexc( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* q, 
lapack_int ldq, lapack_complex_float* z, lapack_int ldz, lapack_int ifst, lapack_int ilst ); lapack_int LAPACKE_ztgexc( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz, lapack_int ifst, lapack_int ilst ); /* NOTE(review): LAPACKE_ctgexc and LAPACKE_ztgexc declare ifst/ilst as plain lapack_int values, whereas LAPACKE_stgexc and LAPACKE_dtgexc declare them as lapack_int*. Presumably this mirrors the upstream LAPACKE interface -- confirm before "harmonising" the four signatures. */ lapack_int LAPACKE_stgsen( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* alphar, float* alphai, float* beta, float* q, lapack_int ldq, float* z, lapack_int ldz, lapack_int* m, float* pl, float* pr, float* dif ); lapack_int LAPACKE_dtgsen( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* alphar, double* alphai, double* beta, double* q, lapack_int ldq, double* z, lapack_int ldz, lapack_int* m, double* pl, double* pr, double* dif ); lapack_int LAPACKE_ctgsen( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz, lapack_int* m, float* pl, float* pr, float* dif ); lapack_int LAPACKE_ztgsen( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz, lapack_int* m, double* pl, double* pr, double* dif ); lapack_int LAPACKE_stgsja( int 
matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, float* a, lapack_int lda, float* b, lapack_int ldb, float tola, float tolb, float* alpha, float* beta, float* u, lapack_int ldu, float* v, lapack_int ldv, float* q, lapack_int ldq, lapack_int* ncycle ); lapack_int LAPACKE_dtgsja( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, double* a, lapack_int lda, double* b, lapack_int ldb, double tola, double tolb, double* alpha, double* beta, double* u, lapack_int ldu, double* v, lapack_int ldv, double* q, lapack_int ldq, lapack_int* ncycle ); lapack_int LAPACKE_ctgsja( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float tola, float tolb, float* alpha, float* beta, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* v, lapack_int ldv, lapack_complex_float* q, lapack_int ldq, lapack_int* ncycle ); lapack_int LAPACKE_ztgsja( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double tola, double tolb, double* alpha, double* beta, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_complex_double* q, lapack_int ldq, lapack_int* ncycle ); lapack_int LAPACKE_stgsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const float* a, lapack_int lda, const float* b, lapack_int ldb, const float* vl, lapack_int ldvl, const float* vr, lapack_int ldvr, float* s, float* dif, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_dtgsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const double* a, lapack_int lda, const double* 
b, lapack_int ldb, const double* vl, lapack_int ldvl, const double* vr, lapack_int ldvr, double* s, double* dif, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ctgsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* vl, lapack_int ldvl, const lapack_complex_float* vr, lapack_int ldvr, float* s, float* dif, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ztgsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* vl, lapack_int ldvl, const lapack_complex_double* vr, lapack_int ldvr, double* s, double* dif, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_stgsyl( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const float* a, lapack_int lda, const float* b, lapack_int ldb, float* c, lapack_int ldc, const float* d, lapack_int ldd, const float* e, lapack_int lde, float* f, lapack_int ldf, float* scale, float* dif ); lapack_int LAPACKE_dtgsyl( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const double* a, lapack_int lda, const double* b, lapack_int ldb, double* c, lapack_int ldc, const double* d, lapack_int ldd, const double* e, lapack_int lde, double* f, lapack_int ldf, double* scale, double* dif ); lapack_int LAPACKE_ctgsyl( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* c, lapack_int ldc, const lapack_complex_float* d, lapack_int ldd, const lapack_complex_float* e, lapack_int lde, lapack_complex_float* f, lapack_int ldf, float* scale, float* dif ); lapack_int LAPACKE_ztgsyl( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, 
const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* c, lapack_int ldc, const lapack_complex_double* d, lapack_int ldd, const lapack_complex_double* e, lapack_int lde, lapack_complex_double* f, lapack_int ldf, double* scale, double* dif ); lapack_int LAPACKE_stpcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const float* ap, float* rcond ); lapack_int LAPACKE_dtpcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const double* ap, double* rcond ); lapack_int LAPACKE_ctpcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_float* ap, float* rcond ); lapack_int LAPACKE_ztpcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_double* ap, double* rcond ); lapack_int LAPACKE_stprfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* ap, const float* b, lapack_int ldb, const float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dtprfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* ap, const double* b, lapack_int ldb, const double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_ctprfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_ztprfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_stptri( int matrix_order, char uplo, char diag, lapack_int n, float* ap ); lapack_int LAPACKE_dtptri( int matrix_order, char uplo, char diag, lapack_int n, double* 
ap ); lapack_int LAPACKE_ctptri( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_float* ap ); lapack_int LAPACKE_ztptri( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_double* ap ); lapack_int LAPACKE_stptrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* ap, float* b, lapack_int ldb ); lapack_int LAPACKE_dtptrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* ap, double* b, lapack_int ldb ); lapack_int LAPACKE_ctptrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztptrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_stpttf( int matrix_order, char transr, char uplo, lapack_int n, const float* ap, float* arf ); lapack_int LAPACKE_dtpttf( int matrix_order, char transr, char uplo, lapack_int n, const double* ap, double* arf ); lapack_int LAPACKE_ctpttf( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* ap, lapack_complex_float* arf ); lapack_int LAPACKE_ztpttf( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* ap, lapack_complex_double* arf ); lapack_int LAPACKE_stpttr( int matrix_order, char uplo, lapack_int n, const float* ap, float* a, lapack_int lda ); lapack_int LAPACKE_dtpttr( int matrix_order, char uplo, lapack_int n, const double* ap, double* a, lapack_int lda ); lapack_int LAPACKE_ctpttr( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_ztpttr( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_strcon( int matrix_order, 
char norm, char uplo, char diag, lapack_int n, const float* a, lapack_int lda, float* rcond ); lapack_int LAPACKE_dtrcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const double* a, lapack_int lda, double* rcond ); lapack_int LAPACKE_ctrcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* rcond ); lapack_int LAPACKE_ztrcon( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* rcond ); lapack_int LAPACKE_strevc( int matrix_order, char side, char howmny, lapack_logical* select, lapack_int n, const float* t, lapack_int ldt, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_dtrevc( int matrix_order, char side, char howmny, lapack_logical* select, lapack_int n, const double* t, lapack_int ldt, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ctrevc( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ztrevc( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m ); /* NOTE(review): const placement differs across the four trevc prototypes above: strevc/dtrevc take a non-const select and a const t, while ctrevc/ztrevc take a const select and a non-const t. Presumably this follows the underlying routines' in/out contracts -- confirm against the LAPACK documentation before changing either side. */ lapack_int LAPACKE_strexc( int matrix_order, char compq, lapack_int n, float* t, lapack_int ldt, float* q, lapack_int ldq, lapack_int* ifst, lapack_int* ilst ); lapack_int LAPACKE_dtrexc( int matrix_order, char compq, lapack_int n, double* t, lapack_int ldt, double* q, lapack_int ldq, lapack_int* ifst, lapack_int* ilst ); lapack_int LAPACKE_ctrexc( int matrix_order, char compq, lapack_int n, lapack_complex_float* t, lapack_int ldt, 
lapack_complex_float* q, lapack_int ldq, lapack_int ifst, lapack_int ilst ); lapack_int LAPACKE_ztrexc( int matrix_order, char compq, lapack_int n, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* q, lapack_int ldq, lapack_int ifst, lapack_int ilst ); /* NOTE(review): LAPACKE_ctrexc and LAPACKE_ztrexc declare ifst/ilst as plain lapack_int values, whereas LAPACKE_strexc and LAPACKE_dtrexc declare them as lapack_int*. Presumably this mirrors the upstream LAPACKE interface -- confirm before changing. */ lapack_int LAPACKE_strrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* b, lapack_int ldb, const float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_dtrrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* b, lapack_int ldb, const double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_ctrrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr ); lapack_int LAPACKE_ztrrfs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr ); lapack_int LAPACKE_strsen( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, float* t, lapack_int ldt, float* q, lapack_int ldq, float* wr, float* wi, lapack_int* m, float* s, float* sep ); lapack_int LAPACKE_dtrsen( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, double* t, lapack_int ldt, double* q, lapack_int ldq, double* wr, double* wi, lapack_int* m, double* s, double* sep ); lapack_int LAPACKE_ctrsen( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* w, lapack_int* m, float* 
s, float* sep ); lapack_int LAPACKE_ztrsen( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* w, lapack_int* m, double* s, double* sep ); lapack_int LAPACKE_strsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const float* t, lapack_int ldt, const float* vl, lapack_int ldvl, const float* vr, lapack_int ldvr, float* s, float* sep, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_dtrsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const double* t, lapack_int ldt, const double* vl, lapack_int ldvl, const double* vr, lapack_int ldvr, double* s, double* sep, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ctrsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_float* t, lapack_int ldt, const lapack_complex_float* vl, lapack_int ldvl, const lapack_complex_float* vr, lapack_int ldvr, float* s, float* sep, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_ztrsna( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_double* t, lapack_int ldt, const lapack_complex_double* vl, lapack_int ldvl, const lapack_complex_double* vr, lapack_int ldvr, double* s, double* sep, lapack_int mm, lapack_int* m ); lapack_int LAPACKE_strsyl( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const float* a, lapack_int lda, const float* b, lapack_int ldb, float* c, lapack_int ldc, float* scale ); lapack_int LAPACKE_dtrsyl( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const double* a, lapack_int lda, const double* b, lapack_int ldb, double* c, lapack_int ldc, double* scale ); lapack_int LAPACKE_ctrsyl( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int 
n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* c, lapack_int ldc, float* scale ); lapack_int LAPACKE_ztrsyl( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* c, lapack_int ldc, double* scale ); lapack_int LAPACKE_strtri( int matrix_order, char uplo, char diag, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dtrtri( int matrix_order, char uplo, char diag, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_ctrtri( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_ztrtri( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_strtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dtrtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_ctrtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztrtrs( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_strttf( int matrix_order, char transr, char uplo, lapack_int n, const float* a, lapack_int lda, float* arf ); lapack_int LAPACKE_dtrttf( int matrix_order, char transr, char uplo, lapack_int n, const double* a, lapack_int lda, double* arf ); lapack_int LAPACKE_ctrttf( int matrix_order, char transr, char uplo, lapack_int n, const 
lapack_complex_float* a, lapack_int lda, lapack_complex_float* arf ); lapack_int LAPACKE_ztrttf( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* arf ); lapack_int LAPACKE_strttp( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, float* ap ); lapack_int LAPACKE_dtrttp( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, double* ap ); lapack_int LAPACKE_ctrttp( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* ap ); lapack_int LAPACKE_ztrttp( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* ap ); lapack_int LAPACKE_stzrzf( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau ); lapack_int LAPACKE_dtzrzf( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau ); lapack_int LAPACKE_ctzrzf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau ); lapack_int LAPACKE_ztzrzf( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau ); lapack_int LAPACKE_cungbr( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zungbr( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cunghr( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zunghr( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cunglq( int 
matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zunglq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cungql( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zungql( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cungqr( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zungqr( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cungrq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zungrq( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cungtr( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau ); lapack_int LAPACKE_zungtr( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau ); lapack_int LAPACKE_cunmbr( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmbr( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const 
lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cunmhr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmhr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cunmlq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmlq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cunmql( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmql( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cunmqr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmqr( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int 
LAPACKE_cunmrq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmrq( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cunmrz( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmrz( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cunmtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zunmtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_cupgtr( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, const lapack_complex_float* tau, lapack_complex_float* q, lapack_int ldq ); lapack_int LAPACKE_zupgtr( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, const lapack_complex_double* tau, lapack_complex_double* q, lapack_int ldq ); lapack_int LAPACKE_cupmtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_float* ap, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc ); 
/* LAPACKE_zupmtr is the last prototype here without a name suffix; every prototype that follows it on this line carries the _work suffix. */
lapack_int LAPACKE_zupmtr( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_double* ap, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc ); /* ---- _work prototypes start here. Several of them end with extra work / iwork arrays; presumably these are caller-supplied workspace -- confirm against the LAPACKE documentation. ---- */ lapack_int LAPACKE_sbdsdc_work( int matrix_order, char uplo, char compq, lapack_int n, float* d, float* e, float* u, lapack_int ldu, float* vt, lapack_int ldvt, float* q, lapack_int* iq, float* work, lapack_int* iwork ); lapack_int LAPACKE_dbdsdc_work( int matrix_order, char uplo, char compq, lapack_int n, double* d, double* e, double* u, lapack_int ldu, double* vt, lapack_int ldvt, double* q, lapack_int* iq, double* work, lapack_int* iwork ); lapack_int LAPACKE_sbdsqr_work( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, float* d, float* e, float* vt, lapack_int ldvt, float* u, lapack_int ldu, float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_dbdsqr_work( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, double* d, double* e, double* vt, lapack_int ldvt, double* u, lapack_int ldu, double* c, lapack_int ldc, double* work ); lapack_int LAPACKE_cbdsqr_work( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, float* d, float* e, lapack_complex_float* vt, lapack_int ldvt, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_zbdsqr_work( int matrix_order, char uplo, lapack_int n, lapack_int ncvt, lapack_int nru, lapack_int ncc, double* d, double* e, lapack_complex_double* vt, lapack_int ldvt, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* c, lapack_int ldc, double* work ); /* sdisna_work / ddisna_work below take no matrix_order argument and no work array, unlike the other _work prototypes on this line. */ lapack_int LAPACKE_sdisna_work( char job, lapack_int m, lapack_int n, const float* d, float* sep ); lapack_int LAPACKE_ddisna_work( char job, lapack_int m, lapack_int n, const double* d, double* sep ); lapack_int LAPACKE_sgbbrd_work( int matrix_order, char vect, lapack_int 
m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, float* ab, lapack_int ldab, float* d, float* e, float* q, lapack_int ldq, float* pt, lapack_int ldpt, float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_dgbbrd_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, double* ab, lapack_int ldab, double* d, double* e, double* q, lapack_int ldq, double* pt, lapack_int ldpt, double* c, lapack_int ldc, double* work ); lapack_int LAPACKE_cgbbrd_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, lapack_complex_float* ab, lapack_int ldab, float* d, float* e, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* pt, lapack_int ldpt, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgbbrd_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int ncc, lapack_int kl, lapack_int ku, lapack_complex_double* ab, lapack_int ldab, double* d, double* e, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* pt, lapack_int ldpt, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgbcon_work( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const float* ab, lapack_int ldab, const lapack_int* ipiv, float anorm, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgbcon_work( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const double* ab, lapack_int ldab, const lapack_int* ipiv, double anorm, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgbcon_work( int matrix_order, char norm, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float* ab, lapack_int ldab, const lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgbcon_work( int matrix_order, char norm, 
lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double* ab, lapack_int ldab, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgbequ_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgbequ_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgbequ_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgbequ_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgbequb_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgbequb_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgbequb_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float* ab, lapack_int ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgbequb_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double* ab, lapack_int ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgbrfs_work( int matrix_order, char trans, 
lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const float* ab, lapack_int ldab, const float* afb, lapack_int ldafb, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgbrfs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const double* ab, lapack_int ldab, const double* afb, lapack_int ldafb, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgbrfs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* afb, lapack_int ldafb, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgbrfs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* afb, lapack_int ldafb, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgbrfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const float* ab, lapack_int ldab, const float* afb, lapack_int ldafb, const lapack_int* ipiv, const float* r, const float* c, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgbrfsx_work( int matrix_order, char trans, char equed, 
lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const double* ab, lapack_int ldab, const double* afb, lapack_int ldafb, const lapack_int* ipiv, const double* r, const double* c, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgbrfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* afb, lapack_int ldafb, const lapack_int* ipiv, const float* r, const float* c, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgbrfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* afb, lapack_int ldafb, const lapack_int* ipiv, const double* r, const double* c, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgbsv_work( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, float* ab, lapack_int ldab, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgbsv_work( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, double* ab, lapack_int ldab, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgbsv_work( int matrix_order, lapack_int n, lapack_int kl, lapack_int 
ku, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgbsv_work( int matrix_order, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgbsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, float* ab, lapack_int ldab, float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgbsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, double* ab, lapack_int ldab, double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgbsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgbsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int 
LAPACKE_sgbsvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, float* ab, lapack_int ldab, float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgbsvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, double* ab, lapack_int ldab, double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgbsvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgbsvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* afb, lapack_int ldafb, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* 
params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgbtrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, float* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_dgbtrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, double* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_cgbtrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, lapack_complex_float* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_zgbtrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, lapack_complex_double* ab, lapack_int ldab, lapack_int* ipiv ); lapack_int LAPACKE_sgbtrs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const float* ab, lapack_int ldab, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgbtrs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const double* ab, lapack_int ldab, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgbtrs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgbtrs_work( int matrix_order, char trans, lapack_int n, lapack_int kl, lapack_int ku, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgebak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* scale, lapack_int m, float* v, lapack_int ldv ); lapack_int LAPACKE_dgebak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* scale, lapack_int m, double* v, lapack_int ldv ); lapack_int 
LAPACKE_cgebak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* scale, lapack_int m, lapack_complex_float* v, lapack_int ldv ); lapack_int LAPACKE_zgebak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* scale, lapack_int m, lapack_complex_double* v, lapack_int ldv ); lapack_int LAPACKE_sgebal_work( int matrix_order, char job, lapack_int n, float* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, float* scale ); lapack_int LAPACKE_dgebal_work( int matrix_order, char job, lapack_int n, double* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, double* scale ); lapack_int LAPACKE_cgebal_work( int matrix_order, char job, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, float* scale ); lapack_int LAPACKE_zgebal_work( int matrix_order, char job, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ilo, lapack_int* ihi, double* scale ); lapack_int LAPACKE_sgebrd_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* d, float* e, float* tauq, float* taup, float* work, lapack_int lwork ); lapack_int LAPACKE_dgebrd_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* d, double* e, double* tauq, double* taup, double* work, lapack_int lwork ); lapack_int LAPACKE_cgebrd_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* d, float* e, lapack_complex_float* tauq, lapack_complex_float* taup, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgebrd_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* d, double* e, lapack_complex_double* tauq, lapack_complex_double* taup, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgecon_work( int matrix_order, char norm, lapack_int n, const float* a, lapack_int lda, float anorm, 
float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgecon_work( int matrix_order, char norm, lapack_int n, const double* a, lapack_int lda, double anorm, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgecon_work( int matrix_order, char norm, lapack_int n, const lapack_complex_float* a, lapack_int lda, float anorm, float* rcond, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgecon_work( int matrix_order, char norm, lapack_int n, const lapack_complex_double* a, lapack_int lda, double anorm, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgeequ_work( int matrix_order, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgeequ_work( int matrix_order, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgeequ_work( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgeequ_work( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgeequb_work( int matrix_order, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_dgeequb_work( int matrix_order, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_cgeequb_work( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax ); lapack_int LAPACKE_zgeequb_work( int matrix_order, lapack_int m, lapack_int n, const 
lapack_complex_double* a, lapack_int lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax ); lapack_int LAPACKE_sgees_work( int matrix_order, char jobvs, char sort, LAPACK_S_SELECT2 select, lapack_int n, float* a, lapack_int lda, lapack_int* sdim, float* wr, float* wi, float* vs, lapack_int ldvs, float* work, lapack_int lwork, lapack_logical* bwork ); lapack_int LAPACKE_dgees_work( int matrix_order, char jobvs, char sort, LAPACK_D_SELECT2 select, lapack_int n, double* a, lapack_int lda, lapack_int* sdim, double* wr, double* wi, double* vs, lapack_int ldvs, double* work, lapack_int lwork, lapack_logical* bwork ); lapack_int LAPACKE_cgees_work( int matrix_order, char jobvs, char sort, LAPACK_C_SELECT1 select, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* sdim, lapack_complex_float* w, lapack_complex_float* vs, lapack_int ldvs, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_logical* bwork ); lapack_int LAPACKE_zgees_work( int matrix_order, char jobvs, char sort, LAPACK_Z_SELECT1 select, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* sdim, lapack_complex_double* w, lapack_complex_double* vs, lapack_int ldvs, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_logical* bwork ); lapack_int LAPACKE_sgeesx_work( int matrix_order, char jobvs, char sort, LAPACK_S_SELECT2 select, char sense, lapack_int n, float* a, lapack_int lda, lapack_int* sdim, float* wr, float* wi, float* vs, lapack_int ldvs, float* rconde, float* rcondv, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork, lapack_logical* bwork ); lapack_int LAPACKE_dgeesx_work( int matrix_order, char jobvs, char sort, LAPACK_D_SELECT2 select, char sense, lapack_int n, double* a, lapack_int lda, lapack_int* sdim, double* wr, double* wi, double* vs, lapack_int ldvs, double* rconde, double* rcondv, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork, lapack_logical* bwork ); lapack_int 
LAPACKE_cgeesx_work( int matrix_order, char jobvs, char sort, LAPACK_C_SELECT1 select, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* sdim, lapack_complex_float* w, lapack_complex_float* vs, lapack_int ldvs, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_logical* bwork ); lapack_int LAPACKE_zgeesx_work( int matrix_order, char jobvs, char sort, LAPACK_Z_SELECT1 select, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* sdim, lapack_complex_double* w, lapack_complex_double* vs, lapack_int ldvs, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_logical* bwork ); lapack_int LAPACKE_sgeev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, float* a, lapack_int lda, float* wr, float* wi, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, float* work, lapack_int lwork ); lapack_int LAPACKE_dgeev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, double* a, lapack_int lda, double* wr, double* wi, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, double* work, lapack_int lwork ); lapack_int LAPACKE_cgeev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zgeev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* w, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sgeevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, float* a, lapack_int lda, float* wr, float* wi, float* vl, lapack_int ldvl, float* vr, 
lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* scale, float* abnrm, float* rconde, float* rcondv, float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dgeevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, double* a, lapack_int lda, double* wr, double* wi, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* scale, double* abnrm, double* rconde, double* rcondv, double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_cgeevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* scale, float* abnrm, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zgeevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* w, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* scale, double* abnrm, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sgehrd_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgehrd_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgehrd_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgehrd_work( int matrix_order, lapack_int n, 
lapack_int ilo, lapack_int ihi, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgejsv_work( int matrix_order, char joba, char jobu, char jobv, char jobr, char jobt, char jobp, lapack_int m, lapack_int n, float* a, lapack_int lda, float* sva, float* u, lapack_int ldu, float* v, lapack_int ldv, float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dgejsv_work( int matrix_order, char joba, char jobu, char jobv, char jobr, char jobt, char jobp, lapack_int m, lapack_int n, double* a, lapack_int lda, double* sva, double* u, lapack_int ldu, double* v, lapack_int ldv, double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_sgelq2_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work ); lapack_int LAPACKE_dgelq2_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work ); lapack_int LAPACKE_cgelq2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work ); lapack_int LAPACKE_zgelq2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work ); lapack_int LAPACKE_sgelqf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgelqf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgelqf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgelqf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, 
lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgels_work( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, float* work, lapack_int lwork ); lapack_int LAPACKE_dgels_work( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* work, lapack_int lwork ); lapack_int LAPACKE_cgels_work( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgels_work( int matrix_order, char trans, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgelsd_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank, float* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dgelsd_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank, double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_cgelsd_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork ); lapack_int LAPACKE_zgelsd_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork ); 
lapack_int LAPACKE_sgelss_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank, float* work, lapack_int lwork ); lapack_int LAPACKE_dgelss_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank, double* work, lapack_int lwork ); lapack_int LAPACKE_cgelss_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* s, float rcond, lapack_int* rank, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zgelss_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* s, double rcond, lapack_int* rank, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sgelsy_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* jpvt, float rcond, lapack_int* rank, float* work, lapack_int lwork ); lapack_int LAPACKE_dgelsy_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* jpvt, double rcond, lapack_int* rank, double* work, lapack_int lwork ); lapack_int LAPACKE_cgelsy_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* jpvt, float rcond, lapack_int* rank, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zgelsy_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* jpvt, double rcond, lapack_int* rank, 
lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sgeqlf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgeqlf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgeqlf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgeqlf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgeqp3_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* jpvt, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgeqp3_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* jpvt, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgeqp3_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* jpvt, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zgeqp3_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* jpvt, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_sgeqpf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* jpvt, float* tau, float* work ); lapack_int LAPACKE_dgeqpf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* jpvt, double* tau, double* work ); lapack_int LAPACKE_cgeqpf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* jpvt, 
lapack_complex_float* tau, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgeqpf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* jpvt, lapack_complex_double* tau, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgeqr2_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work ); lapack_int LAPACKE_dgeqr2_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work ); lapack_int LAPACKE_cgeqr2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work ); lapack_int LAPACKE_zgeqr2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work ); lapack_int LAPACKE_sgeqrf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgeqrf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgeqrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgeqrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgeqrfp_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgeqrfp_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgeqrfp_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* 
a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zgeqrfp_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_sgerfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgerfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgerfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgerfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgerfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* r, const float* c, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* 
err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgerfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* r, const double* c, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgerfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const float* r, const float* c, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgerfsx_work( int matrix_order, char trans, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const double* r, const double* c, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgerqf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dgerqf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_cgerqf_work( int matrix_order, lapack_int m, lapack_int n, 
lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zgerqf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork );
/* [sdcz]gesdd_work: a single jobz flag; s is a real array in all four variants, and the complex variants take an extra real rwork before iwork. NOTE(review): presumably these wrap LAPACK xGESDD -- confirm. */
lapack_int LAPACKE_sgesdd_work( int matrix_order, char jobz, lapack_int m, lapack_int n, float* a, lapack_int lda, float* s, float* u, lapack_int ldu, float* vt, lapack_int ldvt, float* work, lapack_int lwork, lapack_int* iwork );
lapack_int LAPACKE_dgesdd_work( int matrix_order, char jobz, lapack_int m, lapack_int n, double* a, lapack_int lda, double* s, double* u, lapack_int ldu, double* vt, lapack_int ldvt, double* work, lapack_int lwork, lapack_int* iwork );
lapack_int LAPACKE_cgesdd_work( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* vt, lapack_int ldvt, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork );
lapack_int LAPACKE_zgesdd_work( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* s, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* vt, lapack_int ldvt, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork );
/* [sdcz]gesv_work: no work arrays; a, ipiv and b are all non-const. NOTE(review): presumably these wrap LAPACK xGESV -- confirm. */
lapack_int LAPACKE_sgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb );
lapack_int LAPACKE_dgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb );
lapack_int LAPACKE_cgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_zgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_double* a, 
lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb );
/* dsgesv_work / zcgesv_work: a, b, x and work are double precision, but swork is single precision (float / lapack_complex_float); iter is passed by pointer, and the complex variant also takes a real rwork. NOTE(review): presumably the mixed-precision LAPACK DSGESV / ZCGESV solvers -- confirm. */
lapack_int LAPACKE_dsgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb, double* x, lapack_int ldx, double* work, float* swork, lapack_int* iter );
lapack_int LAPACKE_zcgesv_work( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, lapack_complex_double* work, lapack_complex_float* swork, double* rwork, lapack_int* iter );
/* [sdcz]gesvd_work: separate jobu and jobvt flags; s is a real array in all four variants, and the complex variants add a trailing real rwork. NOTE(review): presumably these wrap LAPACK xGESVD -- confirm. */
lapack_int LAPACKE_sgesvd_work( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, float* a, lapack_int lda, float* s, float* u, lapack_int ldu, float* vt, lapack_int ldvt, float* work, lapack_int lwork );
lapack_int LAPACKE_dgesvd_work( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, double* a, lapack_int lda, double* s, double* u, lapack_int ldu, double* vt, lapack_int ldvt, double* work, lapack_int lwork );
lapack_int LAPACKE_cgesvd_work( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* vt, lapack_int ldvt, lapack_complex_float* work, lapack_int lwork, float* rwork );
lapack_int LAPACKE_zgesvd_work( int matrix_order, char jobu, char jobvt, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* s, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* vt, lapack_int ldvt, lapack_complex_double* work, lapack_int lwork, double* rwork );
/* [sd]gesvj_work: only the s and d variants are declared here; three job flags (joba, jobu, jobv) and an mv count precede v. NOTE(review): presumably these wrap LAPACK xGESVJ -- confirm. */
lapack_int LAPACKE_sgesvj_work( int matrix_order, char joba, char jobu, char jobv, lapack_int m, lapack_int n, float* a, lapack_int lda, float* sva, lapack_int mv, float* v, lapack_int ldv, float* work, lapack_int lwork );
lapack_int LAPACKE_dgesvj_work( int matrix_order, char joba, char jobu, char jobv, lapack_int m, 
lapack_int n, double* a, lapack_int lda, double* sva, lapack_int mv, double* v, lapack_int ldv, double* work, lapack_int lwork );
/* [sdcz]gesvx_work: equed is passed by pointer (char*), and a, af, ipiv, r, c and b are all non-const. The real variants take an integer iwork, the complex variants a real rwork. NOTE(review): presumably these wrap LAPACK xGESVX -- confirm. */
lapack_int LAPACKE_sgesvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork );
lapack_int LAPACKE_dgesvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork );
lapack_int LAPACKE_cgesvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork );
lapack_int LAPACKE_zgesvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork );
/* [sdcz]gesvxx_work: takes rpvgrw, n_err_bnds, err_bnds_norm, err_bnds_comp, nparams and params; there is no ferr argument. NOTE(review): presumably these wrap LAPACK xGESVXX -- confirm. */
lapack_int LAPACKE_sgesvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* 
err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork );
lapack_int LAPACKE_dgesvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork );
lapack_int LAPACKE_cgesvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork );
lapack_int LAPACKE_zgesvxx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork );
/* [sdcz]getf2_work: arguments are matrix_order, m, n, a, lda and a non-const ipiv; no work arrays. NOTE(review): presumably these wrap LAPACK xGETF2 -- confirm. */
lapack_int LAPACKE_sgetf2_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* ipiv );
lapack_int LAPACKE_dgetf2_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* ipiv );
lapack_int LAPACKE_cgetf2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv );
lapack_int LAPACKE_zgetf2_work( int 
matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv );
/* [sdcz]getrf_work: arguments are matrix_order, m, n, a, lda and a non-const ipiv; no work arrays. NOTE(review): presumably these wrap LAPACK xGETRF -- confirm. */
lapack_int LAPACKE_sgetrf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, lapack_int* ipiv );
lapack_int LAPACKE_dgetrf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, lapack_int* ipiv );
lapack_int LAPACKE_cgetrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv );
lapack_int LAPACKE_zgetrf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv );
/* [sdcz]getri_work: ipiv is const while a is non-const; takes work and lwork. NOTE(review): presumably these wrap LAPACK xGETRI -- confirm. */
lapack_int LAPACKE_sgetri_work( int matrix_order, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv, float* work, lapack_int lwork );
lapack_int LAPACKE_dgetri_work( int matrix_order, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv, double* work, lapack_int lwork );
lapack_int LAPACKE_cgetri_work( int matrix_order, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zgetri_work( int matrix_order, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int lwork );
/* [sdcz]getrs_work: a and ipiv are const, b is non-const; no work arrays. NOTE(review): presumably these wrap LAPACK xGETRS -- confirm. */
lapack_int LAPACKE_sgetrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const lapack_int* ipiv, float* b, lapack_int ldb );
lapack_int LAPACKE_dgetrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const lapack_int* ipiv, double* b, lapack_int ldb );
lapack_int LAPACKE_cgetrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_zgetrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const 
lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb );
/* [sdcz]ggbak_work: lscale and rscale are const real arrays in every variant (float for s/c, double for d/z); only v changes element type. ilo and ihi are passed by value. NOTE(review): presumably these wrap LAPACK xGGBAK -- confirm. */
lapack_int LAPACKE_sggbak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* lscale, const float* rscale, lapack_int m, float* v, lapack_int ldv );
lapack_int LAPACKE_dggbak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* lscale, const double* rscale, lapack_int m, double* v, lapack_int ldv );
lapack_int LAPACKE_cggbak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const float* lscale, const float* rscale, lapack_int m, lapack_complex_float* v, lapack_int ldv );
lapack_int LAPACKE_zggbak_work( int matrix_order, char job, char side, lapack_int n, lapack_int ilo, lapack_int ihi, const double* lscale, const double* rscale, lapack_int m, lapack_complex_double* v, lapack_int ldv );
/* [sdcz]ggbal_work: ilo and ihi are passed by pointer; lscale, rscale and work are real arrays even in the complex variants. NOTE(review): presumably these wrap LAPACK xGGBAL -- confirm. */
lapack_int LAPACKE_sggbal_work( int matrix_order, char job, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* work );
lapack_int LAPACKE_dggbal_work( int matrix_order, char job, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* work );
lapack_int LAPACKE_cggbal_work( int matrix_order, char job, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* work );
lapack_int LAPACKE_zggbal_work( int matrix_order, char job, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* work );
/* [sdcz]gges_work: selctg is typed LAPACK_S_SELECT3 / LAPACK_D_SELECT3 in the real variants and LAPACK_C_SELECT2 / LAPACK_Z_SELECT2 in the complex ones; the real variants take alphar and alphai where the complex variants take a single alpha. NOTE(review): presumably these wrap LAPACK xGGES -- confirm. */
lapack_int LAPACKE_sgges_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_S_SELECT3 selctg, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* sdim, float* 
alphar, float* alphai, float* beta, float* vsl, lapack_int ldvsl, float* vsr, lapack_int ldvsr, float* work, lapack_int lwork, lapack_logical* bwork );
lapack_int LAPACKE_dgges_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_D_SELECT3 selctg, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* sdim, double* alphar, double* alphai, double* beta, double* vsl, lapack_int ldvsl, double* vsr, lapack_int ldvsr, double* work, lapack_int lwork, lapack_logical* bwork );
lapack_int LAPACKE_cgges_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_C_SELECT2 selctg, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* sdim, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vsl, lapack_int ldvsl, lapack_complex_float* vsr, lapack_int ldvsr, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_logical* bwork );
lapack_int LAPACKE_zgges_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_Z_SELECT2 selctg, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* sdim, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vsl, lapack_int ldvsl, lapack_complex_double* vsr, lapack_int ldvsr, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_logical* bwork );
/* [sdcz]ggesx_work: takes a sense flag after selctg, real rconde and rcondv arrays, and an iwork array with its own liwork length argument, followed by bwork. NOTE(review): presumably these wrap LAPACK xGGESX -- confirm. */
lapack_int LAPACKE_sggesx_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_S_SELECT3 selctg, char sense, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, lapack_int* sdim, float* alphar, float* alphai, float* beta, float* vsl, lapack_int ldvsl, float* vsr, lapack_int ldvsr, float* rconde, float* rcondv, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork, lapack_logical* bwork );
lapack_int LAPACKE_dggesx_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_D_SELECT3 selctg, char sense, 
lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, lapack_int* sdim, double* alphar, double* alphai, double* beta, double* vsl, lapack_int ldvsl, double* vsr, lapack_int ldvsr, double* rconde, double* rcondv, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork, lapack_logical* bwork );
lapack_int LAPACKE_cggesx_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_C_SELECT2 selctg, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_int* sdim, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vsl, lapack_int ldvsl, lapack_complex_float* vsr, lapack_int ldvsr, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork, lapack_int liwork, lapack_logical* bwork );
lapack_int LAPACKE_zggesx_work( int matrix_order, char jobvsl, char jobvsr, char sort, LAPACK_Z_SELECT2 selctg, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_int* sdim, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vsl, lapack_int ldvsl, lapack_complex_double* vsr, lapack_int ldvsr, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork, lapack_int liwork, lapack_logical* bwork );
/* [sdcz]ggev_work: jobvl / jobvr flags with vl / vr and their leading dimensions; the real variants take alphar and alphai, the complex variants a single alpha plus a trailing real rwork. NOTE(review): presumably these wrap LAPACK xGGEV -- confirm. */
lapack_int LAPACKE_sggev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* alphar, float* alphai, float* beta, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, float* work, lapack_int lwork );
lapack_int LAPACKE_dggev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* alphar, double* alphai, double* beta, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, double* work, lapack_int lwork );
lapack_int LAPACKE_cggev_work( int 
matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_complex_float* work, lapack_int lwork, float* rwork );
lapack_int LAPACKE_zggev_work( int matrix_order, char jobvl, char jobvr, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_complex_double* work, lapack_int lwork, double* rwork );
/* [sdcz]ggevx_work: takes balanc and sense flags in addition to jobvl / jobvr; ilo and ihi are passed by pointer, and lscale, rscale, abnrm, bbnrm, rconde and rcondv are real pointers in every variant. Ends with iwork and bwork. NOTE(review): presumably these wrap LAPACK xGGEVX -- confirm. */
lapack_int LAPACKE_sggevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* alphar, float* alphai, float* beta, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* abnrm, float* bbnrm, float* rconde, float* rcondv, float* work, lapack_int lwork, lapack_int* iwork, lapack_logical* bwork );
lapack_int LAPACKE_dggevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* alphar, double* alphai, double* beta, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* abnrm, double* bbnrm, double* rconde, double* rcondv, double* work, lapack_int lwork, lapack_int* iwork, lapack_logical* bwork );
lapack_int LAPACKE_cggevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, 
lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* abnrm, float* bbnrm, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork, lapack_logical* bwork );
lapack_int LAPACKE_zggevx_work( int matrix_order, char balanc, char jobvl, char jobvr, char sense, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* abnrm, double* bbnrm, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork, lapack_logical* bwork );
/* [sdcz]ggglm_work: the dimension arguments are ordered n, m, p; d, x and y carry no leading-dimension argument. NOTE(review): presumably these wrap LAPACK xGGGLM -- confirm. */
lapack_int LAPACKE_sggglm_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, float* a, lapack_int lda, float* b, lapack_int ldb, float* d, float* x, float* y, float* work, lapack_int lwork );
lapack_int LAPACKE_dggglm_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, double* a, lapack_int lda, double* b, lapack_int ldb, double* d, double* x, double* y, double* work, lapack_int lwork );
lapack_int LAPACKE_cggglm_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* d, lapack_complex_float* x, lapack_complex_float* y, lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zggglm_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* d, lapack_complex_double* x, lapack_complex_double* y, lapack_complex_double* work, lapack_int lwork );
/* [sdcz]gghrd_work: compq / compz flags; ilo and ihi are passed by value; a, b, q and z each have a leading dimension and there are no work arrays. NOTE(review): presumably these wrap LAPACK xGGHRD -- confirm. */
lapack_int LAPACKE_sgghrd_work( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, float* a, lapack_int lda, float* b, lapack_int ldb, 
float* q, lapack_int ldq, float* z, lapack_int ldz );
lapack_int LAPACKE_dgghrd_work( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, double* a, lapack_int lda, double* b, lapack_int ldb, double* q, lapack_int ldq, double* z, lapack_int ldz );
lapack_int LAPACKE_cgghrd_work( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz );
lapack_int LAPACKE_zgghrd_work( int matrix_order, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz );
/* [sdcz]gglse_work: the dimension arguments are ordered m, n, p; c, d and x carry no leading-dimension argument. NOTE(review): presumably these wrap LAPACK xGGLSE -- confirm. */
lapack_int LAPACKE_sgglse_work( int matrix_order, lapack_int m, lapack_int n, lapack_int p, float* a, lapack_int lda, float* b, lapack_int ldb, float* c, float* d, float* x, float* work, lapack_int lwork );
lapack_int LAPACKE_dgglse_work( int matrix_order, lapack_int m, lapack_int n, lapack_int p, double* a, lapack_int lda, double* b, lapack_int ldb, double* c, double* d, double* x, double* work, lapack_int lwork );
lapack_int LAPACKE_cgglse_work( int matrix_order, lapack_int m, lapack_int n, lapack_int p, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* c, lapack_complex_float* d, lapack_complex_float* x, lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zgglse_work( int matrix_order, lapack_int m, lapack_int n, lapack_int p, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* c, lapack_complex_double* d, lapack_complex_double* x, lapack_complex_double* work, lapack_int lwork );
/* [sdcz]ggqrf_work: the dimension arguments are ordered n, m, p; a and b each have their own tau array (taua, taub). NOTE(review): presumably these wrap LAPACK xGGQRF -- confirm. */
lapack_int LAPACKE_sggqrf_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, float* a, lapack_int lda, 
float* taua, float* b, lapack_int ldb, float* taub, float* work, lapack_int lwork );
lapack_int LAPACKE_dggqrf_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, double* a, lapack_int lda, double* taua, double* b, lapack_int ldb, double* taub, double* work, lapack_int lwork );
lapack_int LAPACKE_cggqrf_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_float* a, lapack_int lda, lapack_complex_float* taua, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* taub, lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zggqrf_work( int matrix_order, lapack_int n, lapack_int m, lapack_int p, lapack_complex_double* a, lapack_int lda, lapack_complex_double* taua, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* taub, lapack_complex_double* work, lapack_int lwork );
/* [sdcz]ggrqf_work: the dimension arguments are ordered m, p, n; a and b each have their own tau array (taua, taub). NOTE(review): presumably these wrap LAPACK xGGRQF -- confirm. */
lapack_int LAPACKE_sggrqf_work( int matrix_order, lapack_int m, lapack_int p, lapack_int n, float* a, lapack_int lda, float* taua, float* b, lapack_int ldb, float* taub, float* work, lapack_int lwork );
lapack_int LAPACKE_dggrqf_work( int matrix_order, lapack_int m, lapack_int p, lapack_int n, double* a, lapack_int lda, double* taua, double* b, lapack_int ldb, double* taub, double* work, lapack_int lwork );
lapack_int LAPACKE_cggrqf_work( int matrix_order, lapack_int m, lapack_int p, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* taua, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* taub, lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zggrqf_work( int matrix_order, lapack_int m, lapack_int p, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* taua, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* taub, lapack_complex_double* work, lapack_int lwork );
/* [sdcz]ggsvd_work: dimensions are ordered m, n, p; k and l are passed by pointer; alpha and beta are real arrays in every variant; there is no lwork argument, and the complex variants take a real rwork before iwork. NOTE(review): presumably these wrap LAPACK xGGSVD -- confirm. */
lapack_int LAPACKE_sggsvd_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, float* a, 
lapack_int lda, float* b, lapack_int ldb, float* alpha, float* beta, float* u, lapack_int ldu, float* v, lapack_int ldv, float* q, lapack_int ldq, float* work, lapack_int* iwork );
lapack_int LAPACKE_dggsvd_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, double* a, lapack_int lda, double* b, lapack_int ldb, double* alpha, double* beta, double* u, lapack_int ldu, double* v, lapack_int ldv, double* q, lapack_int ldq, double* work, lapack_int* iwork );
lapack_int LAPACKE_cggsvd_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* alpha, float* beta, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* v, lapack_int ldv, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* work, float* rwork, lapack_int* iwork );
lapack_int LAPACKE_zggsvd_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int n, lapack_int p, lapack_int* k, lapack_int* l, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* alpha, double* beta, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* work, double* rwork, lapack_int* iwork );
/* [sdcz]ggsvp_work: dimensions are ordered m, p, n; tola and tolb are real scalars passed by value; k and l are passed by pointer; iwork comes before tau and work, and the complex variants insert a real rwork after iwork. NOTE(review): presumably these wrap LAPACK xGGSVP -- confirm. */
lapack_int LAPACKE_sggsvp_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float tola, float tolb, lapack_int* k, lapack_int* l, float* u, lapack_int ldu, float* v, lapack_int ldv, float* q, lapack_int ldq, lapack_int* iwork, float* tau, float* work );
lapack_int LAPACKE_dggsvp_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double tola, double 
tolb, lapack_int* k, lapack_int* l, double* u, lapack_int ldu, double* v, lapack_int ldv, double* q, lapack_int ldq, lapack_int* iwork, double* tau, double* work );
lapack_int LAPACKE_cggsvp_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float tola, float tolb, lapack_int* k, lapack_int* l, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* v, lapack_int ldv, lapack_complex_float* q, lapack_int ldq, lapack_int* iwork, float* rwork, lapack_complex_float* tau, lapack_complex_float* work );
lapack_int LAPACKE_zggsvp_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double tola, double tolb, lapack_int* k, lapack_int* l, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_complex_double* q, lapack_int ldq, lapack_int* iwork, double* rwork, lapack_complex_double* tau, lapack_complex_double* work );
/* [sdcz]gtcon_work: these take no matrix_order parameter. dl, d, du, du2 and ipiv are all const; anorm is passed by value and rcond by pointer. The real variants take work and iwork, the complex variants take only work. NOTE(review): presumably these wrap LAPACK xGTCON -- confirm. */
lapack_int LAPACKE_sgtcon_work( char norm, lapack_int n, const float* dl, const float* d, const float* du, const float* du2, const lapack_int* ipiv, float anorm, float* rcond, float* work, lapack_int* iwork );
lapack_int LAPACKE_dgtcon_work( char norm, lapack_int n, const double* dl, const double* d, const double* du, const double* du2, const lapack_int* ipiv, double anorm, double* rcond, double* work, lapack_int* iwork );
lapack_int LAPACKE_cgtcon_work( char norm, lapack_int n, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* du2, const lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work );
lapack_int LAPACKE_zgtcon_work( char norm, lapack_int n, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* 
du2, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work );
/* [sdcz]gtrfs_work: dl, d, du, dlf, df, duf, du2, ipiv and b are all const; x, ferr and berr are not. The real variants take an integer iwork, the complex variants a real rwork. NOTE(review): presumably these wrap LAPACK xGTRFS -- confirm. */
lapack_int LAPACKE_sgtrfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* dl, const float* d, const float* du, const float* dlf, const float* df, const float* duf, const float* du2, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork );
lapack_int LAPACKE_dgtrfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* dl, const double* d, const double* du, const double* dlf, const double* df, const double* duf, const double* du2, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork );
lapack_int LAPACKE_cgtrfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* dlf, const lapack_complex_float* df, const lapack_complex_float* duf, const lapack_complex_float* du2, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork );
lapack_int LAPACKE_zgtrfs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* dlf, const lapack_complex_double* df, const lapack_complex_double* duf, const lapack_complex_double* du2, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork );
/* [sdcz]gtsv_work: dl, d, du and b are all non-const; there are no work arrays and no ipiv. NOTE(review): presumably these wrap LAPACK xGTSV -- confirm. */
lapack_int LAPACKE_sgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs, float* dl, float* d, float* du, float* b, lapack_int ldb );
lapack_int LAPACKE_dgtsv_work( 
int matrix_order, lapack_int n, lapack_int nrhs, double* dl, double* d, double* du, double* b, lapack_int ldb ); lapack_int LAPACKE_cgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_float* dl, lapack_complex_float* d, lapack_complex_float* du, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs, lapack_complex_double* dl, lapack_complex_double* d, lapack_complex_double* du, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgtsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const float* dl, const float* d, const float* du, float* dlf, float* df, float* duf, float* du2, lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dgtsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const double* dl, const double* d, const double* du, double* dlf, double* df, double* duf, double* du2, lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cgtsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, lapack_complex_float* dlf, lapack_complex_float* df, lapack_complex_float* duf, lapack_complex_float* du2, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zgtsvx_work( int matrix_order, char fact, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, lapack_complex_double* dlf, lapack_complex_double* df, 
lapack_complex_double* duf, lapack_complex_double* du2, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sgttrf_work( lapack_int n, float* dl, float* d, float* du, float* du2, lapack_int* ipiv ); lapack_int LAPACKE_dgttrf_work( lapack_int n, double* dl, double* d, double* du, double* du2, lapack_int* ipiv ); lapack_int LAPACKE_cgttrf_work( lapack_int n, lapack_complex_float* dl, lapack_complex_float* d, lapack_complex_float* du, lapack_complex_float* du2, lapack_int* ipiv ); lapack_int LAPACKE_zgttrf_work( lapack_int n, lapack_complex_double* dl, lapack_complex_double* d, lapack_complex_double* du, lapack_complex_double* du2, lapack_int* ipiv ); lapack_int LAPACKE_sgttrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const float* dl, const float* d, const float* du, const float* du2, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dgttrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const double* dl, const double* d, const double* du, const double* du2, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cgttrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* du2, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zgttrs_work( int matrix_order, char trans, lapack_int n, lapack_int nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* du2, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chbev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, float* w, 
lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhbev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chbevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zhbevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_chbevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_zhbevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_chbgst_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* bb, lapack_int ldbb, 
lapack_complex_float* x, lapack_int ldx, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhbgst_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* bb, lapack_int ldbb, lapack_complex_double* x, lapack_int ldx, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chbgv_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* bb, lapack_int ldbb, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhbgv_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* bb, lapack_int ldbb, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chbgvd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* bb, lapack_int ldbb, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zhbgvd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* bb, lapack_int ldbb, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_chbgvx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* bb, lapack_int ldbb, lapack_complex_float* q, lapack_int ldq, float vl, 
float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_zhbgvx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* bb, lapack_int ldbb, lapack_complex_double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_chbtrd_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, lapack_complex_float* ab, lapack_int ldab, float* d, float* e, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* work ); lapack_int LAPACKE_zhbtrd_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, lapack_complex_double* ab, lapack_int ldab, double* d, double* e, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* work ); lapack_int LAPACKE_checon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work ); lapack_int LAPACKE_zhecon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work ); lapack_int LAPACKE_cheequb_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax, lapack_complex_float* work ); lapack_int LAPACKE_zheequb_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax, lapack_complex_double* work ); lapack_int LAPACKE_cheev_work( int matrix_order, 
char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float* w, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zheev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double* w, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_cheevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float* w, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zheevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double* w, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_cheevr_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* isuppz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zheevr_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* isuppz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_cheevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int 
lwork, float* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_zheevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_chegst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhegst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chegv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* w, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zhegv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* w, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_chegvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float* w, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zhegvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double* w, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_chegvx_work( int 
matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_zhegvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_cherfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zherfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_cherfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const float* s, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, 
lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zherfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const double* s, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chesv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zhesv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_chesvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zhesvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_chesvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, 
lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhesvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chetrd_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, float* d, float* e, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zhetrd_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, double* d, double* e, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_chetrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zhetrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_chetri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work ); lapack_int LAPACKE_zhetri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const 
lapack_int* ipiv, lapack_complex_double* work ); lapack_int LAPACKE_chetrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhetrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chfrk_work( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, float alpha, const lapack_complex_float* a, lapack_int lda, float beta, lapack_complex_float* c ); lapack_int LAPACKE_zhfrk_work( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, double alpha, const lapack_complex_double* a, lapack_int lda, double beta, lapack_complex_double* c ); lapack_int LAPACKE_shgeqz_work( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, float* h, lapack_int ldh, float* t, lapack_int ldt, float* alphar, float* alphai, float* beta, float* q, lapack_int ldq, float* z, lapack_int ldz, float* work, lapack_int lwork ); lapack_int LAPACKE_dhgeqz_work( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, double* h, lapack_int ldh, double* t, lapack_int ldt, double* alphar, double* alphai, double* beta, double* q, lapack_int ldq, double* z, lapack_int ldz, double* work, lapack_int lwork ); lapack_int LAPACKE_chgeqz_work( int matrix_order, char job, char compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* h, lapack_int ldh, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork ); lapack_int LAPACKE_zhgeqz_work( int matrix_order, char job, char 
compq, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* h, lapack_int ldh, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork ); lapack_int LAPACKE_chpcon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, const lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work ); lapack_int LAPACKE_zhpcon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work ); lapack_int LAPACKE_chpev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhpev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chpevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zhpevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_chpevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_float* ap, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* 
work, float* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_zhpevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* ap, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_chpgst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_float* ap, const lapack_complex_float* bp ); lapack_int LAPACKE_zhpgst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, lapack_complex_double* ap, const lapack_complex_double* bp ); lapack_int LAPACKE_chpgv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, lapack_complex_float* bp, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhpgv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, lapack_complex_double* bp, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chpgvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_float* ap, lapack_complex_float* bp, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zhpgvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, lapack_complex_double* ap, lapack_complex_double* bp, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_chpgvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, 
lapack_complex_float* ap, lapack_complex_float* bp, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_zhpgvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, lapack_complex_double* ap, lapack_complex_double* bp, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_chprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chpsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhpsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_chpsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* afp, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* 
berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zhpsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* afp, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_chptrd_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, float* d, float* e, lapack_complex_float* tau ); lapack_int LAPACKE_zhptrd_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, double* d, double* e, lapack_complex_double* tau ); lapack_int LAPACKE_chptrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, lapack_int* ipiv ); lapack_int LAPACKE_zhptrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, lapack_int* ipiv ); lapack_int LAPACKE_chptri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* work ); lapack_int LAPACKE_zhptri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* work ); lapack_int LAPACKE_chptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zhptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_shsein_work( int matrix_order, char job, char eigsrc, char initv, lapack_logical* select, lapack_int n, const float* h, lapack_int ldh, float* wr, const float* wi, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, float* work, lapack_int* ifaill, lapack_int* ifailr ); lapack_int 
LAPACKE_dhsein_work( int matrix_order, char job, char eigsrc, char initv, lapack_logical* select, lapack_int n, const double* h, lapack_int ldh, double* wr, const double* wi, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, double* work, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_chsein_work( int matrix_order, char job, char eigsrc, char initv, const lapack_logical* select, lapack_int n, const lapack_complex_float* h, lapack_int ldh, lapack_complex_float* w, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_complex_float* work, float* rwork, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_zhsein_work( int matrix_order, char job, char eigsrc, char initv, const lapack_logical* select, lapack_int n, const lapack_complex_double* h, lapack_int ldh, lapack_complex_double* w, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_complex_double* work, double* rwork, lapack_int* ifaill, lapack_int* ifailr ); lapack_int LAPACKE_shseqr_work( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, float* h, lapack_int ldh, float* wr, float* wi, float* z, lapack_int ldz, float* work, lapack_int lwork ); lapack_int LAPACKE_dhseqr_work( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, double* h, lapack_int ldh, double* wr, double* wi, double* z, lapack_int ldz, double* work, lapack_int lwork ); lapack_int LAPACKE_chseqr_work( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* h, lapack_int ldh, lapack_complex_float* w, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zhseqr_work( int matrix_order, char job, char compz, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* h, 
lapack_int ldh, lapack_complex_double* w, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_clacgv_work( lapack_int n, lapack_complex_float* x, lapack_int incx ); lapack_int LAPACKE_zlacgv_work( lapack_int n, lapack_complex_double* x, lapack_int incx ); lapack_int LAPACKE_slacn2_work( lapack_int n, float* v, float* x, lapack_int* isgn, float* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_dlacn2_work( lapack_int n, double* v, double* x, lapack_int* isgn, double* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_clacn2_work( lapack_int n, lapack_complex_float* v, lapack_complex_float* x, float* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_zlacn2_work( lapack_int n, lapack_complex_double* v, lapack_complex_double* x, double* est, lapack_int* kase, lapack_int* isave ); lapack_int LAPACKE_slacpy_work( int matrix_order, char uplo, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dlacpy_work( int matrix_order, char uplo, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_clacpy_work( int matrix_order, char uplo, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zlacpy_work( int matrix_order, char uplo, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_clacp2_work( int matrix_order, char uplo, lapack_int m, lapack_int n, const float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zlacp2_work( int matrix_order, char uplo, lapack_int m, lapack_int n, const double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_zlag2c_work( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, 
lapack_complex_float* sa, lapack_int ldsa ); lapack_int LAPACKE_slag2d_work( int matrix_order, lapack_int m, lapack_int n, const float* sa, lapack_int ldsa, double* a, lapack_int lda ); lapack_int LAPACKE_dlag2s_work( int matrix_order, lapack_int m, lapack_int n, const double* a, lapack_int lda, float* sa, lapack_int ldsa ); lapack_int LAPACKE_clag2z_work( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* sa, lapack_int ldsa, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_slagge_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* d, float* a, lapack_int lda, lapack_int* iseed, float* work ); lapack_int LAPACKE_dlagge_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* d, double* a, lapack_int lda, lapack_int* iseed, double* work ); lapack_int LAPACKE_clagge_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float* d, lapack_complex_float* a, lapack_int lda, lapack_int* iseed, lapack_complex_float* work ); lapack_int LAPACKE_zlagge_work( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double* d, lapack_complex_double* a, lapack_int lda, lapack_int* iseed, lapack_complex_double* work ); lapack_int LAPACKE_claghe_work( int matrix_order, lapack_int n, lapack_int k, const float* d, lapack_complex_float* a, lapack_int lda, lapack_int* iseed, lapack_complex_float* work ); lapack_int LAPACKE_zlaghe_work( int matrix_order, lapack_int n, lapack_int k, const double* d, lapack_complex_double* a, lapack_int lda, lapack_int* iseed, lapack_complex_double* work ); lapack_int LAPACKE_slagsy_work( int matrix_order, lapack_int n, lapack_int k, const float* d, float* a, lapack_int lda, lapack_int* iseed, float* work ); lapack_int LAPACKE_dlagsy_work( int matrix_order, lapack_int n, lapack_int k, const double* d, double* a, lapack_int lda, lapack_int* iseed, double* work ); lapack_int 
LAPACKE_clagsy_work( int matrix_order, lapack_int n, lapack_int k, const float* d, lapack_complex_float* a, lapack_int lda, lapack_int* iseed, lapack_complex_float* work ); lapack_int LAPACKE_zlagsy_work( int matrix_order, lapack_int n, lapack_int k, const double* d, lapack_complex_double* a, lapack_int lda, lapack_int* iseed, lapack_complex_double* work ); lapack_int LAPACKE_slapmr_work( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, float* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_dlapmr_work( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, double* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_clapmr_work( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, lapack_complex_float* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_zlapmr_work( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, lapack_complex_double* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_slartgp_work( float f, float g, float* cs, float* sn, float* r ); lapack_int LAPACKE_dlartgp_work( double f, double g, double* cs, double* sn, double* r ); lapack_int LAPACKE_slartgs_work( float x, float y, float sigma, float* cs, float* sn ); lapack_int LAPACKE_dlartgs_work( double x, double y, double sigma, double* cs, double* sn ); float LAPACKE_slapy2_work( float x, float y ); double LAPACKE_dlapy2_work( double x, double y ); float LAPACKE_slapy3_work( float x, float y, float z ); double LAPACKE_dlapy3_work( double x, double y, double z ); float LAPACKE_slamch_work( char cmach ); double LAPACKE_dlamch_work( char cmach ); float LAPACKE_slange_work( int matrix_order, char norm, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* work ); double LAPACKE_dlange_work( int matrix_order, char norm, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* work ); float LAPACKE_clange_work( int matrix_order, char norm, lapack_int m, lapack_int n, const lapack_complex_float* a, 
lapack_int lda, float* work ); double LAPACKE_zlange_work( int matrix_order, char norm, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* work ); float LAPACKE_clanhe_work( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* work ); double LAPACKE_zlanhe_work( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* work ); float LAPACKE_slansy_work( int matrix_order, char norm, char uplo, lapack_int n, const float* a, lapack_int lda, float* work ); double LAPACKE_dlansy_work( int matrix_order, char norm, char uplo, lapack_int n, const double* a, lapack_int lda, double* work ); float LAPACKE_clansy_work( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* work ); double LAPACKE_zlansy_work( int matrix_order, char norm, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* work ); float LAPACKE_slantr_work( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* work ); double LAPACKE_dlantr_work( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* work ); float LAPACKE_clantr_work( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* work ); double LAPACKE_zlantr_work( int matrix_order, char norm, char uplo, char diag, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* work ); lapack_int LAPACKE_slarfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* c, lapack_int ldc, float* work, lapack_int ldwork ); lapack_int LAPACKE_dlarfb_work( int matrix_order, char side, char 
trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* c, lapack_int ldc, double* work, lapack_int ldwork ); lapack_int LAPACKE_clarfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int ldwork ); lapack_int LAPACKE_zlarfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int ldwork ); lapack_int LAPACKE_slarfg_work( lapack_int n, float* alpha, float* x, lapack_int incx, float* tau ); lapack_int LAPACKE_dlarfg_work( lapack_int n, double* alpha, double* x, lapack_int incx, double* tau ); lapack_int LAPACKE_clarfg_work( lapack_int n, lapack_complex_float* alpha, lapack_complex_float* x, lapack_int incx, lapack_complex_float* tau ); lapack_int LAPACKE_zlarfg_work( lapack_int n, lapack_complex_double* alpha, lapack_complex_double* x, lapack_int incx, lapack_complex_double* tau ); lapack_int LAPACKE_slarft_work( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const float* v, lapack_int ldv, const float* tau, float* t, lapack_int ldt ); lapack_int LAPACKE_dlarft_work( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const double* v, lapack_int ldv, const double* tau, double* t, lapack_int ldt ); lapack_int LAPACKE_clarft_work( int matrix_order, char direct, char storev, lapack_int n, lapack_int k, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* tau, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zlarft_work( int 
matrix_order, char direct, char storev, lapack_int n, lapack_int k, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* tau, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_slarfx_work( int matrix_order, char side, lapack_int m, lapack_int n, const float* v, float tau, float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_dlarfx_work( int matrix_order, char side, lapack_int m, lapack_int n, const double* v, double tau, double* c, lapack_int ldc, double* work ); lapack_int LAPACKE_clarfx_work( int matrix_order, char side, lapack_int m, lapack_int n, const lapack_complex_float* v, lapack_complex_float tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work ); lapack_int LAPACKE_zlarfx_work( int matrix_order, char side, lapack_int m, lapack_int n, const lapack_complex_double* v, lapack_complex_double tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work ); lapack_int LAPACKE_slarnv_work( lapack_int idist, lapack_int* iseed, lapack_int n, float* x ); lapack_int LAPACKE_dlarnv_work( lapack_int idist, lapack_int* iseed, lapack_int n, double* x ); lapack_int LAPACKE_clarnv_work( lapack_int idist, lapack_int* iseed, lapack_int n, lapack_complex_float* x ); lapack_int LAPACKE_zlarnv_work( lapack_int idist, lapack_int* iseed, lapack_int n, lapack_complex_double* x ); lapack_int LAPACKE_slaset_work( int matrix_order, char uplo, lapack_int m, lapack_int n, float alpha, float beta, float* a, lapack_int lda ); lapack_int LAPACKE_dlaset_work( int matrix_order, char uplo, lapack_int m, lapack_int n, double alpha, double beta, double* a, lapack_int lda ); lapack_int LAPACKE_claset_work( int matrix_order, char uplo, lapack_int m, lapack_int n, lapack_complex_float alpha, lapack_complex_float beta, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zlaset_work( int matrix_order, char uplo, lapack_int m, lapack_int n, lapack_complex_double alpha, lapack_complex_double beta, 
lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_slasrt_work( char id, lapack_int n, float* d ); lapack_int LAPACKE_dlasrt_work( char id, lapack_int n, double* d ); lapack_int LAPACKE_slaswp_work( int matrix_order, lapack_int n, float* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_dlaswp_work( int matrix_order, lapack_int n, double* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_claswp_work( int matrix_order, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_zlaswp_work( int matrix_order, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int k1, lapack_int k2, const lapack_int* ipiv, lapack_int incx ); lapack_int LAPACKE_slatms_work( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, float* d, lapack_int mode, float cond, float dmax, lapack_int kl, lapack_int ku, char pack, float* a, lapack_int lda, float* work ); lapack_int LAPACKE_dlatms_work( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, double* d, lapack_int mode, double cond, double dmax, lapack_int kl, lapack_int ku, char pack, double* a, lapack_int lda, double* work ); lapack_int LAPACKE_clatms_work( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, float* d, lapack_int mode, float cond, float dmax, lapack_int kl, lapack_int ku, char pack, lapack_complex_float* a, lapack_int lda, lapack_complex_float* work ); lapack_int LAPACKE_zlatms_work( int matrix_order, lapack_int m, lapack_int n, char dist, lapack_int* iseed, char sym, double* d, lapack_int mode, double cond, double dmax, lapack_int kl, lapack_int ku, char pack, lapack_complex_double* a, lapack_int lda, lapack_complex_double* work ); lapack_int LAPACKE_slauum_work( int matrix_order, char uplo, 
lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dlauum_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_clauum_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zlauum_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_sopgtr_work( int matrix_order, char uplo, lapack_int n, const float* ap, const float* tau, float* q, lapack_int ldq, float* work ); lapack_int LAPACKE_dopgtr_work( int matrix_order, char uplo, lapack_int n, const double* ap, const double* tau, double* q, lapack_int ldq, double* work ); lapack_int LAPACKE_sopmtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const float* ap, const float* tau, float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_dopmtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const double* ap, const double* tau, double* c, lapack_int ldc, double* work ); lapack_int LAPACKE_sorgbr_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorgbr_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sorghr_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorghr_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sorglq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorglq_work( int matrix_order, 
lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sorgql_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorgql_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sorgqr_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorgqr_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sorgrq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorgrq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sorgtr_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dorgtr_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_sormbr_work( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); lapack_int LAPACKE_dormbr_work( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_sormhr_work( int matrix_order, char side, char trans, 
lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); lapack_int LAPACKE_dormhr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_sormql_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); lapack_int LAPACKE_dormql_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_sormqr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); lapack_int LAPACKE_dormqr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ); lapack_int LAPACKE_sormrq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ); 
/*
 * Prototypes for LAPACKE "*_work" entry points, grouped by routine family.
 * Within a family the leading s/d/c/z letter selects the element type seen in
 * the signature: float, double, lapack_complex_float, lapack_complex_double.
 * Real variants take an integer workspace (iwork) where the complex variants
 * take a real workspace (rwork), as the individual signatures show.
 * NOTE(review): routine semantics are not visible here; presumably they follow
 * the LAPACK routine of the same name -- confirm against the LAPACK reference.
 */

/* ?ormrq (double variant) */
lapack_int LAPACKE_dormrq_work(int matrix_order, char side, char trans, lapack_int m,
    lapack_int n, lapack_int k, const double *a, lapack_int lda, const double *tau,
    double *c, lapack_int ldc, double *work, lapack_int lwork);

/* ?ormrz */
lapack_int LAPACKE_sormrz_work(int matrix_order, char side, char trans, lapack_int m,
    lapack_int n, lapack_int k, lapack_int l, const float *a, lapack_int lda,
    const float *tau, float *c, lapack_int ldc, float *work, lapack_int lwork);
lapack_int LAPACKE_dormrz_work(int matrix_order, char side, char trans, lapack_int m,
    lapack_int n, lapack_int k, lapack_int l, const double *a, lapack_int lda,
    const double *tau, double *c, lapack_int ldc, double *work, lapack_int lwork);

/* ?ormtr */
lapack_int LAPACKE_sormtr_work(int matrix_order, char side, char uplo, char trans,
    lapack_int m, lapack_int n, const float *a, lapack_int lda, const float *tau,
    float *c, lapack_int ldc, float *work, lapack_int lwork);
lapack_int LAPACKE_dormtr_work(int matrix_order, char side, char uplo, char trans,
    lapack_int m, lapack_int n, const double *a, lapack_int lda, const double *tau,
    double *c, lapack_int ldc, double *work, lapack_int lwork);

/* ?pbcon */
lapack_int LAPACKE_spbcon_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, const float *ab, lapack_int ldab, float anorm, float *rcond,
    float *work, lapack_int *iwork);
lapack_int LAPACKE_dpbcon_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, const double *ab, lapack_int ldab, double anorm, double *rcond,
    double *work, lapack_int *iwork);
lapack_int LAPACKE_cpbcon_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, const lapack_complex_float *ab, lapack_int ldab, float anorm,
    float *rcond, lapack_complex_float *work, float *rwork);
lapack_int LAPACKE_zpbcon_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, const lapack_complex_double *ab, lapack_int ldab, double anorm,
    double *rcond, lapack_complex_double *work, double *rwork);

/* ?pbequ */
lapack_int LAPACKE_spbequ_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, const float *ab, lapack_int ldab, float *s, float *scond,
    float *amax);
lapack_int LAPACKE_dpbequ_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, const double *ab, lapack_int ldab, double *s, double *scond,
    double *amax);
lapack_int LAPACKE_cpbequ_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, const lapack_complex_float *ab, lapack_int ldab, float *s,
    float *scond, float *amax);
lapack_int LAPACKE_zpbequ_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, const lapack_complex_double *ab, lapack_int ldab, double *s,
    double *scond, double *amax);

/* ?pbrfs */
lapack_int LAPACKE_spbrfs_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, lapack_int nrhs, const float *ab, lapack_int ldab,
    const float *afb, lapack_int ldafb, const float *b, lapack_int ldb, float *x,
    lapack_int ldx, float *ferr, float *berr, float *work, lapack_int *iwork);
lapack_int LAPACKE_dpbrfs_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, lapack_int nrhs, const double *ab, lapack_int ldab,
    const double *afb, lapack_int ldafb, const double *b, lapack_int ldb,
    double *x, lapack_int ldx, double *ferr, double *berr, double *work,
    lapack_int *iwork);
lapack_int LAPACKE_cpbrfs_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, lapack_int nrhs, const lapack_complex_float *ab,
    lapack_int ldab, const lapack_complex_float *afb, lapack_int ldafb,
    const lapack_complex_float *b, lapack_int ldb, lapack_complex_float *x,
    lapack_int ldx, float *ferr, float *berr, lapack_complex_float *work,
    float *rwork);
lapack_int LAPACKE_zpbrfs_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, lapack_int nrhs, const lapack_complex_double *ab,
    lapack_int ldab, const lapack_complex_double *afb, lapack_int ldafb,
    const lapack_complex_double *b, lapack_int ldb, lapack_complex_double *x,
    lapack_int ldx, double *ferr, double *berr, lapack_complex_double *work,
    double *rwork);

/* ?pbstf */
lapack_int LAPACKE_spbstf_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kb, float *bb, lapack_int ldbb);
lapack_int LAPACKE_dpbstf_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kb, double *bb, lapack_int ldbb);
lapack_int LAPACKE_cpbstf_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kb, lapack_complex_float *bb, lapack_int ldbb);
lapack_int LAPACKE_zpbstf_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kb, lapack_complex_double *bb, lapack_int ldbb);

/* ?pbsv */
lapack_int LAPACKE_spbsv_work(int matrix_order, char uplo, lapack_int n, lapack_int kd,
    lapack_int nrhs, float *ab, lapack_int ldab, float *b, lapack_int ldb);
lapack_int LAPACKE_dpbsv_work(int matrix_order, char uplo, lapack_int n, lapack_int kd,
    lapack_int nrhs, double *ab, lapack_int ldab, double *b, lapack_int ldb);
lapack_int LAPACKE_cpbsv_work(int matrix_order, char uplo, lapack_int n, lapack_int kd,
    lapack_int nrhs, lapack_complex_float *ab, lapack_int ldab,
    lapack_complex_float *b, lapack_int ldb);
lapack_int LAPACKE_zpbsv_work(int matrix_order, char uplo, lapack_int n, lapack_int kd,
    lapack_int nrhs, lapack_complex_double *ab, lapack_int ldab,
    lapack_complex_double *b, lapack_int ldb);

/* ?pbsvx */
lapack_int LAPACKE_spbsvx_work(int matrix_order, char fact, char uplo, lapack_int n,
    lapack_int kd, lapack_int nrhs, float *ab, lapack_int ldab, float *afb,
    lapack_int ldafb, char *equed, float *s, float *b, lapack_int ldb, float *x,
    lapack_int ldx, float *rcond, float *ferr, float *berr, float *work,
    lapack_int *iwork);
lapack_int LAPACKE_dpbsvx_work(int matrix_order, char fact, char uplo, lapack_int n,
    lapack_int kd, lapack_int nrhs, double *ab, lapack_int ldab, double *afb,
    lapack_int ldafb, char *equed, double *s, double *b, lapack_int ldb, double *x,
    lapack_int ldx, double *rcond, double *ferr, double *berr, double *work,
    lapack_int *iwork);
lapack_int LAPACKE_cpbsvx_work(int matrix_order, char fact, char uplo, lapack_int n,
    lapack_int kd, lapack_int nrhs, lapack_complex_float *ab, lapack_int ldab,
    lapack_complex_float *afb, lapack_int ldafb, char *equed, float *s,
    lapack_complex_float *b, lapack_int ldb, lapack_complex_float *x,
    lapack_int ldx, float *rcond, float *ferr, float *berr,
    lapack_complex_float *work, float *rwork);
lapack_int LAPACKE_zpbsvx_work(int matrix_order, char fact, char uplo, lapack_int n,
    lapack_int kd, lapack_int nrhs, lapack_complex_double *ab, lapack_int ldab,
    lapack_complex_double *afb, lapack_int ldafb, char *equed, double *s,
    lapack_complex_double *b, lapack_int ldb, lapack_complex_double *x,
    lapack_int ldx, double *rcond, double *ferr, double *berr,
    lapack_complex_double *work, double *rwork);

/* ?pbtrf */
lapack_int LAPACKE_spbtrf_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, float *ab, lapack_int ldab);
lapack_int LAPACKE_dpbtrf_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, double *ab, lapack_int ldab);
lapack_int LAPACKE_cpbtrf_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, lapack_complex_float *ab, lapack_int ldab);
lapack_int LAPACKE_zpbtrf_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, lapack_complex_double *ab, lapack_int ldab);

/* ?pbtrs */
lapack_int LAPACKE_spbtrs_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, lapack_int nrhs, const float *ab, lapack_int ldab, float *b,
    lapack_int ldb);
lapack_int LAPACKE_dpbtrs_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, lapack_int nrhs, const double *ab, lapack_int ldab, double *b,
    lapack_int ldb);
lapack_int LAPACKE_cpbtrs_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, lapack_int nrhs, const lapack_complex_float *ab,
    lapack_int ldab, lapack_complex_float *b, lapack_int ldb);
lapack_int LAPACKE_zpbtrs_work(int matrix_order, char uplo, lapack_int n,
    lapack_int kd, lapack_int nrhs, const lapack_complex_double *ab,
    lapack_int ldab, lapack_complex_double *b, lapack_int ldb);

/* ?pftrf (no leading-dimension argument for a) */
lapack_int LAPACKE_spftrf_work(int matrix_order, char transr, char uplo, lapack_int n,
    float *a);
lapack_int LAPACKE_dpftrf_work(int matrix_order, char transr, char uplo, lapack_int n,
    double *a);
lapack_int LAPACKE_cpftrf_work(int matrix_order, char transr, char uplo, lapack_int n,
    lapack_complex_float *a);
lapack_int LAPACKE_zpftrf_work(int matrix_order, char transr, char uplo, lapack_int n,
    lapack_complex_double *a);

/* ?pftri */
lapack_int LAPACKE_spftri_work(int matrix_order, char transr, char uplo, lapack_int n,
    float *a);
lapack_int LAPACKE_dpftri_work(int matrix_order, char transr, char uplo, lapack_int n,
    double *a);
lapack_int LAPACKE_cpftri_work(int matrix_order, char transr, char uplo, lapack_int n,
    lapack_complex_float *a);
lapack_int LAPACKE_zpftri_work(int matrix_order, char transr, char uplo, lapack_int n,
    lapack_complex_double *a);

/* ?pftrs */
lapack_int LAPACKE_spftrs_work(int matrix_order, char transr, char uplo, lapack_int n,
    lapack_int nrhs, const float *a, float *b, lapack_int ldb);
lapack_int LAPACKE_dpftrs_work(int matrix_order, char transr, char uplo, lapack_int n,
    lapack_int nrhs, const double *a, double *b, lapack_int ldb);
lapack_int LAPACKE_cpftrs_work(int matrix_order, char transr, char uplo, lapack_int n,
    lapack_int nrhs, const lapack_complex_float *a, lapack_complex_float *b,
    lapack_int ldb);
lapack_int LAPACKE_zpftrs_work(int matrix_order, char transr, char uplo, lapack_int n,
    lapack_int nrhs, const lapack_complex_double *a, lapack_complex_double *b,
    lapack_int ldb);

/* ?pocon (s, d, c variants) */
lapack_int LAPACKE_spocon_work(int matrix_order, char uplo, lapack_int n,
    const float *a, lapack_int lda, float anorm, float *rcond, float *work,
    lapack_int *iwork);
lapack_int LAPACKE_dpocon_work(int matrix_order, char uplo, lapack_int n,
    const double *a, lapack_int lda, double anorm, double *rcond, double *work,
    lapack_int *iwork);
lapack_int LAPACKE_cpocon_work(int matrix_order, char uplo, lapack_int n,
    const lapack_complex_float *a, lapack_int lda, float anorm, float *rcond,
    lapack_complex_float *work, float *rwork);
lapack_int LAPACKE_zpocon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double anorm, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_spoequ_work( int matrix_order, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_dpoequ_work( int matrix_order, lapack_int n, const double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_cpoequ_work( int matrix_order, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_zpoequ_work( int matrix_order, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_spoequb_work( int matrix_order, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_dpoequb_work( int matrix_order, lapack_int n, const double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_cpoequb_work( int matrix_order, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax ); lapack_int LAPACKE_zpoequb_work( int matrix_order, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax ); lapack_int LAPACKE_sporfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dporfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cporfs_work( int matrix_order, char uplo, lapack_int n, 
lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zporfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sporfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const float* s, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); lapack_int LAPACKE_dporfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const double* s, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork ); lapack_int LAPACKE_cporfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const float* s, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zporfsx_work( int matrix_order, char uplo, char 
equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const double* s, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sposv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dposv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_cposv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zposv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_dsposv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* b, lapack_int ldb, double* x, lapack_int ldx, double* work, float* swork, lapack_int* iter ); lapack_int LAPACKE_zcposv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, lapack_complex_double* work, lapack_complex_float* swork, double* rwork, lapack_int* iter ); lapack_int LAPACKE_sposvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dposvx_work( int matrix_order, char fact, char uplo, lapack_int n, 
lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cposvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zposvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sposvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); lapack_int LAPACKE_dposvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork ); lapack_int LAPACKE_cposvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, 
lapack_int ldaf, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zposvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_spotrf_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dpotrf_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_cpotrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zpotrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_spotri_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dpotri_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_cpotri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zpotri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_spotrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dpotrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, double* b, lapack_int ldb ); 
lapack_int LAPACKE_cpotrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpotrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sppcon_work( int matrix_order, char uplo, lapack_int n, const float* ap, float anorm, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dppcon_work( int matrix_order, char uplo, lapack_int n, const double* ap, double anorm, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_cppcon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, float anorm, float* rcond, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zppcon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, double anorm, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sppequ_work( int matrix_order, char uplo, lapack_int n, const float* ap, float* s, float* scond, float* amax ); lapack_int LAPACKE_dppequ_work( int matrix_order, char uplo, lapack_int n, const double* ap, double* s, double* scond, double* amax ); lapack_int LAPACKE_cppequ_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, float* s, float* scond, float* amax ); lapack_int LAPACKE_zppequ_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, double* s, double* scond, double* amax ); lapack_int LAPACKE_spprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, const float* afp, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dpprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, const double* afp, const double* b, 
lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cpprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zpprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sppsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* ap, float* b, lapack_int ldb ); lapack_int LAPACKE_dppsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* ap, double* b, lapack_int ldb ); lapack_int LAPACKE_cppsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zppsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sppsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* ap, float* afp, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dppsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* ap, double* afp, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cppsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, 
lapack_complex_float* ap, lapack_complex_float* afp, char* equed, float* s, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zppsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_complex_double* afp, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_spptrf_work( int matrix_order, char uplo, lapack_int n, float* ap ); lapack_int LAPACKE_dpptrf_work( int matrix_order, char uplo, lapack_int n, double* ap ); lapack_int LAPACKE_cpptrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap ); lapack_int LAPACKE_zpptrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap ); lapack_int LAPACKE_spptri_work( int matrix_order, char uplo, lapack_int n, float* ap ); lapack_int LAPACKE_dpptri_work( int matrix_order, char uplo, lapack_int n, double* ap ); lapack_int LAPACKE_cpptri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap ); lapack_int LAPACKE_zpptri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap ); lapack_int LAPACKE_spptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, float* b, lapack_int ldb ); lapack_int LAPACKE_dpptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, double* b, lapack_int ldb ); lapack_int LAPACKE_cpptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* b, lapack_int ldb ); lapack_int 
LAPACKE_spstrf_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, lapack_int* piv, lapack_int* rank, float tol, float* work ); lapack_int LAPACKE_dpstrf_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, lapack_int* piv, lapack_int* rank, double tol, double* work ); lapack_int LAPACKE_cpstrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* piv, lapack_int* rank, float tol, float* work ); lapack_int LAPACKE_zpstrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* piv, lapack_int* rank, double tol, double* work ); lapack_int LAPACKE_sptcon_work( lapack_int n, const float* d, const float* e, float anorm, float* rcond, float* work ); lapack_int LAPACKE_dptcon_work( lapack_int n, const double* d, const double* e, double anorm, double* rcond, double* work ); lapack_int LAPACKE_cptcon_work( lapack_int n, const float* d, const lapack_complex_float* e, float anorm, float* rcond, float* work ); lapack_int LAPACKE_zptcon_work( lapack_int n, const double* d, const lapack_complex_double* e, double anorm, double* rcond, double* work ); lapack_int LAPACKE_spteqr_work( int matrix_order, char compz, lapack_int n, float* d, float* e, float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_dpteqr_work( int matrix_order, char compz, lapack_int n, double* d, double* e, double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_cpteqr_work( int matrix_order, char compz, lapack_int n, float* d, float* e, lapack_complex_float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_zpteqr_work( int matrix_order, char compz, lapack_int n, double* d, double* e, lapack_complex_double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_sptrfs_work( int matrix_order, lapack_int n, lapack_int nrhs, const float* d, const float* e, const float* df, const float* ef, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, 
float* berr, float* work ); lapack_int LAPACKE_dptrfs_work( int matrix_order, lapack_int n, lapack_int nrhs, const double* d, const double* e, const double* df, const double* ef, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work ); lapack_int LAPACKE_cptrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* d, const lapack_complex_float* e, const float* df, const lapack_complex_float* ef, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zptrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* d, const lapack_complex_double* e, const double* df, const lapack_complex_double* ef, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sptsv_work( int matrix_order, lapack_int n, lapack_int nrhs, float* d, float* e, float* b, lapack_int ldb ); lapack_int LAPACKE_dptsv_work( int matrix_order, lapack_int n, lapack_int nrhs, double* d, double* e, double* b, lapack_int ldb ); lapack_int LAPACKE_cptsv_work( int matrix_order, lapack_int n, lapack_int nrhs, float* d, lapack_complex_float* e, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zptsv_work( int matrix_order, lapack_int n, lapack_int nrhs, double* d, lapack_complex_double* e, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sptsvx_work( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const float* d, const float* e, float* df, float* ef, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work ); lapack_int LAPACKE_dptsvx_work( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const double* d, const double* e, double* df, double* ef, const double* b, lapack_int 
ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work ); lapack_int LAPACKE_cptsvx_work( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const float* d, const lapack_complex_float* e, float* df, lapack_complex_float* ef, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zptsvx_work( int matrix_order, char fact, lapack_int n, lapack_int nrhs, const double* d, const lapack_complex_double* e, double* df, lapack_complex_double* ef, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_spttrf_work( lapack_int n, float* d, float* e ); lapack_int LAPACKE_dpttrf_work( lapack_int n, double* d, double* e ); lapack_int LAPACKE_cpttrf_work( lapack_int n, float* d, lapack_complex_float* e ); lapack_int LAPACKE_zpttrf_work( lapack_int n, double* d, lapack_complex_double* e ); lapack_int LAPACKE_spttrs_work( int matrix_order, lapack_int n, lapack_int nrhs, const float* d, const float* e, float* b, lapack_int ldb ); lapack_int LAPACKE_dpttrs_work( int matrix_order, lapack_int n, lapack_int nrhs, const double* d, const double* e, double* b, lapack_int ldb ); lapack_int LAPACKE_cpttrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* d, const lapack_complex_float* e, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zpttrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* d, const lapack_complex_double* e, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_ssbev_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* w, float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_dsbev_work( int matrix_order, char jobz, char 
uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* w, double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_ssbevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* w, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dsbevd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* w, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_ssbevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_dsbevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_ssbgst_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, const float* bb, lapack_int ldbb, float* x, lapack_int ldx, float* work ); lapack_int LAPACKE_dsbgst_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, const double* bb, lapack_int ldbb, double* x, lapack_int ldx, double* work ); lapack_int LAPACKE_ssbgv_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, float* bb, lapack_int ldbb, float* w, float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_dsbgv_work( int matrix_order, char jobz, 
char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, double* bb, lapack_int ldbb, double* w, double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_ssbgvd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, float* bb, lapack_int ldbb, float* w, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dsbgvd_work( int matrix_order, char jobz, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, double* bb, lapack_int ldbb, double* w, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_ssbgvx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, float* ab, lapack_int ldab, float* bb, lapack_int ldbb, float* q, lapack_int ldq, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_dsbgvx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, lapack_int ka, lapack_int kb, double* ab, lapack_int ldab, double* bb, lapack_int ldbb, double* q, lapack_int ldq, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_ssbtrd_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, float* ab, lapack_int ldab, float* d, float* e, float* q, lapack_int ldq, float* work ); lapack_int LAPACKE_dsbtrd_work( int matrix_order, char vect, char uplo, lapack_int n, lapack_int kd, double* ab, lapack_int ldab, double* d, double* e, double* q, lapack_int ldq, double* work ); lapack_int LAPACKE_ssfrk_work( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, float 
alpha, const float* a, lapack_int lda, float beta, float* c ); lapack_int LAPACKE_dsfrk_work( int matrix_order, char transr, char uplo, char trans, lapack_int n, lapack_int k, double alpha, const double* a, lapack_int lda, double beta, double* c ); lapack_int LAPACKE_sspcon_work( int matrix_order, char uplo, lapack_int n, const float* ap, const lapack_int* ipiv, float anorm, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dspcon_work( int matrix_order, char uplo, lapack_int n, const double* ap, const lapack_int* ipiv, double anorm, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_cspcon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, const lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work ); lapack_int LAPACKE_zspcon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work ); lapack_int LAPACKE_sspev_work( int matrix_order, char jobz, char uplo, lapack_int n, float* ap, float* w, float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_dspev_work( int matrix_order, char jobz, char uplo, lapack_int n, double* ap, double* w, double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_sspevd_work( int matrix_order, char jobz, char uplo, lapack_int n, float* ap, float* w, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dspevd_work( int matrix_order, char jobz, char uplo, lapack_int n, double* ap, double* w, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_sspevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, float* ap, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifail ); lapack_int 
LAPACKE_dspevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, double* ap, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_sspgst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, float* ap, const float* bp ); lapack_int LAPACKE_dspgst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, double* ap, const double* bp ); lapack_int LAPACKE_sspgv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* ap, float* bp, float* w, float* z, lapack_int ldz, float* work ); lapack_int LAPACKE_dspgv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* ap, double* bp, double* w, double* z, lapack_int ldz, double* work ); lapack_int LAPACKE_sspgvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* ap, float* bp, float* w, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dspgvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* ap, double* bp, double* w, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_sspgvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, float* ap, float* bp, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifail ); lapack_int LAPACKE_dspgvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, double* ap, double* bp, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifail ); lapack_int 
LAPACKE_ssprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, const float* afp, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dsprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, const double* afp, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_csprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zsprfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_sspsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* ap, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dspsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* ap, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_cspsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* ap, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zspsv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* ap, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sspsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const float* ap, 
float* afp, lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dspsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const double* ap, double* afp, lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_cspsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* afp, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_zspsvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* afp, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_ssptrd_work( int matrix_order, char uplo, lapack_int n, float* ap, float* d, float* e, float* tau ); lapack_int LAPACKE_dsptrd_work( int matrix_order, char uplo, lapack_int n, double* ap, double* d, double* e, double* tau ); lapack_int LAPACKE_ssptrf_work( int matrix_order, char uplo, lapack_int n, float* ap, lapack_int* ipiv ); lapack_int LAPACKE_dsptrf_work( int matrix_order, char uplo, lapack_int n, double* ap, lapack_int* ipiv ); lapack_int LAPACKE_csptrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, lapack_int* ipiv ); lapack_int LAPACKE_zsptrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, lapack_int* ipiv ); lapack_int LAPACKE_ssptri_work( int matrix_order, char uplo, lapack_int n, float* ap, const lapack_int* ipiv, float* work ); 
lapack_int LAPACKE_dsptri_work( int matrix_order, char uplo, lapack_int n, double* ap, const lapack_int* ipiv, double* work ); lapack_int LAPACKE_csptri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* work ); lapack_int LAPACKE_zsptri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* work ); lapack_int LAPACKE_ssptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* ap, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dsptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* ap, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_csptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zsptrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sstebz_work( char range, char order, lapack_int n, float vl, float vu, lapack_int il, lapack_int iu, float abstol, const float* d, const float* e, lapack_int* m, lapack_int* nsplit, float* w, lapack_int* iblock, lapack_int* isplit, float* work, lapack_int* iwork ); lapack_int LAPACKE_dstebz_work( char range, char order, lapack_int n, double vl, double vu, lapack_int il, lapack_int iu, double abstol, const double* d, const double* e, lapack_int* m, lapack_int* nsplit, double* w, lapack_int* iblock, lapack_int* isplit, double* work, lapack_int* iwork ); lapack_int LAPACKE_sstedc_work( int matrix_order, char compz, lapack_int n, float* d, float* e, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dstedc_work( int matrix_order, char compz, lapack_int n, 
double* d, double* e, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_cstedc_work( int matrix_order, char compz, lapack_int n, float* d, float* e, lapack_complex_float* z, lapack_int ldz, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zstedc_work( int matrix_order, char compz, lapack_int n, double* d, double* e, lapack_complex_double* z, lapack_int ldz, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_sstegr_work( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* isuppz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_dstegr_work( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* isuppz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_cstegr_work( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int* isuppz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_zstegr_work( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int* isuppz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); lapack_int LAPACKE_sstein_work( int matrix_order, lapack_int n, 
const float* d, const float* e, lapack_int m, const float* w, const lapack_int* iblock, const lapack_int* isplit, float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifailv ); /* tail of LAPACKE_sstein_work, begun on the previous line */
/* {d,c,z}stein_work: d, e, w, iblock and isplit are const; z, work, iwork and
 * ifailv are non-const. The c/z variants make only z complex -- work stays
 * float/double. No workspace length arguments are taken. */
lapack_int LAPACKE_dstein_work( int matrix_order, lapack_int n, const double* d, const double* e, lapack_int m, const double* w, const lapack_int* iblock, const lapack_int* isplit, double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifailv );
lapack_int LAPACKE_cstein_work( int matrix_order, lapack_int n, const float* d, const float* e, lapack_int m, const float* w, const lapack_int* iblock, const lapack_int* isplit, lapack_complex_float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifailv );
lapack_int LAPACKE_zstein_work( int matrix_order, lapack_int n, const double* d, const double* e, lapack_int m, const double* w, const lapack_int* iblock, const lapack_int* isplit, lapack_complex_double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifailv );
/* {s,d,c}stemr_work: no abstol argument; nzc is passed by value and tryrac is a
 * non-const lapack_logical pointer. As with z above, only z is complex in the
 * c variant; work is real with an lwork length, iwork has an liwork length. */
lapack_int LAPACKE_sstemr_work( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
lapack_int LAPACKE_dstemr_work( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
lapack_int LAPACKE_cstemr_work( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, lapack_int* m, float* w, lapack_complex_float* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac, float* work, lapack_int
lwork, lapack_int* iwork, lapack_int liwork ); /* tail of LAPACKE_cstemr_work, begun on the previous line */
/* zstemr_work: double-precision complex member of the stemr family; z is the
 * only complex argument. */
lapack_int LAPACKE_zstemr_work( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, lapack_int* m, double* w, lapack_complex_double* z, lapack_int ldz, lapack_int nzc, lapack_int* isuppz, lapack_logical* tryrac, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
/* {s,d,c,z}steqr_work: a single work array with no length argument. The c/z
 * variants take a complex z but a real (float/double) work array. */
lapack_int LAPACKE_ssteqr_work( int matrix_order, char compz, lapack_int n, float* d, float* e, float* z, lapack_int ldz, float* work );
lapack_int LAPACKE_dsteqr_work( int matrix_order, char compz, lapack_int n, double* d, double* e, double* z, lapack_int ldz, double* work );
lapack_int LAPACKE_csteqr_work( int matrix_order, char compz, lapack_int n, float* d, float* e, lapack_complex_float* z, lapack_int ldz, float* work );
lapack_int LAPACKE_zsteqr_work( int matrix_order, char compz, lapack_int n, double* d, double* e, lapack_complex_double* z, lapack_int ldz, double* work );
/* {s,d}sterf_work: the only declarations on this line without a matrix_order
 * argument; they take just n and the d/e arrays. */
lapack_int LAPACKE_ssterf_work( lapack_int n, float* d, float* e );
lapack_int LAPACKE_dsterf_work( lapack_int n, double* d, double* e );
/* {s,d}stev_work: jobz selector, d/e arrays, z with leading dimension ldz, and
 * one work array without a length. */
lapack_int LAPACKE_sstev_work( int matrix_order, char jobz, lapack_int n, float* d, float* e, float* z, lapack_int ldz, float* work );
lapack_int LAPACKE_dstev_work( int matrix_order, char jobz, lapack_int n, double* d, double* e, double* z, lapack_int ldz, double* work );
/* {s,d}stevd_work: same leading arguments as stev_work, followed by lwork and
 * an iwork/liwork pair. */
lapack_int LAPACKE_sstevd_work( int matrix_order, char jobz, lapack_int n, float* d, float* e, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
lapack_int LAPACKE_dstevd_work( int matrix_order, char jobz, lapack_int n, double* d, double* e, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
/* sstevr_work: the parameter list continues on the next line. */
lapack_int LAPACKE_sstevr_work( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z,
lapack_int ldz, lapack_int* isuppz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork ); /* tail of LAPACKE_sstevr_work, begun on the previous line */
/* dstevr_work: range/vl/vu/il/iu/abstol by value, m and isuppz as non-const
 * lapack_int pointers, work/lwork and iwork/liwork workspaces. */
lapack_int LAPACKE_dstevr_work( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* isuppz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
/* {s,d}stevx_work: workspaces are passed without length arguments; the final
 * argument is ifail, a non-const lapack_int pointer. */
lapack_int LAPACKE_sstevx_work( int matrix_order, char jobz, char range, lapack_int n, float* d, float* e, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int* iwork, lapack_int* ifail );
lapack_int LAPACKE_dstevx_work( int matrix_order, char jobz, char range, lapack_int n, double* d, double* e, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int* iwork, lapack_int* ifail );
/* {s,d,c,z}sycon_work: a and ipiv are const; anorm is passed by value and rcond
 * is a non-const real pointer. The real variants take work plus an integer
 * iwork; the complex variants take only a complex work array. */
lapack_int LAPACKE_ssycon_work( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, const lapack_int* ipiv, float anorm, float* rcond, float* work, lapack_int* iwork );
lapack_int LAPACKE_dsycon_work( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, const lapack_int* ipiv, double anorm, double* rcond, double* work, lapack_int* iwork );
lapack_int LAPACKE_csycon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, float anorm, float* rcond, lapack_complex_float* work );
lapack_int LAPACKE_zsycon_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work );
/* {s,d}syequb_work: a is const; s, scond and amax are non-const pointers of the
 * same real type, followed by one work array. dsyequb_work continues on the
 * next line. */
lapack_int LAPACKE_ssyequb_work( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax, float* work );
lapack_int LAPACKE_dsyequb_work( int matrix_order, char
uplo, lapack_int n, const double* a, lapack_int lda, double* s, double* scond, double* amax, double* work ); /* tail of LAPACKE_dsyequb_work, begun on the previous line */
/* {c,z}syequb_work: a is const and complex; s, scond and amax stay real
 * (float/double) while work has the complex element type of a. */
lapack_int LAPACKE_csyequb_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* s, float* scond, float* amax, lapack_complex_float* work );
lapack_int LAPACKE_zsyequb_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* s, double* scond, double* amax, lapack_complex_double* work );
/* {s,d}syev_work: a is non-const (with lda), w is a non-const real array, and a
 * single work/lwork workspace is supplied. Only s and d variants of the syev*
 * names are declared on this line. */
lapack_int LAPACKE_ssyev_work( int matrix_order, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* w, float* work, lapack_int lwork );
lapack_int LAPACKE_dsyev_work( int matrix_order, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* w, double* work, lapack_int lwork );
/* {s,d}syevd_work: the syev_work argument list followed by an iwork/liwork
 * pair. */
lapack_int LAPACKE_ssyevd_work( int matrix_order, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* w, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
lapack_int LAPACKE_dsyevd_work( int matrix_order, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* w, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
/* {s,d}syevr_work: adds range with vl/vu/il/iu/abstol by value, m as a
 * non-const lapack_int pointer, a separate z/ldz array and isuppz; both
 * workspaces carry explicit lengths. */
lapack_int LAPACKE_ssyevr_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, float* a, lapack_int lda, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, lapack_int* isuppz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
lapack_int LAPACKE_dsyevr_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, lapack_int* isuppz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
/* ssyevx_work: the parameter list continues on the next line. */
lapack_int LAPACKE_ssyevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, float* a, lapack_int lda, float vl,
float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int lwork, lapack_int* iwork, lapack_int* ifail ); /* tail of LAPACKE_ssyevx_work, begun on the previous line */
/* dsyevx_work: work has an lwork length but iwork has none; the last argument
 * is ifail, a non-const lapack_int pointer. */
lapack_int LAPACKE_dsyevx_work( int matrix_order, char jobz, char range, char uplo, lapack_int n, double* a, lapack_int lda, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int* ifail );
/* {s,d}sygst_work: itype is a lapack_int passed by value; a is non-const and b
 * is const. No workspace arguments. */
lapack_int LAPACKE_ssygst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, float* a, lapack_int lda, const float* b, lapack_int ldb );
lapack_int LAPACKE_dsygst_work( int matrix_order, lapack_int itype, char uplo, lapack_int n, double* a, lapack_int lda, const double* b, lapack_int ldb );
/* {s,d}sygv_work: both a and b are non-const here; w is a non-const real array
 * and a single work/lwork workspace is supplied. */
lapack_int LAPACKE_ssygv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* w, float* work, lapack_int lwork );
lapack_int LAPACKE_dsygv_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* w, double* work, lapack_int lwork );
/* {s,d}sygvd_work: the sygv_work argument list followed by an iwork/liwork
 * pair. */
lapack_int LAPACKE_ssygvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* w, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
lapack_int LAPACKE_dsygvd_work( int matrix_order, lapack_int itype, char jobz, char uplo, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* w, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
/* ssygvx_work: the parameter list continues on the next line. */
lapack_int LAPACKE_ssygvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float vl, float vu, lapack_int il, lapack_int iu, float abstol, lapack_int* m, float* w, float* z, lapack_int ldz, float* work, lapack_int lwork,
lapack_int* iwork, lapack_int* ifail ); /* tail of LAPACKE_ssygvx_work, begun on the previous line */
/* dsygvx_work: itype by value, a and b non-const, range selectors by value,
 * work/lwork plus an unsized iwork, and ifail as the last argument. */
lapack_int LAPACKE_dsygvx_work( int matrix_order, lapack_int itype, char jobz, char range, char uplo, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double vl, double vu, lapack_int il, lapack_int iu, double abstol, lapack_int* m, double* w, double* z, lapack_int ldz, double* work, lapack_int lwork, lapack_int* iwork, lapack_int* ifail );
/* {s,d,c,z}syrfs_work: a, af, ipiv and b are const; x, ferr and berr are
 * non-const (ferr/berr are real in every precision). The real variants end
 * with work, iwork; the complex variants end with a complex work and a real
 * rwork. */
lapack_int LAPACKE_ssyrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork );
lapack_int LAPACKE_dsyrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork );
lapack_int LAPACKE_csyrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork );
lapack_int LAPACKE_zsyrfs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork );
/* ssyrfsx_work: takes equed (char by value) and a const s array in addition to
 * the const a/af/ipiv/b inputs; the parameter list continues on the next
 * line. */
lapack_int LAPACKE_ssyrfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* af, lapack_int ldaf, const lapack_int* ipiv, const float* s, const float* b, lapack_int ldb,
float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork ); /* tail of LAPACKE_ssyrfsx_work, begun on the previous line */
/* {d,c,z}syrfsx_work: s, rcond, berr, err_bnds_norm, err_bnds_comp and params
 * are real (float/double) in every precision; n_err_bnds and nparams are
 * passed by value. The complex variants replace the trailing work, iwork pair
 * with a complex work and a real rwork. */
lapack_int LAPACKE_dsyrfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* af, lapack_int ldaf, const lapack_int* ipiv, const double* s, const double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork );
lapack_int LAPACKE_csyrfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* af, lapack_int ldaf, const lapack_int* ipiv, const float* s, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork );
lapack_int LAPACKE_zsyrfsx_work( int matrix_order, char uplo, char equed, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* af, lapack_int ldaf, const lapack_int* ipiv, const double* s, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork );
/* {s,d}sysv_work: a, ipiv and b are all non-const; one work/lwork workspace.
 * dsysv_work continues on the next line. */
lapack_int LAPACKE_ssysv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb, float* work, lapack_int lwork );
lapack_int LAPACKE_dsysv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int
ldb, double* work, lapack_int lwork ); /* tail of LAPACKE_dsysv_work, begun on the previous line */
/* {c,z}sysv_work: a, ipiv and b are non-const; work is complex with an lwork
 * length. No rwork is taken. */
lapack_int LAPACKE_csysv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zsysv_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work, lapack_int lwork );
/* {s,d,c,z}sysv_rook_work: parameter lists are identical to the corresponding
 * sysv_work declarations; only the function name differs. */
lapack_int LAPACKE_ssysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb, float* work, lapack_int lwork );
lapack_int LAPACKE_dsysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb, double* work, lapack_int lwork );
lapack_int LAPACKE_csysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zsysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work, lapack_int lwork );
/* {s,d}sysvx_work: a and b are const while af, ipiv and x are non-const; rcond,
 * ferr and berr are non-const real pointers; work has an lwork length and is
 * followed by an unsized iwork. dsysvx_work continues on the next line. */
lapack_int LAPACKE_ssysvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, const float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int lwork, lapack_int* iwork );
lapack_int LAPACKE_dsysvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, const double* b, lapack_int ldb, double* x, lapack_int ldx, double*
rcond, double* ferr, double* berr, double* work, lapack_int lwork, lapack_int* iwork ); /* tail of LAPACKE_dsysvx_work, begun on the previous line */
/* {c,z}sysvx_work: a and b are const, af/ipiv/x non-const; rcond, ferr and berr
 * are real. The argument list ends with complex work, lwork and a real rwork
 * (no iwork). */
lapack_int LAPACKE_csysvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, lapack_int lwork, float* rwork );
lapack_int LAPACKE_zsysvx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, lapack_int lwork, double* rwork );
/* {s,d,c}sysvxx_work: unlike sysvx_work above, a and b are non-const here, and
 * equed is a char pointer followed by a non-const real s array. rpvgrw is an
 * additional non-const real pointer; workspaces carry no length arguments.
 * csysvxx_work continues on the next line. */
lapack_int LAPACKE_ssysvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* s, float* b, lapack_int ldb, float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, float* work, lapack_int* iwork );
lapack_int LAPACKE_dsysvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* s, double* b, lapack_int ldb, double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, double* work, lapack_int* iwork );
lapack_int LAPACKE_csysvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_complex_float* af, lapack_int ldaf, lapack_int* ipiv, char* equed, float* s,
lapack_complex_float* b, lapack_int ldb, lapack_complex_float* x, lapack_int ldx, float* rcond, float* rpvgrw, float* berr, lapack_int n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int nparams, float* params, lapack_complex_float* work, float* rwork ); /* tail of LAPACKE_csysvxx_work, begun on the previous line */
/* zsysvxx_work: a, af, b, x and work are complex; s, rcond, rpvgrw, berr, the
 * error-bound arrays, params and rwork are double. */
lapack_int LAPACKE_zsysvxx_work( int matrix_order, char fact, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_complex_double* af, lapack_int ldaf, lapack_int* ipiv, char* equed, double* s, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* x, lapack_int ldx, double* rcond, double* rpvgrw, double* berr, lapack_int n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int nparams, double* params, lapack_complex_double* work, double* rwork );
/* {s,d}sytrd_work: a is non-const; d, e and tau are non-const arrays of the
 * same real type; one work/lwork workspace. Only s and d variants are declared
 * on this line. */
lapack_int LAPACKE_ssytrd_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, float* d, float* e, float* tau, float* work, lapack_int lwork );
lapack_int LAPACKE_dsytrd_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, double* d, double* e, double* tau, double* work, lapack_int lwork );
/* {s,d,c,z}sytrf_work: a and ipiv are non-const; work has the element type of a
 * and an lwork length. */
lapack_int LAPACKE_ssytrf_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, lapack_int* ipiv, float* work, lapack_int lwork );
lapack_int LAPACKE_dsytrf_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, lapack_int* ipiv, double* work, lapack_int lwork );
lapack_int LAPACKE_csytrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zsytrf_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* work, lapack_int lwork );
/* {s,d}sytri_work: a is non-const, ipiv is const, and work has no length
 * argument. dsytri_work continues on the next line. */
lapack_int LAPACKE_ssytri_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv, float* work );
lapack_int LAPACKE_dsytri_work( int matrix_order, char uplo,
lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv, double* work ); /* tail of LAPACKE_dsytri_work, begun on the previous line */
/* {c,z}sytri_work: a is non-const and complex, ipiv is const, and work is a
 * complex array with no length argument. */
lapack_int LAPACKE_csytri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work );
lapack_int LAPACKE_zsytri_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work );
/* {s,d,c,z}sytrs_work: a and ipiv are const, b is non-const; no workspace
 * arguments. */
lapack_int LAPACKE_ssytrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const lapack_int* ipiv, float* b, lapack_int ldb );
lapack_int LAPACKE_dsytrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const lapack_int* ipiv, double* b, lapack_int ldb );
lapack_int LAPACKE_csytrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_zsytrs_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb );
/* {s,d,c,z}tbcon_work: three char selectors (norm, uplo, diag), n and kd by
 * value, a const ab array with ldab, and rcond as a non-const real pointer.
 * Real variants take work, iwork; complex variants take a complex work and a
 * real rwork. ztbcon_work continues on the next line. */
lapack_int LAPACKE_stbcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab, float* rcond, float* work, lapack_int* iwork );
lapack_int LAPACKE_dtbcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab, double* rcond, double* work, lapack_int* iwork );
lapack_int LAPACKE_ctbcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab, float* rcond, lapack_complex_float* work, float* rwork );
lapack_int LAPACKE_ztbcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab, double* rcond,
lapack_complex_double* work, double* rwork ); /* tail of LAPACKE_ztbcon_work, begun on the previous line */
/* {s,d,c,z}tbrfs_work: ab, b and x are all const here -- only ferr, berr and
 * the workspaces are non-const. Real variants end with work, iwork; complex
 * variants end with a complex work and a real rwork. */
lapack_int LAPACKE_stbrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, const float* b, lapack_int ldb, const float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork );
lapack_int LAPACKE_dtbrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, const double* b, lapack_int ldb, const double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork );
lapack_int LAPACKE_ctbrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork );
lapack_int LAPACKE_ztbrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork );
/* {s,d,c,z}tbtrs_work: ab is const and b is non-const; no workspace arguments.
 * ztbtrs_work continues on the next line. */
lapack_int LAPACKE_stbtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const float* ab, lapack_int ldab, float* b, lapack_int ldb );
lapack_int LAPACKE_dtbtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const double* ab, lapack_int ldab, double* b, lapack_int ldb );
lapack_int LAPACKE_ctbtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_float* ab, lapack_int ldab, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_ztbtrs_work( int matrix_order, char uplo, char
trans, char diag, lapack_int n, lapack_int kd, lapack_int nrhs, const lapack_complex_double* ab, lapack_int ldab, lapack_complex_double* b, lapack_int ldb ); /* tail of LAPACKE_ztbtrs_work, begun on the previous line */
/* {s,d,c,z}tfsm_work: five char selectors (transr, side, uplo, trans, diag);
 * alpha is passed by value with the element type (a complex value in the c/z
 * variants). a is const and has no leading-dimension argument; b is non-const
 * with ldb. */
lapack_int LAPACKE_stfsm_work( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, float alpha, const float* a, float* b, lapack_int ldb );
lapack_int LAPACKE_dtfsm_work( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, double alpha, const double* a, double* b, lapack_int ldb );
lapack_int LAPACKE_ctfsm_work( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, lapack_complex_float alpha, const lapack_complex_float* a, lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_ztfsm_work( int matrix_order, char transr, char side, char uplo, char trans, char diag, lapack_int m, lapack_int n, lapack_complex_double alpha, const lapack_complex_double* a, lapack_complex_double* b, lapack_int ldb );
/* {s,d,c,z}tftri_work: a single non-const array a with no leading dimension. */
lapack_int LAPACKE_stftri_work( int matrix_order, char transr, char uplo, char diag, lapack_int n, float* a );
lapack_int LAPACKE_dtftri_work( int matrix_order, char transr, char uplo, char diag, lapack_int n, double* a );
lapack_int LAPACKE_ctftri_work( int matrix_order, char transr, char uplo, char diag, lapack_int n, lapack_complex_float* a );
lapack_int LAPACKE_ztftri_work( int matrix_order, char transr, char uplo, char diag, lapack_int n, lapack_complex_double* a );
/* {s,d,c,z}tfttp_work: arf is const, ap is non-const; neither has a
 * leading-dimension argument. ztfttp_work continues on the next line. */
lapack_int LAPACKE_stfttp_work( int matrix_order, char transr, char uplo, lapack_int n, const float* arf, float* ap );
lapack_int LAPACKE_dtfttp_work( int matrix_order, char transr, char uplo, lapack_int n, const double* arf, double* ap );
lapack_int LAPACKE_ctfttp_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* arf, lapack_complex_float* ap );
lapack_int LAPACKE_ztfttp_work( int matrix_order, char transr, char uplo, lapack_int n, const
lapack_complex_double* arf, lapack_complex_double* ap ); /* tail of LAPACKE_ztfttp_work, begun on the previous line */
/* {s,d,c,z}tfttr_work: arf is const; the non-const a array carries a leading
 * dimension lda. */
lapack_int LAPACKE_stfttr_work( int matrix_order, char transr, char uplo, lapack_int n, const float* arf, float* a, lapack_int lda );
lapack_int LAPACKE_dtfttr_work( int matrix_order, char transr, char uplo, lapack_int n, const double* arf, double* a, lapack_int lda );
lapack_int LAPACKE_ctfttr_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* arf, lapack_complex_float* a, lapack_int lda );
lapack_int LAPACKE_ztfttr_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* arf, lapack_complex_double* a, lapack_int lda );
/* {s,d,c,z}tgevc_work: select, s and p are const; vl and vr are non-const with
 * their own leading dimensions; mm is by value and m is a non-const lapack_int
 * pointer. The complex variants append a real rwork after work. */
lapack_int LAPACKE_stgevc_work( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const float* s, lapack_int lds, const float* p, lapack_int ldp, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, float* work );
lapack_int LAPACKE_dtgevc_work( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const double* s, lapack_int lds, const double* p, lapack_int ldp, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, double* work );
lapack_int LAPACKE_ctgevc_work( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_float* s, lapack_int lds, const lapack_complex_float* p, lapack_int ldp, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_complex_float* work, float* rwork );
lapack_int LAPACKE_ztgevc_work( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_double* s, lapack_int lds, const lapack_complex_double* p, lapack_int ldp, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_complex_double* work, double* rwork );
/* Return type of LAPACKE_stgexc_work; its name and parameter list are on the
 * next line. */
lapack_int
LAPACKE_stgexc_work( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* q, lapack_int ldq, float* z, lapack_int ldz, lapack_int* ifst, lapack_int* ilst, float* work, lapack_int lwork ); /* return type lapack_int is on the previous line */
/* dtgexc_work: like the single-precision declaration above, ifst and ilst are
 * non-const lapack_int pointers and a work/lwork workspace is supplied. */
lapack_int LAPACKE_dtgexc_work( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* q, lapack_int ldq, double* z, lapack_int ldz, lapack_int* ifst, lapack_int* ilst, double* work, lapack_int lwork );
/* {c,z}tgexc_work: unlike the real variants, ifst and ilst are passed BY VALUE
 * and there is no work/lwork pair, so nothing can be returned to the caller
 * through ifst/ilst.
 * NOTE(review): confirm against the LAPACKE library this header is linked with
 * before relying on or changing this asymmetry. */
lapack_int LAPACKE_ctgexc_work( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz, lapack_int ifst, lapack_int ilst );
lapack_int LAPACKE_ztgexc_work( int matrix_order, lapack_logical wantq, lapack_logical wantz, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz, lapack_int ifst, lapack_int ilst );
/* {s,d}tgsen_work: ijob by value, wantq/wantz as lapack_logical values, a const
 * select array; three separate non-const real arrays alphar, alphai and beta;
 * m, pl, pr and dif are non-const pointers; work/lwork and iwork/liwork
 * workspaces. dtgsen_work continues on the next line. */
lapack_int LAPACKE_stgsen_work( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, float* a, lapack_int lda, float* b, lapack_int ldb, float* alphar, float* alphai, float* beta, float* q, lapack_int ldq, float* z, lapack_int ldz, lapack_int* m, float* pl, float* pr, float* dif, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
lapack_int LAPACKE_dtgsen_work( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, double* a, lapack_int lda, double* b, lapack_int ldb, double* alphar, double* alphai, double* beta, double* q, lapack_int ldq, double* z, lapack_int ldz, lapack_int* m, double* pl, double* pr, double* dif, double* work, lapack_int lwork,
lapack_int* iwork, lapack_int liwork ); /* tail of LAPACKE_dtgsen_work, begun on the previous line */
/* {c,z}tgsen_work: a single complex alpha array and a complex beta array are
 * taken (the real variants declare alphar and alphai separately); pl, pr and
 * dif stay real. Workspaces are complex work/lwork plus iwork/liwork. */
lapack_int LAPACKE_ctgsen_work( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* z, lapack_int ldz, lapack_int* m, float* pl, float* pr, float* dif, lapack_complex_float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
lapack_int LAPACKE_ztgsen_work( int matrix_order, lapack_int ijob, lapack_logical wantq, lapack_logical wantz, const lapack_logical* select, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* z, lapack_int ldz, lapack_int* m, double* pl, double* pr, double* dif, lapack_complex_double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
/* {s,d,c}tgsja_work: three char selectors (jobu, jobv, jobq); m, p, n, k and l
 * are all passed by value; tola and tolb are real values; alpha and beta are
 * non-const real arrays; u, v and q each carry a leading dimension. The list
 * ends with work (no length) and ncycle, a non-const lapack_int pointer.
 * ctgsja_work continues on the next line. */
lapack_int LAPACKE_stgsja_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, float* a, lapack_int lda, float* b, lapack_int ldb, float tola, float tolb, float* alpha, float* beta, float* u, lapack_int ldu, float* v, lapack_int ldv, float* q, lapack_int ldq, float* work, lapack_int* ncycle );
lapack_int LAPACKE_dtgsja_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, double* a, lapack_int lda, double* b, lapack_int ldb, double tola, double tolb, double* alpha, double* beta, double* u, lapack_int ldu, double* v, lapack_int ldv, double* q, lapack_int ldq, double* work, lapack_int* ncycle );
lapack_int LAPACKE_ctgsja_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, lapack_complex_float* a,
lapack_int lda, lapack_complex_float* b, lapack_int ldb, float tola, float tolb, float* alpha, float* beta, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* v, lapack_int ldv, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* work, lapack_int* ncycle ); /* tail of LAPACKE_ctgsja_work, begun on the previous line */
/* ztgsja_work: a, b, u, v, q and work are complex; tola, tolb, alpha and beta
 * are double. No rwork argument is taken. */
lapack_int LAPACKE_ztgsja_work( int matrix_order, char jobu, char jobv, char jobq, lapack_int m, lapack_int p, lapack_int n, lapack_int k, lapack_int l, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, double tola, double tolb, double* alpha, double* beta, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* v, lapack_int ldv, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* work, lapack_int* ncycle );
/* {s,d,c,z}tgsna_work: select, a, b, vl and vr are all const; s and dif are
 * non-const real arrays in every precision; mm is by value and m is a non-const
 * lapack_int pointer. Workspaces are work/lwork plus an unsized iwork (the
 * complex variants take no rwork). ztgsna_work continues on the next line. */
lapack_int LAPACKE_stgsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const float* a, lapack_int lda, const float* b, lapack_int ldb, const float* vl, lapack_int ldvl, const float* vr, lapack_int ldvr, float* s, float* dif, lapack_int mm, lapack_int* m, float* work, lapack_int lwork, lapack_int* iwork );
lapack_int LAPACKE_dtgsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const double* a, lapack_int lda, const double* b, lapack_int ldb, const double* vl, lapack_int ldvl, const double* vr, lapack_int ldvr, double* s, double* dif, lapack_int mm, lapack_int* m, double* work, lapack_int lwork, lapack_int* iwork );
lapack_int LAPACKE_ctgsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* vl, lapack_int ldvl, const lapack_complex_float* vr, lapack_int ldvr, float* s, float* dif, lapack_int mm, lapack_int* m, lapack_complex_float* work, lapack_int lwork, lapack_int* iwork );
lapack_int LAPACKE_ztgsna_work( int matrix_order, char job, char howmny, const lapack_logical* select,
lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* vl, lapack_int ldvl, const lapack_complex_double* vr, lapack_int ldvr, double* s, double* dif, lapack_int mm, lapack_int* m, lapack_complex_double* work, lapack_int lwork, lapack_int* iwork );
/* ?tgsyl_work: a, b, d and e are const-qualified; c and f are writable.
 * scale and dif are real-typed even in the complex variants. */
lapack_int LAPACKE_stgsyl_work( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const float* a, lapack_int lda, const float* b, lapack_int ldb, float* c, lapack_int ldc, const float* d, lapack_int ldd, const float* e, lapack_int lde, float* f, lapack_int ldf, float* scale, float* dif, float* work, lapack_int lwork, lapack_int* iwork );
lapack_int LAPACKE_dtgsyl_work( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const double* a, lapack_int lda, const double* b, lapack_int ldb, double* c, lapack_int ldc, const double* d, lapack_int ldd, const double* e, lapack_int lde, double* f, lapack_int ldf, double* scale, double* dif, double* work, lapack_int lwork, lapack_int* iwork );
lapack_int LAPACKE_ctgsyl_work( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* c, lapack_int ldc, const lapack_complex_float* d, lapack_int ldd, const lapack_complex_float* e, lapack_int lde, lapack_complex_float* f, lapack_int ldf, float* scale, float* dif, lapack_complex_float* work, lapack_int lwork, lapack_int* iwork );
lapack_int LAPACKE_ztgsyl_work( int matrix_order, char trans, lapack_int ijob, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* c, lapack_int ldc, const lapack_complex_double* d, lapack_int ldd, const lapack_complex_double* e, lapack_int lde, lapack_complex_double* f, lapack_int ldf, double* scale, double* dif, lapack_complex_double* work, lapack_int lwork,
lapack_int* iwork ); lapack_int LAPACKE_stpcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const float* ap, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dtpcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const double* ap, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_ctpcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_float* ap, float* rcond, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_ztpcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_double* ap, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_stprfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* ap, const float* b, lapack_int ldb, const float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dtprfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* ap, const double* b, lapack_int ldb, const double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_ctprfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_ztprfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_stptri_work( int matrix_order, char uplo, char diag, lapack_int n, float* ap ); lapack_int 
LAPACKE_dtptri_work( int matrix_order, char uplo, char diag, lapack_int n, double* ap ); lapack_int LAPACKE_ctptri_work( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_float* ap ); lapack_int LAPACKE_ztptri_work( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_double* ap ); lapack_int LAPACKE_stptrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* ap, float* b, lapack_int ldb ); lapack_int LAPACKE_dtptrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* ap, double* b, lapack_int ldb ); lapack_int LAPACKE_ctptrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* ap, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztptrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* ap, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_stpttf_work( int matrix_order, char transr, char uplo, lapack_int n, const float* ap, float* arf ); lapack_int LAPACKE_dtpttf_work( int matrix_order, char transr, char uplo, lapack_int n, const double* ap, double* arf ); lapack_int LAPACKE_ctpttf_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* ap, lapack_complex_float* arf ); lapack_int LAPACKE_ztpttf_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* ap, lapack_complex_double* arf ); lapack_int LAPACKE_stpttr_work( int matrix_order, char uplo, lapack_int n, const float* ap, float* a, lapack_int lda ); lapack_int LAPACKE_dtpttr_work( int matrix_order, char uplo, lapack_int n, const double* ap, double* a, lapack_int lda ); lapack_int LAPACKE_ctpttr_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_ztpttr_work( int 
matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_strcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const float* a, lapack_int lda, float* rcond, float* work, lapack_int* iwork ); lapack_int LAPACKE_dtrcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const double* a, lapack_int lda, double* rcond, double* work, lapack_int* iwork ); lapack_int LAPACKE_ctrcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_float* a, lapack_int lda, float* rcond, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_ztrcon_work( int matrix_order, char norm, char uplo, char diag, lapack_int n, const lapack_complex_double* a, lapack_int lda, double* rcond, lapack_complex_double* work, double* rwork ); lapack_int LAPACKE_strevc_work( int matrix_order, char side, char howmny, lapack_logical* select, lapack_int n, const float* t, lapack_int ldt, float* vl, lapack_int ldvl, float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, float* work ); lapack_int LAPACKE_dtrevc_work( int matrix_order, char side, char howmny, lapack_logical* select, lapack_int n, const double* t, lapack_int ldt, double* vl, lapack_int ldvl, double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, double* work ); lapack_int LAPACKE_ctrevc_work( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* vl, lapack_int ldvl, lapack_complex_float* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_ztrevc_work( int matrix_order, char side, char howmny, const lapack_logical* select, lapack_int n, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* vl, lapack_int ldvl, lapack_complex_double* vr, lapack_int ldvr, lapack_int mm, lapack_int* m, lapack_complex_double* 
work, double* rwork ); lapack_int LAPACKE_strexc_work( int matrix_order, char compq, lapack_int n, float* t, lapack_int ldt, float* q, lapack_int ldq, lapack_int* ifst, lapack_int* ilst, float* work ); lapack_int LAPACKE_dtrexc_work( int matrix_order, char compq, lapack_int n, double* t, lapack_int ldt, double* q, lapack_int ldq, lapack_int* ifst, lapack_int* ilst, double* work ); lapack_int LAPACKE_ctrexc_work( int matrix_order, char compq, lapack_int n, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* q, lapack_int ldq, lapack_int ifst, lapack_int ilst ); lapack_int LAPACKE_ztrexc_work( int matrix_order, char compq, lapack_int n, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* q, lapack_int ldq, lapack_int ifst, lapack_int ilst ); lapack_int LAPACKE_strrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const float* b, lapack_int ldb, const float* x, lapack_int ldx, float* ferr, float* berr, float* work, lapack_int* iwork ); lapack_int LAPACKE_dtrrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const double* b, lapack_int ldb, const double* x, lapack_int ldx, double* ferr, double* berr, double* work, lapack_int* iwork ); lapack_int LAPACKE_ctrrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, const lapack_complex_float* x, lapack_int ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork ); lapack_int LAPACKE_ztrrfs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, const lapack_complex_double* x, lapack_int ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork ); lapack_int 
LAPACKE_strsen_work( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, float* t, lapack_int ldt, float* q, lapack_int ldq, float* wr, float* wi, lapack_int* m, float* s, float* sep, float* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
lapack_int LAPACKE_dtrsen_work( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, double* t, lapack_int ldt, double* q, lapack_int ldq, double* wr, double* wi, lapack_int* m, double* s, double* sep, double* work, lapack_int lwork, lapack_int* iwork, lapack_int liwork );
/* Complex ?trsen_work: a single w array replaces the wr/wi pair of the real
 * variants, and there are no iwork/liwork parameters. */
lapack_int LAPACKE_ctrsen_work( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* w, lapack_int* m, float* s, float* sep, lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_ztrsen_work( int matrix_order, char job, char compq, const lapack_logical* select, lapack_int n, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* w, lapack_int* m, double* s, double* sep, lapack_complex_double* work, lapack_int lwork );
/* ?trsna_work: t, vl and vr are const-qualified; the integer following work
 * is named ldwork here (not lwork). */
lapack_int LAPACKE_strsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const float* t, lapack_int ldt, const float* vl, lapack_int ldvl, const float* vr, lapack_int ldvr, float* s, float* sep, lapack_int mm, lapack_int* m, float* work, lapack_int ldwork, lapack_int* iwork );
lapack_int LAPACKE_dtrsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const double* t, lapack_int ldt, const double* vl, lapack_int ldvl, const double* vr, lapack_int ldvr, double* s, double* sep, lapack_int mm, lapack_int* m, double* work, lapack_int ldwork, lapack_int* iwork );
lapack_int LAPACKE_ctrsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const
lapack_complex_float* t, lapack_int ldt, const lapack_complex_float* vl, lapack_int ldvl, const lapack_complex_float* vr, lapack_int ldvr, float* s, float* sep, lapack_int mm, lapack_int* m, lapack_complex_float* work, lapack_int ldwork, float* rwork ); lapack_int LAPACKE_ztrsna_work( int matrix_order, char job, char howmny, const lapack_logical* select, lapack_int n, const lapack_complex_double* t, lapack_int ldt, const lapack_complex_double* vl, lapack_int ldvl, const lapack_complex_double* vr, lapack_int ldvr, double* s, double* sep, lapack_int mm, lapack_int* m, lapack_complex_double* work, lapack_int ldwork, double* rwork ); lapack_int LAPACKE_strsyl_work( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const float* a, lapack_int lda, const float* b, lapack_int ldb, float* c, lapack_int ldc, float* scale ); lapack_int LAPACKE_dtrsyl_work( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const double* a, lapack_int lda, const double* b, lapack_int ldb, double* c, lapack_int ldc, double* scale ); lapack_int LAPACKE_ctrsyl_work( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* b, lapack_int ldb, lapack_complex_float* c, lapack_int ldc, float* scale ); lapack_int LAPACKE_ztrsyl_work( int matrix_order, char trana, char tranb, lapack_int isgn, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* b, lapack_int ldb, lapack_complex_double* c, lapack_int ldc, double* scale ); lapack_int LAPACKE_strtri_work( int matrix_order, char uplo, char diag, lapack_int n, float* a, lapack_int lda ); lapack_int LAPACKE_dtrtri_work( int matrix_order, char uplo, char diag, lapack_int n, double* a, lapack_int lda ); lapack_int LAPACKE_ctrtri_work( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_float* a, lapack_int lda ); 
lapack_int LAPACKE_ztrtri_work( int matrix_order, char uplo, char diag, lapack_int n, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_strtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dtrtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_ctrtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztrtrs_work( int matrix_order, char uplo, char trans, char diag, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_strttf_work( int matrix_order, char transr, char uplo, lapack_int n, const float* a, lapack_int lda, float* arf ); lapack_int LAPACKE_dtrttf_work( int matrix_order, char transr, char uplo, lapack_int n, const double* a, lapack_int lda, double* arf ); lapack_int LAPACKE_ctrttf_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* arf ); lapack_int LAPACKE_ztrttf_work( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double* a, lapack_int lda, lapack_complex_double* arf ); lapack_int LAPACKE_strttp_work( int matrix_order, char uplo, lapack_int n, const float* a, lapack_int lda, float* ap ); lapack_int LAPACKE_dtrttp_work( int matrix_order, char uplo, lapack_int n, const double* a, lapack_int lda, double* ap ); lapack_int LAPACKE_ctrttp_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* a, lapack_int lda, lapack_complex_float* ap ); lapack_int LAPACKE_ztrttp_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* a, 
lapack_int lda, lapack_complex_double* ap ); lapack_int LAPACKE_stzrzf_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* tau, float* work, lapack_int lwork ); lapack_int LAPACKE_dtzrzf_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* tau, double* work, lapack_int lwork ); lapack_int LAPACKE_ctzrzf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_ztzrzf_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cungbr_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zungbr_work( int matrix_order, char vect, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunghr_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunghr_work( int matrix_order, lapack_int n, lapack_int ilo, lapack_int ihi, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunglq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunglq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const 
lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cungql_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zungql_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cungqr_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zungqr_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cungrq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zungrq_work( int matrix_order, lapack_int m, lapack_int n, lapack_int k, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cungtr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zungtr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmbr_work( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, 
lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmbr_work( int matrix_order, char vect, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmhr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmhr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int ilo, lapack_int ihi, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmql_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmql_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const 
lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmqr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmqr_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmrq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmrq_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmrz_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmrz_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cunmtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const 
lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zunmtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_cupgtr_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_float* ap, const lapack_complex_float* tau, lapack_complex_float* q, lapack_int ldq, lapack_complex_float* work ); lapack_int LAPACKE_zupgtr_work( int matrix_order, char uplo, lapack_int n, const lapack_complex_double* ap, const lapack_complex_double* tau, lapack_complex_double* q, lapack_int ldq, lapack_complex_double* work ); lapack_int LAPACKE_cupmtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_float* ap, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work ); lapack_int LAPACKE_zupmtr_work( int matrix_order, char side, char uplo, char trans, lapack_int m, lapack_int n, const lapack_complex_double* ap, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work ); lapack_int LAPACKE_claghe( int matrix_order, lapack_int n, lapack_int k, const float* d, lapack_complex_float* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_zlaghe( int matrix_order, lapack_int n, lapack_int k, const double* d, lapack_complex_double* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_slagsy( int matrix_order, lapack_int n, lapack_int k, const float* d, float* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_dlagsy( int matrix_order, lapack_int n, lapack_int k, const double* d, double* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_clagsy( int matrix_order, 
lapack_int n, lapack_int k, const float* d, lapack_complex_float* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_zlagsy( int matrix_order, lapack_int n, lapack_int k, const double* d, lapack_complex_double* a, lapack_int lda, lapack_int* iseed ); lapack_int LAPACKE_slapmr( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, float* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_dlapmr( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, double* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_clapmr( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, lapack_complex_float* x, lapack_int ldx, lapack_int* k ); lapack_int LAPACKE_zlapmr( int matrix_order, lapack_logical forwrd, lapack_int m, lapack_int n, lapack_complex_double* x, lapack_int ldx, lapack_int* k ); float LAPACKE_slapy2( float x, float y ); double LAPACKE_dlapy2( double x, double y ); float LAPACKE_slapy3( float x, float y, float z ); double LAPACKE_dlapy3( double x, double y, double z ); lapack_int LAPACKE_slartgp( float f, float g, float* cs, float* sn, float* r ); lapack_int LAPACKE_dlartgp( double f, double g, double* cs, double* sn, double* r ); lapack_int LAPACKE_slartgs( float x, float y, float sigma, float* cs, float* sn ); lapack_int LAPACKE_dlartgs( double x, double y, double sigma, double* cs, double* sn ); //LAPACK 3.3.0 lapack_int LAPACKE_cbbcsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, float* theta, float* phi, lapack_complex_float* u1, lapack_int ldu1, lapack_complex_float* u2, lapack_int ldu2, lapack_complex_float* v1t, lapack_int ldv1t, lapack_complex_float* v2t, lapack_int ldv2t, float* b11d, float* b11e, float* b12d, float* b12e, float* b21d, float* b21e, float* b22d, float* b22e ); lapack_int LAPACKE_cbbcsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int 
q, float* theta, float* phi, lapack_complex_float* u1, lapack_int ldu1, lapack_complex_float* u2, lapack_int ldu2, lapack_complex_float* v1t, lapack_int ldv1t, lapack_complex_float* v2t, lapack_int ldv2t, float* b11d, float* b11e, float* b12d, float* b12e, float* b21d, float* b21e, float* b22d, float* b22e, float* rwork, lapack_int lrwork ); lapack_int LAPACKE_cheswapr( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_cheswapr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_chetri2( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_chetri2_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_chetri2x( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_int nb ); lapack_int LAPACKE_chetri2x_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int nb ); lapack_int LAPACKE_chetrs2( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_chetrs2_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work ); lapack_int LAPACKE_csyconv( int matrix_order, char uplo, char way, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_csyconv_work( int matrix_order, char uplo, char way, lapack_int n, lapack_complex_float* a, lapack_int lda, const 
lapack_int* ipiv, lapack_complex_float* work ); lapack_int LAPACKE_csyswapr( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_csyswapr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_csytri2( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_csytri2_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_csytri2x( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_int nb ); lapack_int LAPACKE_csytri2x_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int nb ); lapack_int LAPACKE_csytrs2( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_csytrs2_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work ); lapack_int LAPACKE_cunbdb( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_float* x11, lapack_int ldx11, lapack_complex_float* x12, lapack_int ldx12, lapack_complex_float* x21, lapack_int ldx21, lapack_complex_float* x22, lapack_int ldx22, float* theta, float* phi, lapack_complex_float* taup1, lapack_complex_float* taup2, lapack_complex_float* tauq1, lapack_complex_float* tauq2 ); lapack_int LAPACKE_cunbdb_work( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_float* x11, 
lapack_int ldx11, lapack_complex_float* x12, lapack_int ldx12, lapack_complex_float* x21, lapack_int ldx21, lapack_complex_float* x22, lapack_int ldx22, float* theta, float* phi, lapack_complex_float* taup1, lapack_complex_float* taup2, lapack_complex_float* tauq1, lapack_complex_float* tauq2, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_cuncsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_float* x11, lapack_int ldx11, lapack_complex_float* x12, lapack_int ldx12, lapack_complex_float* x21, lapack_int ldx21, lapack_complex_float* x22, lapack_int ldx22, float* theta, lapack_complex_float* u1, lapack_int ldu1, lapack_complex_float* u2, lapack_int ldu2, lapack_complex_float* v1t, lapack_int ldv1t, lapack_complex_float* v2t, lapack_int ldv2t ); lapack_int LAPACKE_cuncsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_float* x11, lapack_int ldx11, lapack_complex_float* x12, lapack_int ldx12, lapack_complex_float* x21, lapack_int ldx21, lapack_complex_float* x22, lapack_int ldx22, float* theta, lapack_complex_float* u1, lapack_int ldu1, lapack_complex_float* u2, lapack_int ldu2, lapack_complex_float* v1t, lapack_int ldv1t, lapack_complex_float* v2t, lapack_int ldv2t, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int lrwork, lapack_int* iwork ); lapack_int LAPACKE_dbbcsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, double* theta, double* phi, double* u1, lapack_int ldu1, double* u2, lapack_int ldu2, double* v1t, lapack_int ldv1t, double* v2t, lapack_int ldv2t, double* b11d, double* b11e, double* b12d, double* b12e, double* b21d, double* b21e, double* b22d, double* b22e ); lapack_int LAPACKE_dbbcsd_work( int matrix_order, char jobu1, char jobu2, 
char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, double* theta, double* phi, double* u1, lapack_int ldu1, double* u2, lapack_int ldu2, double* v1t, lapack_int ldv1t, double* v2t, lapack_int ldv2t, double* b11d, double* b11e, double* b12d, double* b12e, double* b21d, double* b21e, double* b22d, double* b22e, double* work, lapack_int lwork ); lapack_int LAPACKE_dorbdb( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, double* x11, lapack_int ldx11, double* x12, lapack_int ldx12, double* x21, lapack_int ldx21, double* x22, lapack_int ldx22, double* theta, double* phi, double* taup1, double* taup2, double* tauq1, double* tauq2 ); lapack_int LAPACKE_dorbdb_work( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, double* x11, lapack_int ldx11, double* x12, lapack_int ldx12, double* x21, lapack_int ldx21, double* x22, lapack_int ldx22, double* theta, double* phi, double* taup1, double* taup2, double* tauq1, double* tauq2, double* work, lapack_int lwork ); lapack_int LAPACKE_dorcsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, double* x11, lapack_int ldx11, double* x12, lapack_int ldx12, double* x21, lapack_int ldx21, double* x22, lapack_int ldx22, double* theta, double* u1, lapack_int ldu1, double* u2, lapack_int ldu2, double* v1t, lapack_int ldv1t, double* v2t, lapack_int ldv2t ); lapack_int LAPACKE_dorcsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, double* x11, lapack_int ldx11, double* x12, lapack_int ldx12, double* x21, lapack_int ldx21, double* x22, lapack_int ldx22, double* theta, double* u1, lapack_int ldu1, double* u2, lapack_int ldu2, double* v1t, lapack_int ldv1t, double* v2t, lapack_int ldv2t, double* work, lapack_int lwork, lapack_int* iwork ); lapack_int LAPACKE_dsyconv( int 
matrix_order, char uplo, char way, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_dsyconv_work( int matrix_order, char uplo, char way, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv, double* work ); lapack_int LAPACKE_dsyswapr( int matrix_order, char uplo, lapack_int n, double* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_dsyswapr_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_dsytri2( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_dsytri2_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_dsytri2x( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv, lapack_int nb ); lapack_int LAPACKE_dsytri2x_work( int matrix_order, char uplo, lapack_int n, double* a, lapack_int lda, const lapack_int* ipiv, double* work, lapack_int nb ); lapack_int LAPACKE_dsytrs2( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_dsytrs2_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const double* a, lapack_int lda, const lapack_int* ipiv, double* b, lapack_int ldb, double* work ); lapack_int LAPACKE_sbbcsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, float* theta, float* phi, float* u1, lapack_int ldu1, float* u2, lapack_int ldu2, float* v1t, lapack_int ldv1t, float* v2t, lapack_int ldv2t, float* b11d, float* b11e, float* b12d, float* b12e, float* b21d, float* b21e, float* b22d, float* b22e ); lapack_int LAPACKE_sbbcsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, 
lapack_int q, float* theta, float* phi,
    float* u1, lapack_int ldu1, float* u2, lapack_int ldu2,
    float* v1t, lapack_int ldv1t, float* v2t, lapack_int ldv2t,
    float* b11d, float* b11e, float* b12d, float* b12e,
    float* b21d, float* b21e, float* b22d, float* b22e,
    float* work, lapack_int lwork );

/*
 * Single-precision sorbdb and sorcsd prototypes with their *_work variants,
 * followed by ssyconv.  The *_work forms repeat the plain signature and
 * append work/lwork (sorcsd_work also appends iwork).
 */
lapack_int LAPACKE_sorbdb( int matrix_order, char trans, char signs,
    lapack_int m, lapack_int p, lapack_int q,
    float* x11, lapack_int ldx11, float* x12, lapack_int ldx12,
    float* x21, lapack_int ldx21, float* x22, lapack_int ldx22,
    float* theta, float* phi,
    float* taup1, float* taup2, float* tauq1, float* tauq2 );
lapack_int LAPACKE_sorbdb_work( int matrix_order, char trans, char signs,
    lapack_int m, lapack_int p, lapack_int q,
    float* x11, lapack_int ldx11, float* x12, lapack_int ldx12,
    float* x21, lapack_int ldx21, float* x22, lapack_int ldx22,
    float* theta, float* phi,
    float* taup1, float* taup2, float* tauq1, float* tauq2,
    float* work, lapack_int lwork );

lapack_int LAPACKE_sorcsd( int matrix_order, char jobu1, char jobu2, char jobv1t,
    char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q,
    float* x11, lapack_int ldx11, float* x12, lapack_int ldx12,
    float* x21, lapack_int ldx21, float* x22, lapack_int ldx22,
    float* theta,
    float* u1, lapack_int ldu1, float* u2, lapack_int ldu2,
    float* v1t, lapack_int ldv1t, float* v2t, lapack_int ldv2t );
lapack_int LAPACKE_sorcsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t,
    char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q,
    float* x11, lapack_int ldx11, float* x12, lapack_int ldx12,
    float* x21, lapack_int ldx21, float* x22, lapack_int ldx22,
    float* theta,
    float* u1, lapack_int ldu1, float* u2, lapack_int ldu2,
    float* v1t, lapack_int ldv1t, float* v2t, lapack_int ldv2t,
    float* work, lapack_int lwork, lapack_int* iwork );

lapack_int LAPACKE_ssyconv( int matrix_order, char uplo, char way, lapack_int n,
    float* a, lapack_int lda, const lapack_int* ipiv );

/* Return type of the declaration that continues on the following physical line. */
lapack_int
LAPACKE_ssyconv_work( int matrix_order, char uplo, char way, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv, float* work ); lapack_int LAPACKE_ssyswapr( int matrix_order, char uplo, lapack_int n, float* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_ssyswapr_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_ssytri2( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_ssytri2_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_ssytri2x( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv, lapack_int nb ); lapack_int LAPACKE_ssytri2x_work( int matrix_order, char uplo, lapack_int n, float* a, lapack_int lda, const lapack_int* ipiv, float* work, lapack_int nb ); lapack_int LAPACKE_ssytrs2( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_ssytrs2_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const float* a, lapack_int lda, const lapack_int* ipiv, float* b, lapack_int ldb, float* work ); lapack_int LAPACKE_zbbcsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, double* theta, double* phi, lapack_complex_double* u1, lapack_int ldu1, lapack_complex_double* u2, lapack_int ldu2, lapack_complex_double* v1t, lapack_int ldv1t, lapack_complex_double* v2t, lapack_int ldv2t, double* b11d, double* b11e, double* b12d, double* b12e, double* b21d, double* b21e, double* b22d, double* b22e ); lapack_int LAPACKE_zbbcsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, lapack_int m, lapack_int p, lapack_int q, double* theta, double* phi, 
lapack_complex_double* u1, lapack_int ldu1, lapack_complex_double* u2, lapack_int ldu2,
    lapack_complex_double* v1t, lapack_int ldv1t, lapack_complex_double* v2t, lapack_int ldv2t,
    double* b11d, double* b11e, double* b12d, double* b12e,
    double* b21d, double* b21e, double* b22d, double* b22e,
    double* rwork, lapack_int lrwork );

/*
 * Double-complex (lapack_complex_double) zheswapr, zhetri2, zhetri2x and
 * zhetrs2 prototypes (plain and *_work), followed by zsyconv.
 */
lapack_int LAPACKE_zheswapr( int matrix_order, char uplo, lapack_int n,
    lapack_complex_double* a, lapack_int i1, lapack_int i2 );
lapack_int LAPACKE_zheswapr_work( int matrix_order, char uplo, lapack_int n,
    lapack_complex_double* a, lapack_int i1, lapack_int i2 );

lapack_int LAPACKE_zhetri2( int matrix_order, char uplo, lapack_int n,
    lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv );
lapack_int LAPACKE_zhetri2_work( int matrix_order, char uplo, lapack_int n,
    lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv,
    lapack_complex_double* work, lapack_int lwork );

lapack_int LAPACKE_zhetri2x( int matrix_order, char uplo, lapack_int n,
    lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_int nb );
lapack_int LAPACKE_zhetri2x_work( int matrix_order, char uplo, lapack_int n,
    lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv,
    lapack_complex_double* work, lapack_int nb );

lapack_int LAPACKE_zhetrs2( int matrix_order, char uplo, lapack_int n, lapack_int nrhs,
    const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv,
    lapack_complex_double* b, lapack_int ldb );
lapack_int LAPACKE_zhetrs2_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs,
    const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv,
    lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work );

lapack_int LAPACKE_zsyconv( int matrix_order, char uplo, char way, lapack_int n,
    lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv );

/* Declaration continues on the following physical line. */
lapack_int LAPACKE_zsyconv_work( int matrix_order, char uplo, char way, lapack_int n,
    lapack_complex_double* a, lapack_int lda, const
lapack_int* ipiv, lapack_complex_double* work ); lapack_int LAPACKE_zsyswapr( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_zsyswapr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int i1, lapack_int i2 ); lapack_int LAPACKE_zsytri2( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv ); lapack_int LAPACKE_zsytri2_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_zsytri2x( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_int nb ); lapack_int LAPACKE_zsytri2x_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int nb ); lapack_int LAPACKE_zsytrs2( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_zsytrs2_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, const lapack_complex_double* a, lapack_int lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work ); lapack_int LAPACKE_zunbdb( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_double* x11, lapack_int ldx11, lapack_complex_double* x12, lapack_int ldx12, lapack_complex_double* x21, lapack_int ldx21, lapack_complex_double* x22, lapack_int ldx22, double* theta, double* phi, lapack_complex_double* taup1, lapack_complex_double* taup2, lapack_complex_double* tauq1, lapack_complex_double* tauq2 ); lapack_int LAPACKE_zunbdb_work( int matrix_order, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, 
lapack_complex_double* x11, lapack_int ldx11, lapack_complex_double* x12, lapack_int ldx12, lapack_complex_double* x21, lapack_int ldx21, lapack_complex_double* x22, lapack_int ldx22, double* theta, double* phi, lapack_complex_double* taup1, lapack_complex_double* taup2, lapack_complex_double* tauq1, lapack_complex_double* tauq2, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_zuncsd( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_double* x11, lapack_int ldx11, lapack_complex_double* x12, lapack_int ldx12, lapack_complex_double* x21, lapack_int ldx21, lapack_complex_double* x22, lapack_int ldx22, double* theta, lapack_complex_double* u1, lapack_int ldu1, lapack_complex_double* u2, lapack_int ldu2, lapack_complex_double* v1t, lapack_int ldv1t, lapack_complex_double* v2t, lapack_int ldv2t ); lapack_int LAPACKE_zuncsd_work( int matrix_order, char jobu1, char jobu2, char jobv1t, char jobv2t, char trans, char signs, lapack_int m, lapack_int p, lapack_int q, lapack_complex_double* x11, lapack_int ldx11, lapack_complex_double* x12, lapack_int ldx12, lapack_complex_double* x21, lapack_int ldx21, lapack_complex_double* x22, lapack_int ldx22, double* theta, lapack_complex_double* u1, lapack_int ldu1, lapack_complex_double* u2, lapack_int ldu2, lapack_complex_double* v1t, lapack_int ldv1t, lapack_complex_double* v2t, lapack_int ldv2t, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int lrwork, lapack_int* iwork ); //LAPACK 3.4.0 lapack_int LAPACKE_sgemqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* c, lapack_int ldc ); lapack_int LAPACKE_dgemqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const double* v, lapack_int ldv, const double* t, lapack_int ldt, 
double* c, lapack_int ldc ); lapack_int LAPACKE_cgemqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* c, lapack_int ldc ); lapack_int LAPACKE_zgemqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* c, lapack_int ldc ); lapack_int LAPACKE_sgeqrt( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, float* a, lapack_int lda, float* t, lapack_int ldt ); lapack_int LAPACKE_dgeqrt( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, double* a, lapack_int lda, double* t, lapack_int ldt ); lapack_int LAPACKE_cgeqrt( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, lapack_complex_float* a, lapack_int lda, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zgeqrt( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, lapack_complex_double* a, lapack_int lda, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_sgeqrt2( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* t, lapack_int ldt ); lapack_int LAPACKE_dgeqrt2( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* t, lapack_int ldt ); lapack_int LAPACKE_cgeqrt2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zgeqrt2( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_sgeqrt3( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* t, lapack_int ldt ); lapack_int LAPACKE_dgeqrt3( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, 
double* t, lapack_int ldt ); lapack_int LAPACKE_cgeqrt3( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zgeqrt3( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_stpmqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dtpmqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_ctpmqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztpmqrt( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_dtpqrt( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_int nb, double* a, lapack_int lda, double* b, lapack_int ldb, double* t, lapack_int ldt ); lapack_int LAPACKE_ctpqrt( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_int nb, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_ztpqrt( int matrix_order, lapack_int m, lapack_int n, 
lapack_int l, lapack_int nb, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_stpqrt2( int matrix_order, lapack_int m, lapack_int n, lapack_int l, float* a, lapack_int lda, float* b, lapack_int ldb, float* t, lapack_int ldt ); lapack_int LAPACKE_dtpqrt2( int matrix_order, lapack_int m, lapack_int n, lapack_int l, double* a, lapack_int lda, double* b, lapack_int ldb, double* t, lapack_int ldt ); lapack_int LAPACKE_ctpqrt2( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_ztpqrt2( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_stprfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* a, lapack_int lda, float* b, lapack_int ldb ); lapack_int LAPACKE_dtprfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* a, lapack_int lda, double* b, lapack_int ldb ); lapack_int LAPACKE_ctprfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_ztprfb( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_double* v, lapack_int ldv, const 
lapack_complex_double* t, lapack_int ldt, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_sgemqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* c, lapack_int ldc, float* work ); lapack_int LAPACKE_dgemqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* c, lapack_int ldc, double* work ); lapack_int LAPACKE_cgemqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* c, lapack_int ldc, lapack_complex_float* work ); lapack_int LAPACKE_zgemqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int nb, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* c, lapack_int ldc, lapack_complex_double* work ); lapack_int LAPACKE_sgeqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, float* a, lapack_int lda, float* t, lapack_int ldt, float* work ); lapack_int LAPACKE_dgeqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, double* a, lapack_int lda, double* t, lapack_int ldt, double* work ); lapack_int LAPACKE_cgeqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, lapack_complex_float* a, lapack_int lda, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* work ); lapack_int LAPACKE_zgeqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int nb, lapack_complex_double* a, lapack_int lda, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* work ); lapack_int LAPACKE_sgeqrt2_work( int matrix_order, lapack_int 
m, lapack_int n, float* a, lapack_int lda, float* t, lapack_int ldt ); lapack_int LAPACKE_dgeqrt2_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* t, lapack_int ldt ); lapack_int LAPACKE_cgeqrt2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zgeqrt2_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_sgeqrt3_work( int matrix_order, lapack_int m, lapack_int n, float* a, lapack_int lda, float* t, lapack_int ldt ); lapack_int LAPACKE_dgeqrt3_work( int matrix_order, lapack_int m, lapack_int n, double* a, lapack_int lda, double* t, lapack_int ldt ); lapack_int LAPACKE_cgeqrt3_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_zgeqrt3_work( int matrix_order, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_stpmqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* a, lapack_int lda, float* b, lapack_int ldb, float* work ); lapack_int LAPACKE_dtpmqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* a, lapack_int lda, double* b, lapack_int ldb, double* work ); lapack_int LAPACKE_ctpmqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, 
lapack_int ldb, lapack_complex_float* work ); lapack_int LAPACKE_ztpmqrt_work( int matrix_order, char side, char trans, lapack_int m, lapack_int n, lapack_int k, lapack_int l, lapack_int nb, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work ); lapack_int LAPACKE_dtpqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_int nb, double* a, lapack_int lda, double* b, lapack_int ldb, double* t, lapack_int ldt, double* work ); lapack_int LAPACKE_ctpqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_int nb, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* t, lapack_int ldt, lapack_complex_float* work ); lapack_int LAPACKE_ztpqrt_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_int nb, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* t, lapack_int ldt, lapack_complex_double* work ); lapack_int LAPACKE_stpqrt2_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, float* a, lapack_int lda, float* b, lapack_int ldb, float* t, lapack_int ldt ); lapack_int LAPACKE_dtpqrt2_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, double* a, lapack_int lda, double* b, lapack_int ldb, double* t, lapack_int ldt ); lapack_int LAPACKE_ctpqrt2_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* t, lapack_int ldt ); lapack_int LAPACKE_ztpqrt2_work( int matrix_order, lapack_int m, lapack_int n, lapack_int l, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* t, lapack_int ldt ); lapack_int LAPACKE_stprfb_work( int matrix_order, char side, char trans, char 
direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const float* v, lapack_int ldv, const float* t, lapack_int ldt, float* a, lapack_int lda, float* b, lapack_int ldb, const float* work, lapack_int ldwork ); lapack_int LAPACKE_dtprfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const double* v, lapack_int ldv, const double* t, lapack_int ldt, double* a, lapack_int lda, double* b, lapack_int ldb, const double* work, lapack_int ldwork ); lapack_int LAPACKE_ctprfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_float* v, lapack_int ldv, const lapack_complex_float* t, lapack_int ldt, lapack_complex_float* a, lapack_int lda, lapack_complex_float* b, lapack_int ldb, const float* work, lapack_int ldwork ); lapack_int LAPACKE_ztprfb_work( int matrix_order, char side, char trans, char direct, char storev, lapack_int m, lapack_int n, lapack_int k, lapack_int l, const lapack_complex_double* v, lapack_int ldv, const lapack_complex_double* t, lapack_int ldt, lapack_complex_double* a, lapack_int lda, lapack_complex_double* b, lapack_int ldb, const double* work, lapack_int ldwork ); //LAPACK 3.X.X lapack_int LAPACKE_ssysv_rook( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb ); lapack_int LAPACKE_dsysv_rook( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb ); lapack_int LAPACKE_csysv_rook( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb ); lapack_int LAPACKE_zsysv_rook( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, 
lapack_complex_double* b, lapack_int ldb ); lapack_int LAPACKE_csyr( int matrix_order, char uplo, lapack_int n, lapack_complex_float alpha, const lapack_complex_float* x, lapack_int incx, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zsyr( int matrix_order, char uplo, lapack_int n, lapack_complex_double alpha, const lapack_complex_double* x, lapack_int incx, lapack_complex_double* a, lapack_int lda ); lapack_int LAPACKE_ssysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, float* a, lapack_int lda, lapack_int* ipiv, float* b, lapack_int ldb, float* work, lapack_int lwork ); lapack_int LAPACKE_dsysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, double* a, lapack_int lda, lapack_int* ipiv, double* b, lapack_int ldb, double* work, lapack_int lwork ); lapack_int LAPACKE_csysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_float* a, lapack_int lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int ldb, lapack_complex_float* work, lapack_int lwork ); lapack_int LAPACKE_zsysv_rook_work( int matrix_order, char uplo, lapack_int n, lapack_int nrhs, lapack_complex_double* a, lapack_int lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int ldb, lapack_complex_double* work, lapack_int lwork ); lapack_int LAPACKE_csyr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_float alpha, const lapack_complex_float* x, lapack_int incx, lapack_complex_float* a, lapack_int lda ); lapack_int LAPACKE_zsyr_work( int matrix_order, char uplo, lapack_int n, lapack_complex_double alpha, const lapack_complex_double* x, lapack_int incx, lapack_complex_double* a, lapack_int lda ); void LAPACKE_ilaver( const lapack_int* vers_major, const lapack_int* vers_minor, const lapack_int* vers_patch ); #define LAPACK_sgetrf LAPACK_GLOBAL(sgetrf,SGETRF) #define LAPACK_dgetrf LAPACK_GLOBAL(dgetrf,DGETRF) #define LAPACK_cgetrf LAPACK_GLOBAL(cgetrf,CGETRF) #define LAPACK_zgetrf 
LAPACK_GLOBAL(zgetrf,ZGETRF) #define LAPACK_sgbtrf LAPACK_GLOBAL(sgbtrf,SGBTRF) #define LAPACK_dgbtrf LAPACK_GLOBAL(dgbtrf,DGBTRF) #define LAPACK_cgbtrf LAPACK_GLOBAL(cgbtrf,CGBTRF) #define LAPACK_zgbtrf LAPACK_GLOBAL(zgbtrf,ZGBTRF) #define LAPACK_sgttrf LAPACK_GLOBAL(sgttrf,SGTTRF) #define LAPACK_dgttrf LAPACK_GLOBAL(dgttrf,DGTTRF) #define LAPACK_cgttrf LAPACK_GLOBAL(cgttrf,CGTTRF) #define LAPACK_zgttrf LAPACK_GLOBAL(zgttrf,ZGTTRF) #define LAPACK_spotrf LAPACK_GLOBAL(spotrf,SPOTRF) #define LAPACK_dpotrf LAPACK_GLOBAL(dpotrf,DPOTRF) #define LAPACK_cpotrf LAPACK_GLOBAL(cpotrf,CPOTRF) #define LAPACK_zpotrf LAPACK_GLOBAL(zpotrf,ZPOTRF) #define LAPACK_dpstrf LAPACK_GLOBAL(dpstrf,DPSTRF) #define LAPACK_spstrf LAPACK_GLOBAL(spstrf,SPSTRF) #define LAPACK_zpstrf LAPACK_GLOBAL(zpstrf,ZPSTRF) #define LAPACK_cpstrf LAPACK_GLOBAL(cpstrf,CPSTRF) #define LAPACK_dpftrf LAPACK_GLOBAL(dpftrf,DPFTRF) #define LAPACK_spftrf LAPACK_GLOBAL(spftrf,SPFTRF) #define LAPACK_zpftrf LAPACK_GLOBAL(zpftrf,ZPFTRF) #define LAPACK_cpftrf LAPACK_GLOBAL(cpftrf,CPFTRF) #define LAPACK_spptrf LAPACK_GLOBAL(spptrf,SPPTRF) #define LAPACK_dpptrf LAPACK_GLOBAL(dpptrf,DPPTRF) #define LAPACK_cpptrf LAPACK_GLOBAL(cpptrf,CPPTRF) #define LAPACK_zpptrf LAPACK_GLOBAL(zpptrf,ZPPTRF) #define LAPACK_spbtrf LAPACK_GLOBAL(spbtrf,SPBTRF) #define LAPACK_dpbtrf LAPACK_GLOBAL(dpbtrf,DPBTRF) #define LAPACK_cpbtrf LAPACK_GLOBAL(cpbtrf,CPBTRF) #define LAPACK_zpbtrf LAPACK_GLOBAL(zpbtrf,ZPBTRF) #define LAPACK_spttrf LAPACK_GLOBAL(spttrf,SPTTRF) #define LAPACK_dpttrf LAPACK_GLOBAL(dpttrf,DPTTRF) #define LAPACK_cpttrf LAPACK_GLOBAL(cpttrf,CPTTRF) #define LAPACK_zpttrf LAPACK_GLOBAL(zpttrf,ZPTTRF) #define LAPACK_ssytrf LAPACK_GLOBAL(ssytrf,SSYTRF) #define LAPACK_dsytrf LAPACK_GLOBAL(dsytrf,DSYTRF) #define LAPACK_csytrf LAPACK_GLOBAL(csytrf,CSYTRF) #define LAPACK_zsytrf LAPACK_GLOBAL(zsytrf,ZSYTRF) #define LAPACK_chetrf LAPACK_GLOBAL(chetrf,CHETRF) #define LAPACK_zhetrf LAPACK_GLOBAL(zhetrf,ZHETRF) #define LAPACK_ssptrf 
LAPACK_GLOBAL(ssptrf,SSPTRF) #define LAPACK_dsptrf LAPACK_GLOBAL(dsptrf,DSPTRF) #define LAPACK_csptrf LAPACK_GLOBAL(csptrf,CSPTRF) #define LAPACK_zsptrf LAPACK_GLOBAL(zsptrf,ZSPTRF) #define LAPACK_chptrf LAPACK_GLOBAL(chptrf,CHPTRF) #define LAPACK_zhptrf LAPACK_GLOBAL(zhptrf,ZHPTRF) #define LAPACK_sgetrs LAPACK_GLOBAL(sgetrs,SGETRS) #define LAPACK_dgetrs LAPACK_GLOBAL(dgetrs,DGETRS) #define LAPACK_cgetrs LAPACK_GLOBAL(cgetrs,CGETRS) #define LAPACK_zgetrs LAPACK_GLOBAL(zgetrs,ZGETRS) #define LAPACK_sgbtrs LAPACK_GLOBAL(sgbtrs,SGBTRS) #define LAPACK_dgbtrs LAPACK_GLOBAL(dgbtrs,DGBTRS) #define LAPACK_cgbtrs LAPACK_GLOBAL(cgbtrs,CGBTRS) #define LAPACK_zgbtrs LAPACK_GLOBAL(zgbtrs,ZGBTRS) #define LAPACK_sgttrs LAPACK_GLOBAL(sgttrs,SGTTRS) #define LAPACK_dgttrs LAPACK_GLOBAL(dgttrs,DGTTRS) #define LAPACK_cgttrs LAPACK_GLOBAL(cgttrs,CGTTRS) #define LAPACK_zgttrs LAPACK_GLOBAL(zgttrs,ZGTTRS) #define LAPACK_spotrs LAPACK_GLOBAL(spotrs,SPOTRS) #define LAPACK_dpotrs LAPACK_GLOBAL(dpotrs,DPOTRS) #define LAPACK_cpotrs LAPACK_GLOBAL(cpotrs,CPOTRS) #define LAPACK_zpotrs LAPACK_GLOBAL(zpotrs,ZPOTRS) #define LAPACK_dpftrs LAPACK_GLOBAL(dpftrs,DPFTRS) #define LAPACK_spftrs LAPACK_GLOBAL(spftrs,SPFTRS) #define LAPACK_zpftrs LAPACK_GLOBAL(zpftrs,ZPFTRS) #define LAPACK_cpftrs LAPACK_GLOBAL(cpftrs,CPFTRS) #define LAPACK_spptrs LAPACK_GLOBAL(spptrs,SPPTRS) #define LAPACK_dpptrs LAPACK_GLOBAL(dpptrs,DPPTRS) #define LAPACK_cpptrs LAPACK_GLOBAL(cpptrs,CPPTRS) #define LAPACK_zpptrs LAPACK_GLOBAL(zpptrs,ZPPTRS) #define LAPACK_spbtrs LAPACK_GLOBAL(spbtrs,SPBTRS) #define LAPACK_dpbtrs LAPACK_GLOBAL(dpbtrs,DPBTRS) #define LAPACK_cpbtrs LAPACK_GLOBAL(cpbtrs,CPBTRS) #define LAPACK_zpbtrs LAPACK_GLOBAL(zpbtrs,ZPBTRS) #define LAPACK_spttrs LAPACK_GLOBAL(spttrs,SPTTRS) #define LAPACK_dpttrs LAPACK_GLOBAL(dpttrs,DPTTRS) #define LAPACK_cpttrs LAPACK_GLOBAL(cpttrs,CPTTRS) #define LAPACK_zpttrs LAPACK_GLOBAL(zpttrs,ZPTTRS) #define LAPACK_ssytrs LAPACK_GLOBAL(ssytrs,SSYTRS) #define LAPACK_dsytrs 
LAPACK_GLOBAL(dsytrs,DSYTRS) #define LAPACK_csytrs LAPACK_GLOBAL(csytrs,CSYTRS) #define LAPACK_zsytrs LAPACK_GLOBAL(zsytrs,ZSYTRS) #define LAPACK_chetrs LAPACK_GLOBAL(chetrs,CHETRS) #define LAPACK_zhetrs LAPACK_GLOBAL(zhetrs,ZHETRS) #define LAPACK_ssptrs LAPACK_GLOBAL(ssptrs,SSPTRS) #define LAPACK_dsptrs LAPACK_GLOBAL(dsptrs,DSPTRS) #define LAPACK_csptrs LAPACK_GLOBAL(csptrs,CSPTRS) #define LAPACK_zsptrs LAPACK_GLOBAL(zsptrs,ZSPTRS) #define LAPACK_chptrs LAPACK_GLOBAL(chptrs,CHPTRS) #define LAPACK_zhptrs LAPACK_GLOBAL(zhptrs,ZHPTRS) #define LAPACK_strtrs LAPACK_GLOBAL(strtrs,STRTRS) #define LAPACK_dtrtrs LAPACK_GLOBAL(dtrtrs,DTRTRS) #define LAPACK_ctrtrs LAPACK_GLOBAL(ctrtrs,CTRTRS) #define LAPACK_ztrtrs LAPACK_GLOBAL(ztrtrs,ZTRTRS) #define LAPACK_stptrs LAPACK_GLOBAL(stptrs,STPTRS) #define LAPACK_dtptrs LAPACK_GLOBAL(dtptrs,DTPTRS) #define LAPACK_ctptrs LAPACK_GLOBAL(ctptrs,CTPTRS) #define LAPACK_ztptrs LAPACK_GLOBAL(ztptrs,ZTPTRS) #define LAPACK_stbtrs LAPACK_GLOBAL(stbtrs,STBTRS) #define LAPACK_dtbtrs LAPACK_GLOBAL(dtbtrs,DTBTRS) #define LAPACK_ctbtrs LAPACK_GLOBAL(ctbtrs,CTBTRS) #define LAPACK_ztbtrs LAPACK_GLOBAL(ztbtrs,ZTBTRS) #define LAPACK_sgecon LAPACK_GLOBAL(sgecon,SGECON) #define LAPACK_dgecon LAPACK_GLOBAL(dgecon,DGECON) #define LAPACK_cgecon LAPACK_GLOBAL(cgecon,CGECON) #define LAPACK_zgecon LAPACK_GLOBAL(zgecon,ZGECON) #define LAPACK_sgbcon LAPACK_GLOBAL(sgbcon,SGBCON) #define LAPACK_dgbcon LAPACK_GLOBAL(dgbcon,DGBCON) #define LAPACK_cgbcon LAPACK_GLOBAL(cgbcon,CGBCON) #define LAPACK_zgbcon LAPACK_GLOBAL(zgbcon,ZGBCON) #define LAPACK_sgtcon LAPACK_GLOBAL(sgtcon,SGTCON) #define LAPACK_dgtcon LAPACK_GLOBAL(dgtcon,DGTCON) #define LAPACK_cgtcon LAPACK_GLOBAL(cgtcon,CGTCON) #define LAPACK_zgtcon LAPACK_GLOBAL(zgtcon,ZGTCON) #define LAPACK_spocon LAPACK_GLOBAL(spocon,SPOCON) #define LAPACK_dpocon LAPACK_GLOBAL(dpocon,DPOCON) #define LAPACK_cpocon LAPACK_GLOBAL(cpocon,CPOCON) #define LAPACK_zpocon LAPACK_GLOBAL(zpocon,ZPOCON) #define LAPACK_sppcon 
LAPACK_GLOBAL(sppcon,SPPCON) #define LAPACK_dppcon LAPACK_GLOBAL(dppcon,DPPCON) #define LAPACK_cppcon LAPACK_GLOBAL(cppcon,CPPCON) #define LAPACK_zppcon LAPACK_GLOBAL(zppcon,ZPPCON) #define LAPACK_spbcon LAPACK_GLOBAL(spbcon,SPBCON) #define LAPACK_dpbcon LAPACK_GLOBAL(dpbcon,DPBCON) #define LAPACK_cpbcon LAPACK_GLOBAL(cpbcon,CPBCON) #define LAPACK_zpbcon LAPACK_GLOBAL(zpbcon,ZPBCON) #define LAPACK_sptcon LAPACK_GLOBAL(sptcon,SPTCON) #define LAPACK_dptcon LAPACK_GLOBAL(dptcon,DPTCON) #define LAPACK_cptcon LAPACK_GLOBAL(cptcon,CPTCON) #define LAPACK_zptcon LAPACK_GLOBAL(zptcon,ZPTCON) #define LAPACK_ssycon LAPACK_GLOBAL(ssycon,SSYCON) #define LAPACK_dsycon LAPACK_GLOBAL(dsycon,DSYCON) #define LAPACK_csycon LAPACK_GLOBAL(csycon,CSYCON) #define LAPACK_zsycon LAPACK_GLOBAL(zsycon,ZSYCON) #define LAPACK_checon LAPACK_GLOBAL(checon,CHECON) #define LAPACK_zhecon LAPACK_GLOBAL(zhecon,ZHECON) #define LAPACK_sspcon LAPACK_GLOBAL(sspcon,SSPCON) #define LAPACK_dspcon LAPACK_GLOBAL(dspcon,DSPCON) #define LAPACK_cspcon LAPACK_GLOBAL(cspcon,CSPCON) #define LAPACK_zspcon LAPACK_GLOBAL(zspcon,ZSPCON) #define LAPACK_chpcon LAPACK_GLOBAL(chpcon,CHPCON) #define LAPACK_zhpcon LAPACK_GLOBAL(zhpcon,ZHPCON) #define LAPACK_strcon LAPACK_GLOBAL(strcon,STRCON) #define LAPACK_dtrcon LAPACK_GLOBAL(dtrcon,DTRCON) #define LAPACK_ctrcon LAPACK_GLOBAL(ctrcon,CTRCON) #define LAPACK_ztrcon LAPACK_GLOBAL(ztrcon,ZTRCON) #define LAPACK_stpcon LAPACK_GLOBAL(stpcon,STPCON) #define LAPACK_dtpcon LAPACK_GLOBAL(dtpcon,DTPCON) #define LAPACK_ctpcon LAPACK_GLOBAL(ctpcon,CTPCON) #define LAPACK_ztpcon LAPACK_GLOBAL(ztpcon,ZTPCON) #define LAPACK_stbcon LAPACK_GLOBAL(stbcon,STBCON) #define LAPACK_dtbcon LAPACK_GLOBAL(dtbcon,DTBCON) #define LAPACK_ctbcon LAPACK_GLOBAL(ctbcon,CTBCON) #define LAPACK_ztbcon LAPACK_GLOBAL(ztbcon,ZTBCON) #define LAPACK_sgerfs LAPACK_GLOBAL(sgerfs,SGERFS) #define LAPACK_dgerfs LAPACK_GLOBAL(dgerfs,DGERFS) #define LAPACK_cgerfs LAPACK_GLOBAL(cgerfs,CGERFS) #define LAPACK_zgerfs 
LAPACK_GLOBAL(zgerfs,ZGERFS) #define LAPACK_dgerfsx LAPACK_GLOBAL(dgerfsx,DGERFSX) #define LAPACK_sgerfsx LAPACK_GLOBAL(sgerfsx,SGERFSX) #define LAPACK_zgerfsx LAPACK_GLOBAL(zgerfsx,ZGERFSX) #define LAPACK_cgerfsx LAPACK_GLOBAL(cgerfsx,CGERFSX) #define LAPACK_sgbrfs LAPACK_GLOBAL(sgbrfs,SGBRFS) #define LAPACK_dgbrfs LAPACK_GLOBAL(dgbrfs,DGBRFS) #define LAPACK_cgbrfs LAPACK_GLOBAL(cgbrfs,CGBRFS) #define LAPACK_zgbrfs LAPACK_GLOBAL(zgbrfs,ZGBRFS) #define LAPACK_dgbrfsx LAPACK_GLOBAL(dgbrfsx,DGBRFSX) #define LAPACK_sgbrfsx LAPACK_GLOBAL(sgbrfsx,SGBRFSX) #define LAPACK_zgbrfsx LAPACK_GLOBAL(zgbrfsx,ZGBRFSX) #define LAPACK_cgbrfsx LAPACK_GLOBAL(cgbrfsx,CGBRFSX) #define LAPACK_sgtrfs LAPACK_GLOBAL(sgtrfs,SGTRFS) #define LAPACK_dgtrfs LAPACK_GLOBAL(dgtrfs,DGTRFS) #define LAPACK_cgtrfs LAPACK_GLOBAL(cgtrfs,CGTRFS) #define LAPACK_zgtrfs LAPACK_GLOBAL(zgtrfs,ZGTRFS) #define LAPACK_sporfs LAPACK_GLOBAL(sporfs,SPORFS) #define LAPACK_dporfs LAPACK_GLOBAL(dporfs,DPORFS) #define LAPACK_cporfs LAPACK_GLOBAL(cporfs,CPORFS) #define LAPACK_zporfs LAPACK_GLOBAL(zporfs,ZPORFS) #define LAPACK_dporfsx LAPACK_GLOBAL(dporfsx,DPORFSX) #define LAPACK_sporfsx LAPACK_GLOBAL(sporfsx,SPORFSX) #define LAPACK_zporfsx LAPACK_GLOBAL(zporfsx,ZPORFSX) #define LAPACK_cporfsx LAPACK_GLOBAL(cporfsx,CPORFSX) #define LAPACK_spprfs LAPACK_GLOBAL(spprfs,SPPRFS) #define LAPACK_dpprfs LAPACK_GLOBAL(dpprfs,DPPRFS) #define LAPACK_cpprfs LAPACK_GLOBAL(cpprfs,CPPRFS) #define LAPACK_zpprfs LAPACK_GLOBAL(zpprfs,ZPPRFS) #define LAPACK_spbrfs LAPACK_GLOBAL(spbrfs,SPBRFS) #define LAPACK_dpbrfs LAPACK_GLOBAL(dpbrfs,DPBRFS) #define LAPACK_cpbrfs LAPACK_GLOBAL(cpbrfs,CPBRFS) #define LAPACK_zpbrfs LAPACK_GLOBAL(zpbrfs,ZPBRFS) #define LAPACK_sptrfs LAPACK_GLOBAL(sptrfs,SPTRFS) #define LAPACK_dptrfs LAPACK_GLOBAL(dptrfs,DPTRFS) #define LAPACK_cptrfs LAPACK_GLOBAL(cptrfs,CPTRFS) #define LAPACK_zptrfs LAPACK_GLOBAL(zptrfs,ZPTRFS) #define LAPACK_ssyrfs LAPACK_GLOBAL(ssyrfs,SSYRFS) #define LAPACK_dsyrfs 
LAPACK_GLOBAL(dsyrfs,DSYRFS) #define LAPACK_csyrfs LAPACK_GLOBAL(csyrfs,CSYRFS) #define LAPACK_zsyrfs LAPACK_GLOBAL(zsyrfs,ZSYRFS) #define LAPACK_dsyrfsx LAPACK_GLOBAL(dsyrfsx,DSYRFSX) #define LAPACK_ssyrfsx LAPACK_GLOBAL(ssyrfsx,SSYRFSX) #define LAPACK_zsyrfsx LAPACK_GLOBAL(zsyrfsx,ZSYRFSX) #define LAPACK_csyrfsx LAPACK_GLOBAL(csyrfsx,CSYRFSX) #define LAPACK_cherfs LAPACK_GLOBAL(cherfs,CHERFS) #define LAPACK_zherfs LAPACK_GLOBAL(zherfs,ZHERFS) #define LAPACK_zherfsx LAPACK_GLOBAL(zherfsx,ZHERFSX) #define LAPACK_cherfsx LAPACK_GLOBAL(cherfsx,CHERFSX) #define LAPACK_ssprfs LAPACK_GLOBAL(ssprfs,SSPRFS) #define LAPACK_dsprfs LAPACK_GLOBAL(dsprfs,DSPRFS) #define LAPACK_csprfs LAPACK_GLOBAL(csprfs,CSPRFS) #define LAPACK_zsprfs LAPACK_GLOBAL(zsprfs,ZSPRFS) #define LAPACK_chprfs LAPACK_GLOBAL(chprfs,CHPRFS) #define LAPACK_zhprfs LAPACK_GLOBAL(zhprfs,ZHPRFS) #define LAPACK_strrfs LAPACK_GLOBAL(strrfs,STRRFS) #define LAPACK_dtrrfs LAPACK_GLOBAL(dtrrfs,DTRRFS) #define LAPACK_ctrrfs LAPACK_GLOBAL(ctrrfs,CTRRFS) #define LAPACK_ztrrfs LAPACK_GLOBAL(ztrrfs,ZTRRFS) #define LAPACK_stprfs LAPACK_GLOBAL(stprfs,STPRFS) #define LAPACK_dtprfs LAPACK_GLOBAL(dtprfs,DTPRFS) #define LAPACK_ctprfs LAPACK_GLOBAL(ctprfs,CTPRFS) #define LAPACK_ztprfs LAPACK_GLOBAL(ztprfs,ZTPRFS) #define LAPACK_stbrfs LAPACK_GLOBAL(stbrfs,STBRFS) #define LAPACK_dtbrfs LAPACK_GLOBAL(dtbrfs,DTBRFS) #define LAPACK_ctbrfs LAPACK_GLOBAL(ctbrfs,CTBRFS) #define LAPACK_ztbrfs LAPACK_GLOBAL(ztbrfs,ZTBRFS) #define LAPACK_sgetri LAPACK_GLOBAL(sgetri,SGETRI) #define LAPACK_dgetri LAPACK_GLOBAL(dgetri,DGETRI) #define LAPACK_cgetri LAPACK_GLOBAL(cgetri,CGETRI) #define LAPACK_zgetri LAPACK_GLOBAL(zgetri,ZGETRI) #define LAPACK_spotri LAPACK_GLOBAL(spotri,SPOTRI) #define LAPACK_dpotri LAPACK_GLOBAL(dpotri,DPOTRI) #define LAPACK_cpotri LAPACK_GLOBAL(cpotri,CPOTRI) #define LAPACK_zpotri LAPACK_GLOBAL(zpotri,ZPOTRI) #define LAPACK_dpftri LAPACK_GLOBAL(dpftri,DPFTRI) #define LAPACK_spftri LAPACK_GLOBAL(spftri,SPFTRI) #define 
LAPACK_zpftri LAPACK_GLOBAL(zpftri,ZPFTRI) #define LAPACK_cpftri LAPACK_GLOBAL(cpftri,CPFTRI) #define LAPACK_spptri LAPACK_GLOBAL(spptri,SPPTRI) #define LAPACK_dpptri LAPACK_GLOBAL(dpptri,DPPTRI) #define LAPACK_cpptri LAPACK_GLOBAL(cpptri,CPPTRI) #define LAPACK_zpptri LAPACK_GLOBAL(zpptri,ZPPTRI) #define LAPACK_ssytri LAPACK_GLOBAL(ssytri,SSYTRI) #define LAPACK_dsytri LAPACK_GLOBAL(dsytri,DSYTRI) #define LAPACK_csytri LAPACK_GLOBAL(csytri,CSYTRI) #define LAPACK_zsytri LAPACK_GLOBAL(zsytri,ZSYTRI) #define LAPACK_chetri LAPACK_GLOBAL(chetri,CHETRI) #define LAPACK_zhetri LAPACK_GLOBAL(zhetri,ZHETRI) #define LAPACK_ssptri LAPACK_GLOBAL(ssptri,SSPTRI) #define LAPACK_dsptri LAPACK_GLOBAL(dsptri,DSPTRI) #define LAPACK_csptri LAPACK_GLOBAL(csptri,CSPTRI) #define LAPACK_zsptri LAPACK_GLOBAL(zsptri,ZSPTRI) #define LAPACK_chptri LAPACK_GLOBAL(chptri,CHPTRI) #define LAPACK_zhptri LAPACK_GLOBAL(zhptri,ZHPTRI) #define LAPACK_strtri LAPACK_GLOBAL(strtri,STRTRI) #define LAPACK_dtrtri LAPACK_GLOBAL(dtrtri,DTRTRI) #define LAPACK_ctrtri LAPACK_GLOBAL(ctrtri,CTRTRI) #define LAPACK_ztrtri LAPACK_GLOBAL(ztrtri,ZTRTRI) #define LAPACK_dtftri LAPACK_GLOBAL(dtftri,DTFTRI) #define LAPACK_stftri LAPACK_GLOBAL(stftri,STFTRI) #define LAPACK_ztftri LAPACK_GLOBAL(ztftri,ZTFTRI) #define LAPACK_ctftri LAPACK_GLOBAL(ctftri,CTFTRI) #define LAPACK_stptri LAPACK_GLOBAL(stptri,STPTRI) #define LAPACK_dtptri LAPACK_GLOBAL(dtptri,DTPTRI) #define LAPACK_ctptri LAPACK_GLOBAL(ctptri,CTPTRI) #define LAPACK_ztptri LAPACK_GLOBAL(ztptri,ZTPTRI) #define LAPACK_sgeequ LAPACK_GLOBAL(sgeequ,SGEEQU) #define LAPACK_dgeequ LAPACK_GLOBAL(dgeequ,DGEEQU) #define LAPACK_cgeequ LAPACK_GLOBAL(cgeequ,CGEEQU) #define LAPACK_zgeequ LAPACK_GLOBAL(zgeequ,ZGEEQU) #define LAPACK_dgeequb LAPACK_GLOBAL(dgeequb,DGEEQUB) #define LAPACK_sgeequb LAPACK_GLOBAL(sgeequb,SGEEQUB) #define LAPACK_zgeequb LAPACK_GLOBAL(zgeequb,ZGEEQUB) #define LAPACK_cgeequb LAPACK_GLOBAL(cgeequb,CGEEQUB) #define LAPACK_sgbequ LAPACK_GLOBAL(sgbequ,SGBEQU) 
#define LAPACK_dgbequ LAPACK_GLOBAL(dgbequ,DGBEQU) #define LAPACK_cgbequ LAPACK_GLOBAL(cgbequ,CGBEQU) #define LAPACK_zgbequ LAPACK_GLOBAL(zgbequ,ZGBEQU) #define LAPACK_dgbequb LAPACK_GLOBAL(dgbequb,DGBEQUB) #define LAPACK_sgbequb LAPACK_GLOBAL(sgbequb,SGBEQUB) #define LAPACK_zgbequb LAPACK_GLOBAL(zgbequb,ZGBEQUB) #define LAPACK_cgbequb LAPACK_GLOBAL(cgbequb,CGBEQUB) #define LAPACK_spoequ LAPACK_GLOBAL(spoequ,SPOEQU) #define LAPACK_dpoequ LAPACK_GLOBAL(dpoequ,DPOEQU) #define LAPACK_cpoequ LAPACK_GLOBAL(cpoequ,CPOEQU) #define LAPACK_zpoequ LAPACK_GLOBAL(zpoequ,ZPOEQU) #define LAPACK_dpoequb LAPACK_GLOBAL(dpoequb,DPOEQUB) #define LAPACK_spoequb LAPACK_GLOBAL(spoequb,SPOEQUB) #define LAPACK_zpoequb LAPACK_GLOBAL(zpoequb,ZPOEQUB) #define LAPACK_cpoequb LAPACK_GLOBAL(cpoequb,CPOEQUB) #define LAPACK_sppequ LAPACK_GLOBAL(sppequ,SPPEQU) #define LAPACK_dppequ LAPACK_GLOBAL(dppequ,DPPEQU) #define LAPACK_cppequ LAPACK_GLOBAL(cppequ,CPPEQU) #define LAPACK_zppequ LAPACK_GLOBAL(zppequ,ZPPEQU) #define LAPACK_spbequ LAPACK_GLOBAL(spbequ,SPBEQU) #define LAPACK_dpbequ LAPACK_GLOBAL(dpbequ,DPBEQU) #define LAPACK_cpbequ LAPACK_GLOBAL(cpbequ,CPBEQU) #define LAPACK_zpbequ LAPACK_GLOBAL(zpbequ,ZPBEQU) #define LAPACK_dsyequb LAPACK_GLOBAL(dsyequb,DSYEQUB) #define LAPACK_ssyequb LAPACK_GLOBAL(ssyequb,SSYEQUB) #define LAPACK_zsyequb LAPACK_GLOBAL(zsyequb,ZSYEQUB) #define LAPACK_csyequb LAPACK_GLOBAL(csyequb,CSYEQUB) #define LAPACK_zheequb LAPACK_GLOBAL(zheequb,ZHEEQUB) #define LAPACK_cheequb LAPACK_GLOBAL(cheequb,CHEEQUB) #define LAPACK_sgesv LAPACK_GLOBAL(sgesv,SGESV) #define LAPACK_dgesv LAPACK_GLOBAL(dgesv,DGESV) #define LAPACK_cgesv LAPACK_GLOBAL(cgesv,CGESV) #define LAPACK_zgesv LAPACK_GLOBAL(zgesv,ZGESV) #define LAPACK_dsgesv LAPACK_GLOBAL(dsgesv,DSGESV) #define LAPACK_zcgesv LAPACK_GLOBAL(zcgesv,ZCGESV) #define LAPACK_sgesvx LAPACK_GLOBAL(sgesvx,SGESVX) #define LAPACK_dgesvx LAPACK_GLOBAL(dgesvx,DGESVX) #define LAPACK_cgesvx LAPACK_GLOBAL(cgesvx,CGESVX) #define LAPACK_zgesvx 
LAPACK_GLOBAL(zgesvx,ZGESVX) #define LAPACK_dgesvxx LAPACK_GLOBAL(dgesvxx,DGESVXX) #define LAPACK_sgesvxx LAPACK_GLOBAL(sgesvxx,SGESVXX) #define LAPACK_zgesvxx LAPACK_GLOBAL(zgesvxx,ZGESVXX) #define LAPACK_cgesvxx LAPACK_GLOBAL(cgesvxx,CGESVXX) #define LAPACK_sgbsv LAPACK_GLOBAL(sgbsv,SGBSV) #define LAPACK_dgbsv LAPACK_GLOBAL(dgbsv,DGBSV) #define LAPACK_cgbsv LAPACK_GLOBAL(cgbsv,CGBSV) #define LAPACK_zgbsv LAPACK_GLOBAL(zgbsv,ZGBSV) #define LAPACK_sgbsvx LAPACK_GLOBAL(sgbsvx,SGBSVX) #define LAPACK_dgbsvx LAPACK_GLOBAL(dgbsvx,DGBSVX) #define LAPACK_cgbsvx LAPACK_GLOBAL(cgbsvx,CGBSVX) #define LAPACK_zgbsvx LAPACK_GLOBAL(zgbsvx,ZGBSVX) #define LAPACK_dgbsvxx LAPACK_GLOBAL(dgbsvxx,DGBSVXX) #define LAPACK_sgbsvxx LAPACK_GLOBAL(sgbsvxx,SGBSVXX) #define LAPACK_zgbsvxx LAPACK_GLOBAL(zgbsvxx,ZGBSVXX) #define LAPACK_cgbsvxx LAPACK_GLOBAL(cgbsvxx,CGBSVXX) #define LAPACK_sgtsv LAPACK_GLOBAL(sgtsv,SGTSV) #define LAPACK_dgtsv LAPACK_GLOBAL(dgtsv,DGTSV) #define LAPACK_cgtsv LAPACK_GLOBAL(cgtsv,CGTSV) #define LAPACK_zgtsv LAPACK_GLOBAL(zgtsv,ZGTSV) #define LAPACK_sgtsvx LAPACK_GLOBAL(sgtsvx,SGTSVX) #define LAPACK_dgtsvx LAPACK_GLOBAL(dgtsvx,DGTSVX) #define LAPACK_cgtsvx LAPACK_GLOBAL(cgtsvx,CGTSVX) #define LAPACK_zgtsvx LAPACK_GLOBAL(zgtsvx,ZGTSVX) #define LAPACK_sposv LAPACK_GLOBAL(sposv,SPOSV) #define LAPACK_dposv LAPACK_GLOBAL(dposv,DPOSV) #define LAPACK_cposv LAPACK_GLOBAL(cposv,CPOSV) #define LAPACK_zposv LAPACK_GLOBAL(zposv,ZPOSV) #define LAPACK_dsposv LAPACK_GLOBAL(dsposv,DSPOSV) #define LAPACK_zcposv LAPACK_GLOBAL(zcposv,ZCPOSV) #define LAPACK_sposvx LAPACK_GLOBAL(sposvx,SPOSVX) #define LAPACK_dposvx LAPACK_GLOBAL(dposvx,DPOSVX) #define LAPACK_cposvx LAPACK_GLOBAL(cposvx,CPOSVX) #define LAPACK_zposvx LAPACK_GLOBAL(zposvx,ZPOSVX) #define LAPACK_dposvxx LAPACK_GLOBAL(dposvxx,DPOSVXX) #define LAPACK_sposvxx LAPACK_GLOBAL(sposvxx,SPOSVXX) #define LAPACK_zposvxx LAPACK_GLOBAL(zposvxx,ZPOSVXX) #define LAPACK_cposvxx LAPACK_GLOBAL(cposvxx,CPOSVXX) #define LAPACK_sppsv 
LAPACK_GLOBAL(sppsv,SPPSV) #define LAPACK_dppsv LAPACK_GLOBAL(dppsv,DPPSV) #define LAPACK_cppsv LAPACK_GLOBAL(cppsv,CPPSV) #define LAPACK_zppsv LAPACK_GLOBAL(zppsv,ZPPSV) #define LAPACK_sppsvx LAPACK_GLOBAL(sppsvx,SPPSVX) #define LAPACK_dppsvx LAPACK_GLOBAL(dppsvx,DPPSVX) #define LAPACK_cppsvx LAPACK_GLOBAL(cppsvx,CPPSVX) #define LAPACK_zppsvx LAPACK_GLOBAL(zppsvx,ZPPSVX) #define LAPACK_spbsv LAPACK_GLOBAL(spbsv,SPBSV) #define LAPACK_dpbsv LAPACK_GLOBAL(dpbsv,DPBSV) #define LAPACK_cpbsv LAPACK_GLOBAL(cpbsv,CPBSV) #define LAPACK_zpbsv LAPACK_GLOBAL(zpbsv,ZPBSV) #define LAPACK_spbsvx LAPACK_GLOBAL(spbsvx,SPBSVX) #define LAPACK_dpbsvx LAPACK_GLOBAL(dpbsvx,DPBSVX) #define LAPACK_cpbsvx LAPACK_GLOBAL(cpbsvx,CPBSVX) #define LAPACK_zpbsvx LAPACK_GLOBAL(zpbsvx,ZPBSVX) #define LAPACK_sptsv LAPACK_GLOBAL(sptsv,SPTSV) #define LAPACK_dptsv LAPACK_GLOBAL(dptsv,DPTSV) #define LAPACK_cptsv LAPACK_GLOBAL(cptsv,CPTSV) #define LAPACK_zptsv LAPACK_GLOBAL(zptsv,ZPTSV) #define LAPACK_sptsvx LAPACK_GLOBAL(sptsvx,SPTSVX) #define LAPACK_dptsvx LAPACK_GLOBAL(dptsvx,DPTSVX) #define LAPACK_cptsvx LAPACK_GLOBAL(cptsvx,CPTSVX) #define LAPACK_zptsvx LAPACK_GLOBAL(zptsvx,ZPTSVX) #define LAPACK_ssysv LAPACK_GLOBAL(ssysv,SSYSV) #define LAPACK_dsysv LAPACK_GLOBAL(dsysv,DSYSV) #define LAPACK_csysv LAPACK_GLOBAL(csysv,CSYSV) #define LAPACK_zsysv LAPACK_GLOBAL(zsysv,ZSYSV) #define LAPACK_ssysvx LAPACK_GLOBAL(ssysvx,SSYSVX) #define LAPACK_dsysvx LAPACK_GLOBAL(dsysvx,DSYSVX) #define LAPACK_csysvx LAPACK_GLOBAL(csysvx,CSYSVX) #define LAPACK_zsysvx LAPACK_GLOBAL(zsysvx,ZSYSVX) #define LAPACK_dsysvxx LAPACK_GLOBAL(dsysvxx,DSYSVXX) #define LAPACK_ssysvxx LAPACK_GLOBAL(ssysvxx,SSYSVXX) #define LAPACK_zsysvxx LAPACK_GLOBAL(zsysvxx,ZSYSVXX) #define LAPACK_csysvxx LAPACK_GLOBAL(csysvxx,CSYSVXX) #define LAPACK_chesv LAPACK_GLOBAL(chesv,CHESV) #define LAPACK_zhesv LAPACK_GLOBAL(zhesv,ZHESV) #define LAPACK_chesvx LAPACK_GLOBAL(chesvx,CHESVX) #define LAPACK_zhesvx LAPACK_GLOBAL(zhesvx,ZHESVX) #define LAPACK_zhesvxx 
LAPACK_GLOBAL(zhesvxx,ZHESVXX) #define LAPACK_chesvxx LAPACK_GLOBAL(chesvxx,CHESVXX) #define LAPACK_sspsv LAPACK_GLOBAL(sspsv,SSPSV) #define LAPACK_dspsv LAPACK_GLOBAL(dspsv,DSPSV) #define LAPACK_cspsv LAPACK_GLOBAL(cspsv,CSPSV) #define LAPACK_zspsv LAPACK_GLOBAL(zspsv,ZSPSV) #define LAPACK_sspsvx LAPACK_GLOBAL(sspsvx,SSPSVX) #define LAPACK_dspsvx LAPACK_GLOBAL(dspsvx,DSPSVX) #define LAPACK_cspsvx LAPACK_GLOBAL(cspsvx,CSPSVX) #define LAPACK_zspsvx LAPACK_GLOBAL(zspsvx,ZSPSVX) #define LAPACK_chpsv LAPACK_GLOBAL(chpsv,CHPSV) #define LAPACK_zhpsv LAPACK_GLOBAL(zhpsv,ZHPSV) #define LAPACK_chpsvx LAPACK_GLOBAL(chpsvx,CHPSVX) #define LAPACK_zhpsvx LAPACK_GLOBAL(zhpsvx,ZHPSVX) #define LAPACK_sgeqrf LAPACK_GLOBAL(sgeqrf,SGEQRF) #define LAPACK_dgeqrf LAPACK_GLOBAL(dgeqrf,DGEQRF) #define LAPACK_cgeqrf LAPACK_GLOBAL(cgeqrf,CGEQRF) #define LAPACK_zgeqrf LAPACK_GLOBAL(zgeqrf,ZGEQRF) #define LAPACK_sgeqpf LAPACK_GLOBAL(sgeqpf,SGEQPF) #define LAPACK_dgeqpf LAPACK_GLOBAL(dgeqpf,DGEQPF) #define LAPACK_cgeqpf LAPACK_GLOBAL(cgeqpf,CGEQPF) #define LAPACK_zgeqpf LAPACK_GLOBAL(zgeqpf,ZGEQPF) #define LAPACK_sgeqp3 LAPACK_GLOBAL(sgeqp3,SGEQP3) #define LAPACK_dgeqp3 LAPACK_GLOBAL(dgeqp3,DGEQP3) #define LAPACK_cgeqp3 LAPACK_GLOBAL(cgeqp3,CGEQP3) #define LAPACK_zgeqp3 LAPACK_GLOBAL(zgeqp3,ZGEQP3) #define LAPACK_sorgqr LAPACK_GLOBAL(sorgqr,SORGQR) #define LAPACK_dorgqr LAPACK_GLOBAL(dorgqr,DORGQR) #define LAPACK_sormqr LAPACK_GLOBAL(sormqr,SORMQR) #define LAPACK_dormqr LAPACK_GLOBAL(dormqr,DORMQR) #define LAPACK_cungqr LAPACK_GLOBAL(cungqr,CUNGQR) #define LAPACK_zungqr LAPACK_GLOBAL(zungqr,ZUNGQR) #define LAPACK_cunmqr LAPACK_GLOBAL(cunmqr,CUNMQR) #define LAPACK_zunmqr LAPACK_GLOBAL(zunmqr,ZUNMQR) #define LAPACK_sgelqf LAPACK_GLOBAL(sgelqf,SGELQF) #define LAPACK_dgelqf LAPACK_GLOBAL(dgelqf,DGELQF) #define LAPACK_cgelqf LAPACK_GLOBAL(cgelqf,CGELQF) #define LAPACK_zgelqf LAPACK_GLOBAL(zgelqf,ZGELQF) #define LAPACK_sorglq LAPACK_GLOBAL(sorglq,SORGLQ) #define LAPACK_dorglq 
LAPACK_GLOBAL(dorglq,DORGLQ) #define LAPACK_sormlq LAPACK_GLOBAL(sormlq,SORMLQ) #define LAPACK_dormlq LAPACK_GLOBAL(dormlq,DORMLQ) #define LAPACK_cunglq LAPACK_GLOBAL(cunglq,CUNGLQ) #define LAPACK_zunglq LAPACK_GLOBAL(zunglq,ZUNGLQ) #define LAPACK_cunmlq LAPACK_GLOBAL(cunmlq,CUNMLQ) #define LAPACK_zunmlq LAPACK_GLOBAL(zunmlq,ZUNMLQ) #define LAPACK_sgeqlf LAPACK_GLOBAL(sgeqlf,SGEQLF) #define LAPACK_dgeqlf LAPACK_GLOBAL(dgeqlf,DGEQLF) #define LAPACK_cgeqlf LAPACK_GLOBAL(cgeqlf,CGEQLF) #define LAPACK_zgeqlf LAPACK_GLOBAL(zgeqlf,ZGEQLF) #define LAPACK_sorgql LAPACK_GLOBAL(sorgql,SORGQL) #define LAPACK_dorgql LAPACK_GLOBAL(dorgql,DORGQL) #define LAPACK_cungql LAPACK_GLOBAL(cungql,CUNGQL) #define LAPACK_zungql LAPACK_GLOBAL(zungql,ZUNGQL) #define LAPACK_sormql LAPACK_GLOBAL(sormql,SORMQL) #define LAPACK_dormql LAPACK_GLOBAL(dormql,DORMQL) #define LAPACK_cunmql LAPACK_GLOBAL(cunmql,CUNMQL) #define LAPACK_zunmql LAPACK_GLOBAL(zunmql,ZUNMQL) #define LAPACK_sgerqf LAPACK_GLOBAL(sgerqf,SGERQF) #define LAPACK_dgerqf LAPACK_GLOBAL(dgerqf,DGERQF) #define LAPACK_cgerqf LAPACK_GLOBAL(cgerqf,CGERQF) #define LAPACK_zgerqf LAPACK_GLOBAL(zgerqf,ZGERQF) #define LAPACK_sorgrq LAPACK_GLOBAL(sorgrq,SORGRQ) #define LAPACK_dorgrq LAPACK_GLOBAL(dorgrq,DORGRQ) #define LAPACK_cungrq LAPACK_GLOBAL(cungrq,CUNGRQ) #define LAPACK_zungrq LAPACK_GLOBAL(zungrq,ZUNGRQ) #define LAPACK_sormrq LAPACK_GLOBAL(sormrq,SORMRQ) #define LAPACK_dormrq LAPACK_GLOBAL(dormrq,DORMRQ) #define LAPACK_cunmrq LAPACK_GLOBAL(cunmrq,CUNMRQ) #define LAPACK_zunmrq LAPACK_GLOBAL(zunmrq,ZUNMRQ) #define LAPACK_stzrzf LAPACK_GLOBAL(stzrzf,STZRZF) #define LAPACK_dtzrzf LAPACK_GLOBAL(dtzrzf,DTZRZF) #define LAPACK_ctzrzf LAPACK_GLOBAL(ctzrzf,CTZRZF) #define LAPACK_ztzrzf LAPACK_GLOBAL(ztzrzf,ZTZRZF) #define LAPACK_sormrz LAPACK_GLOBAL(sormrz,SORMRZ) #define LAPACK_dormrz LAPACK_GLOBAL(dormrz,DORMRZ) #define LAPACK_cunmrz LAPACK_GLOBAL(cunmrz,CUNMRZ) #define LAPACK_zunmrz LAPACK_GLOBAL(zunmrz,ZUNMRZ) #define LAPACK_sggqrf 
LAPACK_GLOBAL(sggqrf,SGGQRF) #define LAPACK_dggqrf LAPACK_GLOBAL(dggqrf,DGGQRF) #define LAPACK_cggqrf LAPACK_GLOBAL(cggqrf,CGGQRF) #define LAPACK_zggqrf LAPACK_GLOBAL(zggqrf,ZGGQRF) #define LAPACK_sggrqf LAPACK_GLOBAL(sggrqf,SGGRQF) #define LAPACK_dggrqf LAPACK_GLOBAL(dggrqf,DGGRQF) #define LAPACK_cggrqf LAPACK_GLOBAL(cggrqf,CGGRQF) #define LAPACK_zggrqf LAPACK_GLOBAL(zggrqf,ZGGRQF) #define LAPACK_sgebrd LAPACK_GLOBAL(sgebrd,SGEBRD) #define LAPACK_dgebrd LAPACK_GLOBAL(dgebrd,DGEBRD) #define LAPACK_cgebrd LAPACK_GLOBAL(cgebrd,CGEBRD) #define LAPACK_zgebrd LAPACK_GLOBAL(zgebrd,ZGEBRD) #define LAPACK_sgbbrd LAPACK_GLOBAL(sgbbrd,SGBBRD) #define LAPACK_dgbbrd LAPACK_GLOBAL(dgbbrd,DGBBRD) #define LAPACK_cgbbrd LAPACK_GLOBAL(cgbbrd,CGBBRD) #define LAPACK_zgbbrd LAPACK_GLOBAL(zgbbrd,ZGBBRD) #define LAPACK_sorgbr LAPACK_GLOBAL(sorgbr,SORGBR) #define LAPACK_dorgbr LAPACK_GLOBAL(dorgbr,DORGBR) #define LAPACK_sormbr LAPACK_GLOBAL(sormbr,SORMBR) #define LAPACK_dormbr LAPACK_GLOBAL(dormbr,DORMBR) #define LAPACK_cungbr LAPACK_GLOBAL(cungbr,CUNGBR) #define LAPACK_zungbr LAPACK_GLOBAL(zungbr,ZUNGBR) #define LAPACK_cunmbr LAPACK_GLOBAL(cunmbr,CUNMBR) #define LAPACK_zunmbr LAPACK_GLOBAL(zunmbr,ZUNMBR) #define LAPACK_sbdsqr LAPACK_GLOBAL(sbdsqr,SBDSQR) #define LAPACK_dbdsqr LAPACK_GLOBAL(dbdsqr,DBDSQR) #define LAPACK_cbdsqr LAPACK_GLOBAL(cbdsqr,CBDSQR) #define LAPACK_zbdsqr LAPACK_GLOBAL(zbdsqr,ZBDSQR) #define LAPACK_sbdsdc LAPACK_GLOBAL(sbdsdc,SBDSDC) #define LAPACK_dbdsdc LAPACK_GLOBAL(dbdsdc,DBDSDC) #define LAPACK_ssytrd LAPACK_GLOBAL(ssytrd,SSYTRD) #define LAPACK_dsytrd LAPACK_GLOBAL(dsytrd,DSYTRD) #define LAPACK_sorgtr LAPACK_GLOBAL(sorgtr,SORGTR) #define LAPACK_dorgtr LAPACK_GLOBAL(dorgtr,DORGTR) #define LAPACK_sormtr LAPACK_GLOBAL(sormtr,SORMTR) #define LAPACK_dormtr LAPACK_GLOBAL(dormtr,DORMTR) #define LAPACK_chetrd LAPACK_GLOBAL(chetrd,CHETRD) #define LAPACK_zhetrd LAPACK_GLOBAL(zhetrd,ZHETRD) #define LAPACK_cungtr LAPACK_GLOBAL(cungtr,CUNGTR) #define LAPACK_zungtr 
LAPACK_GLOBAL(zungtr,ZUNGTR) #define LAPACK_cunmtr LAPACK_GLOBAL(cunmtr,CUNMTR) #define LAPACK_zunmtr LAPACK_GLOBAL(zunmtr,ZUNMTR) #define LAPACK_ssptrd LAPACK_GLOBAL(ssptrd,SSPTRD) #define LAPACK_dsptrd LAPACK_GLOBAL(dsptrd,DSPTRD) #define LAPACK_sopgtr LAPACK_GLOBAL(sopgtr,SOPGTR) #define LAPACK_dopgtr LAPACK_GLOBAL(dopgtr,DOPGTR) #define LAPACK_sopmtr LAPACK_GLOBAL(sopmtr,SOPMTR) #define LAPACK_dopmtr LAPACK_GLOBAL(dopmtr,DOPMTR) #define LAPACK_chptrd LAPACK_GLOBAL(chptrd,CHPTRD) #define LAPACK_zhptrd LAPACK_GLOBAL(zhptrd,ZHPTRD) #define LAPACK_cupgtr LAPACK_GLOBAL(cupgtr,CUPGTR) #define LAPACK_zupgtr LAPACK_GLOBAL(zupgtr,ZUPGTR) #define LAPACK_cupmtr LAPACK_GLOBAL(cupmtr,CUPMTR) #define LAPACK_zupmtr LAPACK_GLOBAL(zupmtr,ZUPMTR) #define LAPACK_ssbtrd LAPACK_GLOBAL(ssbtrd,SSBTRD) #define LAPACK_dsbtrd LAPACK_GLOBAL(dsbtrd,DSBTRD) #define LAPACK_chbtrd LAPACK_GLOBAL(chbtrd,CHBTRD) #define LAPACK_zhbtrd LAPACK_GLOBAL(zhbtrd,ZHBTRD) #define LAPACK_ssterf LAPACK_GLOBAL(ssterf,SSTERF) #define LAPACK_dsterf LAPACK_GLOBAL(dsterf,DSTERF) #define LAPACK_ssteqr LAPACK_GLOBAL(ssteqr,SSTEQR) #define LAPACK_dsteqr LAPACK_GLOBAL(dsteqr,DSTEQR) #define LAPACK_csteqr LAPACK_GLOBAL(csteqr,CSTEQR) #define LAPACK_zsteqr LAPACK_GLOBAL(zsteqr,ZSTEQR) #define LAPACK_sstemr LAPACK_GLOBAL(sstemr,SSTEMR) #define LAPACK_dstemr LAPACK_GLOBAL(dstemr,DSTEMR) #define LAPACK_cstemr LAPACK_GLOBAL(cstemr,CSTEMR) #define LAPACK_zstemr LAPACK_GLOBAL(zstemr,ZSTEMR) #define LAPACK_sstedc LAPACK_GLOBAL(sstedc,SSTEDC) #define LAPACK_dstedc LAPACK_GLOBAL(dstedc,DSTEDC) #define LAPACK_cstedc LAPACK_GLOBAL(cstedc,CSTEDC) #define LAPACK_zstedc LAPACK_GLOBAL(zstedc,ZSTEDC) #define LAPACK_sstegr LAPACK_GLOBAL(sstegr,SSTEGR) #define LAPACK_dstegr LAPACK_GLOBAL(dstegr,DSTEGR) #define LAPACK_cstegr LAPACK_GLOBAL(cstegr,CSTEGR) #define LAPACK_zstegr LAPACK_GLOBAL(zstegr,ZSTEGR) #define LAPACK_spteqr LAPACK_GLOBAL(spteqr,SPTEQR) #define LAPACK_dpteqr LAPACK_GLOBAL(dpteqr,DPTEQR) #define LAPACK_cpteqr 
LAPACK_GLOBAL(cpteqr,CPTEQR) #define LAPACK_zpteqr LAPACK_GLOBAL(zpteqr,ZPTEQR) #define LAPACK_sstebz LAPACK_GLOBAL(sstebz,SSTEBZ) #define LAPACK_dstebz LAPACK_GLOBAL(dstebz,DSTEBZ) #define LAPACK_sstein LAPACK_GLOBAL(sstein,SSTEIN) #define LAPACK_dstein LAPACK_GLOBAL(dstein,DSTEIN) #define LAPACK_cstein LAPACK_GLOBAL(cstein,CSTEIN) #define LAPACK_zstein LAPACK_GLOBAL(zstein,ZSTEIN) #define LAPACK_sdisna LAPACK_GLOBAL(sdisna,SDISNA) #define LAPACK_ddisna LAPACK_GLOBAL(ddisna,DDISNA) #define LAPACK_ssygst LAPACK_GLOBAL(ssygst,SSYGST) #define LAPACK_dsygst LAPACK_GLOBAL(dsygst,DSYGST) #define LAPACK_chegst LAPACK_GLOBAL(chegst,CHEGST) #define LAPACK_zhegst LAPACK_GLOBAL(zhegst,ZHEGST) #define LAPACK_sspgst LAPACK_GLOBAL(sspgst,SSPGST) #define LAPACK_dspgst LAPACK_GLOBAL(dspgst,DSPGST) #define LAPACK_chpgst LAPACK_GLOBAL(chpgst,CHPGST) #define LAPACK_zhpgst LAPACK_GLOBAL(zhpgst,ZHPGST) #define LAPACK_ssbgst LAPACK_GLOBAL(ssbgst,SSBGST) #define LAPACK_dsbgst LAPACK_GLOBAL(dsbgst,DSBGST) #define LAPACK_chbgst LAPACK_GLOBAL(chbgst,CHBGST) #define LAPACK_zhbgst LAPACK_GLOBAL(zhbgst,ZHBGST) #define LAPACK_spbstf LAPACK_GLOBAL(spbstf,SPBSTF) #define LAPACK_dpbstf LAPACK_GLOBAL(dpbstf,DPBSTF) #define LAPACK_cpbstf LAPACK_GLOBAL(cpbstf,CPBSTF) #define LAPACK_zpbstf LAPACK_GLOBAL(zpbstf,ZPBSTF) #define LAPACK_sgehrd LAPACK_GLOBAL(sgehrd,SGEHRD) #define LAPACK_dgehrd LAPACK_GLOBAL(dgehrd,DGEHRD) #define LAPACK_cgehrd LAPACK_GLOBAL(cgehrd,CGEHRD) #define LAPACK_zgehrd LAPACK_GLOBAL(zgehrd,ZGEHRD) #define LAPACK_sorghr LAPACK_GLOBAL(sorghr,SORGHR) #define LAPACK_dorghr LAPACK_GLOBAL(dorghr,DORGHR) #define LAPACK_sormhr LAPACK_GLOBAL(sormhr,SORMHR) #define LAPACK_dormhr LAPACK_GLOBAL(dormhr,DORMHR) #define LAPACK_cunghr LAPACK_GLOBAL(cunghr,CUNGHR) #define LAPACK_zunghr LAPACK_GLOBAL(zunghr,ZUNGHR) #define LAPACK_cunmhr LAPACK_GLOBAL(cunmhr,CUNMHR) #define LAPACK_zunmhr LAPACK_GLOBAL(zunmhr,ZUNMHR) #define LAPACK_sgebal LAPACK_GLOBAL(sgebal,SGEBAL) #define LAPACK_dgebal 
LAPACK_GLOBAL(dgebal,DGEBAL) #define LAPACK_cgebal LAPACK_GLOBAL(cgebal,CGEBAL) #define LAPACK_zgebal LAPACK_GLOBAL(zgebal,ZGEBAL) #define LAPACK_sgebak LAPACK_GLOBAL(sgebak,SGEBAK) #define LAPACK_dgebak LAPACK_GLOBAL(dgebak,DGEBAK) #define LAPACK_cgebak LAPACK_GLOBAL(cgebak,CGEBAK) #define LAPACK_zgebak LAPACK_GLOBAL(zgebak,ZGEBAK) #define LAPACK_shseqr LAPACK_GLOBAL(shseqr,SHSEQR) #define LAPACK_dhseqr LAPACK_GLOBAL(dhseqr,DHSEQR) #define LAPACK_chseqr LAPACK_GLOBAL(chseqr,CHSEQR) #define LAPACK_zhseqr LAPACK_GLOBAL(zhseqr,ZHSEQR) #define LAPACK_shsein LAPACK_GLOBAL(shsein,SHSEIN) #define LAPACK_dhsein LAPACK_GLOBAL(dhsein,DHSEIN) #define LAPACK_chsein LAPACK_GLOBAL(chsein,CHSEIN) #define LAPACK_zhsein LAPACK_GLOBAL(zhsein,ZHSEIN) #define LAPACK_strevc LAPACK_GLOBAL(strevc,STREVC) #define LAPACK_dtrevc LAPACK_GLOBAL(dtrevc,DTREVC) #define LAPACK_ctrevc LAPACK_GLOBAL(ctrevc,CTREVC) #define LAPACK_ztrevc LAPACK_GLOBAL(ztrevc,ZTREVC) #define LAPACK_strsna LAPACK_GLOBAL(strsna,STRSNA) #define LAPACK_dtrsna LAPACK_GLOBAL(dtrsna,DTRSNA) #define LAPACK_ctrsna LAPACK_GLOBAL(ctrsna,CTRSNA) #define LAPACK_ztrsna LAPACK_GLOBAL(ztrsna,ZTRSNA) #define LAPACK_strexc LAPACK_GLOBAL(strexc,STREXC) #define LAPACK_dtrexc LAPACK_GLOBAL(dtrexc,DTREXC) #define LAPACK_ctrexc LAPACK_GLOBAL(ctrexc,CTREXC) #define LAPACK_ztrexc LAPACK_GLOBAL(ztrexc,ZTREXC) #define LAPACK_strsen LAPACK_GLOBAL(strsen,STRSEN) #define LAPACK_dtrsen LAPACK_GLOBAL(dtrsen,DTRSEN) #define LAPACK_ctrsen LAPACK_GLOBAL(ctrsen,CTRSEN) #define LAPACK_ztrsen LAPACK_GLOBAL(ztrsen,ZTRSEN) #define LAPACK_strsyl LAPACK_GLOBAL(strsyl,STRSYL) #define LAPACK_dtrsyl LAPACK_GLOBAL(dtrsyl,DTRSYL) #define LAPACK_ctrsyl LAPACK_GLOBAL(ctrsyl,CTRSYL) #define LAPACK_ztrsyl LAPACK_GLOBAL(ztrsyl,ZTRSYL) #define LAPACK_sgghrd LAPACK_GLOBAL(sgghrd,SGGHRD) #define LAPACK_dgghrd LAPACK_GLOBAL(dgghrd,DGGHRD) #define LAPACK_cgghrd LAPACK_GLOBAL(cgghrd,CGGHRD) #define LAPACK_zgghrd LAPACK_GLOBAL(zgghrd,ZGGHRD) #define LAPACK_sggbal 
LAPACK_GLOBAL(sggbal,SGGBAL) #define LAPACK_dggbal LAPACK_GLOBAL(dggbal,DGGBAL) #define LAPACK_cggbal LAPACK_GLOBAL(cggbal,CGGBAL) #define LAPACK_zggbal LAPACK_GLOBAL(zggbal,ZGGBAL) #define LAPACK_sggbak LAPACK_GLOBAL(sggbak,SGGBAK) #define LAPACK_dggbak LAPACK_GLOBAL(dggbak,DGGBAK) #define LAPACK_cggbak LAPACK_GLOBAL(cggbak,CGGBAK) #define LAPACK_zggbak LAPACK_GLOBAL(zggbak,ZGGBAK) #define LAPACK_shgeqz LAPACK_GLOBAL(shgeqz,SHGEQZ) #define LAPACK_dhgeqz LAPACK_GLOBAL(dhgeqz,DHGEQZ) #define LAPACK_chgeqz LAPACK_GLOBAL(chgeqz,CHGEQZ) #define LAPACK_zhgeqz LAPACK_GLOBAL(zhgeqz,ZHGEQZ) #define LAPACK_stgevc LAPACK_GLOBAL(stgevc,STGEVC) #define LAPACK_dtgevc LAPACK_GLOBAL(dtgevc,DTGEVC) #define LAPACK_ctgevc LAPACK_GLOBAL(ctgevc,CTGEVC) #define LAPACK_ztgevc LAPACK_GLOBAL(ztgevc,ZTGEVC) #define LAPACK_stgexc LAPACK_GLOBAL(stgexc,STGEXC) #define LAPACK_dtgexc LAPACK_GLOBAL(dtgexc,DTGEXC) #define LAPACK_ctgexc LAPACK_GLOBAL(ctgexc,CTGEXC) #define LAPACK_ztgexc LAPACK_GLOBAL(ztgexc,ZTGEXC) #define LAPACK_stgsen LAPACK_GLOBAL(stgsen,STGSEN) #define LAPACK_dtgsen LAPACK_GLOBAL(dtgsen,DTGSEN) #define LAPACK_ctgsen LAPACK_GLOBAL(ctgsen,CTGSEN) #define LAPACK_ztgsen LAPACK_GLOBAL(ztgsen,ZTGSEN) #define LAPACK_stgsyl LAPACK_GLOBAL(stgsyl,STGSYL) #define LAPACK_dtgsyl LAPACK_GLOBAL(dtgsyl,DTGSYL) #define LAPACK_ctgsyl LAPACK_GLOBAL(ctgsyl,CTGSYL) #define LAPACK_ztgsyl LAPACK_GLOBAL(ztgsyl,ZTGSYL) #define LAPACK_stgsna LAPACK_GLOBAL(stgsna,STGSNA) #define LAPACK_dtgsna LAPACK_GLOBAL(dtgsna,DTGSNA) #define LAPACK_ctgsna LAPACK_GLOBAL(ctgsna,CTGSNA) #define LAPACK_ztgsna LAPACK_GLOBAL(ztgsna,ZTGSNA) #define LAPACK_sggsvp LAPACK_GLOBAL(sggsvp,SGGSVP) #define LAPACK_dggsvp LAPACK_GLOBAL(dggsvp,DGGSVP) #define LAPACK_cggsvp LAPACK_GLOBAL(cggsvp,CGGSVP) #define LAPACK_zggsvp LAPACK_GLOBAL(zggsvp,ZGGSVP) #define LAPACK_stgsja LAPACK_GLOBAL(stgsja,STGSJA) #define LAPACK_dtgsja LAPACK_GLOBAL(dtgsja,DTGSJA) #define LAPACK_ctgsja LAPACK_GLOBAL(ctgsja,CTGSJA) #define LAPACK_ztgsja 
LAPACK_GLOBAL(ztgsja,ZTGSJA) #define LAPACK_sgels LAPACK_GLOBAL(sgels,SGELS) #define LAPACK_dgels LAPACK_GLOBAL(dgels,DGELS) #define LAPACK_cgels LAPACK_GLOBAL(cgels,CGELS) #define LAPACK_zgels LAPACK_GLOBAL(zgels,ZGELS) #define LAPACK_sgelsy LAPACK_GLOBAL(sgelsy,SGELSY) #define LAPACK_dgelsy LAPACK_GLOBAL(dgelsy,DGELSY) #define LAPACK_cgelsy LAPACK_GLOBAL(cgelsy,CGELSY) #define LAPACK_zgelsy LAPACK_GLOBAL(zgelsy,ZGELSY) #define LAPACK_sgelss LAPACK_GLOBAL(sgelss,SGELSS) #define LAPACK_dgelss LAPACK_GLOBAL(dgelss,DGELSS) #define LAPACK_cgelss LAPACK_GLOBAL(cgelss,CGELSS) #define LAPACK_zgelss LAPACK_GLOBAL(zgelss,ZGELSS) #define LAPACK_sgelsd LAPACK_GLOBAL(sgelsd,SGELSD) #define LAPACK_dgelsd LAPACK_GLOBAL(dgelsd,DGELSD) #define LAPACK_cgelsd LAPACK_GLOBAL(cgelsd,CGELSD) #define LAPACK_zgelsd LAPACK_GLOBAL(zgelsd,ZGELSD) #define LAPACK_sgglse LAPACK_GLOBAL(sgglse,SGGLSE) #define LAPACK_dgglse LAPACK_GLOBAL(dgglse,DGGLSE) #define LAPACK_cgglse LAPACK_GLOBAL(cgglse,CGGLSE) #define LAPACK_zgglse LAPACK_GLOBAL(zgglse,ZGGLSE) #define LAPACK_sggglm LAPACK_GLOBAL(sggglm,SGGGLM) #define LAPACK_dggglm LAPACK_GLOBAL(dggglm,DGGGLM) #define LAPACK_cggglm LAPACK_GLOBAL(cggglm,CGGGLM) #define LAPACK_zggglm LAPACK_GLOBAL(zggglm,ZGGGLM) #define LAPACK_ssyev LAPACK_GLOBAL(ssyev,SSYEV) #define LAPACK_dsyev LAPACK_GLOBAL(dsyev,DSYEV) #define LAPACK_cheev LAPACK_GLOBAL(cheev,CHEEV) #define LAPACK_zheev LAPACK_GLOBAL(zheev,ZHEEV) #define LAPACK_ssyevd LAPACK_GLOBAL(ssyevd,SSYEVD) #define LAPACK_dsyevd LAPACK_GLOBAL(dsyevd,DSYEVD) #define LAPACK_cheevd LAPACK_GLOBAL(cheevd,CHEEVD) #define LAPACK_zheevd LAPACK_GLOBAL(zheevd,ZHEEVD) #define LAPACK_ssyevx LAPACK_GLOBAL(ssyevx,SSYEVX) #define LAPACK_dsyevx LAPACK_GLOBAL(dsyevx,DSYEVX) #define LAPACK_cheevx LAPACK_GLOBAL(cheevx,CHEEVX) #define LAPACK_zheevx LAPACK_GLOBAL(zheevx,ZHEEVX) #define LAPACK_ssyevr LAPACK_GLOBAL(ssyevr,SSYEVR) #define LAPACK_dsyevr LAPACK_GLOBAL(dsyevr,DSYEVR) #define LAPACK_cheevr LAPACK_GLOBAL(cheevr,CHEEVR) 
#define LAPACK_zheevr LAPACK_GLOBAL(zheevr,ZHEEVR) #define LAPACK_sspev LAPACK_GLOBAL(sspev,SSPEV) #define LAPACK_dspev LAPACK_GLOBAL(dspev,DSPEV) #define LAPACK_chpev LAPACK_GLOBAL(chpev,CHPEV) #define LAPACK_zhpev LAPACK_GLOBAL(zhpev,ZHPEV) #define LAPACK_sspevd LAPACK_GLOBAL(sspevd,SSPEVD) #define LAPACK_dspevd LAPACK_GLOBAL(dspevd,DSPEVD) #define LAPACK_chpevd LAPACK_GLOBAL(chpevd,CHPEVD) #define LAPACK_zhpevd LAPACK_GLOBAL(zhpevd,ZHPEVD) #define LAPACK_sspevx LAPACK_GLOBAL(sspevx,SSPEVX) #define LAPACK_dspevx LAPACK_GLOBAL(dspevx,DSPEVX) #define LAPACK_chpevx LAPACK_GLOBAL(chpevx,CHPEVX) #define LAPACK_zhpevx LAPACK_GLOBAL(zhpevx,ZHPEVX) #define LAPACK_ssbev LAPACK_GLOBAL(ssbev,SSBEV) #define LAPACK_dsbev LAPACK_GLOBAL(dsbev,DSBEV) #define LAPACK_chbev LAPACK_GLOBAL(chbev,CHBEV) #define LAPACK_zhbev LAPACK_GLOBAL(zhbev,ZHBEV) #define LAPACK_ssbevd LAPACK_GLOBAL(ssbevd,SSBEVD) #define LAPACK_dsbevd LAPACK_GLOBAL(dsbevd,DSBEVD) #define LAPACK_chbevd LAPACK_GLOBAL(chbevd,CHBEVD) #define LAPACK_zhbevd LAPACK_GLOBAL(zhbevd,ZHBEVD) #define LAPACK_ssbevx LAPACK_GLOBAL(ssbevx,SSBEVX) #define LAPACK_dsbevx LAPACK_GLOBAL(dsbevx,DSBEVX) #define LAPACK_chbevx LAPACK_GLOBAL(chbevx,CHBEVX) #define LAPACK_zhbevx LAPACK_GLOBAL(zhbevx,ZHBEVX) #define LAPACK_sstev LAPACK_GLOBAL(sstev,SSTEV) #define LAPACK_dstev LAPACK_GLOBAL(dstev,DSTEV) #define LAPACK_sstevd LAPACK_GLOBAL(sstevd,SSTEVD) #define LAPACK_dstevd LAPACK_GLOBAL(dstevd,DSTEVD) #define LAPACK_sstevx LAPACK_GLOBAL(sstevx,SSTEVX) #define LAPACK_dstevx LAPACK_GLOBAL(dstevx,DSTEVX) #define LAPACK_sstevr LAPACK_GLOBAL(sstevr,SSTEVR) #define LAPACK_dstevr LAPACK_GLOBAL(dstevr,DSTEVR) #define LAPACK_sgees LAPACK_GLOBAL(sgees,SGEES) #define LAPACK_dgees LAPACK_GLOBAL(dgees,DGEES) #define LAPACK_cgees LAPACK_GLOBAL(cgees,CGEES) #define LAPACK_zgees LAPACK_GLOBAL(zgees,ZGEES) #define LAPACK_sgeesx LAPACK_GLOBAL(sgeesx,SGEESX) #define LAPACK_dgeesx LAPACK_GLOBAL(dgeesx,DGEESX) #define LAPACK_cgeesx LAPACK_GLOBAL(cgeesx,CGEESX) 
/*
 * Name-mapping macros, one preprocessor directive per line (a #define ends at
 * the newline, so several directives must not share a physical line, and a
 * directive must not be split across two lines without a continuation).
 * Each LAPACK_<routine> expands to LAPACK_GLOBAL(<routine>,<ROUTINE>), passing
 * the lower-case and upper-case spelling of the routine name.
 * NOTE(review): LAPACK_GLOBAL is defined outside this section; presumably it
 * picks the linker-level symbol name -- confirm against its definition.
 */
#define LAPACK_zgeesx LAPACK_GLOBAL(zgeesx,ZGEESX)
#define LAPACK_sgeev LAPACK_GLOBAL(sgeev,SGEEV)
#define LAPACK_dgeev LAPACK_GLOBAL(dgeev,DGEEV)
#define LAPACK_cgeev LAPACK_GLOBAL(cgeev,CGEEV)
#define LAPACK_zgeev LAPACK_GLOBAL(zgeev,ZGEEV)
#define LAPACK_sgeevx LAPACK_GLOBAL(sgeevx,SGEEVX)
#define LAPACK_dgeevx LAPACK_GLOBAL(dgeevx,DGEEVX)
#define LAPACK_cgeevx LAPACK_GLOBAL(cgeevx,CGEEVX)
#define LAPACK_zgeevx LAPACK_GLOBAL(zgeevx,ZGEEVX)
#define LAPACK_sgesvd LAPACK_GLOBAL(sgesvd,SGESVD)
#define LAPACK_dgesvd LAPACK_GLOBAL(dgesvd,DGESVD)
#define LAPACK_cgesvd LAPACK_GLOBAL(cgesvd,CGESVD)
#define LAPACK_zgesvd LAPACK_GLOBAL(zgesvd,ZGESVD)
#define LAPACK_sgesdd LAPACK_GLOBAL(sgesdd,SGESDD)
#define LAPACK_dgesdd LAPACK_GLOBAL(dgesdd,DGESDD)
#define LAPACK_cgesdd LAPACK_GLOBAL(cgesdd,CGESDD)
#define LAPACK_zgesdd LAPACK_GLOBAL(zgesdd,ZGESDD)
#define LAPACK_dgejsv LAPACK_GLOBAL(dgejsv,DGEJSV)
#define LAPACK_sgejsv LAPACK_GLOBAL(sgejsv,SGEJSV)
#define LAPACK_dgesvj LAPACK_GLOBAL(dgesvj,DGESVJ)
#define LAPACK_sgesvj LAPACK_GLOBAL(sgesvj,SGESVJ)
#define LAPACK_sggsvd LAPACK_GLOBAL(sggsvd,SGGSVD)
#define LAPACK_dggsvd LAPACK_GLOBAL(dggsvd,DGGSVD)
#define LAPACK_cggsvd LAPACK_GLOBAL(cggsvd,CGGSVD)
#define LAPACK_zggsvd LAPACK_GLOBAL(zggsvd,ZGGSVD)
#define LAPACK_ssygv LAPACK_GLOBAL(ssygv,SSYGV)
#define LAPACK_dsygv LAPACK_GLOBAL(dsygv,DSYGV)
#define LAPACK_chegv LAPACK_GLOBAL(chegv,CHEGV)
#define LAPACK_zhegv LAPACK_GLOBAL(zhegv,ZHEGV)
#define LAPACK_ssygvd LAPACK_GLOBAL(ssygvd,SSYGVD)
#define LAPACK_dsygvd LAPACK_GLOBAL(dsygvd,DSYGVD)
#define LAPACK_chegvd LAPACK_GLOBAL(chegvd,CHEGVD)
#define LAPACK_zhegvd LAPACK_GLOBAL(zhegvd,ZHEGVD)
#define LAPACK_ssygvx LAPACK_GLOBAL(ssygvx,SSYGVX)
#define LAPACK_dsygvx LAPACK_GLOBAL(dsygvx,DSYGVX)
#define LAPACK_chegvx LAPACK_GLOBAL(chegvx,CHEGVX)
#define LAPACK_zhegvx LAPACK_GLOBAL(zhegvx,ZHEGVX)
#define LAPACK_sspgv LAPACK_GLOBAL(sspgv,SSPGV)
#define LAPACK_dspgv LAPACK_GLOBAL(dspgv,DSPGV)
#define LAPACK_chpgv LAPACK_GLOBAL(chpgv,CHPGV)
#define LAPACK_zhpgv LAPACK_GLOBAL(zhpgv,ZHPGV)
#define LAPACK_sspgvd LAPACK_GLOBAL(sspgvd,SSPGVD)
#define LAPACK_dspgvd LAPACK_GLOBAL(dspgvd,DSPGVD)
#define LAPACK_chpgvd LAPACK_GLOBAL(chpgvd,CHPGVD)
#define LAPACK_zhpgvd LAPACK_GLOBAL(zhpgvd,ZHPGVD)
#define LAPACK_sspgvx LAPACK_GLOBAL(sspgvx,SSPGVX)
#define LAPACK_dspgvx LAPACK_GLOBAL(dspgvx,DSPGVX)
#define LAPACK_chpgvx LAPACK_GLOBAL(chpgvx,CHPGVX)
#define LAPACK_zhpgvx LAPACK_GLOBAL(zhpgvx,ZHPGVX)
#define LAPACK_ssbgv LAPACK_GLOBAL(ssbgv,SSBGV)
#define LAPACK_dsbgv LAPACK_GLOBAL(dsbgv,DSBGV)
#define LAPACK_chbgv LAPACK_GLOBAL(chbgv,CHBGV)
#define LAPACK_zhbgv LAPACK_GLOBAL(zhbgv,ZHBGV)
#define LAPACK_ssbgvd LAPACK_GLOBAL(ssbgvd,SSBGVD)
#define LAPACK_dsbgvd LAPACK_GLOBAL(dsbgvd,DSBGVD)
#define LAPACK_chbgvd LAPACK_GLOBAL(chbgvd,CHBGVD)
#define LAPACK_zhbgvd LAPACK_GLOBAL(zhbgvd,ZHBGVD)
#define LAPACK_ssbgvx LAPACK_GLOBAL(ssbgvx,SSBGVX)
#define LAPACK_dsbgvx LAPACK_GLOBAL(dsbgvx,DSBGVX)
#define LAPACK_chbgvx LAPACK_GLOBAL(chbgvx,CHBGVX)
#define LAPACK_zhbgvx LAPACK_GLOBAL(zhbgvx,ZHBGVX)
#define LAPACK_sgges LAPACK_GLOBAL(sgges,SGGES)
#define LAPACK_dgges LAPACK_GLOBAL(dgges,DGGES)
#define LAPACK_cgges LAPACK_GLOBAL(cgges,CGGES)
#define LAPACK_zgges LAPACK_GLOBAL(zgges,ZGGES)
#define LAPACK_sggesx LAPACK_GLOBAL(sggesx,SGGESX)
#define LAPACK_dggesx LAPACK_GLOBAL(dggesx,DGGESX)
#define LAPACK_cggesx LAPACK_GLOBAL(cggesx,CGGESX)
#define LAPACK_zggesx LAPACK_GLOBAL(zggesx,ZGGESX)
#define LAPACK_sggev LAPACK_GLOBAL(sggev,SGGEV)
#define LAPACK_dggev LAPACK_GLOBAL(dggev,DGGEV)
#define LAPACK_cggev LAPACK_GLOBAL(cggev,CGGEV)
#define LAPACK_zggev LAPACK_GLOBAL(zggev,ZGGEV)
#define LAPACK_sggevx LAPACK_GLOBAL(sggevx,SGGEVX)
#define LAPACK_dggevx LAPACK_GLOBAL(dggevx,DGGEVX)
#define LAPACK_cggevx LAPACK_GLOBAL(cggevx,CGGEVX)
#define LAPACK_zggevx LAPACK_GLOBAL(zggevx,ZGGEVX)
#define LAPACK_dsfrk LAPACK_GLOBAL(dsfrk,DSFRK)
#define LAPACK_ssfrk LAPACK_GLOBAL(ssfrk,SSFRK)
#define LAPACK_zhfrk LAPACK_GLOBAL(zhfrk,ZHFRK)
#define LAPACK_chfrk LAPACK_GLOBAL(chfrk,CHFRK)
#define LAPACK_dtfsm LAPACK_GLOBAL(dtfsm,DTFSM)
#define LAPACK_stfsm LAPACK_GLOBAL(stfsm,STFSM)
#define LAPACK_ztfsm LAPACK_GLOBAL(ztfsm,ZTFSM)
#define LAPACK_ctfsm LAPACK_GLOBAL(ctfsm,CTFSM)
#define LAPACK_dtfttp LAPACK_GLOBAL(dtfttp,DTFTTP)
#define LAPACK_stfttp LAPACK_GLOBAL(stfttp,STFTTP)
#define LAPACK_ztfttp LAPACK_GLOBAL(ztfttp,ZTFTTP)
#define LAPACK_ctfttp LAPACK_GLOBAL(ctfttp,CTFTTP)
#define LAPACK_dtfttr LAPACK_GLOBAL(dtfttr,DTFTTR)
#define LAPACK_stfttr LAPACK_GLOBAL(stfttr,STFTTR)
#define LAPACK_ztfttr LAPACK_GLOBAL(ztfttr,ZTFTTR)
#define LAPACK_ctfttr LAPACK_GLOBAL(ctfttr,CTFTTR)
#define LAPACK_dtpttf LAPACK_GLOBAL(dtpttf,DTPTTF)
#define LAPACK_stpttf LAPACK_GLOBAL(stpttf,STPTTF)
#define LAPACK_ztpttf LAPACK_GLOBAL(ztpttf,ZTPTTF)
#define LAPACK_ctpttf LAPACK_GLOBAL(ctpttf,CTPTTF)
#define LAPACK_dtpttr LAPACK_GLOBAL(dtpttr,DTPTTR)
#define LAPACK_stpttr LAPACK_GLOBAL(stpttr,STPTTR)
#define LAPACK_ztpttr LAPACK_GLOBAL(ztpttr,ZTPTTR)
#define LAPACK_ctpttr LAPACK_GLOBAL(ctpttr,CTPTTR)
#define LAPACK_dtrttf LAPACK_GLOBAL(dtrttf,DTRTTF)
#define LAPACK_strttf LAPACK_GLOBAL(strttf,STRTTF)
#define LAPACK_ztrttf LAPACK_GLOBAL(ztrttf,ZTRTTF)
#define LAPACK_ctrttf LAPACK_GLOBAL(ctrttf,CTRTTF)
#define LAPACK_dtrttp LAPACK_GLOBAL(dtrttp,DTRTTP)
#define LAPACK_strttp LAPACK_GLOBAL(strttp,STRTTP)
#define LAPACK_ztrttp LAPACK_GLOBAL(ztrttp,ZTRTTP)
#define LAPACK_ctrttp LAPACK_GLOBAL(ctrttp,CTRTTP)
#define LAPACK_sgeqrfp LAPACK_GLOBAL(sgeqrfp,SGEQRFP)
#define LAPACK_dgeqrfp LAPACK_GLOBAL(dgeqrfp,DGEQRFP)
#define LAPACK_cgeqrfp LAPACK_GLOBAL(cgeqrfp,CGEQRFP)
#define LAPACK_zgeqrfp LAPACK_GLOBAL(zgeqrfp,ZGEQRFP)
#define LAPACK_clacgv LAPACK_GLOBAL(clacgv,CLACGV)
#define LAPACK_zlacgv LAPACK_GLOBAL(zlacgv,ZLACGV)
#define LAPACK_slarnv LAPACK_GLOBAL(slarnv,SLARNV)
#define LAPACK_dlarnv LAPACK_GLOBAL(dlarnv,DLARNV)
#define LAPACK_clarnv LAPACK_GLOBAL(clarnv,CLARNV)
#define LAPACK_zlarnv LAPACK_GLOBAL(zlarnv,ZLARNV)
#define LAPACK_sgeqr2 LAPACK_GLOBAL(sgeqr2,SGEQR2)
#define LAPACK_dgeqr2 LAPACK_GLOBAL(dgeqr2,DGEQR2)
#define LAPACK_cgeqr2 LAPACK_GLOBAL(cgeqr2,CGEQR2)
#define LAPACK_zgeqr2 LAPACK_GLOBAL(zgeqr2,ZGEQR2)
#define LAPACK_slacn2 LAPACK_GLOBAL(slacn2,SLACN2)
#define LAPACK_dlacn2 LAPACK_GLOBAL(dlacn2,DLACN2)
#define LAPACK_clacn2 LAPACK_GLOBAL(clacn2,CLACN2)
#define LAPACK_zlacn2 LAPACK_GLOBAL(zlacn2,ZLACN2)
#define LAPACK_slacpy LAPACK_GLOBAL(slacpy,SLACPY)
#define LAPACK_dlacpy LAPACK_GLOBAL(dlacpy,DLACPY)
#define LAPACK_clacpy LAPACK_GLOBAL(clacpy,CLACPY)
#define LAPACK_zlacpy LAPACK_GLOBAL(zlacpy,ZLACPY)
#define LAPACK_clacp2 LAPACK_GLOBAL(clacp2,CLACP2)
#define LAPACK_zlacp2 LAPACK_GLOBAL(zlacp2,ZLACP2)
#define LAPACK_sgetf2 LAPACK_GLOBAL(sgetf2,SGETF2)
#define LAPACK_dgetf2 LAPACK_GLOBAL(dgetf2,DGETF2)
#define LAPACK_cgetf2 LAPACK_GLOBAL(cgetf2,CGETF2)
#define LAPACK_zgetf2 LAPACK_GLOBAL(zgetf2,ZGETF2)
#define LAPACK_slaswp LAPACK_GLOBAL(slaswp,SLASWP)
#define LAPACK_dlaswp LAPACK_GLOBAL(dlaswp,DLASWP)
#define LAPACK_claswp LAPACK_GLOBAL(claswp,CLASWP)
#define LAPACK_zlaswp LAPACK_GLOBAL(zlaswp,ZLASWP)
#define LAPACK_slange LAPACK_GLOBAL(slange,SLANGE)
#define LAPACK_dlange LAPACK_GLOBAL(dlange,DLANGE)
#define LAPACK_clange LAPACK_GLOBAL(clange,CLANGE)
#define LAPACK_zlange LAPACK_GLOBAL(zlange,ZLANGE)
#define LAPACK_clanhe LAPACK_GLOBAL(clanhe,CLANHE)
#define LAPACK_zlanhe LAPACK_GLOBAL(zlanhe,ZLANHE)
#define LAPACK_slansy LAPACK_GLOBAL(slansy,SLANSY)
#define LAPACK_dlansy LAPACK_GLOBAL(dlansy,DLANSY)
#define LAPACK_clansy LAPACK_GLOBAL(clansy,CLANSY)
#define LAPACK_zlansy LAPACK_GLOBAL(zlansy,ZLANSY)
#define LAPACK_slantr LAPACK_GLOBAL(slantr,SLANTR)
#define LAPACK_dlantr LAPACK_GLOBAL(dlantr,DLANTR)
#define LAPACK_clantr LAPACK_GLOBAL(clantr,CLANTR)
#define LAPACK_zlantr LAPACK_GLOBAL(zlantr,ZLANTR)
#define LAPACK_slamch LAPACK_GLOBAL(slamch,SLAMCH)
#define LAPACK_dlamch LAPACK_GLOBAL(dlamch,DLAMCH)
#define LAPACK_sgelq2 LAPACK_GLOBAL(sgelq2,SGELQ2)
#define LAPACK_dgelq2 LAPACK_GLOBAL(dgelq2,DGELQ2)
#define LAPACK_cgelq2 LAPACK_GLOBAL(cgelq2,CGELQ2)
#define LAPACK_zgelq2 LAPACK_GLOBAL(zgelq2,ZGELQ2)
#define LAPACK_slarfb LAPACK_GLOBAL(slarfb,SLARFB)
#define LAPACK_dlarfb LAPACK_GLOBAL(dlarfb,DLARFB)
#define LAPACK_clarfb LAPACK_GLOBAL(clarfb,CLARFB)
#define LAPACK_zlarfb LAPACK_GLOBAL(zlarfb,ZLARFB)
#define LAPACK_slarfg LAPACK_GLOBAL(slarfg,SLARFG)
#define LAPACK_dlarfg LAPACK_GLOBAL(dlarfg,DLARFG)
#define LAPACK_clarfg LAPACK_GLOBAL(clarfg,CLARFG)
#define LAPACK_zlarfg LAPACK_GLOBAL(zlarfg,ZLARFG)
#define LAPACK_slarft LAPACK_GLOBAL(slarft,SLARFT)
#define LAPACK_dlarft LAPACK_GLOBAL(dlarft,DLARFT)
#define LAPACK_clarft LAPACK_GLOBAL(clarft,CLARFT)
#define LAPACK_zlarft LAPACK_GLOBAL(zlarft,ZLARFT)
#define LAPACK_slarfx LAPACK_GLOBAL(slarfx,SLARFX)
#define LAPACK_dlarfx LAPACK_GLOBAL(dlarfx,DLARFX)
#define LAPACK_clarfx LAPACK_GLOBAL(clarfx,CLARFX)
#define LAPACK_zlarfx LAPACK_GLOBAL(zlarfx,ZLARFX)
#define LAPACK_slatms LAPACK_GLOBAL(slatms,SLATMS)
#define LAPACK_dlatms LAPACK_GLOBAL(dlatms,DLATMS)
#define LAPACK_clatms LAPACK_GLOBAL(clatms,CLATMS)
#define LAPACK_zlatms LAPACK_GLOBAL(zlatms,ZLATMS)
#define LAPACK_slag2d LAPACK_GLOBAL(slag2d,SLAG2D)
#define LAPACK_dlag2s LAPACK_GLOBAL(dlag2s,DLAG2S)
#define LAPACK_clag2z LAPACK_GLOBAL(clag2z,CLAG2Z)
#define LAPACK_zlag2c LAPACK_GLOBAL(zlag2c,ZLAG2C)
#define LAPACK_slauum LAPACK_GLOBAL(slauum,SLAUUM)
#define LAPACK_dlauum LAPACK_GLOBAL(dlauum,DLAUUM)
#define LAPACK_clauum LAPACK_GLOBAL(clauum,CLAUUM)
#define LAPACK_zlauum LAPACK_GLOBAL(zlauum,ZLAUUM)
#define LAPACK_slagge LAPACK_GLOBAL(slagge,SLAGGE)
#define LAPACK_dlagge LAPACK_GLOBAL(dlagge,DLAGGE)
#define LAPACK_clagge LAPACK_GLOBAL(clagge,CLAGGE)
#define LAPACK_zlagge LAPACK_GLOBAL(zlagge,ZLAGGE)
#define LAPACK_slaset LAPACK_GLOBAL(slaset,SLASET)
#define LAPACK_dlaset LAPACK_GLOBAL(dlaset,DLASET)
#define LAPACK_claset LAPACK_GLOBAL(claset,CLASET)
#define LAPACK_zlaset LAPACK_GLOBAL(zlaset,ZLASET)
#define LAPACK_slasrt LAPACK_GLOBAL(slasrt,SLASRT)
#define LAPACK_dlasrt LAPACK_GLOBAL(dlasrt,DLASRT)
#define LAPACK_slagsy LAPACK_GLOBAL(slagsy,SLAGSY)
#define LAPACK_dlagsy LAPACK_GLOBAL(dlagsy,DLAGSY)
#define LAPACK_clagsy LAPACK_GLOBAL(clagsy,CLAGSY)
#define LAPACK_zlagsy LAPACK_GLOBAL(zlagsy,ZLAGSY)
#define LAPACK_claghe LAPACK_GLOBAL(claghe,CLAGHE)
#define LAPACK_zlaghe LAPACK_GLOBAL(zlaghe,ZLAGHE)
#define LAPACK_slapmr LAPACK_GLOBAL(slapmr,SLAPMR)
#define LAPACK_dlapmr LAPACK_GLOBAL(dlapmr,DLAPMR)
#define LAPACK_clapmr LAPACK_GLOBAL(clapmr,CLAPMR)
#define LAPACK_zlapmr LAPACK_GLOBAL(zlapmr,ZLAPMR)
#define LAPACK_slapy2 LAPACK_GLOBAL(slapy2,SLAPY2)
#define LAPACK_dlapy2 LAPACK_GLOBAL(dlapy2,DLAPY2)
#define LAPACK_slapy3 LAPACK_GLOBAL(slapy3,SLAPY3)
#define LAPACK_dlapy3 LAPACK_GLOBAL(dlapy3,DLAPY3)
#define LAPACK_slartgp LAPACK_GLOBAL(slartgp,SLARTGP)
#define LAPACK_dlartgp LAPACK_GLOBAL(dlartgp,DLARTGP)
#define LAPACK_slartgs LAPACK_GLOBAL(slartgs,SLARTGS)
#define LAPACK_dlartgs LAPACK_GLOBAL(dlartgs,DLARTGS)
// LAPACK 3.3.0
#define LAPACK_cbbcsd LAPACK_GLOBAL(cbbcsd,CBBCSD)
#define LAPACK_cheswapr LAPACK_GLOBAL(cheswapr,CHESWAPR)
#define LAPACK_chetri2 LAPACK_GLOBAL(chetri2,CHETRI2)
#define LAPACK_chetri2x LAPACK_GLOBAL(chetri2x,CHETRI2X)
#define LAPACK_chetrs2 LAPACK_GLOBAL(chetrs2,CHETRS2)
#define LAPACK_csyconv LAPACK_GLOBAL(csyconv,CSYCONV)
#define LAPACK_csyswapr LAPACK_GLOBAL(csyswapr,CSYSWAPR)
#define LAPACK_csytri2 LAPACK_GLOBAL(csytri2,CSYTRI2)
#define LAPACK_csytri2x LAPACK_GLOBAL(csytri2x,CSYTRI2X)
#define LAPACK_csytrs2 LAPACK_GLOBAL(csytrs2,CSYTRS2)
#define LAPACK_cunbdb LAPACK_GLOBAL(cunbdb,CUNBDB)
#define LAPACK_cuncsd LAPACK_GLOBAL(cuncsd,CUNCSD)
#define LAPACK_dbbcsd LAPACK_GLOBAL(dbbcsd,DBBCSD)
#define LAPACK_dorbdb LAPACK_GLOBAL(dorbdb,DORBDB)
#define LAPACK_dorcsd LAPACK_GLOBAL(dorcsd,DORCSD)
#define LAPACK_dsyconv LAPACK_GLOBAL(dsyconv,DSYCONV)
#define LAPACK_dsyswapr LAPACK_GLOBAL(dsyswapr,DSYSWAPR)
#define LAPACK_dsytri2 LAPACK_GLOBAL(dsytri2,DSYTRI2)
#define LAPACK_dsytri2x LAPACK_GLOBAL(dsytri2x,DSYTRI2X)
#define LAPACK_dsytrs2 LAPACK_GLOBAL(dsytrs2,DSYTRS2)
#define LAPACK_sbbcsd LAPACK_GLOBAL(sbbcsd,SBBCSD)
#define LAPACK_sorbdb LAPACK_GLOBAL(sorbdb,SORBDB)
#define LAPACK_sorcsd LAPACK_GLOBAL(sorcsd,SORCSD)
#define LAPACK_ssyconv LAPACK_GLOBAL(ssyconv,SSYCONV)
#define LAPACK_ssyswapr LAPACK_GLOBAL(ssyswapr,SSYSWAPR)
#define LAPACK_ssytri2 LAPACK_GLOBAL(ssytri2,SSYTRI2)
#define LAPACK_ssytri2x LAPACK_GLOBAL(ssytri2x,SSYTRI2X)
#define LAPACK_ssytrs2 LAPACK_GLOBAL(ssytrs2,SSYTRS2)
#define LAPACK_zbbcsd LAPACK_GLOBAL(zbbcsd,ZBBCSD)
#define LAPACK_zheswapr LAPACK_GLOBAL(zheswapr,ZHESWAPR)
#define LAPACK_zhetri2 LAPACK_GLOBAL(zhetri2,ZHETRI2)
#define LAPACK_zhetri2x LAPACK_GLOBAL(zhetri2x,ZHETRI2X)
#define LAPACK_zhetrs2 LAPACK_GLOBAL(zhetrs2,ZHETRS2)
#define LAPACK_zsyconv LAPACK_GLOBAL(zsyconv,ZSYCONV)
#define LAPACK_zsyswapr LAPACK_GLOBAL(zsyswapr,ZSYSWAPR)
#define LAPACK_zsytri2 LAPACK_GLOBAL(zsytri2,ZSYTRI2)
#define LAPACK_zsytri2x LAPACK_GLOBAL(zsytri2x,ZSYTRI2X)
#define LAPACK_zsytrs2 LAPACK_GLOBAL(zsytrs2,ZSYTRS2)
#define LAPACK_zunbdb LAPACK_GLOBAL(zunbdb,ZUNBDB)
#define LAPACK_zuncsd LAPACK_GLOBAL(zuncsd,ZUNCSD)
// LAPACK 3.4.0
#define LAPACK_sgemqrt LAPACK_GLOBAL(sgemqrt,SGEMQRT)
#define LAPACK_dgemqrt LAPACK_GLOBAL(dgemqrt,DGEMQRT)
#define LAPACK_cgemqrt LAPACK_GLOBAL(cgemqrt,CGEMQRT)
#define LAPACK_zgemqrt LAPACK_GLOBAL(zgemqrt,ZGEMQRT)
#define LAPACK_sgeqrt LAPACK_GLOBAL(sgeqrt,SGEQRT)
#define LAPACK_dgeqrt LAPACK_GLOBAL(dgeqrt,DGEQRT)
#define LAPACK_cgeqrt LAPACK_GLOBAL(cgeqrt,CGEQRT)
#define LAPACK_zgeqrt LAPACK_GLOBAL(zgeqrt,ZGEQRT)
#define LAPACK_sgeqrt2 LAPACK_GLOBAL(sgeqrt2,SGEQRT2)
#define LAPACK_dgeqrt2 LAPACK_GLOBAL(dgeqrt2,DGEQRT2)
#define LAPACK_cgeqrt2 LAPACK_GLOBAL(cgeqrt2,CGEQRT2)
#define LAPACK_zgeqrt2 LAPACK_GLOBAL(zgeqrt2,ZGEQRT2)
#define LAPACK_sgeqrt3 LAPACK_GLOBAL(sgeqrt3,SGEQRT3)
#define LAPACK_dgeqrt3 LAPACK_GLOBAL(dgeqrt3,DGEQRT3)
#define LAPACK_cgeqrt3 LAPACK_GLOBAL(cgeqrt3,CGEQRT3) #define LAPACK_zgeqrt3 LAPACK_GLOBAL(zgeqrt3,ZGEQRT3) #define LAPACK_stpmqrt LAPACK_GLOBAL(stpmqrt,STPMQRT) #define LAPACK_dtpmqrt LAPACK_GLOBAL(dtpmqrt,DTPMQRT) #define LAPACK_ctpmqrt LAPACK_GLOBAL(ctpmqrt,CTPMQRT) #define LAPACK_ztpmqrt LAPACK_GLOBAL(ztpmqrt,ZTPMQRT) #define LAPACK_dtpqrt LAPACK_GLOBAL(dtpqrt,DTPQRT) #define LAPACK_ctpqrt LAPACK_GLOBAL(ctpqrt,CTPQRT) #define LAPACK_ztpqrt LAPACK_GLOBAL(ztpqrt,ZTPQRT) #define LAPACK_stpqrt2 LAPACK_GLOBAL(stpqrt2,STPQRT2) #define LAPACK_dtpqrt2 LAPACK_GLOBAL(dtpqrt2,DTPQRT2) #define LAPACK_ctpqrt2 LAPACK_GLOBAL(ctpqrt2,CTPQRT2) #define LAPACK_ztpqrt2 LAPACK_GLOBAL(ztpqrt2,ZTPQRT2) #define LAPACK_stprfb LAPACK_GLOBAL(stprfb,STPRFB) #define LAPACK_dtprfb LAPACK_GLOBAL(dtprfb,DTPRFB) #define LAPACK_ctprfb LAPACK_GLOBAL(ctprfb,CTPRFB) #define LAPACK_ztprfb LAPACK_GLOBAL(ztprfb,ZTPRFB) // LAPACK 3.X.X #define LAPACK_ssysv_rook LAPACK_GLOBAL(ssysv_rook,SSYSV_ROOK) #define LAPACK_dsysv_rook LAPACK_GLOBAL(dsysv_rook,DSYSV_ROOK) #define LAPACK_csysv_rook LAPACK_GLOBAL(csysv_rook,CSYSV_ROOK) #define LAPACK_zsysv_rook LAPACK_GLOBAL(zsysv_rook,ZSYSV_ROOK) #define LAPACK_csyr LAPACK_GLOBAL(csyr,CSYR) #define LAPACK_zsyr LAPACK_GLOBAL(zsyr,ZSYR) #define LAPACK_ilaver LAPACK_GLOBAL(ilaver,ILAVER) void LAPACK_sgetrf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_dgetrf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_cgetrf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_zgetrf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_sgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, float* ab, lapack_int* ldab, lapack_int* ipiv, lapack_int *info ); void LAPACK_dgbtrf( lapack_int* m, lapack_int* n, 
lapack_int* kl, lapack_int* ku, double* ab, lapack_int* ldab, lapack_int* ipiv, lapack_int *info ); void LAPACK_cgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_complex_float* ab, lapack_int* ldab, lapack_int* ipiv, lapack_int *info ); void LAPACK_zgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_complex_double* ab, lapack_int* ldab, lapack_int* ipiv, lapack_int *info ); void LAPACK_sgttrf( lapack_int* n, float* dl, float* d, float* du, float* du2, lapack_int* ipiv, lapack_int *info ); void LAPACK_dgttrf( lapack_int* n, double* dl, double* d, double* du, double* du2, lapack_int* ipiv, lapack_int *info ); void LAPACK_cgttrf( lapack_int* n, lapack_complex_float* dl, lapack_complex_float* d, lapack_complex_float* du, lapack_complex_float* du2, lapack_int* ipiv, lapack_int *info ); void LAPACK_zgttrf( lapack_int* n, lapack_complex_double* dl, lapack_complex_double* d, lapack_complex_double* du, lapack_complex_double* du2, lapack_int* ipiv, lapack_int *info ); void LAPACK_spotrf( char* uplo, lapack_int* n, float* a, lapack_int* lda, lapack_int *info ); void LAPACK_dpotrf( char* uplo, lapack_int* n, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_cpotrf( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int *info ); void LAPACK_zpotrf( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_dpstrf( char* uplo, lapack_int* n, double* a, lapack_int* lda, lapack_int* piv, lapack_int* rank, double* tol, double* work, lapack_int *info ); void LAPACK_spstrf( char* uplo, lapack_int* n, float* a, lapack_int* lda, lapack_int* piv, lapack_int* rank, float* tol, float* work, lapack_int *info ); void LAPACK_zpstrf( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* piv, lapack_int* rank, double* tol, double* work, lapack_int *info ); void LAPACK_cpstrf( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* 
lda, lapack_int* piv, lapack_int* rank, float* tol, float* work, lapack_int *info ); void LAPACK_dpftrf( char* transr, char* uplo, lapack_int* n, double* a, lapack_int *info ); void LAPACK_spftrf( char* transr, char* uplo, lapack_int* n, float* a, lapack_int *info ); void LAPACK_zpftrf( char* transr, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int *info ); void LAPACK_cpftrf( char* transr, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int *info ); void LAPACK_spptrf( char* uplo, lapack_int* n, float* ap, lapack_int *info ); void LAPACK_dpptrf( char* uplo, lapack_int* n, double* ap, lapack_int *info ); void LAPACK_cpptrf( char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_int *info ); void LAPACK_zpptrf( char* uplo, lapack_int* n, lapack_complex_double* ap, lapack_int *info ); void LAPACK_spbtrf( char* uplo, lapack_int* n, lapack_int* kd, float* ab, lapack_int* ldab, lapack_int *info ); void LAPACK_dpbtrf( char* uplo, lapack_int* n, lapack_int* kd, double* ab, lapack_int* ldab, lapack_int *info ); void LAPACK_cpbtrf( char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_float* ab, lapack_int* ldab, lapack_int *info ); void LAPACK_zpbtrf( char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab, lapack_int *info ); void LAPACK_spttrf( lapack_int* n, float* d, float* e, lapack_int *info ); void LAPACK_dpttrf( lapack_int* n, double* d, double* e, lapack_int *info ); void LAPACK_cpttrf( lapack_int* n, float* d, lapack_complex_float* e, lapack_int *info ); void LAPACK_zpttrf( lapack_int* n, double* d, lapack_complex_double* e, lapack_int *info ); void LAPACK_ssytrf( char* uplo, lapack_int* n, float* a, lapack_int* lda, lapack_int* ipiv, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsytrf( char* uplo, lapack_int* n, double* a, lapack_int* lda, lapack_int* ipiv, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_csytrf( char* uplo, lapack_int* n, lapack_complex_float* 
a, lapack_int* lda, lapack_int* ipiv, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zsytrf( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_chetrf( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zhetrf( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ssptrf( char* uplo, lapack_int* n, float* ap, lapack_int* ipiv, lapack_int *info ); void LAPACK_dsptrf( char* uplo, lapack_int* n, double* ap, lapack_int* ipiv, lapack_int *info ); void LAPACK_csptrf( char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_int* ipiv, lapack_int *info ); void LAPACK_zsptrf( char* uplo, lapack_int* n, lapack_complex_double* ap, lapack_int* ipiv, lapack_int *info ); void LAPACK_chptrf( char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_int* ipiv, lapack_int *info ); void LAPACK_zhptrf( char* uplo, lapack_int* n, lapack_complex_double* ap, lapack_int* ipiv, lapack_int *info ); void LAPACK_sgetrs( char* trans, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dgetrs( char* trans, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cgetrs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zgetrs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, 
lapack_int *info ); void LAPACK_sgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const float* ab, lapack_int* ldab, const lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const double* ab, lapack_int* ldab, const lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sgttrs( char* trans, lapack_int* n, lapack_int* nrhs, const float* dl, const float* d, const float* du, const float* du2, const lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dgttrs( char* trans, lapack_int* n, lapack_int* nrhs, const double* dl, const double* d, const double* du, const double* du2, const lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cgttrs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* du2, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zgttrs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* du2, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_spotrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, float* b, lapack_int* 
ldb, lapack_int *info ); void LAPACK_dpotrs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cpotrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zpotrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_spftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_spptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* ap, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dpptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* ap, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cpptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zpptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_spbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const float* ab, lapack_int* ldab, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const double* ab, 
lapack_int* ldab, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_spttrs( lapack_int* n, lapack_int* nrhs, const float* d, const float* e, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dpttrs( lapack_int* n, lapack_int* nrhs, const double* d, const double* e, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cpttrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* d, const lapack_complex_float* e, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zpttrs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* d, const lapack_complex_double* e, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ssytrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dsytrs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_csytrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zsytrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_chetrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, 
lapack_int *info ); void LAPACK_zhetrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ssptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* ap, const lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dsptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* ap, const lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_csptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zsptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_chptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zhptrs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_strtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dtrtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ctrtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ztrtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int 
*info ); void LAPACK_stptrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const float* ap, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dtptrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const double* ap, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ctptrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ztptrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_stbtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const float* ab, lapack_int* ldab, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dtbtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const double* ab, lapack_int* ldab, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ctbtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_ztbtrs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sgecon( char* norm, lapack_int* n, const float* a, lapack_int* lda, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgecon( char* norm, lapack_int* n, const double* a, lapack_int* lda, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgecon( char* norm, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* anorm, float* rcond, lapack_complex_float* work, float* rwork, 
lapack_int *info ); void LAPACK_zgecon( char* norm, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* anorm, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku, const float* ab, lapack_int* ldab, const lapack_int* ipiv, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku, const double* ab, lapack_int* ldab, const lapack_int* ipiv, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku, const lapack_complex_float* ab, lapack_int* ldab, const lapack_int* ipiv, float* anorm, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku, const lapack_complex_double* ab, lapack_int* ldab, const lapack_int* ipiv, double* anorm, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sgtcon( char* norm, lapack_int* n, const float* dl, const float* d, const float* du, const float* du2, const lapack_int* ipiv, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgtcon( char* norm, lapack_int* n, const double* dl, const double* d, const double* du, const double* du2, const lapack_int* ipiv, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgtcon( char* norm, lapack_int* n, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* du2, const lapack_int* ipiv, float* anorm, float* rcond, lapack_complex_float* work, lapack_int *info ); void LAPACK_zgtcon( char* norm, lapack_int* n, const lapack_complex_double* dl, const lapack_complex_double* d, const 
lapack_complex_double* du, const lapack_complex_double* du2, const lapack_int* ipiv, double* anorm, double* rcond, lapack_complex_double* work, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xPOCON: presumably a condition estimate for a positive definite matrix (full storage `a`, `lda`). */
void LAPACK_spocon( char* uplo, lapack_int* n, const float* a, lapack_int* lda, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dpocon( char* uplo, lapack_int* n, const double* a, lapack_int* lda, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_cpocon( char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* anorm, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_zpocon( char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* anorm, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xPPCON: presumably the packed-storage (`ap`) counterpart of xPOCON. */
void LAPACK_sppcon( char* uplo, lapack_int* n, const float* ap, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dppcon( char* uplo, lapack_int* n, const double* ap, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_cppcon( char* uplo, lapack_int* n, const lapack_complex_float* ap, float* anorm, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_zppcon( char* uplo, lapack_int* n, const lapack_complex_double* ap, double* anorm, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xPBCON: presumably the band-storage (`kd`, `ab`, `ldab`) counterpart of xPOCON. */
void LAPACK_spbcon( char* uplo, lapack_int* n, lapack_int* kd, const float* ab, lapack_int* ldab, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dpbcon( char* uplo, lapack_int* n, lapack_int* kd, const double* ab, lapack_int* ldab, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_cpbcon( char* uplo, lapack_int* n, lapack_int* kd, const lapack_complex_float* ab, lapack_int* ldab, float* anorm, float* rcond, 
lapack_complex_float* work, float* rwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xPBCON (double-complex variant): presumably a condition estimate for a positive definite band matrix. */
void LAPACK_zpbcon( char* uplo, lapack_int* n, lapack_int* kd, const lapack_complex_double* ab, lapack_int* ldab, double* anorm, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xPTCON: presumably a condition estimate for a positive definite tridiagonal matrix.
 * In the complex variants `d` and `work` stay real; only `e` is complex. */
void LAPACK_sptcon( lapack_int* n, const float* d, const float* e, float* anorm, float* rcond, float* work, lapack_int *info );
void LAPACK_dptcon( lapack_int* n, const double* d, const double* e, double* anorm, double* rcond, double* work, lapack_int *info );
void LAPACK_cptcon( lapack_int* n, const float* d, const lapack_complex_float* e, float* anorm, float* rcond, float* work, lapack_int *info );
void LAPACK_zptcon( lapack_int* n, const double* d, const lapack_complex_double* e, double* anorm, double* rcond, double* work, lapack_int *info );
/* xSYCON: presumably a condition estimate for a symmetric matrix using pivots `ipiv`.
 * The complex variants declared here take no `rwork`/`iwork` argument. */
void LAPACK_ssycon( char* uplo, lapack_int* n, const float* a, lapack_int* lda, const lapack_int* ipiv, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dsycon( char* uplo, lapack_int* n, const double* a, lapack_int* lda, const lapack_int* ipiv, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_csycon( char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, float* anorm, float* rcond, lapack_complex_float* work, lapack_int *info );
void LAPACK_zsycon( char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, double* anorm, double* rcond, lapack_complex_double* work, lapack_int *info );
/* xHECON: presumably the Hermitian counterpart of xSYCON (complex variants only). */
void LAPACK_checon( char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, float* anorm, float* rcond, lapack_complex_float* work, lapack_int *info );
void LAPACK_zhecon( char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, double* anorm, double* rcond, lapack_complex_double* work, lapack_int *info );
void 
LAPACK_sspcon( char* uplo, lapack_int* n, const float* ap, const lapack_int* ipiv, float* anorm, float* rcond, float* work, lapack_int* iwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xSPCON (LAPACK_sspcon above belongs to it): presumably a condition estimate for a
 * symmetric matrix in packed storage (`ap`) using pivots `ipiv`. */
void LAPACK_dspcon( char* uplo, lapack_int* n, const double* ap, const lapack_int* ipiv, double* anorm, double* rcond, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_cspcon( char* uplo, lapack_int* n, const lapack_complex_float* ap, const lapack_int* ipiv, float* anorm, float* rcond, lapack_complex_float* work, lapack_int *info );
void LAPACK_zspcon( char* uplo, lapack_int* n, const lapack_complex_double* ap, const lapack_int* ipiv, double* anorm, double* rcond, lapack_complex_double* work, lapack_int *info );
/* xHPCON: presumably the Hermitian packed counterpart of xSPCON. */
void LAPACK_chpcon( char* uplo, lapack_int* n, const lapack_complex_float* ap, const lapack_int* ipiv, float* anorm, float* rcond, lapack_complex_float* work, lapack_int *info );
void LAPACK_zhpcon( char* uplo, lapack_int* n, const lapack_complex_double* ap, const lapack_int* ipiv, double* anorm, double* rcond, lapack_complex_double* work, lapack_int *info );
/* xTRCON: presumably a condition estimate for a triangular matrix; unlike the other
 * *CON prototypes nearby, no `anorm` argument is taken. */
void LAPACK_strcon( char* norm, char* uplo, char* diag, lapack_int* n, const float* a, lapack_int* lda, float* rcond, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dtrcon( char* norm, char* uplo, char* diag, lapack_int* n, const double* a, lapack_int* lda, double* rcond, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_ctrcon( char* norm, char* uplo, char* diag, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_ztrcon( char* norm, char* uplo, char* diag, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xTPCON: presumably the packed-storage (`ap`) counterpart of xTRCON. */
void LAPACK_stpcon( char* norm, char* uplo, char* diag, lapack_int* n, const float* ap, float* rcond, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dtpcon( char* norm, char* uplo, char* 
diag, lapack_int* n, const double* ap, double* rcond, double* work, lapack_int* iwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xTPCON (complex variants): presumably a condition estimate for a packed triangular matrix. */
void LAPACK_ctpcon( char* norm, char* uplo, char* diag, lapack_int* n, const lapack_complex_float* ap, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_ztpcon( char* norm, char* uplo, char* diag, lapack_int* n, const lapack_complex_double* ap, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xTBCON: presumably a condition estimate for a triangular band matrix (`kd`, `ab`, `ldab`). */
void LAPACK_stbcon( char* norm, char* uplo, char* diag, lapack_int* n, lapack_int* kd, const float* ab, lapack_int* ldab, float* rcond, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dtbcon( char* norm, char* uplo, char* diag, lapack_int* n, lapack_int* kd, const double* ab, lapack_int* ldab, double* rcond, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_ctbcon( char* norm, char* uplo, char* diag, lapack_int* n, lapack_int* kd, const lapack_complex_float* ab, lapack_int* ldab, float* rcond, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_ztbcon( char* norm, char* uplo, char* diag, lapack_int* n, lapack_int* kd, const lapack_complex_double* ab, lapack_int* ldab, double* rcond, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xGERFS: presumably solution refinement with error bounds (`ferr`, `berr`) for a general
 * matrix; takes both the matrix `a` and a second array `af` plus pivots `ipiv`, and `x` is non-const. */
void LAPACK_sgerfs( char* trans, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dgerfs( char* trans, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_cgerfs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const 
lapack_complex_float* af, lapack_int* ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xGERFS (double-complex variant): presumably solution refinement for a general matrix. */
void LAPACK_zgerfs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xGERFSX: presumably the extended refinement variant of xGERFS. Compared with xGERFS it
 * adds `equed`, scale arrays `r`/`c`, `rcond`, `n_err_bnds`, `err_bnds_norm`,
 * `err_bnds_comp`, `nparams` and `params`, and has no `ferr` argument.
 * Declared in d, s, z, c order. */
void LAPACK_dgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* r, const double* c, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_sgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* r, const float* c, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_zgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* r, const double* c, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, 
lapack_complex_double* work, double* rwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xGERFSX (single-complex variant): presumably extended refinement for a general matrix. */
void LAPACK_cgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* r, const float* c, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info );
/* xGBRFS: presumably solution refinement with error bounds (`ferr`, `berr`) for a general
 * band matrix; takes the band array `ab` and a second band array `afb` plus pivots `ipiv`. */
void LAPACK_sgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const float* ab, lapack_int* ldab, const float* afb, lapack_int* ldafb, const lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const double* ab, lapack_int* ldab, const double* afb, lapack_int* ldafb, const lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_cgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, const lapack_complex_float* afb, lapack_int* ldafb, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_zgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, const lapack_complex_double* afb, lapack_int* ldafb, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, 
lapack_complex_double* work, double* rwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xGBRFSX: presumably extended refinement for a general band matrix. Takes `equed`,
 * scale arrays `r`/`c`, `rcond`, `n_err_bnds`, `err_bnds_norm`, `err_bnds_comp`,
 * `nparams` and `params`. Declared in d, s, z, c order. */
void LAPACK_dgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const double* ab, lapack_int* ldab, const double* afb, lapack_int* ldafb, const lapack_int* ipiv, const double* r, const double* c, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_sgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const float* ab, lapack_int* ldab, const float* afb, lapack_int* ldafb, const lapack_int* ipiv, const float* r, const float* c, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_zgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, const lapack_complex_double* afb, lapack_int* ldafb, const lapack_int* ipiv, const double* r, const double* c, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info );
void LAPACK_cgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, const lapack_complex_float* afb, lapack_int* ldafb, const lapack_int* ipiv, const float* r, const float* c, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* berr, 
lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xGTRFS: presumably solution refinement with error bounds for a general tridiagonal matrix;
 * takes the diagonals `dl`/`d`/`du` and a second set `dlf`/`df`/`duf`/`du2` plus pivots `ipiv`. */
void LAPACK_sgtrfs( char* trans, lapack_int* n, lapack_int* nrhs, const float* dl, const float* d, const float* du, const float* dlf, const float* df, const float* duf, const float* du2, const lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dgtrfs( char* trans, lapack_int* n, lapack_int* nrhs, const double* dl, const double* d, const double* du, const double* dlf, const double* df, const double* duf, const double* du2, const lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_cgtrfs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, const lapack_complex_float* dlf, const lapack_complex_float* df, const lapack_complex_float* duf, const lapack_complex_float* du2, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_zgtrfs( char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, const lapack_complex_double* dlf, const lapack_complex_double* df, const lapack_complex_double* duf, const lapack_complex_double* du2, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xPORFS: presumably solution refinement for a positive definite matrix (no `ipiv` argument). */
void LAPACK_sporfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* af, 
lapack_int* ldaf, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xPORFS: presumably solution refinement with error bounds (`ferr`, `berr`) for a
 * positive definite matrix; takes `a` and a second array `af`, and no pivot array. */
void LAPACK_dporfs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* af, lapack_int* ldaf, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_cporfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_zporfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xPORFSX: presumably the extended refinement variant of xPORFS. Takes `equed`, a single
 * real scale array `s`, `rcond`, `n_err_bnds`, `err_bnds_norm`, `err_bnds_comp`,
 * `nparams` and `params`. Declared in d, s, z, c order. */
void LAPACK_dporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* af, lapack_int* ldaf, const double* s, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_sporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* af, lapack_int* ldaf, const float* s, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_zporfsx( char* uplo, char* equed, 
lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const double* s, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xPORFSX (single-complex variant): presumably extended refinement for a positive definite matrix. */
void LAPACK_cporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const float* s, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info );
/* xPPRFS: presumably solution refinement for a positive definite matrix in packed storage
 * (`ap` and a second packed array `afp`; no leading-dimension arguments for them). */
void LAPACK_spprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* ap, const float* afp, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dpprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* ap, const double* afp, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_cpprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_zpprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* 
rwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xPBRFS: presumably solution refinement for a positive definite band matrix
 * (`kd`, `ab`/`ldab` and a second band array `afb`/`ldafb`). */
void LAPACK_spbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const float* ab, lapack_int* ldab, const float* afb, lapack_int* ldafb, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const double* ab, lapack_int* ldab, const double* afb, lapack_int* ldafb, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_cpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, const lapack_complex_float* afb, lapack_int* ldafb, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_zpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, const lapack_complex_double* afb, lapack_int* ldafb, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xPTRFS: presumably solution refinement for a positive definite tridiagonal matrix.
 * The real variants take no `uplo` and no `iwork`; the complex variants take a leading
 * `uplo` and a real `rwork`, and keep `d`/`df` real while `e`/`ef` are complex. */
void LAPACK_sptrfs( lapack_int* n, lapack_int* nrhs, const float* d, const float* e, const float* df, const float* ef, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int *info );
void LAPACK_dptrfs( lapack_int* n, lapack_int* nrhs, const double* d, const double* e, const double* df, const double* ef, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int *info );
void LAPACK_cptrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* d, const lapack_complex_float* e, const float* df, const lapack_complex_float* ef, const 
lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xPTRFS (double-complex variant): presumably refinement for a positive definite
 * tridiagonal matrix; `d`/`df` are real while `e`/`ef` are complex. */
void LAPACK_zptrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* d, const lapack_complex_double* e, const double* df, const lapack_complex_double* ef, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xSYRFS: presumably solution refinement with error bounds for a symmetric matrix;
 * takes `a`, a second array `af` and pivots `ipiv`. */
void LAPACK_ssyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dsyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_csyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_zsyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xSYRFSX: presumably the extended refinement variant of xSYRFS. */
void LAPACK_dsyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* af, lapack_int* 
ldaf, const lapack_int* ipiv, const double* s, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xSYRFSX: presumably extended refinement for a symmetric matrix. Takes `equed`, pivots
 * `ipiv`, a single real scale array `s`, `rcond`, `n_err_bnds`, `err_bnds_norm`,
 * `err_bnds_comp`, `nparams` and `params`. */
void LAPACK_ssyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* s, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_zsyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* s, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info );
void LAPACK_csyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* s, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info );
/* xHERFS: presumably the Hermitian counterpart of xSYRFS (complex variants only). */
void LAPACK_cherfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, 
lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xHERFS (double-complex variant): presumably solution refinement for a Hermitian matrix. */
void LAPACK_zherfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xHERFSX: presumably extended refinement for a Hermitian matrix; the scale array `s`
 * is real. Declared in z, c order. */
void LAPACK_zherfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* af, lapack_int* ldaf, const lapack_int* ipiv, const double* s, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info );
void LAPACK_cherfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* af, lapack_int* ldaf, const lapack_int* ipiv, const float* s, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info );
/* xSPRFS: presumably solution refinement for a symmetric matrix in packed storage
 * (`ap`, second packed array `afp`, pivots `ipiv`). */
void LAPACK_ssprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* ap, const float* afp, const lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dsprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const double* ap, const double* afp, const lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* 
iwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xSPRFS (complex variants): presumably solution refinement for a packed symmetric matrix. */
void LAPACK_csprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_zsprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xHPRFS: presumably the Hermitian packed counterpart of xSPRFS. */
void LAPACK_chprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, const lapack_complex_float* afp, const lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_zhprfs( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, const lapack_complex_double* afp, const lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xTRRFS: presumably error bounds (`ferr`, `berr`) for a triangular system. Unlike the
 * other *RFS prototypes nearby, `x` is declared const here. */
void LAPACK_strrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const float* b, lapack_int* ldb, const float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dtrrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const double* b, lapack_int* ldb, const double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_ctrrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* 
nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* b, lapack_int* ldb, const lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xTRRFS (double-complex variant): presumably error bounds for a triangular system; `x` is const. */
void LAPACK_ztrrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* b, lapack_int* ldb, const lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xTPRFS: presumably the packed-storage (`ap`) counterpart of xTRRFS; `x` is const. */
void LAPACK_stprfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const float* ap, const float* b, lapack_int* ldb, const float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dtprfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const double* ap, const double* b, lapack_int* ldb, const double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info );
void LAPACK_ctprfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, const lapack_complex_float* b, lapack_int* ldb, const lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_ztprfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, const lapack_complex_double* b, lapack_int* ldb, const lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xTBRFS: presumably the band-storage (`kd`, `ab`, `ldab`) counterpart of xTRRFS; `x` is const. */
void LAPACK_stbrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const float* ab, lapack_int* ldab, const float* b, lapack_int* ldb, const float* x, lapack_int* ldx, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info );
void LAPACK_dtbrfs( 
char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const double* ab, lapack_int* ldab, const double* b, lapack_int* ldb, const double* x, lapack_int* ldx, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info );
/*
 * Prototypes in this run pass every argument by pointer and end with
 * `lapack_int *info`; the s/d/c/z letter after LAPACK_ selects float, double,
 * lapack_complex_float or lapack_complex_double as the main data type.
 * Family descriptions are inferred from LAPACK routine naming only --
 * confirm against the LAPACK reference.
 */
/* xTBRFS (complex variants): presumably error bounds for a triangular band system; `x` is const. */
void LAPACK_ctbrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_float* ab, lapack_int* ldab, const lapack_complex_float* b, lapack_int* ldb, const lapack_complex_float* x, lapack_int* ldx, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info );
void LAPACK_ztbrfs( char* uplo, char* trans, char* diag, lapack_int* n, lapack_int* kd, lapack_int* nrhs, const lapack_complex_double* ab, lapack_int* ldab, const lapack_complex_double* b, lapack_int* ldb, const lapack_complex_double* x, lapack_int* ldx, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info );
/* xGETRI: presumably a general matrix inverse computed in place in `a` using pivots `ipiv`;
 * takes an explicit workspace length `lwork`. */
void LAPACK_sgetri( lapack_int* n, float* a, lapack_int* lda, const lapack_int* ipiv, float* work, lapack_int* lwork, lapack_int *info );
void LAPACK_dgetri( lapack_int* n, double* a, lapack_int* lda, const lapack_int* ipiv, double* work, lapack_int* lwork, lapack_int *info );
void LAPACK_cgetri( lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int* lwork, lapack_int *info );
void LAPACK_zgetri( lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* lwork, lapack_int *info );
/* xPOTRI: presumably a positive definite matrix inverse computed in place in `a`
 * (no workspace arguments). */
void LAPACK_spotri( char* uplo, lapack_int* n, float* a, lapack_int* lda, lapack_int *info );
void LAPACK_dpotri( char* uplo, lapack_int* n, double* a, lapack_int* lda, lapack_int *info );
void LAPACK_cpotri( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int *info );
void LAPACK_zpotri( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int *info );
void 
/*
 * This physical line starts with the name and parameters of LAPACK_dpftri
 * (its `void` return type is the last token of the previous line), then
 * declares LAPACK_{s,z,c}pftri, LAPACK_{s,d,c,z}pptri, LAPACK_{s,d,c,z}sytri,
 * LAPACK_{c,z}hetri and LAPACK_{s,d}sptri, and opens LAPACK_csptri (completed
 * on the next line).
 * The *sytri / *hetri / *sptri variants take a read-only pivot array
 * (`const lapack_int* ipiv`) plus a caller-supplied `work` buffer.
 * NOTE(review): by LAPACK naming these are presumably in-place inversion
 * routines for RFP, packed and symmetric/Hermitian indefinite matrices --
 * confirm in the LAPACK reference.
 */
LAPACK_dpftri( char* transr, char* uplo, lapack_int* n, double* a, lapack_int *info ); void LAPACK_spftri( char* transr, char* uplo, lapack_int* n, float* a, lapack_int *info ); void LAPACK_zpftri( char* transr, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int *info ); void LAPACK_cpftri( char* transr, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int *info ); void LAPACK_spptri( char* uplo, lapack_int* n, float* ap, lapack_int *info ); void LAPACK_dpptri( char* uplo, lapack_int* n, double* ap, lapack_int *info ); void LAPACK_cpptri( char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_int *info ); void LAPACK_zpptri( char* uplo, lapack_int* n, lapack_complex_double* ap, lapack_int *info ); void LAPACK_ssytri( char* uplo, lapack_int* n, float* a, lapack_int* lda, const lapack_int* ipiv, float* work, lapack_int *info ); void LAPACK_dsytri( char* uplo, lapack_int* n, double* a, lapack_int* lda, const lapack_int* ipiv, double* work, lapack_int *info ); void LAPACK_csytri( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int *info ); void LAPACK_zsytri( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int *info ); void LAPACK_chetri( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int *info ); void LAPACK_zhetri( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int *info ); void LAPACK_ssptri( char* uplo, lapack_int* n, float* ap, const lapack_int* ipiv, float* work, lapack_int *info ); void LAPACK_dsptri( char* uplo, lapack_int* n, double* ap, const lapack_int* ipiv, double* work, lapack_int *info ); void LAPACK_csptri( char* uplo, lapack_int* n, lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* 
/*
 * This physical line starts with the last two parameters of LAPACK_csptri
 * (opened on the previous line), then declares LAPACK_zsptri,
 * LAPACK_{c,z}hptri, LAPACK_{s,d,c,z}trtri, LAPACK_{d,s,z,c}tftri,
 * LAPACK_{s,d,c,z}tptri and LAPACK_sgeequ, and opens LAPACK_dgeequ (completed
 * on the next line).
 * All functions return void, take every argument by pointer and end with
 * `lapack_int *info`.
 * NOTE(review): by LAPACK naming, x{TR,TF,TP}TRI are presumably triangular
 * inversions (full, RFP, packed) and xGEEQU computes row/column scalings --
 * confirm in the LAPACK reference.
 */
work, lapack_int *info ); void LAPACK_zsptri( char* uplo, lapack_int* n, lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* work, lapack_int *info ); void LAPACK_chptri( char* uplo, lapack_int* n, lapack_complex_float* ap, const lapack_int* ipiv, lapack_complex_float* work, lapack_int *info ); void LAPACK_zhptri( char* uplo, lapack_int* n, lapack_complex_double* ap, const lapack_int* ipiv, lapack_complex_double* work, lapack_int *info ); void LAPACK_strtri( char* uplo, char* diag, lapack_int* n, float* a, lapack_int* lda, lapack_int *info ); void LAPACK_dtrtri( char* uplo, char* diag, lapack_int* n, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_ctrtri( char* uplo, char* diag, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int *info ); void LAPACK_ztrtri( char* uplo, char* diag, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_dtftri( char* transr, char* uplo, char* diag, lapack_int* n, double* a, lapack_int *info ); void LAPACK_stftri( char* transr, char* uplo, char* diag, lapack_int* n, float* a, lapack_int *info ); void LAPACK_ztftri( char* transr, char* uplo, char* diag, lapack_int* n, lapack_complex_double* a, lapack_int *info ); void LAPACK_ctftri( char* transr, char* uplo, char* diag, lapack_int* n, lapack_complex_float* a, lapack_int *info ); void LAPACK_stptri( char* uplo, char* diag, lapack_int* n, float* ap, lapack_int *info ); void LAPACK_dtptri( char* uplo, char* diag, lapack_int* n, double* ap, lapack_int *info ); void LAPACK_ctptri( char* uplo, char* diag, lapack_int* n, lapack_complex_float* ap, lapack_int *info ); void LAPACK_ztptri( char* uplo, char* diag, lapack_int* n, lapack_complex_double* ap, lapack_int *info ); void LAPACK_sgeequ( lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_dgeequ( lapack_int* m, lapack_int* n, const double* a, 
/*
 * This physical line starts with the remaining parameters of LAPACK_dgeequ
 * (opened on the previous line), then declares LAPACK_{c,z}geequ,
 * LAPACK_{d,s,z,c}geequb and LAPACK_{s,d,c}gbequ, and opens LAPACK_zgbequ
 * (completed on the next line).
 * In every variant the input matrix is `const` and the outputs r, c, rowcnd,
 * colcnd and amax are real-typed (float/double) even for the complex
 * routines.
 * NOTE(review): by LAPACK naming these presumably compute row/column
 * equilibration scalings for general and general-band matrices -- confirm in
 * the LAPACK reference.
 */
lapack_int* lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_cgeequ( lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_zgeequ( lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_dgeequb( lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_sgeequb( lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_zgeequb( lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_cgeequb( lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_sgbequ( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const float* ab, lapack_int* ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_dgbequ( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const double* ab, lapack_int* ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_cgbequ( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const lapack_complex_float* ab, lapack_int* ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_zgbequ( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const lapack_complex_double* ab, lapack_int* ldab, double* r, double* c, double* rowcnd, double* colcnd, double* 
/*
 * This physical line starts with the last two parameters of LAPACK_zgbequ
 * (opened on the previous line), then declares LAPACK_{d,s,z,c}gbequb,
 * LAPACK_{s,d,c,z}poequ and LAPACK_{d,s,z,c}poequb, and opens LAPACK_sppequ
 * (completed on the next line).
 * The *poequ / *poequb variants take a `const` matrix and write the
 * real-typed outputs s, scond and amax.
 * NOTE(review): by LAPACK naming these presumably compute equilibration
 * scalings for band and positive-definite matrices -- confirm in the LAPACK
 * reference.
 */
amax, lapack_int *info ); void LAPACK_dgbequb( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const double* ab, lapack_int* ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_sgbequb( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const float* ab, lapack_int* ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_zgbequb( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const lapack_complex_double* ab, lapack_int* ldab, double* r, double* c, double* rowcnd, double* colcnd, double* amax, lapack_int *info ); void LAPACK_cgbequb( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const lapack_complex_float* ab, lapack_int* ldab, float* r, float* c, float* rowcnd, float* colcnd, float* amax, lapack_int *info ); void LAPACK_spoequ( lapack_int* n, const float* a, lapack_int* lda, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_dpoequ( lapack_int* n, const double* a, lapack_int* lda, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_cpoequ( lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_zpoequ( lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_dpoequb( lapack_int* n, const double* a, lapack_int* lda, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_spoequb( lapack_int* n, const float* a, lapack_int* lda, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_zpoequb( lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_cpoequb( lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_sppequ( char* uplo, lapack_int* n, 
/*
 * This physical line starts with the remaining parameters of LAPACK_sppequ
 * (opened on the previous line), then declares LAPACK_{d,c,z}ppequ,
 * LAPACK_{s,d,c,z}pbequ, LAPACK_{d,s,z,c}syequb and LAPACK_zheequb. It ends
 * with the `void` return type of the declaration that continues on the next
 * line.
 * Unlike the *ppequ / *pbequ variants, the *syequb / *heequb variants take an
 * extra caller-supplied `work` buffer (complex-typed for the c/z routines).
 * NOTE(review): by LAPACK naming these presumably compute equilibration
 * scalings for packed, band and symmetric/Hermitian matrices -- confirm in
 * the LAPACK reference.
 */
const float* ap, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_dppequ( char* uplo, lapack_int* n, const double* ap, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_cppequ( char* uplo, lapack_int* n, const lapack_complex_float* ap, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_zppequ( char* uplo, lapack_int* n, const lapack_complex_double* ap, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_spbequ( char* uplo, lapack_int* n, lapack_int* kd, const float* ab, lapack_int* ldab, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_dpbequ( char* uplo, lapack_int* n, lapack_int* kd, const double* ab, lapack_int* ldab, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_cpbequ( char* uplo, lapack_int* n, lapack_int* kd, const lapack_complex_float* ab, lapack_int* ldab, float* s, float* scond, float* amax, lapack_int *info ); void LAPACK_zpbequ( char* uplo, lapack_int* n, lapack_int* kd, const lapack_complex_double* ab, lapack_int* ldab, double* s, double* scond, double* amax, lapack_int *info ); void LAPACK_dsyequb( char* uplo, lapack_int* n, const double* a, lapack_int* lda, double* s, double* scond, double* amax, double* work, lapack_int *info ); void LAPACK_ssyequb( char* uplo, lapack_int* n, const float* a, lapack_int* lda, float* s, float* scond, float* amax, float* work, lapack_int *info ); void LAPACK_zsyequb( char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* s, double* scond, double* amax, lapack_complex_double* work, lapack_int *info ); void LAPACK_csyequb( char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* s, float* scond, float* amax, lapack_complex_float* work, lapack_int *info ); void LAPACK_zheequb( char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* s, double* scond, double* amax, lapack_complex_double* work, lapack_int *info ); void 
/*
 * This physical line starts with the name and parameters of LAPACK_cheequb
 * (its `void` return type is the last token of the previous line), then
 * declares LAPACK_{s,d,c,z}gesv, LAPACK_dsgesv, LAPACK_zcgesv and
 * LAPACK_sgesvx, and opens LAPACK_dgesvx (completed on the next line).
 * LAPACK_dsgesv / LAPACK_zcgesv differ from plain *gesv by taking a separate
 * solution array x, a lower-precision `swork` buffer and an `iter` output.
 * NOTE(review): by LAPACK naming these are presumably the general
 * linear-system drivers (simple, mixed-precision, expert) -- confirm in the
 * LAPACK reference.
 */
LAPACK_cheequb( char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* s, float* scond, float* amax, lapack_complex_float* work, lapack_int *info ); void LAPACK_sgesv( lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dgesv( lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dsgesv( lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, lapack_int* ipiv, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* work, float* swork, lapack_int* iter, lapack_int *info ); void LAPACK_zcgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, lapack_complex_double* work, lapack_complex_float* swork, double* rwork, lapack_int* iter, lapack_int *info ); void LAPACK_sgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, 
/*
 * This physical line starts with the final `info` parameter of LAPACK_dgesvx
 * (opened on the previous line), then declares LAPACK_cgesvx, LAPACK_zgesvx,
 * LAPACK_dgesvxx and LAPACK_sgesvxx, and opens LAPACK_zgesvxx (completed on
 * the next line).
 * The *gesvxx signatures replace the single `ferr` output of *gesvx with
 * rpvgrw, n_err_bnds, err_bnds_norm, err_bnds_comp, nparams and params.
 * NOTE(review): by LAPACK naming these are presumably the expert and
 * extra-precise drivers for general systems; the meaning of the error-bound
 * arrays is not visible here -- confirm in the LAPACK reference.
 */
lapack_int *info ); void LAPACK_cgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_dgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_sgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_zgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int* ldb, 
/*
 * This physical line starts with the remaining parameters of LAPACK_zgesvxx
 * (opened on the previous line), then declares LAPACK_cgesvxx,
 * LAPACK_{s,d,c,z}gbsv and LAPACK_sgbsvx, and opens LAPACK_dgbsvx (completed
 * on the next line).
 * The *gbsv* signatures carry the extra band-width parameters kl and ku and
 * use ab/ldab instead of a/lda.
 * NOTE(review): by LAPACK naming these are presumably the general-band
 * linear-system drivers -- confirm in the LAPACK reference.
 */
lapack_complex_double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_cgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_sgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, float* ab, lapack_int* ldab, lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, double* ab, lapack_int* ldab, lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, lapack_complex_double* ab, lapack_int* ldab, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, float* ab, lapack_int* ldab, float* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* 
/*
 * This physical line starts with the remaining parameters of LAPACK_dgbsvx
 * (opened on the previous line), then declares LAPACK_cgbsvx, LAPACK_zgbsvx
 * and LAPACK_dgbsvxx, and opens LAPACK_sgbsvxx (completed on the next line).
 * The real variants take an integer workspace `iwork`; the complex variants
 * take a real workspace `rwork` instead.
 * NOTE(review): by LAPACK naming these are presumably the expert /
 * extra-precise general-band drivers -- confirm in the LAPACK reference.
 */
ku, lapack_int* nrhs, double* ab, lapack_int* ldab, double* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_dgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, double* ab, lapack_int* ldab, double* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, double* r, double* c, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_sgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, float* ab, lapack_int* ldab, float* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r, float* c, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* 
/*
 * This physical line starts with the remaining parameters of LAPACK_sgbsvxx
 * (opened on the previous line), then declares LAPACK_zgbsvxx,
 * LAPACK_cgbsvxx and LAPACK_{s,d,c,z}gtsv, and opens LAPACK_sgtsvx (completed
 * on the next line).
 * The *gtsv signatures take three separate arrays dl, d and du rather than a
 * single matrix pointer with a leading dimension.
 * NOTE(review): by LAPACK naming *gtsv are presumably the tridiagonal
 * linear-system drivers, with dl/d/du the sub-, main and super-diagonal --
 * confirm in the LAPACK reference.
 */
n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_zgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, double* r, double* c, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_cgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku, lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* afb, lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r, float* c, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_sgtsv( lapack_int* n, lapack_int* nrhs, float* dl, float* d, float* du, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dgtsv( lapack_int* n, lapack_int* nrhs, double* dl, double* d, double* du, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cgtsv( lapack_int* n, lapack_int* nrhs, lapack_complex_float* dl, lapack_complex_float* d, lapack_complex_float* du, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zgtsv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* dl, lapack_complex_double* d, lapack_complex_double* du, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, const float* dl, const 
/*
 * This physical line starts with the remaining parameters of LAPACK_sgtsvx
 * (opened on the previous line), then declares LAPACK_{d,c,z}gtsvx,
 * LAPACK_sposv and LAPACK_dposv, and opens LAPACK_cposv (completed on the
 * next line).
 * In *gtsvx the inputs dl, d, du and b are `const`, while dlf, df, duf, du2
 * and ipiv are writable. In *posv both a and b are writable.
 * NOTE(review): by LAPACK naming *gtsvx is presumably the expert tridiagonal
 * driver and *posv the positive-definite driver -- confirm in the LAPACK
 * reference.
 */
float* d, const float* du, float* dlf, float* df, float* duf, float* du2, lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, const double* dl, const double* d, const double* du, double* dlf, double* df, double* duf, double* du2, lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* dl, const lapack_complex_float* d, const lapack_complex_float* du, lapack_complex_float* dlf, lapack_complex_float* df, lapack_complex_float* duf, lapack_complex_float* du2, lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* dl, const lapack_complex_double* d, const lapack_complex_double* du, lapack_complex_double* dlf, lapack_complex_double* df, lapack_complex_double* duf, lapack_complex_double* du2, lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sposv( char* uplo, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dposv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cposv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, 
/*
 * This physical line starts with the remaining parameters of LAPACK_cposv
 * (opened on the previous line), then declares LAPACK_zposv, LAPACK_dsposv,
 * LAPACK_zcposv and LAPACK_{s,d,c}posvx, and opens LAPACK_zposvx (completed
 * on the next line).
 * LAPACK_dsposv / LAPACK_zcposv add a separate solution array x, a
 * lower-precision `swork` buffer and an `iter` output to the plain *posv
 * signature.
 * NOTE(review): by LAPACK naming these are presumably the positive-definite
 * drivers (simple, mixed-precision, expert) -- confirm in the LAPACK
 * reference.
 */
lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zposv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dsposv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* work, float* swork, lapack_int* iter, lapack_int *info ); void LAPACK_zcposv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, lapack_complex_double* work, lapack_complex_float* swork, double* rwork, lapack_int* iter, lapack_int *info ); void LAPACK_sposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* af, lapack_int* ldaf, char* equed, float* s, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* af, lapack_int* ldaf, char* equed, double* s, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, char* equed, float* s, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, char* equed, double* s, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, 
/*
 * This physical line starts with the remaining parameters of LAPACK_zposvx
 * (opened on the previous line), then declares LAPACK_{d,s,z,c}posvxx, and
 * opens LAPACK_sppsv (completed on the next line).
 * The *posvxx signatures have no pivot array and carry the rpvgrw,
 * n_err_bnds, err_bnds_norm, err_bnds_comp, nparams and params parameters.
 * NOTE(review): by LAPACK naming these are presumably the extra-precise
 * positive-definite drivers -- confirm in the LAPACK reference.
 */
lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_dposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* af, lapack_int* ldaf, char* equed, double* s, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_sposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* af, lapack_int* ldaf, char* equed, float* s, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_zposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, char* equed, double* s, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_cposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, char* equed, float* s, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_sppsv( char* uplo, lapack_int* n, lapack_int* nrhs, float* ap, 
/*
 * This physical line starts with the remaining parameters of LAPACK_sppsv
 * (opened on the previous line), then declares LAPACK_{d,c,z}ppsv,
 * LAPACK_{s,d,c,z}ppsvx and LAPACK_spbsv, and opens LAPACK_dpbsv (completed
 * on the next line).
 * The *ppsv* signatures pass the matrix as a single `ap` array with no
 * leading dimension; *pbsv adds a band-width parameter kd and uses ab/ldab.
 * NOTE(review): by LAPACK naming these are presumably the packed and banded
 * positive-definite drivers -- confirm in the LAPACK reference.
 */
float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dppsv( char* uplo, lapack_int* n, lapack_int* nrhs, double* ap, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cppsv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* ap, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zppsv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* ap, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, float* ap, float* afp, char* equed, float* s, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, double* ap, double* afp, char* equed, double* s, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* ap, lapack_complex_float* afp, char* equed, float* s, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* ap, lapack_complex_double* afp, char* equed, double* s, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_spbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, float* ab, lapack_int* ldab, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, double* ab, lapack_int* ldab, double* b, 
/*
 * This physical line starts with the last two parameters of LAPACK_dpbsv
 * (opened on the previous line), then declares LAPACK_{c,z}pbsv,
 * LAPACK_{s,d,c,z}pbsvx and LAPACK_sptsv, and opens LAPACK_dptsv (completed
 * on the next line).
 * LAPACK_sptsv takes two arrays d and e instead of a matrix pointer with a
 * leading dimension.
 * NOTE(review): by LAPACK naming *pbsv* are presumably the banded
 * positive-definite drivers and *ptsv the positive-definite tridiagonal
 * driver, with d/e the diagonal and off-diagonal -- confirm in the LAPACK
 * reference.
 */
lapack_int* ldb, lapack_int *info ); void LAPACK_cpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_spbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, float* ab, lapack_int* ldab, float* afb, lapack_int* ldafb, char* equed, float* s, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, double* ab, lapack_int* ldab, double* afb, lapack_int* ldafb, char* equed, double* s, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* afb, lapack_int* ldafb, char* equed, float* s, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* afb, lapack_int* ldafb, char* equed, double* s, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sptsv( lapack_int* n, lapack_int* nrhs, float* d, float* e, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dptsv( 
/*
 * This physical line starts with the parameter list of LAPACK_dptsv (opened
 * at the end of the previous line), then declares LAPACK_{c,z}ptsv,
 * LAPACK_{s,d,c,z}ptsvx, LAPACK_ssysv and LAPACK_dsysv. It ends with the
 * `void` return type of the declaration that continues on the next line.
 * In the complex *ptsv / *ptsvx variants d (and df) stay real-typed
 * (float/double) while e (and ef) are complex. The real *ptsvx variants have
 * no integer or real auxiliary workspace, only `work`.
 * NOTE(review): by LAPACK naming *sysv is presumably the symmetric
 * indefinite driver, with work/lwork following the usual workspace
 * convention -- confirm in the LAPACK reference.
 */
lapack_int* n, lapack_int* nrhs, double* d, double* e, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cptsv( lapack_int* n, lapack_int* nrhs, float* d, lapack_complex_float* e, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zptsv( lapack_int* n, lapack_int* nrhs, double* d, lapack_complex_double* e, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const float* d, const float* e, float* df, float* ef, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int *info ); void LAPACK_dptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const double* d, const double* e, double* df, double* ef, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int *info ); void LAPACK_cptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const float* d, const lapack_complex_float* e, float* df, lapack_complex_float* ef, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const double* d, const lapack_complex_double* e, double* df, lapack_complex_double* ef, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_ssysv( char* uplo, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, lapack_int* ipiv, float* b, lapack_int* ldb, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsysv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, lapack_int* ipiv, double* b, lapack_int* ldb, double* work, lapack_int* lwork, lapack_int *info ); void 
/*
 * This physical line starts with the name and parameters of LAPACK_csysv
 * (its `void` return type is the last token of the previous line), then
 * declares LAPACK_zsysv and LAPACK_{s,d,c,z}sysvx, and opens LAPACK_dsysvxx
 * (completed on the next line).
 * In *sysvx the inputs a and b are `const`; the real variants take
 * work/lwork plus `iwork`, the complex variants work/lwork plus `rwork`.
 * NOTE(review): by LAPACK naming these are presumably the symmetric
 * indefinite drivers (simple and expert) -- confirm in the LAPACK reference.
 */
LAPACK_csysv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zsysv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ssysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, float* af, lapack_int* ldaf, lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dsysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, double* af, lapack_int* ldaf, lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_csysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zsysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_dsysvxx( char* fact, char* uplo, lapack_int* n, 
lapack_int* nrhs, double* a, lapack_int* lda, double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* s, double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_ssysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* s, float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_zsysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* s, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_csysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* s, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_chesv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, 
lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zhesv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_chesvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zhesvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_zhesvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, double* s, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* rpvgrw, double* berr, lapack_int* n_err_bnds, double* err_bnds_norm, double* err_bnds_comp, lapack_int* nparams, double* params, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_chesvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* af, lapack_int* ldaf, lapack_int* ipiv, char* equed, float* s, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* rpvgrw, float* berr, lapack_int* n_err_bnds, float* err_bnds_norm, 
float* err_bnds_comp, lapack_int* nparams, float* params, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_sspsv( char* uplo, lapack_int* n, lapack_int* nrhs, float* ap, lapack_int* ipiv, float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dspsv( char* uplo, lapack_int* n, lapack_int* nrhs, double* ap, lapack_int* ipiv, double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_cspsv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* ap, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zspsv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* ap, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const float* ap, float* afp, lapack_int* ipiv, const float* b, lapack_int* ldb, float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const double* ap, double* afp, lapack_int* ipiv, const double* b, lapack_int* ldb, double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, lapack_complex_float* afp, lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, lapack_complex_double* afp, lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int 
*info ); void LAPACK_chpsv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* ap, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zhpsv( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* ap, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_chpsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* ap, lapack_complex_float* afp, lapack_int* ipiv, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx, float* rcond, float* ferr, float* berr, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhpsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* ap, lapack_complex_double* afp, lapack_int* ipiv, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx, double* rcond, double* ferr, double* berr, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sgeqrf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgeqrf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgeqrf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgeqrf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgeqpf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, lapack_int* jpvt, float* tau, float* work, lapack_int *info ); void LAPACK_dgeqpf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, lapack_int* jpvt, double* tau, double* work, lapack_int *info ); void 
/*
 * Prototype declarations only (no bodies): c/z geqpf, geqp3, orgqr/ungqr,
 * ormqr/unmqr, gelqf, orglq/unglq, ormlq/unmlq and geqlf.
 * This comment sits between the `void` return type that ends the preceding
 * physical line and the function name below (comments are legal there).
 *
 * Visible in the signatures:
 *   - every prototype returns void, takes all arguments by pointer and ends
 *     with `lapack_int *info`;
 *   - `tau` is writable in the *geqpf/*geqp3/*gelqf/*geqlf prototypes and
 *     const in the org/orm/ung/unm prototypes, where `a` is const only in the
 *     orm/unm ones;
 *   - geqpf has `work` but no `lwork`; every other prototype here has both;
 *   - the complex geqpf/geqp3 variants add a real `rwork` array.
 * The final `void` on the last line of this group is the return type of the
 * prototype whose name starts the next physical line.
 * Routine semantics are not shown here; presumably the LAPACK routines of the
 * same name -- confirm in the LAPACK reference.
 */
LAPACK_cgeqpf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* jpvt, lapack_complex_float* tau, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgeqpf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* jpvt, lapack_complex_double* tau, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sgeqp3( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, lapack_int* jpvt, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgeqp3( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, lapack_int* jpvt, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgeqp3( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* jpvt, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zgeqp3( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* jpvt, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sorgqr( lapack_int* m, lapack_int* n, lapack_int* k, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorgqr( lapack_int* m, lapack_int* n, lapack_int* k, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormqr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormqr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cungqr( lapack_int* m, lapack_int* n, lapack_int* k, 
lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zungqr( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmqr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmqr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgelqf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgelqf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgelqf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgelqf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sorglq( lapack_int* m, lapack_int* n, lapack_int* k, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorglq( lapack_int* m, lapack_int* n, lapack_int* k, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormlq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const 
float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormlq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunglq( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunglq( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmlq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmlq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgeqlf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgeqlf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgeqlf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgeqlf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void 
/*
 * Prototype declarations only (no bodies): orgql/ungql, ormql/unmql, gerqf,
 * orgrq/ungrq, ormrq/unmrq, tzrzf, ormrz/unmrz, ggqrf, ggrqf and sgebrd.
 * This comment sits between the `void` return type that ends the preceding
 * physical line and the function name below (comments are legal there).
 *
 * Visible in the signatures:
 *   - every prototype returns void, takes all arguments by pointer, carries a
 *     `work`/`lwork` pair and ends with `lapack_int *info`;
 *   - ormrz/unmrz take an extra `l` dimension right after `k`;
 *   - the leading dimension arguments are ordered (n, m, p) for ggqrf but
 *     (m, p, n) for ggrqf -- keep them as declared, do not "normalise";
 *   - `a` and `tau` are const only in the orm/unm prototypes (tau is also
 *     const in the org/ung ones).
 * Routine semantics are not shown here; presumably the LAPACK routines of the
 * same name -- confirm in the LAPACK reference.
 */
LAPACK_sorgql( lapack_int* m, lapack_int* n, lapack_int* k, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorgql( lapack_int* m, lapack_int* n, lapack_int* k, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cungql( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zungql( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormql( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormql( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmql( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmql( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgerqf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgerqf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, 
lapack_int* lwork, lapack_int *info ); void LAPACK_cgerqf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgerqf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sorgrq( lapack_int* m, lapack_int* n, lapack_int* k, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorgrq( lapack_int* m, lapack_int* n, lapack_int* k, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cungrq( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zungrq( lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormrq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormrq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmrq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmrq( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const 
lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_stzrzf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dtzrzf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ctzrzf( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ztzrzf( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormrz( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormrz( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmrz( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmrz( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sggqrf( lapack_int* n, lapack_int* m, lapack_int* p, float* a, 
lapack_int* lda, float* taua, float* b, lapack_int* ldb, float* taub, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dggqrf( lapack_int* n, lapack_int* m, lapack_int* p, double* a, lapack_int* lda, double* taua, double* b, lapack_int* ldb, double* taub, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cggqrf( lapack_int* n, lapack_int* m, lapack_int* p, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* taua, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* taub, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zggqrf( lapack_int* n, lapack_int* m, lapack_int* p, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* taua, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* taub, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sggrqf( lapack_int* m, lapack_int* p, lapack_int* n, float* a, lapack_int* lda, float* taua, float* b, lapack_int* ldb, float* taub, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dggrqf( lapack_int* m, lapack_int* p, lapack_int* n, double* a, lapack_int* lda, double* taua, double* b, lapack_int* ldb, double* taub, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cggrqf( lapack_int* m, lapack_int* p, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* taua, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* taub, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zggrqf( lapack_int* m, lapack_int* p, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* taua, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* taub, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgebrd( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* d, float* e, float* tauq, float* taup, float* work, lapack_int* lwork, lapack_int *info ); 
/*
 * Prototype declarations only (no bodies): d/c/z gebrd, gbbrd, orgbr/ungbr,
 * ormbr/unmbr, bdsqr, bdsdc, sytrd/hetrd, orgtr/ungtr, ormtr/unmtr,
 * sptrd/hptrd, opgtr/upgtr, opmtr/upmtr, sbtrd/hbtrd, sterf, steqr and the
 * s/d stemr entries.
 *
 * Visible in the signatures:
 *   - every prototype returns void, takes all arguments by pointer and ends
 *     with `lapack_int *info`;
 *   - in the complex gebrd/gbbrd/hetrd/hptrd/hbtrd/bdsqr/steqr variants the
 *     `d` and `e` arrays stay real (float or double);
 *   - cbdsqr/zbdsqr and csteqr/zsteqr declare `work` as a real array even
 *     though their matrix arguments are complex -- that is what the header
 *     says, so do not "fix" it to a complex type without checking the LAPACK
 *     reference;
 *   - gbbrd, bdsqr, bdsdc, sptrd/hptrd, opgtr/upgtr, opmtr/upmtr, sbtrd/hbtrd,
 *     sterf and steqr have no `lwork` argument;
 *   - stemr takes `lapack_logical* tryrac` plus `nzc`, `lwork` and `liwork`.
 * The final `void` on the last line of this group is the return type of the
 * prototype whose name starts the next physical line.
 * Routine semantics are not shown here; presumably the LAPACK routines of the
 * same name -- confirm in the LAPACK reference.
 */
void LAPACK_dgebrd( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* d, double* e, double* tauq, double* taup, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgebrd( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* d, float* e, lapack_complex_float* tauq, lapack_complex_float* taup, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgebrd( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* d, double* e, lapack_complex_double* tauq, lapack_complex_double* taup, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc, lapack_int* kl, lapack_int* ku, float* ab, lapack_int* ldab, float* d, float* e, float* q, lapack_int* ldq, float* pt, lapack_int* ldpt, float* c, lapack_int* ldc, float* work, lapack_int *info ); void LAPACK_dgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc, lapack_int* kl, lapack_int* ku, double* ab, lapack_int* ldab, double* d, double* e, double* q, lapack_int* ldq, double* pt, lapack_int* ldpt, double* c, lapack_int* ldc, double* work, lapack_int *info ); void LAPACK_cgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc, lapack_int* kl, lapack_int* ku, lapack_complex_float* ab, lapack_int* ldab, float* d, float* e, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* pt, lapack_int* ldpt, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc, lapack_int* kl, lapack_int* ku, lapack_complex_double* ab, lapack_int* ldab, double* d, double* e, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* pt, lapack_int* ldpt, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sorgbr( char* vect, lapack_int* 
m, lapack_int* n, lapack_int* k, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorgbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormbr( char* vect, char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormbr( char* vect, char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cungbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zungbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmbr( char* vect, char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmbr( char* vect, char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt, lapack_int* nru, lapack_int* ncc, float* d, float* e, float* vt, lapack_int* ldvt, float* u, lapack_int* ldu, float* c, 
lapack_int* ldc, float* work, lapack_int *info ); void LAPACK_dbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt, lapack_int* nru, lapack_int* ncc, double* d, double* e, double* vt, lapack_int* ldvt, double* u, lapack_int* ldu, double* c, lapack_int* ldc, double* work, lapack_int *info ); void LAPACK_cbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt, lapack_int* nru, lapack_int* ncc, float* d, float* e, lapack_complex_float* vt, lapack_int* ldvt, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* c, lapack_int* ldc, float* work, lapack_int *info ); void LAPACK_zbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt, lapack_int* nru, lapack_int* ncc, double* d, double* e, lapack_complex_double* vt, lapack_int* ldvt, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* c, lapack_int* ldc, double* work, lapack_int *info ); void LAPACK_sbdsdc( char* uplo, char* compq, lapack_int* n, float* d, float* e, float* u, lapack_int* ldu, float* vt, lapack_int* ldvt, float* q, lapack_int* iq, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dbdsdc( char* uplo, char* compq, lapack_int* n, double* d, double* e, double* u, lapack_int* ldu, double* vt, lapack_int* ldvt, double* q, lapack_int* iq, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_ssytrd( char* uplo, lapack_int* n, float* a, lapack_int* lda, float* d, float* e, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsytrd( char* uplo, lapack_int* n, double* a, lapack_int* lda, double* d, double* e, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sorgtr( char* uplo, lapack_int* n, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorgtr( char* uplo, lapack_int* n, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const 
float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_chetrd( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* d, float* e, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zhetrd( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* d, double* e, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cungtr( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zungtr( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ssptrd( char* uplo, lapack_int* n, float* ap, float* d, float* e, float* tau, lapack_int *info ); void LAPACK_dsptrd( char* uplo, lapack_int* n, double* ap, double* d, double* e, double* tau, lapack_int *info ); void LAPACK_sopgtr( char* uplo, lapack_int* n, const float* ap, const float* 
tau, float* q, lapack_int* ldq, float* work, lapack_int *info ); void LAPACK_dopgtr( char* uplo, lapack_int* n, const double* ap, const double* tau, double* q, lapack_int* ldq, double* work, lapack_int *info ); void LAPACK_sopmtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const float* ap, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int *info ); void LAPACK_dopmtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const double* ap, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int *info ); void LAPACK_chptrd( char* uplo, lapack_int* n, lapack_complex_float* ap, float* d, float* e, lapack_complex_float* tau, lapack_int *info ); void LAPACK_zhptrd( char* uplo, lapack_int* n, lapack_complex_double* ap, double* d, double* e, lapack_complex_double* tau, lapack_int *info ); void LAPACK_cupgtr( char* uplo, lapack_int* n, const lapack_complex_float* ap, const lapack_complex_float* tau, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* work, lapack_int *info ); void LAPACK_zupgtr( char* uplo, lapack_int* n, const lapack_complex_double* ap, const lapack_complex_double* tau, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* work, lapack_int *info ); void LAPACK_cupmtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const lapack_complex_float* ap, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int *info ); void LAPACK_zupmtr( char* side, char* uplo, char* trans, lapack_int* m, lapack_int* n, const lapack_complex_double* ap, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int *info ); void LAPACK_ssbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd, float* ab, lapack_int* ldab, float* d, float* e, float* q, lapack_int* ldq, float* work, lapack_int *info ); void LAPACK_dsbtrd( char* vect, char* uplo, lapack_int* 
n, lapack_int* kd, double* ab, lapack_int* ldab, double* d, double* e, double* q, lapack_int* ldq, double* work, lapack_int *info ); void LAPACK_chbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_float* ab, lapack_int* ldab, float* d, float* e, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* work, lapack_int *info ); void LAPACK_zhbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab, double* d, double* e, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* work, lapack_int *info ); void LAPACK_ssterf( lapack_int* n, float* d, float* e, lapack_int *info ); void LAPACK_dsterf( lapack_int* n, double* d, double* e, lapack_int *info ); void LAPACK_ssteqr( char* compz, lapack_int* n, float* d, float* e, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dsteqr( char* compz, lapack_int* n, double* d, double* e, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_csteqr( char* compz, lapack_int* n, float* d, float* e, lapack_complex_float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_zsteqr( char* compz, lapack_int* n, double* d, double* e, lapack_complex_double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_sstemr( char* jobz, char* range, lapack_int* n, float* d, float* e, float* vl, float* vu, lapack_int* il, lapack_int* iu, lapack_int* m, float* w, float* z, lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz, lapack_logical* tryrac, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dstemr( char* jobz, char* range, lapack_int* n, double* d, double* e, double* vl, double* vu, lapack_int* il, lapack_int* iu, lapack_int* m, double* w, double* z, lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz, lapack_logical* tryrac, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void 
LAPACK_cstemr( char* jobz, char* range, lapack_int* n, float* d, float* e, float* vl, float* vu, lapack_int* il, lapack_int* iu, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz, lapack_logical* tryrac, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zstemr( char* jobz, char* range, lapack_int* n, double* d, double* e, double* vl, double* vu, lapack_int* il, lapack_int* iu, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz, lapack_logical* tryrac, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sstedc( char* compz, lapack_int* n, float* d, float* e, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dstedc( char* compz, lapack_int* n, double* d, double* e, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_cstedc( char* compz, lapack_int* n, float* d, float* e, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zstedc( char* compz, lapack_int* n, double* d, double* e, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sstegr( char* jobz, char* range, lapack_int* n, float* d, float* e, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, lapack_int* isuppz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dstegr( char* jobz, char* range, lapack_int* n, double* d, double* e, double* vl, 
double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, lapack_int* isuppz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_cstegr( char* jobz, char* range, lapack_int* n, float* d, float* e, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_int* isuppz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zstegr( char* jobz, char* range, lapack_int* n, double* d, double* e, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_int* isuppz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_spteqr( char* compz, lapack_int* n, float* d, float* e, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dpteqr( char* compz, lapack_int* n, double* d, double* e, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_cpteqr( char* compz, lapack_int* n, float* d, float* e, lapack_complex_float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_zpteqr( char* compz, lapack_int* n, double* d, double* e, lapack_complex_double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_sstebz( char* range, char* order, lapack_int* n, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, const float* d, const float* e, lapack_int* m, lapack_int* nsplit, float* w, lapack_int* iblock, lapack_int* isplit, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dstebz( char* range, char* order, lapack_int* n, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, const double* d, const double* e, lapack_int* m, lapack_int* nsplit, double* w, lapack_int* iblock, lapack_int* isplit, double* 
work, lapack_int* iwork, lapack_int *info ); void LAPACK_sstein( lapack_int* n, const float* d, const float* e, lapack_int* m, const float* w, const lapack_int* iblock, const lapack_int* isplit, float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifailv, lapack_int *info ); void LAPACK_dstein( lapack_int* n, const double* d, const double* e, lapack_int* m, const double* w, const lapack_int* iblock, const lapack_int* isplit, double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifailv, lapack_int *info ); void LAPACK_cstein( lapack_int* n, const float* d, const float* e, lapack_int* m, const float* w, const lapack_int* iblock, const lapack_int* isplit, lapack_complex_float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifailv, lapack_int *info ); void LAPACK_zstein( lapack_int* n, const double* d, const double* e, lapack_int* m, const double* w, const lapack_int* iblock, const lapack_int* isplit, lapack_complex_double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifailv, lapack_int *info ); void LAPACK_sdisna( char* job, lapack_int* m, lapack_int* n, const float* d, float* sep, lapack_int *info ); void LAPACK_ddisna( char* job, lapack_int* m, lapack_int* n, const double* d, double* sep, lapack_int *info ); void LAPACK_ssygst( lapack_int* itype, char* uplo, lapack_int* n, float* a, lapack_int* lda, const float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_dsygst( lapack_int* itype, char* uplo, lapack_int* n, double* a, lapack_int* lda, const double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_chegst( lapack_int* itype, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* b, lapack_int* ldb, lapack_int *info ); void LAPACK_zhegst( lapack_int* itype, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* b, lapack_int* ldb, lapack_int *info ); void LAPACK_sspgst( lapack_int* itype, char* uplo, 
lapack_int* n, float* ap, const float* bp, lapack_int *info ); void LAPACK_dspgst( lapack_int* itype, char* uplo, lapack_int* n, double* ap, const double* bp, lapack_int *info ); void LAPACK_chpgst( lapack_int* itype, char* uplo, lapack_int* n, lapack_complex_float* ap, const lapack_complex_float* bp, lapack_int *info ); void LAPACK_zhpgst( lapack_int* itype, char* uplo, lapack_int* n, lapack_complex_double* ap, const lapack_complex_double* bp, lapack_int *info ); void LAPACK_ssbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, float* ab, lapack_int* ldab, const float* bb, lapack_int* ldbb, float* x, lapack_int* ldx, float* work, lapack_int *info ); void LAPACK_dsbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, double* ab, lapack_int* ldab, const double* bb, lapack_int* ldbb, double* x, lapack_int* ldx, double* work, lapack_int *info ); void LAPACK_chbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab, const lapack_complex_float* bb, lapack_int* ldbb, lapack_complex_float* x, lapack_int* ldx, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab, const lapack_complex_double* bb, lapack_int* ldbb, lapack_complex_double* x, lapack_int* ldx, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_spbstf( char* uplo, lapack_int* n, lapack_int* kb, float* bb, lapack_int* ldbb, lapack_int *info ); void LAPACK_dpbstf( char* uplo, lapack_int* n, lapack_int* kb, double* bb, lapack_int* ldbb, lapack_int *info ); void LAPACK_cpbstf( char* uplo, lapack_int* n, lapack_int* kb, lapack_complex_float* bb, lapack_int* ldbb, lapack_int *info ); void LAPACK_zpbstf( char* uplo, lapack_int* n, lapack_int* kb, lapack_complex_double* bb, lapack_int* ldbb, lapack_int *info ); void LAPACK_sgehrd( lapack_int* 
n, lapack_int* ilo, lapack_int* ihi, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sorghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* a, lapack_int* lda, const float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dorghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* a, lapack_int* lda, const double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sormhr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const float* a, lapack_int* lda, const float* tau, float* c, lapack_int* ldc, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dormhr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const double* a, lapack_int* lda, const double* tau, double* c, lapack_int* ldc, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cunmhr( char* side, char* trans, 
lapack_int* m, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zunmhr( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgebal( char* job, lapack_int* n, float* a, lapack_int* lda, lapack_int* ilo, lapack_int* ihi, float* scale, lapack_int *info ); void LAPACK_dgebal( char* job, lapack_int* n, double* a, lapack_int* lda, lapack_int* ilo, lapack_int* ihi, double* scale, lapack_int *info ); void LAPACK_cgebal( char* job, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* ilo, lapack_int* ihi, float* scale, lapack_int *info ); void LAPACK_zgebal( char* job, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* ilo, lapack_int* ihi, double* scale, lapack_int *info ); void LAPACK_sgebak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const float* scale, lapack_int* m, float* v, lapack_int* ldv, lapack_int *info ); void LAPACK_dgebak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const double* scale, lapack_int* m, double* v, lapack_int* ldv, lapack_int *info ); void LAPACK_cgebak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const float* scale, lapack_int* m, lapack_complex_float* v, lapack_int* ldv, lapack_int *info ); void LAPACK_zgebak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const double* scale, lapack_int* m, lapack_complex_double* v, lapack_int* ldv, lapack_int *info ); void LAPACK_shseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* h, lapack_int* 
ldh, float* wr, float* wi, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dhseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* h, lapack_int* ldh, double* wr, double* wi, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_chseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_float* h, lapack_int* ldh, lapack_complex_float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zhseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_double* h, lapack_int* ldh, lapack_complex_double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_shsein( char* job, char* eigsrc, char* initv, lapack_logical* select, lapack_int* n, const float* h, lapack_int* ldh, float* wr, const float* wi, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, float* work, lapack_int* ifaill, lapack_int* ifailr, lapack_int *info ); void LAPACK_dhsein( char* job, char* eigsrc, char* initv, lapack_logical* select, lapack_int* n, const double* h, lapack_int* ldh, double* wr, const double* wi, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, double* work, lapack_int* ifaill, lapack_int* ifailr, lapack_int *info ); void LAPACK_chsein( char* job, char* eigsrc, char* initv, const lapack_logical* select, lapack_int* n, const lapack_complex_float* h, lapack_int* ldh, lapack_complex_float* w, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, lapack_complex_float* work, float* rwork, lapack_int* ifaill, lapack_int* ifailr, lapack_int *info ); void LAPACK_zhsein( char* job, char* eigsrc, char* initv, const lapack_logical* 
select, lapack_int* n, const lapack_complex_double* h, lapack_int* ldh, lapack_complex_double* w, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, lapack_complex_double* work, double* rwork, lapack_int* ifaill, lapack_int* ifailr, lapack_int *info ); void LAPACK_strevc( char* side, char* howmny, lapack_logical* select, lapack_int* n, const float* t, lapack_int* ldt, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, float* work, lapack_int *info ); void LAPACK_dtrevc( char* side, char* howmny, lapack_logical* select, lapack_int* n, const double* t, lapack_int* ldt, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, double* work, lapack_int *info ); void LAPACK_ctrevc( char* side, char* howmny, const lapack_logical* select, lapack_int* n, lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_ztrevc( char* side, char* howmny, const lapack_logical* select, lapack_int* n, lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_strsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const float* t, lapack_int* ldt, const float* vl, lapack_int* ldvl, const float* vr, lapack_int* ldvr, float* s, float* sep, lapack_int* mm, lapack_int* m, float* work, lapack_int* ldwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dtrsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const double* t, lapack_int* ldt, const double* vl, lapack_int* ldvl, const double* vr, lapack_int* ldvr, double* s, double* sep, lapack_int* mm, 
lapack_int* m, double* work, lapack_int* ldwork, lapack_int* iwork, lapack_int *info ); void LAPACK_ctrsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const lapack_complex_float* t, lapack_int* ldt, const lapack_complex_float* vl, lapack_int* ldvl, const lapack_complex_float* vr, lapack_int* ldvr, float* s, float* sep, lapack_int* mm, lapack_int* m, lapack_complex_float* work, lapack_int* ldwork, float* rwork, lapack_int *info ); void LAPACK_ztrsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const lapack_complex_double* t, lapack_int* ldt, const lapack_complex_double* vl, lapack_int* ldvl, const lapack_complex_double* vr, lapack_int* ldvr, double* s, double* sep, lapack_int* mm, lapack_int* m, lapack_complex_double* work, lapack_int* ldwork, double* rwork, lapack_int *info ); void LAPACK_strexc( char* compq, lapack_int* n, float* t, lapack_int* ldt, float* q, lapack_int* ldq, lapack_int* ifst, lapack_int* ilst, float* work, lapack_int *info ); void LAPACK_dtrexc( char* compq, lapack_int* n, double* t, lapack_int* ldt, double* q, lapack_int* ldq, lapack_int* ifst, lapack_int* ilst, double* work, lapack_int *info ); void LAPACK_ctrexc( char* compq, lapack_int* n, lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* q, lapack_int* ldq, lapack_int* ifst, lapack_int* ilst, lapack_int *info ); void LAPACK_ztrexc( char* compq, lapack_int* n, lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* q, lapack_int* ldq, lapack_int* ifst, lapack_int* ilst, lapack_int *info ); void LAPACK_strsen( char* job, char* compq, const lapack_logical* select, lapack_int* n, float* t, lapack_int* ldt, float* q, lapack_int* ldq, float* wr, float* wi, lapack_int* m, float* s, float* sep, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dtrsen( char* job, char* compq, const lapack_logical* select, lapack_int* n, double* t, lapack_int* ldt, double* q, lapack_int* 
ldq, double* wr, double* wi, lapack_int* m, double* s, double* sep, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ctrsen( char* job, char* compq, const lapack_logical* select, lapack_int* n, lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* w, lapack_int* m, float* s, float* sep, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ztrsen( char* job, char* compq, const lapack_logical* select, lapack_int* n, lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* w, lapack_int* m, double* s, double* sep, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_strsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, const float* b, lapack_int* ldb, float* c, lapack_int* ldc, float* scale, lapack_int *info ); void LAPACK_dtrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, const double* b, lapack_int* ldb, double* c, lapack_int* ldc, double* scale, lapack_int *info ); void LAPACK_ctrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* c, lapack_int* ldc, float* scale, lapack_int *info ); void LAPACK_ztrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* c, lapack_int* ldc, double* scale, lapack_int *info ); void LAPACK_sgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* q, lapack_int* ldq, float* z, lapack_int* ldz, lapack_int *info ); void LAPACK_dgghrd( 
char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* q, lapack_int* ldq, double* z, lapack_int* ldz, lapack_int *info ); void LAPACK_cgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* z, lapack_int* ldz, lapack_int *info ); void LAPACK_zgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* z, lapack_int* ldz, lapack_int *info ); void LAPACK_sggbal( char* job, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* work, lapack_int *info ); void LAPACK_dggbal( char* job, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* work, lapack_int *info ); void LAPACK_cggbal( char* job, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* work, lapack_int *info ); void LAPACK_zggbal( char* job, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* work, lapack_int *info ); void LAPACK_sggbak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const float* lscale, const float* rscale, lapack_int* m, float* v, lapack_int* ldv, lapack_int *info ); void LAPACK_dggbak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const double* lscale, const double* rscale, lapack_int* m, double* v, 
lapack_int* ldv, lapack_int *info ); void LAPACK_cggbak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const float* lscale, const float* rscale, lapack_int* m, lapack_complex_float* v, lapack_int* ldv, lapack_int *info ); void LAPACK_zggbak( char* job, char* side, lapack_int* n, lapack_int* ilo, lapack_int* ihi, const double* lscale, const double* rscale, lapack_int* m, lapack_complex_double* v, lapack_int* ldv, lapack_int *info ); void LAPACK_shgeqz( char* job, char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* h, lapack_int* ldh, float* t, lapack_int* ldt, float* alphar, float* alphai, float* beta, float* q, lapack_int* ldq, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dhgeqz( char* job, char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* h, lapack_int* ldh, double* t, lapack_int* ldt, double* alphar, double* alphai, double* beta, double* q, lapack_int* ldq, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_chgeqz( char* job, char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_float* h, lapack_int* ldh, lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zhgeqz( char* job, char* compq, char* compz, lapack_int* n, lapack_int* ilo, lapack_int* ihi, lapack_complex_double* h, lapack_int* ldh, lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_stgevc( char* side, char* howmny, const lapack_logical* select, 
lapack_int* n, const float* s, lapack_int* lds, const float* p, lapack_int* ldp, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, float* work, lapack_int *info ); void LAPACK_dtgevc( char* side, char* howmny, const lapack_logical* select, lapack_int* n, const double* s, lapack_int* lds, const double* p, lapack_int* ldp, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, double* work, lapack_int *info ); void LAPACK_ctgevc( char* side, char* howmny, const lapack_logical* select, lapack_int* n, const lapack_complex_float* s, lapack_int* lds, const lapack_complex_float* p, lapack_int* ldp, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_ztgevc( char* side, char* howmny, const lapack_logical* select, lapack_int* n, const lapack_complex_double* s, lapack_int* lds, const lapack_complex_double* p, lapack_int* ldp, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_int* mm, lapack_int* m, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_stgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* q, lapack_int* ldq, float* z, lapack_int* ldz, lapack_int* ifst, lapack_int* ilst, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dtgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* q, lapack_int* ldq, double* z, lapack_int* ldz, lapack_int* ifst, lapack_int* ilst, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ctgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* q, lapack_int* ldq, 
lapack_complex_float* z, lapack_int* ldz, lapack_int* ifst, lapack_int* ilst, lapack_int *info ); void LAPACK_ztgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* z, lapack_int* ldz, lapack_int* ifst, lapack_int* ilst, lapack_int *info ); void LAPACK_stgsen( lapack_int* ijob, lapack_logical* wantq, lapack_logical* wantz, const lapack_logical* select, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* alphar, float* alphai, float* beta, float* q, lapack_int* ldq, float* z, lapack_int* ldz, lapack_int* m, float* pl, float* pr, float* dif, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dtgsen( lapack_int* ijob, lapack_logical* wantq, lapack_logical* wantz, const lapack_logical* select, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* alphar, double* alphai, double* beta, double* q, lapack_int* ldq, double* z, lapack_int* ldz, lapack_int* m, double* pl, double* pr, double* dif, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ctgsen( lapack_int* ijob, lapack_logical* wantq, lapack_logical* wantz, const lapack_logical* select, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* z, lapack_int* ldz, lapack_int* m, float* pl, float* pr, float* dif, lapack_complex_float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ztgsen( lapack_int* ijob, lapack_logical* wantq, lapack_logical* wantz, const lapack_logical* select, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, 
lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* z, lapack_int* ldz, lapack_int* m, double* pl, double* pr, double* dif, lapack_complex_double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_stgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, const float* b, lapack_int* ldb, float* c, lapack_int* ldc, const float* d, lapack_int* ldd, const float* e, lapack_int* lde, float* f, lapack_int* ldf, float* scale, float* dif, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dtgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, const double* b, lapack_int* ldb, double* c, lapack_int* ldc, const double* d, lapack_int* ldd, const double* e, lapack_int* lde, double* f, lapack_int* ldf, double* scale, double* dif, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_ctgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* c, lapack_int* ldc, const lapack_complex_float* d, lapack_int* ldd, const lapack_complex_float* e, lapack_int* lde, lapack_complex_float* f, lapack_int* ldf, float* scale, float* dif, lapack_complex_float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_ztgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* c, lapack_int* ldc, const lapack_complex_double* d, lapack_int* ldd, const lapack_complex_double* e, lapack_int* lde, lapack_complex_double* f, lapack_int* ldf, double* scale, double* dif, lapack_complex_double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void 
LAPACK_stgsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const float* a, lapack_int* lda, const float* b, lapack_int* ldb, const float* vl, lapack_int* ldvl, const float* vr, lapack_int* ldvr, float* s, float* dif, lapack_int* mm, lapack_int* m, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dtgsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const double* a, lapack_int* lda, const double* b, lapack_int* ldb, const double* vl, lapack_int* ldvl, const double* vr, lapack_int* ldvr, double* s, double* dif, lapack_int* mm, lapack_int* m, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_ctgsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, const lapack_complex_float* b, lapack_int* ldb, const lapack_complex_float* vl, lapack_int* ldvl, const lapack_complex_float* vr, lapack_int* ldvr, float* s, float* dif, lapack_int* mm, lapack_int* m, lapack_complex_float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_ztgsna( char* job, char* howmny, const lapack_logical* select, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, const lapack_complex_double* b, lapack_int* ldb, const lapack_complex_double* vl, lapack_int* ldvl, const lapack_complex_double* vr, lapack_int* ldvr, double* s, double* dif, lapack_int* mm, lapack_int* m, lapack_complex_double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_sggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* tola, float* tolb, lapack_int* k, lapack_int* l, float* u, lapack_int* ldu, float* v, lapack_int* ldv, float* q, lapack_int* ldq, lapack_int* iwork, float* tau, float* work, lapack_int *info ); void LAPACK_dggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* 
p, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* tola, double* tolb, lapack_int* k, lapack_int* l, double* u, lapack_int* ldu, double* v, lapack_int* ldv, double* q, lapack_int* ldq, lapack_int* iwork, double* tau, double* work, lapack_int *info ); void LAPACK_cggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* tola, float* tolb, lapack_int* k, lapack_int* l, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* v, lapack_int* ldv, lapack_complex_float* q, lapack_int* ldq, lapack_int* iwork, float* rwork, lapack_complex_float* tau, lapack_complex_float* work, lapack_int *info ); void LAPACK_zggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* tola, double* tolb, lapack_int* k, lapack_int* l, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* v, lapack_int* ldv, lapack_complex_double* q, lapack_int* ldq, lapack_int* iwork, double* rwork, lapack_complex_double* tau, lapack_complex_double* work, lapack_int *info ); void LAPACK_stgsja( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* tola, float* tolb, float* alpha, float* beta, float* u, lapack_int* ldu, float* v, lapack_int* ldv, float* q, lapack_int* ldq, float* work, lapack_int* ncycle, lapack_int *info ); void LAPACK_dtgsja( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* tola, double* tolb, double* alpha, double* beta, double* u, lapack_int* ldu, double* v, lapack_int* ldv, double* q, lapack_int* ldq, double* work, lapack_int* ncycle, lapack_int *info ); 
void LAPACK_ctgsja( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* tola, float* tolb, float* alpha, float* beta, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* v, lapack_int* ldv, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* work, lapack_int* ncycle, lapack_int *info ); void LAPACK_ztgsja( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* tola, double* tolb, double* alpha, double* beta, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* v, lapack_int* ldv, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* work, lapack_int* ncycle, lapack_int *info ); void LAPACK_sgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int* jpvt, float* rcond, lapack_int* rank, float* work, lapack_int* lwork, lapack_int *info ); 
void LAPACK_dgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int* jpvt, double* rcond, lapack_int* rank, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int* jpvt, float* rcond, lapack_int* rank, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int* jpvt, double* rcond, lapack_int* rank, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* s, float* rcond, lapack_int* rank, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* s, double* rcond, lapack_int* rank, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* s, float* rcond, lapack_int* rank, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* s, double* rcond, lapack_int* rank, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* s, float* rcond, lapack_int* rank, float* work, lapack_int* lwork, lapack_int* 
iwork, lapack_int *info ); void LAPACK_dgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* s, double* rcond, lapack_int* rank, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_cgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* s, float* rcond, lapack_int* rank, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* iwork, lapack_int *info ); void LAPACK_zgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* s, double* rcond, lapack_int* rank, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* iwork, lapack_int *info ); void LAPACK_sgglse( lapack_int* m, lapack_int* n, lapack_int* p, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* c, float* d, float* x, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgglse( lapack_int* m, lapack_int* n, lapack_int* p, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* c, double* d, double* x, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgglse( lapack_int* m, lapack_int* n, lapack_int* p, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* c, lapack_complex_float* d, lapack_complex_float* x, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgglse( lapack_int* m, lapack_int* n, lapack_int* p, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* c, lapack_complex_double* d, lapack_complex_double* x, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sggglm( lapack_int* n, lapack_int* m, lapack_int* p, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* d, float* x, 
float* y, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dggglm( lapack_int* n, lapack_int* m, lapack_int* p, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* d, double* x, double* y, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cggglm( lapack_int* n, lapack_int* m, lapack_int* p, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* d, lapack_complex_float* x, lapack_complex_float* y, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zggglm( lapack_int* n, lapack_int* m, lapack_int* p, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* d, lapack_complex_double* x, lapack_complex_double* y, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_ssyev( char* jobz, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* w, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsyev( char* jobz, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* w, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cheev( char* jobz, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* w, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zheev( char* jobz, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* w, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_ssyevd( char* jobz, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* w, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dsyevd( char* jobz, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* w, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_cheevd( char* jobz, char* uplo, lapack_int* n, lapack_complex_float* 
a, lapack_int* lda, float* w, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zheevd( char* jobz, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* w, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ssyevx( char* jobz, char* range, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dsyevx( char* jobz, char* range, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_cheevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_zheevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_ssyevr( char* jobz, char* range, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* 
z, lapack_int* ldz, lapack_int* isuppz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dsyevr( char* jobz, char* range, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, lapack_int* isuppz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_cheevr( char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_int* isuppz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zheevr( char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_int* isuppz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sspev( char* jobz, char* uplo, lapack_int* n, float* ap, float* w, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dspev( char* jobz, char* uplo, lapack_int* n, double* ap, double* w, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_chpev( char* jobz, char* uplo, lapack_int* n, lapack_complex_float* ap, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhpev( char* jobz, char* uplo, lapack_int* n, lapack_complex_double* ap, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sspevd( 
char* jobz, char* uplo, lapack_int* n, float* ap, float* w, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dspevd( char* jobz, char* uplo, lapack_int* n, double* ap, double* w, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_chpevd( char* jobz, char* uplo, lapack_int* n, lapack_complex_float* ap, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zhpevd( char* jobz, char* uplo, lapack_int* n, lapack_complex_double* ap, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sspevx( char* jobz, char* range, char* uplo, lapack_int* n, float* ap, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dspevx( char* jobz, char* range, char* uplo, lapack_int* n, double* ap, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_chpevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_float* ap, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_zhpevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_double* ap, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* 
abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_ssbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, float* ab, lapack_int* ldab, float* w, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dsbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, double* ab, lapack_int* ldab, double* w, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_chbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_float* ab, lapack_int* ldab, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_ssbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, float* ab, lapack_int* ldab, float* w, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dsbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, double* ab, lapack_int* ldab, double* w, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_chbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_float* ab, lapack_int* ldab, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zhbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, 
double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ssbevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* kd, float* ab, lapack_int* ldab, float* q, lapack_int* ldq, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dsbevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* kd, double* ab, lapack_int* ldab, double* q, lapack_int* ldq, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_chbevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* q, lapack_int* ldq, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_zhbevx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* q, lapack_int* ldq, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_sstev( char* jobz, lapack_int* n, float* d, float* e, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dstev( char* jobz, lapack_int* n, double* d, double* e, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_sstevd( char* jobz, lapack_int* n, float* d, float* e, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* 
iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dstevd( char* jobz, lapack_int* n, double* d, double* e, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sstevx( char* jobz, char* range, lapack_int* n, float* d, float* e, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dstevx( char* jobz, char* range, lapack_int* n, double* d, double* e, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_sstevr( char* jobz, char* range, lapack_int* n, float* d, float* e, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, lapack_int* isuppz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dstevr( char* jobz, char* range, lapack_int* n, double* d, double* e, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, lapack_int* isuppz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sgees( char* jobvs, char* sort, LAPACK_S_SELECT2 select, lapack_int* n, float* a, lapack_int* lda, lapack_int* sdim, float* wr, float* wi, float* vs, lapack_int* ldvs, float* work, lapack_int* lwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_dgees( char* jobvs, char* sort, LAPACK_D_SELECT2 select, lapack_int* n, double* a, lapack_int* lda, lapack_int* sdim, double* wr, double* wi, double* vs, lapack_int* ldvs, double* work, lapack_int* lwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_cgees( char* jobvs, char* sort, LAPACK_C_SELECT1 
select, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* sdim, lapack_complex_float* w, lapack_complex_float* vs, lapack_int* ldvs, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_zgees( char* jobvs, char* sort, LAPACK_Z_SELECT1 select, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* sdim, lapack_complex_double* w, lapack_complex_double* vs, lapack_int* ldvs, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_sgeesx( char* jobvs, char* sort, LAPACK_S_SELECT2 select, char* sense, lapack_int* n, float* a, lapack_int* lda, lapack_int* sdim, float* wr, float* wi, float* vs, lapack_int* ldvs, float* rconde, float* rcondv, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_dgeesx( char* jobvs, char* sort, LAPACK_D_SELECT2 select, char* sense, lapack_int* n, double* a, lapack_int* lda, lapack_int* sdim, double* wr, double* wi, double* vs, lapack_int* ldvs, double* rconde, double* rcondv, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_cgeesx( char* jobvs, char* sort, LAPACK_C_SELECT1 select, char* sense, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* sdim, lapack_complex_float* w, lapack_complex_float* vs, lapack_int* ldvs, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_zgeesx( char* jobvs, char* sort, LAPACK_Z_SELECT1 select, char* sense, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* sdim, lapack_complex_double* w, lapack_complex_double* vs, lapack_int* ldvs, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_logical* bwork, lapack_int *info ); void 
LAPACK_sgeev( char* jobvl, char* jobvr, lapack_int* n, float* a, lapack_int* lda, float* wr, float* wi, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgeev( char* jobvl, char* jobvr, lapack_int* n, double* a, lapack_int* lda, double* wr, double* wi, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgeev( char* jobvl, char* jobvr, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zgeev( char* jobvl, char* jobvr, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* w, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sgeevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, float* a, lapack_int* lda, float* wr, float* wi, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, float* scale, float* abnrm, float* rconde, float* rcondv, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dgeevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, double* a, lapack_int* lda, double* wr, double* wi, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, double* scale, double* abnrm, double* rconde, double* rcondv, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_cgeevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* w, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* 
ldvr, lapack_int* ilo, lapack_int* ihi, float* scale, float* abnrm, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zgeevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* w, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, double* scale, double* abnrm, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* s, float* u, lapack_int* ldu, float* vt, lapack_int* ldvt, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* s, double* u, lapack_int* ldu, double* vt, lapack_int* ldvt, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* s, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* vt, lapack_int* ldvt, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* s, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* vt, lapack_int* ldvt, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sgesdd( char* jobz, lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* s, float* u, lapack_int* ldu, float* vt, lapack_int* ldvt, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dgesdd( char* jobz, lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* s, double* u, lapack_int* ldu, 
double* vt, lapack_int* ldvt, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_cgesdd( char* jobz, lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, float* s, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* vt, lapack_int* ldvt, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* iwork, lapack_int *info ); void LAPACK_zgesdd( char* jobz, lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, double* s, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* vt, lapack_int* ldvt, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dgejsv( char* joba, char* jobu, char* jobv, char* jobr, char* jobt, char* jobp, lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* sva, double* u, lapack_int* ldu, double* v, lapack_int* ldv, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_sgejsv( char* joba, char* jobu, char* jobv, char* jobr, char* jobt, char* jobp, lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* sva, float* u, lapack_int* ldu, float* v, lapack_int* ldv, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int *info ); void LAPACK_dgesvj( char* joba, char* jobu, char* jobv, lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* sva, lapack_int* mv, double* v, lapack_int* ldv, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sgesvj( char* joba, char* jobu, char* jobv, lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* sva, lapack_int* mv, float* v, lapack_int* ldv, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_sggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* alpha, float* beta, float* u, lapack_int* ldu, float* v, lapack_int* ldv, 
float* q, lapack_int* ldq, float* work, lapack_int* iwork, lapack_int *info ); void LAPACK_dggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* alpha, double* beta, double* u, lapack_int* ldu, double* v, lapack_int* ldv, double* q, lapack_int* ldq, double* work, lapack_int* iwork, lapack_int *info ); void LAPACK_cggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* alpha, float* beta, lapack_complex_float* u, lapack_int* ldu, lapack_complex_float* v, lapack_int* ldv, lapack_complex_float* q, lapack_int* ldq, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int *info ); void LAPACK_zggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m, lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* alpha, double* beta, lapack_complex_double* u, lapack_int* ldu, lapack_complex_double* v, lapack_int* ldv, lapack_complex_double* q, lapack_int* ldq, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int *info ); void LAPACK_ssygv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* w, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsygv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* w, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_chegv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* w, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zhegv( 
lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* w, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_ssygvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* w, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dsygvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* w, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_chegvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* w, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zhegvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* w, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ssygvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dsygvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* lwork, 
lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_chegvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_zhegvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_sspgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, float* ap, float* bp, float* w, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dspgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, double* ap, double* bp, double* w, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_chpgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_complex_float* bp, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhpgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_double* ap, lapack_complex_double* bp, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_sspgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, float* ap, float* bp, float* w, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void 
LAPACK_dspgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, double* ap, double* bp, double* w, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_chpgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_complex_float* bp, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zhpgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n, lapack_complex_double* ap, lapack_complex_double* bp, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_sspgvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, float* ap, float* bp, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dspgvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, double* ap, double* bp, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_chpgvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_float* ap, lapack_complex_float* bp, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_zhpgvx( lapack_int* itype, char* jobz, char* range, char* uplo, lapack_int* n, lapack_complex_double* ap, 
lapack_complex_double* bp, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_ssbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, float* ab, lapack_int* ldab, float* bb, lapack_int* ldbb, float* w, float* z, lapack_int* ldz, float* work, lapack_int *info ); void LAPACK_dsbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, double* ab, lapack_int* ldab, double* bb, lapack_int* ldbb, double* w, double* z, lapack_int* ldz, double* work, lapack_int *info ); void LAPACK_chbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* bb, lapack_int* ldbb, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int *info ); void LAPACK_zhbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* bb, lapack_int* ldbb, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int *info ); void LAPACK_ssbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, float* ab, lapack_int* ldab, float* bb, lapack_int* ldbb, float* w, float* z, lapack_int* ldz, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_dsbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, double* ab, lapack_int* ldab, double* bb, lapack_int* ldbb, double* w, double* z, lapack_int* ldz, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_chbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab, 
lapack_complex_float* bb, lapack_int* ldbb, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_zhbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab, lapack_complex_double* bb, lapack_int* ldbb, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork, lapack_int *info ); void LAPACK_ssbgvx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, float* ab, lapack_int* ldab, float* bb, lapack_int* ldbb, float* q, lapack_int* ldq, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, float* z, lapack_int* ldz, float* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_dsbgvx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, double* ab, lapack_int* ldab, double* bb, lapack_int* ldbb, double* q, lapack_int* ldq, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, double* z, lapack_int* ldz, double* work, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_chbgvx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab, lapack_complex_float* bb, lapack_int* ldbb, lapack_complex_float* q, lapack_int* ldq, float* vl, float* vu, lapack_int* il, lapack_int* iu, float* abstol, lapack_int* m, float* w, lapack_complex_float* z, lapack_int* ldz, lapack_complex_float* work, float* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_zhbgvx( char* jobz, char* range, char* uplo, lapack_int* n, lapack_int* ka, lapack_int* kb, lapack_complex_double* ab, lapack_int* 
ldab, lapack_complex_double* bb, lapack_int* ldbb, lapack_complex_double* q, lapack_int* ldq, double* vl, double* vu, lapack_int* il, lapack_int* iu, double* abstol, lapack_int* m, double* w, lapack_complex_double* z, lapack_int* ldz, lapack_complex_double* work, double* rwork, lapack_int* iwork, lapack_int* ifail, lapack_int *info ); void LAPACK_sgges( char* jobvsl, char* jobvsr, char* sort, LAPACK_S_SELECT3 selctg, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int* sdim, float* alphar, float* alphai, float* beta, float* vsl, lapack_int* ldvsl, float* vsr, lapack_int* ldvsr, float* work, lapack_int* lwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_dgges( char* jobvsl, char* jobvsr, char* sort, LAPACK_D_SELECT3 selctg, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int* sdim, double* alphar, double* alphai, double* beta, double* vsl, lapack_int* ldvsl, double* vsr, lapack_int* ldvsr, double* work, lapack_int* lwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_cgges( char* jobvsl, char* jobvsr, char* sort, LAPACK_C_SELECT2 selctg, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int* sdim, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vsl, lapack_int* ldvsl, lapack_complex_float* vsr, lapack_int* ldvsr, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_zgges( char* jobvsl, char* jobvsr, char* sort, LAPACK_Z_SELECT2 selctg, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int* sdim, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vsl, lapack_int* ldvsl, lapack_complex_double* vsr, lapack_int* ldvsr, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_sggesx( char* jobvsl, 
char* jobvsr, char* sort, LAPACK_S_SELECT3 selctg, char* sense, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, lapack_int* sdim, float* alphar, float* alphai, float* beta, float* vsl, lapack_int* ldvsl, float* vsr, lapack_int* ldvsr, float* rconde, float* rcondv, float* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_dggesx( char* jobvsl, char* jobvsr, char* sort, LAPACK_D_SELECT3 selctg, char* sense, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, lapack_int* sdim, double* alphar, double* alphai, double* beta, double* vsl, lapack_int* ldvsl, double* vsr, lapack_int* ldvsr, double* rconde, double* rcondv, double* work, lapack_int* lwork, lapack_int* iwork, lapack_int* liwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_cggesx( char* jobvsl, char* jobvsr, char* sort, LAPACK_C_SELECT2 selctg, char* sense, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_int* sdim, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vsl, lapack_int* ldvsl, lapack_complex_float* vsr, lapack_int* ldvsr, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* iwork, lapack_int* liwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_zggesx( char* jobvsl, char* jobvsr, char* sort, LAPACK_Z_SELECT2 selctg, char* sense, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_int* sdim, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vsl, lapack_int* ldvsl, lapack_complex_double* vsr, lapack_int* ldvsr, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* iwork, lapack_int* liwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_sggev( char* jobvl, char* jobvr, lapack_int* n, float* a, 
lapack_int* lda, float* b, lapack_int* ldb, float* alphar, float* alphai, float* beta, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dggev( char* jobvl, char* jobvr, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* alphar, double* alphai, double* beta, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cggev( char* jobvl, char* jobvr, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int *info ); void LAPACK_zggev( char* jobvl, char* jobvr, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int *info ); void LAPACK_sggevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* alphar, float* alphai, float* beta, float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* abnrm, float* bbnrm, float* rconde, float* rcondv, float* work, lapack_int* lwork, lapack_int* iwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_dggevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* alphar, double* alphai, double* beta, double* vl, lapack_int* ldvl, double* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, 
double* abnrm, double* bbnrm, double* rconde, double* rcondv, double* work, lapack_int* lwork, lapack_int* iwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_cggevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* vl, lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, float* lscale, float* rscale, float* abnrm, float* bbnrm, float* rconde, float* rcondv, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* iwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_zggevx( char* balanc, char* jobvl, char* jobvr, char* sense, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* vl, lapack_int* ldvl, lapack_complex_double* vr, lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi, double* lscale, double* rscale, double* abnrm, double* bbnrm, double* rconde, double* rcondv, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* iwork, lapack_logical* bwork, lapack_int *info ); void LAPACK_dsfrk( char* transr, char* uplo, char* trans, lapack_int* n, lapack_int* k, double* alpha, const double* a, lapack_int* lda, double* beta, double* c ); void LAPACK_ssfrk( char* transr, char* uplo, char* trans, lapack_int* n, lapack_int* k, float* alpha, const float* a, lapack_int* lda, float* beta, float* c ); void LAPACK_zhfrk( char* transr, char* uplo, char* trans, lapack_int* n, lapack_int* k, double* alpha, const lapack_complex_double* a, lapack_int* lda, double* beta, lapack_complex_double* c ); void LAPACK_chfrk( char* transr, char* uplo, char* trans, lapack_int* n, lapack_int* k, float* alpha, const lapack_complex_float* a, lapack_int* lda, float* beta, lapack_complex_float* c 
); void LAPACK_dtfsm( char* transr, char* side, char* uplo, char* trans, char* diag, lapack_int* m, lapack_int* n, double* alpha, const double* a, double* b, lapack_int* ldb ); void LAPACK_stfsm( char* transr, char* side, char* uplo, char* trans, char* diag, lapack_int* m, lapack_int* n, float* alpha, const float* a, float* b, lapack_int* ldb ); void LAPACK_ztfsm( char* transr, char* side, char* uplo, char* trans, char* diag, lapack_int* m, lapack_int* n, lapack_complex_double* alpha, const lapack_complex_double* a, lapack_complex_double* b, lapack_int* ldb ); void LAPACK_ctfsm( char* transr, char* side, char* uplo, char* trans, char* diag, lapack_int* m, lapack_int* n, lapack_complex_float* alpha, const lapack_complex_float* a, lapack_complex_float* b, lapack_int* ldb ); void LAPACK_dtfttp( char* transr, char* uplo, lapack_int* n, const double* arf, double* ap, lapack_int *info ); void LAPACK_stfttp( char* transr, char* uplo, lapack_int* n, const float* arf, float* ap, lapack_int *info ); void LAPACK_ztfttp( char* transr, char* uplo, lapack_int* n, const lapack_complex_double* arf, lapack_complex_double* ap, lapack_int *info ); void LAPACK_ctfttp( char* transr, char* uplo, lapack_int* n, const lapack_complex_float* arf, lapack_complex_float* ap, lapack_int *info ); void LAPACK_dtfttr( char* transr, char* uplo, lapack_int* n, const double* arf, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_stfttr( char* transr, char* uplo, lapack_int* n, const float* arf, float* a, lapack_int* lda, lapack_int *info ); void LAPACK_ztfttr( char* transr, char* uplo, lapack_int* n, const lapack_complex_double* arf, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_ctfttr( char* transr, char* uplo, lapack_int* n, const lapack_complex_float* arf, lapack_complex_float* a, lapack_int* lda, lapack_int *info ); void LAPACK_dtpttf( char* transr, char* uplo, lapack_int* n, const double* ap, double* arf, lapack_int *info ); void LAPACK_stpttf( char* 
transr, char* uplo, lapack_int* n, const float* ap, float* arf, lapack_int *info ); void LAPACK_ztpttf( char* transr, char* uplo, lapack_int* n, const lapack_complex_double* ap, lapack_complex_double* arf, lapack_int *info ); void LAPACK_ctpttf( char* transr, char* uplo, lapack_int* n, const lapack_complex_float* ap, lapack_complex_float* arf, lapack_int *info ); void LAPACK_dtpttr( char* uplo, lapack_int* n, const double* ap, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_stpttr( char* uplo, lapack_int* n, const float* ap, float* a, lapack_int* lda, lapack_int *info ); void LAPACK_ztpttr( char* uplo, lapack_int* n, const lapack_complex_double* ap, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_ctpttr( char* uplo, lapack_int* n, const lapack_complex_float* ap, lapack_complex_float* a, lapack_int* lda, lapack_int *info ); void LAPACK_dtrttf( char* transr, char* uplo, lapack_int* n, const double* a, lapack_int* lda, double* arf, lapack_int *info ); void LAPACK_strttf( char* transr, char* uplo, lapack_int* n, const float* a, lapack_int* lda, float* arf, lapack_int *info ); void LAPACK_ztrttf( char* transr, char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* arf, lapack_int *info ); void LAPACK_ctrttf( char* transr, char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* arf, lapack_int *info ); void LAPACK_dtrttp( char* uplo, lapack_int* n, const double* a, lapack_int* lda, double* ap, lapack_int *info ); void LAPACK_strttp( char* uplo, lapack_int* n, const float* a, lapack_int* lda, float* ap, lapack_int *info ); void LAPACK_ztrttp( char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* ap, lapack_int *info ); void LAPACK_ctrttp( char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* ap, lapack_int *info ); void LAPACK_sgeqrfp( lapack_int* m, lapack_int* 
n, float* a, lapack_int* lda, float* tau, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dgeqrfp( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_cgeqrfp( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zgeqrfp( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_clacgv( lapack_int* n, lapack_complex_float* x, lapack_int* incx ); void LAPACK_zlacgv( lapack_int* n, lapack_complex_double* x, lapack_int* incx ); void LAPACK_slarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n, float* x ); void LAPACK_dlarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n, double* x ); void LAPACK_clarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n, lapack_complex_float* x ); void LAPACK_zlarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n, lapack_complex_double* x ); void LAPACK_sgeqr2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int *info ); void LAPACK_dgeqr2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int *info ); void LAPACK_cgeqr2( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int *info ); void LAPACK_zgeqr2( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int *info ); void LAPACK_slacn2( lapack_int* n, float* v, float* x, lapack_int* isgn, float* est, lapack_int* kase, lapack_int* isave ); void LAPACK_dlacn2( lapack_int* n, double* v, double* x, lapack_int* isgn, double* est, lapack_int* kase, lapack_int* isave ); void 
LAPACK_clacn2( lapack_int* n, lapack_complex_float* v, lapack_complex_float* x, float* est, lapack_int* kase, lapack_int* isave ); void LAPACK_zlacn2( lapack_int* n, lapack_complex_double* v, lapack_complex_double* x, double* est, lapack_int* kase, lapack_int* isave ); void LAPACK_slacpy( char* uplo, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, float* b, lapack_int* ldb ); void LAPACK_dlacpy( char* uplo, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, double* b, lapack_int* ldb ); void LAPACK_clacpy( char* uplo, lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb ); void LAPACK_zlacpy( char* uplo, lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb ); void LAPACK_clacp2( char* uplo, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb ); void LAPACK_zlacp2( char* uplo, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb ); void LAPACK_sgetf2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_dgetf2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_cgetf2( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_zgetf2( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_int *info ); void LAPACK_slaswp( lapack_int* n, float* a, lapack_int* lda, lapack_int* k1, lapack_int* k2, const lapack_int* ipiv, lapack_int* incx ); void LAPACK_dlaswp( lapack_int* n, double* a, lapack_int* lda, lapack_int* k1, lapack_int* k2, const lapack_int* ipiv, lapack_int* incx ); void LAPACK_claswp( lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int* k1, lapack_int* k2, const 
lapack_int* ipiv, lapack_int* incx ); void LAPACK_zlaswp( lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int* k1, lapack_int* k2, const lapack_int* ipiv, lapack_int* incx ); float LAPACK_slange( char* norm, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, float* work ); double LAPACK_dlange( char* norm, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, double* work ); float LAPACK_clange( char* norm, lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* work ); double LAPACK_zlange( char* norm, lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* work ); float LAPACK_clanhe( char* norm, char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* work ); double LAPACK_zlanhe( char* norm, char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* work ); float LAPACK_slansy( char* norm, char* uplo, lapack_int* n, const float* a, lapack_int* lda, float* work ); double LAPACK_dlansy( char* norm, char* uplo, lapack_int* n, const double* a, lapack_int* lda, double* work ); float LAPACK_clansy( char* norm, char* uplo, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* work ); double LAPACK_zlansy( char* norm, char* uplo, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* work ); float LAPACK_slantr( char* norm, char* uplo, char* diag, lapack_int* m, lapack_int* n, const float* a, lapack_int* lda, float* work ); double LAPACK_dlantr( char* norm, char* uplo, char* diag, lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, double* work ); float LAPACK_clantr( char* norm, char* uplo, char* diag, lapack_int* m, lapack_int* n, const lapack_complex_float* a, lapack_int* lda, float* work ); double LAPACK_zlantr( char* norm, char* uplo, char* diag, lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, double* work ); float LAPACK_slamch( char* cmach 
); double LAPACK_dlamch( char* cmach ); void LAPACK_sgelq2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* tau, float* work, lapack_int *info ); void LAPACK_dgelq2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* tau, double* work, lapack_int *info ); void LAPACK_cgelq2( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* tau, lapack_complex_float* work, lapack_int *info ); void LAPACK_zgelq2( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* tau, lapack_complex_double* work, lapack_int *info ); void LAPACK_slarfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, const float* v, lapack_int* ldv, const float* t, lapack_int* ldt, float* c, lapack_int* ldc, float* work, lapack_int* ldwork ); void LAPACK_dlarfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, const double* v, lapack_int* ldv, const double* t, lapack_int* ldt, double* c, lapack_int* ldc, double* work, lapack_int* ldwork ); void LAPACK_clarfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_float* v, lapack_int* ldv, const lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int* ldwork ); void LAPACK_zlarfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, const lapack_complex_double* v, lapack_int* ldv, const lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int* ldwork ); void LAPACK_slarfg( lapack_int* n, float* alpha, float* x, lapack_int* incx, float* tau ); void LAPACK_dlarfg( lapack_int* n, double* alpha, double* x, lapack_int* incx, double* tau ); void LAPACK_clarfg( lapack_int* n, lapack_complex_float* alpha, 
lapack_complex_float* x, lapack_int* incx, lapack_complex_float* tau ); void LAPACK_zlarfg( lapack_int* n, lapack_complex_double* alpha, lapack_complex_double* x, lapack_int* incx, lapack_complex_double* tau ); void LAPACK_slarft( char* direct, char* storev, lapack_int* n, lapack_int* k, const float* v, lapack_int* ldv, const float* tau, float* t, lapack_int* ldt ); void LAPACK_dlarft( char* direct, char* storev, lapack_int* n, lapack_int* k, const double* v, lapack_int* ldv, const double* tau, double* t, lapack_int* ldt ); void LAPACK_clarft( char* direct, char* storev, lapack_int* n, lapack_int* k, const lapack_complex_float* v, lapack_int* ldv, const lapack_complex_float* tau, lapack_complex_float* t, lapack_int* ldt ); void LAPACK_zlarft( char* direct, char* storev, lapack_int* n, lapack_int* k, const lapack_complex_double* v, lapack_int* ldv, const lapack_complex_double* tau, lapack_complex_double* t, lapack_int* ldt ); void LAPACK_slarfx( char* side, lapack_int* m, lapack_int* n, const float* v, float* tau, float* c, lapack_int* ldc, float* work ); void LAPACK_dlarfx( char* side, lapack_int* m, lapack_int* n, const double* v, double* tau, double* c, lapack_int* ldc, double* work ); void LAPACK_clarfx( char* side, lapack_int* m, lapack_int* n, const lapack_complex_float* v, lapack_complex_float* tau, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work ); void LAPACK_zlarfx( char* side, lapack_int* m, lapack_int* n, const lapack_complex_double* v, lapack_complex_double* tau, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work ); void LAPACK_slatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed, char* sym, float* d, lapack_int* mode, float* cond, float* dmax, lapack_int* kl, lapack_int* ku, char* pack, float* a, lapack_int* lda, float* work, lapack_int *info ); void LAPACK_dlatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed, char* sym, double* d, lapack_int* mode, double* cond, double* dmax, 
lapack_int* kl, lapack_int* ku, char* pack, double* a, lapack_int* lda, double* work, lapack_int *info ); void LAPACK_clatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed, char* sym, float* d, lapack_int* mode, float* cond, float* dmax, lapack_int* kl, lapack_int* ku, char* pack, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* work, lapack_int *info ); void LAPACK_zlatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed, char* sym, double* d, lapack_int* mode, double* cond, double* dmax, lapack_int* kl, lapack_int* ku, char* pack, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* work, lapack_int *info ); void LAPACK_slag2d( lapack_int* m, lapack_int* n, const float* sa, lapack_int* ldsa, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_dlag2s( lapack_int* m, lapack_int* n, const double* a, lapack_int* lda, float* sa, lapack_int* ldsa, lapack_int *info ); void LAPACK_clag2z( lapack_int* m, lapack_int* n, const lapack_complex_float* sa, lapack_int* ldsa, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_zlag2c( lapack_int* m, lapack_int* n, const lapack_complex_double* a, lapack_int* lda, lapack_complex_float* sa, lapack_int* ldsa, lapack_int *info ); void LAPACK_slauum( char* uplo, lapack_int* n, float* a, lapack_int* lda, lapack_int *info ); void LAPACK_dlauum( char* uplo, lapack_int* n, double* a, lapack_int* lda, lapack_int *info ); void LAPACK_clauum( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_int *info ); void LAPACK_zlauum( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_int *info ); void LAPACK_slagge( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const float* d, float* a, lapack_int* lda, lapack_int* iseed, float* work, lapack_int *info ); void LAPACK_dlagge( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const double* d, double* a, lapack_int* lda, lapack_int* iseed, 
double* work, lapack_int *info ); void LAPACK_clagge( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const float* d, lapack_complex_float* a, lapack_int* lda, lapack_int* iseed, lapack_complex_float* work, lapack_int *info ); void LAPACK_zlagge( lapack_int* m, lapack_int* n, lapack_int* kl, lapack_int* ku, const double* d, lapack_complex_double* a, lapack_int* lda, lapack_int* iseed, lapack_complex_double* work, lapack_int *info ); void LAPACK_slaset( char* uplo, lapack_int* m, lapack_int* n, float* alpha, float* beta, float* a, lapack_int* lda ); void LAPACK_dlaset( char* uplo, lapack_int* m, lapack_int* n, double* alpha, double* beta, double* a, lapack_int* lda ); void LAPACK_claset( char* uplo, lapack_int* m, lapack_int* n, lapack_complex_float* alpha, lapack_complex_float* beta, lapack_complex_float* a, lapack_int* lda ); void LAPACK_zlaset( char* uplo, lapack_int* m, lapack_int* n, lapack_complex_double* alpha, lapack_complex_double* beta, lapack_complex_double* a, lapack_int* lda ); void LAPACK_slasrt( char* id, lapack_int* n, float* d, lapack_int *info ); void LAPACK_dlasrt( char* id, lapack_int* n, double* d, lapack_int *info ); void LAPACK_claghe( lapack_int* n, lapack_int* k, const float* d, lapack_complex_float* a, lapack_int* lda, lapack_int* iseed, lapack_complex_float* work, lapack_int *info ); void LAPACK_zlaghe( lapack_int* n, lapack_int* k, const double* d, lapack_complex_double* a, lapack_int* lda, lapack_int* iseed, lapack_complex_double* work, lapack_int *info ); void LAPACK_slagsy( lapack_int* n, lapack_int* k, const float* d, float* a, lapack_int* lda, lapack_int* iseed, float* work, lapack_int *info ); void LAPACK_dlagsy( lapack_int* n, lapack_int* k, const double* d, double* a, lapack_int* lda, lapack_int* iseed, double* work, lapack_int *info ); void LAPACK_clagsy( lapack_int* n, lapack_int* k, const float* d, lapack_complex_float* a, lapack_int* lda, lapack_int* iseed, lapack_complex_float* work, lapack_int *info ); void 
LAPACK_zlagsy( lapack_int* n, lapack_int* k, const double* d, lapack_complex_double* a, lapack_int* lda, lapack_int* iseed, lapack_complex_double* work, lapack_int *info ); void LAPACK_slapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n, float* x, lapack_int* ldx, lapack_int* k ); void LAPACK_dlapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n, double* x, lapack_int* ldx, lapack_int* k ); void LAPACK_clapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n, lapack_complex_float* x, lapack_int* ldx, lapack_int* k ); void LAPACK_zlapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n, lapack_complex_double* x, lapack_int* ldx, lapack_int* k ); float LAPACK_slapy2( float* x, float* y ); double LAPACK_dlapy2( double* x, double* y ); float LAPACK_slapy3( float* x, float* y, float* z ); double LAPACK_dlapy3( double* x, double* y, double* z ); void LAPACK_slartgp( float* f, float* g, float* cs, float* sn, float* r ); void LAPACK_dlartgp( double* f, double* g, double* cs, double* sn, double* r ); void LAPACK_slartgs( float* x, float* y, float* sigma, float* cs, float* sn ); void LAPACK_dlartgs( double* x, double* y, double* sigma, double* cs, double* sn ); // LAPACK 3.3.0 void LAPACK_cbbcsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, lapack_int* m, lapack_int* p, lapack_int* q, float* theta, float* phi, lapack_complex_float* u1, lapack_int* ldu1, lapack_complex_float* u2, lapack_int* ldu2, lapack_complex_float* v1t, lapack_int* ldv1t, lapack_complex_float* v2t, lapack_int* ldv2t, float* b11d, float* b11e, float* b12d, float* b12e, float* b21d, float* b21e, float* b22d, float* b22e, float* rwork, lapack_int* lrwork , lapack_int *info ); void LAPACK_cheswapr( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* i1, lapack_int* i2 ); void LAPACK_chetri2( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int* lwork , lapack_int *info ); 
void LAPACK_chetri2x( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int* nb , lapack_int *info ); void LAPACK_chetrs2( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work , lapack_int *info ); void LAPACK_csyconv( char* uplo, char* way, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work , lapack_int *info ); void LAPACK_csyswapr( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* i1, lapack_int* i2 ); void LAPACK_csytri2( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int* lwork , lapack_int *info ); void LAPACK_csytri2x( char* uplo, lapack_int* n, lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int* nb , lapack_int *info ); void LAPACK_csytrs2( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work , lapack_int *info ); void LAPACK_cunbdb( char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, lapack_complex_float* x11, lapack_int* ldx11, lapack_complex_float* x12, lapack_int* ldx12, lapack_complex_float* x21, lapack_int* ldx21, lapack_complex_float* x22, lapack_int* ldx22, float* theta, float* phi, lapack_complex_float* taup1, lapack_complex_float* taup2, lapack_complex_float* tauq1, lapack_complex_float* tauq2, lapack_complex_float* work, lapack_int* lwork , lapack_int *info ); void LAPACK_cuncsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, lapack_complex_float* x11, lapack_int* ldx11, lapack_complex_float* x12, lapack_int* 
ldx12, lapack_complex_float* x21, lapack_int* ldx21, lapack_complex_float* x22, lapack_int* ldx22, float* theta, lapack_complex_float* u1, lapack_int* ldu1, lapack_complex_float* u2, lapack_int* ldu2, lapack_complex_float* v1t, lapack_int* ldv1t, lapack_complex_float* v2t, lapack_int* ldv2t, lapack_complex_float* work, lapack_int* lwork, float* rwork, lapack_int* lrwork, lapack_int* iwork , lapack_int *info ); void LAPACK_dbbcsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, lapack_int* m, lapack_int* p, lapack_int* q, double* theta, double* phi, double* u1, lapack_int* ldu1, double* u2, lapack_int* ldu2, double* v1t, lapack_int* ldv1t, double* v2t, lapack_int* ldv2t, double* b11d, double* b11e, double* b12d, double* b12e, double* b21d, double* b21e, double* b22d, double* b22e, double* work, lapack_int* lwork , lapack_int *info ); void LAPACK_dorbdb( char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, double* x11, lapack_int* ldx11, double* x12, lapack_int* ldx12, double* x21, lapack_int* ldx21, double* x22, lapack_int* ldx22, double* theta, double* phi, double* taup1, double* taup2, double* tauq1, double* tauq2, double* work, lapack_int* lwork , lapack_int *info ); void LAPACK_dorcsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, double* x11, lapack_int* ldx11, double* x12, lapack_int* ldx12, double* x21, lapack_int* ldx21, double* x22, lapack_int* ldx22, double* theta, double* u1, lapack_int* ldu1, double* u2, lapack_int* ldu2, double* v1t, lapack_int* ldv1t, double* v2t, lapack_int* ldv2t, double* work, lapack_int* lwork, lapack_int* iwork , lapack_int *info ); void LAPACK_dsyconv( char* uplo, char* way, lapack_int* n, double* a, lapack_int* lda, const lapack_int* ipiv, double* work , lapack_int *info ); void LAPACK_dsyswapr( char* uplo, lapack_int* n, double* a, lapack_int* i1, lapack_int* i2 ); void LAPACK_dsytri2( char* uplo, lapack_int* n, 
double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* lwork , lapack_int *info ); void LAPACK_dsytri2x( char* uplo, lapack_int* n, double* a, lapack_int* lda, const lapack_int* ipiv, double* work, lapack_int* nb , lapack_int *info ); void LAPACK_dsytrs2( char* uplo, lapack_int* n, lapack_int* nrhs, const double* a, lapack_int* lda, const lapack_int* ipiv, double* b, lapack_int* ldb, double* work , lapack_int *info ); void LAPACK_sbbcsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, lapack_int* m, lapack_int* p, lapack_int* q, float* theta, float* phi, float* u1, lapack_int* ldu1, float* u2, lapack_int* ldu2, float* v1t, lapack_int* ldv1t, float* v2t, lapack_int* ldv2t, float* b11d, float* b11e, float* b12d, float* b12e, float* b21d, float* b21e, float* b22d, float* b22e, float* work, lapack_int* lwork , lapack_int *info ); void LAPACK_sorbdb( char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, float* x11, lapack_int* ldx11, float* x12, lapack_int* ldx12, float* x21, lapack_int* ldx21, float* x22, lapack_int* ldx22, float* theta, float* phi, float* taup1, float* taup2, float* tauq1, float* tauq2, float* work, lapack_int* lwork , lapack_int *info ); void LAPACK_sorcsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, float* x11, lapack_int* ldx11, float* x12, lapack_int* ldx12, float* x21, lapack_int* ldx21, float* x22, lapack_int* ldx22, float* theta, float* u1, lapack_int* ldu1, float* u2, lapack_int* ldu2, float* v1t, lapack_int* ldv1t, float* v2t, lapack_int* ldv2t, float* work, lapack_int* lwork, lapack_int* iwork , lapack_int *info ); void LAPACK_ssyconv( char* uplo, char* way, lapack_int* n, float* a, lapack_int* lda, const lapack_int* ipiv, float* work , lapack_int *info ); void LAPACK_ssyswapr( char* uplo, lapack_int* n, float* a, lapack_int* i1, lapack_int* i2 ); void LAPACK_ssytri2( char* uplo, 
lapack_int* n, float* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_float* work, lapack_int* lwork , lapack_int *info ); void LAPACK_ssytri2x( char* uplo, lapack_int* n, float* a, lapack_int* lda, const lapack_int* ipiv, float* work, lapack_int* nb , lapack_int *info ); void LAPACK_ssytrs2( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a, lapack_int* lda, const lapack_int* ipiv, float* b, lapack_int* ldb, float* work , lapack_int *info ); void LAPACK_zbbcsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, lapack_int* m, lapack_int* p, lapack_int* q, double* theta, double* phi, lapack_complex_double* u1, lapack_int* ldu1, lapack_complex_double* u2, lapack_int* ldu2, lapack_complex_double* v1t, lapack_int* ldv1t, lapack_complex_double* v2t, lapack_int* ldv2t, double* b11d, double* b11e, double* b12d, double* b12e, double* b21d, double* b21e, double* b22d, double* b22e, double* rwork, lapack_int* lrwork , lapack_int *info ); void LAPACK_zheswapr( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* i1, lapack_int* i2 ); void LAPACK_zhetri2( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* lwork , lapack_int *info ); void LAPACK_zhetri2x( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* nb , lapack_int *info ); void LAPACK_zhetrs2( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work , lapack_int *info ); void LAPACK_zsyconv( char* uplo, char* way, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work , lapack_int *info ); void LAPACK_zsyswapr( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* i1, lapack_int* i2 ); void LAPACK_zsytri2( char* 
uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* lwork , lapack_int *info ); void LAPACK_zsytri2x( char* uplo, lapack_int* n, lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* work, lapack_int* nb , lapack_int *info ); void LAPACK_zsytrs2( char* uplo, lapack_int* n, lapack_int* nrhs, const lapack_complex_double* a, lapack_int* lda, const lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work , lapack_int *info ); void LAPACK_zunbdb( char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, lapack_complex_double* x11, lapack_int* ldx11, lapack_complex_double* x12, lapack_int* ldx12, lapack_complex_double* x21, lapack_int* ldx21, lapack_complex_double* x22, lapack_int* ldx22, double* theta, double* phi, lapack_complex_double* taup1, lapack_complex_double* taup2, lapack_complex_double* tauq1, lapack_complex_double* tauq2, lapack_complex_double* work, lapack_int* lwork , lapack_int *info ); void LAPACK_zuncsd( char* jobu1, char* jobu2, char* jobv1t, char* jobv2t, char* trans, char* signs, lapack_int* m, lapack_int* p, lapack_int* q, lapack_complex_double* x11, lapack_int* ldx11, lapack_complex_double* x12, lapack_int* ldx12, lapack_complex_double* x21, lapack_int* ldx21, lapack_complex_double* x22, lapack_int* ldx22, double* theta, lapack_complex_double* u1, lapack_int* ldu1, lapack_complex_double* u2, lapack_int* ldu2, lapack_complex_double* v1t, lapack_int* ldv1t, lapack_complex_double* v2t, lapack_int* ldv2t, lapack_complex_double* work, lapack_int* lwork, double* rwork, lapack_int* lrwork, lapack_int* iwork , lapack_int *info ); // LAPACK 3.4.0 void LAPACK_sgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* nb, const float* v, lapack_int* ldv, const float* t, lapack_int* ldt, float* c, lapack_int* ldc, float* work, lapack_int *info ); void LAPACK_dgemqrt( char* 
side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* nb, const double* v, lapack_int* ldv, const double* t, lapack_int* ldt, double* c, lapack_int* ldc, double* work, lapack_int *info ); void LAPACK_cgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* nb, const lapack_complex_float* v, lapack_int* ldv, const lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* c, lapack_int* ldc, lapack_complex_float* work, lapack_int *info ); void LAPACK_zgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* nb, const lapack_complex_double* v, lapack_int* ldv, const lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* c, lapack_int* ldc, lapack_complex_double* work, lapack_int *info ); void LAPACK_sgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, float* a, lapack_int* lda, float* t, lapack_int* ldt, float* work, lapack_int *info ); void LAPACK_dgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, double* a, lapack_int* lda, double* t, lapack_int* ldt, double* work, lapack_int *info ); void LAPACK_cgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* work, lapack_int *info ); void LAPACK_zgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* work, lapack_int *info ); void LAPACK_sgeqrt2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* t, lapack_int* ldt, lapack_int *info ); void LAPACK_dgeqrt2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* t, lapack_int* ldt, lapack_int *info ); void LAPACK_cgeqrt2( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* t, lapack_int* ldt, lapack_int *info ); void LAPACK_zgeqrt2( lapack_int* m, lapack_int* n, lapack_complex_double* a, 
lapack_int* lda, lapack_complex_double* t, lapack_int* ldt, lapack_int *info ); void LAPACK_sgeqrt3( lapack_int* m, lapack_int* n, float* a, lapack_int* lda, float* t, lapack_int* ldt, lapack_int *info ); void LAPACK_dgeqrt3( lapack_int* m, lapack_int* n, double* a, lapack_int* lda, double* t, lapack_int* ldt, lapack_int *info ); void LAPACK_cgeqrt3( lapack_int* m, lapack_int* n, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* t, lapack_int* ldt, lapack_int *info ); void LAPACK_zgeqrt3( lapack_int* m, lapack_int* n, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* t, lapack_int* ldt, lapack_int *info ); void LAPACK_stpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, lapack_int* nb, const float* v, lapack_int* ldv, const float* t, lapack_int* ldt, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* work, lapack_int *info ); void LAPACK_dtpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, lapack_int* nb, const double* v, lapack_int* ldv, const double* t, lapack_int* ldt, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* work, lapack_int *info ); void LAPACK_ctpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, lapack_int* nb, const lapack_complex_float* v, lapack_int* ldv, const lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work, lapack_int *info ); void LAPACK_ztpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, lapack_int* nb, const lapack_complex_double* v, lapack_int* ldv, const lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work, lapack_int *info ); void LAPACK_dtpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb, double* a, 
lapack_int* lda, double* b, lapack_int* ldb, double* t, lapack_int* ldt, double* work, lapack_int *info ); void LAPACK_ctpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* work, lapack_int *info ); void LAPACK_ztpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* work, lapack_int *info ); void LAPACK_stpqrt2( lapack_int* m, lapack_int* n, lapack_int* l, float* a, lapack_int* lda, float* b, lapack_int* ldb, float* t, lapack_int* ldt, lapack_int *info ); void LAPACK_dtpqrt2( lapack_int* m, lapack_int* n, lapack_int* l, double* a, lapack_int* lda, double* b, lapack_int* ldb, double* t, lapack_int* ldt, lapack_int *info ); void LAPACK_ctpqrt2( lapack_int* m, lapack_int* n, lapack_int* l, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* t, lapack_int* ldt, lapack_int *info ); void LAPACK_ztpqrt2( lapack_int* m, lapack_int* n, lapack_int* l, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* t, lapack_int* ldt, lapack_int *info ); void LAPACK_stprfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const float* v, lapack_int* ldv, const float* t, lapack_int* ldt, float* a, lapack_int* lda, float* b, lapack_int* ldb, const float* work, lapack_int* ldwork ); void LAPACK_dtprfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const double* v, lapack_int* ldv, const double* t, lapack_int* ldt, double* a, lapack_int* lda, double* b, lapack_int* ldb, const double* work, lapack_int* ldwork ); void LAPACK_ctprfb( 
char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const lapack_complex_float* v, lapack_int* ldv, const lapack_complex_float* t, lapack_int* ldt, lapack_complex_float* a, lapack_int* lda, lapack_complex_float* b, lapack_int* ldb, const float* work, lapack_int* ldwork ); void LAPACK_ztprfb( char* side, char* trans, char* direct, char* storev, lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l, const lapack_complex_double* v, lapack_int* ldv, const lapack_complex_double* t, lapack_int* ldt, lapack_complex_double* a, lapack_int* lda, lapack_complex_double* b, lapack_int* ldb, const double* work, lapack_int* ldwork ); // LAPACK 3.5.0 void LAPACK_ssysv_rook( char* uplo, lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda, lapack_int* ipiv, float* b, lapack_int* ldb, float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_dsysv_rook( char* uplo, lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda, lapack_int* ipiv, double* b, lapack_int* ldb, double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_csysv_rook( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb, lapack_complex_float* work, lapack_int* lwork, lapack_int *info ); void LAPACK_zsysv_rook( char* uplo, lapack_int* n, lapack_int* nrhs, lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b, lapack_int* ldb, lapack_complex_double* work, lapack_int* lwork, lapack_int *info ); void LAPACK_csyr( char* uplo, lapack_int* n, lapack_complex_float* alpha, const lapack_complex_float* x, lapack_int* incx, lapack_complex_float* a, lapack_int* lda ); void LAPACK_zsyr( char* uplo, lapack_int* n, lapack_complex_double* alpha, const lapack_complex_double* x, lapack_int* incx, lapack_complex_double* a, lapack_int* lda ); void LAPACK_ilaver( const lapack_int* vers_major, const lapack_int* vers_minor, const 
lapack_int* vers_patch ); #ifdef __cplusplus } #endif /* __cplusplus */ #endif /* _LAPACKE_H_ */ bart-0.4.02/src/lapacke/lapacke_cge_nancheck.c000066400000000000000000000054371320577655200211550ustar00rootroot00000000000000/***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Check a matrix for NaN entries. */ lapack_logical LAPACKE_cge_nancheck( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float *a, lapack_int lda ) { lapack_int i, j; if( a == NULL ) return (lapack_logical) 0; if( matrix_order == LAPACK_COL_MAJOR ) { for( j = 0; j < n; j++ ) { for( i = 0; i < MIN( m, lda ); i++ ) { if( LAPACK_CISNAN( a[i+(size_t)j*lda] ) ) return (lapack_logical) 1; } } } else if ( matrix_order == LAPACK_ROW_MAJOR ) { for( i = 0; i < m; i++ ) { for( j = 0; j < MIN( n, lda ); j++ ) { if( LAPACK_CISNAN( a[(size_t)i*lda+j] ) ) return (lapack_logical) 1; } } } return (lapack_logical) 0; } bart-0.4.02/src/lapacke/lapacke_cge_trans.c000066400000000000000000000053631320577655200205300ustar00rootroot00000000000000/***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Converts input general matrix from row-major(C) to column-major(Fortran) * layout or vice versa. */ void LAPACKE_cge_trans( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* in, lapack_int ldin, lapack_complex_float* out, lapack_int ldout ) { lapack_int i, j, x, y; if( in == NULL || out == NULL ) return; if( matrix_order == LAPACK_COL_MAJOR ) { x = n; y = m; } else if ( matrix_order == LAPACK_ROW_MAJOR ) { x = m; y = n; } else { /* Unknown input layout */ return; } /* In case of incorrect m, n, ldin or ldout the function does nothing */ for( i = 0; i < MIN( y, ldin ); i++ ) { for( j = 0; j < MIN( x, ldout ); j++ ) { out[ (size_t)i*ldout + j ] = in[ (size_t)j*ldin + i ]; } } } bart-0.4.02/src/lapacke/lapacke_cgesdd.c000066400000000000000000000106011320577655200200030ustar00rootroot00000000000000/***************************************************************************** Copyright (c) 2011, Intel Corp. 
All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
***************************************************************************** * Contents: Native high-level C interface to LAPACK function cgesdd * Author: Intel Corporation * Generated November, 2011 *****************************************************************************/ #include "lapacke_utils.h" lapack_int LAPACKE_cgesdd( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* vt, lapack_int ldvt ) { lapack_int info = 0; lapack_int lwork = -1; /* Additional scalars declarations for work arrays */ size_t lrwork; lapack_int* iwork = NULL; float* rwork = NULL; lapack_complex_float* work = NULL; lapack_complex_float work_query; if( matrix_order != LAPACK_COL_MAJOR && matrix_order != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_cgesdd", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ if( LAPACKE_cge_nancheck( matrix_order, m, n, a, lda ) ) { return -5; } #endif /* Additional scalars initializations for work arrays */ if( LAPACKE_lsame( jobz, 'n' ) ) { lrwork = MAX(1,5*MIN(m,n)); } else { lrwork = (size_t)5*MAX(1,MIN(m,n))*MAX(1,MIN(m,n))+7*MIN(m,n); } /* Allocate memory for working array(s) */ iwork = (lapack_int*) LAPACKE_malloc( sizeof(lapack_int) * MAX(1,8*MIN(m,n)) ); if( iwork == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; } rwork = (float*)LAPACKE_malloc( sizeof(float) * lrwork ); if( rwork == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_1; } /* Query optimal working array(s) size */ info = LAPACKE_cgesdd_work( matrix_order, jobz, m, n, a, lda, s, u, ldu, vt, ldvt, &work_query, lwork, rwork, iwork ); if( info != 0 ) { goto exit_level_2; } lwork = LAPACK_C2INT( work_query ); /* Allocate memory for work arrays */ work = (lapack_complex_float*) LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto 
exit_level_2; } /* Call middle-level interface */ info = LAPACKE_cgesdd_work( matrix_order, jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, iwork ); /* Release memory and exit */ LAPACKE_free( work ); exit_level_2: LAPACKE_free( rwork ); exit_level_1: LAPACKE_free( iwork ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_cgesdd", info ); } return info; } bart-0.4.02/src/lapacke/lapacke_cgesdd_work.c000066400000000000000000000161611320577655200210540ustar00rootroot00000000000000/***************************************************************************** Copyright (c) 2011, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function cgesdd * Author: Intel Corporation * Generated November, 2011 *****************************************************************************/ #include "lapacke_utils.h" lapack_int LAPACKE_cgesdd_work( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_float* a, lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu, lapack_complex_float* vt, lapack_int ldvt, lapack_complex_float* work, lapack_int lwork, float* rwork, lapack_int* iwork ) { lapack_int info = 0; if( matrix_order == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_cgesdd( &jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, iwork, &info ); if( info < 0 ) { info = info - 1; } } else if( matrix_order == LAPACK_ROW_MAJOR ) { lapack_int nrows_u = ( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && m=n) ) ) { vt_t = (lapack_complex_float*) LAPACKE_malloc( sizeof(lapack_complex_float) * ldvt_t * MAX(1,n) ); if( vt_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; } } /* Transpose input matrices */ LAPACKE_cge_trans( matrix_order, m, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ LAPACK_cgesdd( &jobz, &m, &n, a_t, &lda_t, s, u_t, &ldu_t, vt_t, &ldvt_t, work, &lwork, rwork, iwork, &info ); if( info < 0 ) { info = info 
- 1; } /* Transpose output matrices */ LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m=n) ) ) { LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_vt, n, vt_t, ldvt_t, vt, ldvt ); } /* Release memory and exit */ if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m>=n) ) ) { LAPACKE_free( vt_t ); } exit_level_2: if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m (y)) ? (x) : (y)) #endif #ifndef MIN #define MIN(x,y) (((x) < (y)) ? (x) : (y)) #endif #ifndef MAX3 #define MAX3(x,y,z) (((x) > MAX(y,z)) ? (x) : MAX(y,z)) #endif #ifndef MIN3 #define MIN3(x,y,z) (((x) < MIN(y,z)) ? (x) : MIN(y,z)) #endif #define IS_S_NONZERO(x) ( (x) < 0 || (x) > 0 ) #define IS_D_NONZERO(x) ( (x) < 0 || (x) > 0 ) #define IS_C_NONZERO(x) ( IS_S_NONZERO(*((float*)&x)) || \ IS_S_NONZERO(*(((float*)&x)+1)) ) #define IS_Z_NONZERO(x) ( IS_D_NONZERO(*((double*)&x)) || \ IS_D_NONZERO(*(((double*)&x)+1)) ) /* Error handler */ void LAPACKE_xerbla( const char *name, lapack_int info ); /* Compare two chars (case-insensitive) */ lapack_logical LAPACKE_lsame( char ca, char cb ); /* Functions to convert column-major to row-major 2d arrays and vice versa. 
*/ void LAPACKE_cgb_trans( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_cge_trans( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* in, lapack_int ldin, lapack_complex_float* out, lapack_int ldout ); void LAPACKE_cgg_trans( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float* in, lapack_int ldin, lapack_complex_float* out, lapack_int ldout ); void LAPACKE_chb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_che_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_chp_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_complex_float *out ); void LAPACKE_chs_trans( int matrix_order, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_cpb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_cpf_trans( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_float *in, lapack_complex_float *out ); void LAPACKE_cpo_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_cpp_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_complex_float *out ); void LAPACKE_csp_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_complex_float *out ); void LAPACKE_csy_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *in, lapack_int 
ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_ctb_trans( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_ctf_trans( int matrix_order, char transr, char uplo, char diag, lapack_int n, const lapack_complex_float *in, lapack_complex_float *out ); void LAPACKE_ctp_trans( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_float *in, lapack_complex_float *out ); void LAPACKE_ctr_trans( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_float *in, lapack_int ldin, lapack_complex_float *out, lapack_int ldout ); void LAPACKE_dgb_trans( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dge_trans( int matrix_order, lapack_int m, lapack_int n, const double* in, lapack_int ldin, double* out, lapack_int ldout ); void LAPACKE_dgg_trans( int matrix_order, lapack_int m, lapack_int n, const double* in, lapack_int ldin, double* out, lapack_int ldout ); void LAPACKE_dhs_trans( int matrix_order, lapack_int n, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dpb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dpf_trans( int matrix_order, char transr, char uplo, lapack_int n, const double *in, double *out ); void LAPACKE_dpo_trans( int matrix_order, char uplo, lapack_int n, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dpp_trans( int matrix_order, char uplo, lapack_int n, const double *in, double *out ); void LAPACKE_dsb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dsp_trans( int matrix_order, char uplo, lapack_int n, const 
double *in, double *out ); void LAPACKE_dsy_trans( int matrix_order, char uplo, lapack_int n, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dtb_trans( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_dtf_trans( int matrix_order, char transr, char uplo, char diag, lapack_int n, const double *in, double *out ); void LAPACKE_dtp_trans( int matrix_order, char uplo, char diag, lapack_int n, const double *in, double *out ); void LAPACKE_dtr_trans( int matrix_order, char uplo, char diag, lapack_int n, const double *in, lapack_int ldin, double *out, lapack_int ldout ); void LAPACKE_sgb_trans( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_sge_trans( int matrix_order, lapack_int m, lapack_int n, const float* in, lapack_int ldin, float* out, lapack_int ldout ); void LAPACKE_sgg_trans( int matrix_order, lapack_int m, lapack_int n, const float* in, lapack_int ldin, float* out, lapack_int ldout ); void LAPACKE_shs_trans( int matrix_order, lapack_int n, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_spb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_spf_trans( int matrix_order, char transr, char uplo, lapack_int n, const float *in, float *out ); void LAPACKE_spo_trans( int matrix_order, char uplo, lapack_int n, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_spp_trans( int matrix_order, char uplo, lapack_int n, const float *in, float *out ); void LAPACKE_ssb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_ssp_trans( int matrix_order, char uplo, lapack_int n, const float *in, float *out ); 
void LAPACKE_ssy_trans( int matrix_order, char uplo, lapack_int n, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_stb_trans( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_stf_trans( int matrix_order, char transr, char uplo, char diag, lapack_int n, const float *in, float *out ); void LAPACKE_stp_trans( int matrix_order, char uplo, char diag, lapack_int n, const float *in, float *out ); void LAPACKE_str_trans( int matrix_order, char uplo, char diag, lapack_int n, const float *in, lapack_int ldin, float *out, lapack_int ldout ); void LAPACKE_zgb_trans( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); void LAPACKE_zge_trans( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* in, lapack_int ldin, lapack_complex_double* out, lapack_int ldout ); void LAPACKE_zgg_trans( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* in, lapack_int ldin, lapack_complex_double* out, lapack_int ldout ); void LAPACKE_zhb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); void LAPACKE_zhe_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); void LAPACKE_zhp_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *in, lapack_complex_double *out ); void LAPACKE_zhs_trans( int matrix_order, lapack_int n, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout ); void LAPACKE_zpb_trans( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, 
lapack_int ldout );
void LAPACKE_zpf_trans( int matrix_order, char transr, char uplo, lapack_int n, const lapack_complex_double *in, lapack_complex_double *out );
void LAPACKE_zpo_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout );
void LAPACKE_zpp_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *in, lapack_complex_double *out );
void LAPACKE_zsp_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *in, lapack_complex_double *out );
void LAPACKE_zsy_trans( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout );
void LAPACKE_ztb_trans( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout );
void LAPACKE_ztf_trans( int matrix_order, char transr, char uplo, char diag, lapack_int n, const lapack_complex_double *in, lapack_complex_double *out );
void LAPACKE_ztp_trans( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_double *in, lapack_complex_double *out );
void LAPACKE_ztr_trans( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_double *in, lapack_int ldin, lapack_complex_double *out, lapack_int ldout );

/* NaN checkers
 *
 * A NaN is the only value that compares unequal to itself, hence x != x.
 * The complex variants test the two components through a pointer to the
 * component type, so their argument must be an lvalue.
 * Every macro argument is parenthesized so that expressions such as
 * LAPACK_DISNAN( c ? a : b ) expand as intended; note that the argument is
 * still evaluated more than once and therefore must be free of side effects.
 * The casts keep the const qualifier so the macros can be applied to
 * const-qualified data without discarding it. */
#define LAPACK_SISNAN( x ) ( (x) != (x) )
#define LAPACK_DISNAN( x ) ( (x) != (x) )
#define LAPACK_CISNAN( x ) ( LAPACK_SISNAN(*((const float*) &(x))) || \
                              LAPACK_SISNAN(*(((const float*) &(x))+1)) )
#define LAPACK_ZISNAN( x ) ( LAPACK_DISNAN(*((const double*) &(x))) || \
                              LAPACK_DISNAN(*(((const double*) &(x))+1)) )

/* NaN checkers for vectors */
lapack_logical LAPACKE_c_nancheck( lapack_int n, const lapack_complex_float *x, lapack_int incx );
lapack_logical LAPACKE_d_nancheck( lapack_int n, const double *x, lapack_int incx );
lapack_logical LAPACKE_s_nancheck( lapack_int n, const float *x,
lapack_int incx ); lapack_logical LAPACKE_z_nancheck( lapack_int n, const lapack_complex_double *x, lapack_int incx ); /* NaN checkers for matrices */ lapack_logical LAPACKE_cgb_nancheck( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_float *ab, lapack_int ldab ); lapack_logical LAPACKE_cge_nancheck( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_cgg_nancheck( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_cgt_nancheck( lapack_int n, const lapack_complex_float *dl, const lapack_complex_float *d, const lapack_complex_float *du ); lapack_logical LAPACKE_chb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab ); lapack_logical LAPACKE_che_nancheck( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_chp_nancheck( lapack_int n, const lapack_complex_float *ap ); lapack_logical LAPACKE_chs_nancheck( int matrix_order, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_cpb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab ); lapack_logical LAPACKE_cpf_nancheck( lapack_int n, const lapack_complex_float *a ); lapack_logical LAPACKE_cpo_nancheck( int matrix_order, char uplo, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_cpp_nancheck( lapack_int n, const lapack_complex_float *ap ); lapack_logical LAPACKE_cpt_nancheck( lapack_int n, const float *d, const lapack_complex_float *e ); lapack_logical LAPACKE_csp_nancheck( lapack_int n, const lapack_complex_float *ap ); lapack_logical LAPACKE_cst_nancheck( lapack_int n, const lapack_complex_float *d, const lapack_complex_float *e ); lapack_logical LAPACKE_csy_nancheck( int 
matrix_order, char uplo, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_ctb_nancheck( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_float* ab, lapack_int ldab ); lapack_logical LAPACKE_ctf_nancheck( int matrix_order, char transr, char uplo, char diag, lapack_int n, const lapack_complex_float *a ); lapack_logical LAPACKE_ctp_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_float *ap ); lapack_logical LAPACKE_ctr_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_float *a, lapack_int lda ); lapack_logical LAPACKE_dgb_nancheck( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const double *ab, lapack_int ldab ); lapack_logical LAPACKE_dge_nancheck( int matrix_order, lapack_int m, lapack_int n, const double *a, lapack_int lda ); lapack_logical LAPACKE_dgg_nancheck( int matrix_order, lapack_int m, lapack_int n, const double *a, lapack_int lda ); lapack_logical LAPACKE_dgt_nancheck( lapack_int n, const double *dl, const double *d, const double *du ); lapack_logical LAPACKE_dhs_nancheck( int matrix_order, lapack_int n, const double *a, lapack_int lda ); lapack_logical LAPACKE_dpb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab ); lapack_logical LAPACKE_dpf_nancheck( lapack_int n, const double *a ); lapack_logical LAPACKE_dpo_nancheck( int matrix_order, char uplo, lapack_int n, const double *a, lapack_int lda ); lapack_logical LAPACKE_dpp_nancheck( lapack_int n, const double *ap ); lapack_logical LAPACKE_dpt_nancheck( lapack_int n, const double *d, const double *e ); lapack_logical LAPACKE_dsb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab ); lapack_logical LAPACKE_dsp_nancheck( lapack_int n, const double *ap ); lapack_logical LAPACKE_dst_nancheck( lapack_int n, const double *d, const 
double *e ); lapack_logical LAPACKE_dsy_nancheck( int matrix_order, char uplo, lapack_int n, const double *a, lapack_int lda ); lapack_logical LAPACKE_dtb_nancheck( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const double* ab, lapack_int ldab ); lapack_logical LAPACKE_dtf_nancheck( int matrix_order, char transr, char uplo, char diag, lapack_int n, const double *a ); lapack_logical LAPACKE_dtp_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const double *ap ); lapack_logical LAPACKE_dtr_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const double *a, lapack_int lda ); lapack_logical LAPACKE_sgb_nancheck( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const float *ab, lapack_int ldab ); lapack_logical LAPACKE_sge_nancheck( int matrix_order, lapack_int m, lapack_int n, const float *a, lapack_int lda ); lapack_logical LAPACKE_sgg_nancheck( int matrix_order, lapack_int m, lapack_int n, const float *a, lapack_int lda ); lapack_logical LAPACKE_sgt_nancheck( lapack_int n, const float *dl, const float *d, const float *du ); lapack_logical LAPACKE_shs_nancheck( int matrix_order, lapack_int n, const float *a, lapack_int lda ); lapack_logical LAPACKE_spb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab ); lapack_logical LAPACKE_spf_nancheck( lapack_int n, const float *a ); lapack_logical LAPACKE_spo_nancheck( int matrix_order, char uplo, lapack_int n, const float *a, lapack_int lda ); lapack_logical LAPACKE_spp_nancheck( lapack_int n, const float *ap ); lapack_logical LAPACKE_spt_nancheck( lapack_int n, const float *d, const float *e ); lapack_logical LAPACKE_ssb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab ); lapack_logical LAPACKE_ssp_nancheck( lapack_int n, const float *ap ); lapack_logical LAPACKE_sst_nancheck( lapack_int n, const float *d, const float *e ); lapack_logical 
LAPACKE_ssy_nancheck( int matrix_order, char uplo, lapack_int n, const float *a, lapack_int lda ); lapack_logical LAPACKE_stb_nancheck( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const float* ab, lapack_int ldab ); lapack_logical LAPACKE_stf_nancheck( int matrix_order, char transr, char uplo, char diag, lapack_int n, const float *a ); lapack_logical LAPACKE_stp_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const float *ap ); lapack_logical LAPACKE_str_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const float *a, lapack_int lda ); lapack_logical LAPACKE_zgb_nancheck( int matrix_order, lapack_int m, lapack_int n, lapack_int kl, lapack_int ku, const lapack_complex_double *ab, lapack_int ldab ); lapack_logical LAPACKE_zge_nancheck( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double *a, lapack_int lda ); lapack_logical LAPACKE_zgg_nancheck( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double *a, lapack_int lda ); lapack_logical LAPACKE_zgt_nancheck( lapack_int n, const lapack_complex_double *dl, const lapack_complex_double *d, const lapack_complex_double *du ); lapack_logical LAPACKE_zhb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab ); lapack_logical LAPACKE_zhe_nancheck( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *a, lapack_int lda ); lapack_logical LAPACKE_zhp_nancheck( lapack_int n, const lapack_complex_double *ap ); lapack_logical LAPACKE_zhs_nancheck( int matrix_order, lapack_int n, const lapack_complex_double *a, lapack_int lda ); lapack_logical LAPACKE_zpb_nancheck( int matrix_order, char uplo, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab ); lapack_logical LAPACKE_zpf_nancheck( lapack_int n, const lapack_complex_double *a ); lapack_logical LAPACKE_zpo_nancheck( int matrix_order, char uplo, lapack_int n, const 
lapack_complex_double *a, lapack_int lda ); lapack_logical LAPACKE_zpp_nancheck( lapack_int n, const lapack_complex_double *ap ); lapack_logical LAPACKE_zpt_nancheck( lapack_int n, const double *d, const lapack_complex_double *e ); lapack_logical LAPACKE_zsp_nancheck( lapack_int n, const lapack_complex_double *ap ); lapack_logical LAPACKE_zst_nancheck( lapack_int n, const lapack_complex_double *d, const lapack_complex_double *e ); lapack_logical LAPACKE_zsy_nancheck( int matrix_order, char uplo, lapack_int n, const lapack_complex_double *a, lapack_int lda ); lapack_logical LAPACKE_ztb_nancheck( int matrix_order, char uplo, char diag, lapack_int n, lapack_int kd, const lapack_complex_double* ab, lapack_int ldab ); lapack_logical LAPACKE_ztf_nancheck( int matrix_order, char transr, char uplo, char diag, lapack_int n, const lapack_complex_double *a ); lapack_logical LAPACKE_ztp_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_double *ap ); lapack_logical LAPACKE_ztr_nancheck( int matrix_order, char uplo, char diag, lapack_int n, const lapack_complex_double *a, lapack_int lda ); #ifdef __cplusplus } #endif /* __cplusplus */ #endif /* _LAPACKE_UTILS_H_ */ bart-0.4.02/src/lapacke/lapacke_xerbla.c000066400000000000000000000044511320577655200200350ustar00rootroot00000000000000/***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK lsame * Author: Intel Corporation * Created in January, 2010 *****************************************************************************/ #include #include "lapacke_utils.h" void LAPACKE_xerbla( const char *name, lapack_int info ) { if( info == LAPACK_WORK_MEMORY_ERROR ) { printf( "Not enough memory to allocate work array in %s\n", name ); } else if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { printf( "Not enough memory to transpose matrix in %s\n", name ); } else if( info < 0 ) { printf( "Wrong parameter %d in %s\n", -(int) info, name ); } } bart-0.4.02/src/lapacke/lapacke_zge_nancheck.c000066400000000000000000000054401320577655200211760ustar00rootroot00000000000000/***************************************************************************** Copyright (c) 2010, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Check a matrix for NaN entries. 
*/
/* Scans the m-by-n general matrix a (leading dimension lda, storage layout
 * given by matrix_order) in storage order and reports whether an entry with
 * a NaN real or imaginary part exists.
 *
 * Returns 1 at the first NaN encountered and 0 otherwise. 0 is also returned
 * when a is NULL or when matrix_order is neither LAPACK_COL_MAJOR nor
 * LAPACK_ROW_MAJOR. The fast index never runs past lda, so an undersized
 * leading dimension shortens the scan instead of reading across lines. */
lapack_logical LAPACKE_zge_nancheck( int matrix_order, lapack_int m,
                                     lapack_int n,
                                     const lapack_complex_double *a,
                                     lapack_int lda )
{
    lapack_int n_lines;    /* number of columns (col-major) or rows (row-major) */
    lapack_int line_len;   /* entries inspected in each of those lines */
    lapack_int line, pos;

    if( a == NULL ) {
        return (lapack_logical) 0;
    }

    switch( matrix_order ) {
    case LAPACK_COL_MAJOR:
        n_lines = n;
        line_len = MIN( m, lda );
        break;
    case LAPACK_ROW_MAJOR:
        n_lines = m;
        line_len = MIN( n, lda );
        break;
    default:
        /* Unknown layout: nothing is inspected */
        return (lapack_logical) 0;
    }

    for( line = 0; line < n_lines; line++ ) {
        for( pos = 0; pos < line_len; pos++ ) {
            if( LAPACK_ZISNAN( a[(size_t)line*lda + pos] ) ) {
                return (lapack_logical) 1;
            }
        }
    }

    return (lapack_logical) 0;
}
bart-0.4.02/src/lapacke/lapacke_zge_trans.c000066400000000000000000000053651320577655200205610ustar00rootroot00000000000000/*****************************************************************************
Copyright (c) 2010, Intel Corp. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ****************************************************************************** * Contents: Native C interface to LAPACK utility function * Author: Intel Corporation * Created in February, 2010 *****************************************************************************/ #include "lapacke_utils.h" /* Converts input general matrix from row-major(C) to column-major(Fortran) * layout or vice versa. */ void LAPACKE_zge_trans( int matrix_order, lapack_int m, lapack_int n, const lapack_complex_double* in, lapack_int ldin, lapack_complex_double* out, lapack_int ldout ) { lapack_int i, j, x, y; if( in == NULL || out == NULL ) return; if( matrix_order == LAPACK_COL_MAJOR ) { x = n; y = m; } else if ( matrix_order == LAPACK_ROW_MAJOR ) { x = m; y = n; } else { /* Unknown input layout */ return; } /* In case of incorrect m, n, ldin or ldout the function does nothing */ for( i = 0; i < MIN( y, ldin ); i++ ) { for( j = 0; j < MIN( x, ldout ); j++ ) { out[ (size_t)i*ldout + j ] = in[ (size_t)j*ldin + i ]; } } } bart-0.4.02/src/lapacke/lapacke_zgesdd.c000066400000000000000000000106141320577655200200360ustar00rootroot00000000000000/***************************************************************************** Copyright (c) 2011, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
***************************************************************************** * Contents: Native high-level C interface to LAPACK function zgesdd * Author: Intel Corporation * Generated November, 2011 *****************************************************************************/ #include "lapacke_utils.h" lapack_int LAPACKE_zgesdd( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* s, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* vt, lapack_int ldvt ) { lapack_int info = 0; lapack_int lwork = -1; /* Additional scalars declarations for work arrays */ size_t lrwork; lapack_int* iwork = NULL; double* rwork = NULL; lapack_complex_double* work = NULL; lapack_complex_double work_query; if( matrix_order != LAPACK_COL_MAJOR && matrix_order != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zgesdd", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ if( LAPACKE_zge_nancheck( matrix_order, m, n, a, lda ) ) { return -5; } #endif /* Additional scalars initializations for work arrays */ if( LAPACKE_lsame( jobz, 'n' ) ) { lrwork = MAX(1,5*MIN(m,n)); } else { lrwork = (size_t)5*MAX(1,MIN(m,n))*MAX(1,MIN(m,n))+7*MIN(m,n); } /* Allocate memory for working array(s) */ iwork = (lapack_int*) LAPACKE_malloc( sizeof(lapack_int) * MAX(1,8*MIN(m,n)) ); if( iwork == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; } rwork = (double*)LAPACKE_malloc( sizeof(double) * lrwork ); if( rwork == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_1; } /* Query optimal working array(s) size */ info = LAPACKE_zgesdd_work( matrix_order, jobz, m, n, a, lda, s, u, ldu, vt, ldvt, &work_query, lwork, rwork, iwork ); if( info != 0 ) { goto exit_level_2; } lwork = LAPACK_Z2INT( work_query ); /* Allocate memory for work arrays */ work = (lapack_complex_double*) LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; 
goto exit_level_2; } /* Call middle-level interface */ info = LAPACKE_zgesdd_work( matrix_order, jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, rwork, iwork ); /* Release memory and exit */ LAPACKE_free( work ); exit_level_2: LAPACKE_free( rwork ); exit_level_1: LAPACKE_free( iwork ); exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_zgesdd", info ); } return info; } bart-0.4.02/src/lapacke/lapacke_zgesdd_work.c000066400000000000000000000162001320577655200210750ustar00rootroot00000000000000/***************************************************************************** Copyright (c) 2011, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************** * Contents: Native middle-level C interface to LAPACK function zgesdd * Author: Intel Corporation * Generated November, 2011 *****************************************************************************/ #include "lapacke_utils.h" lapack_int LAPACKE_zgesdd_work( int matrix_order, char jobz, lapack_int m, lapack_int n, lapack_complex_double* a, lapack_int lda, double* s, lapack_complex_double* u, lapack_int ldu, lapack_complex_double* vt, lapack_int ldvt, lapack_complex_double* work, lapack_int lwork, double* rwork, lapack_int* iwork ) { lapack_int info = 0; if( matrix_order == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_zgesdd( &jobz, &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work, &lwork, rwork, iwork, &info ); if( info < 0 ) { info = info - 1; } } else if( matrix_order == LAPACK_ROW_MAJOR ) { lapack_int nrows_u = ( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && m=n) ) ) { vt_t = (lapack_complex_double*) LAPACKE_malloc( sizeof(lapack_complex_double) * ldvt_t * MAX(1,n) ); if( vt_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_2; } } /* Transpose input matrices */ LAPACKE_zge_trans( matrix_order, m, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ LAPACK_zgesdd( &jobz, &m, &n, a_t, &lda_t, s, u_t, &ldu_t, vt_t, &ldvt_t, work, &lwork, rwork, iwork, &info ); if( info < 0 ) { info 
= info - 1; } /* Transpose output matrices */ LAPACKE_zge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda ); if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m=n) ) ) { LAPACKE_zge_trans( LAPACK_COL_MAJOR, nrows_vt, n, vt_t, ldvt_t, vt, ldvt ); } /* Release memory and exit */ if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m>=n) ) ) { LAPACKE_free( vt_t ); } exit_level_2: if( LAPACKE_lsame( jobz, 'a' ) || LAPACKE_lsame( jobz, 's' ) || ( LAPACKE_lsame( jobz, 'o' ) && (m * 2014 Joseph Y Cheng * 2016 Martin Uecker */ #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/ops.h" #include "num/iovec.h" #include "num/gpuops.h" #include "linops/linop.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/types.h" #include "finite_diff.h" /** * Contains parameters for finite difference * * @param D number of dimensions * @param dims dimensions of input to be differenced * @param str strides of input * @param tmp temporary storage for computing finite difference * @param tmp2 temporary storage for computing cumulative sum * @param flags bitmask for applying operators * @param order finite difference order (currently only 1) * @param snip TRUE to zero out first dimension */ struct fdiff_s { INTERFACE(linop_data_t); unsigned int D; long* dims; long* str; complex float* tmp; complex float* tmp2; // future: be less sloppy with memory unsigned int flags; int order; bool snip; }; static DEF_TYPEID(fdiff_s); /* * Implements finite difference operator (order 1 for now) * using circular shift: diff(x) = x - circshift(x) * @param snip Keeps first entry if snip = false; clear first entry if snip = true * * optr = [iptr(1); diff(iptr)] */ static void md_zfinitediff_core2(unsigned int D, const long dims[D], unsigned int flags, bool snip, complex float* tmp, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* 
iptr) { md_copy2(D, dims, istrs, tmp, istrs, iptr, sizeof(complex float)); long zdims[D]; long center[D]; md_select_dims(D, ~0, zdims, dims); memset(center, 0, D * sizeof(long)); for (unsigned int i=0; i < D; i++) { if (MD_IS_SET(flags, i)) { center[i] = 1; // order md_circ_shift2(D, dims, center, ostrs, optr, istrs, tmp, sizeof(complex float)); zdims[i] = 1; if (!snip) // zero out first dimension before subtracting md_clear2(D, zdims, ostrs, optr, sizeof(complex float)); md_zsub2(D, dims, ostrs, optr, istrs, tmp, ostrs, optr); md_copy2(D, dims, ostrs, tmp, ostrs, optr, sizeof(complex float)); if (snip) // zero out first dimension after subtracting md_clear2(D, zdims, ostrs, optr, sizeof(complex float)); center[i] = 0; zdims[i] = dims[i]; } } } /* * Finite difference along dimensions specified by flags (without strides) * Keeps first entry so that dimensions are unchanged * * optr = [iptr(1); diff(iptr)] */ void md_zfinitediff(unsigned int D, const long dims[D], unsigned int flags, bool snip, complex float* optr, const complex float* iptr) { long str[D]; md_calc_strides(D, str, dims, sizeof(complex float)); md_zfinitediff2(D, dims, flags, snip, str, optr, str, iptr); } /* * Finite difference along dimensions specified by flags (with strides) * Keeps first entry so that dimensions are unchanged * * optr = [iptr(1); diff(iptr)] */ void md_zfinitediff2(unsigned int D, const long dims[D], unsigned int flags, bool snip, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr) { complex float* tmp = md_alloc_sameplace(D, dims, sizeof(complex float), optr); md_zfinitediff_core2(D, dims, flags, snip, tmp, ostrs, optr, istrs, iptr); md_free(tmp); } /* * Implements cumulative sum operator (order 1 for now) * using circular shift: cumsum(x) = x + circshift(x,1) + circshift(x,2) + ... 
* * optr = cumsum(iptr) */ static void md_zcumsum_core2(unsigned int D, const long dims[D], unsigned int flags, complex float* tmp, complex float* tmp2, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr) { //out = dx md_copy2(D, dims, ostrs, optr, istrs, iptr, sizeof(complex float)); md_copy2(D, dims, istrs, tmp, istrs, iptr, sizeof(complex float)); long zdims[D]; long center[D]; md_select_dims(D, ~0, zdims, dims); memset(center, 0, D * sizeof(long)); for (unsigned int i=0; i < D; i++) { if (MD_IS_SET(flags, i)) { for (int d=1; d < dims[i]; d++) { // tmp = circshift(tmp, i) center[i] = d; md_circ_shift2(D, dims, center, istrs, tmp2, istrs, tmp, sizeof(complex float)); zdims[i] = d; // tmp(1:d,:) = 0 md_clear2(D, zdims, istrs, tmp2, sizeof(complex float)); //md_zsmul2(D, zdims, istrs, tmp2, istrs, tmp2, 0.); //dump_cfl("tmp2", D, dims, tmp2); // out = out + tmp md_zadd2(D, dims, ostrs, optr, istrs, tmp2, ostrs, optr); //md_copy2(D, dims, ostrs, tmp, ostrs, optr, sizeof(complex float)); } md_copy2(D, dims, ostrs, tmp, ostrs, optr, sizeof(complex float)); center[i] = 0; zdims[i] = dims[i]; } } } /* * Cumulative sum along dimensions specified by flags (without strides) * * optr = cumsum(iptr) */ void md_zcumsum(unsigned int D, const long dims[D], unsigned int flags, complex float* optr, const complex float* iptr) { long str[D]; md_calc_strides(D, str, dims, sizeof(complex float)); md_zcumsum2(D, dims, flags, str, optr, str, iptr); } /* * Cumulative sum along dimensions specified by flags (with strides) * * optr = cumsum(iptr) */ void md_zcumsum2(unsigned int D, const long dims[D], unsigned int flags, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr) { complex float* tmp = md_alloc_sameplace(D, dims, CFL_SIZE, optr); complex float* tmp2 = md_alloc_sameplace(D, dims, CFL_SIZE, optr); md_zcumsum_core2(D, dims, flags, tmp, tmp2, ostrs, optr, istrs, iptr); md_free(tmp); md_free(tmp2); } /* * 
Finite difference operator along specified dimensions. * Keeps the original value for the first entry * * optr = [iptr(1); diff(iptr)] */ static void fdiff_apply(const linop_data_t* _data, complex float* optr, const complex float* iptr) { const struct fdiff_s* data = CAST_DOWN(fdiff_s, _data); md_zfinitediff_core2(data->D, data->dims, data->flags, data->snip, data->tmp, data->str, optr, data->str, iptr); } /* * Adjoint of finite difference operator along specified dimensions. * Equivalent to finite difference in reverse order * * @param snip if false: keeps the original value for the last entry; * if true: implements the adjoint of the difference matrix with all zero first row * * optr = [-diff(iptr); iptr(end)] = flip(fdiff_apply(flip(iptr))) */ static void fdiff_apply_adjoint(const linop_data_t* _data, complex float* optr, const complex float* iptr) { const struct fdiff_s* data = CAST_DOWN(fdiff_s, _data); md_copy2(data->D, data->dims, data->str, optr, data->str, iptr, CFL_SIZE); for (unsigned int i=0; i < data->D; i++) { unsigned int single_flag = data->flags & MD_BIT(i); if (single_flag) { md_flip2(data->D, data->dims, single_flag, data->str, data->tmp2, data->str, optr, CFL_SIZE); md_zfinitediff_core2(data->D, data->dims, single_flag, false, data->tmp, data->str, data->tmp2, data->str, data->tmp2); md_flip2(data->D, data->dims, single_flag, data->str, optr, data->str, data->tmp2, CFL_SIZE); if (data->snip) { long zdims[data->D]; md_select_dims(data->D, ~0, zdims, data->dims); zdims[i] = 1; md_zsub2(data->D, zdims, data->str, optr, data->str, optr, data->str, iptr); } } } } /* * Cumulative sum - inverse of finite difference operator * * optr = cumsum(iptr); */ static void cumsum_apply(const linop_data_t* _data, float lambda, complex float* optr, const complex float* iptr) { const struct fdiff_s* data = CAST_DOWN(fdiff_s, _data); assert(0. 
== lambda); md_zcumsum_core2(data->D, data->dims, data->flags, data->tmp, data->tmp2, data->str, optr, data->str, iptr); } static void finite_diff_del(const linop_data_t* _data) { const struct fdiff_s* data = CAST_DOWN(fdiff_s, _data); xfree(data->dims); xfree(data->str); md_free(data->tmp); md_free(data->tmp2); xfree(data); } /** * Initialize finite difference operator * * @param D number of dimensions * @param dim input dimensions * @param flags bitmask for applying operator * @param snip true: clear initial entry (i.c.); false: keep initial entry (i.c.) * @param gpu true if using gpu, false if using cpu * * Returns a pointer to the finite difference operator */ extern const struct linop_s* linop_finitediff_create(unsigned int D, const long dim[D], const unsigned long flags, bool snip, bool gpu) { PTR_ALLOC(struct fdiff_s, data); SET_TYPEID(fdiff_s, data); data->D = D; data->flags = flags; data->order = 1; data->snip = snip; data->dims = *TYPE_ALLOC(long[D]); md_copy_dims(D, data->dims, dim); data->str = *TYPE_ALLOC(long[D]); md_calc_strides(D, data->str, data->dims, CFL_SIZE); #ifdef USE_CUDA data->tmp = (gpu ? md_alloc_gpu : md_alloc)(D, data->dims, CFL_SIZE); data->tmp2 = (gpu ? 
md_alloc_gpu : md_alloc)(D, data->dims, CFL_SIZE); #else assert(!gpu); data->tmp = md_alloc(D, data->dims, CFL_SIZE); data->tmp2 = md_alloc(D, data->dims, CFL_SIZE); #endif return linop_create(D, dim, D, dim, CAST_UP(PTR_PASS(data)), fdiff_apply, fdiff_apply_adjoint, NULL, cumsum_apply, finite_diff_del); } void fd_proj_noninc(const struct linop_s* o, complex float* optr, const complex float* iptr) { struct fdiff_s* data = (struct fdiff_s*)linop_get_data(o); dump_cfl("impre", data->D, data->dims, iptr); linop_forward_unchecked(o, data->tmp2, iptr); long tmpdim = data->dims[0]; long dims2[data->D]; md_select_dims(data->D, ~0u, dims2, data->dims); dims2[0] *= 2; dump_cfl("dxpre", data->D, data->dims, data->tmp2); md_smin(data->D, dims2, (float*)optr, (float*)data->tmp2, 0.); // add back initial value dims2[0] = tmpdim; for (unsigned int i=0; i < data->D; i++) { if (MD_IS_SET(data->flags, i)) { dims2[i] = 1; md_copy2(data->D, dims2, data->str, optr, data->str, data->tmp2, sizeof(complex float)); break; } } dump_cfl("dxpost", data->D, data->dims, optr); linop_norm_inv_unchecked(o, 0., optr, optr); dump_cfl("impost", data->D, data->dims, optr); } complex float* get_fdiff_tmp2ptr(const struct linop_s* o) { struct fdiff_s* fdata = (struct fdiff_s*)linop_get_data(o); return fdata->tmp2; } /** * Internal data structure used for zfinitediff operator */ struct zfinitediff_data { INTERFACE(linop_data_t); unsigned int D; long dim_diff; bool do_circdiff; long* dims_in; long* strides_in; long* dims_adj; long* strides_adj; size_t size; }; static DEF_TYPEID(zfinitediff_data); /** * Originally used md_circshift, but couldn't get it right, so I just * wrote it out for now (also avoids extra memory) */ static void zfinitediff_apply(const linop_data_t* _data, complex float* optr, const complex float* iptr) { // if (docircshift) // out(..,1:(end-1),..) = in(..,1:(end-1),..) - in(..,2:end,..) // out(..,end,..) = in(..,end,..) - in(..,1,..) // else // out = in(..,1:(end-1),..) 
- in(..,2:end,..) //printf("zfinitediff_apply\n"); const struct zfinitediff_data* data = CAST_DOWN(zfinitediff_data, _data); unsigned long d = data->dim_diff; long nx = data->dims_in[d]; long dims_sub[data->D]; md_copy_dims(data->D, dims_sub, data->dims_in); long off_in, off_adj; if (data->do_circdiff) { // out(..,1:(end-1),..) = in(..,1:(end-1),..) - in(..,2:end,..) dims_sub[d] = nx - 1; off_in = data->strides_in[d] / CFL_SIZE; //off_adj = data->strides_in[d]/CFL_SIZE; md_zsub2(data->D, dims_sub, data->strides_adj, optr, data->strides_in, iptr, data->strides_in, iptr + off_in); // out(..,end,..) = in(..,end,..) - in(..,1,..) dims_sub[d] = 1; off_in = (nx - 1) * data->strides_in[d] / CFL_SIZE; off_adj = (nx - 1) * data->strides_adj[d] / CFL_SIZE; md_zsub2(data->D, dims_sub, data->strides_adj, optr + off_adj, data->strides_in, iptr + off_in, data->strides_in, iptr); } else { // out(..,1:(end-1),..) = in(..,1:(end-1),..) - in(..,2:end,..) dims_sub[d] = nx - 1; off_in = data->strides_in[d] / CFL_SIZE; md_zsub2(data->D, dims_sub, data->strides_adj, optr, data->strides_in, iptr, data->strides_in, iptr + off_in); } /* long i_shift, i_adj, x_orig, x_new; unsigned int d = data->dim_diff; for (unsigned int i = 0; i < md_calc_size(data->D, data->dims_in); i++) { i_shift = i; i_adj = i; x_orig = (i/data->strs_in[d]) % data->dims_in[d]; x_new = x_orig + 1; // shift by 1 while (x_new >= data->dims_in[d]) x_new -= data->dims_in[d]; i_shift += (x_new - x_orig)*data->strs_in[d]; optr[i_adj] = iptr[i] - iptr[i_shift]; */ } static void zfinitediff_adjoint(const linop_data_t* _data, complex float* optr, const complex float* iptr) { //printf("zfinitediff_adjoint\n"); const struct zfinitediff_data* data = CAST_DOWN(zfinitediff_data, _data); // if (docircshift) // out(..,2:end,..) = in(..,2:end,..) - in(..,1:(end-1),..) // out(..,1,..) = in(..,1,..) - in(..,end,..) // else // out(..,1,..) = in(..,1,..) // out(..,2:(end-1),..) = in(..,2:end,..) - in(..,1:(end-1),..) // out(..,end,..) 
= -in(..,end,..); unsigned int d = data->dim_diff; long nx = data->dims_adj[d]; long off_in, off_adj; long dims_sub[data->D]; md_copy_dims(data->D, dims_sub, data->dims_adj); if (data->do_circdiff) { // out(..,2:end,..) = in(..,2:end,..) - in(..,1:(end-1),..) dims_sub[d] = nx - 1; off_adj = data->strides_adj[d] / CFL_SIZE; off_in = data->strides_in[d] / CFL_SIZE; md_zsub2(data->D, dims_sub, data->strides_in, optr + off_in, data->strides_in, iptr + off_adj, data->strides_adj, iptr); // out(..,1,..) = in(..,1,..) - in(..,end,..) dims_sub[d] = 1; off_adj = (nx - 1) * data->strides_adj[d] / CFL_SIZE; off_in = (nx - 1) * data->strides_in[d] / CFL_SIZE; md_zsub2(data->D, dims_sub, data->strides_in, optr, data->strides_adj, iptr, data->strides_adj, iptr + off_adj); } else { // out(..,end,..) = 0 //md_clear2(data->D, data->dims_in, data->strides_in, optr, CFL_SIZE); dims_sub[d] = 1; off_in = nx * data->strides_in[d] / CFL_SIZE; md_clear2(data->D, dims_sub, data->strides_in, optr + off_in, CFL_SIZE); // out(..,1:end-1,:) = in_adj(..,1:end,:) md_copy2(data->D, data->dims_adj, data->strides_in, optr, data->strides_adj, iptr, CFL_SIZE); // out(..,2:end,:) -= in_adj(..,1:end,:) off_in = data->strides_in[d] / CFL_SIZE; md_zsub2(data->D, data->dims_adj, data->strides_in, optr + off_in, data->strides_in, optr + off_in, data->strides_adj, iptr); /* // out(..,1,..) = in_adj(..,1,..) dims_sub[d] = 1; md_copy2(data->D, dims_sub, data->strides_in, optr, data->strides_adj, iptr, CFL_SIZE); // out(..,2:(end-1),..) = in(..,2:end,..) - in(..,1:(end-1),..) dims_sub[d] = nx - 1; off_adj = data->strides_adj[d]/CFL_SIZE; off_in = data->strides_in[d]/CFL_SIZE; md_zsub2(data->D, dims_sub, data->strides_in, optr+off_in, data->strides_adj, iptr+off_adj, data->strides_adj, iptr); // out(..,end,..) = -in(..,end,..); dims_sub[d] = 1; off_adj = (nx - 1) * data->strides_adj[d]/CFL_SIZE; off_in = nx * data->strides_in[d]/CFL_SIZE; // !!!This one operation is really really slow!!! 
md_zsmul2(data->D, dims_sub, data->strides_in, optr+off_in, data->strides_adj, iptr+off_adj, -1.); */ } } // y = 2*x - circshift(x,center_adj) - circshift(x,center) static void zfinitediff_normal(const linop_data_t* _data, complex float* optr, const complex float* iptr) { const struct zfinitediff_data* data = CAST_DOWN(zfinitediff_data, _data); // Turns out that this is faster, but this requires extra memory. complex float* tmp = md_alloc_sameplace(data->D, data->dims_in, CFL_SIZE, iptr); zfinitediff_apply(_data, tmp, iptr); zfinitediff_adjoint(_data, optr, tmp); md_free(tmp); return; // FIXME: WTF? unsigned long d = data->dim_diff; long nx = data->dims_in[d]; long offset; long dims_sub[data->D]; md_copy_dims(data->D, dims_sub, data->dims_in); // optr and iptr same size regardless if do_circdiff true/false // if (data->do_circdiff) // out = 2*in; // out(..,1:(end-1),..) = out(..,1:(end-1),..) - in(..,2:end,..) // out(..,2:end,..) = out(..,2:end,..) - in(..,1:(end-1),..) // out(..,end,..) = out(..,end,..) - in(..,1,..) // out(..,1,..) = out(..,1,..) - in(..,end,..) // // else // out(..,1,..) = in(..,1,..) // out(..,end,..) = in(..,end,..) // out(..,2:(end-1),..) = 2*in(..,2:(end-1),..) // out(..,1:(end-1),..) = out(..,1:(end-1),..) - in(..,2:end,..) // out(..,2:end,..) = out(..,2:end,..) - in(..,1:(end-1),..) // if (data->do_circdiff) { md_zsmul2(data->D, data->dims_in, data->strides_in, optr, data->strides_in, iptr, 2.); dims_sub[d] = (nx - 1); offset = data->strides_in[d] / CFL_SIZE; // out(..,1:(end-1),..) = out(..,1:(end-1),..) - in(..,2:end,..) md_zsub2(data->D, dims_sub, data->strides_in, optr, data->strides_in, optr, data->strides_in, iptr + offset); // out(..,2:end,..) = out(..,2:end,..) - in(..,1:(end-1),..) md_zsub2(data->D, dims_sub, data->strides_in, optr + offset, data->strides_in, optr + offset, data->strides_in, iptr); dims_sub[d] = 1; offset = (nx - 1) * data->strides_in[d] / CFL_SIZE; // out(..,1,..) = out(..,1,..) - in(..,end,..) 
md_zsub2(data->D, dims_sub, data->strides_in, optr, data->strides_in, optr, data->strides_in, iptr + offset); // out(..,end,..) = out(..,end,..) - in(..,1,..) md_zsub2(data->D, dims_sub, data->strides_in, optr+offset, data->strides_in, optr+offset, data->strides_in, iptr); } else { dims_sub[d] = 1; offset = (nx - 1) * data->strides_in[d] / CFL_SIZE; // out(..,1,..) = in(..,1,..) md_copy2(data->D, dims_sub, data->strides_in, optr, data->strides_in, iptr, CFL_SIZE); // out(..,end,..) = in(..,end,..) md_copy2(data->D, dims_sub, data->strides_in, optr + offset, data->strides_in, iptr + offset, CFL_SIZE); dims_sub[d] = nx - 2; offset = data->strides_in[d] / CFL_SIZE; // out(..,2:(end-1),..) = 2*in(..,2:(end-1),..) md_zsmul2(data->D, dims_sub, data->strides_in, optr + offset, data->strides_in, iptr + offset, 2.); dims_sub[d] = nx - 1; offset = data->strides_in[d] / CFL_SIZE; // out(..,1:(end-1),..) = out(..,1:(end-1),..) - in(..,2:end,..) md_zsub2(data->D, dims_sub, data->strides_in, optr, data->strides_in, optr, data->strides_in, iptr + offset); // out(..,2:end,..) = out(..,2:end,..) - in(..,1:(end-1),..) 
md_zsub2(data->D, dims_sub, data->strides_in, optr + offset, data->strides_in, optr + offset, data->strides_in, iptr); } } static void zfinitediff_del(const linop_data_t* _data) { const struct zfinitediff_data* data = CAST_DOWN(zfinitediff_data, _data); xfree(data->dims_in); xfree(data->strides_in); xfree(data->dims_adj); xfree(data->strides_adj); // FIXME free data } const struct linop_s* linop_zfinitediff_create(unsigned int D, const long dims[D], long diffdim, bool circular) { PTR_ALLOC(struct zfinitediff_data, data); SET_TYPEID(zfinitediff_data, data); data->D = D; data->dim_diff = diffdim; data->do_circdiff = circular; data->dims_in = *TYPE_ALLOC(long[D]); data->dims_adj = *TYPE_ALLOC(long[D]); data->strides_in = *TYPE_ALLOC(long[D]); data->strides_adj = *TYPE_ALLOC(long[D]); md_copy_dims(D, data->dims_in, dims); md_copy_dims(D, data->dims_adj, dims); md_calc_strides(D, data->strides_in, data->dims_in, CFL_SIZE); if (!data->do_circdiff) data->dims_adj[data->dim_diff] -= 1; md_calc_strides(D, data->strides_adj, data->dims_adj, CFL_SIZE); const long* dims_adj = data->dims_adj; const long* dims_in = data->dims_in; return linop_create(D, dims_adj, D, dims_in, CAST_UP(PTR_PASS(data)), zfinitediff_apply, zfinitediff_adjoint, zfinitediff_normal, NULL, zfinitediff_del); } bart-0.4.02/src/linops/finite_diff.h000066400000000000000000000034331320577655200172560ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #ifndef __FINITE_DIFF #define __FINITE_DIFF #include "misc/cppwrap.h" extern void md_zfinitediff(unsigned int D, const long dim[__VLA(D)], unsigned int flags, _Bool snip, _Complex float* optr, const _Complex float* iptr); extern void md_zfinitediff2(unsigned int D, const long dim[__VLA(D)], unsigned int flags, _Bool snip, const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_zcumsum(unsigned int D, const long dim[__VLA(D)], unsigned int flags, _Complex float* optr, const _Complex float* iptr); extern void md_zcumsum2(unsigned int D, const long dim[__VLA(D)], unsigned int flags, const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); struct linop_s; extern const struct linop_s* linop_finitediff_create(unsigned int D, const long dim[__VLA(D)], const unsigned long flags, _Bool snip, _Bool gpu); extern void fd_proj_noninc(const struct linop_s* o, _Complex float* optr, const _Complex float* iptr); extern _Complex float* get_fdiff_tmp2ptr(const struct linop_s* o); /** * Circular finite difference operator * (without "snipping" or first elements) * * @param D number of dimensions * @param dim dimensions * @param diffdim specifies the direction to perform the operation * @param circular indicates whether it a circular operation * * Joseph Y Cheng (jycheng@stanford.edu) */ const struct linop_s* linop_zfinitediff_create(unsigned int D, const long dims[__VLA(D)], const long diffdim, _Bool circular); #include "misc/cppwrap.h" #endif bart-0.4.02/src/linops/fmac.c000066400000000000000000000054231320577655200157120ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2016 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/ops.h" //#include "num/iovec.h" #include "linops/linop.h" #include "misc/misc.h" #include "fmac.h" struct fmac_data { INTERFACE(linop_data_t); unsigned int N; long *dims; long *idims; long *istrs; long *odims; long *ostrs; long *tstrs; const complex float* tensor; #ifdef USE_CUDA const complex float* gpu_tensor; #endif }; static DEF_TYPEID(fmac_data); static void fmac_free_data(const linop_data_t* _data) { struct fmac_data* data = CAST_DOWN(fmac_data, _data); xfree(data->dims); xfree(data->idims); xfree(data->istrs); xfree(data->odims); xfree(data->ostrs); xfree(data->tstrs); xfree(data); } static void fmac_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { struct fmac_data* data = CAST_DOWN(fmac_data, _data); // FIXME: gpuA md_clear2(data->N, data->odims, data->ostrs, dst, CFL_SIZE); md_zfmac2(data->N, data->dims, data->ostrs, dst, data->istrs, src, data->tstrs, data->tensor); } static void fmac_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src) { struct fmac_data* data = CAST_DOWN(fmac_data, _data); // FIXME: gpu md_clear2(data->N, data->idims, data->istrs, dst, CFL_SIZE); md_zfmacc2(data->N, data->dims, data->istrs, dst, data->ostrs, src, data->tstrs, data->tensor); } const struct linop_s* linop_fmac_create(unsigned int N, const long dims[N], unsigned int oflags, unsigned int iflags, unsigned int tflags, const complex float* tensor) { PTR_ALLOC(struct fmac_data, data); SET_TYPEID(fmac_data, data); data->N = N; data->dims = *TYPE_ALLOC(long[N]); md_copy_dims(N, data->dims, dims); data->idims = *TYPE_ALLOC(long[N]); data->istrs = *TYPE_ALLOC(long[N]); md_select_dims(N, ~iflags, data->idims, dims); md_calc_strides(N, data->istrs, data->idims, CFL_SIZE); data->odims = *TYPE_ALLOC(long[N]); data->ostrs = *TYPE_ALLOC(long[N]); md_select_dims(N, ~oflags, data->odims, 
dims); md_calc_strides(N, data->ostrs, data->odims, CFL_SIZE); data->tstrs = *TYPE_ALLOC(long[N]); long tdims[N]; md_select_dims(N, ~tflags, tdims, dims); md_calc_strides(N, data->tstrs, tdims, CFL_SIZE); data->tensor = tensor; long odims[N]; md_copy_dims(N, odims, data->odims); long idims[N]; md_copy_dims(N, idims, data->idims); return linop_create(N, odims, N, idims, CAST_UP(PTR_PASS(data)), fmac_apply, fmac_adjoint, NULL, NULL, fmac_free_data); } bart-0.4.02/src/linops/fmac.h000066400000000000000000000003321320577655200157110ustar00rootroot00000000000000 #include struct linop_s; extern const struct linop_s* linop_fmac_create(unsigned int N, const long dims[N], unsigned int oflags, unsigned int iflags, unsigned int flags, const complex float* tensor); bart-0.4.02/src/linops/grad.c000066400000000000000000000074131320577655200157220ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2014-2016 Martin Uecker */ #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "linops/linop.h" #include "misc/misc.h" #include "grad.h" static void grad_dims(unsigned int D, long dims2[D + 1], unsigned int flags, const long dims[D]) { md_copy_dims(D, dims2, dims); dims2[D] = bitcount(flags); } void grad_op(unsigned int D, const long dims[D], unsigned int flags, complex float* out, const complex float* in) { unsigned int N = bitcount(flags); assert(N == dims[D - 1]); // we use the highest dim to store our different partial derivatives unsigned int flags2 = flags; for (unsigned int i = 0; i < N; i++) { unsigned int lsb = ffs(flags2) - 1; flags2 = MD_CLEAR(flags2, lsb); md_zfdiff(D - 1, dims, lsb, out + i * md_calc_size(D - 1, dims), in); } assert(0 == flags2); } void grad_adjoint(unsigned int D, const long dims[D], unsigned int flags, complex float* out, const complex float* in) { unsigned int N = bitcount(flags); assert(N == dims[D - 1]); // we use the highest dim to store our different partial derivatives unsigned int flags2 = flags; complex float* tmp = md_alloc_sameplace(D - 1, dims, CFL_SIZE, out); md_clear(D - 1, dims, out, CFL_SIZE); md_clear(D - 1, dims, tmp, CFL_SIZE); for (unsigned int i = 0; i < N; i++) { unsigned int lsb = ffs(flags2) - 1; flags2 = MD_CLEAR(flags2, lsb); md_zfdiff_backwards(D - 1, dims, lsb, tmp, in + i * md_calc_size(D - 1, dims)); md_zadd(D - 1, dims, out, out, tmp); } md_free(tmp); assert(0 == flags2); } void grad(unsigned int D, const long dims[D], unsigned int flags, complex float* out, const complex float* in) { long dims2[D + 1]; grad_dims(D, dims2, flags, dims); complex float* tmp = md_alloc_sameplace(D + 1, dims2, CFL_SIZE, out); grad_op(D + 1, dims2, flags, tmp, in); // rss should be moved elsewhere md_zrss(D + 1, dims2, flags, out, tmp); md_free(tmp); } struct grad_s { INTERFACE(linop_data_t); unsigned int N; long* dims; unsigned long flags; }; static DEF_TYPEID(grad_s); static 
void grad_op_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct grad_s* data = CAST_DOWN(grad_s, _data); grad_op(data->N, data->dims, data->flags, dst, src); } static void grad_op_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct grad_s* data = CAST_DOWN(grad_s, _data); grad_adjoint(data->N, data->dims, data->flags, dst, src); } static void grad_op_normal(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct grad_s* data = CAST_DOWN(grad_s, _data); complex float* tmp = md_alloc_sameplace(data->N, data->dims, CFL_SIZE, dst); // this could be implemented more efficiently grad_op(data->N, data->dims, data->flags, tmp, src); grad_adjoint(data->N, data->dims, data->flags, dst, tmp); md_free(tmp); } static void grad_op_free(const linop_data_t* _data) { const struct grad_s* data = CAST_DOWN(grad_s, _data); free(data->dims); free((void*)data); } struct linop_s* linop_grad_create(long N, const long dims[N], unsigned int flags) { PTR_ALLOC(struct grad_s, data); SET_TYPEID(grad_s, data); long dims2[N + 1]; grad_dims(N, dims2, flags, dims); data->N = N + 1; data->flags = flags; data->dims = *TYPE_ALLOC(long[N + 1]); md_copy_dims(N + 1, data->dims, dims2); return linop_create(N + 1, dims2, N, dims, CAST_UP(PTR_PASS(data)), grad_op_apply, grad_op_adjoint, grad_op_normal, NULL, grad_op_free); } bart-0.4.02/src/linops/grad.h000066400000000000000000000013421320577655200157220ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #include "misc/cppwrap.h" extern void grad(unsigned int D, const long dims[__VLA(D)], unsigned int flags, complex float* out, const complex float* in); extern void grad_op(unsigned int D, const long dims[__VLA(D)], unsigned int flags, complex float* out, const complex float* in); extern void grad_adjoint(unsigned int D, const long dims[__VLA(D)], unsigned int flags, complex float* out, const complex float* in); extern struct linop_s* linop_grad_create(long N, const long dims[__VLA(N)], unsigned int flags); #include "misc/cppwrap.h" bart-0.4.02/src/linops/linop.c000066400000000000000000000265371320577655200161360ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014-2016 Martin Uecker * 2014 Frank Ong */ #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/ops.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/shrdptr.h" #include "linop.h" struct shared_data_s { INTERFACE(operator_data_t); linop_data_t* data; del_fun_t del; struct shared_ptr_s sptr; union { lop_fun_t apply; lop_p_fun_t apply_p; } u; }; static DEF_TYPEID(shared_data_s); static void shared_del(const operator_data_t* _data) { struct shared_data_s* data = CAST_DOWN(shared_data_s, _data); shared_ptr_destroy(&data->sptr); xfree(data); } static void shared_apply(const operator_data_t* _data, unsigned int N, void* args[N]) { struct shared_data_s* data = CAST_DOWN(shared_data_s, _data); assert(2 == N); debug_trace("ENTER %p\n", data->u.apply); data->u.apply(data->data, args[0], args[1]); debug_trace("LEAVE %p\n", data->u.apply); } static void shared_apply_p(const operator_data_t* _data, float lambda, complex float* dst, const complex float* src) { struct shared_data_s* data = CAST_DOWN(shared_data_s, _data); debug_trace("ENTER %p\n", 
data->u.apply_p); data->u.apply_p(data->data, lambda, dst, src); debug_trace("LEAVE %p\n", data->u.apply_p); } static void sptr_del(const struct shared_ptr_s* p) { struct shared_data_s* data = CONTAINER_OF(p, struct shared_data_s, sptr); data->del(data->data); } /** * Create a linear operator (with strides) */ struct linop_s* linop_create2(unsigned int ON, const long odims[ON], const long ostrs[ON], unsigned int IN, const long idims[IN], const long istrs[IN], linop_data_t* data, lop_fun_t forward, lop_fun_t adjoint, lop_fun_t normal, lop_p_fun_t norm_inv, del_fun_t del) { PTR_ALLOC(struct linop_s, lo); struct shared_data_s* shared_data[4]; for (unsigned int i = 0; i < 4; i++) { shared_data[i] = TYPE_ALLOC(struct shared_data_s); SET_TYPEID(shared_data_s, shared_data[i]); } for (unsigned int i = 0; i < 4; i++) { shared_data[i]->data = data; shared_data[i]->del = del; if (0 == i) shared_ptr_init(&shared_data[i]->sptr, sptr_del); else shared_ptr_copy(&shared_data[i]->sptr, &shared_data[0]->sptr); } shared_data[0]->u.apply = forward; shared_data[1]->u.apply = adjoint; shared_data[2]->u.apply = normal; shared_data[3]->u.apply_p = norm_inv; assert((NULL != forward)); assert((NULL != adjoint)); lo->forward = operator_create2(ON, odims, ostrs, IN, idims, istrs, CAST_UP(shared_data[0]), shared_apply, shared_del); lo->adjoint = operator_create2(IN, idims, istrs, ON, odims, ostrs, CAST_UP(shared_data[1]), shared_apply, shared_del); if (NULL != normal) { lo->normal = operator_create2(IN, idims, istrs, IN, idims, istrs, CAST_UP(shared_data[2]), shared_apply, shared_del); } else { shared_ptr_destroy(&shared_data[2]->sptr); xfree(shared_data[2]); #if 0 lo->normal = NULL; #else lo->normal = operator_chain(lo->forward, lo->adjoint); #endif } if (NULL != norm_inv) { lo->norm_inv = operator_p_create2(IN, idims, istrs, IN, idims, istrs, CAST_UP(shared_data[3]), shared_apply_p, shared_del); } else { shared_ptr_destroy(&shared_data[3]->sptr); xfree(shared_data[3]); lo->norm_inv = NULL; } 
return PTR_PASS(lo); } /** * Create a linear operator (without strides) * * @param N number of dimensions * @param odims dimensions of output (codomain) * @param idims dimensions of input (domain) * @param data data for applying the operator * @param forward function for applying the forward operation, A * @param adjoint function for applying the adjoint operation, A^H * @param normal function for applying the normal equations operation, A^H A * @param norm_inv function for applying the pseudo-inverse operation, (A^H A + mu I)^-1 * @param del function for freeing the data */ struct linop_s* linop_create(unsigned int ON, const long odims[ON], unsigned int IN, const long idims[IN], linop_data_t* data, lop_fun_t forward, lop_fun_t adjoint, lop_fun_t normal, lop_p_fun_t norm_inv, del_fun_t del) { long ostrs[ON]; long istrs[IN]; md_calc_strides(ON, ostrs, odims, CFL_SIZE); md_calc_strides(IN, istrs, idims, CFL_SIZE); return linop_create2(ON, odims, ostrs, IN, idims, istrs, data, forward, adjoint, normal, norm_inv, del); } /** * Return the data associated with the linear operator * * @param ptr linear operator */ const linop_data_t* linop_get_data(const struct linop_s* ptr) { return ((struct shared_data_s*)operator_get_data(ptr->forward))->data; } /** * Make a copy of a linear operator * @param x linear operator */ extern const struct linop_s* linop_clone(const struct linop_s* x) { PTR_ALLOC(struct linop_s, lo); lo->forward = operator_ref(x->forward); lo->adjoint = operator_ref(x->adjoint); lo->normal = operator_ref(x->normal); lo->norm_inv = operator_p_ref(x->norm_inv); return PTR_PASS(lo); } /** * Apply the forward operation of a linear operator: y = A x * Checks that dimensions are consistent for the linear operator * * @param op linear operator * @param DN number of destination dimensions * @param ddims dimensions of the output (codomain) * @param dst output data * @param SN number of source dimensions * @param sdims dimensions of the input (domain) * @param src 
input data */ void linop_forward(const struct linop_s* op, unsigned int DN, const long ddims[DN], complex float* dst, unsigned int SN, const long sdims[SN], const complex float* src) { assert(op->forward); operator_apply(op->forward, DN, ddims, dst, SN, sdims, src); } /** * Apply the adjoint operation of a linear operator: y = A^H x * Checks that dimensions are consistent for the linear operator * * @param op linear operator * @param DN number of destination dimensions * @param ddims dimensions of the output (domain) * @param dst output data * @param SN number of source dimensions * @param sdims dimensions of the input (codomain) * @param src input data */ void linop_adjoint(const struct linop_s* op, unsigned int DN, const long ddims[DN], complex float* dst, unsigned int SN, const long sdims[SN], const complex float* src) { assert(op->adjoint); operator_apply(op->adjoint, DN, ddims, dst, SN, sdims, src); } /** * Apply the pseudo-inverse operation of a linear operator: x = (A^H A + lambda I)^-1 A^H y * Checks that dimensions are consistent for the linear operator * * @param op linear operator * @param lambda regularization parameter * @param DN number of destination dimensions * @param ddims dimensions of the output (domain) * @param dst output data * @param SN number of source dimensions * @param sdims dimensions of the input (codomain) * @param src input data */ void linop_pseudo_inv(const struct linop_s* op, float lambda, unsigned int DN, const long ddims[DN], complex float* dst, unsigned int SN, const long sdims[SN], const complex float* src) { complex float* adj = md_alloc_sameplace(DN, ddims, CFL_SIZE, dst); linop_adjoint(op, DN, ddims, adj, SN, sdims, src); assert(op->norm_inv); operator_p_apply(op->norm_inv, lambda, DN, ddims, dst, DN, ddims, adj); md_free(adj); } /** * Apply the normal equations operation of a linear operator: y = A^H A x * Checks that dimensions are consistent for the linear operator * * @param op linear operator * @param N number of 
dimensions * @param dims dimensions * @param dst output data * @param src input data */ void linop_normal(const struct linop_s* op, unsigned int N, const long dims[N], complex float* dst, const complex float* src) { assert(op->normal); operator_apply(op->normal, N, dims, dst, N, dims, src); } /** * Apply the forward operation of a linear operator: y = A x * Does not check that the dimensions are consistent for the linear operator * * @param op linear operator * @param dst output data * @param src input data */ void linop_forward_unchecked(const struct linop_s* op, complex float* dst, const complex float* src) { assert(op->forward); operator_apply_unchecked(op->forward, dst, src); } /** * Apply the adjoint operation of a linear operator: y = A^H x * Does not check that the dimensions are consistent for the linear operator * * @param op linear operator * @param dst output data * @param src input data */ void linop_adjoint_unchecked(const struct linop_s* op, complex float* dst, const complex float* src) { assert(op->adjoint); operator_apply_unchecked(op->adjoint, dst, src); } /** * Apply the normal equations operation of a linear operator: y = A^H A x * Does not check that the dimensions are consistent for the linear operator * * @param op linear operator * @param dst output data * @param src input data */ void linop_normal_unchecked(const struct linop_s* op, complex float* dst, const complex float* src) { assert(op->normal); operator_apply_unchecked(op->normal, dst, src); } /** * Apply the pseudo-inverse operation of a linear operator: y = (A^H A + lambda I)^-1 x * Does not check that the dimensions are consistent for the linear operator * * @param op linear operator * @param lambda regularization parameter * @param dst output data * @param src input data */ void linop_norm_inv_unchecked(const struct linop_s* op, float lambda, complex float* dst, const complex float* src) { operator_p_apply_unchecked(op->norm_inv, lambda, dst, src); } /** * Return the dimensions and 
strides of the domain of a linear operator * * @param op linear operator */ const struct iovec_s* linop_domain(const struct linop_s* op) { return operator_domain(op->forward); } /** * Return the dimensions and strides of the codomain of a linear operator * * @param op linear operator */ const struct iovec_s* linop_codomain(const struct linop_s* op) { return operator_codomain(op->forward); } /** * Create chain of linear operators. * C = B A * C^H = A^H B^H * C^H C = A^H B^H B A */ struct linop_s* linop_chain(const struct linop_s* a, const struct linop_s* b) { PTR_ALLOC(struct linop_s, c); c->forward = operator_chain(a->forward, b->forward); c->adjoint = operator_chain(b->adjoint, a->adjoint); if (NULL == b->normal) { c->normal = operator_chain(c->forward, c->adjoint); } else { const struct operator_s* top = operator_chain(b->normal, a->adjoint); c->normal = operator_chain(a->forward, top); operator_free(top); } c->norm_inv = NULL; return PTR_PASS(c); } struct linop_s* linop_chainN(unsigned int N, struct linop_s* a[N]) { assert(N > 0); if (1 == N) return a[0]; return linop_chain(a[0], linop_chainN(N - 1, a + 1)); } struct linop_s* linop_loop(unsigned int D, const long dims[D], struct linop_s* op) { PTR_ALLOC(struct linop_s, op2); op2->forward = operator_loop(D, dims, op->forward); op2->adjoint = operator_loop(D, dims, op->adjoint); op2->normal = (NULL == op->normal) ? NULL : operator_loop(D, dims, op->normal); op2->norm_inv = NULL; // FIXME return op2; } /** * Free the linear operator and associated data, * Note: only frees the data if its reference count is zero * * @param op linear operator */ void linop_free(const struct linop_s* op) { operator_free(op->forward); operator_free(op->adjoint); operator_free(op->normal); operator_p_free(op->norm_inv); free((void*)op); } bart-0.4.02/src/linops/linop.h000066400000000000000000000071471320577655200161370ustar00rootroot00000000000000/* Copyright 2014-2015. The Regents of the University of California. * Copyright 2016. 
Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include #ifndef __LINOP_H #define __LINOP_H #include "misc/cppwrap.h" #include "misc/types.h" extern TYPEID linop_data_s; typedef struct linop_data_s { TYPEID* TYPEID; } linop_data_t; typedef void (*lop_fun_t)(const linop_data_t* _data, complex float* dst, const complex float* src); typedef void (*lop_p_fun_t)(const linop_data_t* _data, float lambda, complex float* dst, const complex float* src); typedef void (*del_fun_t)(const linop_data_t* _data); struct operator_s; struct operator_p_s; struct linop_s { const struct operator_s* forward; const struct operator_s* adjoint; const struct operator_s* normal; const struct operator_p_s* norm_inv; }; extern struct linop_s* linop_create(unsigned int ON, const long odims[__VLA(ON)], unsigned int IN, const long idims[__VLA(IN)], linop_data_t* data, lop_fun_t forward, lop_fun_t adjoint, lop_fun_t normal, lop_p_fun_t norm_inv, del_fun_t); extern struct linop_s* linop_create2(unsigned int ON, const long odims[__VLA(ON)], const long ostr[__VLA(ON)], unsigned int IN, const long idims[__VLA(IN)], const long istrs[__VLA(IN)], linop_data_t* data, lop_fun_t forward, lop_fun_t adjoint, lop_fun_t normal, lop_p_fun_t norm_inv, del_fun_t); extern const linop_data_t* linop_get_data(const struct linop_s* ptr); extern void linop_free(const struct linop_s* op); extern void linop_forward(const struct linop_s* op, unsigned int DN, const long ddims[__VLA(DN)], complex float* dst, unsigned int SN, const long sdims[__VLA(SN)], const complex float* src); extern void linop_adjoint(const struct linop_s* op, unsigned int DN, const long ddims[__VLA(DN)], complex float* dst, unsigned int SN, const long sdims[__VLA(SN)], const complex float* src); extern void linop_normal(const struct linop_s* op, unsigned int N, const long dims[__VLA(N)], complex float* dst, const complex float* src); extern void 
linop_pseudo_inv(const struct linop_s* op, float lambda, unsigned int DN, const long ddims[__VLA(DN)], complex float* dst, unsigned int SN, const long sdims[__VLA(SN)], const complex float* src); extern void linop_forward_unchecked(const struct linop_s* op, complex float* dst, const complex float* src); extern void linop_adjoint_unchecked(const struct linop_s* op, complex float* dst, const complex float* src); extern void linop_normal_unchecked(const struct linop_s* op, complex float* dst, const complex float* src); extern void linop_norm_inv_unchecked(const struct linop_s* op, float lambda, complex float* dst, const complex float* src); extern struct linop_s* linop_chain(const struct linop_s* a, const struct linop_s* b); extern struct linop_s* linop_chainN(unsigned int N, struct linop_s* x[N]); struct iovec_s; extern const struct iovec_s* linop_domain(const struct linop_s* x); extern const struct iovec_s* linop_codomain(const struct linop_s* x); extern const struct linop_s* linop_clone(const struct linop_s* x); extern struct linop_s* linop_loop(unsigned int D, const long dims[D], struct linop_s* op); // extern const struct linop_s* linop_identity(unsigned int N, const long dims[N]); // extern const struct linop_s* linop_matrix(unsigned int N, const long out_dims[N], const long in_dims[N], const long matrix_dims[N], const _Complex float* matrix, _Bool use_gpu); extern const struct linop_s* linop_join(unsigned int D, const struct linop_s* a, const struct linop_s* b); #include "misc/cppwrap.h" #endif // __LINOP_H bart-0.4.02/src/linops/lintest.c000066400000000000000000000055201320577655200164640ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2017 Martin Uecker */ #include #include "misc/debug.h" #include "num/rand.h" #include "num/iovec.h" #include "num/multind.h" #include "num/flpmath.h" #include "linops/linop.h" #include "lintest.h" float linop_test_adjoint(const struct linop_s* op) { int N_dom = linop_domain(op)->N; int N_cod = linop_codomain(op)->N; long dims_dom[N_dom]; md_copy_dims(N_dom, dims_dom, linop_domain(op)->dims); long dims_cod[N_cod]; md_copy_dims(N_cod, dims_cod, linop_codomain(op)->dims); complex float* tmp1 = md_alloc(N_dom, dims_dom, CFL_SIZE); complex float* tmp2 = md_alloc(N_cod, dims_cod, CFL_SIZE); complex float* tmp3 = md_alloc(N_cod, dims_cod, CFL_SIZE); complex float* tmp4 = md_alloc(N_dom, dims_dom, CFL_SIZE); md_gaussian_rand(N_dom, dims_dom, tmp1); linop_forward_unchecked(op, tmp3, tmp1); md_gaussian_rand(N_cod, dims_cod, tmp2); linop_adjoint_unchecked(op, tmp4, tmp2); complex float sc1 = md_zscalar(N_dom, dims_dom, tmp1, tmp4); complex float sc2 = md_zscalar(N_cod, dims_cod, tmp3, tmp2); md_free(tmp1); md_free(tmp2); md_free(tmp3); md_free(tmp4); debug_printf(DP_DEBUG4, "- %f%+fi - %f%+fi -\n", crealf(sc1), cimagf(sc1), crealf(sc2), cimagf(sc2)); return cabsf(sc1 - sc2); } float linop_test_normal(const struct linop_s* op) { int N_dom = linop_domain(op)->N; int N_cod = linop_codomain(op)->N; long dims_dom[N_dom]; md_copy_dims(N_dom, dims_dom, linop_domain(op)->dims); long dims_cod[N_cod]; md_copy_dims(N_cod, dims_cod, linop_codomain(op)->dims); complex float* tmp1 = md_alloc(N_dom, dims_dom, CFL_SIZE); complex float* tmp2 = md_alloc(N_dom, dims_dom, CFL_SIZE); complex float* tmp3 = md_alloc(N_cod, dims_cod, CFL_SIZE); complex float* tmp4 = md_alloc(N_dom, dims_dom, CFL_SIZE); md_gaussian_rand(N_dom, dims_dom, tmp1); linop_forward_unchecked(op, tmp3, tmp1); linop_adjoint_unchecked(op, tmp4, tmp3); linop_normal_unchecked(op, tmp2, tmp1); float nrmse = md_znrmse(N_dom, dims_dom, tmp2, tmp4); md_free(tmp1); md_free(tmp2); md_free(tmp3); md_free(tmp4); return 
nrmse; } float linop_test_inverse(const struct linop_s* op) { int N_dom = linop_domain(op)->N; long dims_dom[N_dom]; md_copy_dims(N_dom, dims_dom, linop_domain(op)->dims); complex float* tmp1 = md_alloc(N_dom, dims_dom, CFL_SIZE); complex float* tmp2 = md_alloc(N_dom, dims_dom, CFL_SIZE); complex float* tmp3 = md_alloc(N_dom, dims_dom, CFL_SIZE); md_gaussian_rand(N_dom, dims_dom, tmp1); linop_normal_unchecked(op, tmp3, tmp1); linop_norm_inv_unchecked(op, 0., tmp2, tmp3); float nrmse = md_znrmse(N_dom, dims_dom, tmp2, tmp1); md_free(tmp1); md_free(tmp2); md_free(tmp3); return nrmse; } bart-0.4.02/src/linops/lintest.h000066400000000000000000000003041320577655200164640ustar00rootroot00000000000000 struct linop_s; extern float linop_test_adjoint(const struct linop_s* op); extern float linop_test_normal(const struct linop_s* op); extern float linop_test_inverse(const struct linop_s* op); bart-0.4.02/src/linops/realval.c000066400000000000000000000023751320577655200164350ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2014-2016 Martin Uecker */ #include #include "misc/misc.h" #include "num/flpmath.h" #include "num/multind.h" #include "linops/linop.h" #include "realval.h" struct rvc_s { INTERFACE(linop_data_t); unsigned int N; const long* dims; }; static DEF_TYPEID(rvc_s); static void rvc_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct rvc_s* data = CAST_DOWN(rvc_s, _data); md_zreal(data->N, data->dims, dst, src); } static void rvc_free(const linop_data_t* _data) { const struct rvc_s* data = CAST_DOWN(rvc_s, _data); free((void*)data->dims); free((void*)data); } struct linop_s* linop_realval_create(unsigned int N, const long dims[N]) { PTR_ALLOC(struct rvc_s, data); SET_TYPEID(rvc_s, data); PTR_ALLOC(long[N], dims2); md_copy_dims(N, *dims2, dims); data->N = N; data->dims = *PTR_PASS(dims2); return linop_create(N, dims, N, dims, CAST_UP(PTR_PASS(data)), rvc_apply, rvc_apply, rvc_apply, NULL, rvc_free); } bart-0.4.02/src/linops/realval.h000066400000000000000000000002201320577655200164250ustar00rootroot00000000000000 #include "misc/cppwrap.h" extern struct linop_s* linop_realval_create(unsigned int N, const long dims[__VLA(N)]); #include "misc/cppwrap.h" bart-0.4.02/src/linops/sampling.c000066400000000000000000000050231320577655200166120ustar00rootroot00000000000000/* Copyright 2014,2017. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2014-2016 Martin Uecker * 2017 Jon Tamir */ #include #include "misc/mri.h" #include "misc/misc.h" #include "num/flpmath.h" #include "num/multind.h" #ifdef USE_CUDA #include "num/gpuops.h" #endif #include "linops/linop.h" #include "sampling.h" struct sampling_data_s { INTERFACE(linop_data_t); long dims[DIMS]; long strs[DIMS]; long pat_dims[DIMS]; long pat_strs[DIMS]; complex float* pattern; #ifdef USE_CUDA const complex float* gpu_pattern; #endif }; static DEF_TYPEID(sampling_data_s); #ifdef USE_CUDA static const complex float* get_pat(const struct sampling_data_s* data, bool gpu) { const complex float* pattern = data->pattern; if (gpu) { if (NULL == data->gpu_pattern) ((struct sampling_data_s*)data)->gpu_pattern = md_gpu_move(DIMS, data->pat_dims, data->pattern, CFL_SIZE); pattern = data->gpu_pattern; } return pattern; } #endif static void sampling_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct sampling_data_s* data = CAST_DOWN(sampling_data_s, _data); #ifdef USE_CUDA const complex float* pattern = get_pat(data, cuda_ondevice(src)); #else const complex float* pattern = data->pattern; #endif md_zmul2(DIMS, data->dims, data->strs, dst, data->strs, src, data->pat_strs, pattern); } static void sampling_free(const linop_data_t* _data) { const struct sampling_data_s* data = CAST_DOWN(sampling_data_s, _data); #ifdef USE_CUDA if (NULL != data->gpu_pattern) { md_free((void*)data->gpu_pattern); } #endif xfree(data); } struct linop_s* linop_sampling_create(const long dims[DIMS], const long pat_dims[DIMS], const complex float* pattern) { PTR_ALLOC(struct sampling_data_s, data); SET_TYPEID(sampling_data_s, data); md_copy_dims(DIMS, data->pat_dims, pat_dims); md_select_dims(DIMS, ~MAPS_FLAG, data->dims, dims); // dimensions of kspace md_calc_strides(DIMS, data->strs, data->dims, CFL_SIZE); md_calc_strides(DIMS, data->pat_strs, data->pat_dims, CFL_SIZE); data->pattern = (complex float*)pattern; #ifdef USE_CUDA 
data->gpu_pattern = NULL; #endif const long* dims2 = data->dims; return linop_create(DIMS, dims2, DIMS, dims2, CAST_UP(PTR_PASS(data)), sampling_apply, sampling_apply, sampling_apply, NULL, sampling_free); } bart-0.4.02/src/linops/sampling.h000066400000000000000000000002311320577655200166130ustar00rootroot00000000000000 #include "misc/mri.h" extern struct linop_s* linop_sampling_create(const long dims[DIMS], const long pat_dims[DIMS], const complex float* pattern); bart-0.4.02/src/linops/someops.c000066400000000000000000000632221320577655200164720ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2015-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014-2017 Martin Uecker * 2014 Jonathan Tamir */ #include #include #include "misc/misc.h" #include "misc/debug.h" #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/wavelet.h" #include "num/conv.h" #include "num/ops.h" #include "num/iovec.h" #ifdef USE_CUDA #include "num/gpuops.h" #endif #include "linops/linop.h" #include "someops.h" static DEF_TYPEID(cdiag_s); struct cdiag_s { INTERFACE(linop_data_t); unsigned int N; const long* dims; const long* strs; const long* dstrs; const complex float* diag; #ifdef USE_CUDA const complex float* gpu_diag; #endif bool rmul; }; static void cdiag_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct cdiag_s* data = CAST_DOWN(cdiag_s, _data); const complex float* diag = data->diag; #ifdef USE_CUDA if (cuda_ondevice(src)) { if (NULL == data->gpu_diag) ((struct cdiag_s*)data)->gpu_diag = md_gpu_move(data->N, data->dims, data->diag, CFL_SIZE); diag = data->gpu_diag; } #endif (data->rmul ? 
md_zrmul2 : md_zmul2)(data->N, data->dims, data->strs, dst, data->strs, src, data->dstrs, diag); } static void cdiag_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct cdiag_s* data = CAST_DOWN(cdiag_s, _data); const complex float* diag = data->diag; #ifdef USE_CUDA if (cuda_ondevice(src)) { if (NULL == data->gpu_diag) ((struct cdiag_s*)data)->gpu_diag = md_gpu_move(data->N, data->dims, data->diag, CFL_SIZE); diag = data->gpu_diag; } #endif (data->rmul ? md_zrmul2 : md_zmulc2)(data->N, data->dims, data->strs, dst, data->strs, src, data->dstrs, diag); } static void cdiag_normal(const linop_data_t* _data, complex float* dst, const complex float* src) { cdiag_apply(_data, dst, src); cdiag_adjoint(_data, dst, dst); } static void cdiag_free(const linop_data_t* _data) { const struct cdiag_s* data = CAST_DOWN(cdiag_s, _data); #ifdef USE_CUDA md_free((void*)data->gpu_diag); #endif free((void*)data->dims); free((void*)data->dstrs); free((void*)data->strs); free((void*)data); } static struct linop_s* linop_gdiag_create(unsigned int N, const long dims[N], unsigned int flags, const complex float* diag, bool rdiag) { PTR_ALLOC(struct cdiag_s, data); SET_TYPEID(cdiag_s, data); data->rmul = rdiag; data->N = N; PTR_ALLOC(long[N], dims2); PTR_ALLOC(long[N], dstrs); PTR_ALLOC(long[N], strs); long ddims[N]; md_select_dims(N, flags, ddims, dims); md_copy_dims(N, *dims2, dims); md_calc_strides(N, *strs, dims, CFL_SIZE); md_calc_strides(N, *dstrs, ddims, CFL_SIZE); data->dims = *PTR_PASS(dims2); data->strs = *PTR_PASS(strs); data->dstrs = *PTR_PASS(dstrs); data->diag = diag; // make a copy? 
#ifdef USE_CUDA data->gpu_diag = NULL; #endif return linop_create(N, dims, N, dims, CAST_UP(PTR_PASS(data)), cdiag_apply, cdiag_adjoint, cdiag_normal, NULL, cdiag_free); } /** * Create a operator y = D x where D is a diagonal matrix * * @param N number of dimensions * @param dims input and output dimensions * @param flags bitmask specifiying the dimensions present in diag * @param diag diagonal matrix */ struct linop_s* linop_cdiag_create(unsigned int N, const long dims[N], unsigned int flags, const complex float* diag) { return linop_gdiag_create(N, dims, flags, diag, false); } /** * Create a operator y = D x where D is a diagonal matrix * * @param N number of dimensions * @param dims input and output dimensions * @param flags bitmask specifiying the dimensions present in diag * @param diag diagonal matrix */ struct linop_s* linop_rdiag_create(unsigned int N, const long dims[N], unsigned int flags, const complex float* diag) { return linop_gdiag_create(N, dims, flags, diag, true); } struct identity_data_s { INTERFACE(linop_data_t); const struct iovec_s* domain; }; static DEF_TYPEID(identity_data_s); static void identity_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct iovec_s* domain = CAST_DOWN(identity_data_s, _data)->domain; md_copy2(domain->N, domain->dims, domain->strs, dst, domain->strs, src, CFL_SIZE); } static void identity_free(const linop_data_t* _data) { const struct identity_data_s* data = CAST_DOWN(identity_data_s, _data); iovec_free(data->domain); free((void*)data); } /** * Create an Identity linear operator: I x * @param N number of dimensions * @param dims dimensions of input (domain) */ struct linop_s* linop_identity_create(unsigned int N, const long dims[N]) { PTR_ALLOC(struct identity_data_s, data); SET_TYPEID(identity_data_s, data); data->domain = iovec_create(N, dims, CFL_SIZE); return linop_create(N, dims, N, dims, CAST_UP(PTR_PASS(data)), identity_apply, identity_apply, identity_apply, NULL, 
identity_free); } struct resize_op_s { INTERFACE(linop_data_t); unsigned int N; const long* out_dims; const long* in_dims; }; static DEF_TYPEID(resize_op_s); static void resize_forward(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct resize_op_s* data = CAST_DOWN(resize_op_s, _data); md_resize_center(data->N, data->out_dims, dst, data->in_dims, src, CFL_SIZE); } static void resize_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct resize_op_s* data = CAST_DOWN(resize_op_s, _data); md_resize_center(data->N, data->in_dims, dst, data->out_dims, src, CFL_SIZE); } static void resize_normal(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct resize_op_s* data = CAST_DOWN(resize_op_s, _data); complex float* tmp = md_alloc_sameplace(data->N, data->out_dims, CFL_SIZE, dst); resize_forward(_data, tmp, src); resize_adjoint(_data, dst, tmp); md_free(tmp); } static void resize_free(const linop_data_t* _data) { const struct resize_op_s* data = CAST_DOWN(resize_op_s, _data); free((void*)data->out_dims); free((void*)data->in_dims); free((void*)data); } /** * Create a resize linear operator: y = M x, * where M either crops or expands the the input dimensions to match the output dimensions. 
* Uses centered zero-padding and centered cropping * * @param N number of dimensions * @param out_dims output dimensions * @param in_dims input dimensions */ struct linop_s* linop_resize_create(unsigned int N, const long out_dims[N], const long in_dims[N]) { PTR_ALLOC(struct resize_op_s, data); SET_TYPEID(resize_op_s, data); data->N = N; data->out_dims = *TYPE_ALLOC(long[N]); data->in_dims = *TYPE_ALLOC(long[N]); md_copy_dims(N, (long*)data->out_dims, out_dims); md_copy_dims(N, (long*)data->in_dims, in_dims); return linop_create(N, out_dims, N, in_dims, CAST_UP(PTR_PASS(data)), resize_forward, resize_adjoint, resize_normal, NULL, resize_free); } struct operator_matrix_s { INTERFACE(linop_data_t); const complex float* mat; const complex float* mat_gram; // A^H A #ifdef USE_CUDA const complex float* mat_gpu; const complex float* mat_gram_gpu; #endif unsigned int N; const long* mat_dims; const long* out_dims; const long* in_dims; const long* grm_dims; const long* gin_dims; const long* gout_dims; }; static DEF_TYPEID(operator_matrix_s); static void linop_matrix_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data); const complex float* mat = data->mat; #ifdef USE_CUDA if (cuda_ondevice(src)) { if (NULL == data->mat_gpu) data->mat_gpu = md_gpu_move(data->N, data->mat_dims, data->mat, CFL_SIZE); mat = data->mat_gpu; } #endif md_ztenmul(data->N, data->out_dims, dst, data->in_dims, src, data->mat_dims, mat); } static void linop_matrix_apply_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src) { struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data); const complex float* mat = data->mat; #ifdef USE_CUDA if (cuda_ondevice(src)) { if (NULL == data->mat_gpu) data->mat_gpu = md_gpu_move(data->N, data->mat_dims, data->mat, CFL_SIZE); mat = data->mat_gpu; } #endif md_ztenmulc(data->N, data->in_dims, dst, data->out_dims, src, data->mat_dims, 
mat); } static void linop_matrix_apply_normal(const linop_data_t* _data, complex float* dst, const complex float* src) { struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data); if (NULL == data->mat_gram) { complex float* tmp = md_alloc_sameplace(data->N, data->out_dims, CFL_SIZE, src); linop_matrix_apply(_data, tmp, src); linop_matrix_apply_adjoint(_data, dst, tmp); md_free(tmp); } else { const complex float* mat_gram = data->mat_gram; #ifdef USE_CUDA if (cuda_ondevice(src)) { if (NULL == data->mat_gram_gpu) data->mat_gram_gpu = md_gpu_move(2 * data->N, data->grm_dims, data->mat_gram, CFL_SIZE); mat_gram = data->mat_gram_gpu; } #endif md_ztenmul(2 * data->N, data->gout_dims, dst, data->gin_dims, src, data->grm_dims, mat_gram); } } static void linop_matrix_del(const linop_data_t* _data) { const struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data); xfree(data->out_dims); xfree(data->mat_dims); xfree(data->in_dims); xfree(data->gin_dims); xfree(data->gout_dims); xfree(data->grm_dims); md_free(data->mat); md_free(data->mat_gram); #ifdef USE_CUDA md_free(data->mat_gpu); md_free(data->mat_gram_gpu); #endif xfree(data); } static void shadow_dims(unsigned int N, long out[2 * N], const long in[N]) { for (unsigned int i = 0; i < N; i++) { out[2 * i + 0] = in[i]; out[2 * i + 1] = 1; } } /* O I M G * 1 1 1 1 - not used * 1 1 A ! - forbidden * 1 A 1 ! - forbidden * A 1 1 ! 
- forbidden * A A 1 1 - replicated * A 1 A 1 - output * 1 A A A/A - input * A A A A - batch */ static struct operator_matrix_s* linop_matrix_priv2(unsigned int N, const long out_dims[N], const long in_dims[N], const long matrix_dims[N], const complex float* matrix) { // to get assertions and cost estimate long max_dims[N]; md_tenmul_dims(N, max_dims, out_dims, in_dims, matrix_dims); PTR_ALLOC(struct operator_matrix_s, data); SET_TYPEID(operator_matrix_s, data); data->N = N; PTR_ALLOC(long[N], out_dims1); md_copy_dims(N, *out_dims1, out_dims); data->out_dims = *PTR_PASS(out_dims1); PTR_ALLOC(long[N], mat_dims1); md_copy_dims(N, *mat_dims1, matrix_dims); data->mat_dims = *PTR_PASS(mat_dims1); PTR_ALLOC(long[N], in_dims1); md_copy_dims(N, *in_dims1, in_dims); data->in_dims = *PTR_PASS(in_dims1); complex float* mat = md_alloc(N, matrix_dims, CFL_SIZE); md_copy(N, matrix_dims, mat, matrix, CFL_SIZE); data->mat = mat; data->mat_gram = NULL; #ifdef USE_CUDA data->mat_gpu = NULL; data->mat_gram_gpu = NULL; #endif #if 1 // pre-multiply gram matrix (if there is a cost reduction) unsigned long out_flags = md_nontriv_dims(N, out_dims); unsigned long in_flags = md_nontriv_dims(N, in_dims); unsigned long del_flags = in_flags & ~out_flags; unsigned long new_flags = out_flags & ~in_flags; /* we double (again) for the gram matrix */ PTR_ALLOC(long[2 * N], mat_dims2); PTR_ALLOC(long[2 * N], in_dims2); PTR_ALLOC(long[2 * N], gmt_dims2); PTR_ALLOC(long[2 * N], gin_dims2); PTR_ALLOC(long[2 * N], grm_dims2); PTR_ALLOC(long[2 * N], gout_dims2); shadow_dims(N, *gmt_dims2, matrix_dims); shadow_dims(N, *mat_dims2, matrix_dims); shadow_dims(N, *in_dims2, in_dims); shadow_dims(N, *gout_dims2, in_dims); shadow_dims(N, *gin_dims2, in_dims); shadow_dims(N, *grm_dims2, matrix_dims); /* move removed input dims into shadow position * for the gram matrix can have an output there */ for (unsigned int i = 0; i < N; i++) { if (MD_IS_SET(del_flags, i)) { assert((*mat_dims2)[2 * i + 0] == (*in_dims2)[2 * 
i + 0]); (*mat_dims2)[2 * i + 1] = (*mat_dims2)[2 * i + 0]; (*mat_dims2)[2 * i + 0] = 1; (*in_dims2)[2 * i + 1] = (*gin_dims2)[2 * i + 0]; (*in_dims2)[2 * i + 0] = 1; } } for (unsigned int i = 0; i < N; i++) { if (MD_IS_SET(new_flags, i)) { (*grm_dims2)[2 * i + 0] = 1; (*grm_dims2)[2 * i + 1] = 1; } if (MD_IS_SET(del_flags, i)) { (*gout_dims2)[2 * i + 1] = (*gin_dims2)[2 * i + 0]; (*gout_dims2)[2 * i + 0] = 1; (*grm_dims2)[2 * i + 0] = in_dims[i]; (*grm_dims2)[2 * i + 1] = in_dims[i]; } } long gmx_dims[2 * N]; md_tenmul_dims(2 * N, gmx_dims, *gout_dims2, *gin_dims2, *grm_dims2); long mult_mat = md_calc_size(N, max_dims); long mult_gram = md_calc_size(2 * N, gmx_dims); if (mult_gram < 2 * mult_mat) { // FIXME: rethink debug_printf(DP_DEBUG2, "Gram matrix: 2x %ld vs %ld\n", mult_mat, mult_gram); complex float* mat_gram = md_alloc(2 * N, *grm_dims2, CFL_SIZE); md_ztenmulc(2 * N, *grm_dims2, mat_gram, *gmt_dims2, matrix, *mat_dims2, matrix); data->mat_gram = mat_gram; } PTR_FREE(gmt_dims2); PTR_FREE(mat_dims2); PTR_FREE(in_dims2); data->gin_dims = *PTR_PASS(gin_dims2); data->gout_dims = *PTR_PASS(gout_dims2); data->grm_dims = *PTR_PASS(grm_dims2); #else data->gin_dims = NULL; data->gout_dims = NULL; data->grm_dims = NULL; #endif return PTR_PASS(data); } static struct operator_matrix_s* linop_matrix_priv(unsigned int N, const long out_dims[N], const long in_dims[N], const long matrix_dims[N], const complex float* matrix) { unsigned long out_flags = md_nontriv_dims(N, out_dims); unsigned long in_flags = md_nontriv_dims(N, in_dims); unsigned long del_flags = in_flags & ~out_flags; /* we double dimensions for chaining which can lead to * matrices with the same input and output dimension */ long out_dims2[2 * N]; long mat_dims2[2 * N]; long in_dims2[2 * N]; shadow_dims(N, out_dims2, out_dims); shadow_dims(N, mat_dims2, matrix_dims); shadow_dims(N, in_dims2, in_dims); /* move removed input dims into shadow position * which makes chaining easier below */ for (unsigned int i = 
0; i < N; i++) { if (MD_IS_SET(del_flags, i)) { assert(1 == out_dims2[2 * i + 0]); assert(mat_dims2[2 * i + 0] == in_dims2[2 * i + 0]); mat_dims2[2 * i + 1] = mat_dims2[2 * i + 0]; mat_dims2[2 * i + 0] = 1; in_dims2[2 * i + 1] = in_dims[i]; in_dims2[2 * i + 0] = 1; } } return linop_matrix_priv2(2 * N, out_dims2, in_dims2, mat_dims2, matrix); } /** * Operator interface for a true matrix: * out = mat * in * in: [x x x x 1 x x K x x] * mat: [x x x x T x x K x x] * out: [x x x x T x x 1 x x] * where the x's are arbitrary dimensions and T and K may be transposed * * @param N number of dimensions * @param out_dims output dimensions after applying the matrix (codomain) * @param in_dims input dimensions to apply the matrix (domain) * @param matrix_dims dimensions of the matrix * @param matrix matrix data */ struct linop_s* linop_matrix_create(unsigned int N, const long out_dims[N], const long in_dims[N], const long matrix_dims[N], const complex float* matrix) { struct operator_matrix_s* data = linop_matrix_priv(N, out_dims, in_dims, matrix_dims, matrix); return linop_create(N, out_dims, N, in_dims, CAST_UP(data), linop_matrix_apply, linop_matrix_apply_adjoint, linop_matrix_apply_normal, NULL, linop_matrix_del); } /** * Efficiently chain two matrix linops by multiplying the actual matrices together. * Stores a copy of the new matrix. 
* Returns: C = B A
 *
 * Both operators must carry operator_matrix_s private data; the codomain of
 * a must be compatible with the domain of b (checked with assert).
 *
 * @param a first matrix (applied to input)
 * @param b second matrix (applied to output of first matrix)
 */
struct linop_s* linop_matrix_chain(const struct linop_s* a, const struct linop_s* b)
{
	const struct operator_matrix_s* a_data = CAST_DOWN(operator_matrix_s, linop_get_data(a));
	const struct operator_matrix_s* b_data = CAST_DOWN(operator_matrix_s, linop_get_data(b));

	// check compatibility
	assert(linop_codomain(a)->N == linop_domain(b)->N);
	assert(md_check_compat(linop_codomain(a)->N, 0u, linop_codomain(a)->dims, linop_domain(b)->dims));

	unsigned int D = linop_domain(a)->N;

	unsigned long outB_flags = md_nontriv_dims(D, linop_codomain(b)->dims);
	unsigned long inB_flags = md_nontriv_dims(D, linop_domain(b)->dims);

	// dims that b consumes (non-trivial in its input, trivial in its output)
	unsigned long delB_flags = inB_flags & ~outB_flags;

	// private data uses doubled dimensions (two slots per interface dim)
	unsigned int N = a_data->N;
	assert(N == 2 * D);

	long in_dims[N];
	md_copy_dims(N, in_dims, a_data->in_dims);

	long matA_dims[N];
	md_copy_dims(N, matA_dims, a_data->mat_dims);

	long matB_dims[N];
	md_copy_dims(N, matB_dims, b_data->mat_dims);

	long out_dims[N];
	md_copy_dims(N, out_dims, b_data->out_dims);

	// for dims removed by b, swap the two slots of a's matrix and input dims
	for (unsigned int i = 0; i < D; i++) {

		if (MD_IS_SET(delB_flags, i)) {

			matA_dims[2 * i + 0] = a_data->mat_dims[2 * i + 1];
			matA_dims[2 * i + 1] = a_data->mat_dims[2 * i + 0];

			in_dims[2 * i + 0] = a_data->in_dims[2 * i + 1];
			in_dims[2 * i + 1] = a_data->in_dims[2 * i + 0];
		}
	}

	long matrix_dims[N];
	md_singleton_dims(N, matrix_dims);

	unsigned long iflags = md_nontriv_dims(N, in_dims);
	unsigned long oflags = md_nontriv_dims(N, out_dims);
	unsigned long flags = iflags | oflags;

	// we combine a and b and sum over dims not in input or output
	md_max_dims(N, flags, matrix_dims, matA_dims, matB_dims);

	debug_printf(DP_DEBUG1, "tensor chain: %ld x %ld -> %ld\n", md_calc_size(N, matA_dims), md_calc_size(N, matB_dims), md_calc_size(N, matrix_dims));

	complex float* matrix = md_alloc(N, matrix_dims, CFL_SIZE);

	debug_print_dims(DP_DEBUG2, N, matrix_dims);
	debug_print_dims(DP_DEBUG2, N, in_dims);
	debug_print_dims(DP_DEBUG2, N, matA_dims);
	debug_print_dims(DP_DEBUG2, N, matB_dims);
	debug_print_dims(DP_DEBUG2, N, out_dims);

	md_ztenmul(N, matrix_dims, matrix, matA_dims, a_data->mat, matB_dims, b_data->mat);

	// priv2 takes our doubled dimensions
	struct operator_matrix_s* data = linop_matrix_priv2(N, out_dims, in_dims, matrix_dims, matrix);

	/* although we internally use different dimensions we define the
	 * correct interface
	 */
	struct linop_s* c = linop_create(linop_codomain(b)->N, linop_codomain(b)->dims,
			linop_domain(a)->N, linop_domain(a)->dims, CAST_UP(data),
			linop_matrix_apply, linop_matrix_apply_adjoint, linop_matrix_apply_normal, NULL, linop_matrix_del);

	// the temporary product is released here; per the header comment the
	// operator keeps its own copy
	md_free(matrix);

	return c;
}



/**
 * Private data of the FFT linear operator.
 */
struct fft_linop_s {

	INTERFACE(linop_data_t);

	const struct operator_s* frw;	// forward plan
	const struct operator_s* adj;	// adjoint (backwards) plan

	bool center;	// set for the centered variants
	float nscale;	// number of elements along the transformed dims

	int N;
	long* dims;
	long* strs;
};

static DEF_TYPEID(fft_linop_s);

/**
 * Forward FFT: copy in to out (unless they alias) and transform out in place.
 */
static void fft_linop_apply(const linop_data_t* _data, complex float* out, const complex float* in)
{
	const struct fft_linop_s* data = CAST_DOWN(fft_linop_s, _data);

	if (in != out)
		md_copy2(data->N, data->dims, data->strs, out, data->strs, in, CFL_SIZE);

	operator_apply(data->frw, data->N, data->dims, out, data->N, data->dims, out);
}

/**
 * Adjoint FFT: copy in to out (unless they alias) and apply the adjoint plan in place.
 */
static void fft_linop_adjoint(const linop_data_t* _data, complex float* out, const complex float* in)
{
	const struct fft_linop_s* data = CAST_DOWN(fft_linop_s, _data);

	if (in != out)
		md_copy2(data->N, data->dims, data->strs, out, data->strs, in, CFL_SIZE);

	operator_apply(data->adj, data->N, data->dims, out, data->N, data->dims, out);
}

/**
 * Release both plans, the dims/strides arrays and the data struct itself.
 */
static void fft_linop_free(const linop_data_t* _data)
{
	const struct fft_linop_s* data = CAST_DOWN(fft_linop_s, _data);

	fft_free(data->frw);
	fft_free(data->adj);

	free(data->dims);
	free(data->strs);

	free((void*)data);
}

/**
 * Normal operator: a plain copy when center is set, otherwise a scaling
 * of the input by nscale.
 */
static void fft_linop_normal(const linop_data_t* _data, complex float* out, const complex float* in)
{
	const struct fft_linop_s* data = CAST_DOWN(fft_linop_s, _data);

	if (data->center)
		md_copy(data->N,
data->dims, out, in, CFL_SIZE); else md_zsmul(data->N, data->dims, out, in, data->nscale); } static struct linop_s* linop_fft_create_priv(int N, const long dims[N], unsigned int flags, bool forward, bool center) { const struct operator_s* plan = fft_measure_create(N, dims, flags, true, false); const struct operator_s* iplan = fft_measure_create(N, dims, flags, true, true); PTR_ALLOC(struct fft_linop_s, data); SET_TYPEID(fft_linop_s, data); data->frw = plan; data->adj = iplan; data->N = N; data->center = center; data->dims = *TYPE_ALLOC(long[N]); md_copy_dims(N, data->dims, dims); data->strs = *TYPE_ALLOC(long[N]); md_calc_strides(N, data->strs, data->dims, CFL_SIZE); long fft_dims[N]; md_select_dims(N, flags, fft_dims, dims); data->nscale = (float)md_calc_size(N, fft_dims); lop_fun_t apply = forward ? fft_linop_apply : fft_linop_adjoint; lop_fun_t adjoint = forward ? fft_linop_adjoint : fft_linop_apply; struct linop_s* lop = linop_create(N, dims, N, dims, CAST_UP(PTR_PASS(data)), apply, adjoint, fft_linop_normal, NULL, fft_linop_free); if (center) { // FIXME: should only allocate flagged dims complex float* fftmod_mat = md_alloc(N, dims, CFL_SIZE); complex float* fftmodk_mat = md_alloc(N, dims, CFL_SIZE); // we need fftmodk only because we want to apply scaling only once complex float one[1] = { 1. 
}; md_fill(N, dims, fftmod_mat, one, CFL_SIZE); fftmod(N, dims, flags, fftmodk_mat, fftmod_mat); fftscale(N, dims, flags, fftmod_mat, fftmodk_mat); struct linop_s* mod = linop_cdiag_create(N, dims, ~0u, fftmod_mat); struct linop_s* modk = linop_cdiag_create(N, dims, ~0u, fftmodk_mat); struct linop_s* tmp = linop_chain(mod, lop); tmp = linop_chain(tmp, modk); linop_free(lop); linop_free(mod); linop_free(modk); lop = tmp; } return lop; } /** * Uncentered forward Fourier transform linear operator * * @param N number of dimensions * @param dims dimensions of input * @param flags bitmask of the dimensions to apply the Fourier transform * @param gpu use gpu */ struct linop_s* linop_fft_create(int N, const long dims[N], unsigned int flags) { return linop_fft_create_priv(N, dims, flags, true, false); } /** * Uncentered inverse Fourier transform linear operator * * @param N number of dimensions * @param dims dimensions of input * @param flags bitmask of the dimensions to apply the Fourier transform */ struct linop_s* linop_ifft_create(int N, const long dims[N], unsigned int flags) { return linop_fft_create_priv(N, dims, flags, false, false); } /** * Centered forward Fourier transform linear operator * * @param N number of dimensions * @param dims dimensions of input * @param flags bitmask of the dimensions to apply the Fourier transform */ struct linop_s* linop_fftc_create(int N, const long dims[N], unsigned int flags) { return linop_fft_create_priv(N, dims, flags, true, true); } /** * Centered inverse Fourier transform linear operator * * @param N number of dimensions * @param dims dimensions of input * @param flags bitmask of the dimensions to apply the Fourier transform */ struct linop_s* linop_ifftc_create(int N, const long dims[N], unsigned int flags) { return linop_fft_create_priv(N, dims, flags, false, true); } struct linop_cdf97_s { INTERFACE(linop_data_t); unsigned int N; const long* dims; unsigned int flags; }; static DEF_TYPEID(linop_cdf97_s); static void 
linop_cdf97_apply(const linop_data_t* _data, complex float* out, const complex float* in)
{
	/* forward: copy the input, then run md_cdf97z in place on the output */
	const struct linop_cdf97_s* data = CAST_DOWN(linop_cdf97_s, _data);

	md_copy(data->N, data->dims, out, in, CFL_SIZE);
	md_cdf97z(data->N, data->dims, data->flags, out);
}

/**
 * Adjoint: copy the input, then run md_icdf97z in place on the output.
 */
static void linop_cdf97_adjoint(const linop_data_t* _data, complex float* out, const complex float* in)
{
	const struct linop_cdf97_s* data = CAST_DOWN(linop_cdf97_s, _data);

	md_copy(data->N, data->dims, out, in, CFL_SIZE);
	md_icdf97z(data->N, data->dims, data->flags, out);
}

/**
 * Normal operator: implemented as a plain copy of the input.
 */
static void linop_cdf97_normal(const linop_data_t* _data, complex float* out, const complex float* in)
{
	const struct linop_cdf97_s* data = CAST_DOWN(linop_cdf97_s, _data);

	md_copy(data->N, data->dims, out, in, CFL_SIZE);
}

/**
 * Release the owned dims array and the data struct.
 */
static void linop_cdf97_free(const linop_data_t* _data)
{
	const struct linop_cdf97_s* data = CAST_DOWN(linop_cdf97_s, _data);

	free((void*)data->dims);

	free((void*)data);
}



/**
 * Wavelet CDF 9/7 transform operator
 *
 * @param N number of dimensions
 * @param dims dimensions of input
 * @param flags bitmask of the dimensions to apply the wavelet transform
 */
struct linop_s* linop_cdf97_create(int N, const long dims[N], unsigned int flags)
{
	PTR_ALLOC(struct linop_cdf97_s, data);
	SET_TYPEID(linop_cdf97_s, data);

	// keep a private copy of the dimensions
	PTR_ALLOC(long[N], ndims);
	md_copy_dims(N, *ndims, dims);

	data->N = N;
	data->dims = *ndims;
	data->flags = flags;

	return linop_create(N, dims, N, dims, CAST_UP(PTR_PASS(data)), linop_cdf97_apply, linop_cdf97_adjoint, linop_cdf97_normal, NULL, linop_cdf97_free);
}



/**
 * Private data of the convolution operator: just the convolution plan.
 */
struct conv_data_s {

	INTERFACE(linop_data_t);

	struct conv_plan* plan;
};

static DEF_TYPEID(conv_data_s);

/**
 * Forward convolution via conv_exec().
 */
static void linop_conv_forward(const linop_data_t* _data, complex float* out, const complex float* in)
{
	struct conv_data_s* data = CAST_DOWN(conv_data_s, _data);

	conv_exec(data->plan, out, in);
}

/**
 * Adjoint convolution via conv_adjoint().
 */
static void linop_conv_adjoint(const linop_data_t* _data, complex float* out, const complex float* in)
{
	struct conv_data_s* data = CAST_DOWN(conv_data_s, _data);

	conv_adjoint(data->plan, out, in);
}

/**
 * Release the plan and the data struct.
 */
static void linop_conv_free(const linop_data_t* _data)
{
	struct conv_data_s* data = CAST_DOWN(conv_data_s, _data);

	conv_free(data->plan);

	free(data);
}


/**
 * Convolution operator
 *
 * No normal or pseudo-inverse function is registered (both passed as NULL).
 *
 * @param N number of dimensions
 * @param flags bitmask of the dimensions to apply convolution
 * @param ctype convolution type (enum conv_type), passed on to conv_plan()
 * @param cmode convolution mode (enum conv_mode), passed on to conv_plan()
 * @param odims output dimensions
 * @param idims input dimensions
 * @param kdims kernel dimensions
 * @param krn convolution kernel
 */
struct linop_s* linop_conv_create(int N, unsigned int flags, enum conv_type ctype, enum conv_mode cmode, const long odims[N],
                const long idims[N], const long kdims[N], const complex float* krn)
{
	PTR_ALLOC(struct conv_data_s, data);
	SET_TYPEID(conv_data_s, data);

	data->plan = conv_plan(N, flags, ctype, cmode, odims, idims, kdims, krn);

	return linop_create(N, odims, N, idims, CAST_UP(PTR_PASS(data)), linop_conv_forward, linop_conv_adjoint, NULL, NULL, linop_conv_free);
}

bart-0.4.02/src/linops/someops.h000066400000000000000000000042511320577655200164740ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
*/ #ifndef __SOMEOPS_H #define __SOMEOPS_H #include #include "misc/cppwrap.h" extern struct linop_s* linop_cdiag_create(unsigned int N, const long dims[__VLA(N)], unsigned int flags, const _Complex float* diag); extern struct linop_s* linop_rdiag_create(unsigned int N, const long dims[__VLA(N)], unsigned int flags, const _Complex float* diag); extern struct linop_s* linop_identity_create(unsigned int N, const long dims[__VLA(N)]); extern struct linop_s* linop_resize_create(unsigned int N, const long out_dims[__VLA(N)], const long in_dims[__VLA(N)]); extern struct linop_s* linop_fft_create(int N, const long dims[__VLA(N)], unsigned int flags); extern struct linop_s* linop_ifft_create(int N, const long dims[__VLA(N)], unsigned int flags); extern struct linop_s* linop_fftc_create(int N, const long dims[__VLA(N)], unsigned int flags); extern struct linop_s* linop_ifftc_create(int N, const long dims[__VLA(N)], unsigned int flags); extern struct linop_s* linop_cdf97_create(int N, const long dims[__VLA(N)], unsigned int flag); #ifndef __CONV_ENUMS #define __CONV_ENUMS enum conv_mode { CONV_SYMMETRIC, CONV_CAUSAL, CONV_ANTICAUSAL }; enum conv_type { CONV_CYCLIC, CONV_TRUNCATED, CONV_VALID, CONV_EXTENDED }; #endif extern struct linop_s* linop_conv_create(int N, unsigned int flags, enum conv_type ctype, enum conv_mode cmode, const long odims[__VLA(N)], const long idims1[__VLA(N)], const long idims2[__VLA(N)], const complex float* src2); extern struct linop_s* linop_matrix_create(unsigned int N, const long out_dims[__VLA(N)], const long in_dims[__VLA(N)], const long matrix_dims[__VLA(N)], const _Complex float* matrix); extern struct linop_s* linop_matrix_altcreate(unsigned int N, const long out_dims[__VLA(N)], const long in_dims[__VLA(N)], const unsigned int T_dim, const unsigned int K_dim, const complex float* matrix); extern struct linop_s* linop_matrix_chain(const struct linop_s* a, const struct linop_s* b); #include "misc/cppwrap.h" #endif // __SOMEOPS_H 
bart-0.4.02/src/linops/sum.c000066400000000000000000000112641320577655200156100ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014 Frank Ong * 2016 Martin Uecker * */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/ops.h" #include "num/iovec.h" #include "linops/linop.h" #include "misc/misc.h" #include "misc/mri.h" #include "sum.h" #include "misc/debug.h" /** * data structure */ struct sum_data { INTERFACE(linop_data_t); bool use_gpu; long imgd_dims[DIMS]; long img_dims[DIMS]; long levels; long imgd_strs[DIMS]; long img_strs[DIMS]; complex float* tmp; }; static DEF_TYPEID(sum_data); static struct sum_data* sum_create_data(const long imgd_dims[DIMS], bool use_gpu); static void sum_free_data(const linop_data_t* _data); static void sum_apply(const linop_data_t* _data, complex float* _dst, const complex float* _src); static void sum_apply_adjoint(const linop_data_t* _data, complex float* _dst, const complex float* _src); static void sum_apply_normal(const linop_data_t* _data, complex float* _dst, const complex float* _src); static void sum_apply_pinverse(const linop_data_t* _data, float lambda, complex float* _dst, const complex float* _src); /** * Create sum operator */ const struct linop_s* linop_sum_create(const long imgd_dims[DIMS], bool use_gpu) { struct sum_data* data = sum_create_data(imgd_dims, use_gpu); // create operator interface return linop_create(DIMS, data->img_dims, DIMS, data->imgd_dims, CAST_UP(data), sum_apply, sum_apply_adjoint, sum_apply_normal, sum_apply_pinverse, sum_free_data); } static struct sum_data* sum_create_data(const long imgd_dims[DIMS], bool use_gpu) { PTR_ALLOC(struct sum_data, data); SET_TYPEID(sum_data, data); // decom dimensions 
md_copy_dims(DIMS, data->imgd_dims, imgd_dims); md_calc_strides(DIMS, data->imgd_strs, imgd_dims, CFL_SIZE); // image dimensions data->levels = imgd_dims[LEVEL_DIM]; md_select_dims(DIMS, ~LEVEL_FLAG, data->img_dims, imgd_dims); md_calc_strides(DIMS, data->img_strs, data->img_dims, CFL_SIZE); data->tmp = NULL; data->use_gpu = use_gpu; return PTR_PASS(data); } void sum_free_data(const linop_data_t* _data) { struct sum_data* data = CAST_DOWN(sum_data, _data); if (NULL != data->tmp) md_free(data->tmp); xfree(data); } void sum_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { struct sum_data* data = CAST_DOWN(sum_data, _data); md_clear(DIMS, data->img_dims, dst, CFL_SIZE); md_zaxpy2(DIMS, data->imgd_dims, data->img_strs, dst, 1. / sqrtf(data->levels), data->imgd_strs, src); } void sum_apply_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src) { struct sum_data* data = CAST_DOWN(sum_data, _data); md_clear(DIMS, data->imgd_dims, dst, CFL_SIZE); md_zaxpy2(DIMS, data->imgd_dims, data->imgd_strs, dst, 1. / sqrtf(data->levels), data->img_strs, src); } void sum_apply_normal(const linop_data_t* _data, complex float* dst, const complex float* src) { struct sum_data* data = CAST_DOWN(sum_data, _data); if (NULL == data->tmp) { #ifdef USE_CUDA data->tmp = (data->use_gpu ? md_alloc_gpu : md_alloc)(DIMS, data->img_dims, CFL_SIZE); #else data->tmp = md_alloc(DIMS, data->img_dims, CFL_SIZE); #endif } sum_apply(_data, data->tmp, src); sum_apply_adjoint(_data, dst, data->tmp); } /** * * x = (ATA + uI)^-1 b * */ void sum_apply_pinverse(const linop_data_t* _data, float rho, complex float* dst, const complex float* src) { struct sum_data* data = CAST_DOWN(sum_data, _data); if (NULL == data->tmp) { #ifdef USE_CUDA data->tmp = (data->use_gpu ? 
md_alloc_gpu : md_alloc)(DIMS, data->img_dims, CFL_SIZE); #else data->tmp = md_alloc(DIMS, data->img_dims, CFL_SIZE); #endif } // get average md_clear(DIMS, data->img_dims, data->tmp, CFL_SIZE); md_zadd2(DIMS, data->imgd_dims, data->img_strs, data->tmp, data->img_strs, data->tmp , data->imgd_strs, src); md_zsmul(DIMS, data->img_dims, data->tmp, data->tmp, 1. / data->levels); // get non-average md_zsub2(DIMS, data->imgd_dims, data->imgd_strs, dst, data->imgd_strs, src, data->img_strs, data->tmp); // avg = avg / (1 + rho) md_zsmul(DIMS, data->img_dims, data->tmp, data->tmp, 1. / (1. + rho)); // nonavg = nonavg / rho md_zsmul(DIMS, data->imgd_dims, dst, dst, 1. / rho); // dst = avg + nonavg md_zadd2(DIMS, data->imgd_dims, data->imgd_strs, dst, data->imgd_strs, dst, data->img_strs, data->tmp); } bart-0.4.02/src/linops/sum.h000066400000000000000000000003161320577655200156110ustar00rootroot00000000000000 #ifdef __cplusplus extern "C" { #endif #include "misc/mri.h" struct operator_s; extern const struct linop_s* linop_sum_create(const long imgd_dims[DIMS], _Bool use_gpu); #ifdef __cplusplus } #endif bart-0.4.02/src/linops/ufft.c000066400000000000000000000120411320577655200157420ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014 Frank Ong * 2016 Martin Uecker * */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/ops.h" #include "num/iovec.h" #include "linops/linop.h" #include "linops/someops.h" #include "misc/misc.h" #include "misc/mri.h" #include "misc/debug.h" #include "ufft.h" /** * data structure for holding the undersampled fft data. 
*/
struct ufft_data {

	INTERFACE(linop_data_t);

	bool use_gpu;
	unsigned int flags;	// dims passed to the centered FFT

	long ksp_dims[DIMS];
	long pat_dims[DIMS];

	long ksp_strs[DIMS];
	long pat_strs[DIMS];

	const struct linop_s* fft_op;	// centered FFT over ksp_dims

	complex float* pat;	// private copy of the sampling pattern / weights
};

static DEF_TYPEID(ufft_data);



static struct ufft_data* ufft_create_data(const long ksp_dims[DIMS], const long pat_dims[DIMS], const complex float* pat, unsigned int flags, bool use_gpu);
static void ufft_free_data(const linop_data_t* _data);
static void ufft_apply(const linop_data_t* _data, complex float* dst, const complex float* src);
static void ufft_apply_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src);
static void ufft_apply_normal(const linop_data_t* _data, complex float* dst, const complex float* src);
static void ufft_apply_pinverse(const linop_data_t* _data, float rho, complex float* dst, const complex float* src);



/**
 * Create undersampled/weighted fft operator
 *
 * @param ksp_dims k-space dimensions (domain and codomain)
 * @param pat_dims dimensions of the pattern
 * @param pat sampling pattern / weights (copied)
 * @param flags bitmask of the dimensions to apply the Fourier transform
 * @param use_gpu allocate the pattern copy on the GPU (only with USE_CUDA)
 */
const struct linop_s* linop_ufft_create(const long ksp_dims[DIMS], const long pat_dims[DIMS], const complex float* pat, unsigned int flags, bool use_gpu)
{
	struct ufft_data* data = ufft_create_data(ksp_dims, pat_dims, pat, flags, use_gpu);

	// Create operator interface
	return linop_create(DIMS, data->ksp_dims, DIMS, data->ksp_dims, CAST_UP(data), ufft_apply, ufft_apply_adjoint, ufft_apply_normal, ufft_apply_pinverse, ufft_free_data);
}


/**
 * Allocate the private data, copy the pattern and create the centered FFT.
 */
static struct ufft_data* ufft_create_data(const long ksp_dims[DIMS], const long pat_dims[DIMS], const complex float* pat, unsigned int flags, bool use_gpu)
{
	PTR_ALLOC(struct ufft_data, data);
	SET_TYPEID(ufft_data, data);

	data->flags = flags;
	data->use_gpu = use_gpu;

	md_copy_dims(DIMS, data->pat_dims, pat_dims);
	md_copy_dims(DIMS, data->ksp_dims, ksp_dims);

	md_calc_strides(DIMS, data->pat_strs, pat_dims, CFL_SIZE);
	md_calc_strides(DIMS, data->ksp_strs, ksp_dims, CFL_SIZE);

#ifdef USE_CUDA
	data->pat = (use_gpu ? md_alloc_gpu : md_alloc)(DIMS, data->pat_dims, CFL_SIZE);
#else
	data->pat = md_alloc(DIMS, data->pat_dims, CFL_SIZE);
#endif
	md_copy(DIMS, data->pat_dims, data->pat, pat, CFL_SIZE);

	data->fft_op = linop_fftc_create(DIMS, ksp_dims, flags);

	return PTR_PASS(data);
}


/**
 * Release the pattern copy, the FFT operator and the data struct.
 */
static void ufft_free_data(const linop_data_t* _data)
{
	struct ufft_data* data = CAST_DOWN(ufft_data, _data);

	md_free(data->pat);

	linop_free(data->fft_op);

	xfree(data);
}


/**
 * Undersampled FFT forward operator
 *
 * dst = pat * FFT(src)
 */
void ufft_apply(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	struct ufft_data* data = CAST_DOWN(ufft_data, _data);

	linop_forward(data->fft_op, DIMS, data->ksp_dims, dst, DIMS, data->ksp_dims, src);

	md_zmul2(DIMS, data->ksp_dims, data->ksp_strs, dst, data->ksp_strs, dst, data->pat_strs, data->pat);
}


/**
 * Undersampled FFT adjoint operator
 *
 * dst = FFT^H(pat * src)
 *
 * NOTE(review): multiplies by pat, not by its complex conjugate - this is
 * the true adjoint only for real-valued patterns; confirm intended inputs.
 */
void ufft_apply_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	struct ufft_data* data = CAST_DOWN(ufft_data, _data);

	md_zmul2(DIMS, data->ksp_dims, data->ksp_strs, dst, data->ksp_strs, src, data->pat_strs, data->pat);

	linop_adjoint(data->fft_op, DIMS, data->ksp_dims, dst, DIMS, data->ksp_dims, dst);
}


/**
 * Undersampled FFT normal operator
 * X = pat^2 B
 *
 * NOTE(review): the code multiplies by pat once, which equals pat^2 only
 * for 0/1 patterns; confirm against callers that pass weights.
 */
void ufft_apply_normal(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	struct ufft_data* data = CAST_DOWN(ufft_data, _data);

	linop_forward(data->fft_op, DIMS, data->ksp_dims, dst, DIMS, data->ksp_dims, src);

	md_zmul2(DIMS, data->ksp_dims, data->ksp_strs, dst, data->ksp_strs, dst, data->pat_strs, data->pat);

	linop_adjoint(data->fft_op, DIMS, data->ksp_dims, dst, DIMS, data->ksp_dims, dst);
}


/**
 * 1/2 || Ax - b ||^2 + rho/2 || x - y ||^2
 *
 * x = (ATA + lI)^-1 b
 *
 * X = 1 / (pat^2 + l) B
 *
 * NOTE(review): the code divides by (pat + rho), i.e. pat rather than pat^2
 * (same 0/1-pattern assumption as the normal operator). It also adds rho to
 * the stored pattern in place and subtracts it again afterwards, so the
 * shared pattern is temporarily modified during the call and is restored
 * only up to floating-point rounding.
 */
static void ufft_apply_pinverse(const linop_data_t* _data, float rho, complex float* dst, const complex float* src)
{
	struct ufft_data* data = CAST_DOWN(ufft_data, _data);

	// pat += rho (temporarily)
	md_zsadd(DIMS, data->pat_dims, data->pat, data->pat, rho);

	linop_forward(data->fft_op, DIMS, data->ksp_dims, dst, DIMS, data->ksp_dims, src);

	md_zdiv2(DIMS, data->ksp_dims, data->ksp_strs, dst, data->ksp_strs, dst, data->pat_strs, data->pat);

	linop_adjoint(data->fft_op, DIMS, data->ksp_dims, dst, DIMS, data->ksp_dims, dst);

	// pat -= rho (undo)
	md_zsadd(DIMS, data->pat_dims, data->pat, data->pat, -rho);
}
bart-0.4.02/src/linops/ufft.h000066400000000000000000000004031320577655200157460ustar00rootroot00000000000000
/* NOTE(review): header name missing in this copy (angle-bracket text appears stripped). */
#include

#include "misc/mri.h"

struct operator_s;


// Undersampled fft operator
extern const struct linop_s* linop_ufft_create(const long ksp_dims[DIMS], const long pat_dims[DIMS], const complex float* pat, unsigned int flags, _Bool use_gpu);
bart-0.4.02/src/linops/waveop.c000066400000000000000000000065161320577655200163110ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California.
 * Copyright 2016-2017. Martin Uecker.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
*
 * Authors:
 * 2014-2017 Martin Uecker
 * 2017 Sofia Dimoudi
 */

/* NOTE(review): the header names of the following two includes are missing
 * in this copy (angle-bracket text appears to have been stripped). */
#include
#include

#include "num/multind.h"
#include "num/flpmath.h"

#include "linops/linop.h"

#include "misc/misc.h"

#include "wavelet/wavelet.h"

#include "waveop.h"


/**
 * Private data of the wavelet operator. All arrays are owned copies and
 * released in wavelet_del().
 */
struct wavelet_s {

	INTERFACE(linop_data_t);

	unsigned int N;
	unsigned int flags;	// dims to transform
	const long* idims;	// input dims
	const long* istr;	// input strides
	const long* odims;	// coefficient dims
	const long* ostr;	// coefficient strides
	const long* minsize;
	long* shifts;		// per-dim shift, rewritten on each forward call if randshift
	bool randshift;
	int rand_state;		// seed state handed to rand_r()
};

static DEF_TYPEID(wavelet_s);


/**
 * Return a random integer in [0, limit] drawn with rand_r() on the given state.
 * Values above limit are rejected and redrawn.
 */
static int wrand_lim(unsigned int* state, int limit)
{
	int divisor = RAND_MAX / (limit + 1);
	int retval;

	do {
		retval = rand_r(state) / divisor;

	} while (retval > limit);

	return retval;
}


/**
 * Forward wavelet transform (fwt2). With randshift set, a new random shift
 * is drawn for every flagged dimension before transforming.
 */
static void wavelet_forward(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	const struct wavelet_s* data = CAST_DOWN(wavelet_s, _data);

	if (data->randshift) {

		for (unsigned int i = 0; i < data->N; i++) {

			if (MD_IS_SET(data->flags, i)) {

				int levels = wavelet_num_levels(data->N, MD_BIT(i), data->idims, data->minsize, 4);

				// the state lives in the (const-viewed) data struct, hence the cast
				data->shifts[i] = wrand_lim((unsigned int*)&data->rand_state, 1 << levels);

				assert(data->shifts[i] < data->idims[i]);
			}
		}
	}

	fwt2(data->N, data->flags, data->shifts, data->odims, data->ostr, dst, data->idims, data->istr, src, data->minsize, 4, wavelet_dau2);
}


/**
 * Inverse wavelet transform (iwt2) using the currently stored shifts.
 */
static void wavelet_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	const struct wavelet_s* data = CAST_DOWN(wavelet_s, _data);

	iwt2(data->N, data->flags, data->shifts, data->idims, data->istr, dst, data->odims, data->ostr, src, data->minsize, 4, wavelet_dau2);
}


/**
 * Release all owned arrays and the data struct.
 */
static void wavelet_del(const linop_data_t* _data)
{
	const struct wavelet_s* data = CAST_DOWN(wavelet_s, _data);

	xfree(data->odims);
	xfree(data->ostr);
	xfree(data->idims);
	xfree(data->istr);
	xfree(data->minsize);
	xfree(data->shifts);
	xfree(data);
}


/**
 * Create a wavelet transform operator.
 *
 * No normal or pseudo-inverse function is registered (both passed as NULL).
 *
 * @param N number of dimensions
 * @param flags bitmask of the dimensions to transform
 * @param dims input dimensions
 * @param istr input strides
 * @param minsize minimum size per dimension, passed to the wavelet routines
 * @param randshift draw a new random shift on every forward application
 */
struct linop_s* linop_wavelet_create(unsigned int N, unsigned int flags, const long dims[N], const long istr[N], const long minsize[N], bool randshift)
{
	PTR_ALLOC(struct wavelet_s, data);
	SET_TYPEID(wavelet_s, data);

	data->N = N;
	data->flags = flags;
	data->randshift = randshift;
	data->rand_state = 1;	// fixed seed

	long (*idims)[N] = TYPE_ALLOC(long[N]);
	md_copy_dims(N, *idims, dims);
	data->idims = *idims;

	long (*nistr)[N] = TYPE_ALLOC(long[N]);
	md_copy_strides(N, *nistr, istr);
	data->istr = *nistr;

	long (*nminsize)[N] = TYPE_ALLOC(long[N]);
	md_copy_dims(N, *nminsize, minsize);
	data->minsize = *nminsize;

	// coefficient dims and strides
	long (*odims)[N] = TYPE_ALLOC(long[N]);
	wavelet_coeffs2(N, flags, *odims, dims, minsize, 4);
	data->odims = *odims;

	long (*ostr)[N] = TYPE_ALLOC(long[N]);
	md_calc_strides(N, *ostr, *odims, CFL_SIZE);
	data->ostr = *ostr;

	// shifts start at zero
	long (*shifts)[N] = TYPE_ALLOC(long[N]);

	for (unsigned int i = 0; i < data->N; i++)
		(*shifts)[i] = 0;

	data->shifts = *shifts;

	return linop_create2(N, *odims, *ostr, N, dims, istr, CAST_UP(PTR_PASS(data)), wavelet_forward, wavelet_adjoint, NULL, NULL, wavelet_del);
}
bart-0.4.02/src/linops/waveop.h000066400000000000000000000006601320577655200163100ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
 */

#include "misc/cppwrap.h"

/* Wavelet transform operator (see waveop.c). */
extern struct linop_s* linop_wavelet_create(unsigned int N, unsigned int flags, const long dims[__VLA(N)], const long istr[__VLA(N)], const long minsize[__VLA(N)], _Bool randshift);

#include "misc/cppwrap.h"
bart-0.4.02/src/lowrank/000077500000000000000000000000001320577655200150055ustar00rootroot00000000000000bart-0.4.02/src/lowrank/batchsvd.c000066400000000000000000000036721320577655200167570ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California.
 * Copyright 2016. Martin Uecker.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
* * Authors: * 2015 Frank Ong * 2016 Martin Uecker */ #include #include "misc/misc.h" #include "num/blas.h" #include "num/lapack.h" #include "num/linalg.h" #include "batchsvd.h" void batch_svthresh(long M, long N, long num_blocks, float lambda, complex float dst[num_blocks][N][M]) { #pragma omp parallel { long minMN = MIN(M, N); PTR_ALLOC(complex float[minMN][M], U); PTR_ALLOC(complex float[N][minMN], VT); PTR_ALLOC(float[minMN], S); PTR_ALLOC(complex float[minMN][minMN], AA); #pragma omp for for (int b = 0; b < num_blocks; b++) { // Compute upper bound | A^T A |_inf // FIXME: this is based on gratuitous guess-work about the obscure // API of this FORTRAN from ancient times... Is it really worth it? blas_csyrk('U', (N <= M) ? 'T' : 'N', (N <= M) ? N : M, (N <= M) ? M : N, 1., M, dst[b], 0., minMN, *AA); // lambda_max( A ) <= max_i sum_j | a_i^T a_j | float s_upperbound = 0; for (int i = 0; i < minMN; i++) { float s = 0; for (int j = 0; j < minMN; j++) s += cabsf((*AA)[MAX(i, j)][MIN(i, j)]); s_upperbound = MAX(s_upperbound, s); } /* avoid doing SVD-based thresholding if we know from * the upper bound that lambda_max <= lambda and the * result must be zero */ if (s_upperbound < lambda * lambda) { mat_zero(N, M, dst[b]); continue; } lapack_svd_econ(M, N, *U, *VT, *S, dst[b]); // soft threshold for (int i = 0; i < minMN; i++) for (int j = 0; j < N; j++) (*VT)[j][i] *= ((*S)[i] < lambda) ? 0. : ((*S)[i] - lambda); blas_matrix_multiply(M, N, minMN, dst[b], *U, *VT); } PTR_FREE(U); PTR_FREE(VT); PTR_FREE(S); PTR_FREE(AA); } // #pragma omp parallel } bart-0.4.02/src/lowrank/batchsvd.h000066400000000000000000000002111320577655200167460ustar00rootroot00000000000000 #include extern void batch_svthresh(long M, long N, long num_blocks, float lambda, complex float dst[num_blocks][N][M]); bart-0.4.02/src/lowrank/lrthresh.c000066400000000000000000000245531320577655200170150ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. 
* Copyright 2015. Tao Zhang and Joseph Cheng.
 * Copyright 2016. Martin Uecker.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
 *
 * Authors:
 * 2014-2015 Frank Ong
 * 2014 Tao Zhang
 * 2014 Joseph Cheng
 * 2014 Jon Tamir
 * 2014-2016 Martin Uecker
 */

/* NOTE(review): the header names of the following five includes are missing
 * in this copy (angle-bracket text appears to have been stripped). */
#include
#include
#include
#include
#include

#include "misc/misc.h"
#include "misc/mri.h"
#include "misc/debug.h"

#include "num/multind.h"
#include "num/flpmath.h"
#include "num/lapack.h"
#include "num/linalg.h"
#include "num/ops.h"
#include "num/iovec.h"
#include "num/blockproc.h"
#include "num/casorati.h"

#include "iter/thresh.h"

#include "lowrank/batchsvd.h"
#include "lowrank/svthresh.h"

#include "lrthresh.h"


/**
 * Private data of the low-rank thresholding operator.
 */
struct lrthresh_data_s {

	INTERFACE(operator_data_t);

	float lambda;		// base threshold, multiplied by mu at apply time
	bool randshift;
	bool use_gpu;
	bool noise;
	int remove_mean;

	long strs_lev[DIMS];	// strides of dims_decom
	long strs[DIMS];	// strides of dims

	long dims_decom[DIMS];	// dims including LEVEL_DIM
	long dims[DIMS];	// image dims (LEVEL_DIM removed)

	unsigned long mflags;
	unsigned long flags;	// not set by lrthresh_create_data()
	long levels;		// size of LEVEL_DIM
	long blkdims[MAX_LEV][DIMS];
};

static DEF_TYPEID(lrthresh_data_s);



static struct lrthresh_data_s* lrthresh_create_data(const long dims_decom[DIMS], bool randshift, unsigned long mflags, const long blkdims[MAX_LEV][DIMS], float lambda, bool noise, int remove_mean, bool use_gpu);
static void lrthresh_free_data(const operator_data_t* data);
static void lrthresh_apply(const operator_data_t* _data, float lambda, complex float* dst, const complex float* src);



/**
 * Initialize lrthresh operator
 *
 * @param dims_lev - decomposition dimensions (with levels at LEVEL_DIM)
 * @param randshift - randshift boolean
 * @param mflags - selects which dimensions gets reshaped as the first dimension in matrix
 * @param blkdims - contains block dimensions for all levels
 * @param lambda - base threshold
 * @param noise - noise boolean (changes how the last level is treated in apply)
 * @param remove_mean - stored in the operator data
 * @param use_gpu - gpu boolean
 *
 */
const struct operator_p_s* lrthresh_create(const long dims_lev[DIMS], bool randshift, unsigned long mflags, const long blkdims[MAX_LEV][DIMS], float lambda, bool noise, int remove_mean, bool use_gpu)
{
	struct lrthresh_data_s* data = lrthresh_create_data(dims_lev, randshift, mflags, blkdims, lambda, noise, remove_mean, use_gpu);

	return operator_p_create(DIMS, dims_lev, DIMS, dims_lev, CAST_UP(data), lrthresh_apply, lrthresh_free_data);
}



/**
 * Initialize lrthresh data
 *
 * @param dims_decom - dimensions with levels at LEVEL_DIM
 * @param randshift - randshift boolean
 * @param mflags - selects which dimensions gets reshaped as the first dimension in matrix
 * @param blkdims - contains block dimensions for all levels
 * @param lambda - base threshold
 * @param noise - noise boolean
 * @param remove_mean - stored as is
 * @param use_gpu - gpu boolean
 *
 */
static struct lrthresh_data_s* lrthresh_create_data(const long dims_decom[DIMS], bool randshift, unsigned long mflags, const long blkdims[MAX_LEV][DIMS], float lambda, bool noise, int remove_mean, bool use_gpu)
{
	PTR_ALLOC(struct lrthresh_data_s, data);
	SET_TYPEID(lrthresh_data_s, data);

	data->randshift = randshift;
	data->mflags = mflags;
	data->lambda = lambda;
	data->noise = noise;
	data->remove_mean = remove_mean;

	// level dimensions
	md_copy_dims(DIMS, data->dims_decom, dims_decom);
	md_calc_strides(DIMS, data->strs_lev, dims_decom, CFL_SIZE);

	// image dimensions
	data->levels = dims_decom[LEVEL_DIM];
	md_select_dims(DIMS, ~LEVEL_FLAG, data->dims, dims_decom);
	md_calc_strides(DIMS, data->strs, data->dims, CFL_SIZE);

	// blkdims
	// NOTE(review): assumes levels <= MAX_LEV; not checked here
	for(long l = 0; l < data->levels; l++) {

		for (long i = 0; i < DIMS; i++)
			data->blkdims[l][i] = blkdims[l][i];
	}

	data->use_gpu = use_gpu;

	return PTR_PASS(data);
}



/**
 * Free lrthresh operator
 */
static void lrthresh_free_data(const operator_data_t* _data)
{
	xfree(CAST_DOWN(lrthresh_data_s, _data));
}



/*
 * Return a random number between 0 and limit inclusive.
*/ static int rand_lim(int limit) { int divisor = RAND_MAX / (limit + 1); int retval; do { retval = rand() / divisor; } while (retval > limit); return retval; } /* * Low rank threhsolding for arbitrary block sizes */ static void lrthresh_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { struct lrthresh_data_s* data = CAST_DOWN(lrthresh_data_s, _data); float lambda = mu * data->lambda; long strs1[DIMS]; md_calc_strides(DIMS, strs1, data->dims_decom, 1); //#pragma omp parallel for for (int l = 0; l < data->levels; l++) { complex float* dstl = dst + l * strs1[LEVEL_DIM]; const complex float* srcl = src + l * strs1[LEVEL_DIM]; long blkdims[DIMS]; long shifts[DIMS]; long unshifts[DIMS]; long zpad_dims[DIMS]; long M = 1; for (unsigned int i = 0; i < DIMS; i++) { blkdims[i] = data->blkdims[l][i]; zpad_dims[i] = (data->dims[i] + blkdims[i] - 1) / blkdims[i]; zpad_dims[i] *= blkdims[i]; if (MD_IS_SET(data->mflags, i)) M *= blkdims[i]; if (data->randshift) shifts[i] = rand_lim(MIN(blkdims[i] - 1, zpad_dims[i] - blkdims[i])); else shifts[i] = 0; unshifts[i] = -shifts[i]; } long zpad_strs[DIMS]; md_calc_strides(DIMS, zpad_strs, zpad_dims, CFL_SIZE); long blk_size = md_calc_size(DIMS, blkdims); long img_size = md_calc_size(DIMS, zpad_dims); long N = blk_size / M; long B = img_size / blk_size; if (data->noise && (l == data->levels - 1)) { M = img_size; N = 1; B = 1; } complex float* tmp; #ifdef USE_CUDA tmp = (data->use_gpu ? md_alloc_gpu : md_alloc)(DIMS, zpad_dims, CFL_SIZE); #else tmp = md_alloc(DIMS, zpad_dims, CFL_SIZE); #endif md_circ_ext(DIMS, zpad_dims, tmp, data->dims, srcl, CFL_SIZE); md_circ_shift(DIMS, zpad_dims, shifts, tmp, tmp, CFL_SIZE); long mat_dims[2]; basorati_dims(DIMS, mat_dims, blkdims, zpad_dims); complex float* tmp_mat; #ifdef USE_CUDA tmp_mat = (data->use_gpu ? 
md_alloc_gpu : md_alloc)(2, mat_dims, CFL_SIZE); #else tmp_mat = md_alloc(2, mat_dims, CFL_SIZE); #endif // Reshape image into a blk_size x number of blocks matrix basorati_matrix(DIMS, blkdims, mat_dims, tmp_mat, zpad_dims, zpad_strs, tmp); batch_svthresh(M, N, mat_dims[1], lambda * GWIDTH(M, N, B), *(complex float (*)[mat_dims[1]][M][N])tmp_mat); // for ( int b = 0; b < mat_dims[1]; b++ ) // svthresh(M, N, lambda * GWIDTH(M, N, B), tmp_mat, tmp_mat); basorati_matrixH(DIMS, blkdims, zpad_dims, zpad_strs, tmp, mat_dims, tmp_mat); md_circ_shift(DIMS, zpad_dims, unshifts, tmp, tmp, CFL_SIZE); md_resize(DIMS, data->dims, dstl, zpad_dims, tmp, CFL_SIZE); md_free(tmp); md_free(tmp_mat); } } /* * Nuclear norm calculation for arbitrary block sizes */ float lrnucnorm(const struct operator_p_s* op, const complex float* src) { struct lrthresh_data_s* data = (struct lrthresh_data_s*)operator_p_get_data(op); long strs1[DIMS]; md_calc_strides(DIMS, strs1, data->dims_decom, 1); float nnorm = 0.; for (int l = 0; l < data->levels; l++) { const complex float* srcl = src + l * strs1[LEVEL_DIM]; long blkdims[DIMS]; long blksize = 1; for (unsigned int i = 0; i < DIMS; i++) { blkdims[i] = data->blkdims[l][i]; blksize *= blkdims[i]; } if (1 == blksize) { for (long j = 0; j < md_calc_size(DIMS, data->dims); j++) nnorm += 2 * cabsf(srcl[j]); continue; } struct svthresh_blockproc_data* svdata = svthresh_blockproc_create(data->mflags, 0., 0); complex float* tmp; #ifdef USE_CUDA tmp = (data->use_gpu ? 
md_alloc_gpu : md_alloc)(DIMS, data->dims, CFL_SIZE); #else tmp = md_alloc(DIMS, data->dims, CFL_SIZE); #endif //debug_print_dims(DP_DEBUG1, DIMS, data->dims); md_copy(DIMS, data->dims, tmp, srcl, CFL_SIZE); // Block SVD Threshold nnorm = blockproc(DIMS, data->dims, blkdims, (void*)svdata, nucnorm_blockproc, tmp, tmp); free(svdata); md_free(tmp); } return nnorm; } /************* * Block dimensions functions *************/ /** * Generates multiscale low rank block sizes * * @param blkdims - block sizes to be written * @param flags - specifies which dimensions to do the blocks. The other dimensions will be the same as input * @param idims - input dimensions * @param blkskip - scale each level by blkskip to generate the next level * * returns number of levels */ long multilr_blkdims(long blkdims[MAX_LEV][DIMS], unsigned long flags, const long idims[DIMS], int blkskip, long initblk) { // Multiscale low rank block sizes long tmp_block[DIMS]; for (unsigned int i = 0; i < DIMS; i++) { if (MD_IS_SET(flags, i)) tmp_block[i] = MIN(initblk, idims[i]); else tmp_block[i] = idims[i]; } bool done; // Loop block_sizes long levels = 0; do { levels++; debug_printf(DP_INFO, "[\t"); for (unsigned int i = 0; i < DIMS; i++) { blkdims[levels - 1][i] = tmp_block[i]; debug_printf(DP_INFO, "%ld\t", blkdims[levels-1][i]); } debug_printf(DP_INFO, "]\n"); done = true; for (unsigned int i = 0; i < DIMS; i++) { if (MD_IS_SET(flags, i) && (idims[i] != 1)) { tmp_block[i] = MIN(tmp_block[i] * blkskip, idims[i]); done = done && (blkdims[levels - 1][i] == idims[i]); } } } while(!done); return levels; } void add_lrnoiseblk(long* levels, long blkdims[MAX_LEV][DIMS], const long idims[DIMS]) { levels[0]++; debug_printf(DP_DEBUG1, "[\t"); for (unsigned int i = 0; i < DIMS; i++) { blkdims[levels[0] - 1][i] = idims[i]; debug_printf(DP_DEBUG1, "%ld\t", blkdims[levels[0] - 1][i]); } debug_printf(DP_DEBUG1, "]\n"); } /** * Generates locally low rank block sizes * * @param blkdims - block sizes to be written * 
@param flags - specifies which dimensions to do the blocks. The other dimensions will be the same as input * @param idims - input dimensions * @param llkblk - the block size * * returns number of levels = 1 */ long llr_blkdims(long blkdims[MAX_LEV][DIMS], unsigned long flags, const long idims[DIMS], long llrblk) { for (unsigned int i = 0; i < DIMS; i++) { if (MD_IS_SET(flags, i)) blkdims[0][i] = MIN(llrblk, idims[i]); else blkdims[0][i] = idims[i]; } return 1; } /** * Generates low rank + sparse block sizes * * @param blkdims - block sizes to be written * @param idims - input dimensions * * returns number of levels = 2 */ long ls_blkdims(long blkdims[MAX_LEV][DIMS], const long idims[DIMS]) { for (unsigned int i = 0; i < DIMS; i++) { blkdims[0][i] = 1; blkdims[1][i] = idims[i]; } return 2; } float get_lrthresh_lambda(const struct operator_p_s* o) { const struct lrthresh_data_s* data = CAST_DOWN(lrthresh_data_s, operator_p_get_data(o)); return data->lambda; } bart-0.4.02/src/lowrank/lrthresh.h000066400000000000000000000024741320577655200170200ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #include #include "misc/mri.h" #ifndef MAX_LEV #define MAX_LEV 100 #endif struct operator_p_s; // Low rank thresholding for arbitrary block sizes extern const struct operator_p_s* lrthresh_create(const long dims_lev[DIMS], _Bool randshift, unsigned long mflags, const long blkdims[MAX_LEV][DIMS], float lambda, _Bool noise, int remove_mean, _Bool use_gpu); // Returns nuclear norm using lrthresh operator extern float lrnucnorm(const struct operator_p_s* op, const complex float* src); // Generates multiscale block sizes extern long multilr_blkdims(long blkdims[MAX_LEV][DIMS], unsigned long flags, const long dims[DIMS], int blkskip, long initblk); // Generates locally low rank block size extern long llr_blkdims(long blkdims[MAX_LEV][DIMS], unsigned long flags, const long dims[DIMS], long llrblk); // Generates low rank plus sparse block size extern long ls_blkdims(long blkdims[MAX_LEV][DIMS], const long dims[DIMS]); extern void add_lrnoiseblk(long* level, long blkdims[MAX_LEV][DIMS], const long dims[DIMS]); // Return the regularization parameter extern float get_lrthresh_lambda(const struct operator_p_s* o); bart-0.4.02/src/lowrank/svthresh.c000066400000000000000000000145721320577655200170300ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2014 Frank Ong * 2014 Martin Uecker * */ #include #include #include #include #include #include "misc/misc.h" #include "misc/mri.h" #include "misc/debug.h" #include "num/multind.h" #include "num/flpmath.h" #include "num/lapack.h" #include "num/blas.h" #include "num/linalg.h" #include "num/ops.h" #include "num/iovec.h" #include "num/fft.h" #include "iter/thresh.h" #include "svthresh.h" float svthresh_nomeanu( long M, long N, float lambda, complex float* dst, const complex float* src) { long MN = M*N; complex float* basis = md_alloc( 1, &M, CFL_SIZE ); complex float* coeff = md_alloc( 1, &N, CFL_SIZE ); complex float* tmp = md_alloc( 1, &MN, CFL_SIZE ); for( int i = 0; i < M; i++) basis[i] = 1./sqrtf( M ); md_clear( 1, &N, coeff, CFL_SIZE ); md_clear( 1, &MN, tmp, CFL_SIZE ); for( int j = 0; j < N; j++) for( int i = 0; i < M; i++) coeff[j] += basis[i] * src[i + j*M]; for( int j = 0; j < N; j++) for( int i = 0; i < M; i++) tmp[i + j*M] = src[i + j*M] - coeff[j] * basis[i]; svthresh(M, N, lambda , dst, tmp); for( int j = 0; j < N; j++) for( int i = 0; i < M; i++) dst[i + j*M] += coeff[j] * basis[i]; return 0; } float svthresh_nomeanv( long M, long N, float lambda, complex float* dst, const complex float* src) { long MN = M*N; complex float* basis = md_alloc( 1, &N, CFL_SIZE ); complex float* coeff = md_alloc( 1, &M, CFL_SIZE ); complex float* tmp = md_alloc( 1, &MN, CFL_SIZE ); for( int i = 0; i < N; i++) basis[i] = 1. 
/ sqrtf( N ); md_clear( 1, &M, coeff, CFL_SIZE ); md_clear( 1, &MN, tmp, CFL_SIZE ); for( int j = 0; j < N; j++) for( int i = 0; i < M; i++) coeff[i] += basis[j] * src[i + j*M]; ; for( int j = 0; j < N; j++) for( int i = 0; i < M; i++) tmp[i + j*M] = src[i + j*M] - coeff[i] * basis[j]; svthresh(M, N, lambda , dst, tmp); for( int j = 0; j < N; j++) for( int i = 0; i < M; i++) dst[i + j*M] += coeff[i] * basis[j]; return 0; } /** * Singular Value Thresholding * * @param M - matrix column size * @param N - matrix row size * @param lambda - regularization parameter * @param A - input/output matrix */ float svthresh(long M, long N, float lambda, complex float* dst, const complex float* src) //FIXME: destroys input { long minMN = MIN(M,N); long dimsU[3] = {M,minMN,1}; long dimsVT[3] = {minMN,N,1}; long dimsS[3] = {minMN,1,1}; // long dimsAA[3] = {minMN, minMN,1}; long strsVT[3]; long strsS[3]; md_calc_strides(3, strsVT, dimsVT, CFL_SIZE); md_calc_strides(3, strsS, dimsS, FL_SIZE); complex float* U = md_alloc_sameplace(3, dimsU, CFL_SIZE, src); complex float* VT = md_alloc_sameplace(3, dimsVT, CFL_SIZE, src ); float* S = md_alloc_sameplace(3, dimsS, FL_SIZE, src); // complex float* AA = md_alloc_sameplace(3, dimsAA, CFL_SIZE, src ); // lapack_normal_multiply( M, N, (M > N), (complex float (*) [])AA, (const complex float (*) [])src ); // SVD lapack_svd_econ(M, N, (complex float (*) []) U, (complex float (*) []) VT, S, (complex float (*) [N])src); // Thresh S md_softthresh(3, dimsS, lambda, 0, S, S); // VT = S * VT md_mul2( 3, dimsVT, strsVT, (float*) VT, strsVT, (float*) VT, strsS, S ); md_mul2( 3, dimsVT, strsVT, ((float*) VT)+1, strsVT, ((float*) VT)+1, strsS, S ); // dst = U * VT blas_matrix_multiply( M, N, minMN, (complex float (*) [N])dst, (const complex float (*) [minMN])U, (const complex float (*) [N])VT ); md_free(U); md_free(VT); md_free(S); return 0; } float nuclearnorm(long M, long N, const complex float* d) { // FIXME: destroys input long minMN = MIN(M,N); long 
dimsU[3] = {M, minMN, 1}; long dimsVT[3] = {minMN, N, 1}; long dimsS[3] = {minMN, 1, 1}; complex float* U = md_alloc_sameplace(3, dimsU, CFL_SIZE, d); complex float* VT = md_alloc_sameplace(3, dimsVT, CFL_SIZE, d ); float* S = md_alloc_sameplace(3, dimsS, FL_SIZE, d); // SVD lapack_svd_econ(M, N, (complex float (*) []) U, (complex float (*) []) VT, S, (complex float (*) [N])d); float nnorm = 0.; for (int i = 0; i < minMN; i++) nnorm += S[i]; md_free(U); md_free(VT); md_free(S); return nnorm; } float maxsingular(long M, long N, const complex float* d) { // FIXME: destroys input long dimsU[2] = {M,N}; long dimsV[2] = {N,N}; complex float* U = md_alloc(2, dimsU, sizeof(complex float) ); complex float* VT = md_alloc(2, dimsV, sizeof(complex float) ); float* S = xmalloc( MIN(M,N) * sizeof(float) ); // SVD lapack_svd_econ(M, N, (complex float (*) []) U, (complex float (*) []) VT, S, (complex float (*) [N])d); float value = S[0]; md_free(U); md_free(VT); free(S); return value; } /*********** * Blockproc functions ************/ struct svthresh_blockproc_data { unsigned long mflags; float lambda; int remove_mean; }; struct svthresh_blockproc_data* svthresh_blockproc_create( unsigned long mflags, float lambda, int remove_mean ) { PTR_ALLOC(struct svthresh_blockproc_data, data); data->mflags = mflags; data->lambda = lambda; data->remove_mean = remove_mean; return data; } float svthresh_blockproc( const void* _data, const long blkdims[DIMS], complex float* dst, const complex float* src ) { const struct svthresh_blockproc_data* data = (const struct svthresh_blockproc_data*) _data; long M = 1; long N = md_calc_size( DIMS, blkdims ); for ( unsigned int i = 0; i < DIMS; i++ ) { if (MD_IS_SET(data->mflags, i)) { M *= blkdims[i]; N /= blkdims[i]; } } if (data->remove_mean == 1) svthresh_nomeanu(M, N, data->lambda , dst, src); else if (data->remove_mean == 2) svthresh_nomeanv(M, N, data->lambda , dst, src); else if (data->remove_mean == 0) svthresh(M, N, data->lambda , dst, src); 
else assert(0); return 0; } float nucnorm_blockproc( const void* _data, const long blkdims[DIMS], complex float* dst, const complex float* src ) { UNUSED(dst); const struct svthresh_blockproc_data* data = (const struct svthresh_blockproc_data*) _data; long M = 1; long N = md_calc_size( DIMS, blkdims ); for ( unsigned int i = 0; i < DIMS; i++ ) { if (MD_IS_SET(data->mflags, i)) { M *= blkdims[i]; N /= blkdims[i]; } } float G = sqrtf(M) + sqrtf(N); return G * nuclearnorm(M, N, src); } bart-0.4.02/src/lowrank/svthresh.h000066400000000000000000000030121320577655200170200ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #define GWIDTH( M, N, B) ( (sqrtf( M ) + sqrtf( N )) + sqrtf( logf( B * ((M > N) ? N : M )) )) //#define GWIDTH( M, N, B) ( sqrtf( M ) + sqrtf( N ) ) //#define GWIDTH( M, N, B) sqrtf( ((M > N) ? M : N) ) #include // Singular value thresholding for matrix extern float svthresh(long M, long N, float lambda, complex float* dst, const complex float* src); extern float svthresh2(long M, long N, float lambda, complex float* dst, const complex float* src, complex float* U, float* S, complex float* VT); extern float svthresh_nomeanu(long M, long N, float lambda, complex float* dst, const complex float* src); extern float svthresh_nomeanv(long M, long N, float lambda, complex float* dst, const complex float* src); // Singular value analysis (maybe useful to help determining regularization parameter for min nuclear norm) extern float nuclearnorm(long M, long N, const complex float* d); extern float maxsingular(long M, long N, const complex float* d); extern struct svthresh_blockproc_data* svthresh_blockproc_create(unsigned long mflags, float lambda, int remove_mean); extern float svthresh_blockproc(const void* _data, const long blkdims[DIMS], complex float* dst, const complex float* src); extern 
float nucnorm_blockproc(const void* _data, const long blkdims[DIMS], complex float* dst, const complex float* src); bart-0.4.02/src/lrmatrix.c000066400000000000000000000144471320577655200153500ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014 Frank Ong * 2015-2016 Martin Uecker */ #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "num/ops.h" #include "linops/linop.h" #include "linops/sum.h" #include "linops/sampling.h" #include "linops/someops.h" #include "iter/iter.h" #include "iter/lsqr.h" #include "iter/thresh.h" #include "iter/prox.h" #include "lowrank/lrthresh.h" #include "misc/debug.h" #include "misc/mri.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" struct s_data { INTERFACE(operator_data_t); long size; }; static DEF_TYPEID(s_data); // x = (z1 + z2)/2 static void sum_xupdate(const operator_data_t* _data, float rho, complex float* dst, const complex float* src) { UNUSED(rho); const struct s_data* data = CAST_DOWN(s_data, _data); for(int i = 0; i < data->size; i++) dst[i] = src[i] / 2.; } static void sum_xupdate_free(const operator_data_t* data) { xfree(CAST_DOWN(s_data, data)); } static const char usage_str[] = " "; static const char help_str[] = "Perform (multi-scale) low rank matrix completion"; int main_lrmatrix(int argc, char* argv[]) { double start_time = timestamp(); bool use_gpu = false; int maxiter = 100; float rho = 0.25; int blkskip = 2; bool randshift = true; long mflags = 1; long flags = ~0; const char* sum_str = NULL; bool noise = false; bool decom = false; bool llr = false; long llrblk = -1; bool ls = false; bool hogwild = false; bool fast = true; long initblk = 1; int remove_mean = 0; const struct opt_s opts[] = { OPT_SET('d', &decom, 
"perform decomposition instead, ie fully sampled"), // FIXME: 'd' fell through to u in original version ??!? OPT_INT('i', &maxiter, "iter", "maximum iterations."), OPT_LONG('m', &mflags, "flags", "which dimensions are reshaped to matrix columns."), OPT_LONG('f', &flags, "flags", "which dimensions to perform multi-scale partition."), OPT_INT('j', &blkskip, "scale", "block size scaling from one scale to the next one."), OPT_LONG('k', &initblk, "size", "smallest block size"), OPT_SET('N', &noise, "add noise scale to account for Gaussian noise."), OPT_SET('s', &ls, "perform low rank + sparse matrix completion."), OPT_LONG('l', &llrblk, "size", "perform locally low rank soft thresholding with specified block size."), OPT_STRING('o', &sum_str, "out2", "summed over all non-noise scales to create a denoised output."), OPT_SELECT('u', int, &remove_mean, 1, "()"), OPT_SELECT('v', int, &remove_mean, 2, "()"), OPT_SET('H', &hogwild, "(hogwild)"), OPT_FLOAT('p', &rho, "", "(rho)"), OPT_CLEAR('n', &randshift, "(no randshift)"), OPT_SET('g', &use_gpu, "(use GPU)"), }; cmdline(&argc, argv, 2, 2, usage_str, help_str, ARRAY_SIZE(opts), opts); if (-1 != llrblk) llr = true; long idims[DIMS]; long odims[DIMS]; // Load input complex float* idata = load_cfl(argv[1], DIMS, idims); // Get levels and block dimensions long blkdims[MAX_LEV][DIMS]; long levels; if (llr) levels = llr_blkdims(blkdims, flags, idims, llrblk); else if (ls) levels = ls_blkdims(blkdims, idims); else levels = multilr_blkdims(blkdims, flags, idims, blkskip, initblk); if (noise) add_lrnoiseblk(&levels, blkdims, idims); debug_printf(DP_INFO, "Number of levels: %ld\n", levels); // Get outdims md_copy_dims(DIMS, odims, idims); odims[LEVEL_DIM] = levels; complex float* odata = create_cfl(argv[22], DIMS, odims); md_clear(DIMS, odims, odata, sizeof(complex float)); // Get pattern complex float* pattern = NULL; if (!decom) { pattern = md_alloc(DIMS, idims, CFL_SIZE); estimate_pattern(DIMS, idims, TIME_FLAG, pattern, idata); } 
// Initialize algorithm iter_conf* iconf; struct iter_admm_conf mmconf; memcpy(&mmconf, &iter_admm_defaults, sizeof(struct iter_admm_conf)); mmconf.maxiter = maxiter; mmconf.rho = rho; mmconf.hogwild = hogwild; mmconf.fast = fast; iconf = CAST_UP(&mmconf); // Initialize operators const struct linop_s* sum_op = linop_sum_create(odims, use_gpu); const struct linop_s* sampling_op = NULL; if (!decom) { sampling_op = linop_sampling_create(idims, idims, pattern); sum_op = linop_chain(sum_op, sampling_op); linop_free(sampling_op); } const struct operator_p_s* sum_prox = prox_lineq_create( sum_op, idata ); const struct operator_p_s* lr_prox = lrthresh_create(odims, randshift, mflags, (const long (*)[])blkdims, 1., noise, remove_mean, use_gpu); assert(use_gpu == false); (use_gpu ? num_init_gpu : num_init)(); if (use_gpu) debug_printf(DP_INFO, "GPU reconstruction\n"); // put into iter2 format unsigned int num_funs = 2; const struct linop_s* eye_op = linop_identity_create(DIMS, odims); const struct linop_s* ops[2] = { eye_op, eye_op }; const struct operator_p_s* prox_ops[2] = { sum_prox, lr_prox }; long size = 2 * md_calc_size(DIMS, odims); struct s_data s_data = { { &TYPEID(s_data) }, size / 2 }; const struct operator_p_s* sum_xupdate_op = operator_p_create(DIMS, odims, DIMS, odims, CAST_UP(&s_data), sum_xupdate, sum_xupdate_free); // do recon iter2_admm( iconf, NULL, num_funs, prox_ops, ops, NULL, sum_xupdate_op, size, (float*) odata, NULL, NULL); // Sum if (sum_str) { complex float* sdata = create_cfl(sum_str, DIMS, idims); long istrs[DIMS]; long ostrs[DIMS]; md_calc_strides(DIMS, istrs, idims, sizeof(complex float)); md_calc_strides(DIMS, ostrs, odims, sizeof(complex float)); md_clear(DIMS, idims, sdata, sizeof(complex float)); odims[LEVEL_DIM]--; md_zaxpy2(DIMS, odims, istrs, sdata, 1. 
/ sqrt(levels), ostrs, odata); odims[LEVEL_DIM]++; unmap_cfl(DIMS, idims, sdata); } // Clean up unmap_cfl(DIMS, idims, idata); unmap_cfl(DIMS, odims, odata); linop_free(sum_op); operator_p_free(sum_prox); operator_p_free(lr_prox); double end_time = timestamp(); debug_printf(DP_INFO, "Total Time: %f\n", end_time - start_time); exit(0); } bart-0.4.02/src/main.c000066400000000000000000000001631320577655200144200ustar00rootroot00000000000000 extern int main_real(int argc, char** argv); int main(int argc, char** argv) { return main_real(argc, argv); } bart-0.4.02/src/main.h000066400000000000000000000003501320577655200144230ustar00rootroot00000000000000 #include "misc/cppmap.h" #define DECLMAIN(x) \ extern int main_ ## x(int argc, char* argv[]); MAP(DECLMAIN, MAIN_LIST) #undef DECLMAIN extern int main_bart(int argc, char* argv[]); extern int main_bbox(int argc, char* argv[]); bart-0.4.02/src/mandelbrot.c000066400000000000000000000041341320577655200156250ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "num/loop.h" #include "misc/mmio.h" #include "misc/io.h" #include "misc/misc.h" #include "misc/opts.h" static const char usage_str[] = "output"; static const char help_str[] = "Compute mandelbrot set.\n"; int main_mandelbrot(int argc, char* argv[]) { unsigned int size = 512; unsigned int iter = 20; float zoom = .20; // 0.3 float thresh = 4.; float offr = 0.0; // 0.4 float offi = 0.0; const struct opt_s opts[] = { OPT_UINT('s', &size, "size", "image size"), OPT_UINT('n', &iter, "#", "nr. 
of iterations"), OPT_FLOAT('t', &thresh, "t", "threshold for divergence"), OPT_FLOAT('z', &zoom, "z", "zoom"), OPT_FLOAT('r', &offr, "r", "offset real"), OPT_FLOAT('i', &offi, "i", "offset imag"), }; cmdline(&argc, argv, 1, 1, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); complex float off = offr + 1.i * offi; long dims[2] = { size, size }; complex float* o = create_cfl(argv[1], 2, dims); md_zfill(2, dims, o, iter); complex float* x = md_calloc(2, dims, CFL_SIZE); complex float* t = md_alloc(2, dims, CFL_SIZE); complex float* c = md_alloc(2, dims, CFL_SIZE); md_zgradient(2, dims, c, (complex float[2]){ 1., 1.i }); md_zfill(2, dims, t, (size / 2.) * (1. + 1.i + off)); md_zsub(2, dims, c, c, t); md_zsmul(2, dims, c, c, 1. / (zoom * size)); for (unsigned int i = 0; i < iter; i++) { // iteration x -> x * x + c md_zmul(2, dims, x, x, x); md_zadd(2, dims, x, x, c); // track non-divergent points md_zabs(2, dims, t, x); md_slessequal(3, (long[3]){ 2, dims[0], dims[1] }, (float*)t, (float*)t, thresh); md_zreal(2, dims, t, t); md_zsub(2, dims, o, o, t); } md_free(t); md_free(c); md_free(x); unmap_cfl(2, dims, o); exit(0); } bart-0.4.02/src/mat2cfl.c000066400000000000000000000027201320577655200150250ustar00rootroot00000000000000/* * Martin Uecker 2012-01-18 * uecker@eecs.berkeley.edu */ #include #include #include #include #include #include #include #include // MATLAB #include "mat.h" #include "matrix.h" #include "num/multind.h" #include "misc/mmio.h" int main(int argc, char *argv[]) { MATFile *mat; const char* name = NULL; const mwSize* dims; if (argc != 2) { fprintf(stderr, "Usage: %s file.mat\n", argv[0]); exit(1); } if (NULL == (mat = matOpen(argv[1], "r"))) exit(1); mxArray* ar; while (NULL != (ar = matGetNextVariable(mat, &name))) { int ndim = (int)mxGetNumberOfDimensions(ar); dims = mxGetDimensions(ar); bool cmp = mxIsComplex(ar); bool dbl = mxIsDouble(ar); printf("%s: [ ", name); if ((!cmp) || (!dbl)) { printf("not complex double\n"); mxDestroyArray(ar); 
continue; } long ldims[ndim]; for (int i = 0; i < ndim; i++) ldims[i] = dims[i]; for (int i = 0; i < ndim; i++) printf("%ld ", ldims[i]); char outname[256]; snprintf(outname, 256, "%s_%s", strtok(argv[1], "."), name); complex float* buf = create_cfl(outname, ndim, ldims); double* re = mxGetPr(ar); double* im = mxGetPi(ar); size_t size = md_calc_size(ndim, ldims); for (unsigned long i = 0; i < size; i++) buf[i] = re[i] + 1.i * im[i]; printf("] -> %s\n", outname); unmap_cfl(ndim, ldims, buf); mxDestroyArray(ar); } matClose(mat); } bart-0.4.02/src/mip.c000077500000000000000000000036511320577655200142710ustar00rootroot00000000000000/* Copyright 2014-2016. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014-2016 Jonathan Tamir */ #define _GNU_SOURCE #include #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/opts.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "bitmask "; static const char help_str[] = "Maximum (minimum) intensity projection (MIP) along dimensions specified by bitmask.\n"; int main_mip(int argc, char* argv[argc]) { bool do_abs = false; bool mIP = false; const struct opt_s opts[] = { OPT_SET('m', &mIP, "minimum" ), OPT_SET('a', &do_abs, "do absolute value first" ), }; cmdline(&argc, argv, 3, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); unsigned int flags = atoi(argv[1]); long idims[DIMS]; complex float* in = load_cfl(argv[2], DIMS, idims); long odims[DIMS]; md_select_dims(DIMS, ~flags, odims, idims); complex float* out = create_cfl(argv[3], DIMS, odims); complex float* tmp = md_alloc(DIMS, idims, CFL_SIZE); if (do_abs) md_zabs(DIMS, idims, tmp, in); else md_copy(DIMS, idims, tmp, in, CFL_SIZE); long istr[DIMS]; long ostr[DIMS]; md_calc_strides(DIMS, istr, 
idims, CFL_SIZE); md_calc_strides(DIMS, ostr, odims, CFL_SIZE); md_clear(DIMS, odims, out, CFL_SIZE); md_max2(DIMS, idims, ostr, (float*)out, ostr, (const float*)out, istr, (const float*)tmp); if (mIP) { // need result of max in output md_min2(DIMS, idims, ostr, (float*)out, ostr, (const float*)out, istr, (const float*)tmp); } md_free(tmp); unmap_cfl(DIMS, idims, in); unmap_cfl(DIMS, odims, out); exit(0); } bart-0.4.02/src/misc/000077500000000000000000000000001320577655200142635ustar00rootroot00000000000000bart-0.4.02/src/misc/cppmap.h000066400000000000000000000026451320577655200157230ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2015 Martin Uecker */ // some classic C preprocessor hackery /* Ideas from: * * https://github.com/swansontec/map-macro * https://github.com/pfultz2/Cloak/wiki/Is-the-C-preprocessor-Turing-complete%3F */ #define EMPTY() #define DEFER1(...) __VA_ARGS__ EMPTY() #define DEFER2(...) __VA_ARGS__ DEFER1(EMPTY)() #define DEFER3(...) __VA_ARGS__ DEFER2(EMPTY)() #define EXPAND6(...) __VA_ARGS__ #define EXPAND5(...) EXPAND6(EXPAND6(__VA_ARGS__)) #define EXPAND4(...) EXPAND5(EXPAND5(__VA_ARGS__)) #define EXPAND3(...) EXPAND4(EXPAND4(__VA_ARGS__)) #define EXPAND2(...) EXPAND3(EXPAND3(__VA_ARGS__)) #define EXPAND1(...) EXPAND2(EXPAND2(__VA_ARGS__)) #define EXPAND0(...) EXPAND1(EXPAND1(__VA_ARGS__)) #define EXPAND(...) EXPAND0(EXPAND0(__VA_ARGS__)) #define CAT0(x, y) x ## y #define CAT(x, y) CAT0(x, y) #define NIL_TEST() DUMMY, TRUE, #define RET2ND0(a, b, ...) b #define RET2ND(...) RET2ND0(__VA_ARGS__) #define NIL_P(x) RET2ND(NIL_TEST x, FALSE) #define IF_TRUE(a, b) a #define IF_FALSE(a, b) b #define IF(x, a, b) CAT(IF_, x)(a, b) #define MAP1() MAP0 #define MAP0(f, a, b, ...) f(a) IF(NIL_P(b), , DEFER3(MAP1)()(f, b, __VA_ARGS__)) #define MAP(f, ...) 
EXPAND(MAP0(f, __VA_ARGS__, ())) bart-0.4.02/src/misc/cppwrap.h000066400000000000000000000005321320577655200161100ustar00rootroot00000000000000 #ifndef __BART_CPP_WRAP #define __BART_CPP_WRAP #ifdef __cplusplus extern "C" { #endif #ifndef __VLA #ifdef __cplusplus #define __VLA(x) #else #define __VLA(x) static x #endif #endif #ifndef __VLA2 #ifdef __cplusplus #define __VLA2(x) #else #define __VLA2(x) x #endif #endif #else #undef __BART_CPP_WRAP #ifdef __cplusplus } #endif #endif bart-0.4.02/src/misc/debug.c000066400000000000000000000065731320577655200155300ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013-2016 Martin Uecker * 2013,2015 Jonathan Tamir * 2013 Dara Bahri #include #include #include #include #include #include #include #ifndef __CYGWIN__ #include #endif #include "num/multind.h" #include "misc/mmio.h" #include "misc/cppmap.h" #include "misc/misc.h" #include "debug.h" #define STRSIZE 64 // Patrick Virtue's timing code double timestamp(void) { struct timeval tv; gettimeofday(&tv, 0); // more accurate than return tv.tv_sec + 1e-6 * tv.tv_usec; } void dump_cfl(const char* name, int D, const long dimensions[D], const complex float* src) { complex float* out = create_cfl(name, D, dimensions); md_copy(D, dimensions, out, src, sizeof(complex float)); unmap_cfl(D, dimensions, out); } int debug_level = -1; bool debug_logging = false; static const char* level_strings[] = { #define LSTR(x) [DP_ ## x] = # x, MAP(LSTR, ERROR, WARN, INFO, DEBUG1, DEBUG2, DEBUG3, DEBUG4, TRACE, ()) #undef LSTR }; static const char* get_level_str(int level) { assert(level >= 0); return (level < DP_ALL) ? 
level_strings[level] : "ALL"; } static void get_datetime_str(int len, char* datetime_str) { time_t tv = time(NULL); struct tm* dt = gmtime(&tv); strftime(datetime_str, len, "%F %T", dt); } #define RESET "\033[0m" #define RED "\033[31m" void debug_vprintf(int level, const char* fmt, va_list ap) { if (-1 == debug_level) { char* str = getenv("DEBUG_LEVEL"); debug_level = (NULL != str) ? atoi(str) : DP_INFO; } if (level <= debug_level) { FILE* ofp = (level < DP_INFO) ? stderr : stdout; if (debug_logging) { char dt_str[STRSIZE]; get_datetime_str(STRSIZE, dt_str); fprintf(ofp, "[%s] [%s] - ", dt_str, get_level_str(level)); } else if (level < DP_INFO) fprintf(ofp, "%s%s: ", (level < DP_INFO ? RED : ""), get_level_str(level)); vfprintf(ofp, fmt, ap); if ((!debug_logging) && (level < DP_INFO)) fprintf(ofp, RESET); fflush(ofp); } } void debug_printf(int level, const char* fmt, ...) { va_list ap; va_start(ap, fmt); debug_vprintf(level, fmt, ap); va_end(ap); } void debug_backtrace(size_t n) { #ifndef __CYGWIN__ void* ptrs[n + 1]; size_t l = backtrace(ptrs, n + 1); if (l > 1) backtrace_symbols_fd(ptrs + 1, l - 1, STDERR_FILENO); #else UNUSED(n); debug_printf(DP_WARN, "no backtrace on cygwin."); #endif } void debug_trace(const char* fmt, ...) { debug_printf(DP_TRACE, "TRACE %f: ", timestamp()); va_list ap; va_start(ap, fmt); debug_vprintf(DP_TRACE, fmt, ap); va_end(ap); } #ifdef INSTRUMENT /* The following functions are called when entering or * leaving any function, if instrumentation is enabled with: * -finstrument-functions -finstrument-functions-exclude-file-list=debug.c */ extern void __cyg_profile_func_enter(void *this_fn, void *call_site) { UNUSED(call_site); debug_trace("ENTER %p\n", this_fn); } extern void __cyg_profile_func_exit(void *this_fn, void *call_site) { UNUSED(call_site); debug_trace("LEAVE %p\n", this_fn); } #endif bart-0.4.02/src/misc/debug.h000066400000000000000000000017211320577655200155230ustar00rootroot00000000000000/* Copyright 2013. 
The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifndef __DEBUG_H #define __DEBUG_H 1 #include #include #include "misc/cppwrap.h" extern void dump_cfl(const char* name, int D, const long dimensions[__VLA(D)], const _Complex float* x); extern double timestamp(void); extern int debug_level; extern _Bool debug_logging; enum debug_levels { DP_ERROR, DP_WARN, DP_INFO, DP_DEBUG1, DP_DEBUG2, DP_DEBUG3, DP_DEBUG4, DP_TRACE, DP_ALL }; extern void debug_printf(int level, const char* fmt, ...); extern void debug_vprintf(int level, const char* fmt, va_list ap); extern void debug_backtrace(size_t n); extern void debug_trace(const char* fmt, ...); #define TRACE() debug_trace("%s:%d %s\n", __FILE__, __LINE__, __func__) #include "misc/cppwrap.h" #endif // __DEBUG_H bart-0.4.02/src/misc/dicom.c000066400000000000000000000146301320577655200155260ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * 2015 Martin Uecker * 2015 Jonathan Tamir */ /* NOTE: This code packs pixel data into very simple dicom images * with only image related tags. Other mandatory DICOM tags are * missing. We only support 16 bit little endian gray scale images. 
* * FOR RESEARCH USE ONLY - NOT FOR DIAGNOSTIC USE */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include "dicom.h" // US unsigned short // LS unsigned long // IS integer string // LT long text // CS code string // OW other word string #define DGRP_IMAGE 0x0028 #define DTAG_IMAGE_SAMPLES_PER_PIXEL 0x0002 #define DTAG_IMAGE_PHOTOM_INTER 0x0004 #define DTAG_IMAGE_ROWS 0x0010 #define DTAG_IMAGE_COLS 0x0011 #define DTAG_IMAGE_BITS_ALLOC 0x0100 #define DTAG_IMAGE_BITS_STORED 0x0101 #define DTAG_IMAGE_PIXEL_HIGH_BIT 0x0102 #define DTAG_IMAGE_PIXEL_REP 0x0103 // 0 unsigned 2 two's complement #define MONOCHROME2 "MONOCHROME2" #define DGRP_PIXEL 0x7FE0 #define DTAG_PIXEL_DATA 0x0010 #define DGRP_FILE 0x0002 #define DTAG_META_SIZE 0x0000 #define DTAG_TRANSFER_SYNTAX 0x0010 #define LITTLE_ENDIAN_EXPLICIT "1.2.840.10008.1.2.1" #define DGRP_IMAGE2 0x0020 #define DTAG_IMAGE_INSTANCE_NUM 0x0013 #define DTAG_COMMENT 0x4000 // order matters... 
enum eoffset { EOFF_BEGIN, ITAG_META_SIZE = EOFF_BEGIN, ITAG_TRANSFER_SYNTAX, ITAG_IMAGE_INSTANCE_NUM, ITAG_COMMENT, ITAG_IMAGE_SAMPLES_PER_PIXEL, ITAG_IMAGE_PHOTOM_INTER, ITAG_IMAGE_ROWS, ITAG_IMAGE_COLS, ITAG_IMAGE_BITS_ALLOC, ITAG_IMAGE_BITS_STORED, ITAG_IMAGE_PIXEL_HIGH_BIT, ITAG_IMAGE_PIXEL_REP, ITAG_PIXEL_DATA, EOFF_END, }; struct element { enum eoffset eoff; // sanity check uint16_t group; uint16_t element; char vr[2]; unsigned int len; const void* data; }; struct element dicom_elements_default[EOFF_END] = { { ITAG_META_SIZE, DGRP_FILE, DTAG_META_SIZE, "UL", 4, &(uint32_t){ 28 } }, { ITAG_TRANSFER_SYNTAX, DGRP_FILE, DTAG_TRANSFER_SYNTAX, "UI", sizeof(LITTLE_ENDIAN_EXPLICIT), LITTLE_ENDIAN_EXPLICIT }, { ITAG_IMAGE_INSTANCE_NUM, DGRP_IMAGE2, DTAG_IMAGE_INSTANCE_NUM, "IS", 0, NULL }, { ITAG_COMMENT, DGRP_IMAGE2, DTAG_COMMENT, "LT", 22, "NOT FOR DIAGNOSTIC USE\0\0" }, { ITAG_IMAGE_SAMPLES_PER_PIXEL, DGRP_IMAGE, DTAG_IMAGE_SAMPLES_PER_PIXEL, "US", 2, &(uint16_t){ 1 } }, // gray scale { ITAG_IMAGE_PHOTOM_INTER, DGRP_IMAGE, DTAG_IMAGE_PHOTOM_INTER, "CS", sizeof(MONOCHROME2), MONOCHROME2 }, // 0 is black { ITAG_IMAGE_ROWS, DGRP_IMAGE, DTAG_IMAGE_ROWS, "US", 2, &(uint16_t){ 0 } }, { ITAG_IMAGE_COLS, DGRP_IMAGE, DTAG_IMAGE_COLS, "US", 2, &(uint16_t){ 0 } }, { ITAG_IMAGE_BITS_ALLOC, DGRP_IMAGE, DTAG_IMAGE_BITS_ALLOC, "US", 2, &(uint16_t){ 16 } }, // { ITAG_IMAGE_BITS_STORED, DGRP_IMAGE, DTAG_IMAGE_BITS_STORED, "US", 2, &(uint16_t){ 16 } }, // 12 for CT { ITAG_IMAGE_PIXEL_HIGH_BIT, DGRP_IMAGE, DTAG_IMAGE_PIXEL_HIGH_BIT, "US", 2, &(uint16_t){ 15 } }, { ITAG_IMAGE_PIXEL_REP, DGRP_IMAGE, DTAG_IMAGE_PIXEL_REP, "US", 2, &(uint16_t){ 0 } }, // unsigned { ITAG_PIXEL_DATA, DGRP_PIXEL, DTAG_PIXEL_DATA, "OW", 0, NULL }, }; static bool vr_oneof(const char a[2], unsigned int N, const char b[N][2]) { for (unsigned int i = 0; i < N; i++) if ((a[0] == b[i][0]) && (a[1] == b[i][1])) return true; return false; } static int dicom_write_element(unsigned int len, char buf[static 8 + len], 
struct element e) { assert((((union { uint16_t s; uint8_t b; }){ 1 }).b)); // little endian assert(len == e.len); assert(0 == len % 2); int o = 0; buf[o++] = ((e.group >> 0) & 0xFF); buf[o++] = ((e.group >> 8) & 0xFF); buf[o++] = ((e.element >> 0) & 0xFF); buf[o++] = ((e.element >> 8) & 0xFF); buf[o++] = e.vr[0]; buf[o++] = e.vr[1]; if (!vr_oneof(e.vr, 5, (const char[5][2]){ "OB", "OW", "SQ", "UN", "UT" })) { buf[o++] = ((len >> 0) & 0xFF); buf[o++] = ((len >> 8) & 0xFF); } else { buf[o++] = 0; // reserved buf[o++] = 0; // reserved buf[o++] = ((len >> 0) & 0xFF); buf[o++] = ((len >> 8) & 0xFF); buf[o++] = ((len >> 16) & 0xFF); buf[o++] = ((len >> 24) & 0xFF); } memcpy(buf + o, e.data, len); return len + o; } int dicom_write(const char* name, unsigned int cols, unsigned int rows, long inum, const unsigned char* img) { int fd; void* addr; struct stat st; int ret = -1; // allocate before any goto calls int entries = EOFF_END; struct element dicom_elements[entries]; for (int i = 0; i < entries; i++) { memcpy(&dicom_elements[i], &dicom_elements_default[i], sizeof(struct element)); assert(dicom_elements[i].eoff == (enum eoffset)i); } if (-1 == (fd = open(name, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR))) goto cleanup; if (-1 == fstat(fd, &st)) goto cleanup; size_t size = 128 + 4; dicom_elements[ITAG_IMAGE_ROWS].data = &(uint16_t){ rows }; dicom_elements[ITAG_IMAGE_COLS].data = &(uint16_t){ cols }; char inst_num[12]; // max number of bytes for InstanceNumber tag sprintf(inst_num, "+%04ld", inum); dicom_elements[ITAG_IMAGE_INSTANCE_NUM].data = inst_num; dicom_elements[ITAG_IMAGE_INSTANCE_NUM].len = sizeof(inst_num); dicom_elements[ITAG_PIXEL_DATA].data = img; dicom_elements[ITAG_PIXEL_DATA].len = 2 * rows * cols; size += 4; // the pixel data element is larger for (int i = 0; i < entries; i++) size += 8 + dicom_elements[i].len; if (-1 == ftruncate(fd, size)) goto cleanup; if (MAP_FAILED == (addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0))) goto cleanup; // write 
header memset(addr, 0, 128); memcpy(addr + 128, "DICM", 4); size_t off = 128 + 4; uint16_t last_group = 0; uint16_t last_element = 0; // make sure tags are in ascending order for (int i = 0; i < entries; i++) { assert(((last_group == dicom_elements[i].group) && (last_element < dicom_elements[i].element)) || (last_group < dicom_elements[i].group)); last_group = dicom_elements[i].group; last_element = dicom_elements[i].element; off += dicom_write_element(dicom_elements[i].len, addr + off, dicom_elements[i]); } assert(0 == size - off); ret = 0; if (-1 == munmap((void*)addr, size)) abort(); cleanup: if (-1 == close(fd)) abort(); return ret; } bart-0.4.02/src/misc/dicom.h000066400000000000000000000001711320577655200155260ustar00rootroot00000000000000 extern int dicom_write(const char* name, unsigned int cols, unsigned int rows, long inum, const unsigned char* img); bart-0.4.02/src/misc/io.c000066400000000000000000000146661320577655200150530ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * Copyright 2015-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2017 Martin Uecker */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include "num/multind.h" #include "misc/version.h" #include "misc/misc.h" #include "misc/debug.h" #include "io.h" static void xdprintf(int fd, const char* fmt, ...) 
{ va_list ap; va_start(ap, fmt); int ret = vdprintf(fd, fmt, ap); va_end(ap); if (ret < 0) error("Error writing.\n"); } struct iofile_s { const char* name; bool out; struct iofile_s* prev; }; static struct iofile_s* iofiles = NULL; static void io_register(const char* name, bool out) { const struct iofile_s* iop = iofiles; while (NULL != iop) { if (0 == strcmp(name, iop->name) && (out || iop->out)) debug_printf(DP_WARN, "Overwriting file: %s\n", name); iop = iop->prev; } PTR_ALLOC(struct iofile_s, ion); ion->name = strdup(name); ion->out = out; ion->prev = iofiles; iofiles = PTR_PASS(ion); } void io_register_input(const char* name) { io_register(name, false); } void io_register_output(const char* name) { io_register(name, true); } int write_cfl_header(int fd, unsigned int n, const long dimensions[n]) { xdprintf(fd, "# Dimensions\n"); for (unsigned int i = 0; i < n; i++) xdprintf(fd, "%ld ", dimensions[i]); xdprintf(fd, "\n"); if (NULL != command_line) { xdprintf(fd, "# Command\n"); xdprintf(fd, "%s\n", command_line); } if (NULL != iofiles) { struct iofile_s* in = iofiles; xdprintf(fd, "# Files\n"); while (in) { xdprintf(fd, " %c%s", in->out ? 
'>' : '<', in->name); in = in->prev; } xdprintf(fd, "\n"); } xdprintf(fd, "# Creator\nBART %s\n", bart_version); return 0; } int read_cfl_header(int fd, unsigned int n, long dimensions[n]) { char header[4097]; memset(header, 0, 4097); int max; if (0 > (max = read(fd, header, 4096))) return -1; int pos = 0; int delta = 0; bool ok = false; while (true) { // skip lines not starting with '#' while ('#' != header[pos]) { if ('\0' == header[pos]) goto out; if (0 != sscanf(header + pos, "%*[^\n]\n%n", &delta)) return -1; if (0 == delta) goto out; pos += delta; } char keyword[32]; if (1 == sscanf(header + pos, "# %31s\n%n", keyword, &delta)) { pos += delta; if (0 == strcmp(keyword, "Dimensions")) { for (unsigned int i = 0; i < n; i++) dimensions[i] = 1; long val; unsigned int i = 0; while (1 == sscanf(header + pos, "%ld%n", &val, &delta)) { pos += delta; if (i < n) dimensions[i] = val; else if (1 != val) return -1; i++; } if (0 != sscanf(header + pos, "\n%n", &delta)) return -1; pos += delta; if (ok) return -1; ok = true; } } else { // skip this line if (0 != sscanf(header + pos, "%*[^\n]\n%n", &delta)) return -1; if (0 == delta) goto out; pos += delta; } } out: return ok ? 
0 : -1; } int write_coo(int fd, unsigned int n, const long dimensions[n]) { char header[4096]; size_t len = ARRAY_SIZE(header); memset(header, 0, 4096); int pos = 0; int ret; ret = snprintf(header + pos, len, "Type: float\nDimensions: %d\n", n); if ((ret < 0) || ((unsigned int)ret >= len)) return -1; pos += ret; len -= ret; long start = 0; long stride = 1; for (unsigned int i = 0; i < n; i++) { long size = dimensions[i]; ret = snprintf(header + pos, len, "[%ld\t%ld\t%ld\t%ld]\n", start, stride * size, size, stride); if ((ret < 0) || ((unsigned int)ret >= len)) return -1; pos += ret; len -= ret; stride *= size; } if (4096 != write(fd, header, 4096)) return -1; return 0; } int read_coo(int fd, unsigned int n, long dimensions[n]) { char header[4096]; if (4096 != read(fd, header, 4096)) return -1; int pos = 0; int delta = 0; if (0 != sscanf(header + pos, "Type: float\n%n", &delta)) return -1; if (0 == delta) return -1; pos += delta; unsigned int dim; if (1 != sscanf(header + pos, "Dimensions: %d\n%n", &dim, &delta)) return -1; pos += delta; // if (n != dim) // return -1; for (unsigned int i = 0; i < n; i++) dimensions[i] = 1; for (unsigned int i = 0; i < dim; i++) { long val; if (1 != sscanf(header + pos, "[%*d %*d %ld %*d]\n%n", &val, &delta)) return -1; pos += delta; if (i < n) dimensions[i] = val; else if (1 != val) // fail if we have to many dimensions not equal 1 return -1; } return 0; } struct ra_hdr_s { uint64_t magic; uint64_t flags; uint64_t eltype; uint64_t elbyte; uint64_t size; uint64_t ndims; }; #define RA_MAGIC_NUMBER 0x7961727261776172ULL #define RA_FLAG_BIG_ENDIAN (1ULL << 0) enum ra_types { RA_TYPE_USER = 0, RA_TYPE_INT, RA_TYPE_UINT, RA_TYPE_FLOAT, RA_TYPE_COMPLEX, }; #define err_assert(x) ({ if (!(x)) { debug_printf(DP_ERROR, "%s", #x); return -1; } }) int read_ra(int fd, unsigned int n, long dimensions[n]) { struct ra_hdr_s header; if (sizeof(header) != read(fd, &header, sizeof(header))) return -1; err_assert(RA_MAGIC_NUMBER == header.magic); 
err_assert(!(header.flags & RA_FLAG_BIG_ENDIAN)); err_assert(RA_TYPE_COMPLEX == header.eltype); err_assert(sizeof(complex float) == header.elbyte); err_assert(header.ndims <= n); uint64_t dims[header.ndims]; if ((int)sizeof(dims) != read(fd, &dims, sizeof(dims))) return -1; md_singleton_dims(n, dimensions); for (unsigned int i = 0; i < header.ndims; i++) dimensions[i] = dims[i]; // this can overflow, but we check in mmio err_assert(header.size == md_calc_size(n, dimensions) * sizeof(complex float)); return 0; } int write_ra(int fd, unsigned int n, const long dimensions[n]) { struct ra_hdr_s header = { .magic = RA_MAGIC_NUMBER, .flags = 0ULL, .eltype = RA_TYPE_COMPLEX, .elbyte = sizeof(complex float), .size = md_calc_size(n, dimensions) * sizeof(complex float), .ndims = n, }; if (sizeof(header) != write(fd, &header, sizeof(header))) return -1; uint64_t dims[n]; for (unsigned int i = 0; i < n; i++) dims[i] = dimensions[i]; if ((int)sizeof(dims) != write(fd, &dims, sizeof(dims))) return -1; return 0; } bart-0.4.02/src/misc/io.h000066400000000000000000000011571320577655200150470ustar00rootroot00000000000000 #include "misc/cppwrap.h" extern int write_ra(int fd, unsigned int n, const long dimensions[__VLA(n)]); extern int read_ra(int fd, unsigned int n, long dimensions[__VLA(n)]); extern int write_coo(int fd, unsigned int n, const long dimensions[__VLA(n)]); extern int read_coo(int fd, unsigned int n, long dimensions[__VLA(n)]); extern int write_cfl_header(int fd, unsigned int n, const long dimensions[__VLA(n)]); extern int read_cfl_header(int fd, unsigned int D, long dimensions[__VLA(D)]); extern void io_register_input(const char* name); extern void io_register_output(const char* name); #include "misc/cppwrap.h" bart-0.4.02/src/misc/misc.c000066400000000000000000000167541320577655200153770ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * Copyright 2015. Martin Uecker. * Copyright 2017. University of Oxford. 
* All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2011-2015 Martin Uecker * 2017 Sofia Dimoudi */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include "misc/debug.h" #include "misc/opts.h" #include "misc.h" void* xmalloc(size_t s) { void* p = malloc(s); if (NULL == p) error("Could not allocate memory.\n"); return p; } void xfree(const void* x) { free((void*)x); } void warn_nonnull_ptr(void* p) { void** p2 = p; if (NULL != *p2) { debug_printf(DP_WARN, "pointer not cleared: "); debug_backtrace(1); } } void error(const char* fmt, ...) { va_list ap; va_start(ap, fmt); debug_vprintf(DP_ERROR, fmt, ap); va_end(ap); exit(EXIT_FAILURE); } void print_dims(int D, const long dims[D]) { printf("["); for (int i = 0; i < D; i++) printf("%3ld ", dims[i]); printf("]\n"); } void debug_print_dims(int dblevel, int D, const long dims[D]) { bool dbl = debug_logging; debug_logging = false; debug_printf(dblevel, "["); for (int i = 0; i < D; i++) debug_printf(dblevel, "%3ld ", dims[i]); debug_printf(dblevel, "]\n"); debug_logging = dbl; } int parse_cfl(complex float res[1], const char* str) { char* tail; float re = strtof(str, &tail); float im = 0.; if (str == tail) return -1; if ('\0' == tail[0]) goto ok; if (('i' == tail[0]) && ('\0' == tail[1])) { im = re; re = 0; goto ok; } str = tail; im = strtof(str, &tail); if (('i' != tail[0]) || ('\0' != tail[1])) return -1; ok: res[0] = re + 1.i * im; return 0; } void quicksort(unsigned int N, unsigned int ord[N], const void* data, quicksort_cmp_t cmp) { if (N < 2) return; unsigned int pivot = ord[N / 2]; unsigned int l = 0; unsigned int h = N - 1; while (l <= h) { if (cmp(data, ord[l], pivot) < 0) { l++; continue; } if (cmp(data, ord[h], pivot) > 0) { h--; continue; } unsigned int swap = ord[l]; ord[l] = ord[h]; ord[h] = swap; l++; h--; } if (h + 1 > 0) quicksort(h + 1, ord, data, cmp); if (N > l) quicksort(N - l, ord 
+ l, data, cmp); } /** * Quickselect adapted from §8.5 in Numerical Recipes in C, * The Art of Scientific Computing * Second Edition, William H. Press, 1992. * Sort descending of an array of floats, stop at k largest element. * * @param arr array of floats, input * @param n total number of elements in input array * @param k the rank of the element to be selected in the sort * * @returns the k-th largest float in the array * * @note In-place sort. The input array contents are not preserved in their original order. */ float quickselect(float *arr, unsigned int n, unsigned int k) { unsigned long i,ir,j,l,mid; float a; l=0; ir=n-1; for(;;) { if (ir <= l+1) { if (ir == l+1 && arr[ir] > arr[l]) { SWAP(arr[l],arr[ir], float); } return arr[k]; } else { mid=(l+ir) >> 1; SWAP(arr[mid],arr[l+1], float); if (arr[l] < arr[ir]) { SWAP(arr[l],arr[ir], float); } if (arr[l+1] < arr[ir]) { SWAP(arr[l+1],arr[ir], float); } if (arr[l] < arr[l+1]) { SWAP(arr[l],arr[l+1], float); } i=l+1; j=ir; a=arr[l+1]; for (;;) { do i++; while (arr[i] > a); do j--; while (arr[j] < a); if (j < i) break; SWAP(arr[i],arr[j], float); } arr[l+1]=arr[j]; arr[j]=a; if (j >= k) ir=j-1; if (j <= k) l=i; } } } /** * * Same as quickselect, but the input is a complex array * and the absolute value of the k-th largest element is returned. * Possibly faster for application to complex arrays. 
* */ float quickselect_complex(complex float *arr, unsigned int n, unsigned int k) { unsigned long i,ir,j,l,mid; float a; complex float ca; l=0; ir=n-1; for(;;) { if (ir <= l+1) { if (ir == l+1 && cabsf(arr[ir]) > cabsf(arr[l])) { SWAP(arr[l],arr[ir], complex float); } return cabsf(arr[k]); } else { mid=(l+ir) >> 1; SWAP(arr[mid],arr[l+1], complex float); if (cabsf(arr[l]) < cabsf(arr[ir])) { SWAP(arr[l],arr[ir], complex float); } if (cabsf(arr[l+1]) < cabsf(arr[ir])) { SWAP(arr[l+1],arr[ir], complex float); } if (cabsf(arr[l]) < cabsf(arr[l+1])) { SWAP(arr[l],arr[l+1], complex float); } i=l+1; j=ir; a=cabsf(arr[l+1]); ca = arr[l+1]; for (;;) { do i++; while (cabsf(arr[i]) > a); do j--; while (cabsf(arr[j]) < a); if (j < i) break; SWAP(arr[i],arr[j], complex float); } arr[l+1]=arr[j]; arr[j]=ca; if (j >= k) ir=j-1; if (j <= k) l=i; } } } static const char* quote(const char* str) { int i = 0; int j = 0; int c; bool flag = false; while ('\0' != (c = str[i++])) { if (isspace(c)) flag = true; switch (c) { case '\\': case '\'': case '"': case '$': j++; /* fall through */ default: break; } } if ((!flag) && (0 == j)) return strdup(str); int len = strlen(str); char (*qstr)[len + j + 3] = TYPE_ALLOC(char[len + j + 3]); i = 0; j = 0; (*qstr)[j++] = '\"'; while ('\0' != (c = str[i++])) { switch (c) { case '\\': case '\'': case '"': case '$': (*qstr)[j++] = '\''; /* fall through */ default: (*qstr)[j++] = c; } } (*qstr)[j++] = '\"'; (*qstr)[j++] = '\0'; return *qstr; } const char* command_line = NULL; void save_command_line(int argc, char* argv[]) { size_t len = 0; const char* qargv[argc]; for (int i = 0; i < argc; i++) { qargv[i] = quote(argv[i]); len += strlen(qargv[i]) + 1; } char (*buf)[len + 1] = TYPE_ALLOC(char[len + 1]); size_t pos = 0; for (int i = 0; i < argc; i++) { strcpy((*buf) + pos, qargv[i]); pos += strlen(qargv[i]); free((void*)qargv[i]); (*buf)[pos++] = ' '; } (*buf)[pos] = '\0'; command_line = (*buf); } void mini_cmdline(int* argcp, char* argv[], int 
expected_args, const char* usage_str, const char* help_str) { mini_cmdline_bool(argcp, argv, '\0', expected_args, usage_str, help_str); } bool mini_cmdline_bool(int* argcp, char* argv[], char flag_char, int expected_args, const char* usage_str, const char* help_str) { bool flag = false; struct opt_s opts[1] = { { flag_char, false, opt_set, &flag, NULL } }; char* help = strdup(help_str); int hlen = strlen(help); if ((hlen > 1) && ('\n' == help[hlen - 1])) help[hlen - 1] = '\0'; int min_args = expected_args; int max_args = expected_args; if (expected_args < 0) { min_args = -expected_args; max_args = 1000; } cmdline(argcp, argv, min_args, max_args, usage_str, help, 1, opts); free(help); return flag; } void print_long(unsigned int D, const long arr[D]) { for (unsigned int i = 0; i < D; i++) printf("arr[%i] = %ld\n", i, arr[i]); } void print_float(unsigned int D, const float arr[D]) { for (unsigned int i = 0; i < D; i++) printf("arr[%i] = %f\n", i, arr[i]); } void print_int(unsigned int D, const int arr[D]) { for (unsigned int i = 0; i < D; i++) printf("arr[%i] = %i\n", i, arr[i]); } void print_complex(unsigned int D, const complex float arr[D]) { for (unsigned int i = 0; i < D; i++) printf("arr[%i]: real = %f, imag = %f\n", i, crealf(arr[i]), cimagf(arr[i])); } unsigned int bitcount(unsigned int flags) { unsigned int N = 0; for (; flags > 0; N++) flags &= (flags - 1); return N; } bart-0.4.02/src/misc/misc.h000066400000000000000000000054331320577655200153740ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * Copyright 2017. University of Oxford. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifndef __MISC_H #define __MISC_H #include #include #ifndef M_PI #define M_PI 3.1415926535897932384626433832795 #endif #define MIN(x, y) ({ __typeof(x) __x = (x); __typeof(y) __y = (y); (__x < __y) ? 
__x : __y; }) #define MAX(x, y) ({ __typeof(x) __x = (x); __typeof(y) __y = (y); (__x > __y) ? __x : __y; }) #define UNUSED(x) (void)(x) #define MAKE_ARRAY(x, ...) ((__typeof__(x)[]){ x, __VA_ARGS__ }) #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define SWAP(x, y, T) do { T temp = x; x = y; y = temp; } while (0) // for quickselect #include "misc/cppwrap.h" extern void* xmalloc(size_t s); extern void xfree(const void*); extern void warn_nonnull_ptr(void*); #define XMALLOC(x) (x = xmalloc(sizeof(*x))) #define XFREE(x) (xfree(x), x = NULL) #define _TYPE_ALLOC(T) ((T*)xmalloc(sizeof(T))) #define TYPE_ALLOC(T) _TYPE_ALLOC(__typeof__(T)) // #define TYPE_CHECK(T, x) ({ T* _ptr1 = 0; __typeof(x)* _ptr2 = _ptr1; (void)_ptr2; (x); }) #define _PTR_ALLOC(T, x) \ T* x __attribute__((cleanup(warn_nonnull_ptr))) = xmalloc(sizeof(T)) #define PTR_ALLOC(T, x) _PTR_ALLOC(__typeof__(T), x) #define PTR_FREE(x) XFREE(x) #define PTR_PASS(x) ({ __typeof__(x) __tmp = (x); (x) = NULL; __tmp; }) extern int parse_cfl(_Complex float res[1], const char* str); extern void error(const char* str, ...); extern void print_dims(int D, const long dims[__VLA(D)]); extern void debug_print_dims(int dblevel, int D, const long dims[__VLA(D)]); typedef int (*quicksort_cmp_t)(const void* data, unsigned int a, unsigned int b); extern void quicksort(unsigned int N, unsigned int ord[__VLA(N)], const void* data, quicksort_cmp_t cmp); extern float quickselect(float *arr, unsigned int n, unsigned int k); extern float quickselect_complex(_Complex float *arr, unsigned int n, unsigned int k); extern void mini_cmdline(int* argcp, char* argv[], int expected_args, const char* usage_str, const char* help_str); extern _Bool mini_cmdline_bool(int* argcp, char* argv[], char flag_char, int expected_args, const char* usage_str, const char* help_str); extern void print_long(unsigned int D, const long arr[__VLA(D)]); extern void print_float(unsigned int D, const float arr[__VLA(D)]); extern void print_int(unsigned int D, 
const int arr[__VLA(D)]); extern void print_complex(unsigned int D, const _Complex float arr[__VLA(D)]); extern unsigned int bitcount(unsigned int flags); extern const char* command_line; extern void save_command_line(int argc, char* argv[__VLA(argc)]); #include "misc/cppwrap.h" #endif // __MISC_H bart-0.4.02/src/misc/mmio.c000066400000000000000000000233631320577655200153770ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2017 Martin Uecker * 2015 Jonathan Tamir */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include "num/multind.h" #include "misc/misc.h" #include "misc/io.h" #include "misc/debug.h" #include "mmio.h" // for BSD compatibility #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif static void io_error(const char* fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fflush(stderr); perror(" "); exit(EXIT_FAILURE); } #define err_assert(x) ({ if (!(x)) { debug_printf(DP_ERROR, "%s", #x); exit(EXIT_FAILURE); } }) static bool long_mul_overflow_p(long a, long b) { bool of = false; of |= (a > 0) && (b > 0) && (a > LONG_MAX / b); of |= (a > 0) && (b < 0) && (b < LONG_MIN / a); of |= (a < 0) && (b > 0) && (a < LONG_MIN / b); of |= (a < 0) && (b < 0) && (b < LONG_MAX / a); return of; } static long io_calc_size(unsigned int D, const long dims[D], size_t size) { if (0 == D) return size; long a = io_calc_size(D - 1, dims + 1, size); long b = dims[0]; if ((a < 0) || (b < 0)) return -1; if (long_mul_overflow_p(a, b)) return -1; return a * b; } complex float* load_zra(const char* name, unsigned int D, long dims[D]) { int fd; if (-1 == (fd = open(name, O_RDONLY))) io_error("Loading ra file %s", name); if (-1 == read_ra(fd, D, dims)) error("Loading ra file %s", name); long T; if (-1 == (T = io_calc_size(D, dims, sizeof(complex float)))) error("Loading ra file %s", name); void* addr; struct stat st; if (-1 == fstat(fd, &st)) io_error("Loading ra file %s", name); off_t header_size; if (-1 == (header_size = lseek(fd, 0, SEEK_CUR))) io_error("Loading ra file %s", name); // ra allows random stuff at the end if (T + header_size > st.st_size) error("Loading ra file %s", name); assert(header_size < 4096); if (MAP_FAILED == (addr = mmap(NULL, st.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0))) io_error("Loading ra file %s", name); if (-1 == close(fd)) io_error("Loading ra file %s", name); return (complex float*)(addr + header_size);; } static void* create_data(int ofd, size_t header_size, size_t size) { if (-1 == (ftruncate(ofd, size + header_size))) return NULL; size_t skip = header_size & ~4095UL; size_t off = header_size & 4095UL; void* addr; if (MAP_FAILED == (addr = mmap(NULL, size + off, PROT_READ|PROT_WRITE, MAP_SHARED, ofd, skip))) return NULL; return (char*)addr + off; 
} complex float* create_zra(const char* name, unsigned int D, const long dims[D]) { int ofd; if (-1 == (ofd = open(name, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR))) io_error("Creating ra file %s", name); if (-1 == write_ra(ofd, D, dims)) error("Creating ra file %s", name); long T; if (-1 == (T = io_calc_size(D, dims, sizeof(complex float)))) error("Creating ra file %s", name); off_t header_size; if (-1 == (header_size = lseek(ofd, 0, SEEK_CUR))) io_error("Creating ra file %s", name); void* data; if (NULL == (data = create_data(ofd, header_size, T))) error("Creating ra file %s", name); if (-1 == close(ofd)) io_error("Creating ra file %s", name); return (complex float*)data; } float* create_coo(const char* name, unsigned int D, const long dims[D]) { int ofd; if (-1 == (ofd = open(name, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR))) io_error("Creating coo file %s", name); if (-1 == write_coo(ofd, D, dims)) error("Creating coo file %s", name); long T; if (-1 == (T = io_calc_size(D, dims, sizeof(float)))) error("Creating coo file %s", name); void* addr; if (NULL == (addr = create_data(ofd, 4096, T))) error("Creating coo file %s", name); if (-1 == close(ofd)) io_error("Creating coo file %s", name); return (float*)addr; } complex float* create_zcoo(const char* name, unsigned int D, const long dimensions[D]) { long dims[D + 1]; dims[0] = 2; // complex memcpy(dims + 1, dimensions, D * sizeof(long)); return (complex float*)create_coo(name, D + 1, dims); } complex float* create_cfl(const char* name, unsigned int D, const long dimensions[D]) { io_register_output(name); const char *p = strrchr(name, '.'); if ((NULL != p) && (p != name) && (0 == strcmp(p, ".ra"))) return create_zra(name, D, dimensions); if ((NULL != p) && (p != name) && (0 == strcmp(p, ".coo"))) return create_zcoo(name, D, dimensions); char name_bdy[1024]; if (1024 <= snprintf(name_bdy, 1024, "%s.cfl", name)) error("Creating cfl file %s", name); char name_hdr[1024]; if (1024 <= snprintf(name_hdr, 1024, "%s.hdr", name)) 
error("Creating cfl file %s", name); int ofd; if (-1 == (ofd = open(name_hdr, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR))) io_error("Creating cfl file %s", name); if (-1 == write_cfl_header(ofd, D, dimensions)) error("Creating cfl file %s", name); if (-1 == close(ofd)) io_error("Creating cfl file %s", name); return shared_cfl(D, dimensions, name_bdy); } float* load_coo(const char* name, unsigned int D, long dims[D]) { int fd; if (-1 == (fd = open(name, O_RDONLY))) io_error("Loading coo file %s", name); if (-1 == read_coo(fd, D, dims)) error("Loading coo file %s", name); long T; if (-1 == (T = io_calc_size(D, dims, sizeof(float)))) error("Loading coo file %s", name); void* addr; struct stat st; if (-1 == fstat(fd, &st)) io_error("Loading coo file %s", name); if (T + 4096 != st.st_size) error("Loading coo file %s", name); if (MAP_FAILED == (addr = mmap(NULL, T, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 4096))) io_error("Loading coo file %s", name); if (-1 == close(fd)) io_error("Loading coo file %s", name); return (float*)addr; } complex float* load_zcoo(const char* name, unsigned int D, long dimensions[D]) { long dims[D + 1]; float* data = load_coo(name, D + 1, dims); if (2 != dims[0]) error("Loading coo file %s", name); memcpy(dimensions, dims + 1, D * sizeof(long)); return (complex float*)data; } static complex float* load_cfl_internal(const char* name, unsigned int D, long dimensions[D], bool priv) { io_register_input(name); const char *p = strrchr(name, '.'); if ((NULL != p) && (p != name) && (0 == strcmp(p, ".ra"))) return load_zra(name, D, dimensions); if ((NULL != p) && (p != name) && (0 == strcmp(p, ".coo"))) return load_zcoo(name, D, dimensions); char name_bdy[1024]; if (1024 <= snprintf(name_bdy, 1024, "%s.cfl", name)) error("Loading cfl file %s", name); char name_hdr[1024]; if (1024 <= snprintf(name_hdr, 1024, "%s.hdr", name)) error("Loading cfl file %s", name); int ofd; if (-1 == (ofd = open(name_hdr, O_RDONLY))) io_error("Loading cfl file %s", name); if (-1 
== read_cfl_header(ofd, D, dimensions)) error("Loading cfl file %s", name); if (-1 == close(ofd)) io_error("Loading cfl file %s", name); return (priv ? private_cfl : shared_cfl)(D, dimensions, name_bdy); } complex float* load_cfl(const char* name, unsigned int D, long dimensions[D]) { return load_cfl_internal(name, D, dimensions, true); } complex float* load_shared_cfl(const char* name, unsigned int D, long dimensions[D]) { return load_cfl_internal(name, D, dimensions, false); } complex float* shared_cfl(unsigned int D, const long dims[D], const char* name) { // struct stat st; int fd; void* addr; long T; if (-1 == (T = io_calc_size(D, dims, sizeof(complex float)))) error("shared cfl %s", name); err_assert(T > 0); if (-1 == (fd = open(name, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR))) io_error("shared cfl %s", name); // if (-1 == (fstat(fd, &st))) // abort(); // if (!((0 == st.st_size) || (T == st.st_size))) // abort(); if (NULL == (addr = create_data(fd, 0, T))) error("shared cfl %s", name); if (-1 == close(fd)) io_error("shared cfl %s", name); return (complex float*)addr; } complex float* anon_cfl(const char* name, unsigned int D, const long dims[D]) { UNUSED(name); void* addr; long T; if (-1 == (T = io_calc_size(D, dims, sizeof(complex float)))) error("anon cfl"); if (MAP_FAILED == (addr = mmap(NULL, T, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0))) io_error("anon cfl"); return (complex float*)addr; } #if 0 void* private_raw(size_t* size, const char* name) { int fd; void* addr; struct stat st; if (-1 == (fd = open(name, O_RDONLY))) abort(); if (-1 == (fstat(fd, &st))) abort(); *size = st.st_size; if (MAP_FAILED == (addr = mmap(NULL, *size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0))) abort(); if (-1 == close(fd)) abort(); return addr; } #endif complex float* private_cfl(unsigned int D, const long dims[D], const char* name) { long T; if (-1 == (T = io_calc_size(D, dims, sizeof(complex float)))) error("private cfl %s", name); int fd; void* addr; struct stat st; if 
(-1 == (fd = open(name, O_RDONLY))) io_error("private cfl %s", name); if (-1 == (fstat(fd, &st))) io_error("private cfl %s", name); if (T != st.st_size) error("private cfl %s", name); if (MAP_FAILED == (addr = mmap(NULL, T, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0))) io_error("private cfl %s", name); if (-1 == close(fd)) io_error("private cfl %s", name); return (complex float*)addr; } void unmap_cfl(unsigned int D, const long dims[D], const complex float* x) { long T; if (-1 == (T = io_calc_size(D, dims, sizeof(complex float)))) error("unmap cfl"); if (-1 == munmap((void*)((uintptr_t)x & ~4095UL), T)) io_error("unmap cfl"); } bart-0.4.02/src/misc/mmio.h000066400000000000000000000030241320577655200153740ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include "misc/cppwrap.h" // extern void* private_raw(size_t* size, const char* name); extern _Complex float* shared_cfl(unsigned int D, const long dims[__VLA(D)], const char* name); extern _Complex float* private_cfl(unsigned int D, const long dims[__VLA(D)], const char* name); extern void unmap_cfl(unsigned int D, const long dims[__VLA(D)], const _Complex float* x); extern _Complex float* anon_cfl(const char* name, unsigned int D, const long dims[__VLA(D)]); extern _Complex float* create_cfl(const char* name, unsigned int D, const long dimensions[__VLA(D)]); extern _Complex float* load_cfl(const char* name, unsigned int D, long dimensions[__VLA(D)]); extern _Complex float* load_shared_cfl(const char* name, unsigned int D, long dimensions[__VLA(D)]); extern float* create_coo(const char* name, unsigned int D, const long dimensions[__VLA(D)]); extern float* load_coo(const char* name, unsigned int D, long dimensions[__VLA(D)]); extern _Complex float* create_zcoo(const char* name, unsigned int D, const long dimensions[__VLA(D)]); extern _Complex 
float* load_zcoo(const char* name, unsigned int D, long dimensions[__VLA(D)]); extern _Complex float* create_zra(const char* name, unsigned int D, const long dims[__VLA(D)]); extern _Complex float* load_zra(const char* name, unsigned int D, long dims[__VLA(D)]); #include "misc/cppwrap.h" bart-0.4.02/src/misc/mri.c000066400000000000000000000133311320577655200152170ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013 Martin Uecker */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/loop.h" #include "misc/misc.h" #include "misc/debug.h" #include "sense/optcom.h" #include "mri.h" void data_consistency(const long dims[DIMS], complex float* dst, const complex float* pattern, const complex float* kspace1, const complex float* kspace2) { assert(1 == dims[MAPS_DIM]); long strs[DIMS]; long dims1[DIMS]; long strs1[DIMS]; md_select_dims(DIMS, ~COIL_FLAG, dims1, dims); md_calc_strides(DIMS, strs1, dims1, CFL_SIZE); md_calc_strides(DIMS, strs, dims, CFL_SIZE); complex float* tmp = md_alloc_sameplace(DIMS, dims, CFL_SIZE, dst); md_zmul2(DIMS, dims, strs, tmp, strs, kspace2, strs1, pattern); md_zsub(DIMS, dims, tmp, kspace2, tmp); md_zfmac2(DIMS, dims, strs, tmp, strs, kspace1, strs1, pattern); md_copy(DIMS, dims, dst, tmp, CFL_SIZE); md_free(tmp); } void estimate_pattern(unsigned int D, const long dims[D], unsigned int flags, complex float* pattern, const complex float* kspace_data) { md_zrss(D, dims, flags, pattern, kspace_data); long dims2[D]; long strs2[D]; md_select_dims(D, ~flags, dims2, dims); md_calc_strides(D, strs2, dims2, CFL_SIZE); long strs1[D]; md_singleton_strides(D, strs1); md_zcmp2(D, dims2, strs2, pattern, strs2, pattern, strs1, &(complex float){ 0. }); md_zsub2(D, dims2, strs2, pattern, strs1, &(complex float){ 1. 
}, strs2, pattern); } static void calib_readout_pos(const long caldims[DIMS], long calpos[DIMS], const long in_dims[DIMS], const complex float* in_data) { // now move along readout to find maximum energy long in_strs[DIMS]; md_calc_strides(DIMS, in_strs, in_dims, CFL_SIZE); int maxind = 0; float maxeng = 0.; for (int r = 0; r < in_dims[READ_DIM] - caldims[READ_DIM] + 1; r++) { calpos[READ_DIM] = r; long offset = md_calc_offset(DIMS, calpos, in_strs); float energy = md_znorm2(DIMS, caldims, in_strs, in_data + offset / CFL_SIZE); if (energy > maxeng) { maxind = r; maxeng = energy; } } calpos[READ_DIM] = maxind; } void calib_geom(long caldims[DIMS], long calpos[DIMS], const long calsize[3], const long in_dims[DIMS], const complex float* in_data) { long pat_dims[DIMS]; assert(1 == in_dims[MAPS_DIM]); md_select_dims(DIMS, ~COIL_FLAG, pat_dims, in_dims); complex float* pattern = md_alloc(DIMS, pat_dims, CFL_SIZE); estimate_pattern(DIMS, in_dims, COIL_FLAG, pattern, in_data); for (unsigned int i = 0; i < DIMS; i++) caldims[i] = 1; for (unsigned int i = 0; i < DIMS; i++) calpos[i] = 0; calpos[0] = (in_dims[0] - caldims[0]) / 2; calpos[1] = (in_dims[1] - caldims[1]) / 2; calpos[2] = (in_dims[2] - caldims[2]) / 2; long pat_strs[DIMS]; md_calc_strides(DIMS, pat_strs, pat_dims, CFL_SIZE); bool stop[3] = { false, false, false }; // increase in diagonals first while (!(stop[0] && stop[1] & stop[2])) { for (int i = 0; i < 3; i++) { if (caldims[i] == in_dims[i]) stop[i] = true; if (caldims[i] >= calsize[i]) stop[i] = true; if (stop[i]) continue; caldims[i] += 1; calpos[i] = (in_dims[i] - caldims[i]) / 2; // printf("Try: %ld %ld %ld %ld\n", caldims[1], caldims[2], calpos[1], calpos[2]); long offset = md_calc_offset(DIMS, calpos, pat_strs); float si = sqrtf((float)caldims[0] * (float)caldims[1] * (float)caldims[2]); if (si != md_znorm2(DIMS, caldims, pat_strs, pattern + offset / CFL_SIZE)) { caldims[i]--; calpos[i] = (in_dims[i] - caldims[i]) / 2; stop[i] = true; } } } 
caldims[COIL_DIM] = in_dims[COIL_DIM]; md_free(pattern); #if 1 calib_readout_pos(caldims, calpos, in_dims, in_data); #endif } complex float* extract_calib2(long caldims[DIMS], const long calsize[3], const long in_dims[DIMS], const long in_strs[DIMS], const complex float* in_data, bool fixed) { // first extract center of size in_dims[0], calsize[1], calsize[2], and then process further to save time long tmp_dims[DIMS]; long tmp_pos[DIMS]; long tmp_strs[DIMS]; md_copy_dims(DIMS, tmp_dims, in_dims); md_set_dims(DIMS, tmp_pos, 0); for (unsigned int i = 0; i < 3; i++) { //tmp_dims[i] = MIN(calsize[i], in_dims[i]); tmp_dims[i] = (READ_DIM == i) ? in_dims[i] : MIN(calsize[i], in_dims[i]); tmp_pos[i] = (in_dims[i] - tmp_dims[i]) / 2.; // what about odd sizes? } complex float* tmp_data = md_alloc(DIMS, tmp_dims, CFL_SIZE); md_calc_strides(DIMS, tmp_strs, tmp_dims, CFL_SIZE); md_copy_block2(DIMS, tmp_pos, tmp_dims, tmp_strs, tmp_data, in_dims, in_strs, in_data, CFL_SIZE); long calpos[DIMS]; calib_geom(caldims, calpos, calsize, tmp_dims, tmp_data); if (fixed) { // we should probably change calib_geom instead for (unsigned int i = 0; i < 3; i++) { caldims[i] = MIN(calsize[i], tmp_dims[i]); if (i != READ_DIM) calpos[i] = (tmp_dims[i] - caldims[i]) / 2; } } debug_printf(DP_DEBUG1, "Calibration region... 
(size: %ldx%ldx%ld, pos: %ldx%ldx%ld)\n", caldims[0], caldims[1], caldims[2], calpos[0] + tmp_pos[0], calpos[1] + tmp_pos[1], calpos[2] + tmp_pos[2]); complex float* cal_data = md_alloc(DIMS, caldims, CFL_SIZE); md_copy_block(DIMS, calpos, caldims, cal_data, tmp_dims, tmp_data, CFL_SIZE); md_free(tmp_data); return cal_data; } complex float* extract_calib(long caldims[DIMS], const long calsize[3], const long in_dims[DIMS], const complex float* in_data, bool fixed) { long in_strs[DIMS]; md_calc_strides(DIMS, in_strs, in_dims, CFL_SIZE); return extract_calib2(caldims, calsize, in_dims, in_strs, in_data, fixed); } bart-0.4.02/src/misc/mri.h000066400000000000000000000037151320577655200152310ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifndef __MRI_H #define __MRI_H #include #include "misc/cppwrap.h" enum mri_dims { READ_DIM, PHS1_DIM, PHS2_DIM, COIL_DIM, MAPS_DIM, TE_DIM, COEFF_DIM, COEFF2_DIM, ITER_DIM, CSHIFT_DIM, TIME_DIM, TIME2_DIM, LEVEL_DIM, SLICE_DIM, AVG_DIM, }; #ifdef BERKELEY_SVN #define KSPACE_DIMS 16u #endif #ifndef DIMS #define DIMS 16u #endif #define READ_FLAG (1u << READ_DIM) #define PHS1_FLAG (1u << PHS1_DIM) #define PHS2_FLAG (1u << PHS2_DIM) #define COIL_FLAG (1u << COIL_DIM) #define MAPS_FLAG (1u << MAPS_DIM) #define TE_FLAG (1u << TE_DIM) #define COEFF_FLAG (1u << COEFF_DIM) #define COEFF2_FLAG (1u << COEFF2_DIM) #define ITER_FLAG (1u << ITER_DIM) #define CSHIFT_FLAG (1u << CSHIFT_DIM) #define TIME_FLAG (1u << TIME_DIM) #define TIME2_FLAG (1u << TIME2_DIM) #define LEVEL_FLAG (1u << LEVEL_DIM) #define FFT_FLAGS (READ_FLAG|PHS1_FLAG|PHS2_FLAG) #define SENS_FLAGS (COIL_FLAG|MAPS_FLAG) #define SLICE_FLAG (1u << SLICE_DIM) extern void estimate_pattern(unsigned int D, const long dims[__VLA(D)], unsigned int flags, _Complex float* pattern, const _Complex float* kspace_data); extern 
_Complex float* extract_calib(long caldims[DIMS], const long calsize[3], const long in_dims[DIMS], const _Complex float* in_data, _Bool fixed); extern _Complex float* extract_calib2(long caldims[DIMS], const long calsize[3], const long in_dims[DIMS], const long in_strs[DIMS], const _Complex float* in_data, _Bool fixed); extern void data_consistency(const long dims[DIMS], _Complex float* dst, const _Complex float* pattern, const _Complex float* kspace1, const _Complex float* kspace2); extern void calib_geom(long caldims[DIMS], long calpos[DIMS], const long calsize[3], const long in_dims[DIMS], const _Complex float* in_data); #include "misc/cppwrap.h" #endif // __MRI_H bart-0.4.02/src/misc/opts.c000066400000000000000000000142771320577655200154270ustar00rootroot00000000000000/* Copyright 2015-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2015-2017 Martin Uecker */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include "misc/misc.h" #include "misc/debug.h" #include "opts.h" enum OPT_ARG_TYPE { OPT_SPECIAL, OPT_SET, OPT_CLEAR, OPT_INT, OPT_UINT, OPT_LONG, OPT_FLOAT, OPT_STRING }; static const char* opt_arg_types[] = { " ...", "", "", " d", " d", " d", " f", " " }; static enum OPT_ARG_TYPE opt_arg_type(opt_conv_f fun) { if (opt_set == fun) return OPT_SET; if (opt_clear == fun) return OPT_CLEAR; if (opt_int == fun) return OPT_INT; if (opt_uint == fun) return OPT_UINT; if (opt_long == fun) return OPT_LONG; if (opt_float == fun) return OPT_FLOAT; if (opt_string == fun) return OPT_STRING; return OPT_SPECIAL; } static const char* trim_space(const char* str) { while (isspace(*str)) str++; return str; } static bool show_option_p(const struct opt_s opt) { return (NULL != opt.descr) && !( ('(' == trim_space(opt.descr)[0]) && (')' == opt.descr[strlen(opt.descr) - 1])); } static void print_usage(FILE* fp, const char* name, const 
char* usage_str, int n, const struct opt_s opts[static n ?: 1]) { fprintf(fp, "Usage: %s ", name); for (int i = 0; i < n; i++) if (show_option_p(opts[i])) fprintf(fp, "[-%c%s] ", opts[i].c, opt_arg_types[opt_arg_type(opts[i].conv)]); fprintf(fp, "%s\n", usage_str); } static const char* add_space(bool has_arg, bool has_space) { const char* space = "\t\t"; if (has_arg) space = " "; if (!has_space) space = ""; return space; } static void print_help(const char* help_str, int n, const struct opt_s opts[n ?: 1]) { printf("\n%s\n\n", help_str); for (int i = 0; i < n; i++) if (show_option_p(opts[i])) printf("-%c%s%s\n", opts[i].c, add_space(opts[i].arg, isspace(opts[i].descr[0])), trim_space(opts[i].descr)); printf("-h\t\thelp\n"); } static void check_options(int n, const struct opt_s opts[n ?: 1]) { bool f[256] = { false }; for (int i = 0; i < n; i++) { assert(256 > (unsigned int)opts[i].c); if (f[(unsigned int)opts[i].c]) error("duplicate option: %c\n", opts[i].c); f[(unsigned int)opts[i].c] = true; } } static void process_option(char c, const char* optarg, const char* name, const char* usage_str, const char* help_str, int n, const struct opt_s opts[n ?: 1]) { if ('h' == c) { print_usage(stdout, name, usage_str, n, opts); print_help(help_str, n, opts); exit(0); } for (int i = 0; i < n; i++) { if (opts[i].c == c) { if (opts[i].conv(opts[i].ptr, c, optarg)) { print_usage(stderr, name, usage_str, n, opts); exit(1); } return; } } print_usage(stderr, name, usage_str, n, opts); exit(1); } void cmdline(int* argcp, char* argv[], int min_args, int max_args, const char* usage_str, const char* help_str, int n, const struct opt_s opts[n ?: 1]) { int argc = *argcp; char optstr[2 * n + 2]; check_options(n, opts); save_command_line(argc, argv); int l = 0; optstr[l++] = 'h'; for (int i = 0; i < n; i++) { optstr[l++] = opts[i].c; if (opts[i].arg) optstr[l++] = ':'; } optstr[l] = '\0'; int c; while (-1 != (c = getopt(argc, argv, optstr))) { #if 0 if ('h' == c) { print_usage(stdout, 
argv[0], usage_str, n, opts); print_help(help_str, n, opts); exit(0); } for (int i = 0; i < n; i++) { if (opts[i].c == c) { if (opts[i].conv(opts[i].ptr, c, optarg)) { print_usage(stderr, argv[0], usage_str, n, opts); exit(1); } goto out; } } print_usage(stderr, argv[0], usage_str, n, opts); exit(1); out: continue; #else process_option(c, optarg, argv[0], usage_str, help_str, n, opts); #endif } if ( (argc - optind < min_args) || (argc - optind > max_args)) { print_usage(stderr, argv[0], usage_str, n, opts); exit(1); } int i; for (i = optind; i < argc; i++) argv[i - optind + 1] = argv[i]; *argcp = argc - optind + 1; } bool opt_set(void* ptr, char c, const char* optarg) { UNUSED(c); UNUSED(optarg); *(bool*)ptr = true; return false; } bool opt_clear(void* ptr, char c, const char* optarg) { UNUSED(c); UNUSED(optarg); *(bool*)ptr = false; return false; } bool opt_int(void* ptr, char c, const char* optarg) { UNUSED(c); *(int*)ptr = atoi(optarg); return false; } bool opt_uint(void* ptr, char c, const char* optarg) { UNUSED(c); *(unsigned int*)ptr = atoi(optarg); return false; } bool opt_long(void* ptr, char c, const char* optarg) { UNUSED(c); *(long*)ptr = atoi(optarg); return false; } bool opt_float(void* ptr, char c, const char* optarg) { UNUSED(c); *(float*)ptr = atof(optarg); return false; } bool opt_string(void* ptr, char c, const char* optarg) { UNUSED(c); *(char**)ptr = strdup(optarg); assert(NULL != ptr); return false; } bool opt_float_vec3(void* ptr, char c, const char* optarg) { UNUSED(c); int r = sscanf(optarg, "%f:%f:%f", &(*(float(*)[3])ptr)[0], &(*(float(*)[3])ptr)[1], &(*(float(*)[3])ptr)[2]); assert(3 == r); return false; } bool opt_vec3(void* ptr, char c, const char* optarg) { if (islower(c)) { if (3 != sscanf(optarg, "%ld:%ld:%ld", &(*(long(*)[3])ptr)[0], &(*(long(*)[3])ptr)[1], &(*(long(*)[3])ptr)[2])) { (*(long(*)[3])ptr)[0] = atol(optarg); (*(long(*)[3])ptr)[1] = atol(optarg); (*(long(*)[3])ptr)[2] = atol(optarg); } } else { debug_printf(DP_WARN, "the 
upper-case options for specifying dimensions are deprecated.\n"); int r = sscanf(optarg, "%ld:%ld:%ld", &(*(long(*)[3])ptr)[0], &(*(long(*)[3])ptr)[1], &(*(long(*)[3])ptr)[2]); assert(3 == r); } return false; } bool opt_select(void* ptr, char c, const char* optarg) { UNUSED(c); UNUSED(optarg); struct opt_select_s* sel = ptr; if (0 != memcmp(sel->ptr, sel->default_value, sel->size)) return true; memcpy(sel->ptr, sel->value, sel->size); return false; } bool opt_subopt(void* _ptr, char c, const char* optarg) { UNUSED(c); struct opt_subopt_s* ptr = _ptr; process_option(optarg[0], optarg + 1, "", "", "", ptr->n, ptr->opts); return false; } bart-0.4.02/src/misc/opts.h000066400000000000000000000044701320577655200154260ustar00rootroot00000000000000 #include #include "misc/cppwrap.h" #include "misc/types.h" #include "misc/misc.h" typedef bool opt_conv_f(void* ptr, char c, const char* optarg); struct opt_s { char c; bool arg; opt_conv_f* conv; void* ptr; const char* descr; }; extern opt_conv_f opt_set; extern opt_conv_f opt_clear; extern opt_conv_f opt_int; extern opt_conv_f opt_uint; extern opt_conv_f opt_long; extern opt_conv_f opt_float; extern opt_conv_f opt_string; extern opt_conv_f opt_vec3; extern opt_conv_f opt_float_vec3; extern opt_conv_f opt_select; extern opt_conv_f opt_subopt; struct opt_select_s { void* ptr; const void* value; const void* default_value; size_t size; }; struct opt_subopt_s { int n; struct opt_s* opts; }; typedef long opt_vec3_t[3]; typedef float opt_fvec3_t[3]; #define OPT_SEL(T, x, v) &(struct opt_select_s){ (x), &(T){ (v) }, &(T){ *(x) }, sizeof(T) } #define OPT_SUB(n, opts) &(struct opt_subopt_s){ (n), (opts) } #define OPT_SET(c, ptr, descr) { (c), false, opt_set, TYPE_CHECK(bool*, (ptr)), "\t" descr } #define OPT_CLEAR(c, ptr, descr) { (c), false, opt_clear, TYPE_CHECK(bool*, (ptr)), "\t" descr } #define OPT_ARG(c, _fun, T, ptr, argname, descr) { (c), true, _fun, TYPE_CHECK(T*, (ptr)), " " argname " \t" descr } #define OPT_STRING(c, ptr, 
argname, descr) OPT_ARG(c, opt_string, const char*, ptr, argname, descr) #define OPT_UINT(c, ptr, argname, descr) OPT_ARG(c, opt_uint, unsigned int, ptr, argname, descr) #define OPT_INT(c, ptr, argname, descr) OPT_ARG(c, opt_int, int, ptr, argname, descr) #define OPT_LONG(c, ptr, argname, descr) OPT_ARG(c, opt_long, long, ptr, argname, descr) #define OPT_FLOAT(c, ptr, argname, descr) OPT_ARG(c, opt_float, float, ptr, argname, descr) #define OPT_VEC3(c, ptr, argname, descr) OPT_ARG(c, opt_vec3, opt_vec3_t, ptr, argname, descr) #define OPT_FLVEC3(c, ptr, argname, descr) OPT_ARG(c, opt_float_vec3, opt_fvec3_t, ptr, argname, descr) #define OPT_SELECT(c, T, ptr, value, descr) { (c), false, opt_select, OPT_SEL(T, TYPE_CHECK(T*, ptr), value), "\t" descr } #define OPT_SUBOPT(c, argname, descr, NR, opts) OPT_ARG(c, opt_subopt, struct opt_subopt_s, OPT_SUB(NR, opts), argname, descr) extern void cmdline(int* argc, char* argv[], int min_args, int max_args, const char* usage_str, const char* help_str, int n, const struct opt_s opts[n]); #include "misc/cppwrap.h" bart-0.4.02/src/misc/pcaa.h000066400000000000000000000021641320577655200153430ustar00rootroot00000000000000 #if __GNUC__ >= 5 #warning "Don't include pcaa.h for newer compilers!" #endif /* Macro wrappers for functions to work around a limitation of * the C language standard: A pointer to an array cannot passed * as a pointer to a constant array without adding an explicit cast. * We hide this cast in the macro definitions. For GCC we can define * a type-safe version of the macro. 
* * A similar idea is used in Jens Gustedt's P99 preprocessor macros * and functions package available at: http://p99.gforge.inria.fr/ */ #ifndef AR2D_CAST #ifndef __GNUC__ #define AR2D_CAST(t, n, m, x) (const t(*)[m])(x) #else #define GNUVERSION ((__GNUC__ * 100 + __GNUC_MINOR__) * 100 + __GNUC_PATCHLEVEL__) #if GNUVERSION > 40603 #ifndef BUILD_BUG_ON #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) #endif #define AR2D_CAST(t, n, m, x) (BUILD_BUG_ON(!(__builtin_types_compatible_p(const t[m], __typeof__((x)[0])) \ || __builtin_types_compatible_p(t[m], __typeof__((x)[0])))), (const t(*)[m])(x)) #else // for broken versions of GCC simply cast to void* #define AR2D_CAST(t, n, m, x) ((void*)(x)) #endif #endif #endif bart-0.4.02/src/misc/pd.c000066400000000000000000000155231320577655200150400ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013 Martin Uecker * * Wei, L. Multi-Class Blue Noise Sampling. ACM Trans. Graph. 29:79 (2010) * */ #include #include #include #include #if 1 #define GRID // #define CHECK #endif #ifdef GRID #include "num/multind.h" #endif #include "num/rand.h" #include "misc/misc.h" #include "pd.h" static float dist(int D, const float a[D], const float b[D]) { float r = 0.; for (int i = 0; i < D; i++) r += powf(a[i] - b[i], 2.); return sqrtf(r); } #ifdef GRID static void grid_pos(int D, long pos[D], float delta, const float fpos[D]) { for (int i = 0; i < D; i++) pos[i] = (int)floorf(fpos[i] / delta); } #endif static float vard_scale(int D, const float p[D], float vard) { float cen[D]; for (int i = 0; i < D; i++) cen[i] = 0.5; return 1. + powf(dist(D, cen, p), 2.) 
* vard; } static bool distance_check(int D, int T, int N, float vard, const float delta[T][T], /*const*/ float points[N][D], const int kind[N], int a, int b) { return dist(D, points[a], points[b]) > vard_scale(D, points[a], vard) * delta[kind[a]][kind[b]]; } int (poissondisc_mc)(int D, int T, int N, int I, float vardens, const float delta[T][T], float points[N][D], int kind[N]) { PTR_ALLOC(char[N], active); assert((0 < I) && (I < N)); assert(vardens >= 0.); // otherwise grid granularity needs to be changed memset(*active, 0, N * sizeof(char)); memset(*active, 1, I * sizeof(char)); int k = 30; int p = I; int a = I; #ifdef GRID float mindelta = 1.; float maxdelta = 0.; for (int i = 0; i < T; i++) { for (int j = 0; j < T; j++) { if (delta[i][j] < mindelta) mindelta = delta[i][j]; if (delta[i][j] > maxdelta) maxdelta = delta[i][j]; } } float corner[D]; for (int i = 0; i < D; i++) corner[i] = 0.; maxdelta *= vard_scale(D, corner, vardens); mindelta /= sqrtf((float)D); long patchdims[D]; for (int i = 0; i < D; i++) patchdims[i] = 3 * ceilf(maxdelta / mindelta); long patchstrs[D]; md_calc_strides(D, patchstrs, patchdims, 1); int* patch = md_alloc(D, patchdims, sizeof(int)); long griddims[D]; for (int i = 0; i < D; i++) griddims[i] = ceilf(1. / mindelta); long gridstrs[D]; md_calc_strides(D, gridstrs, griddims, 1); // element size 1! int* grid = md_alloc(D, griddims, sizeof(int)); int mone = -1; md_fill(D, griddims, grid, &mone, sizeof(int)); for (int i = 0; i < I; i++) { long pos[D]; grid_pos(D, pos, mindelta, points[i]); grid[md_calc_offset(D, gridstrs, pos)] = i; } #endif while (a > 0) { // pick active point randomly int sel = (int)floor(a * uniform_rand()) % a; int s2 = 0; while (true) { while (!(*active)[s2]) s2++; if (0 == sel) break; sel--; s2++; } assert((*active)[s2]); // try k times to place a new point near the selected point bool found = false; int rr = 0; // ? 
for (int i = 0; i < k; i++) { float d; float dd; // create a random point between one and two times the allowed distance do { kind[p] = rr++ % T; dd = delta[kind[s2]][kind[p]]; dd *= vard_scale(D, points[s2], vardens); for (int j = 0; j < D; j++) { do { points[p][j] = points[s2][j] + (uniform_rand() - 0.5) * 4. * dd; } while ((points[p][j] < 0.) || (points[p][j] > 1.)); } d = dist(D, points[s2], points[p]); } while ((d < dd) || (d > 2. * dd)); // check if the new point is far enough from all other points bool accept = true; #ifdef GRID long pos[D]; grid_pos(D, pos, mindelta, points[p]); long index = md_calc_offset(D, gridstrs, pos); assert(index < md_calc_size(D, griddims)); if (-1 != grid[index]) { assert(!distance_check(D, T, N, vardens, delta, points, kind, p, grid[index])); accept = false; } if (accept) { long off[D]; for (int i = 0; i < D; i++) off[i] = MIN(MAX(0, pos[i] - (patchdims[i] + 1) / 2), griddims[i] - patchdims[i]); md_copy_block(D, off, patchdims, patch, griddims, grid, sizeof(int)); for (int i = 0; i < md_calc_size(D, patchdims); i++) if (-1 != patch[i]) accept &= distance_check(D, T, N, vardens, delta, points, kind, p, patch[i]); } #endif #ifdef CHECK bool accept2 = true; for (int j = 0; j < p; j++) accept2 &= distance_check(D, T, N, vardens, delta, points, kind, p, j); assert(accept == accept2); #endif #ifndef GRID for (int j = 0; j < p; j++) accept &= distance_check(D, T, N, vardens, delta, points, kind, p, j); #endif if (accept) { // add new point to active list #ifdef GRID assert(-1 == grid[index]); // 0 is actually the first point grid[index] = p; #endif (*active)[p] = 1; a++; p++; if (N == p) goto out; found = true; break; } } // if we can not place a new point, remove point from active list if (!found) { (*active)[s2] = 0; a--; } } out: #ifdef GRID md_free(grid); md_free(patch); #endif XFREE(active); return p; } extern int poissondisc(int D, int N, int I, float vardens, float delta, float points[N][D]) { PTR_ALLOC(int[N], kind); 
memset(*kind, 0, I * sizeof(int)); const float dd[1][1] = { { delta } }; int P = poissondisc_mc(D, 1, N, I, vardens, dd, points, *kind); XFREE(kind); return P; } static void compute_rmatrix(int D, int T, float rmatrix[T][T], const float delta[T], int C, const int nc[T], const int mc[T][T]) { unsigned long processed = 0; float density = 0.; for (int i = 0; i < T; i++) rmatrix[i][i] = delta[i]; for (int k = 0; k < C; k++) { for (int i = 0; i < nc[k]; i++) { int ind = mc[k][i]; processed = MD_SET(processed, ind); density += 1. / powf(delta[ind], (float)D); // printf("%d (%f)\t", ind, density); } //printf("\n"); for (int i = 0; i < nc[k]; i++) for (int j = 0; j < T; j++) if (MD_IS_SET(processed, j) && (i != j)) rmatrix[i][j] = rmatrix[j][i] = powf(density, -1. / (float)D); } } struct sort_label { int index; float x; }; static int sort_cmp(const void* _a, const void* _b) { const struct sort_label* a = _a; const struct sort_label* b = _b; return ((a->x < b->x) - (a->x > b->x)); // FIXME } extern void mc_poisson_rmatrix(int D, int T, float rmatrix[T][T], const float delta[T]) { assert(T <= 32); struct sort_label table[T]; for (int i = 0; i < T; i++) { table[i].index = i; table[i].x = delta[i]; } qsort(&table, T, sizeof(struct sort_label), sort_cmp); int mc[T][T]; int nc[T]; int ind = 0; int i; for (i = 0; (i < T) && (ind < T); i++) { float val = table[ind].x; int j = 0; while ((ind < T) && (table[ind].x == val)) mc[i][j++] = table[ind++].index; nc[i] = j; } compute_rmatrix(D, T, rmatrix, delta, i, nc, (const int (*)[T])mc); } bart-0.4.02/src/misc/pd.h000066400000000000000000000014151320577655200150400ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #ifdef __cplusplus #error This file does not support C++ #endif extern int poissondisc(int D, int N, int II, float vardens, float delta, float points[static N][D]); extern int poissondisc_mc(int D, int T, int N, int II, float vardens, const float delta[static T][T], float points[static N][D], int kind[static N]); extern void mc_poisson_rmatrix(int D, int T, float rmatrix[static T][T], const float delta[static T]); #if __GNUC__ < 5 #include "misc/pcaa.h" #define poissondisc_mc(A, B, C, D, E, x, y, z) \ poissondisc_mc(A, B, C, D, E, AR2D_CAST(float, B, B, x), y, z) #endif bart-0.4.02/src/misc/png.c000066400000000000000000000051141320577655200152140ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013 Martin Uecker */ #include #include #include #include #include "misc/misc.h" #include "png.h" static int png_write_anyrgb(const char* name, unsigned int w, unsigned int h, unsigned int nbytes, bool rgb, const unsigned char* buf) { FILE* fp; png_structp structp = NULL; png_infop infop = NULL; png_bytep* volatile row_ptrs = NULL; volatile int ret = -1; // default: return failure if (NULL == (fp = fopen(name, "wb"))) return -1; if (NULL == (structp = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL))) goto cleanup; if (NULL == (infop = png_create_info_struct(structp))) goto cleanup; if (setjmp(png_jmpbuf(structp))) goto cleanup; switch(nbytes){ case 3: png_set_IHDR(structp, infop, w, h, 8, PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); break; case 4: png_set_IHDR(structp, infop, w, h, 8, PNG_COLOR_TYPE_RGBA, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); break; default: error("Supported PNG formats are 24bit (RGB) and 32bit (RGBA)!\n"); } if (!rgb) png_set_bgr(structp); png_init_io(structp, fp); 
png_write_info(structp, infop); row_ptrs = xmalloc(sizeof(png_bytep) * h); int row_size = png_get_rowbytes(structp, infop); for (unsigned int i = 0; i < h; i++) row_ptrs[i] = (png_bytep)(buf + row_size * i); png_write_image(structp, row_ptrs); png_write_end(structp, infop); ret = 0; // return success cleanup: if (NULL != structp) png_destroy_write_struct(&structp, &infop); if (NULL != row_ptrs) free(row_ptrs); fclose(fp); return ret; } int png_write_rgb24(const char* name, unsigned int w, unsigned int h, long inum, const unsigned char* buf) { UNUSED(inum); return png_write_anyrgb(name, w, h, 3, true, buf); } int png_write_rgb32(const char* name, unsigned int w, unsigned int h, long inum, const unsigned char* buf) { UNUSED(inum); return png_write_anyrgb(name, w, h, 4, true, buf); } int png_write_bgr24(const char* name, unsigned int w, unsigned int h, long inum, const unsigned char* buf) { UNUSED(inum); return png_write_anyrgb(name, w, h, 3, false, buf); } int png_write_bgr32(const char* name, unsigned int w, unsigned int h, long inum, const unsigned char* buf) { UNUSED(inum); return png_write_anyrgb(name, w, h, 4, false, buf); } bart-0.4.02/src/misc/png.h000066400000000000000000000007161320577655200152240ustar00rootroot00000000000000 extern int png_write_rgb24(const char* name, unsigned int w, unsigned int h, long inum, const unsigned char* buf); extern int png_write_rgb32(const char* name, unsigned int w, unsigned int h, long inum, const unsigned char* buf); extern int png_write_bgr24(const char* name, unsigned int w, unsigned int h, long inum, const unsigned char* buf); extern int png_write_bgr32(const char* name, unsigned int w, unsigned int h, long inum, const unsigned char* buf); bart-0.4.02/src/misc/resize.c000066400000000000000000000146531320577655200157410ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013-2014 Martin Uecker */ #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/filter.h" #include "resize.h" #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif // FIXME: implement inverse, adjoint, etc.. static void fft_xzeropad2(unsigned int N, const long dims[N], unsigned int d, unsigned int x, const long ostr[N], complex float* dst, const long istr[N], const complex float* src) { assert(d < N); long tdims[N + 1]; md_copy_dims(N, tdims, dims); tdims[N] = x; long tostr[N + 1]; md_copy_strides(N, tostr, ostr); tostr[d] = x * ostr[d]; tostr[N] = ostr[d]; long pdims[N + 1]; md_select_dims(N + 1, MD_BIT(d) | MD_BIT(N), pdims, tdims); long pstr[N + 1]; md_calc_strides(N + 1, pstr, pdims, CFL_SIZE); complex float* shift = md_alloc_sameplace(N + 1, pdims, CFL_SIZE, src); float pos[N]; for (unsigned int i = 0; i < N; i++) pos[i] = 0.; for (unsigned int i = 0; i < x; i++) { pos[d] = -(1. 
/ (float)x) * i; linear_phase(N, pdims, pos, (void*)shift + i * pstr[N]); } long tistr[N + 1]; md_copy_strides(N, tistr, istr); tistr[N] = 0; md_zmul2(N + 1, tdims, tostr, dst, tistr, src, pstr, shift); md_free(shift); fftc2(N + 1, tdims, MD_BIT(d), tostr, dst, tostr, dst); } static void fft_xzeropad(unsigned int N, const long dims[N], unsigned int d, unsigned int x, complex float* dst, const complex float* src) { long odims[N]; long ostrs[N]; long istrs[N]; md_copy_dims(N, odims, dims); odims[d] = x * dims[d]; md_calc_strides(N, ostrs, odims, CFL_SIZE); md_calc_strides(N, istrs, dims, CFL_SIZE); fft_xzeropad2(N, dims, d, x, ostrs, dst, istrs, src); } static void fft_zeropad_simple(unsigned int N, unsigned int flags, const long odims[N], complex float* dst, const long idims[N], const complex float* src) { md_resize_center(N, odims, dst, idims, src, CFL_SIZE); fftc(N, odims, flags, dst, dst); } #if 0 static void fft_zeropad_simpleH(unsigned int N, unsigned int flags, const long odims[N], complex float* dst, const long idims[N], const complex float* src) { complex float* tmp = md_alloc_sameplace(N, idims, CFL_SIZE, src); ifftc(N, idims, flags, tmp, src); md_resize_center(N, odims, dst, idims, tmp, CFL_SIZE); md_free(tmp); } #endif static void fft_zeropad_r(unsigned int N, const long odims[N], complex float* dst, const long idims[N], const complex float* src) { int i = N - 1; while (odims[i] == idims[i]) { if (0 == i) { if (dst != src) md_copy(N, odims, dst, src, CFL_SIZE); return; } i--; } //printf("%d %ld %ld\n", i, odims[i], idims[i]); assert(odims[i] > idims[i]); long tdims[N]; md_copy_dims(N, tdims, idims); tdims[i] = odims[i]; complex float* tmp = md_alloc_sameplace(N, tdims, CFL_SIZE, src); #if 1 if (0 == tdims[i] % idims[i]) { fft_xzeropad(N, idims, i, tdims[i] / idims[i], tmp, src); } else { #else { #endif fft_zeropad_simple(N, MD_BIT(i), tdims, tmp, idims, src); } fft_zeropad_r(N, odims, dst, tdims, tmp); md_free(tmp); } /* * perform zero-padded FFT * */ 
void fft_zeropad(unsigned int N, unsigned int flags, const long odims[N], complex float* dst, const long idims[N], const complex float* src)
{
	/*
	 * flags must be exactly the set of dimensions in which odims is
	 * larger than idims, and no dimension may shrink; both conditions
	 * are only checked with assert. The work is done by fft_zeropad_r.
	 */

	// lflags: dimensions that grow
	unsigned int lflags = 0;

	for (unsigned int i = 0; i < N; i++)
		if (odims[i] > idims[i])
			lflags = MD_SET(lflags, i);

	assert(flags == lflags);

	// sflags: dimensions that shrink (not allowed here)
	unsigned int sflags = 0;

	for (unsigned int i = 0; i < N; i++)
		if (odims[i] < idims[i])
			sflags = MD_SET(sflags, i);

	assert(0 == sflags);

	fft_zeropad_r(N, odims, dst, idims, src);
}


/*
 * Recursive worker for fft_zeropadH.
 *
 * Each call handles the highest dimension i in which odims and idims
 * differ (idims[i] must be larger): it first recurses to bring all
 * lower dimensions to their output size, then applies ifftc along i
 * and resizes dimension i to odims[i] with md_resize_center. When all
 * dimensions agree, src is copied to dst (unless both are the same
 * pointer).
 *
 * NOTE(review): as in fft_zeropad_r, odims[N - 1] is read without a
 * check, so this presumably relies on N >= 1.
 */
static void fft_zeropadH_r(unsigned int N, const long odims[N], complex float* dst, const long idims[N], const complex float* src)
{
	int i = N - 1;

	// search for the highest dimension that still has to be reduced
	while (odims[i] == idims[i]) {

		if (0 == i) {

			if (dst != src)
				md_copy(N, odims, dst, src, CFL_SIZE);

			return;
		}

		i--;
	}

	assert (idims[i] > odims[i]);

	// intermediate dimensions: everything reduced except dimension i
	long tdims[N];
	md_copy_dims(N, tdims, odims);
	tdims[i] = idims[i];

	complex float* tmp = md_alloc_sameplace(N, tdims, CFL_SIZE, src);

	fft_zeropadH_r(N, tdims, tmp, idims, src);

	ifftc(N, tdims, MD_BIT(i), tmp, tmp);

	md_resize_center(N, odims, dst, tdims, tmp, CFL_SIZE);

	md_free(tmp);
}


/*
 * counterpart of fft_zeropad for shrinking dimensions: inverse FFT
 * (ifftc) followed by a centered resize to the smaller output size
 *
 * flags must be exactly the set of dimensions in which odims is
 * smaller than idims, and no dimension may grow (checked with assert).
 */
void fft_zeropadH(unsigned int N, unsigned int flags, const long odims[N], complex float* dst, const long idims[N], const complex float* src)
{
	// lflags: dimensions that grow (not allowed here)
	unsigned int lflags = 0;

	for (unsigned int i = 0; i < N; i++)
		if (odims[i] > idims[i])
			lflags = MD_SET(lflags, i);

	assert(0 == lflags);

	// sflags: dimensions that shrink
	unsigned int sflags = 0;

	for (unsigned int i = 0; i < N; i++)
		if (odims[i] < idims[i])
			sflags = MD_SET(sflags, i);

	assert(flags == sflags);

	fft_zeropadH_r(N, odims, dst, idims, src);
}


/* scale using zero-padding in the Fourier domain
 *
 * All dimensions in which out_dims and in_dims differ are transformed
 * (fftmod, fft, fftmod) into a temporary buffer, resized to out_dims
 * with md_resize_center and transformed back in place in out
 * (ifftmod, ifft, ifftmod).
 */
void sinc_resize(unsigned int D, const long out_dims[D], complex float* out, const long in_dims[D], const complex float* in)
{
	complex float* tmp = md_alloc_sameplace(D, in_dims, CFL_SIZE, in);

	// transform only along the dimensions that change size
	unsigned int flags = 0;

	for (unsigned int i = 0; i < D; i++)
		if (out_dims[i] != in_dims[i])
			flags = MD_SET(flags, i);

	fftmod(D, in_dims, flags, tmp, in);
	fft(D, in_dims, flags, tmp, tmp);
	fftmod(D, in_dims, flags, tmp, tmp);

	// NOTE: the inner fftmod/ifftmod should cancel for N % 4 == 0
	// and could be replaced by a sign change for N % 4 == 1

	// md_resize_center can size up or down
	md_resize_center(D, out_dims, out, in_dims, tmp, CFL_SIZE);

	md_free(tmp);

	ifftmod(D, out_dims, flags, out, out); // see above
	ifft(D, out_dims, flags, out, out);
	ifftmod(D, out_dims, flags, out, out);
}


/* scale using zero-padding in the Fourier domain - scale each dimension in sequence (faster)
 *
 * Each call enlarges the highest dimension i in which out_dims and
 * in_dims differ with sinc_resize (writing into out) and then recurses
 * in place on out. When all dimensions agree, in is copied to out
 * (unless both are the same pointer). Dimensions may only grow
 * (checked with assert).
 *
 * NOTE(review): i starts at D - 1 (unsigned), so this presumably
 * relies on D >= 1.
 */
void sinc_zeropad(unsigned int D, const long out_dims[D], complex float* out, const long in_dims[D], const complex float* in)
{
	unsigned int i = D - 1;

	// search for the highest dimension that still has to be enlarged
	while (out_dims[i] == in_dims[i]) {

		if (0 == i) {

			if (out != in)
				md_copy(D, out_dims, out, in, CFL_SIZE);

			return;
		}

		i--;
	}

	assert(out_dims[i] > in_dims[i]);

	// intermediate dimensions: only dimension i has its final size
	long tmp_dims[D];

	for (unsigned int l = 0; l < D; l++)
		tmp_dims[l] = in_dims[l];

	tmp_dims[i] = out_dims[i];

	//printf("Resizing...%d: %ld->%ld\n", i, in_dims[i], tmp_dims[i]);

	sinc_resize(D, tmp_dims, out, in_dims, in);

	// the remaining dimensions are processed in place in out
	sinc_zeropad(D, out_dims, out, tmp_dims, out);
}

bart-0.4.02/src/misc/resize.h000066400000000000000000000015731320577655200157430ustar00rootroot00000000000000
/* Copyright 2013. The Regents of the University of California.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
*/

#include "misc/cppwrap.h"

/* resize by zero-padding in the Fourier domain, all differing dimensions at once */
extern void sinc_resize(unsigned int D, const long out_dims[__VLA(D)], _Complex float* out, const long in_dims[__VLA(D)], const _Complex float* in);
/* same, but enlarging one dimension after the other */
extern void sinc_zeropad(unsigned int D, const long out_dims[__VLA(D)], _Complex float* out, const long in_dims[__VLA(D)], const _Complex float* in);
/* zero-padded FFT; flags must name the enlarged dimensions */
extern void fft_zeropad(unsigned int D, unsigned int flags, const long out_dims[__VLA(D)], _Complex float* out, const long in_dims[__VLA(D)], const _Complex float* in);
/* counterpart of fft_zeropad; flags must name the reduced dimensions */
extern void fft_zeropadH(unsigned int D, unsigned int flags, const long out_dims[__VLA(D)], _Complex float* out, const long in_dims[__VLA(D)], const _Complex float* in);

#include "misc/cppwrap.h"
bart-0.4.02/src/misc/shrdptr.c000066400000000000000000000025141320577655200161170ustar00rootroot00000000000000
/* Copyright 2016. Martin Uecker.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
 *
 * Authors:
 * 2016 Martin Uecker
 */

/* NOTE(review): the operands of the two bare #include directives below are
 * missing in this copy of the file. The code uses `NULL` and `assert`, so
 * presumably <stdlib.h> (or <stddef.h>) and <assert.h> - confirm against
 * the upstream sources. */
#include
#include

#include "misc/types.h"

#include "shrdptr.h"


/* Initialize an embedded reference count to 1 and store the destructor. */
void shared_obj_init(struct shared_obj_s* obj, void (*del)(const struct shared_obj_s* s))
{
	obj->refcount = 1;
	obj->del = del;
}

/* Take an additional reference (the const qualifier is cast away). */
void shared_obj_ref(const struct shared_obj_s* obj)
{
	((struct shared_obj_s*)obj)->refcount++;
}

/* Drop one reference; once the count falls below 1 the destructor
 * is called, if one was set. */
void shared_obj_destroy(const struct shared_obj_s* x)
{
	if (1 > --(((struct shared_obj_s*)x)->refcount))
		if (NULL != x->del)
			x->del(x);
}



/* Initialize dst as the only member of a new circular list. */
void shared_ptr_init(struct shared_ptr_s* dst, void (*del)(const struct shared_ptr_s*))
{
	dst->next = dst->prev = dst;
	dst->del = del;
}

/* Insert dst into the circular list of src (directly before src)
 * and copy the destructor of src. */
void shared_ptr_copy(struct shared_ptr_s* dst, struct shared_ptr_s* src)
{
	dst->next = src;
	dst->prev = src->prev;
	src->prev->next = dst;
	src->prev = dst;
	dst->del = src->del;
}

/* Remove data from its list; the links stored in data itself are not changed. */
static void shared_unlink(struct shared_ptr_s* data)
{
	data->next->prev = data->prev;
	data->prev->next = data->next;
}

/* Destroy one shared pointer: the last remaining member of a list
 * calls its destructor, all others are only unlinked.
 * Unlike shared_obj_destroy, del is called without a NULL check. */
void shared_ptr_destroy(const struct shared_ptr_s* ptr)
{
	if (ptr->next == ptr) {

		// last member of the list
		assert(ptr == ptr->prev);

		ptr->del(ptr);

	} else {

		shared_unlink(CAST_CONST(struct shared_ptr_s*, ptr));
	}
}

bart-0.4.02/src/misc/shrdptr.h000066400000000000000000000017651320577655200161330ustar00rootroot00000000000000
/* Copyright 2016. Martin Uecker.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
 *
 * Authors:
 * 2016 Martin Uecker
 */


// to be included in pointed-to object

struct shared_obj_s {

	void (*del)(const struct shared_obj_s* x);	// destructor, may be NULL
	int refcount;
};

extern void shared_obj_init(struct shared_obj_s* obj, void (*del)(const struct shared_obj_s* s));
extern void shared_obj_destroy(const struct shared_obj_s* x);
extern void shared_obj_ref(const struct shared_obj_s*);



// alternative: to be included in object with pointer

struct shared_ptr_s {

	// circular, doubly linked list of all pointers sharing the object
	struct shared_ptr_s* next;
	struct shared_ptr_s* prev;

	void (*del)(const struct shared_ptr_s*);	// called for the last member
};

extern void shared_ptr_init(struct shared_ptr_s* dst, void (*del)(const struct shared_ptr_s* p));
extern void shared_ptr_copy(struct shared_ptr_s* dst, struct shared_ptr_s* src);
extern void shared_ptr_destroy(const struct shared_ptr_s* ptr);

bart-0.4.02/src/misc/subpixel.c000066400000000000000000000017701320577655200162670ustar00rootroot00000000000000

/* NOTE(review): the operand of the bare #include directive below is missing
 * in this copy of the file; presumably <complex.h> - confirm against the
 * upstream sources. */
#include

#include "num/multind.h"
#include "num/flpmath.h"
#include "num/fft.h"

#include "misc/misc.h"

#include "subpixel.h"


/*
 * Estimate a shift between in1 and in2 for every dimension selected
 * by flags; shifts[i] is set to 0. for all other dimensions.
 *
 * Both inputs are transformed with fftuc along flags and combined with
 * md_zmulc. For each selected dimension the result is compared with a
 * copy of itself circularly shifted by one sample along that dimension
 * (md_zscalar); the argument of this scalar, divided by 2 pi and
 * multiplied by dims[i], is stored as the shift.
 */
void est_subpixel_shift(unsigned int N, float shifts[N], const long dims[N], unsigned int flags, const complex float* in1, const complex float* in2)
{
	complex float* tmp1 = md_alloc(N, dims, CFL_SIZE);
	complex float* tmp2 = md_alloc(N, dims, CFL_SIZE);

	fftuc(N, dims, flags, tmp1, in1);
	fftuc(N, dims, flags, tmp2, in2);

	md_zmulc(N, dims, tmp1, tmp1, tmp2);

	for (unsigned int i = 0; i < N; i++) {

		shifts[i] = 0.;

		if (!MD_IS_SET(flags, i))
			continue;

		// shift by one sample along dimension i only
		long shift[N];
		for (unsigned int j = 0; j < N; j++)
			shift[j] = 0;

		shift[i] = 1;

		// tmp2 is reused as buffer for the shifted copy of tmp1
		md_circ_shift(N, dims, shift, tmp2, tmp1, CFL_SIZE);

		// the weighting is not optimal due to double squaring
		// and we compute finite differences (filter?)

		complex float sc = md_zscalar(N, dims, tmp2, tmp1);
		shifts[i] = cargf(sc) / (2. * M_PI) * (float)dims[i];
	}

	md_free(tmp1);
	md_free(tmp2);
}

bart-0.4.02/src/misc/subpixel.h000066400000000000000000000002371320577655200162710ustar00rootroot00000000000000

/* estimate the shift between in1 and in2 along the dimensions selected by flags */
extern void est_subpixel_shift(unsigned int N, float shifts[N], const long dims[N], unsigned int flags, const complex float* in1, const complex float* in2);

bart-0.4.02/src/misc/types.h000066400000000000000000000021061320577655200155770ustar00rootroot00000000000000
/* Copyright 2016. Martin Uecker.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
 *
 * Authors:
 * 2016 Martin Uecker
 */

#ifndef __TYPES_H
#define __TYPES_H

/* evaluates to x; the unused branch makes the compiler check x against type T */
#define TYPE_CHECK(T, x)	(1 ? (x) : (T)0)

/* pointer to the enclosing object of type T, given a pointer x to its member */
#define CONTAINER_OF(x, T, member)	\
	((T*)((char*)TYPE_CHECK(__typeof__(&((T*)0)->member), x) - offsetof(T, member)))

/* cast x to T after checking that x is compatible with const T */
#define CAST_CONST(T, x)	((T)TYPE_CHECK(const T, x))

/* checked down-cast from an interface pointer x to struct T:
 * calls error() if x->TYPEID is not &T_TYPEID (GNU statement expression) */
#define CAST_DOWN(T, x)	({ \
	__typeof__(x) __tmp = (x); \
	extern __typeof__(*__tmp->TYPEID) T ## _TYPEID; \
	if (__tmp->TYPEID != &T ## _TYPEID) \
		error("run-time type check failed: %s\n", #T); \
	CONTAINER_OF(__tmp, struct T, INTERFACE); \
})

/* pointer to the interface member embedded in x */
#define CAST_UP(x) (&(x)->INTERFACE)

/* declares the embedded interface member of type X (named INTERFACE) */
#define INTERFACE(X) X INTERFACE

/* type of the per-type tag objects; only their addresses are compared */
typedef const struct typeid_s { int:0; } TYPEID;

#define TYPEID(T) T ## _TYPEID

/* DEF_TYPEID defines the tag object of T, SET_TYPEID stores its address in x */
#define DEF_TYPEID(T) TYPEID TYPEID(T)
#define SET_TYPEID(T, x) (TYPE_CHECK(struct T*, x)->INTERFACE.TYPEID = &TYPEID(T))


// redefine auto - needs newer compilers
#define auto __auto_type


#endif

bart-0.4.02/src/misc/utils.c000066400000000000000000000101661320577655200155730ustar00rootroot00000000000000
/* Copyright 2014. The Regents of the University of California.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
* * Authors: * 2012-2014 Martin Uecker */ #include #include "num/multind.h" #include "num/flpmath.h" #include "num/loop.h" #include "misc/misc.h" #include "utils.h" complex float* compute_mask(unsigned int N, const long msk_dims[N], const float restrict_fov[N]) { complex float* mask = md_alloc(N, msk_dims, CFL_SIZE); long small_dims[N]; for (unsigned int i = 0; i < N; i++) small_dims[i] = (1 == msk_dims[i]) ? 1 : (msk_dims[i] * restrict_fov[i]); complex float* small_mask = md_alloc(N, small_dims, CFL_SIZE); md_fill(N, small_dims, small_mask, &(complex float){ 1. }, CFL_SIZE); md_resize_center(N, msk_dims, mask, small_dims, small_mask, CFL_SIZE); md_free(small_mask); return mask; } void apply_mask(unsigned int N, const long dims[N], complex float* x, const float restrict_fov[N]) { unsigned int flags = 0; for (unsigned int i = 0; i < N; i++) if (1. != restrict_fov[i]) flags = MD_SET(flags, i); long msk_dims[N]; md_select_dims(N, flags, msk_dims, dims); long msk_strs[N]; md_calc_strides(N, msk_strs, msk_dims, CFL_SIZE); complex float* mask = compute_mask(N, msk_dims, restrict_fov); long strs[N]; md_calc_strides(N, strs, dims, CFL_SIZE); md_zmul2(N, dims, strs, x, strs, x, msk_strs, mask); md_free(mask); } void normalize(int N, unsigned int flags, const long dims[N], complex float* maps) { long dims_img[N]; md_select_dims(N, ~flags, dims_img, dims); complex float* maps_norm = md_alloc(N, dims_img, CFL_SIZE); md_zrss(N, dims, flags, maps_norm, maps); long str[N]; long str_img[N]; md_calc_strides(N, str, dims, CFL_SIZE); md_calc_strides(N, str_img, dims_img, CFL_SIZE); md_zdiv2(N, dims, str, maps, str, maps, str_img, maps_norm); md_free(maps_norm); } void normalizel1(int N, unsigned int flags, const long dims[N], complex float* maps) { long dims_img[N]; md_select_dims(N, ~flags, dims_img, dims); complex float* maps_norm = md_alloc(N, dims_img, CFL_SIZE); complex float* maps_abs = md_alloc(N, dims, CFL_SIZE); md_zabs(N, dims, maps_abs, maps); long strs[N]; long 
strs_img[N]; md_calc_strides(N, strs_img, dims_img, CFL_SIZE); md_calc_strides(N, strs, dims, CFL_SIZE); md_clear(N, dims_img, maps_norm, CFL_SIZE); md_zadd2(N, dims, strs_img, maps_norm, strs_img, maps_norm, strs, maps_abs); md_free(maps_abs); long str[N]; long str_img[N]; md_calc_strides(N, str, dims, CFL_SIZE); md_calc_strides(N, str_img, dims_img, CFL_SIZE); md_zdiv2(N, dims, str, maps, str, maps, str_img, maps_norm); md_free(maps_norm); } /* * rotate phase jointly along dim so that the 0-th slice along dim has phase = 0 * */ void fixphase(unsigned int N, const long dims[N], unsigned int dim, complex float* out, const complex float* in) { assert(dim < N); long dims2[N]; md_select_dims(N, ~MD_BIT(dim), dims2, dims); complex float* tmp = md_alloc_sameplace(N, dims2, CFL_SIZE, in); long pos[N]; for (unsigned int i = 0; i < N; i++) pos[i] = 0; md_slice(N, MD_BIT(dim), pos, dims, tmp, in, CFL_SIZE); md_zphsr(N, dims2, tmp, tmp); long strs[N]; long strs2[N]; md_calc_strides(N, strs, dims, CFL_SIZE); md_calc_strides(N, strs2, dims2, CFL_SIZE); md_zmulc2(N, dims, strs, out, strs, in, strs2, tmp); md_free(tmp); } void fixphase2(unsigned int N, const long dims[N], unsigned int dim, const complex float rot[dims[dim]], complex float* out, const complex float* in) { assert(dim < N); long strs[N]; md_calc_strides(N, strs, dims, CFL_SIZE); long dims2[N]; long strs2[N]; md_select_dims(N, ~MD_BIT(dim), dims2, dims); md_calc_strides(N, strs2, dims2, CFL_SIZE); complex float* tmp = md_alloc_sameplace(N, dims2, CFL_SIZE, in); long tdims[N]; long tstrs[N]; md_select_dims(N, MD_BIT(dim), tdims, dims); md_calc_strides(N, tstrs, tdims, CFL_SIZE); md_clear(N, dims2, tmp, CFL_SIZE); md_zfmac2(N, dims, strs2, tmp, tstrs, rot, strs, in); md_zphsr(N, dims2, tmp, tmp); md_zmulc2(N, dims, strs, out, strs, in, strs2, tmp); md_free(tmp); } bart-0.4.02/src/misc/utils.h000066400000000000000000000014451320577655200156000ustar00rootroot00000000000000 #include "misc/cppwrap.h" extern void 
normalizel1(int N, unsigned int flags, const long dims[__VLA(N)], _Complex float* maps); extern void normalize(int N, unsigned int flags, const long dims[__VLA(N)], _Complex float* maps); extern _Complex float* compute_mask(unsigned int N, const long msk_dims[__VLA(N)], const float restrict_fov[__VLA(N)]); extern void apply_mask(unsigned int N, const long dims[__VLA(N)], _Complex float* x, const float restrict_fov[__VLA(N)]); extern void fixphase(unsigned int D, const long dims[__VLA(D)], unsigned int dim, _Complex float* out, const _Complex float* in); extern void fixphase2(unsigned int D, const long dims[__VLA(D)], unsigned int dim, const _Complex float rot[__VLA(dims[dim])], _Complex float* out, const _Complex float* in); #include "misc/cppwrap.h" bart-0.4.02/src/misc/version.c000066400000000000000000000002071320577655200161130ustar00rootroot00000000000000 #include "version.h" #define STRINGIFY(x) # x #define VERSION(x) STRINGIFY(x) const char* bart_version = #include "version.inc" ; bart-0.4.02/src/misc/version.h000066400000000000000000000000441320577655200161170ustar00rootroot00000000000000 extern const char* bart_version; bart-0.4.02/src/nlinv.c000066400000000000000000000111251320577655200146220ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2016 Martin Uecker */ #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/init.h" #include "misc/mri.h" #include "misc/misc.h" #include "misc/mmio.h" #include "misc/utils.h" #include "misc/opts.h" #include "misc/debug.h" #include "noir/recon.h" static const char usage_str[] = " []"; static const char help_str[] = "Jointly estimate image and sensitivities with nonlinear\n" "inversion using {iter} iteration steps. 
Optionally outputs\n" "the sensitivities."; int main_nlinv(int argc, char* argv[]) { bool normalize = true; float restrict_fov = -1.; const char* psf = NULL; struct noir_conf_s conf = noir_defaults; bool out_sens = false; bool scale_im = false; const struct opt_s opts[] = { OPT_UINT('i', &conf.iter, "iter", ""), OPT_SET('c', &conf.rvc, ""), OPT_CLEAR('N', &normalize, ""), OPT_FLOAT('f', &restrict_fov, "FOV", ""), OPT_STRING('p', &psf, "PSF", ""), OPT_SET('g', &conf.usegpu, "use gpu"), OPT_SET('S', &scale_im, "Re-scale image after reconstruction"), }; cmdline(&argc, argv, 2, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); if (4 == argc) out_sens = true; num_init(); long ksp_dims[DIMS]; complex float* kspace_data = load_cfl(argv[1], DIMS, ksp_dims); // SMS if (1 != ksp_dims[SLICE_DIM]) { debug_printf(DP_INFO, "SMS-NLINV reconstruction. Multiband factor: %d\n", ksp_dims[SLICE_DIM]); fftmod(DIMS, ksp_dims, SLICE_FLAG, kspace_data, kspace_data); // fftmod to get correct slice order in output } assert(1 == ksp_dims[MAPS_DIM]); long dims[DIMS]; md_copy_dims(DIMS, dims, ksp_dims); long img_dims[DIMS]; md_select_dims(DIMS, FFT_FLAGS|CSHIFT_FLAG|SLICE_FLAG, img_dims, dims); long img_strs[DIMS]; md_calc_strides(DIMS, img_strs, img_dims, CFL_SIZE); complex float* image = create_cfl(argv[2], DIMS, img_dims); long msk_dims[DIMS]; md_select_dims(DIMS, FFT_FLAGS, msk_dims, dims); long msk_strs[DIMS]; md_calc_strides(DIMS, msk_strs, msk_dims, CFL_SIZE); complex float* mask; complex float* norm = md_alloc(DIMS, img_dims, CFL_SIZE); complex float* sens = (out_sens ? create_cfl : anon_cfl)(out_sens ? 
argv[3] : "", DIMS, ksp_dims); complex float* pattern = NULL; long pat_dims[DIMS]; if (NULL != psf) { pattern = load_cfl(psf, DIMS, pat_dims); // FIXME: check compatibility if (-1 == restrict_fov) restrict_fov = 0.5; conf.noncart = true; } else { md_copy_dims(DIMS, pat_dims, img_dims); pattern = anon_cfl("", DIMS, pat_dims); estimate_pattern(DIMS, ksp_dims, COIL_FLAG, pattern, kspace_data); } #if 0 float scaling = 1. / estimate_scaling(ksp_dims, NULL, kspace_data); #else double scaling = 100. / md_znorm(DIMS, ksp_dims, kspace_data); if (1 != ksp_dims[SLICE_DIM]) // SMS scaling *= sqrt(ksp_dims[SLICE_DIM]); #endif debug_printf(DP_INFO, "Scaling: %f\n", scaling); md_zsmul(DIMS, ksp_dims, kspace_data, kspace_data, scaling); if (-1. == restrict_fov) { mask = md_alloc(DIMS, msk_dims, CFL_SIZE); md_zfill(DIMS, msk_dims, mask, 1.); } else { float restrict_dims[DIMS] = { [0 ... DIMS - 1] = 1. }; restrict_dims[0] = restrict_fov; restrict_dims[1] = restrict_fov; restrict_dims[2] = restrict_fov; mask = compute_mask(DIMS, msk_dims, restrict_dims); } #ifdef USE_CUDA if (conf.usegpu) { complex float* kspace_gpu = md_alloc_gpu(DIMS, ksp_dims, CFL_SIZE); md_copy(DIMS, ksp_dims, kspace_gpu, kspace_data, CFL_SIZE); noir_recon(&conf, dims, image, NULL, pattern, mask, kspace_gpu); md_free(kspace_gpu); md_zfill(DIMS, ksp_dims, sens, 1.); } else #endif noir_recon(&conf, dims, image, sens, pattern, mask, kspace_data); if (normalize) { md_zrss(DIMS, ksp_dims, COIL_FLAG, norm, sens); md_zmul2(DIMS, img_dims, img_strs, image, img_strs, image, img_strs, norm); } if (out_sens) { long strs[DIMS]; md_calc_strides(DIMS, strs, ksp_dims, CFL_SIZE); if (normalize) md_zdiv2(DIMS, ksp_dims, strs, sens, strs, sens, img_strs, norm); fftmod(DIMS, ksp_dims, FFT_FLAGS, sens, sens); } if (scale_im) md_zsmul(DIMS, img_dims, image, image, 1. 
/ scaling); md_free(norm); md_free(mask); unmap_cfl(DIMS, ksp_dims, sens); unmap_cfl(DIMS, pat_dims, pattern); unmap_cfl(DIMS, img_dims, image); unmap_cfl(DIMS, ksp_dims, kspace_data); exit(0); } bart-0.4.02/src/nlops/000077500000000000000000000000001320577655200144635ustar00rootroot00000000000000bart-0.4.02/src/nlops/nlop.c000066400000000000000000000076521320577655200156110ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Martin Uecker */ #include "num/multind.h" #include "num/ops.h" #include "linops/linop.h" #include "misc/shrdptr.h" #include "misc/misc.h" #include "misc/types.h" #include "nlop.h" #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif struct nlop_op_data_s { INTERFACE(operator_data_t); nlop_data_t* data; nlop_del_fun_t del; struct shared_ptr_s sptr; nlop_fun_t forward; }; static DEF_TYPEID(nlop_op_data_s); struct nlop_linop_data_s { INTERFACE(linop_data_t); nlop_data_t* data; nlop_del_fun_t del; struct shared_ptr_s sptr; nlop_fun_t deriv; nlop_fun_t adjoint; }; static DEF_TYPEID(nlop_linop_data_s); static void sptr_op_del(const struct shared_ptr_s* sptr) { struct nlop_op_data_s* data = CONTAINER_OF(sptr, struct nlop_op_data_s, sptr); data->del(data->data); } static void sptr_linop_del(const struct shared_ptr_s* sptr) { struct nlop_linop_data_s* data = CONTAINER_OF(sptr, struct nlop_linop_data_s, sptr); data->del(data->data); } static void op_fun(const operator_data_t* _data, unsigned int N, void* args[__VLA(N)]) { const struct nlop_op_data_s* data = CAST_DOWN(nlop_op_data_s, _data); data->forward(data->data, args[0], args[1]); // FIXME: check } static void op_del(const operator_data_t* _data) { const struct nlop_op_data_s* data = CAST_DOWN(nlop_op_data_s, _data); shared_ptr_destroy(&data->sptr); xfree(data); } static void lop_der(const linop_data_t* _data, complex float* dst, const 
complex float* src) { const struct nlop_linop_data_s* data = CAST_DOWN(nlop_linop_data_s, _data); data->deriv(data->data, dst, src); } static void lop_adj(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct nlop_linop_data_s* data = CAST_DOWN(nlop_linop_data_s, _data); data->adjoint(data->data, dst, src); } static void lop_del(const linop_data_t* _data) { const struct nlop_linop_data_s* data = CAST_DOWN(nlop_linop_data_s, _data); shared_ptr_destroy(&data->sptr); xfree(data); } struct nlop_s* nlop_create2(unsigned int ON, const long odims[__VLA(ON)], const long ostrs[__VLA(ON)], unsigned int IN, const long idims[__VLA(IN)], const long istrs[__VLA(IN)], nlop_data_t* data, nlop_fun_t forward, nlop_fun_t deriv, nlop_fun_t adjoint, nlop_fun_t normal, nlop_p_fun_t norm_inv, nlop_del_fun_t del) { PTR_ALLOC(struct nlop_s, n); PTR_ALLOC(struct nlop_op_data_s, d); SET_TYPEID(nlop_op_data_s, d); d->data = data; d->forward = forward; d->del = del; shared_ptr_init(&d->sptr, sptr_op_del); PTR_ALLOC(struct nlop_linop_data_s, d2); SET_TYPEID(nlop_linop_data_s, d2); d2->data = data; d2->del = del; d2->deriv = deriv; d2->adjoint = adjoint; assert(NULL == normal); assert(NULL == norm_inv); shared_ptr_copy(&d2->sptr, &d->sptr); d2->sptr.del = sptr_linop_del; n->op = operator_create2(ON, odims, ostrs, IN, idims, istrs, CAST_UP(PTR_PASS(d)), op_fun, op_del); n->derivative = linop_create2(ON, odims, ostrs, IN, idims, istrs, CAST_UP(PTR_PASS(d2)), lop_der, lop_adj, NULL, NULL, lop_del); //linop_create return PTR_PASS(n); } struct nlop_s* nlop_create(unsigned int ON, const long odims[__VLA(ON)], unsigned int IN, const long idims[__VLA(IN)], nlop_data_t* data, nlop_fun_t forward, nlop_fun_t deriv, nlop_fun_t adjoint, nlop_fun_t normal, nlop_p_fun_t norm_inv, nlop_del_fun_t del) { return nlop_create2( ON, odims, MD_STRIDES(ON, odims, CFL_SIZE), IN, idims, MD_STRIDES(IN, idims, CFL_SIZE), data, forward, deriv, adjoint, normal, norm_inv, del); } void 
nlop_free(const struct nlop_s* op) { operator_free(op->op); linop_free(op->derivative); xfree(op); } nlop_data_t* nlop_get_data(struct nlop_s* op) { struct nlop_op_data_s* data2 = CAST_DOWN(nlop_op_data_s, operator_get_data(op->op)); return data2->data; } bart-0.4.02/src/nlops/nlop.h000066400000000000000000000026551320577655200156140ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Martin Uecker */ #include #include "linops/linop.h" typedef struct nlop_data_s { TYPEID* TYPEID; } nlop_data_t; typedef void (*nlop_fun_t)(const nlop_data_t* _data, complex float* dst, const complex float* src); typedef void (*nlop_p_fun_t)(const nlop_data_t* _data, float lambda, complex float* dst, const complex float* src); typedef void (*nlop_del_fun_t)(const nlop_data_t* _data); struct operator_s; struct linop_s; struct nlop_s { const struct operator_s* op; const struct linop_s* derivative; }; extern struct nlop_s* nlop_create(unsigned int ON, const long odims[__VLA(ON)], unsigned int IN, const long idims[__VLA(IN)], nlop_data_t* data, nlop_fun_t forward, nlop_fun_t deriv, nlop_fun_t adjoint, nlop_fun_t normal, nlop_p_fun_t norm_inv, nlop_del_fun_t); extern struct nlop_s* nlop_create2(unsigned int ON, const long odims[__VLA(ON)], const long ostr[__VLA(ON)], unsigned int IN, const long idims[__VLA(IN)], const long istrs[__VLA(IN)], nlop_data_t* data, nlop_fun_t forward, nlop_fun_t deriv, nlop_fun_t adjoint, nlop_fun_t normal, nlop_p_fun_t norm_inv, nlop_del_fun_t); extern void nlop_free(const struct nlop_s* op); extern nlop_data_t* nlop_get_data(struct nlop_s* op); bart-0.4.02/src/noir/000077500000000000000000000000001320577655200142775ustar00rootroot00000000000000bart-0.4.02/src/noir/model.c000066400000000000000000000204071320577655200155460ustar00rootroot00000000000000/* Copyright 2013. 
The Regents of the University of California.
 * Copyright 2017. Martin Uecker.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
 *
 * Authors:
 * 2011-2012,2017 Martin Uecker
 *
 *
 * Uecker M, Hohage T, Block KT, Frahm J. Image reconstruction by regularized nonlinear
 * inversion – Joint estimation of coil sensitivities and image content.
 * Magn Reson Med 2008; 60:674-682.
 */

/* NOTE(review): the operands of the five bare #include directives below are
 * missing in this copy of the file. The code uses `complex`, `false`,
 * `assert` and `free`, so presumably <complex.h>, <stdbool.h>, <assert.h>
 * and <stdlib.h> are among them - confirm against the upstream sources. */
#include
#include
#include
#include
#include

#include "misc/misc.h"
#include "misc/mri.h"
#include "misc/debug.h"

#include "num/fft.h"
#include "num/multind.h"
#include "num/flpmath.h"
#include "num/filter.h"

#include "model.h"



/* default configuration: FFT along FFT_FLAGS, all options off */
struct noir_model_conf_s noir_model_conf_defaults = {

	.fft_flags = FFT_FLAGS,
	.rvc = false,
	.use_gpu = false,
	.noncart = false,
};



/* state of the forward model */
struct noir_data {

	long dims[DIMS];

	// dimensions and strides of the arrays used by the model
	long sign_dims[DIMS];
	long sign_strs[DIMS];

	long data_dims[DIMS];
	long data_strs[DIMS];

	long coil_dims[DIMS];
	long coil_strs[DIMS];

	long imgs_dims[DIMS];
	long imgs_strs[DIMS];

	long mask_dims[DIMS];
	long mask_strs[DIMS];

	long ptrn_dims[DIMS];
	long ptrn_strs[DIMS];

	long wght_dims[DIMS];
	long wght_strs[DIMS];


	const complex float* pattern;		// copy of the psf after fftmod
	const complex float* adj_pattern;	// pattern used in noir_adj
	const complex float* mask;		// mask after fftscale
	const complex float* weights;		// weights applied to the coil part

	complex float* sens;	// coils computed by the last call to noir_fun
	complex float* xn;	// image part of the last argument of noir_fun

	complex float* tmp;	// work buffer of size sign_dims

	struct noir_model_conf_s conf;
};



/*
 * Fill dst (dims) with the weights for the coil part:
 * dst = (1. + 220. * L)^(-16), where L is the array filled in by
 * klaplace() for all dimensions of size != 1.
 */
static void noir_calc_weights(const long dims[3], complex float* dst)
{
	unsigned int flags = 0;

	for (int i = 0; i < 3; i++)
		if (1 != dims[i])
			flags = MD_SET(flags, i);

	klaplace(3, dims, flags, dst);
	md_zsmul(3, dims, dst, dst, 220.);
	md_zsadd(3, dims, dst, dst, 1.);
	md_zspow(3, dims, dst, dst, -16.);	// (1 + 220. \Laplace)^-16
}


/*
 * Allocate and initialize the state of the model.
 *
 * dims  full dimensions
 * mask  image-domain mask (mask_dims) or NULL for a mask of ones
 *       (NULL is not allowed together with conf->use_gpu)
 * psf   sampling pattern (ptrn_dims)
 * conf  configuration, copied into the state
 *
 * The returned state has to be released with noir_free.
 */
struct noir_data* noir_init(const long dims[DIMS], const complex float* mask, const complex float* psf, const struct noir_model_conf_s* conf)
{
#ifdef USE_CUDA
	md_alloc_fun_t my_alloc = conf->use_gpu ? md_alloc_gpu : md_alloc;
#else
	assert(!conf->use_gpu);
	md_alloc_fun_t my_alloc = md_alloc;
#endif

	PTR_ALLOC(struct noir_data, data);

	data->conf = *conf;

	md_copy_dims(DIMS, data->dims, dims);

	// derive the dimensions and strides of all arrays from dims

	md_select_dims(DIMS, conf->fft_flags|COIL_FLAG|CSHIFT_FLAG, data->sign_dims, dims);
	md_calc_strides(DIMS, data->sign_strs, data->sign_dims, CFL_SIZE);

	md_select_dims(DIMS, conf->fft_flags|COIL_FLAG|MAPS_FLAG, data->coil_dims, dims);
	md_calc_strides(DIMS, data->coil_strs, data->coil_dims, CFL_SIZE);

	md_select_dims(DIMS, conf->fft_flags|MAPS_FLAG|CSHIFT_FLAG, data->imgs_dims, dims);
	md_calc_strides(DIMS, data->imgs_strs, data->imgs_dims, CFL_SIZE);

	md_select_dims(DIMS, conf->fft_flags|COIL_FLAG, data->data_dims, dims);
	md_calc_strides(DIMS, data->data_strs, data->data_dims, CFL_SIZE);

	md_select_dims(DIMS, FFT_FLAGS, data->mask_dims, dims);
	md_calc_strides(DIMS, data->mask_strs, data->mask_dims, CFL_SIZE);

	md_select_dims(DIMS, FFT_FLAGS, data->wght_dims, dims);
	md_calc_strides(DIMS, data->wght_strs, data->wght_dims, CFL_SIZE);

	md_select_dims(DIMS, conf->fft_flags|CSHIFT_FLAG, data->ptrn_dims, dims);
	md_calc_strides(DIMS, data->ptrn_strs, data->ptrn_dims, CFL_SIZE);


	// weights are computed on the CPU (and moved to the GPU below, if used)
	complex float* weights = md_alloc(DIMS, data->wght_dims, CFL_SIZE);

	noir_calc_weights(dims, weights);
	fftmod(DIMS, data->wght_dims, FFT_FLAGS, weights, weights);
	fftscale(DIMS, data->wght_dims, FFT_FLAGS, weights, weights);

	data->weights = weights;

#ifdef USE_CUDA
	if (conf->use_gpu) {

		data->weights = md_gpu_move(DIMS, data->wght_dims, weights, CFL_SIZE);
		md_free(weights);
	}
#endif


	// private copy of the sampling pattern
	complex float* ptr = my_alloc(DIMS, data->ptrn_dims, CFL_SIZE);

	md_copy(DIMS, data->ptrn_dims, ptr, psf, CFL_SIZE);
	fftmod(DIMS, data->ptrn_dims, conf->fft_flags, ptr, ptr);

	data->pattern = ptr;

	complex float* adj_pattern = my_alloc(DIMS, data->ptrn_dims, CFL_SIZE);

	if (!conf->noncart) {

		// conjugate of the pattern
		md_zconj(DIMS, data->ptrn_dims, adj_pattern, ptr);

	} else {

		// only ifftmod applied to ones
		md_zfill(DIMS, data->ptrn_dims, adj_pattern, 1.);
		ifftmod(DIMS, data->ptrn_dims, conf->fft_flags, adj_pattern, adj_pattern);
	}

	data->adj_pattern = adj_pattern;


	complex float* msk = my_alloc(DIMS, data->mask_dims, CFL_SIZE);

	if (NULL == mask) {

		assert(!conf->use_gpu);
		md_zfill(DIMS, data->mask_dims, msk, 1.);

	} else {

		md_copy(DIMS, data->mask_dims, msk, mask, CFL_SIZE);
	}

//	fftmod(DIMS, data->mask_dims, 7, msk, msk);
	fftscale(DIMS, data->mask_dims, FFT_FLAGS, msk, msk);

	data->mask = msk;

	data->sens = my_alloc(DIMS, data->coil_dims, CFL_SIZE);
	data->xn = my_alloc(DIMS, data->imgs_dims, CFL_SIZE);
	data->tmp = my_alloc(DIMS, data->sign_dims, CFL_SIZE);

	return PTR_PASS(data);
}


/* Release all arrays owned by the state and the state itself. */
void noir_free(struct noir_data* data)
{
	md_free(data->pattern);
	md_free(data->mask);
	md_free(data->xn);
	md_free(data->sens);
	md_free(data->weights);
	md_free(data->tmp);
	md_free(data->adj_pattern);
	free(data);
}


/* coil part of the unknowns -> coils: multiply with the weights, then ifft along FFT_FLAGS */
void noir_forw_coils(struct noir_data* data, complex float* dst, const complex float* src)
{
	md_zmul2(DIMS, data->coil_dims, data->coil_strs, dst, data->coil_strs, src, data->wght_strs, data->weights);
	ifft(DIMS, data->coil_dims, FFT_FLAGS, dst, dst);
//	fftmod(DIMS, data->coil_dims, 7, dst);
}


/* counterpart of noir_forw_coils: fft along FFT_FLAGS, then md_zmulc2 with the weights */
void noir_back_coils(struct noir_data* data, complex float* dst, const complex float* src)
{
//	fftmod(DIMS, data->coil_dims, 7, dst);
	fft(DIMS, data->coil_dims, FFT_FLAGS, dst, src);
	md_zmulc2(DIMS, data->coil_dims, data->coil_strs, dst, data->coil_strs, dst, data->wght_strs, data->weights);
}


/*
 * Forward model.
 *
 * src holds the image (imgs_dims) followed by the coil part
 * (coil_dims), dst has size data_dims. The image is saved in data->xn
 * and the coils in data->sens for later use by noir_der / noir_adj.
 */
void noir_fun(struct noir_data* data, complex float* dst, const complex float* src)
{
	// offset (in elements) of the coil part in src
	long split = md_calc_size(DIMS, data->imgs_dims);

	md_copy(DIMS, data->imgs_dims, data->xn, src, CFL_SIZE);
	noir_forw_coils(data, data->sens, src + split);

	// image times coils
	md_clear(DIMS, data->sign_dims, data->tmp, CFL_SIZE);
	md_zfmac2(DIMS, data->sign_dims, data->sign_strs, data->tmp, data->imgs_strs, src, data->coil_strs, data->sens);

	// could be moved to the beginning, but see comment below
	md_zmul2(DIMS, data->sign_dims, data->sign_strs, data->tmp, data->sign_strs, data->tmp, data->mask_strs, data->mask);

	fft(DIMS, data->sign_dims, data->conf.fft_flags, data->tmp, data->tmp);

	// apply the sampling pattern
	md_clear(DIMS, data->data_dims, dst, CFL_SIZE);
	md_zfmac2(DIMS, data->sign_dims, data->data_strs, dst, data->sign_strs, data->tmp, data->ptrn_strs, data->pattern);
}


/*
 * Derivative of the forward model at the point of the last call to
 * noir_fun (uses data->xn and data->sens); src and dst are laid out
 * as in noir_fun.
 */
void noir_der(struct noir_data* data, complex float* dst, const complex float* src)
{
	long split = md_calc_size(DIMS, data->imgs_dims);

	// image update times saved coils
	md_clear(DIMS, data->sign_dims, data->tmp, CFL_SIZE);
	md_zfmac2(DIMS, data->sign_dims, data->sign_strs, data->tmp, data->imgs_strs, src, data->coil_strs, data->sens);

	// plus coil update times saved image
	complex float* delta_coils = md_alloc_sameplace(DIMS, data->coil_dims, CFL_SIZE, src);
	noir_forw_coils(data, delta_coils, src + split);
	md_zfmac2(DIMS, data->sign_dims, data->sign_strs, data->tmp, data->coil_strs, delta_coils, data->imgs_strs, data->xn);
	md_free(delta_coils);

	// could be moved to the beginning, but see comment below
	md_zmul2(DIMS, data->sign_dims, data->sign_strs, data->tmp, data->sign_strs, data->tmp, data->mask_strs, data->mask);

	fft(DIMS, data->sign_dims, data->conf.fft_flags, data->tmp, data->tmp);

	md_clear(DIMS, data->data_dims, dst, CFL_SIZE);
	md_zfmac2(DIMS, data->sign_dims, data->data_strs, dst, data->sign_strs, data->tmp, data->ptrn_strs, data->pattern);
}


/*
 * Adjoint of the derivative (uses data->xn and data->sens).
 *
 * src has size data_dims; dst receives the image part (imgs_dims)
 * followed by the coil part (coil_dims). With conf.rvc set, md_zreal
 * is applied to the image part.
 */
void noir_adj(struct noir_data* data, complex float* dst, const complex float* src)
{
	long split = md_calc_size(DIMS, data->imgs_dims);

	md_zmul2(DIMS, data->sign_dims, data->sign_strs, data->tmp, data->data_strs, src, data->ptrn_strs, data->adj_pattern);

	ifft(DIMS, data->sign_dims, data->conf.fft_flags, data->tmp, data->tmp);

	// we should move it to the end, but fft scaling is applied so this would need to be moved into data->xn or weights maybe?
	md_zmulc2(DIMS, data->sign_dims, data->sign_strs, data->tmp, data->sign_strs, data->tmp, data->mask_strs, data->mask);

	// coil part
	md_clear(DIMS, data->coil_dims, dst + split, CFL_SIZE);
	md_zfmacc2(DIMS, data->sign_dims, data->coil_strs, dst + split, data->sign_strs, data->tmp, data->imgs_strs, data->xn);

	noir_back_coils(data, dst + split, dst + split);

	// image part
	md_clear(DIMS, data->imgs_dims, dst, CFL_SIZE);
	md_zfmacc2(DIMS, data->sign_dims, data->imgs_strs, dst, data->sign_strs, data->tmp, data->coil_strs, data->sens);

	if (data->conf.rvc)
		md_zreal(DIMS, data->imgs_dims, dst, dst);
}



bart-0.4.02/src/noir/model.h000066400000000000000000000020461320577655200155520ustar00rootroot00000000000000
/* Copyright 2013. The Regents of the University of California.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
 */

#include "misc/mri.h"

/* opaque state of the forward model, created by noir_init */
struct noir_data;

/* forward model, its derivative and the adjoint of the derivative */
extern void noir_fun(struct noir_data*, complex float* dst, const complex float* src);
extern void noir_der(struct noir_data*, complex float* dst, const complex float* src);
extern void noir_adj(struct noir_data*, complex float* dst, const complex float* src);

/* conversion of the coil part of the unknowns to coils and back */
extern void noir_forw_coils(struct noir_data* data, complex float* dst, const complex float* src);
extern void noir_back_coils(struct noir_data* data, complex float* dst, const complex float* src);

struct noir_model_conf_s {

	unsigned int fft_flags;	// dimensions transformed by the model
	_Bool rvc;		// noir_adj keeps only the md_zreal result of the image part
	_Bool use_gpu;		// allocate the model arrays with md_alloc_gpu
	_Bool noncart;		// selects the adjoint pattern in noir_init
};

extern struct noir_model_conf_s noir_model_conf_defaults;

extern struct noir_data* noir_init(const long dims[DIMS], const complex float* mask, const complex float* psf, const struct noir_model_conf_s* conf);
extern void noir_free(struct noir_data* data);

bart-0.4.02/src/noir/nl.c000066400000000000000000000034551320577655200150630ustar00rootroot00000000000000
/* Copyright 2017. Martin Uecker.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
* * Authors: * 2017 Martin Uecker */ #include "num/multind.h" #include "nlops/nlop.h" #include "noir/model.h" #include "misc/types.h" #include "misc/misc.h" #include "misc/mri.h" #include "nl.h" struct noir_op_s { INTERFACE(nlop_data_t); struct noir_data* data; }; DEF_TYPEID(noir_op_s); static void noir2_for(const nlop_data_t* _data, complex float* dst, const complex float* src) { struct noir_op_s* data = CAST_DOWN(noir_op_s, _data); noir_fun(data->data, dst, src); } static void noir2_der(const nlop_data_t* _data, complex float* dst, const complex float* src) { struct noir_op_s* data = CAST_DOWN(noir_op_s, _data); noir_der(data->data, dst, src); } static void noir2_adj(const nlop_data_t* _data, complex float* dst, const complex float* src) { struct noir_op_s* data = CAST_DOWN(noir_op_s, _data); noir_adj(data->data, dst, src); } static void noir2_del(const nlop_data_t* _data) { struct noir_op_s* data = CAST_DOWN(noir_op_s, _data); noir_free(data->data); xfree(data); } struct nlop_s* noir_create(const long dims[DIMS], const complex float* mask, const complex float* psf, const struct noir_model_conf_s* conf) { PTR_ALLOC(struct noir_op_s, data); SET_TYPEID(noir_op_s, data); data->data = noir_init(dims, mask, psf, conf); long idims[DIMS]; md_select_dims(DIMS, conf->fft_flags|MAPS_FLAG|CSHIFT_FLAG, idims, dims); return nlop_create(DIMS, dims, DIMS, idims, CAST_UP(PTR_PASS(data)), noir2_for, noir2_der, noir2_adj, NULL, NULL, noir2_del); } struct noir_data* noir_get_data(struct nlop_s* op) { struct noir_op_s* data = CAST_DOWN(noir_op_s, nlop_get_data(op)); return data->data; } bart-0.4.02/src/noir/nl.h000066400000000000000000000011511320577655200150570ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2017 Martin Uecker */ struct noir_model_conf_s; extern struct noir_model_conf_s noir_model_conf_defaults; struct nlop_s; extern struct nlop_s* noir_create(const long dims[DIMS], const complex float* mask, const complex float* psf, const struct noir_model_conf_s* conf); struct noir_data; extern struct noir_data* noir_get_data(struct nlop_s* op); extern void nlop_free(const struct nlop_s*); bart-0.4.02/src/noir/recon.c000066400000000000000000000056231320577655200155570ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2011-2017 Martin Uecker * * * Uecker M, Hohage T, Block KT, Frahm J. Image reconstruction by regularized * nonlinear inversion – Joint estimation of coil sensitivities and image content. * Magn Reson Med 2008; 60:674-682. */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "iter/iter3.h" #include "iter/iter4.h" #include "iter/thresh.h" #include "iter/italgos.h" #include "misc/misc.h" #include "misc/types.h" #include "misc/mri.h" #include "misc/debug.h" #include "noir/model.h" #include "noir/nl.h" #include "recon.h" const struct noir_conf_s noir_defaults = { .iter = 8, .rvc = false, .usegpu = false, .noncart = false, .alpha = 1., .redu = 2., }; void noir_recon(const struct noir_conf_s* conf, const long dims[DIMS], complex float* outbuf, complex float* sensout, const complex float* psf, const complex float* mask, const complex float* kspace) { long imgs_dims[DIMS]; long coil_dims[DIMS]; long data_dims[DIMS]; long img1_dims[DIMS]; unsigned int fft_flags = FFT_FLAGS|SLICE_FLAG; md_select_dims(DIMS, fft_flags|MAPS_FLAG|CSHIFT_FLAG, imgs_dims, dims); md_select_dims(DIMS, fft_flags|COIL_FLAG|MAPS_FLAG, coil_dims, dims); md_select_dims(DIMS, fft_flags|COIL_FLAG, 
data_dims, dims); md_select_dims(DIMS, fft_flags, img1_dims, dims); long skip = md_calc_size(DIMS, imgs_dims); long size = skip + md_calc_size(DIMS, coil_dims); long data_size = md_calc_size(DIMS, data_dims); long d1[1] = { size }; complex float* img = md_alloc_sameplace(1, d1, CFL_SIZE, kspace); md_clear(DIMS, imgs_dims, img, CFL_SIZE); md_zfill(DIMS, img1_dims, outbuf, 1.); // initial only first image md_copy(DIMS, img1_dims, img, outbuf, CFL_SIZE); md_clear(DIMS, coil_dims, img + skip, CFL_SIZE); struct noir_model_conf_s mconf = noir_model_conf_defaults; mconf.rvc = conf->rvc; mconf.use_gpu = conf->usegpu; mconf.noncart = conf->noncart; mconf.fft_flags = fft_flags; struct nlop_s* nlop = noir_create(dims, mask, psf, &mconf); struct iter3_irgnm_conf irgnm_conf = iter3_irgnm_defaults; irgnm_conf.iter = conf->iter; irgnm_conf.alpha = conf->alpha; irgnm_conf.redu = conf->redu; irgnm_conf.cgtol = 0.1f; irgnm_conf.nlinv_legacy = true; iter4_irgnm(CAST_UP(&irgnm_conf), nlop, size * 2, (float*)img, NULL, data_size * 2, (const float*)kspace); md_copy(DIMS, imgs_dims, outbuf, img, CFL_SIZE); if (NULL != sensout) { assert(!conf->usegpu); noir_forw_coils(noir_get_data(nlop), sensout, img + skip); fftmod(DIMS, coil_dims, fft_flags, sensout, sensout); } nlop_free(nlop); md_free(img); } bart-0.4.02/src/noir/recon.h000066400000000000000000000012621320577655200155570ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #include "misc/cppwrap.h" #include "misc/mri.h" struct noir_conf_s { unsigned int iter; _Bool rvc; _Bool usegpu; _Bool noncart; float alpha; float redu; }; extern const struct noir_conf_s noir_defaults; extern void noir_recon(const struct noir_conf_s* conf, const long dims[DIMS], _Complex float* image, _Complex float* sens, const _Complex float* pattern, const _Complex float* mask, const _Complex float* kspace_data); #include "misc/cppwrap.h" bart-0.4.02/src/noise.c000066400000000000000000000032361320577655200146150ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2016 Martin Uecker * 2013 Jonathan Tamir */ #include #include #include #include "num/multind.h" #include "num/rand.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Add noise with selected variance to input."; int main_noise(int argc, char* argv[]) { float var = 1.; float spike = 1.; bool rvc = false; int rinit = -1; const struct opt_s opts[] = { OPT_INT('s', &rinit, "", "random seed initialization"), OPT_FLOAT('S', &spike, "", "()"), OPT_SET('r', &rvc, "real-valued input"), OPT_FLOAT('n', &var, "variance", "DEFAULT: 1.0"), }; cmdline(&argc, argv, 2, 2, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); if (-1 != rinit) num_rand_init(rinit); unsigned int N = DIMS; long dims[N]; complex float* y = load_cfl(argv[1], N, dims); complex float* x = create_cfl(argv[2], N, dims); long T = md_calc_size(N, dims); // scale var for complex data if (!rvc) var = var / 2.f; float stdev = sqrtf(var); for (long i = 0; i < T; i++) { x[i] = y[i]; if (spike >= uniform_rand()) x[i] += stdev * gaussian_rand(); if (rvc) x[i] = crealf(x[i]); } 
unmap_cfl(N, dims, y); unmap_cfl(N, dims, x); exit(0); } bart-0.4.02/src/noncart/000077500000000000000000000000001320577655200147745ustar00rootroot00000000000000bart-0.4.02/src/noncart/grid.c000066400000000000000000000276601320577655200161000ustar00rootroot00000000000000/* Copyright 2014-2015 The Regents of the University of California. * Copyright 2015. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * 2011, 2015 Martin Uecker * 2014 Frank Ong */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/specfun.h" #include "misc/misc.h" #include "grid.h" #define KB_BETA 13.9086 // 13.8551 // 2x oversampling #define KB_WIDTH 3 #ifndef KB128 static double kb(double beta, double x) { if (fabs(x) >= 0.5) return 0.; return bessel_i0(beta * sqrt(1. - pow(2. * x, 2.))) / bessel_i0(beta); } static void kb_precompute(double beta, int n, float table[n + 1]) { for (int i = 0; i < n + 1; i++) table[i] = kb(beta, (double)(i) / (double)(n - 1) / 2.); } #endif static double I0_beta(double beta) { #ifndef KB128 return bessel_i0(beta); #else assert(KB_BETA == beta); return 118509.158946; #endif } const float kb_table128[129] = { 1.0000000000000000, 0.9995847139398653, 0.9983398161018390, 0.9962681840754728, 0.9933746024007669, 0.9896657454602714, 0.9851501536396374, 0.9798382028675283, 0.9737420676763386, 0.9668756779551011, 0.9592546695947362, 0.9508963292536357, 0.9418195334980564, 0.9320446825968820, 0.9215936292739139, 0.9104896027426631, 0.8987571283688524, 0.8864219433239096, 0.8735109086091393, 0.8600519178443380, 0.8460738032267725, 0.8316062390762707, 0.8166796433899514, 0.8013250778354499, 0.7855741466147803, 0.7694588946318385, 0.7530117053952898, 0.7362651990850234, 0.7192521312046796, 0.7020052922349470, 0.6845574086924412, 0.6669410459871208, 0.6491885134575039, 0.6313317719473744, 0.6134023442704702, 0.5954312288909031, 
0.5774488171268115, 0.5594848141632452, 0.5415681641375969, 0.5237269795371914, 0.5059884751240438, 0.4883789065765270, 0.4709235140117633, 0.4536464705263341, 0.4365708358662894, 0.4197185153108639, 0.4031102238276424, 0.3867654555305848, 0.3707024584462233, 0.3549382145678427, 0.3394884251524746, 0.3243675011914082, 0.3095885589616078, 0.2951634205431575, 0.2811026191666483, 0.2674154092344597, 0.2541097808412017, 0.2411924786012657, 0.2286690245755740, 0.2165437450752543, 0.2048198011071496, 0.1934992222148726, 0.1825829434594916, 0.1720708452759937, 0.1619617959353290, 0.1522536963371723, 0.1429435268554672, 0.1340273959573592, 0.1255005903162311, 0.1173576261411828, 0.1095923014483913, 0.1021977490043101, 0.0951664896765205, 0.0884904859351842, 0.0821611952563688, 0.0761696231879639, 0.0705063758493464, 0.0651617116473479, 0.0601255920032731, 0.0553877308986640, 0.0509376430610617, 0.0467646906251106, 0.0428581281188566, 0.0392071456399203, 0.0358009101012748, 0.0326286044415178, 0.0296794647097185, 0.0269428149500251, 0.0244080998261697, 0.0220649149406994, 0.0199030348181235, 0.0179124385351197, 0.0160833329944038, 0.0144061738517878, 0.0128716841182598, 0.0114708704705587, 0.0101950373146533, 0.0090357986567118, 0.0079850878455423, 0.0070351652590612, 0.0061786240150858, 0.0054083937936397, 0.0047177428639869, 0.0041002784147792, 0.0035499452900106, 0.0030610232369345, 0.0026281227747268, 0.0022461797944939, 0.0019104490022500, 0.0016164963167613, 0.0013601903336964, 0.0011376929663821, 0.0009454493716780, 0.0007801772670918, 0.0006388557423128, 0.0005187136648862, 0.0004172177758400, 0.0003320605667526, 0.0002611480250751, 0.0002025873295356, 0.0001546745722200, 0.0001158825784783, 0.0000848488902175, 0.0000603639724392, 0.0000413596971257, 0.0000268981528101, 0.0000161608224276, 0.0000000000000000, 0.0000000000000000, }; static double ftkb(double beta, double x) { double a = sqrt(pow(beta, 2.) - pow(M_PI * x, 2.)); return ((0. == a) ? 1. 
// Linear interpolation between a (c = 0) and b (c = 1)
static float lerp(float a, float b, float c)
{
	return (1. - c) * a + c * b;
}


/**
 * Linear interpolation look up.
 *
 * The argument x is scaled by (n - 1), so x = 0 maps to table[0] and
 * x = 1 maps to table[n - 1]; table[n] is only ever used as the upper
 * neighbour for interpolation.
 *
 * @param n     table size parameter; the table has n + 1 entries
 * @param table sampled function values, expected to lie in [0, 1]
 * @param x     lookup position, expected in [0, 1]
 * @return linearly interpolated table value
 */
static float intlookup(int n, const float table[n + 1], float x)
{
	float fpart;

	int index = (int)(x * (n - 1));
	fpart = x * (n - 1) - (float)index;

#if 1
	assert(index >= 0);
	// table[index + 1] is read below, so index must stay below n
	assert(index < n);
	assert(fpart >= 0.);
	assert(fpart <= 1.);
#endif
	float l = lerp(table[index], table[index + 1], fpart);

#if 1
	assert(l <= 1.);
	assert(l >= 0.);
#endif
	return l;
}
/**
 * Grid non-Cartesian samples onto an oversampled Cartesian grid.
 *
 * For every sample the trajectory coordinate (real part of three
 * consecutive traj entries) is scaled by os and shifted by half the grid
 * size along each non-singleton grid dimension; the values of all C
 * channels are then spread onto the grid by grid_point().
 *
 * @param os        oversampling factor applied to the trajectory
 * @param width     kernel width passed to grid_point
 * @param beta      Kaiser-Bessel beta (must equal KB_BETA when KB128 is defined)
 * @param traj      trajectory, 3 entries per sample
 * @param grid_dims grid dimensions; [0..2] spatial
 * @param grid      destination grid, accumulated into
 * @param ksp_dims  sample dimensions; [0] must be 1, [1]*[2] samples, [3] channels
 * @param src       sample values, channel-major (src[channel * samples + sample])
 */
void grid(float os, float width, double beta, const complex float* traj, const long grid_dims[4], complex float* grid, const long ksp_dims[4], const complex float* src)
{
	long C = ksp_dims[3];

#ifndef KB128
	// precompute kaiser bessel table
	int kb_size = 500;
	float kb_table[kb_size + 1];
	kb_precompute(beta, kb_size, kb_table);
#else
	assert(KB_BETA == beta);
	int kb_size = 128;
	const float* kb_table = kb_table128;
#endif

	assert(1 == ksp_dims[0]);
	long samples = ksp_dims[1] * ksp_dims[2];

	// grid; the counter is long to match the type of `samples`
#pragma omp parallel for
	for (long i = 0; i < samples; i++) {

		float pos[3];
		pos[0] = os * (creal(traj[i * 3 + 0]));
		pos[1] = os * (creal(traj[i * 3 + 1]));
		pos[2] = os * (creal(traj[i * 3 + 2]));

		// move the origin to the grid centre along non-singleton dimensions
		pos[0] += (grid_dims[0] > 1) ? ((float) grid_dims[0] / 2.) : 0.;
		pos[1] += (grid_dims[1] > 1) ? ((float) grid_dims[1] / 2.) : 0.;
		pos[2] += (grid_dims[2] > 1) ? ((float) grid_dims[2] / 2.) : 0.;

		complex float val[C];

		for (long j = 0; j < C; j++)
			val[j] = src[j * samples + i];

		grid_point(C, grid_dims, pos, grid, val, width, kb_size, kb_table);
	}
}
/**
 * Spread one multi-channel sample onto the grid.
 *
 * All grid points within +-width of pos (clamped to the grid) receive
 * val[c] scaled by the separable kernel weight looked up in kb_table.
 * Returns without touching dst if the clamped window is empty along any
 * axis.
 *
 * @param ch       number of channels
 * @param dims     grid size along the three spatial axes
 * @param pos      sample position in grid coordinates
 * @param dst      grid, channel c starts at offset c * dims[0]*dims[1]*dims[2]
 * @param val      sample value per channel
 * @param width    kernel half width in grid units
 * @param kb_size  kernel table size parameter
 * @param kb_table kernel table with kb_size + 1 entries
 */
void grid_point(unsigned int ch, const long dims[3], const float pos[3], complex float* dst, const complex float val[ch], float width, int kb_size, const float kb_table[kb_size + 1])
{
	int sti[3];
	int eni[3];

	for (int j = 0; j < 3; j++) {

		int st = MAX((int)ceil(pos[j] - width), 0);
		int en = MIN((int)floor(pos[j] + width), dims[j] - 1);

		if (st > en)
			return;

		sti[j] = st;
		eni[j] = en;
	}

	// number of grid points per channel
	const long ch_stride = dims[0] * dims[1] * dims[2];

	for (int w = sti[2]; w <= eni[2]; w++) {

		float fw = fabs(((float)w - pos[2]));
		float dw = 1. * intlookup(kb_size, kb_table, fw / width);
		// flat indices are long: dims are long and the product may exceed INT_MAX
		long indw = w * dims[1];

		for (int v = sti[1]; v <= eni[1]; v++) {

			float fv = fabs(((float)v - pos[1]));
			float dv = dw * intlookup(kb_size, kb_table, fv / width);
			long indv = (indw + v) * dims[0];

			for (int u = sti[0]; u <= eni[0]; u++) {

				float fu = fabs(((float)u - pos[0]));
				float du = dv * intlookup(kb_size, kb_table, fu / width);
				long indu = indv + u;

				for (unsigned int c = 0; c < ch; c++) {

					// we are allowed to update real and imaginary part independently which works atomically
					#pragma omp atomic
					__real(dst[indu + c * ch_stride]) += __real(val[c]) * du;
					#pragma omp atomic
					__imag(dst[indu + c * ch_stride]) += __imag(val[c]) * du;
				}
			}
		}
	}
}
/**
 * Centred, normalised coordinate of index i along an axis of length d:
 * (i - d/2) / d, or 0 for a singleton axis (d == 1).
 */
static float pos(int d, int i)
{
	if (1 == d)
		return 0.;

	return ((float)i - (float)d / 2.) / (float)d;
}
*/ #include #include "misc/cppwrap.h" extern const float kb_table128[129]; extern void grid(float os, float width, double beta, const complex float* traj, const long grid_dims[4], complex float* grid, const long ksp_dims[4], const complex float* src); extern void gridH(float os, float width, double beta, const complex float* traj, const long ksp_dims[4], complex float* dst, const long grid_dims[4], const complex float* grid); extern void grid2(float os, float width, double beta, unsigned int D, const long trj_dims[__VLA(D)], const complex float* traj, const long grid_dims[__VLA(D)], complex float* grid, const long ksp_dims[__VLA(D)], const complex float* src); extern void grid2H(float os, float width, double beta, unsigned int D, const long trj_dims[__VLA(D)], const complex float* traj, const long ksp_dims[__VLA(D)], complex float* dst, const long grid_dims[__VLA(D)], const complex float* grid); extern void grid_pointH(unsigned int ch, const long dims[3], const float pos[3], complex float val[__VLA(ch)], const complex float* src, float width, int kb_size, const float kb_table[__VLA(kb_size + 1)]); extern void grid_point(unsigned int ch, const long dims[3], const float pos[3], complex float* dst, const complex float val[__VLA(ch)], float width, int kb_size, const float kb_table[__VLA(kb_size + 1)]); extern double calc_beta(float os, float width); extern void rolloff_correction(float os, float width, float beta, const long dim[3], complex float* dst); #include "misc/cppwrap.h" bart-0.4.02/src/noncart/nudft.c000066400000000000000000000132161320577655200162630ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
/**
 * Read the trajectory coordinate for the current position.
 *
 * For every dimension i selected in flags, the next entry along
 * dimension 0 of the trajectory is read, its real part negated and stored
 * in coord[i]; unselected dimensions get 0. pos[0] is used as the running
 * read index and must be 0 on entry; it is reset to 0 before returning.
 * Asserts that the number of selected dimensions equals tdims[0].
 *
 * @param N     number of dimensions
 * @param flags bitmask of dimensions that carry a coordinate
 * @param coord output, one value per dimension
 * @param pos   current multi-index into the trajectory (pos[0] temporarily modified)
 * @param tdims trajectory dimensions
 * @param tstrs trajectory strides
 * @param traj  trajectory data
 */
static void get_coord(unsigned int N, unsigned long flags, float coord[N], long pos[N], const long tdims[N], const long tstrs[N], const complex float* traj)
{
	assert(0 == pos[0]);

	unsigned int used = 0;

	for (unsigned int i = 0; i < N; i++) {

		if (!MD_IS_SET(flags, i)) {

			coord[i] = 0.;
			continue;
		}

		pos[0] = used++;
		coord[i] = -crealf(MD_ACCESS(N, tstrs, pos, traj));
	}

	assert(tdims[0] == used);

	pos[0] = 0;
}
0 : kstrs[i]; md_clear2(N, kdims, kstrs, ksp, CFL_SIZE); long pos[N]; for (unsigned int i = 0; i < N; i++) pos[i] = 0; do { float coord[N]; get_coord(N, flags, coord, pos, tdims, tstrs, traj); linear_phase(N, tmp_dims, coord, tmp); md_zfmac2(N, idims, kstrs2, &MD_ACCESS(N, kstrs, pos, ksp), istrs, img, tmp_strs, tmp); } while (md_next(N, tdims, ~MD_BIT(0), pos)); md_free(tmp); } /** * */ void nudft_adjoint2(unsigned int N, unsigned long flags, const long idims[N], const long istrs[N], complex float* img, const long kdims[N], const long kstrs[N], const complex float* ksp, const long tdims[N], const long tstrs[N], const complex float* traj) { assert(1 == kdims[0]); assert(md_check_compat(N, ~0, kdims, tdims)); long tmp_dims[N]; long tmp_strs[N]; md_select_dims(N, flags, tmp_dims, idims); md_calc_strides(N, tmp_strs, tmp_dims, CFL_SIZE); complex float* tmp = md_alloc(N, tmp_dims, CFL_SIZE); long kstrs2[N]; for (unsigned int i = 0; i < N; i++) kstrs2[i] = MD_IS_SET(flags, i) ? 0 : kstrs[i]; md_clear2(N, idims, istrs, img, CFL_SIZE); long pos[N]; for (unsigned int i = 0; i < N; i++) pos[i] = 0; do { float coord[N]; get_coord(N, flags, coord, pos, tdims, tstrs, traj); linear_phase(N, tmp_dims, coord, tmp); md_zfmacc2(N, idims, istrs, img, kstrs2, &MD_ACCESS(N, kstrs, pos, ksp), tmp_strs, tmp); } while (md_next(N, tdims, ~MD_BIT(0), pos)); md_free(tmp); } void nudft_forward(unsigned int N, unsigned long flags, const long odims[N], complex float* out, const long idims[N], const complex float* in, const long tdims[N], const complex float* traj) { long ostrs[N]; long istrs[N]; long tstrs[N]; md_calc_strides(N, ostrs, odims, CFL_SIZE); md_calc_strides(N, istrs, idims, CFL_SIZE); md_calc_strides(N, tstrs, tdims, CFL_SIZE); // FL_SIZE nudft_forward2(N, flags, odims, ostrs, out, idims, istrs, in, tdims, tstrs, traj); } struct nudft_s { linop_data_t base; unsigned int N; unsigned long flags; long* kdims; long* idims; long* tdims; long* kstrs; long* istrs; long* tstrs; const 
complex float* traj; }; static void nudft_apply(const linop_data_t* _data, complex float* out, const complex float* in) { const struct nudft_s* data = CONTAINER_OF(_data, const struct nudft_s, base); unsigned int N = data->N; nudft_forward2(N, data->flags, data->kdims, data->kstrs, out, data->idims, data->istrs, in, data->tdims, data->tstrs, data->traj); } static void nudft_adj(const linop_data_t* _data, complex float* out, const complex float* in) { const struct nudft_s* data = CONTAINER_OF(_data, const struct nudft_s, base); unsigned int N = data->N; nudft_adjoint2(N, data->flags, data->idims, data->istrs, out, data->kdims, data->kstrs, in, data->tdims, data->tstrs, data->traj); } static void nudft_delete(const linop_data_t* _data) { const struct nudft_s* data = CONTAINER_OF(_data, const struct nudft_s, base); free(data->kdims); free(data->idims); free(data->tdims); free(data->kstrs); free(data->istrs); free(data->tstrs); free((void*)data); } const struct linop_s* nudft_create2(unsigned int N, unsigned long flags, const long odims[N], const long ostrs[N], const long idims[N], const long istrs[N], const long tdims[N], const complex float* traj) { PTR_ALLOC(struct nudft_s, data); data->N = N; data->flags = flags; data->traj = traj; data->kdims = *TYPE_ALLOC(long[N]); data->kstrs = *TYPE_ALLOC(long[N]); md_copy_dims(N, data->kdims, odims); md_copy_strides(N, data->kstrs, ostrs); data->idims = *TYPE_ALLOC(long[N]); data->istrs = *TYPE_ALLOC(long[N]); md_copy_dims(N, data->idims, idims); md_copy_strides(N, data->istrs, istrs); data->tdims = *TYPE_ALLOC(long[N]); data->tstrs = *TYPE_ALLOC(long[N]); md_copy_dims(N, data->tdims, tdims); md_calc_strides(N, data->tstrs, tdims, CFL_SIZE); return linop_create2(N, odims, ostrs, N, idims, istrs, &PTR_PASS(data)->base, nudft_apply, nudft_adj, NULL, NULL, nudft_delete); } const struct linop_s* nudft_create(unsigned int N, unsigned long flags, const long odims[N], const long idims[N], const long tdims[N], const complex float* 
traj) { return nudft_create2(N, flags, odims, MD_STRIDES(N, odims, CFL_SIZE), idims, MD_STRIDES(N, idims, CFL_SIZE), tdims, traj); } bart-0.4.02/src/noncart/nudft.h000066400000000000000000000026051320577655200162700ustar00rootroot00000000000000 #include extern void nudft_forward2(unsigned int N, unsigned long flags, const long odims[N], const long ostrs[N], complex float* out, const long idims[N], const long istrs[N], const complex float* in, const long tdims[N], const long tstrs[N], const complex float* traj); extern void nudft_forward(unsigned int N, unsigned long flags, const long odims[N], complex float* out, const long idims[N], const complex float* in, const long tdims[N], const complex float* traj); extern void nudft_adjoint2(unsigned int N, unsigned long flags, const long odims[N], const long ostrs[N], complex float* out, const long idims[N], const long istrs[N], const complex float* in, const long tdims[N], const long tstrs[N], const complex float* traj); extern void nudft_adjoint(unsigned int N, unsigned long flags, const long odims[N], complex float* out, const long idims[N], const complex float* in, const long tdims[N], const complex float* traj); struct linop_s; extern const struct linop_s* nudft_create2(unsigned int N, unsigned long flags, const long odims[N], const long ostrs[N], const long idims[N], const long istrs[N], const long tdims[N], const complex float* traj); extern const struct linop_s* nudft_create(unsigned int N, unsigned long flags, const long odims[N], const long idims[N], const long tdims[N], const complex float* traj); bart-0.4.02/src/noncart/nufft.c000066400000000000000000000536261320577655200162760ustar00rootroot00000000000000/* Copyright 2014-2015. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014-2017 Frank Ong * 2014-2017 Martin Uecker * * Strang G. 
A proposal for Toeplitz matrix calculations. Studies in Applied Mathematics. 1986; 74(2):171-176.
long* cm2_dims; ///< 2x oversampled coil image dimension long* ksp_strs; long* cim_strs; long* cml_strs; long* img_strs; long* trj_strs; long* lph_strs; long* psf_strs; long* wgh_strs; const struct linop_s* cfft_op; ///< Pcycle FFT operator unsigned int cycle; long* rcml_dims; ///< Pcycle Coil + linear phase dimension long* rlph_dims; ///< Pcycle Linear phase dimension long* rpsf_dims; ///< Pcycle Point spread function dimension long* rcml_strs; long* rlph_strs; long* rpsf_strs; }; static DEF_TYPEID(nufft_data); static void nufft_free_data(const linop_data_t* data); static void nufft_apply(const linop_data_t* _data, complex float* dst, const complex float* src); static void nufft_apply_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src); static void nufft_apply_normal(const linop_data_t* _data, complex float* dst, const complex float* src); static void toeplitz_mult(const struct nufft_data* data, complex float* dst, const complex float* src); static void toeplitz_mult_pcycle(const struct nufft_data* data, complex float* dst, const complex float* src); static complex float* compute_linphases(unsigned int N, long lph_dims[N + 3], const long img_dims[N]); static complex float* compute_psf2(unsigned int N, const long psf_dims[N + 3], const long trj_dims[N], const complex float* traj, const complex float* weights); /** * NUFFT operator initialization */ struct linop_s* nufft_create(unsigned int N, ///< Number of dimension const long ksp_dims[N], ///< kspace dimension const long cim_dims[N], ///< Coil images dimension const long traj_dims[N], ///< Trajectory dimension const complex float* traj, ///< Trajectory const complex float* weights, ///< Weights, ex, soft-gating or density compensation struct nufft_conf_s conf) ///< NUFFT configuration options { PTR_ALLOC(struct nufft_data, data); SET_TYPEID(nufft_data, data); data->N = N; data->traj = traj; data->conf = conf; data->width = 3.; data->beta = calc_beta(2., data->width); // get dims 
assert(md_check_compat(N - 3, 0, ksp_dims + 3, cim_dims + 3)); unsigned int ND = N + 3; data->ksp_dims = *TYPE_ALLOC(long[ND]); data->cim_dims = *TYPE_ALLOC(long[ND]); data->cml_dims = *TYPE_ALLOC(long[ND]); data->img_dims = *TYPE_ALLOC(long[ND]); data->trj_dims = *TYPE_ALLOC(long[ND]); data->lph_dims = *TYPE_ALLOC(long[ND]); data->psf_dims = *TYPE_ALLOC(long[ND]); data->wgh_dims = *TYPE_ALLOC(long[ND]); data->ksp_strs = *TYPE_ALLOC(long[ND]); data->cim_strs = *TYPE_ALLOC(long[ND]); data->cml_strs = *TYPE_ALLOC(long[ND]); data->img_strs = *TYPE_ALLOC(long[ND]); data->trj_strs = *TYPE_ALLOC(long[ND]); data->lph_strs = *TYPE_ALLOC(long[ND]); data->psf_strs = *TYPE_ALLOC(long[ND]); data->wgh_strs = *TYPE_ALLOC(long[ND]); data->rlph_dims = *TYPE_ALLOC(long[ND]); data->rpsf_dims = *TYPE_ALLOC(long[ND]); data->rcml_dims = *TYPE_ALLOC(long[ND]); data->rlph_strs = *TYPE_ALLOC(long[ND]); data->rpsf_strs = *TYPE_ALLOC(long[ND]); data->rcml_strs = *TYPE_ALLOC(long[ND]); md_singleton_dims(ND, data->cim_dims); md_singleton_dims(ND, data->ksp_dims); md_copy_dims(N, data->cim_dims, cim_dims); md_copy_dims(N, data->ksp_dims, ksp_dims); md_select_dims(ND, FFT_FLAGS, data->img_dims, data->cim_dims); assert(3 == traj_dims[0]); assert(traj_dims[1] == ksp_dims[1]); assert(traj_dims[2] == ksp_dims[2]); assert(md_check_compat(N - 3, ~0, traj_dims + 3, ksp_dims + 3)); assert(md_check_bounds(N - 3, ~0, traj_dims + 3, ksp_dims + 3)); md_singleton_dims(ND, data->trj_dims); md_copy_dims(N, data->trj_dims, traj_dims); // get strides md_calc_strides(ND, data->cim_strs, data->cim_dims, CFL_SIZE); md_calc_strides(ND, data->img_strs, data->img_dims, CFL_SIZE); md_calc_strides(ND, data->trj_strs, data->trj_dims, CFL_SIZE); md_calc_strides(ND, data->ksp_strs, data->ksp_dims, CFL_SIZE); data->weights = NULL; if (NULL != weights) { md_singleton_dims(ND, data->wgh_dims); md_select_dims(N, ~MD_BIT(0), data->wgh_dims, data->trj_dims); md_calc_strides(ND, data->wgh_strs, data->wgh_dims, CFL_SIZE); complex 
float* tmp = md_alloc(ND, data->wgh_dims, CFL_SIZE); md_copy(ND, data->wgh_dims, tmp, weights, CFL_SIZE); data->weights = tmp; } complex float* roll = md_alloc(ND, data->img_dims, CFL_SIZE); rolloff_correction(2., data->width, data->beta, data->img_dims, roll); data->roll = roll; complex float* linphase = compute_linphases(N, data->lph_dims, data->img_dims); md_calc_strides(ND, data->lph_strs, data->lph_dims, CFL_SIZE); if (!conf.toeplitz) md_zmul2(ND, data->lph_dims, data->lph_strs, linphase, data->lph_strs, linphase, data->img_strs, data->roll); fftmod(ND, data->lph_dims, FFT_FLAGS, linphase, linphase); fftscale(ND, data->lph_dims, FFT_FLAGS, linphase, linphase); float scale = 1.; for (unsigned int i = 0; i < N; i++) scale *= ((data->lph_dims[i] > 1) && (i < 3)) ? 0.5 : 1.; md_zsmul(ND, data->lph_dims, linphase, linphase, scale); complex float* fftm = md_alloc(ND, data->img_dims, CFL_SIZE); md_zfill(ND, data->img_dims, fftm, 1.); fftmod(ND, data->img_dims, FFT_FLAGS, fftm, fftm); data->fftmod = fftm; data->linphase = linphase; data->psf = NULL; #ifdef USE_CUDA data->linphase_gpu = NULL; data->psf_gpu = NULL; data->grid_gpu = NULL; #endif if (conf.toeplitz) { debug_printf(DP_DEBUG1, "NUFFT: Toeplitz mode\n"); #if 0 md_copy_dims(ND, data->psf_dims, data->lph_dims); #else md_copy_dims(3, data->psf_dims, data->lph_dims); md_copy_dims(ND - 3, data->psf_dims + 3, data->trj_dims + 3); data->psf_dims[N] = data->lph_dims[N]; #endif md_calc_strides(ND, data->psf_strs, data->psf_dims, CFL_SIZE); data->psf = compute_psf2(N, data->psf_dims, data->trj_dims, data->traj, data->weights); } md_copy_dims(ND, data->cml_dims, data->cim_dims); data->cml_dims[N + 0] = data->lph_dims[N + 0]; md_calc_strides(ND, data->cml_strs, data->cml_dims, CFL_SIZE); data->cm2_dims = *TYPE_ALLOC(long[ND]); // ! md_copy_dims(ND, data->cm2_dims, data->cim_dims); for (int i = 0; i < 3; i++) data->cm2_dims[i] = (1 == cim_dims[i]) ? 
1 : (2 * cim_dims[i]); data->grid = md_alloc(ND, data->cml_dims, CFL_SIZE); data->fft_op = linop_fft_create(ND, data->cml_dims, FFT_FLAGS); if (conf.pcycle) { debug_printf(DP_DEBUG1, "NUFFT: Pcycle Mode\n"); data->cycle = 0; data->cfft_op = linop_fft_create(N, data->cim_dims, FFT_FLAGS); } return linop_create(N, ksp_dims, N, cim_dims, CAST_UP(PTR_PASS(data)), nufft_apply, nufft_apply_adjoint, nufft_apply_normal, NULL, nufft_free_data); } /** * Compute Strang's circulant preconditioner * * Strang's reconditioner is simply the cropped psf in the image domain * * Ref: Strang G. A proposal for Toeplitz matrix calculations. Journal Studies in Applied Math. 1986; 74(2):171-17 */ static complex float* compute_precond(unsigned int N, const long* pre_dims, const long* pre_strs, const long* psf_dims, const long* psf_strs, const complex float* psf, const complex float* linphase) { unsigned int ND = N + 3; complex float* pre = md_alloc(ND, pre_dims, CFL_SIZE); complex float* psft = md_alloc(ND, psf_dims, CFL_SIZE); // Transform psf to image domain ifftuc(ND, psf_dims, FFT_FLAGS, psft, psf); // Compensate for linear phase to get cropped psf md_clear(ND, pre_dims, pre, CFL_SIZE); md_zfmacc2(ND, psf_dims, pre_strs, pre, psf_strs, psft, psf_strs, linphase); md_free(psft); // Transform to Fourier domain fftuc(N, pre_dims, FFT_FLAGS, pre, pre); for(int i = 0; i < md_calc_size( N, pre_dims ); i++) pre[i] = cabsf(pre[i]); md_zsadd(N, pre_dims, pre, pre, 1e-3); return pre; } /** * NUFFT precondition internal data structure */ struct nufft_precond_data { INTERFACE(operator_data_t); unsigned int N; const complex float* pre; ///< Preconditioner long* cim_dims; ///< Coil image dimension long* pre_dims; ///< Preconditioner dimension long* cim_strs; long* pre_strs; const struct linop_s* fft_op; ///< FFT linear operator }; static DEF_TYPEID(nufft_precond_data); static void nufft_precond_apply(const operator_data_t* _data, unsigned int M, void* args[M]) { assert(2 == M); const struct 
nufft_precond_data* data = CAST_DOWN(nufft_precond_data, _data); complex float* dst = args[0]; const complex float* src = args[1]; linop_forward(data->fft_op, data->N, data->cim_dims, dst, data->N, data->cim_dims, src); md_zdiv2(data->N, data->cim_dims, data->cim_strs, dst, data->cim_strs, dst, data->pre_strs, data->pre); linop_adjoint(data->fft_op, data->N, data->cim_dims, dst, data->N, data->cim_dims, dst); } static void nufft_precond_del(const operator_data_t* _data) { const struct nufft_precond_data* data = CAST_DOWN(nufft_precond_data, _data); xfree(data->cim_dims); xfree(data->pre_dims); xfree(data->cim_strs); xfree(data->pre_strs); md_free(data->pre); xfree(data); } const struct operator_s* nufft_precond_create(const struct linop_s* nufft_op) { const struct nufft_data* data = CAST_DOWN(nufft_data, linop_get_data(nufft_op)); PTR_ALLOC(struct nufft_precond_data, pdata); SET_TYPEID(nufft_precond_data, pdata); assert(data->conf.toeplitz); unsigned int N = data->N; unsigned int ND = N + 3; pdata->N = N; pdata->cim_dims = *TYPE_ALLOC(long[ND]); pdata->pre_dims = *TYPE_ALLOC(long[ND]); pdata->cim_strs = *TYPE_ALLOC(long[ND]); pdata->pre_strs = *TYPE_ALLOC(long[ND]); md_copy_dims(ND, pdata->cim_dims, data->cim_dims); md_select_dims(ND, FFT_FLAGS, pdata->pre_dims, pdata->cim_dims); md_calc_strides(ND, pdata->cim_strs, pdata->cim_dims, CFL_SIZE); md_calc_strides(ND, pdata->pre_strs, pdata->pre_dims, CFL_SIZE); pdata->pre = compute_precond(pdata->N, pdata->pre_dims, pdata->pre_strs, data->psf_dims, data->psf_strs, data->psf, data->linphase); pdata->fft_op = linop_fft_create(pdata->N, pdata->cim_dims, FFT_FLAGS); const long* cim_dims = pdata->cim_dims; // need to dereference pdata before PTR_PASS return operator_create(N, cim_dims, N, cim_dims, CAST_UP(PTR_PASS(pdata)), nufft_precond_apply, nufft_precond_del); } static complex float* compute_linphases(unsigned int N, long lph_dims[N + 3], const long img_dims[N + 3]) { float shifts[8][3]; int s = 0; for(int i = 0; i < 8; 
i++) { bool skip = false; for(int j = 0; j < 3; j++) { shifts[s][j] = 0.; if (MD_IS_SET(i, j)) { skip = skip || (1 == img_dims[j]); shifts[s][j] = -0.5; } } if (!skip) s++; } unsigned int ND = N + 3; md_select_dims(ND, FFT_FLAGS, lph_dims, img_dims); lph_dims[N + 0] = s; complex float* linphase = md_alloc(ND, lph_dims, CFL_SIZE); for(int i = 0; i < s; i++) { float shifts2[ND]; for (unsigned int j = 0; j < ND; j++) shifts2[j] = 0.; shifts2[0] = shifts[i][0]; shifts2[1] = shifts[i][1]; shifts2[2] = shifts[i][2]; linear_phase(ND, img_dims, shifts2, linphase + i * md_calc_size(ND, img_dims)); } return linphase; } complex float* compute_psf(unsigned int N, const long img2_dims[N], const long trj_dims[N], const complex float* traj, const complex float* weights) { long ksp_dims1[N]; md_select_dims(N, ~MD_BIT(0), ksp_dims1, trj_dims); struct nufft_conf_s conf = nufft_conf_defaults; conf.toeplitz = false; // avoid infinite loop struct linop_s* op2 = nufft_create(N, ksp_dims1, img2_dims, trj_dims, traj, NULL, conf); complex float* ones = md_alloc(N, ksp_dims1, CFL_SIZE); md_zfill(N, ksp_dims1, ones, 1.); if (NULL != weights) { md_zmul(N, ksp_dims1, ones, ones, weights); md_zmulc(N, ksp_dims1, ones, ones, weights); } complex float* psft = md_alloc(N, img2_dims, CFL_SIZE); linop_adjoint_unchecked(op2, psft, ones); md_free(ones); linop_free(op2); return psft; } static complex float* compute_psf2(unsigned int N, const long psf_dims[N + 3], const long trj_dims[N + 3], const complex float* traj, const complex float* weights) { unsigned int ND = N + 3; long img_dims[ND]; long img_strs[ND]; md_select_dims(ND, ~MD_BIT(N + 0), img_dims, psf_dims); md_calc_strides(ND, img_strs, img_dims, CFL_SIZE); // PSF 2x size long img2_dims[ND]; long img2_strs[ND]; md_copy_dims(ND, img2_dims, img_dims); for (int i = 0; i < 3; i++) img2_dims[i] = (1 == img_dims[i]) ? 
1 : (2 * img_dims[i]); md_calc_strides(ND, img2_strs, img2_dims, CFL_SIZE); complex float* traj2 = md_alloc(ND, trj_dims, CFL_SIZE); md_zsmul(ND, trj_dims, traj2, traj, 2.); complex float* psft = compute_psf(ND, img2_dims, trj_dims, traj2, weights); md_free(traj2); fftuc(ND, img2_dims, FFT_FLAGS, psft, psft); float scale = 1.; for (unsigned int i = 0; i < N; i++) scale *= ((img2_dims[i] > 1) && (i < 3)) ? 4. : 1.; md_zsmul(ND, img2_dims, psft, psft, scale); // reformat complex float* psf = md_alloc(ND, psf_dims, CFL_SIZE); long factors[N]; for (unsigned int i = 0; i < N; i++) factors[i] = ((img_dims[i] > 1) && (i < 3)) ? 2 : 1; md_decompose(N + 0, factors, psf_dims, psf, img2_dims, psft, CFL_SIZE); md_free(psft); return psf; } static void nufft_free_data(const linop_data_t* _data) { struct nufft_data* data = CAST_DOWN(nufft_data, _data); free(data->ksp_dims); free(data->cim_dims); free(data->cml_dims); free(data->img_dims); free(data->trj_dims); free(data->lph_dims); free(data->psf_dims); free(data->wgh_dims); free(data->ksp_strs); free(data->cim_strs); free(data->cml_strs); free(data->img_strs); free(data->trj_strs); free(data->lph_strs); free(data->psf_strs); free(data->wgh_strs); free(data->rlph_dims); free(data->rpsf_dims); free(data->rcml_dims); free(data->rlph_strs); free(data->rpsf_strs); free(data->rcml_strs); free(data->cm2_dims); md_free(data->grid); md_free(data->linphase); md_free(data->psf); md_free(data->fftmod); md_free(data->weights); md_free(data->roll); #ifdef USE_CUDA md_free(data->linphase_gpu); md_free(data->psf_gpu); md_free(data->grid_gpu); #endif linop_free(data->fft_op); if (data->conf.pcycle) linop_free(data->cfft_op); free(data); } // Forward: from image to kspace static void nufft_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { struct nufft_data* data = CAST_DOWN(nufft_data, _data); #ifdef USE_CUDA assert(!cuda_ondevice(src)); #endif assert(!data->conf.toeplitz); // if toeplitz linphase has no roll, so 
// Adjoint: from kspace to image
//
// _data: NUFFT operator data (struct nufft_data)
// dst:   output, cleared and then filled with dimensions cim_dims
// src:   input k-space samples with dimensions ksp_dims
static void nufft_apply_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	struct nufft_data* data = CAST_DOWN(nufft_data, _data);

#ifdef USE_CUDA
	// asserted: src must not be a device pointer
	assert(!cuda_ondevice(src));
#endif
	unsigned int ND = data->N + 3;

	// temporary grid with dimensions cm2_dims (2x oversampled coil image);
	// presumably zero-initialised by md_calloc
	complex float* gridX = md_calloc(data->N, data->cm2_dims, CFL_SIZE);

	complex float* wdat = NULL;

	if (NULL != data->weights) {

		// apply the weights into a temporary copy and read from it instead of src
		wdat = md_alloc(data->N, data->ksp_dims, CFL_SIZE);
		md_zmulc2(data->N, data->ksp_dims, data->ksp_strs, wdat, data->ksp_strs, src, data->wgh_strs, data->weights);
		src = wdat;
	}

	// grid the (weighted) samples onto gridX using the trajectory
	grid2(2., data->width, data->beta, ND, data->trj_dims, data->traj, data->cm2_dims, gridX, data->ksp_dims, src);

	// wdat stays NULL when there are no weights
	md_free(wdat);

	long factors[data->N];

	// factor 2 for each of the first three dimensions that is not a singleton
	for (unsigned int i = 0; i < data->N; i++)
		factors[i] = ((data->img_dims[i] > 1) && (i < 3)) ? 2 : 1;

	// split gridX into the internal grid buffer (cml_dims: coil + linear phase)
	md_decompose(data->N, factors, data->cml_dims, data->grid, data->cm2_dims, gridX, CFL_SIZE);
	md_free(gridX);
	// multiply with the FFT modulation (md_zmulc2, presumably conjugated)
	md_zmulc2(ND, data->cml_dims, data->cml_strs, data->grid, data->cml_strs, data->grid, data->img_strs, data->fftmod);

	linop_adjoint(data->fft_op, ND, data->cml_dims, data->grid, ND, data->cml_dims, data->grid);

	// accumulate the grid multiplied with the linear phases into the cleared output
	md_clear(ND, data->cim_dims, dst, CFL_SIZE);
	md_zfmacc2(ND, data->cml_dims, data->cim_strs, dst, data->cml_strs, data->grid, data->lph_strs, data->linphase);

	// in Toeplitz mode the roll-off is not folded into linphase by
	// nufft_create(), so it is applied explicitly here
	if (data->conf.toeplitz)
		md_zmul2(ND, data->cim_dims, data->cim_strs, dst, data->cim_strs, dst, data->img_strs, data->roll);
}
/**
 * Estimate image dimensions from trajectory
 *
 * For each of the three coordinates the maximum magnitude over all samples
 * is determined; a coordinate that is zero everywhere yields size 1,
 * otherwise an even size derived from the maximum.
 *
 * @param N     number of trajectory dimensions
 * @param dims  output: estimated sizes of the three image dimensions
 * @param tdims trajectory dimensions; tdims[0] is the stride between samples
 * @param traj  trajectory
 */
void estimate_im_dims(unsigned int N, long dims[3], const long tdims[N], const complex float* traj)
{
	float max_dims[3] = { 0., 0., 0. };

	// number of samples; loop-invariant, so computed only once
	long nsamples = md_calc_size(N - 1, tdims + 1);

	for (long i = 0; i < nsamples; i++)
		for (int j = 0; j < 3; j++)
			max_dims[j] = MAX(cabsf(traj[j + tdims[0] * i]), max_dims[j]);

	for (int j = 0; j < 3; j++)
		dims[j] = (0. == max_dims[j]) ? 1 : (2 * (long)((2. * max_dims[j] + 1.5) / 2.));
}
/**
 * NUFFT configuration
 */
struct nufft_conf_s {

	_Bool toeplitz; ///< Toeplitz embedding boolean for A^T A
	_Bool pcycle; ///< Phase cycling
};

extern struct nufft_conf_s nufft_conf_defaults;

/**
 * Create the NUFFT linear operator
 */
extern struct linop_s* nufft_create(unsigned int N,			///< Number of dimensions
				    const long ksp_dims[__VLA(N)],	///< Kspace dimension
				    const long coilim_dims[__VLA(N)],	///< Coil image dimension
				    const long traj_dims[__VLA(N)],	///< Trajectory dimension
				    const _Complex float* traj,		///< Trajectory
				    const _Complex float* weights,	///< Weights, ex, density-compensation
				    struct nufft_conf_s conf);		///< NUFFT configuration

/**
 * Estimate image dimensions from trajectory
 */
extern void estimate_im_dims(unsigned int N,		///< Number of dimensions
			     long dims[3],			///< Output estimated image dimensions
			     const long tdims[__VLA(N)],	///< Trajectory dimension
			     const _Complex float* traj);	///< Trajectory

/**
 * Compute the point-spread function for a trajectory.
 *
 * Declared with _Complex like the other prototypes: this header does not
 * include <complex.h>, so the `complex` macro cannot be relied upon here.
 */
extern _Complex float* compute_psf(unsigned int N,
				   const long img2_dims[__VLA(N)],
				   const long trj_dims[__VLA(N)],
				   const _Complex float* traj,
				   const _Complex float* weights);

/**
 * Create a preconditioner for a NUFFT operator (requires Toeplitz mode)
 */
extern const struct operator_s* nufft_precond_create(const struct linop_s* nufft_op);
* * Authors: * 2012, 2014 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/misc.h" #include "misc/mmio.h" #include "misc/utils.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "flags "; static const char help_str[] = "Normalize along selected dimensions.\n"; int main_normalize(int argc, char* argv[]) { bool l1 = false; l1 = mini_cmdline_bool(&argc, argv, 'b', 3, usage_str, help_str); num_init(); int N = DIMS; long dims[N]; complex float* data = load_cfl(argv[2], N, dims); int flags = atoi(argv[1]); assert(flags >= 0); complex float* out = create_cfl(argv[3], N, dims); md_copy(N, dims, out, data, CFL_SIZE); (l1 ? normalizel1 : normalize)(N, flags, dims, out); unmap_cfl(N, dims, out); exit(0); } bart-0.4.02/src/nrmse.c000077500000000000000000000036501320577655200146270ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013 Dara Bahri * 2014 Frank Ong * 2014 Jonathan Tamir * 2015-2016 Martin Uecker */ #include #include #include #include #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #include "misc/debug.h" #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Output normalized root mean square error (NRMSE),\n" "i.e. 
norm(input - ref) / norm(ref)"; int main_nrmse(int argc, char* argv[]) { float test = -1.; bool auto_scale = false; const struct opt_s opts[] = { OPT_FLOAT('t', &test, "eps", "compare to eps"), OPT_SET('s', &auto_scale, "automatic (complex) scaling"), }; cmdline(&argc, argv, 2, 2, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); long ref_dims[DIMS]; long in_dims[DIMS]; complex float* ref = load_cfl(argv[1], DIMS, ref_dims); complex float* in = load_cfl(argv[2], DIMS, in_dims); assert(md_check_compat(DIMS, 0u, in_dims, ref_dims)); if (auto_scale) { complex float sc = md_zscalar(DIMS, ref_dims, in, ref); float n = md_znorm(DIMS, ref_dims, ref); if (0. == n) error("Reference has zero norm"); sc /= n * n; debug_printf(DP_INFO, "Scaled by: %f%+fi\n", crealf(sc), cimagf(sc)); md_zsmul(DIMS, ref_dims, ref, ref, sc); } float err = md_znrmse(DIMS, ref_dims, ref, in); printf("%f\n", err); unmap_cfl(DIMS, ref_dims, ref); unmap_cfl(DIMS, in_dims, in); exit(((test == -1.) || (err <= test)) ? 0 : 1); } bart-0.4.02/src/nufft.c000066400000000000000000000105251320577655200146210ustar00rootroot00000000000000/* Copyright 2014-2015. The Regents of the University of California. * Copyright 2015-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2014 Frank Ong * 2014-2017 Martin Uecker */ #include #include #include #include "misc/mmio.h" #include "misc/misc.h" #include "misc/mri.h" #include "misc/debug.h" #include "misc/opts.h" #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "num/ops.h" #include "linops/linop.h" #include "iter/iter.h" #include "iter/lsqr.h" #include "noncart/nufft.h" #include "noncart/nudft.h" static const char usage_str[] = " "; static const char help_str[] = "Perform non-uniform Fast Fourier Transform."; int main_nufft(int argc, char* argv[]) { bool adjoint = false; bool inverse = false; bool precond = false; bool dft = false; bool gpu = false; struct nufft_conf_s conf = nufft_conf_defaults; struct iter_conjgrad_conf cgconf = iter_conjgrad_defaults; long coilim_vec[3] = { 0 }; float lambda = 0.; const struct opt_s opts[] = { OPT_SET('a', &adjoint, "adjoint"), OPT_SET('i', &inverse, "inverse"), OPT_VEC3('d', &coilim_vec, "x:y:z", "dimensions"), OPT_VEC3('D', &coilim_vec, "", "()"), OPT_SET('t', &conf.toeplitz, "Toeplitz embedding for inverse NUFFT"), OPT_CLEAR('r', &conf.toeplitz, "turn-off Toeplitz embedding for inverse NUFFT"), OPT_SET('c', &precond, "Preconditioning for inverse NUFFT"), OPT_FLOAT('l', &lambda, "lambda", "l2 regularization"), OPT_UINT('m', &cgconf.maxiter, "", "()"), OPT_SET('s', &dft, "DFT"), OPT_SET('g', &gpu, "GPU (only inverse)"), }; cmdline(&argc, argv, 3, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); // avoid computing PSF if not necessary if (!inverse) conf.toeplitz = false; long coilim_dims[DIMS] = { 0 }; md_copy_dims(3, coilim_dims, coilim_vec); // Read trajectory long traj_dims[DIMS]; complex float* traj = load_cfl(argv[1], DIMS, traj_dims); assert(3 == traj_dims[0]); (gpu ? 
num_init_gpu : num_init)(); if (inverse || adjoint) { long ksp_dims[DIMS]; const complex float* ksp = load_cfl(argv[2], DIMS, ksp_dims); assert(1 == ksp_dims[0]); assert(md_check_compat(DIMS, ~(PHS1_FLAG|PHS2_FLAG), ksp_dims, traj_dims)); md_copy_dims(DIMS - 3, coilim_dims + 3, ksp_dims + 3); if (0 == md_calc_size(DIMS, coilim_dims)) { estimate_im_dims(DIMS, coilim_dims, traj_dims, traj); debug_printf(DP_INFO, "Est. image size: %ld %ld %ld\n", coilim_dims[0], coilim_dims[1], coilim_dims[2]); } complex float* img = create_cfl(argv[3], DIMS, coilim_dims); md_clear(DIMS, coilim_dims, img, CFL_SIZE); const struct linop_s* nufft_op; if (!dft) nufft_op = nufft_create(DIMS, ksp_dims, coilim_dims, traj_dims, traj, NULL, conf); else nufft_op = nudft_create(DIMS, FFT_FLAGS, ksp_dims, coilim_dims, traj_dims, traj); if (inverse) { const struct operator_s* precond_op = NULL; if (conf.toeplitz && precond) precond_op = nufft_precond_create(nufft_op); lsqr(DIMS, &(struct lsqr_conf){ lambda, gpu }, iter_conjgrad, CAST_UP(&cgconf), nufft_op, NULL, coilim_dims, img, ksp_dims, ksp, precond_op); if (conf.toeplitz && precond) operator_free(precond_op); } else { linop_adjoint(nufft_op, DIMS, coilim_dims, img, DIMS, ksp_dims, ksp); } linop_free(nufft_op); unmap_cfl(DIMS, ksp_dims, ksp); unmap_cfl(DIMS, coilim_dims, img); } else { // Read image data const complex float* img = load_cfl(argv[2], DIMS, coilim_dims); // Initialize kspace data long ksp_dims[DIMS]; md_select_dims(DIMS, PHS1_FLAG|PHS2_FLAG, ksp_dims, traj_dims); md_copy_dims(DIMS - 3, ksp_dims + 3, coilim_dims + 3); complex float* ksp = create_cfl(argv[3], DIMS, ksp_dims); const struct linop_s* nufft_op; if (!dft) nufft_op = nufft_create(DIMS, ksp_dims, coilim_dims, traj_dims, traj, NULL, conf); else nufft_op = nudft_create(DIMS, FFT_FLAGS, ksp_dims, coilim_dims, traj_dims, traj); // nufft linop_forward(nufft_op, DIMS, ksp_dims, ksp, DIMS, coilim_dims, img); linop_free(nufft_op); unmap_cfl(DIMS, coilim_dims, img); unmap_cfl(DIMS, 
ksp_dims, ksp); } unmap_cfl(DIMS, traj_dims, traj); debug_printf(DP_INFO, "Done.\n"); exit(0); } bart-0.4.02/src/num/000077500000000000000000000000001320577655200141275ustar00rootroot00000000000000bart-0.4.02/src/num/blas.c000066400000000000000000000037221320577655200152200ustar00rootroot00000000000000/* Copyright 2016. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Jonathan Tamir * 2016 Martin Uecker */ #include #include #include "misc/misc.h" #ifdef USE_MACPORTS #include #else #include #endif #ifdef USE_CUDA #include "num/gpuops.h" #include #endif #include "blas.h" void blas_cgemm(char transa, char transb, long M, long N, long K, const complex float alpha, long lda, const complex float A[K][lda], long ldb, const complex float B[N][ldb], const complex float beta, long ldc, complex float C[N][ldc]) { #ifdef USE_CUDA #define CUCOMPLEX(x) (((union { cuComplex cu; complex float std; }){ .std = (x) }).cu) if (cuda_ondevice(A)) { cublasCgemm(transa, transb, M, N, K, CUCOMPLEX(alpha), (const cuComplex*)A, lda, (const cuComplex*)B, ldb, CUCOMPLEX(beta), (cuComplex*)C, ldc); } else #endif cblas_cgemm(CblasColMajor, transa, transb, M, N, K, (void*)&alpha, (void*)A, lda, (void*)B, ldb, (void*)&beta, (void*)C, ldc); } void (blas_matrix_multiply)(long M, long N, long K, complex float C[N][M], const complex float A[K][M], const complex float B[N][K]) { blas_cgemm(CblasNoTrans, CblasNoTrans, M, N, K, 1., M, A, K, B, 0., M, C); } void (blas_csyrk)(char uplo, char trans, long N, long K, const complex float alpha, long lda, const complex float A[][lda], complex float beta, long ldc, complex float C[][ldc]) { assert('U' == uplo); assert(('T' == trans) || ('N' == trans)); cblas_csyrk(CblasColMajor, CblasUpper, ('T' == trans) ? 
/*
 * Thin wrappers around (C)BLAS using column-major storage.
 *
 * The array bounds and parameter names below match the definitions in
 * blas.c: with leading dimension ld, a matrix with c columns is declared
 * as X[c][ld].
 */

// C = A * B with C: M x N, A: M x K, B: K x N
extern void blas_matrix_multiply(long M, long N, long K, complex float C[N][M], const complex float A[K][M], const complex float B[N][K]);

// C = alpha * op(A) * op(B) + beta * C with explicit leading dimensions
extern void blas_cgemm(char transa, char transb, long M, long N, long K, const complex float alpha, long lda, const complex float A[K][lda], long ldb, const complex float B[N][ldb], const complex float beta, long ldc, complex float C[N][ldc]);

// symmetric rank-k update; the definition asserts uplo == 'U' and trans in { 'T', 'N' }
extern void blas_csyrk(char uplo, char trans, long N, long K, const complex float alpha, long lda, const complex float A[*][lda], complex float beta, long ldc, complex float C[*][ldc]);
* * Authors: * 2014 Frank Ong * */ #include #include #include #include #include #include "misc/misc.h" #include "num/multind.h" #include "num/flpmath.h" #include "num/iovec.h" #include "blockproc.h" float lineproc2( unsigned int D, const long dims[D], const long blkdims[D], const long line_dims[D], const void* data, float (*op)(const void* data, const long blkdims[D], complex float* dst, const complex float* src), const long ostrs[D], complex float* dst, const long istrs[D], const complex float* src ) { // Get number of blocks per dimension long nblocks[D]; long shifts[D]; for (unsigned int i = 0; i < D; i++) { nblocks[i] = dims[i] - blkdims[i] + 1; shifts[i] = ( dims[i] - nblocks[i] * line_dims[i]) / 2; } long line_strs[D]; md_calc_strides( D, line_strs, line_dims, CFL_SIZE ); long numblocks = md_calc_size( D, nblocks ); float info = 0; // Loop over blocks complex float* blk = md_alloc_sameplace(D, blkdims, sizeof( complex float ), src); complex float* line = md_alloc_sameplace(D, line_dims, sizeof( complex float ), src); for (long b = 0; b < numblocks; b++) { // Get block position and actual block size long blkpos[D]; long linepos[D]; long ind = b; for ( unsigned int i = 0; i < D; i++) { long blkind = ind % nblocks[i]; blkpos[i] = blkind; linepos[i] = blkind + shifts[i]; ind = (ind - blkind) / nblocks[i]; } long blkstrs[D]; md_calc_strides( D, blkstrs, blkdims, CFL_SIZE ); // Extract block md_copy_block2( D, blkpos, blkdims, blkstrs, blk, dims, istrs, src, sizeof(complex float) ); // Process block info += op( data, blkdims, line, blk ); // Put back block md_copy_block2( D, linepos, dims, ostrs, dst, line_dims, line_strs, line, sizeof( complex float )); } md_free( blk ); md_free( line ); return info; } float lineproc( unsigned int D, const long dims[D], const long blkdims[D], const long line_dims[D], const void* data, float (*op)(const void* data, const long blkdims[D], complex float* dst, const complex float* src), complex float* dst, const complex float* src ) { 
long strs[D]; md_calc_strides( D, strs, dims, CFL_SIZE ); return lineproc2( D, dims, blkdims, line_dims, data, op, strs, dst, strs, src ); } float blockproc_shift_mult2( unsigned int D, const long dims[D], const long blkdims[D], const long shifts[D], const long mult[D], const void* data, float (*op)(const void* data, const long blkdims[D], complex float* dst, const complex float* src), const long ostrs[D], complex float* dst, const long istrs[D], const complex float* src ) { float info = 0; long pos[D]; for (unsigned int i = 0; i < D; i++) { pos[i] = shifts[i]; while (pos[i] < 0) pos[i] += dims[i]; } unsigned int i = 0; while ((i < D) && (0 == pos[i])) i++; if (D == i) { info += blockproc2( D, dims, blkdims, data, op, ostrs, dst, istrs, src ); return info; } long shift = pos[i]; assert(shift != 0); long dim0[D]; long dim1[D]; long dim2[D]; long dim3[D]; md_copy_dims(D, dim0, dims); md_copy_dims(D, dim1, dims); md_copy_dims(D, dim2, dims); md_copy_dims(D, dim3, dims); dim0[i] = shift - (shift/mult[i]) * mult[i]; dim1[i] = (shift/mult[i]) * mult[i]; dim2[i] = ((dims[i] - shift) / mult[i]) * mult[i]; dim3[i] = dims[i] - (((dims[i] - shift) / mult[i]) * mult[i]) - shift; long off0 = 0; long off1 = off0 + dim0[i] * ostrs[i] / CFL_SIZE; long off2 = off1 + dim1[i] * ostrs[i] / CFL_SIZE; long off3 = off2 + dim2[i] * ostrs[i] / CFL_SIZE; pos[i] = 0; info += blockproc_shift_mult2( D, dim0, blkdims, pos, mult, data, op, ostrs, dst + off0, istrs, src + off0 ); info += blockproc_shift_mult2( D, dim1, blkdims, pos, mult, data, op, ostrs, dst + off1, istrs, src + off1 ); info += blockproc_shift_mult2( D, dim2, blkdims, pos, mult, data, op, ostrs, dst + off2, istrs, src + off2 ); info += blockproc_shift_mult2( D, dim3, blkdims, pos, mult, data, op, ostrs, dst + off3, istrs, src + off3 ); return info; } float blockproc_shift_mult( unsigned int D, const long dims[D], const long blkdims[D], const long shifts[D], const long mult[D], const void* data, float (*op)(const void* data, 
const long blkdims[D], complex float* dst, const complex float* src), complex float* dst, const complex float* src ) { long strs[D]; md_calc_strides( D, strs, dims, CFL_SIZE ); return blockproc_shift_mult2( D, dims, blkdims, shifts, mult, data, op, strs, dst, strs, src ); } float blockproc_shift2( unsigned int D, const long dims[D], const long blkdims[D], const long shifts[D], const void* data, float (*op)(const void* data, const long blkdims[D], complex float* dst, const complex float* src), const long ostrs[D], complex float* dst, const long istrs[D], const complex float* src ) { float info = 0; long pos[D]; for (unsigned int i = 0; i < D; i++) { pos[i] = shifts[i]; while (pos[i] < 0) pos[i] += dims[i]; } unsigned int i = 0; while ((i < D) && (0 == pos[i])) i++; if (D == i) { info += blockproc2( D, dims, blkdims, data, op, ostrs, dst, istrs, src ); return info; } long shift = pos[i]; assert(shift != 0); long dim1[D]; long dim2[D]; md_copy_dims(D, dim1, dims); md_copy_dims(D, dim2, dims); dim1[i] = shift; dim2[i] = dims[i] - shift; pos[i] = 0; info += blockproc_shift2( D, dim1, blkdims, pos, data, op, ostrs, dst, istrs, src ); info += blockproc_shift2( D, dim2, blkdims, pos, data, op, ostrs, dst + dim1[i]*ostrs[i]/CFL_SIZE, istrs, src + dim1[i]*istrs[i]/CFL_SIZE ); return info; } float blockproc_shift( unsigned int D, const long dims[D], const long blkdims[D], const long shifts[D], const void* data, float (*op)(const void* data, const long blkdims[D], complex float* dst, const complex float* src), complex float* dst, const complex float* src ) { long strs[D]; md_calc_strides( D, strs, dims, CFL_SIZE ); return blockproc_shift2( D, dims, blkdims, shifts, data, op, strs, dst, strs, src ); } float blockproc_circshift( unsigned int D, const long dims[D], const long blkdims[D], const long shifts[D], const void* data, float (*op)(const void* data, const long blkdims[D], complex float* dst, const complex float* src), complex float* dst, const complex float* src ) { 
complex float* tmp = md_alloc( D, dims, CFL_SIZE ); long unshifts[D]; for (unsigned int i = 0; i < D; i++) unshifts[i] = -shifts[i]; md_circ_shift( D, dims, shifts, tmp, src, CFL_SIZE ); return blockproc( D, dims, blkdims, data, op, tmp, tmp ); md_circ_shift( D, dims, unshifts, dst, tmp, CFL_SIZE ); md_free( tmp ); } float blockproc2( unsigned int D, const long dims[D], const long blkdims[D], const void* data, float (*op)(const void* data, const long blkdims[D], complex float* dst, const complex float* src), const long ostrs[D], complex float* dst, const long istrs[D], const complex float* src ) { // Get number of blocks per dimension long nblocks[D]; for (unsigned int i = 0; i < D; i++) { nblocks[i] = (float) ( dims[i] + blkdims[i] - 1 ) / (float) blkdims[i]; } long numblocks = md_calc_size( D, nblocks ); float info = 0; // Loop over blocks complex float* blk = md_alloc_sameplace(D, blkdims, sizeof( complex float ), src); for (long b = 0; b < numblocks; b++) { // Get block position and actual block size long blkpos[D]; long blkdims_b[D]; // actual block size long ind = b; for ( unsigned int i = 0; i < D; i++) { long blkind = ind % nblocks[i]; blkpos[i] = blkind * blkdims[i]; ind = (ind - blkind) / nblocks[i]; blkdims_b[i] = MIN( dims[i] - blkpos[i], blkdims[i] ); } long blkstrs[D]; md_calc_strides( D, blkstrs, blkdims_b, CFL_SIZE ); // Extract block md_copy_block2( D, blkpos, blkdims_b, blkstrs, blk, dims, istrs, src, sizeof(complex float) ); // Process block info += op( data, blkdims_b, blk, blk ); // Put back block md_copy_block2( D, blkpos, dims, ostrs, dst, blkdims_b, blkstrs, blk, sizeof( complex float )); } md_free( blk ); return info; } float blockproc( unsigned int D, const long dims[D], const long blkdims[D], const void* data, float (*op)(const void* data, const long blkdims[D], complex float* dst, const complex float* src), complex float* dst, const complex float* src ) { long strs[D]; md_calc_strides( D, strs, dims, CFL_SIZE ); return blockproc2( D, 
dims, blkdims, data, op, strs, dst, strs, src ); } float stackproc2( unsigned int D, const long dims[D], const long blkdims[D], unsigned int stkdim, const void* data, float (*op)(const void* data, const long stkdims[D], complex float* dst, const complex float* src), const long ostrs[D], complex float* dst, const long istrs[D], const complex float* src ) { // Get number of blocks per dimension long nblocks[D]; for (unsigned int i = 0; i < D; i++) { nblocks[i] = (float) ( dims[i] + blkdims[i] - 1 ) / (float) blkdims[i]; } long numblocks = md_calc_size( D, nblocks ); float info = 0; // Initialize stack long stkdims[D]; md_copy_dims( D, stkdims, blkdims ); stkdims[stkdim] = numblocks; long stkstrs[D]; md_calc_strides( D, stkstrs, stkdims, CFL_SIZE ); long stkstr1[D]; md_calc_strides( D, stkstr1, stkdims, 1 ); complex float* stk = md_alloc(D, stkdims, sizeof( complex float )); md_clear( D, stkdims, stk, sizeof( complex float ) ); // Loop over blocks and stack them up for (long b = 0; b < numblocks; b++) { // Get block position and actual block size long blkpos[D]; long blkdims_b[D]; // actual block size long ind = b; for ( unsigned int i = 0; i < D; i++) { long blkind = ind % nblocks[i]; blkpos[i] = blkind * blkdims[i]; ind = (ind - blkind) / nblocks[i]; blkdims_b[i] = MIN( dims[i] - blkpos[i], blkdims[i] ); } long blkstrs[D]; md_calc_strides( D, blkstrs, blkdims_b, CFL_SIZE ); // Extract block and put in stack md_copy_block2( D, blkpos, blkdims_b, blkstrs, stk + stkstr1[stkdim] * b, dims, istrs, src, sizeof(complex float) ); } long blkstrs[D]; md_calc_strides( D, blkstrs, blkdims, CFL_SIZE ); // Process block info = op( data, stkdims, stk, stk ); // Put back block for (long b = 0; b < numblocks; b++) { // Get block position and actual block size long blkpos[D]; long blkdims_b[D]; // actual block size long ind = b; for ( unsigned int i = 0; i < D; i++) { long blkind = ind % nblocks[i]; blkpos[i] = blkind * blkdims[i]; ind = (ind - blkind) / nblocks[i]; blkdims_b[i] = 
MIN( dims[i] - blkpos[i], blkdims[i] ); } long blkstrs[D]; md_calc_strides( D, blkstrs, blkdims_b, CFL_SIZE ); // Put back block md_copy_block2( D, blkpos, dims, ostrs, dst, blkdims_b, blkstrs, stk + stkstr1[stkdim] * b, sizeof( complex float )); } // Free stack md_free( stk ); return info; } float stackproc( unsigned int D, const long dims[D], const long blkdims[D], unsigned int stkdim, const void* data, float (*op)(const void* data, const long stkdims[D], complex float* dst, const complex float* src), complex float* dst, const complex float* src ) { long strs[D]; md_calc_strides( D, strs, dims, CFL_SIZE ); return stackproc2( D, dims, blkdims, stkdim, data, op, strs, dst, strs, src ); } bart-0.4.02/src/num/blockproc.h000066400000000000000000000077121320577655200162650ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifndef __BLOCKPROC #define __BLOCKPROC #include "misc/cppwrap.h" extern float lineproc2(unsigned int D, const long dims[__VLA(D)], const long blkdims[__VLA(D)], const long line_dims[__VLA(D)], const void * data, float (*op)(const void* data, const long blkdims[__VLA(D)], complex float* dst, const complex float* src), const long ostrs[__VLA(D)], complex float* dst, const long istrs[__VLA(D)], const complex float* src); extern float lineproc(unsigned int D, const long dims[__VLA(D)], const long blkdims[__VLA(D)], const long line_dims[__VLA(D)], const void * data, float (*op)(const void* data, const long blkdims[__VLA(D)], complex float* dst, const complex float* src), complex float* dst, const complex float* src); extern float blockproc2(unsigned int D, const long dims[__VLA(D)], const long blkdims[__VLA(D)], const void * data, float (*op)(const void* data, const long blkdims[__VLA(D)], complex float* dst, const complex float* src), const long ostrs[__VLA(D)], complex float* dst, const 
long istrs[__VLA(D)], const complex float* src); extern float blockproc(unsigned int D, const long dims[__VLA(D)], const long blkdims[__VLA(D)], const void * data, float (*op)(const void* data, const long blkdims[__VLA(D)], complex float* dst, const complex float* src), complex float* dst, const complex float* src); extern float blockproc_shift2(unsigned int D, const long dims[__VLA(D)], const long blkdims[__VLA(D)], const long shifts[__VLA(D)], const void* data, float (*op)(const void* data, const long blkdims[__VLA(D)], complex float* dst, const complex float* src), const long ostrs[__VLA(D)], complex float* dst, const long istrs[__VLA(D)], const complex float* src); extern float blockproc_shift(unsigned int D, const long dims[__VLA(D)], const long blkdims[__VLA(D)], const long shifts[__VLA(D)], const void* data, float (*op)(const void* data, const long blkdims[__VLA(D)], complex float* dst, const complex float* src), complex float* dst, const complex float* src); extern float blockproc_circshift(unsigned int D, const long dims[__VLA(D)], const long blkdims[__VLA(D)], const long shifts[__VLA(D)], const void* data, float (*op)(const void* data, const long blkdims[__VLA(D)], complex float* dst, const complex float* src), complex float* dst, const complex float* src); extern float blockproc_shift_mult2(unsigned int D, const long dims[__VLA(D)], const long blkdims[__VLA(D)], const long shifts[__VLA(D)], const long mult[__VLA(D)], const void* data, float (*op)(const void* data, const long blkdims[__VLA(D)], complex float* dst, const complex float* src), const long ostrs[__VLA(D)], complex float* dst, const long istrs[__VLA(D)], const complex float* src); extern float blockproc_shift_mult(unsigned int D, const long dims[__VLA(D)], const long blkdims[__VLA(D)], const long shifts[__VLA(D)], const long mult[__VLA(D)], const void* data, float (*op)(const void* data, const long blkdims[__VLA(D)], complex float* dst, const complex float* src), complex float* dst, const 
complex float* src); extern float stackproc2(unsigned int D, const long dims[__VLA(D)], const long blkdims[__VLA(D)], unsigned int stkdim, const void* data, float (*op)(const void* data, const long stkdims[__VLA(D)], complex float* dst, const complex float* src), const long ostrs[__VLA(D)], complex float* dst, const long istrs[__VLA(D)], const complex float* src); extern float stackproc(unsigned int D, const long dims[__VLA(D)], const long blkdims[__VLA(D)], unsigned int stkdim, const void* data, float (*op)(const void* data, const long stkdims[__VLA(D)], complex float* dst, const complex float* src), complex float* dst, const complex float* src); #include "misc/cppwrap.h" #endif bart-0.4.02/src/num/casorati.c000066400000000000000000000077341320577655200161130ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012 Martin Uecker * 2014 Frank Ong */ #include #include #include "num/multind.h" #include "num/flpmath.h" #include "casorati.h" static void calc_casorati_geom(unsigned int N, long dimc[2 * N], long str2[2 * N], const long dimk[N], const long dim[N], const long str[N]) { for (unsigned int i = 0; i < N; i++) { assert(dim[i] >= dimk[i]); dimc[i + 0] = dim[i] - dimk[i] + 1; // number of shifted blocks dimc[i + N] = dimk[i]; // size of blocks str2[i + N] = str[i]; // by having the same strides str2[i + 0] = str[i]; // we can address overlapping blocks } } void casorati_dims(unsigned int N, long odim[2], const long dimk[N], const long dim[N]) { long dimc[2 * N]; for (unsigned int i = 0; i < N; i++) { assert(dim[i] >= dimk[i]); dimc[i + 0] = dim[i] - dimk[i] + 1; // number of shifted blocks dimc[i + N] = dimk[i]; // size of blocks } odim[0] = md_calc_size(N, dimc + 0); odim[1] = md_calc_size(N, dimc + N); } void casorati_matrix(unsigned int N, const long dimk[N], const long odim[2], 
complex float* optr, const long dim[N], const long str[N], const complex float* iptr) { long str2[2 * N]; long strc[2 * N]; long dimc[2 * N]; calc_casorati_geom(N, dimc, str2, dimk, dim, str); assert(odim[0] == md_calc_size(N, dimc + 0)); // all shifts are collapsed assert(odim[1] == md_calc_size(N, dimc + N)); // linearized size of a block md_calc_strides(2 * N, strc, dimc, CFL_SIZE); md_copy2(2 * N, dimc, strc, optr, str2, iptr, CFL_SIZE); } void casorati_matrixH(unsigned int N, const long dimk[N], const long dim[N], const long str[N], complex float* optr, const long odim[2], const complex float* iptr) { long str2[2 * N]; long strc[2 * N]; long dimc[2 * N]; calc_casorati_geom(N, dimc, str2, dimk, dim, str); assert(odim[0] == md_calc_size(N, dimc)); assert(odim[1] == md_calc_size(N, dimc + N)); md_clear(N, dim, optr, CFL_SIZE); md_calc_strides(2 * N, strc, dimc, CFL_SIZE); md_zadd2(2 * N, dimc, str2, optr, str2, optr, strc, iptr); } static void calc_basorati_geom(unsigned int N, long dimc[2 * N], long str2[2 * N], const long dimk[N], const long dim[N], const long str[N]) { for (unsigned int i = 0; i < N; i++) { dimc[i + 0] = dimk[i]; // size of blocks dimc[i + N] = dim[i] / dimk[i]; // number of shifted blocks str2[i + 0] = str[i]; str2[i + N] = str[i] * dimk[i]; } } void basorati_dims(unsigned int N, long odim[2], const long dimk[N], const long dim[N]) { long dimc[2 * N]; for (unsigned int i = 0; i < N; i++) { assert(0 == dim[i] % dimk[i]); dimc[i + 0] = dimk[i]; // size of blocks dimc[i + N] = dim[i] / dimk[i]; // number of shifted blocks } odim[0] = md_calc_size(N, dimc + 0); odim[1] = md_calc_size(N, dimc + N); } void basorati_matrix(unsigned int N, const long dimk[N], const long odim[2], complex float* optr, const long dim[N], const long str[N], const complex float* iptr) { long str2[2 * N]; long strc[2 * N]; long dimc[2 * N]; calc_basorati_geom(N, dimc, str2, dimk, dim, str); assert(odim[0] == md_calc_size(N, dimc + 0)); // all shifts are collapsed 
assert(odim[1] == md_calc_size(N, dimc + N)); // linearized size of a block md_calc_strides(2 * N, strc, dimc, CFL_SIZE); md_copy2(2 * N, dimc, strc, optr, str2, iptr, CFL_SIZE); } void basorati_matrixH(unsigned int N, const long dimk[N], const long dim[N], const long str[N], complex float* optr, const long odim[2], const complex float* iptr) { long str2[2 * N]; long strc[2 * N]; long dimc[2 * N]; calc_basorati_geom(N, dimc, str2, dimk, dim, str); assert(odim[0] == md_calc_size(N, dimc)); assert(odim[1] == md_calc_size(N, dimc + N)); md_calc_strides(2 * N, strc, dimc, CFL_SIZE); md_copy2(2 * N, dimc, str2, optr, strc, iptr, CFL_SIZE); } bart-0.4.02/src/num/casorati.h000066400000000000000000000023501320577655200161050ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include "misc/cppwrap.h" extern void casorati_dims(unsigned int N, long odim[2], const long dimk[__VLA(N)], const long dims[__VLA(N)]); extern void casorati_matrix(unsigned int N, const long dimk[__VLA(N)], const long odim[2], _Complex float* optr, const long dim[__VLA(N)], const long str[__VLA(N)], const _Complex float* iptr); extern void casorati_matrixH(unsigned int N, const long dimk[__VLA(N)], const long dim[__VLA(N)], const long str[__VLA(N)], _Complex float* optr, const long odim[2], const _Complex float* iptr); extern void basorati_dims(unsigned int N, long odim[2], const long dimk[__VLA(N)], const long dims[__VLA(N)]); extern void basorati_matrix(unsigned int N, const long dimk[__VLA(N)], const long odim[2], _Complex float* optr, const long dim[__VLA(N)], const long str[__VLA(N)], const _Complex float* iptr); extern void basorati_matrixH(unsigned int N, const long dimk[__VLA(N)], const long dim[__VLA(N)], const long str[__VLA(N)], _Complex float* optr, const long odim[2], const _Complex float* iptr); #include 
"misc/cppwrap.h" bart-0.4.02/src/num/chebfun.c000066400000000000000000000101461320577655200157070ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include #include #include #include #include "num/fft.h" #include "num/flpmath.h" #include "misc/misc.h" #include "num/chebfun.h" static void fft1(int N, complex float tmp[N], const complex float ext[N]) { fft(1, (long[1]){ N }, 1, tmp, ext); md_zsmul(1, (long[1]){ N }, tmp, tmp, 1. / sqrt((double)N)); } static void ifft1(int N, complex float tmp[N], const complex float ext[N]) { ifft(1, (long[1]){ N }, 1, tmp, ext); md_zsmul(1, (long[1]){ N }, tmp, tmp, 1. / sqrt((double)N)); } void chebpoly(int N, float coeff[N], const float val[N]) { complex float ext[(N - 1) * 2]; complex float tmp[(N - 1) * 2]; for (int i = 0; i < N - 1; i++) ext[i] = val[i]; for (int i = 0; i < N - 1; i++) ext[N - 1 + i] = val[N - 1 - i]; fft1((N - 1) * 2, tmp, ext); for (int i = 0; i < N; i++) coeff[i] = crealf(tmp[i]) * sqrt((N - 1) * 2) / (N - 1); // strange? coeff[0] /= 2.; coeff[N - 1] /= 2.; } void chebinv(int N, float val[N], const float coeff[N]) { complex float ext[(N - 1) * 2]; complex float tmp[(N - 1) * 2]; for (int i = 0; i < N; i++) ext[i] = coeff[i] / sqrt((N - 1) * 2) * (N - 1); for (int i = N; i < (N - 1) * 2; i++) ext[i] = 0.; ifft1((N - 1) * 2, tmp, ext); for (int i = 0; i < N; i++) val[i] = crealf(tmp[i]) * 2.; } static void resample(int A, int B, float dst[A], const float src[B]) { for (int i = 0; i < A; i++) dst[i] = 0.; for (int i = 0; i < B; i++) dst[i % A] = src[i]; } float chebeval(float x, int N, const float pval[N]) { float norm = 0.; float val = 0.; for (int i = 0; i < N; i++) { float dist = x - cosf(M_PI * (float)i / (float)(N - 1)); if (0. == dist) return pval[i]; float weight = ((0 == i % 2) ? 1. : -1.) 
/ dist; if ((0 == i) || (N - 1 == i)) weight /= 2.; norm += weight; val += weight * pval[i]; } return val / norm; } void chebadd(int A, int B, float dst[(A > B) ? A : B], const float src1[A], const float src2[B]) { int N = (A > B) ? A : B; float tmp1[N]; float tmp2[N]; resample(N, A, tmp1, src1); resample(N, B, tmp2, src2); for (int i = 0; i < N; i++) dst[i] = tmp1[i] + tmp2[i]; } void chebmul(int A, int B, float dst[A + B], const float src1[A], const float src2[B]) { int N = A + B; float tmp1[N]; float tmp2[N]; resample(N, A, tmp1, src1); resample(N, B, tmp2, src2); float val1[N]; float val2[N]; chebinv(N, val1, tmp1); chebinv(N, val2, tmp2); for (int i = 0; i < N; i++) val1[i] *= val2[i]; chebpoly(N, dst, val1); } float chebint(int N, const float coeff[N]) { double sum = 0.; for (int i = 0; i < N; i += 2) sum += coeff[i] * 2. / (1. - (float)(i * i)); return sum; } void chebindint(int N, float dst[N + 1], const float src[N]) { for (int i = 0; i < N + 1; i++) dst[i] = 0; dst[1] += src[0]; dst[2] += src[1] / 4.; for (int i = 2; i < N; i++) { dst[i - 1] -= src[i] / (2. * (i - 1)); dst[i + 1] += src[i] / (2. * (i + 1)); } } void chebdiff(int N, float dst[N - 1], const float src[N]) { dst[N - 2] = 2. * (N - 1) * src[N - 1]; dst[N - 3] = 2. * (N - 2) * src[N - 2]; for (int i = N - 4; i > 0; i--) dst[i] = dst[i + 2] + 2. * (i + 1) * src[i + 1]; dst[0] = dst[2] / 2. + src[1]; } void chebfun2(int N, float coeff[N], float (*fun)(float x)) { float val[N]; for (int i = 0; i < N; i++) val[i] = fun(cosf(M_PI * (float)i / (float)(N - 1))); chebpoly(N, coeff, val); } float* chebfun(int* NP, float (*fun)(float x)) { int N = 129; while(true) { float coeff[N]; chebfun2(N, coeff, fun); int maxind = 0; for (int i = 0; i < N; i++) if (fabsf(coeff[maxind]) < fabsf(coeff[i])) maxind = i; if (coeff[N - 1] < 2. * FLT_EPSILON * coeff[maxind]) { while ( (coeff[N - 1] < 2. * FLT_EPSILON * coeff[maxind]) && (coeff[N - 2] < 2. 
* FLT_EPSILON * coeff[maxind])) N -= 2; float* out = xmalloc(sizeof(float) * N); for (int i = 0; i < N; i++) out[i] = coeff[i]; *NP = N; return out; } N = (N - 1) * 2 + 1; } } bart-0.4.02/src/num/chebfun.h000066400000000000000000000012761320577655200157200ustar00rootroot00000000000000 extern float* chebfun(int* NP, float (*fun)(float x)); extern void chebfun2(int N, float coeff[N], float (*fun)(float x)); extern void chebpoly(int N, float coeff[N], const float val[N]); extern void chebinv(int N, float val[N], const float coeff[N]); extern void chebadd(int A, int B, float dst[(A > B) ? A : B], const float src1[A], const float src2[B]); extern void chebmul(int A, int B, float dst[A + B], const float src1[A], const float src2[B]); extern float chebint(int N, const float coeff[N]); extern float chebeval(float x, int N, const float pval[N]); extern void chebdiff(int N, float dst[N - 1], const float src[N]); extern void chebindint(int N, float dst[N + 1], const float src[N]); bart-0.4.02/src/num/conv.c000066400000000000000000000171331320577655200152450ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012, 2017 Martin Uecker */ #include #include #include #include "num/fft.h" #include "num/multind.h" #include "num/flpmath.h" #include "misc/misc.h" #include "conv.h" struct conv_plan { enum conv_mode cmode; enum conv_type ctype; int N; unsigned int flags; struct fft_plan_s* fft_plan; long* idims; long* odims; long* dims; long* dims1; long* dims2; long* str1; long* str2; long* kdims; long* kstr; complex float* kernel; }; struct conv_plan* conv_plan(int N, unsigned int flags, enum conv_type ctype, enum conv_mode cmode, const long odims[N], const long idims1[N], const long idims2[N], const complex float* src2) { PTR_ALLOC(struct conv_plan, plan); plan->N = N; plan->flags = flags; plan->cmode = cmode; plan->ctype = ctype; plan->dims = *TYPE_ALLOC(long[N]); plan->dims1 = *TYPE_ALLOC(long[N]); plan->dims2 = *TYPE_ALLOC(long[N]); plan->kdims = *TYPE_ALLOC(long[N]); plan->idims = *TYPE_ALLOC(long[N]); plan->odims = *TYPE_ALLOC(long[N]); complex float U = 1.; for (int i = 0; i < N; i++) { plan->idims[i] = idims1[i]; plan->odims[i] = odims[i]; if (MD_IS_SET(flags, i)) { assert(idims2[i] <= idims1[i]); if (cmode == CONV_SYMMETRIC) { assert((0 == idims1[i] % 2) && (1 == idims2[i] % 2)); if (1 == (idims1[i] / 2) % 2) U *= -1.i; } switch (ctype) { case CONV_CYCLIC: assert(odims[i] == idims1[i]); plan->dims1[i] = idims1[i]; plan->dims2[i] = odims[i]; break; case CONV_TRUNCATED: assert(odims[i] == idims1[i]); plan->dims1[i] = idims1[i] + idims2[i] - 1; plan->dims2[i] = odims[i] + idims2[i] - 1; break; case CONV_VALID: plan->dims1[i] = odims[i] + idims2[i] - 1; plan->dims2[i] = odims[i] + idims2[i] - 1; assert(idims1[i] == plan->dims1[i]); break; case CONV_EXTENDED: plan->dims1[i] = idims1[i] + idims2[i] - 1; plan->dims2[i] = idims1[i] + idims2[i] - 1; assert(odims[i] == plan->dims2[i]); } plan->kdims[i] = (1 == idims2[i]) ? 
1 : plan->dims1[i]; U *= (float)plan->dims1[i]; } else { // O I K: // X X X // X X 1 // X 1 X // X 1 1 (inefficient) // 1 X X assert((1 == idims1[i]) || (idims1[i] == odims[i]) || (idims1[i] == idims2[i])); assert((1 == idims2[i]) || (idims2[i] == odims[i]) || (idims2[i] == idims1[i])); assert((1 == odims[i]) || (idims2[i] == odims[i]) || (idims1[i] == odims[i])); plan->dims1[i] = idims1[i]; plan->dims2[i] = odims[i]; plan->kdims[i] = idims2[i]; } plan->dims[i] = MAX(plan->dims1[i], plan->dims2[i]); } plan->str1 = *TYPE_ALLOC(long[N]); plan->str2 = *TYPE_ALLOC(long[N]); plan->kstr = *TYPE_ALLOC(long[N]); md_calc_strides(N, plan->str1, plan->dims1, CFL_SIZE); md_calc_strides(N, plan->str2, plan->dims2, CFL_SIZE); md_calc_strides(N, plan->kstr, plan->kdims, CFL_SIZE); plan->kernel = md_alloc_sameplace(N, plan->kdims, CFL_SIZE, src2); switch (cmode) { case CONV_SYMMETRIC: md_resize_center(N, plan->kdims, plan->kernel, idims2, src2, CFL_SIZE); ifft(N, plan->kdims, flags, plan->kernel, plan->kernel); fftmod(N, plan->kdims, flags, plan->kernel, plan->kernel); break; case CONV_CAUSAL: md_resize(N, plan->kdims, plan->kernel, idims2, src2, CFL_SIZE); ifft(N, plan->kdims, flags, plan->kernel, plan->kernel); break; case CONV_ANTICAUSAL: md_resize(N, plan->kdims, plan->kernel, idims2, src2, CFL_SIZE); fft(N, plan->kdims, flags, plan->kernel, plan->kernel); break; default: assert(0); } md_zsmul(N, plan->kdims, plan->kernel, plan->kernel, 1. 
/ U); // plan->fftplan = fft_plan(N, plan->dims, plan->flags); return PTR_PASS(plan); } void conv_free(struct conv_plan* plan) { md_free(plan->kernel); // fft_free_plan free(plan->dims); free(plan->dims1); free(plan->dims2); free(plan->kdims); free(plan->str1); free(plan->str2); free(plan->kstr); free(plan->idims); free(plan->odims); free(plan); } static void conv_cyclic(struct conv_plan* plan, complex float* dst, const complex float* src1) { // FIXME: optimize tmp away when possible complex float* tmp = md_alloc_sameplace(plan->N, plan->dims1, CFL_SIZE, plan->kernel); ifft(plan->N, plan->dims1, plan->flags, tmp, src1); md_clear(plan->N, plan->dims2, dst, CFL_SIZE); md_zfmac2(plan->N, plan->dims, plan->str2, dst, plan->str1, tmp, plan->kstr, plan->kernel); // md_zmul2(plan->N, plan->dims, plan->str2, dst, plan->str1, tmp, plan->kstr, plan->kernel); fft(plan->N, plan->dims2, plan->flags, dst, dst); md_free(tmp); } static void conv_cyclicH(struct conv_plan* plan, complex float* dst, const complex float* src1) { complex float* tmp = md_alloc_sameplace(plan->N, plan->dims1, CFL_SIZE, plan->kernel); ifft(plan->N, plan->dims2, plan->flags, tmp, src1); md_clear(plan->N, plan->dims1, dst, CFL_SIZE); md_zfmacc2(plan->N, plan->dims, plan->str1, dst, plan->str2, tmp, plan->kstr, plan->kernel); //md_zmulc2(plan->N, plan->dims1, plan->str1, dst, plan->str2, tmp, plan->kstr, plan->kernel); fft(plan->N, plan->dims1, plan->flags, dst, dst); md_free(tmp); } void conv_exec(struct conv_plan* plan, complex float* dst, const complex float* src1) { bool crop = (CONV_SYMMETRIC == plan->cmode); bool pre = (CONV_TRUNCATED == plan->ctype) || (CONV_EXTENDED == plan->ctype); bool post = (CONV_TRUNCATED == plan->ctype) || (CONV_VALID == plan->ctype); complex float* tmp = NULL; if (pre || post) { tmp = md_alloc_sameplace(plan->N, plan->dims1, CFL_SIZE, plan->kernel); } if (pre) (crop ? 
md_resize_center : md_resize)(plan->N, plan->dims1, tmp, plan->idims, src1, CFL_SIZE); conv_cyclic(plan, post ? tmp : dst, pre ? tmp : src1); if (post) (crop ? md_resize_center : md_resize)(plan->N, plan->odims, dst, plan->dims2, tmp, CFL_SIZE); if (pre || post) md_free(tmp); } void conv_adjoint(struct conv_plan* plan, complex float* dst, const complex float* src1) { bool crop = (CONV_SYMMETRIC == plan->cmode); bool post = (CONV_TRUNCATED == plan->ctype) || (CONV_EXTENDED == plan->ctype); bool pre = (CONV_TRUNCATED == plan->ctype) || (CONV_VALID == plan->ctype); complex float* tmp = NULL; if (pre || post) { tmp = md_alloc_sameplace(plan->N, plan->dims1, CFL_SIZE, plan->kernel); } if (pre) (crop ? md_resize_center : md_resize)(plan->N, plan->dims2, tmp, plan->odims, src1, CFL_SIZE); conv_cyclicH(plan, post ? tmp : dst, pre ? tmp : src1); if (post) (crop ? md_resize_center : md_resize)(plan->N, plan->idims, dst, plan->dims1, tmp, CFL_SIZE); if (pre || post) md_free(tmp); } void conv(int N, unsigned int flags, enum conv_type ctype, enum conv_mode cmode, const long odims[N], complex float* dst, const long idims1[N], const complex float* src1, const long idims2[N], const complex float* src2) { struct conv_plan* plan = conv_plan(N, flags, ctype, cmode, odims, idims1, idims2, src2); conv_exec(plan, dst, src1); conv_free(plan); } void convH(int N, unsigned int flags, enum conv_type ctype, enum conv_mode cmode, const long odims[N], complex float* dst, const long idims1[N], const complex float* src1, const long idims2[N], const complex float* src2) { struct conv_plan* plan = conv_plan(N, flags, ctype, cmode, idims1, odims, idims2, src2); // idims1 <-> odims conv_adjoint(plan, dst, src1); conv_free(plan); } bart-0.4.02/src/num/conv.h000066400000000000000000000027421320577655200152520ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * 2012-10-28 Martin Uecker */ #include #include "misc/cppwrap.h" #ifndef __CONV_ENUMS #define __CONV_ENUMS enum conv_mode { CONV_SYMMETRIC, CONV_CAUSAL, CONV_ANTICAUSAL }; enum conv_type { CONV_CYCLIC, CONV_TRUNCATED, CONV_VALID, CONV_EXTENDED }; #endif struct conv_plan; extern struct conv_plan* conv_plan(int N, unsigned int flags, enum conv_type ctype, enum conv_mode cmode, const long odims[__VLA(N)], const long idims1[__VLA(N)], const long idims2[__VLA(N)], const complex float* src2); extern void conv_exec(struct conv_plan* plan, complex float* dst, const complex float* src1); extern void conv_adjoint(struct conv_plan* plan, complex float* dst, const complex float* src1); extern void conv_free(struct conv_plan* plan); extern void conv(int N, unsigned int flags, enum conv_type ctype, enum conv_mode cmode, const long odims[__VLA(N)], complex float* dst, const long idims1[__VLA(N)], const complex float* src1, const long idims2[__VLA(N)], const complex float* src2); extern void convH(int N, unsigned int flags, enum conv_type ctype, enum conv_mode cmode, const long odims[__VLA(N)], complex float* dst, const long idims1[__VLA(N)], const complex float* src1, const long idims2[__VLA(N)], const complex float* src2); #include "misc/cppwrap.h" bart-0.4.02/src/num/convoaa.c000066400000000000000000000432621320577655200157300ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012 Martin Uecker uecker@eecs.berkeley.edu */ #include #include "misc/misc.h" #include "num/multind.h" #include "num/flpmath.h" #include "num/conv.h" #include "num/vecops.h" #include "convoaa.h" void overlapandadd(int N, const long dims[N], const long blk[N], complex float* dst, complex float* src1, const long dim2[N], complex float* src2) { long ndims[2 * N]; long L[2 * N]; long ndim2[2 * N]; long ndim3[2 * N]; for (int i = 0; i < N; i++) { assert(0 == dims[i] % blk[i]); assert(dim2[i] <= blk[i]); ndims[i * 2 + 1] = dims[i] / blk[i]; ndims[i * 2 + 0] = blk[i]; L[i * 2 + 1] = dims[i] / blk[i]; L[i * 2 + 0] = blk[i] + dim2[i] - 1; ndim2[i * 2 + 1] = 1; ndim2[i * 2 + 0] = dim2[i]; ndim3[i * 2 + 1] = dims[i] / blk[i] + 1; ndim3[i * 2 + 0] = blk[i]; } complex float* tmp = md_alloc(2 * N, L, CFL_SIZE); // conv_causal_extend(2 * N, L, tmp, ndims, src1, ndim2, src2); conv(2 * N, ~0, CONV_EXTENDED, CONV_CAUSAL, L, tmp, ndims, src1, ndim2, src2); // [------++++|||||||| //long str1[2 * N]; long str2[2 * N]; long str3[2 * N]; //md_calc_strides(2 * N, str1, ndims, 8); md_calc_strides(2 * N, str2, L, 8); md_calc_strides(2 * N, str3, ndim3, 8); md_clear(2 * N, ndim3, dst, CFL_SIZE); md_zadd2(2 * N, L, str3, dst, str3, dst, str2, tmp); md_free(tmp); } void overlapandsave(int N, const long dims[N], const long blk[N], complex float* dst, complex float* src1, const long dim2[N], complex float* src2) { // [------++++ // [------ long ndims[2 * N]; long L[2 * N]; long ndim2[2 * N]; long ndim3[2 * N]; for (int i = 0; i < N; i++) { assert(0 == dims[i] % blk[i]); assert(dim2[i] <= blk[i]); ndims[i * 2 + 1] = dims[i] / blk[i]; ndims[i * 2 + 0] = blk[i]; L[i * 2 + 1] = dims[i] / blk[i]; L[i * 2 + 0] = blk[i] + dim2[i] - 1; ndim2[i * 2 + 1] = 1; ndim2[i * 2 + 0] = dim2[i]; ndim3[i * 2 + 1] = dims[i] / blk[i] - 0; ndim3[i * 2 + 0] = blk[i]; } complex float* tmp = md_alloc(2 * N, L, CFL_SIZE); long str1[2 * N]; long str2[2 * N]; long str3[2 * N]; md_calc_strides(2 * N, str1, 
ndims, 8); md_calc_strides(2 * N, str2, L, 8); md_calc_strides(2 * N, str3, ndim3, 8); md_clear(2 * N, L, tmp, 8); md_copy2(2 * N, ndim3, str2, tmp, str1, src1, 8); conv(2 * N, ~0, CONV_VALID, CONV_CAUSAL, ndims, dst, L, tmp, ndim2, src2); md_free(tmp); } #if 0 struct conv_plan* overlapandsave_plan(int N, const long dims[N], const long blk[N], const long dim2[N], complex float* src2) { return conv_plan(2 * N, ~0, CONV_VALID, CONV_CAUSAL, ndims, L, ndim2, src2); } void overlapandsave_exec(struct conv_plan* plan, int N, const long dims[N], const long blk[N], complex float* dst, complex float* src1, const long dim2[N]) { md_clear(2 * N, L, tmp, 8); md_copy2(2 * N, ndim3, str2, tmp, str1, src1, 8); conv_exec(plan, dst, tmp); free(tmp); } #endif void overlapandsave2(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], const complex float* src1, const long dims2[N], const complex float* src2) { long dims1B[N]; long tdims[2 * N]; long nodims[2 * N]; long ndims1[2 * N]; long ndims2[2 * N]; long shift[2 * N]; unsigned int nflags = 0; for (int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { nflags = MD_SET(nflags, 2 * i); assert(1 == dims2[i] % 2); assert(0 == blk[i] % 2); assert(0 == dims1[i] % 2); assert(0 == odims[i] % blk[i]); assert(0 == dims1[i] % blk[i]); assert(dims1[i] == odims[i]); assert(dims2[i] <= blk[i]); assert(dims1[i] >= dims2[i]); // blocked output nodims[i * 2 + 1] = odims[i] / blk[i]; nodims[i * 2 + 0] = blk[i]; // expanded temporary storage tdims[i * 2 + 1] = dims1[i] / blk[i]; tdims[i * 2 + 0] = blk[i] + dims2[i] - 1; // blocked input // ---|---,---,---|--- // + +++ + // + +++ + // resized input dims1B[i] = dims1[i] + 2 * blk[i]; ndims1[i * 2 + 1] = dims1[i] / blk[i] + 2; // do we need two full blocks? 
ndims1[i * 2 + 0] = blk[i]; shift[i * 2 + 1] = 0; shift[i * 2 + 0] = blk[i] - (dims2[i] - 1) / 2; // kernel ndims2[i * 2 + 1] = 1; ndims2[i * 2 + 0] = dims2[i]; } else { nodims[i * 2 + 1] = 1; nodims[i * 2 + 0] = odims[i]; tdims[i * 2 + 1] = 1; tdims[i * 2 + 0] = dims1[i]; ndims1[i * 2 + 1] = 1; ndims1[i * 2 + 0] = dims1[i]; shift[i * 2 + 1] = 0; shift[i * 2 + 0] = 0; dims1B[i] = dims1[i]; ndims2[i * 2 + 1] = 1; ndims2[i * 2 + 0] = dims2[i]; } } complex float* src1B = md_alloc(N, dims1B, CFL_SIZE); md_resize_center(N, dims1B, src1B, dims1, src1, CFL_SIZE); complex float* tmp = md_alloc(2 * N, tdims, CFL_SIZE); long str1[2 * N]; long str2[2 * N]; md_calc_strides(2 * N, str1, ndims1, CFL_SIZE); md_calc_strides(2 * N, str2, tdims, CFL_SIZE); long off = md_calc_offset(2 * N, str1, shift); md_copy2(2 * N, tdims, str2, tmp, str1, ((void*)src1B) + off, CFL_SIZE); md_free(src1B); conv(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, nodims, dst, tdims, tmp, ndims2, src2); md_free(tmp); } void overlapandsave2H(int N, unsigned int flags, const long blk[N], const long dims1[N], complex float* dst, const long odims[N], const complex float* src1, const long dims2[N], const complex float* src2) { long dims1B[N]; long tdims[2 * N]; long nodims[2 * N]; long ndims1[2 * N]; long ndims2[2 * N]; long shift[2 * N]; unsigned int nflags = 0; for (int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { nflags = MD_SET(nflags, 2 * i); assert(1 == dims2[i] % 2); assert(0 == blk[i] % 2); assert(0 == dims1[i] % 2); assert(0 == odims[i] % blk[i]); assert(0 == dims1[i] % blk[i]); assert(dims1[i] == odims[i]); assert(dims2[i] <= blk[i]); assert(dims1[i] >= dims2[i]); // blocked output nodims[i * 2 + 1] = odims[i] / blk[i]; nodims[i * 2 + 0] = blk[i]; // expanded temporary storage tdims[i * 2 + 1] = dims1[i] / blk[i]; tdims[i * 2 + 0] = blk[i] + dims2[i] - 1; // blocked input // ---|---,---,---|--- // + +++ + // + +++ + // resized input dims1B[i] = dims1[i] + 2 * blk[i]; ndims1[i * 2 + 1] = dims1[i] / 
blk[i] + 2; // do we need two full blocks? ndims1[i * 2 + 0] = blk[i]; shift[i * 2 + 1] = 0; shift[i * 2 + 0] = blk[i] - (dims2[i] - 1) / 2; // kernel ndims2[i * 2 + 1] = 1; ndims2[i * 2 + 0] = dims2[i]; } else { nodims[i * 2 + 1] = 1; nodims[i * 2 + 0] = odims[i]; tdims[i * 2 + 1] = 1; tdims[i * 2 + 0] = dims1[i]; ndims1[i * 2 + 1] = 1; ndims1[i * 2 + 0] = dims1[i]; shift[i * 2 + 1] = 0; shift[i * 2 + 0] = 0; dims1B[i] = dims1[i]; ndims2[i * 2 + 1] = 1; ndims2[i * 2 + 0] = dims2[i]; } } complex float* tmp = md_alloc(2 * N, tdims, CFL_SIZE); //conv(2 * N, flags, CONV_VALID, CONV_SYMMETRIC, nodims, dst, tdims, tmp, ndims2, src2); convH(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, src1, ndims2, src2); complex float* src1B = md_alloc(N, dims1B, CFL_SIZE); long str1[2 * N]; long str2[2 * N]; md_calc_strides(2 * N, str1, ndims1, CFL_SIZE); md_calc_strides(2 * N, str2, tdims, CFL_SIZE); long off = md_calc_offset(2 * N, str1, shift); md_clear(N, dims1B, src1B, CFL_SIZE); //md_copy2(2 * N, tdims, str1, ((void*)src1B) + off, str2, tmp, sizeof(complex float));// FIXME: md_zadd2(2 * N, tdims, str1, ((void*)src1B) + off, str1, ((void*)src1B) + off, str2, tmp); md_resize_center(N, dims1, dst, dims1B, src1B, CFL_SIZE); md_free(src1B); md_free(tmp); } void overlapandsave2NE(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], complex float* src1, const long dims2[N], complex float* src2, const long mdims[N], complex float* msk) { long dims1B[N]; long tdims[2 * N]; long nodims[2 * N]; long ndims1[2 * N]; long ndims2[2 * N]; long shift[2 * N]; unsigned int nflags = 0; for (int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { nflags = MD_SET(nflags, 2 * i); assert(1 == dims2[i] % 2); assert(0 == blk[i] % 2); assert(0 == dims1[i] % 2); assert(0 == odims[i] % blk[i]); assert(0 == dims1[i] % blk[i]); assert(dims1[i] == odims[i]); assert(dims2[i] <= blk[i]); assert(dims1[i] >= dims2[i]); // blocked output nodims[i * 
2 + 1] = odims[i] / blk[i]; nodims[i * 2 + 0] = blk[i]; // expanded temporary storage tdims[i * 2 + 1] = dims1[i] / blk[i]; tdims[i * 2 + 0] = blk[i] + dims2[i] - 1; // blocked input // ---|---,---,---|--- // + +++ + // + +++ + // resized input dims1B[i] = dims1[i] + 2 * blk[i]; ndims1[i * 2 + 1] = dims1[i] / blk[i] + 2; // do we need two full blocks? ndims1[i * 2 + 0] = blk[i]; shift[i * 2 + 1] = 0; shift[i * 2 + 0] = blk[i] - (dims2[i] - 1) / 2; // kernel ndims2[i * 2 + 1] = 1; ndims2[i * 2 + 0] = dims2[i]; } else { nodims[i * 2 + 1] = 1; nodims[i * 2 + 0] = odims[i]; tdims[i * 2 + 1] = 1; tdims[i * 2 + 0] = dims1[i]; ndims1[i * 2 + 1] = 1; ndims1[i * 2 + 0] = dims1[i]; shift[i * 2 + 1] = 0; shift[i * 2 + 0] = 0; dims1B[i] = dims1[i]; ndims2[i * 2 + 1] = 1; ndims2[i * 2 + 0] = dims2[i]; } } complex float* src1B = md_alloc(N, dims1B, CFL_SIZE); complex float* tmp = md_alloc(2 * N, tdims, CFL_SIZE); complex float* tmpX = md_alloc(N, odims, CFL_SIZE); long str1[2 * N]; long str2[2 * N]; md_calc_strides(2 * N, str1, ndims1, sizeof(complex float)); md_calc_strides(2 * N, str2, tdims, sizeof(complex float)); long off = md_calc_offset(2 * N, str1, shift); md_resize_center(N, dims1B, src1B, dims1, src1, sizeof(complex float)); // we can loop here md_copy2(2 * N, tdims, str2, tmp, str1, ((void*)src1B) + off, sizeof(complex float)); conv(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, nodims, tmpX, tdims, tmp, ndims2, src2); long ostr[N]; long mstr[N]; md_calc_strides(N, ostr, odims, sizeof(complex float)); md_calc_strides(N, mstr, mdims, sizeof(complex float)); md_zmul2(N, odims, ostr, tmpX, ostr, tmpX, mstr, msk); convH(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2); md_clear(N, dims1B, src1B, sizeof(complex float)); md_zadd2(2 * N, tdims, str1, ((void*)src1B) + off, str1, ((void*)src1B) + off, str2, tmp); // md_resize_center(N, dims1, dst, dims1B, src1B, sizeof(complex float)); md_free(src1B); md_free(tmpX); md_free(tmp); } void 
overlapandsave2NEB(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], const complex float* src1, const long dims2[N], const complex float* src2, const long mdims[N], const complex float* msk) { long dims1B[N]; long tdims[2 * N]; long nodims[2 * N]; long ndims2[2 * N]; long nmdims[2 * N]; int e = N; for (int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { assert(1 == dims2[i] % 2); assert(0 == blk[i] % 2); assert(0 == dims1[i] % 2); assert(0 == odims[i] % blk[i]); assert(0 == dims1[i] % blk[i]); assert(dims1[i] == odims[i]); assert(dims2[i] <= blk[i]); assert(dims1[i] >= dims2[i]); assert((1 == mdims[i]) || (mdims[i] == dims1[i])); // blocked output nodims[e] = odims[i] / blk[i]; nodims[i] = blk[i]; // expanded temporary storage tdims[e] = dims1[i] / blk[i]; tdims[i] = blk[i] + dims2[i] - 1; // blocked input // ---|---,---,---|--- // + +++ + // + +++ + if (1 == mdims[i]) { nmdims[2 * i + 1] = 1; nmdims[2 * i + 1] = 1; } else { nmdims[2 * i + 1] = mdims[i] / blk[i]; nmdims[2 * i + 0] = blk[i]; } // resized input // minimal padding dims1B[i] = dims1[i] + (dims2[i] - 1); // kernel ndims2[e] = 1; ndims2[i] = dims2[i]; e++; } else { nodims[i] = odims[i]; tdims[i] = dims1[i]; nmdims[2 * i + 1] = 1; nmdims[2 * i + 0] = mdims[i]; dims1B[i] = dims1[i]; ndims2[i] = dims2[i]; } } int NE = e; //long S = md_calc_size(N, dims1B, 1); long str1[NE]; long str1B[N]; md_calc_strides(N, str1B, dims1B, sizeof(complex float)); e = N; for (int i = 0; i < N; i++) { str1[i] = str1B[i]; if (MD_IS_SET(flags, i)) str1[e++] = str1B[i] * blk[i]; } assert(NE == e); long str2[NE]; md_calc_strides(NE, str2, tdims, sizeof(complex float)); long ostr[NE]; long mstr[NE]; long mstrB[2 * N]; md_calc_strides(NE, ostr, nodims, sizeof(complex float)); md_calc_strides(2 * N, mstrB, nmdims, sizeof(complex float)); e = N; for (int i = 0; i < N; i++) { mstr[i] = mstrB[2 * i + 0]; if (MD_IS_SET(flags, i)) mstr[e++] = mstrB[2 * i + 1]; } assert(NE == e); 
const complex float* src1B = src1;//! //complex float* src1B = xmalloc(S * sizeof(complex float)); //md_resizec(N, dims1B, src1B, dims1, src1, sizeof(complex float)); // we can loop here assert(NE == N + 3); assert(1 == ndims2[N + 0]); assert(1 == ndims2[N + 1]); assert(1 == ndims2[N + 2]); assert(tdims[N + 0] == nodims[N + 0]); assert(tdims[N + 1] == nodims[N + 1]); assert(tdims[N + 2] == nodims[N + 2]); //complex float* src1C = xmalloc(S * sizeof(complex float)); complex float* src1C = dst; md_clear(N, dims1B, src1C, sizeof(complex float)); // must be done here #pragma omp parallel for collapse(3) for (int k = 0; k < nodims[N + 2]; k++) { for (int j = 0; j < nodims[N + 1]; j++) { for (int i = 0; i < nodims[N + 0]; i++) { complex float* tmp = md_alloc_sameplace(N, tdims, CFL_SIZE, dst); complex float* tmpX = md_alloc_sameplace(N, nodims, CFL_SIZE, dst); long off1 = str1[N + 0] * i + str1[N + 1] * j + str1[N + 2] * k; long off2 = mstr[N + 0] * i + mstr[N + 1] * j + mstr[N + 2] * k; md_copy2(N, tdims, str2, tmp, str1, ((const void*)src1B) + off1, sizeof(complex float)); conv(N, flags, CONV_VALID, CONV_SYMMETRIC, nodims, tmpX, tdims, tmp, ndims2, src2); md_zmul2(N, nodims, ostr, tmpX, ostr, tmpX, mstr, ((const void*)msk) + off2); convH(N, flags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2); #pragma omp critical md_zadd2(N, tdims, str1, ((void*)src1C) + off1, str1, ((void*)src1C) + off1, str2, tmp); md_free(tmpX); md_free(tmp); }}} //md_resizec(N, dims1, dst, dims1B, src1C, sizeof(complex float)); //free(src1C); //free(src1B); } void overlapandsave2HB(int N, unsigned int flags, const long blk[N], const long dims1[N], complex float* dst, const long odims[N], const complex float* src1, const long dims2[N], const complex float* src2, const long mdims[N], const complex float* msk) { long dims1B[N]; long tdims[2 * N]; long nodims[2 * N]; long ndims2[2 * N]; long nmdims[2 * N]; int e = N; for (int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { 
assert(1 == dims2[i] % 2); assert(0 == blk[i] % 2); assert(0 == dims1[i] % 2); assert(0 == odims[i] % blk[i]); assert(0 == dims1[i] % blk[i]); assert(dims1[i] == odims[i]); assert(dims2[i] <= blk[i]); assert(dims1[i] >= dims2[i]); assert((1 == mdims[i]) || (mdims[i] == dims1[i])); // blocked output nodims[e] = odims[i] / blk[i]; nodims[i] = blk[i]; // expanded temporary storage tdims[e] = dims1[i] / blk[i]; tdims[i] = blk[i] + dims2[i] - 1; // blocked input // ---|---,---,---|--- // + +++ + // + +++ + if (1 == mdims[i]) { nmdims[2 * i + 1] = 1; nmdims[2 * i + 1] = 1; } else { nmdims[2 * i + 1] = mdims[i] / blk[i]; nmdims[2 * i + 0] = blk[i]; } // resized input // minimal padding dims1B[i] = dims1[i] + (dims2[i] - 1); // kernel ndims2[e] = 1; ndims2[i] = dims2[i]; e++; } else { nodims[i] = odims[i]; tdims[i] = dims1[i]; nmdims[2 * i + 1] = 1; nmdims[2 * i + 0] = mdims[i]; dims1B[i] = dims1[i]; ndims2[i] = dims2[i]; } } int NE = e; // long S = md_calc_size(N, dims1B, 1); long str1[NE]; long str1B[N]; md_calc_strides(N, str1B, dims1B, sizeof(complex float)); e = N; for (int i = 0; i < N; i++) { str1[i] = str1B[i]; if (MD_IS_SET(flags, i)) str1[e++] = str1B[i] * blk[i]; } assert(NE == e); long str2[NE]; md_calc_strides(NE, str2, tdims, sizeof(complex float)); long ostr[NE]; long mstr[NE]; long mstrB[2 * N]; md_calc_strides(NE, ostr, nodims, sizeof(complex float)); md_calc_strides(2 * N, mstrB, nmdims, sizeof(complex float)); e = N; for (int i = 0; i < N; i++) { mstr[i] = mstrB[2 * i + 0]; if (MD_IS_SET(flags, i)) mstr[e++] = mstrB[2 * i + 1]; } assert(NE == e); // we can loop here assert(NE == N + 3); assert(1 == ndims2[N + 0]); assert(1 == ndims2[N + 1]); assert(1 == ndims2[N + 2]); assert(tdims[N + 0] == nodims[N + 0]); assert(tdims[N + 1] == nodims[N + 1]); assert(tdims[N + 2] == nodims[N + 2]); //complex float* src1C = xmalloc(S * sizeof(complex float)); complex float* src1C = dst; md_clear(N, dims1B, src1C, CFL_SIZE); // must be done here #pragma omp parallel for 
collapse(3) for (int k = 0; k < nodims[N + 2]; k++) { for (int j = 0; j < nodims[N + 1]; j++) { for (int i = 0; i < nodims[N + 0]; i++) { complex float* tmp = md_alloc_sameplace(N, tdims, CFL_SIZE, dst); complex float* tmpX = md_alloc_sameplace(N, nodims, CFL_SIZE, dst); long off1 = str1[N + 0] * i + str1[N + 1] * j + str1[N + 2] * k; long off2 = mstr[N + 0] * i + mstr[N + 1] * j + mstr[N + 2] * k; long off3 = ostr[N + 0] * i + ostr[N + 1] * j + ostr[N + 2] * k; md_zmul2(N, nodims, ostr, tmpX, ostr, ((const void*)src1) + off3, mstr, ((const void*)msk) + off2); convH(N, flags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2); #pragma omp critical md_zadd2(N, tdims, str1, ((void*)src1C) + off1, str1, ((void*)src1C) + off1, str2, tmp); md_free(tmpX); md_free(tmp); }}} } bart-0.4.02/src/num/convoaa.h000066400000000000000000000034641320577655200157350ustar00rootroot00000000000000 #include extern void overlapandadd(int N, const long dims[N], const long blk[N], complex float* dst, complex float* src1, const long dim2[N], complex float* src2); extern void overlapandsave(int N, const long dims[N], const long blk[N], complex float* dst, complex float* src1, const long dim2[N], complex float* src2); extern struct conv_plan* overlapandsave_plan(int N, const long dims[N], const long blk[N], const long dim2[N], complex float* src2); extern void overlapandsave_exec(struct conv_plan* plan, int N, const long dims[N], const long blk[N], complex float* dst, complex float* src1, const long dim2[N]); extern void overlapandsave2(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], const complex float* src1, const long dims2[N], const complex float* src2); extern void overlapandsave2H(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], const complex float* src1, const long dims2[N], const complex float* src2); extern void overlapandsave2NE(int N, unsigned int 
flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], complex float* src1, const long dims2[N], complex float* src2, const long mdims[N], complex float* msk); struct vec_ops; extern void overlapandsave2NEB(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], const complex float* src1, const long dims2[N], const complex float* src2, const long mdims[N], const complex float* msk); extern void overlapandsave2HB(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], const complex float* src1, const long dims2[N], const complex float* src2, const long mdims[N], const complex float* msk); bart-0.4.02/src/num/fft-cuda.c000066400000000000000000000076331320577655200157750ustar00rootroot00000000000000/* Copyright 2013, 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2013, 2015 Martin Uecker * * * Internal interface to the CUFFT library used in fft.c. */ #include #include #include #include "misc/misc.h" #include "num/multind.h" #include "fft-cuda.h" #ifdef USE_CUDA #include #include "num/gpuops.h" #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif struct fft_cuda_plan_s { cufftHandle cufft; bool backwards; long batch; long idist; long odist; }; struct iovec { long n; long is; long os; }; struct fft_cuda_plan_s* fft_cuda_plan(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], const long istrides[D], bool backwards) { PTR_ALLOC(struct fft_cuda_plan_s, plan); unsigned int N = D; plan->batch = 1; plan->odist = 0; plan->idist = 0; plan->backwards = backwards; struct iovec dims[N]; struct iovec hmdims[N]; assert(0 != flags); // the cufft interface is strange, but we do our best... 
unsigned int k = 0; unsigned int l = 0; for (unsigned int i = 0; i < N; i++) { if (1 == dimensions[i]) continue; if (MD_IS_SET(flags, i)) { dims[k].n = dimensions[i]; dims[k].is = istrides[i] / CFL_SIZE; dims[k].os = ostrides[i] / CFL_SIZE; k++; } else { hmdims[l].n = dimensions[i]; hmdims[l].is = istrides[i] / CFL_SIZE; hmdims[l].os = ostrides[i] / CFL_SIZE; l++; } } assert(k > 0); int cudims[k]; int cuiemb[k]; int cuoemb[k]; long batchdims[l]; long batchistr[l]; long batchostr[l]; int lis = dims[0].is; int los = dims[0].os; if (k > 3) goto errout; for (unsigned int i = 0; i < k; i++) { assert(dims[i].is == lis); assert(dims[i].os == los); cudims[k - 1 - i] = dims[i].n; cuiemb[k - 1 - i] = dims[i].n; cuoemb[k - 1 - i] = dims[i].n; lis = dims[i].n * dims[i].is; los = dims[i].n * dims[i].os; } for (unsigned int i = 0; i < l; i++) { batchdims[i] = hmdims[i].n; batchistr[i] = hmdims[i].is; batchostr[i] = hmdims[i].os; } int istride = dims[0].is; int ostride = dims[0].os; int idist = lis; int odist = los; int cubs = 1; // check that batch dimensions can be collapsed to one unsigned int bi = md_calc_blockdim(l, batchdims, batchistr, hmdims[0].is); unsigned int bo = md_calc_blockdim(l, batchdims, batchostr, hmdims[0].os); if (bi != bo) goto errout; if (bi > 0) { idist = hmdims[0].is; odist = hmdims[0].os; cubs = md_calc_size(bi, batchdims); } if (l != bi) { // check that batch dimensions can be collapsed to one if (l - bi != md_calc_blockdim(l - bi, batchdims + bi, batchistr + bi, hmdims[bi].is)) goto errout; if (l - bo != md_calc_blockdim(l - bo, batchdims + bo, batchostr + bo, hmdims[bo].os)) goto errout; plan->idist = hmdims[bi].is; plan->odist = hmdims[bo].os; plan->batch = md_calc_size(l - bi, batchdims + bi); } assert(k <= 3); int err; #pragma omp critical err = cufftPlanMany(&plan->cufft, k, cudims, cuiemb, istride, idist, cuoemb, ostride, odist, CUFFT_C2C, cubs); if (CUFFT_SUCCESS != err) goto errout; return PTR_PASS(plan); errout: PTR_FREE(plan); return NULL; } 
void fft_cuda_free_plan(struct fft_cuda_plan_s* cuplan) { cufftDestroy(cuplan->cufft); free(cuplan); } void fft_cuda_exec(struct fft_cuda_plan_s* cuplan, complex float* dst, const complex float* src) { assert(cuda_ondevice(src)); assert(cuda_ondevice(dst)); assert(NULL != cuplan); int err; for (int i = 0; i < cuplan->batch; i++) { if (CUFFT_SUCCESS != (err = cufftExecC2C(cuplan->cufft, (cufftComplex*)src + i * cuplan->idist, (cufftComplex*)dst + i * cuplan->odist, (!cuplan->backwards) ? CUFFT_FORWARD : CUFFT_INVERSE))) error("CUFFT: %d\n", err); } } #endif bart-0.4.02/src/num/fft-cuda.h000066400000000000000000000010601320577655200157660ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ struct fft_cuda_plan_s; extern struct fft_cuda_plan_s* fft_cuda_plan(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], const long istrides[D], _Bool dir); extern void fft_cuda_free_plan(struct fft_cuda_plan_s* cuplan); extern void fft_cuda_exec(struct fft_cuda_plan_s* cuplan, complex float* dst, const complex float* src); bart-0.4.02/src/num/fft.c000066400000000000000000000355451320577655200150660ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2011-2016 Martin Uecker * 2014 Frank Ong * * * FFT. It uses FFTW or CUFFT internally. * * * Gauss, Carl F. 1805. "Nachlass: Theoria Interpolationis Methodo Nova * Tractata." Werke 3, pp. 
265-327, Königliche Gesellschaft der * Wissenschaften, Göttingen, 1866 */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/ops.h" #include "misc/misc.h" #include "misc/debug.h" #include "fft.h" #undef fft_plan_s #ifdef USE_CUDA #include "num/gpuops.h" #include "fft-cuda.h" #define LAZY_CUDA #endif void fftscale2(unsigned int N, const long dimensions[N], unsigned long flags, const long ostrides[N], complex float* dst, const long istrides[N], const complex float* src) { long fft_dims[N]; md_select_dims(N, flags, fft_dims, dimensions); float scale = 1. / sqrtf((float)md_calc_size(N, fft_dims)); md_zsmul2(N, dimensions, ostrides, dst, istrides, src, scale); } void fftscale(unsigned int N, const long dims[N], unsigned long flags, complex float* dst, const complex float* src) { long strs[N]; md_calc_strides(N, strs, dims, CFL_SIZE); fftscale2(N, dims, flags, strs, dst, strs, src); } static double fftmod_phase(long length, int j) { long center1 = length / 2; double shift = (double)center1 / (double)length; return ((double)j - (double)center1 / 2.) * shift; } static void fftmod2_r(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src, bool inv, double phase) { if (0 == flags) { md_zsmul2(N, dims, ostrs, dst, istrs, src, cexp(M_PI * 2.i * (inv ? 
-phase : phase))); return; } /* this will also currently be slow on the GPU because we do not * support strides there on the lowest level */ unsigned int i = N - 1; while (!MD_IS_SET(flags, i)) i--; #if 1 // If there is only one dimensions left and it is the innermost // which is contiguous optimize using md_zfftmod2 if ((0u == MD_CLEAR(flags, i)) && (1 == md_calc_size(i, dims)) && (CFL_SIZE == ostrs[i]) && (CFL_SIZE == istrs[i])) { md_zfftmod2(N - i, dims + i, ostrs + i, dst, istrs + i, src, inv, phase); return; } #endif long tdims[N]; md_select_dims(N, ~MD_BIT(i), tdims, dims); for (int j = 0; j < dims[i]; j++) fftmod2_r(N, tdims, MD_CLEAR(flags, i), ostrs, (void*)dst + j * ostrs[i], istrs, (void*)src + j * istrs[i], inv, phase + fftmod_phase(dims[i], j)); } static unsigned long clear_singletons(unsigned int N, const long dims[N], unsigned long flags) { return (0 == N) ? flags : clear_singletons(N - 1, dims, (1 == dims[N - 1]) ? MD_CLEAR(flags, N - 1) : flags); } void fftmod2(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src) { fftmod2_r(N, dims, clear_singletons(N, dims, flags), ostrs, dst, istrs, src, false, 0.); } /* * The correct usage is fftmod before and after fft and * ifftmod before and after ifft (this is different from * how fftshift/ifftshift has to be used) */ void ifftmod2(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src) { fftmod2_r(N, dims, clear_singletons(N, dims, flags), ostrs, dst, istrs, src, true, 0.); } void fftmod(unsigned int N, const long dimensions[N], unsigned long flags, complex float* dst, const complex float* src) { long strs[N]; md_calc_strides(N, strs, dimensions, CFL_SIZE); fftmod2(N, dimensions, flags, strs, dst, strs, src); } void ifftmod(unsigned int N, const long dimensions[N], unsigned long flags, complex float* dst, const complex float* 
src) { long strs[N]; md_calc_strides(N, strs, dimensions, CFL_SIZE); ifftmod2(N, dimensions, flags, strs, dst, strs, src); } void ifftshift2(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src) { long pos[N]; md_set_dims(N, pos, 0); for (unsigned int i = 0; i < N; i++) if (MD_IS_SET(flags, i)) pos[i] = dims[i] - dims[i] / 2; md_circ_shift2(N, dims, pos, ostrs, dst, istrs, src, CFL_SIZE); } void ifftshift(unsigned int N, const long dimensions[N], unsigned long flags, complex float* dst, const complex float* src) { long strs[N]; md_calc_strides(N, strs, dimensions, CFL_SIZE); ifftshift2(N, dimensions, flags, strs, dst, strs, src); } void fftshift2(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src) { long pos[N]; md_set_dims(N, pos, 0); for (unsigned int i = 0; i < N; i++) if (MD_IS_SET(flags, i)) pos[i] = dims[i] / 2; md_circ_shift2(N, dims, pos, ostrs, dst, istrs, src, CFL_SIZE); } void fftshift(unsigned int N, const long dimensions[N], unsigned long flags, complex float* dst, const complex float* src) { long strs[N]; md_calc_strides(N, strs, dimensions, CFL_SIZE); fftshift2(N, dimensions, flags, strs, dst, strs, src); } struct fft_plan_s { INTERFACE(operator_data_t); fftwf_plan fftw; #ifdef USE_CUDA #ifdef LAZY_CUDA unsigned int D; unsigned long flags; bool backwards; const long* dims; const long* istrs; const long* ostrs; #endif struct fft_cuda_plan_s* cuplan; #endif }; static DEF_TYPEID(fft_plan_s); static fftwf_plan fft_fftwf_plan(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], complex float* dst, const long istrides[D], const complex float* src, bool backwards, bool measure) { unsigned int N = D; fftwf_iodim64 dims[N]; fftwf_iodim64 hmdims[N]; unsigned int k = 0; unsigned int l = 0; //FFTW seems to be fine with this //assert(0 != flags); 
for (unsigned int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { dims[k].n = dimensions[i]; dims[k].is = istrides[i] / CFL_SIZE; dims[k].os = ostrides[i] / CFL_SIZE; k++; } else { hmdims[l].n = dimensions[i]; hmdims[l].is = istrides[i] / CFL_SIZE; hmdims[l].os = ostrides[i] / CFL_SIZE; l++; } } fftwf_plan fftwf; #pragma omp critical fftwf = fftwf_plan_guru64_dft(k, dims, l, hmdims, (complex float*)src, dst, backwards ? 1 : (-1), measure ? FFTW_MEASURE : FFTW_ESTIMATE); return fftwf; } static void fft_apply(const operator_data_t* _plan, unsigned int N, void* args[N]) { complex float* dst = args[0]; const complex float* src = args[1]; const struct fft_plan_s* plan = CAST_DOWN(fft_plan_s, _plan); assert(2 == N); #ifdef USE_CUDA if (cuda_ondevice(src)) { #ifdef LAZY_CUDA if (NULL == plan->cuplan) ((struct fft_plan_s*)plan)->cuplan = fft_cuda_plan(plan->D, plan->dims, plan->flags, plan->ostrs, plan->istrs, plan->backwards); #endif assert(NULL != plan->cuplan); fft_cuda_exec(plan->cuplan, dst, src); } else #endif { assert(NULL != plan->fftw); fftwf_execute_dft(plan->fftw, (complex float*)src, dst); } } static void fft_free_plan(const operator_data_t* _data) { const struct fft_plan_s* plan = CAST_DOWN(fft_plan_s, _data); fftwf_destroy_plan(plan->fftw); #ifdef USE_CUDA #ifdef LAZY_CUDA xfree(plan->dims); xfree(plan->istrs); xfree(plan->ostrs); #endif if (NULL != plan->cuplan) fft_cuda_free_plan(plan->cuplan); #endif xfree(plan); } const struct operator_s* fft_measure_create(unsigned int D, const long dimensions[D], unsigned long flags, bool inplace, bool backwards) { PTR_ALLOC(struct fft_plan_s, plan); SET_TYPEID(fft_plan_s, plan); complex float* src = md_alloc(D, dimensions, CFL_SIZE); complex float* dst = inplace ? 
src : md_alloc(D, dimensions, CFL_SIZE); long strides[D]; md_calc_strides(D, strides, dimensions, CFL_SIZE); plan->fftw = fft_fftwf_plan(D, dimensions, flags, strides, dst, strides, src, backwards, true); md_free(src); if (!inplace) md_free(dst); #ifdef USE_CUDA plan->cuplan = NULL; #ifndef LAZY_CUDA if (cuda_ondevice(src)) plan->cuplan = fft_cuda_plan(D, dimensions, flags, strides, strides, backwards); #else plan->D = D; plan->flags = flags; plan->backwards = backwards; PTR_ALLOC(long[D], dims); md_copy_dims(D, *dims, dimensions); plan->dims = *PTR_PASS(dims); PTR_ALLOC(long[D], istrs); md_copy_strides(D, *istrs, strides); plan->istrs = *PTR_PASS(istrs); PTR_ALLOC(long[D], ostrs); md_copy_strides(D, *ostrs, strides); plan->ostrs = *PTR_PASS(ostrs); #endif #endif return operator_create2(D, dimensions, strides, D, dimensions, strides, CAST_UP(PTR_PASS(plan)), fft_apply, fft_free_plan); } const struct operator_s* fft_create2(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], complex float* dst, const long istrides[D], const complex float* src, bool backwards) { PTR_ALLOC(struct fft_plan_s, plan); SET_TYPEID(fft_plan_s, plan); plan->fftw = fft_fftwf_plan(D, dimensions, flags, ostrides, dst, istrides, src, backwards, false); #ifdef USE_CUDA plan->cuplan = NULL; #ifndef LAZY_CUDA if (cuda_ondevice(src)) plan->cuplan = fft_cuda_plan(D, dimensions, flags, ostrides, istrides, backwards); #else plan->D = D; plan->flags = flags; plan->backwards = backwards; PTR_ALLOC(long[D], dims); md_copy_dims(D, *dims, dimensions); plan->dims = *PTR_PASS(dims); PTR_ALLOC(long[D], istrs); md_copy_strides(D, *istrs, istrides); plan->istrs = *PTR_PASS(istrs); PTR_ALLOC(long[D], ostrs); md_copy_strides(D, *ostrs, ostrides); plan->ostrs = *PTR_PASS(ostrs); #endif #endif return operator_create2(D, dimensions, ostrides, D, dimensions, istrides, CAST_UP(PTR_PASS(plan)), fft_apply, fft_free_plan); } const struct operator_s* fft_create(unsigned int D, const long 
dimensions[D], unsigned long flags, complex float* dst, const complex float* src, bool backwards)
{
	long strides[D];
	md_calc_strides(D, strides, dimensions, CFL_SIZE);

	return fft_create2(D, dimensions, flags, strides, dst, strides, src, backwards);
}


// Apply an FFT operator created with one of the fft_create functions.
void fft_exec(const struct operator_s* o, complex float* dst, const complex float* src)
{
	operator_apply_unchecked(o, dst, src);
}


// Release an FFT operator.
void fft_free(const struct operator_s* o)
{
	operator_free(o);
}


// One-shot forward FFT (strided): create the operator, apply it, free it.
void fft2(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], complex float* dst, const long istrides[D], const complex float* src)
{
	const struct operator_s* op = fft_create2(D, dimensions, flags, ostrides, dst, istrides, src, false);

	fft_exec(op, dst, src);
	fft_free(op);
}


// One-shot inverse FFT (strided): create the operator, apply it, free it.
void ifft2(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], complex float* dst, const long istrides[D], const complex float* src)
{
	const struct operator_s* op = fft_create2(D, dimensions, flags, ostrides, dst, istrides, src, true);

	fft_exec(op, dst, src);
	fft_free(op);
}


// One-shot forward FFT with strides computed from the dimensions.
void fft(unsigned int D, const long dimensions[D], unsigned long flags, complex float* dst, const complex float* src)
{
	const struct operator_s* op = fft_create(D, dimensions, flags, dst, src, false);

	fft_exec(op, dst, src);
	fft_free(op);
}


// One-shot inverse FFT with strides computed from the dimensions.
void ifft(unsigned int D, const long dimensions[D], unsigned long flags, complex float* dst, const complex float* src)
{
	const struct operator_s* op = fft_create(D, dimensions, flags, dst, src, true);

	fft_exec(op, dst, src);
	fft_free(op);
}


// Forward FFT with fftmod applied before and after the transform; the
// second and third step work in place on dst.
void fftc(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, complex float* dst, const complex float* src)
{
	fftmod(D, dimensions, flags, dst, src);
	fft(D, dimensions, flags, dst, dst);
	fftmod(D, dimensions, flags, dst, dst);
}


void ifftc(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, complex float* dst, const complex float* src)
{
	ifftmod(D, dimensions, flags, dst, src);
	ifft(D,
dimensions, flags, dst, dst); ifftmod(D, dimensions, flags, dst, dst); } void fftc2(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], complex float* dst, const long istrides[D], const complex float* src) { fftmod2(D, dimensions, flags, ostrides, dst, istrides, src); fft2(D, dimensions, flags, ostrides, dst, ostrides, dst); fftmod2(D, dimensions, flags, ostrides, dst, ostrides, dst); } void ifftc2(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], complex float* dst, const long istrides[D], const complex float* src) { ifftmod2(D, dimensions, flags, ostrides, dst, istrides, src); ifft2(D, dimensions, flags, ostrides, dst, ostrides, dst); ifftmod2(D, dimensions, flags, ostrides, dst, ostrides, dst); } void fftu(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, complex float* dst, const complex float* src) { fft(D, dimensions, flags, dst, src); fftscale(D, dimensions, flags, dst, dst); } void ifftu(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, complex float* dst, const complex float* src) { ifft(D, dimensions, flags, dst, src); fftscale(D, dimensions, flags, dst, dst); } void fftu2(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], complex float* dst, const long istrides[D], const complex float* src) { fft2(D, dimensions, flags, ostrides, dst, istrides, src); fftscale2(D, dimensions, flags, ostrides, dst, ostrides, dst); } void ifftu2(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], complex float* dst, const long istrides[D], const complex float* src) { ifft2(D, dimensions, flags, ostrides, dst, istrides, src); fftscale2(D, dimensions, flags, ostrides, dst, ostrides, dst); } void fftuc(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, complex float* dst, const complex float* src) { fftc(D, dimensions, flags, dst, src); fftscale(D, dimensions, flags, dst, dst); } 
/*
 * Unitary and centered inverse FFT: ifftc from src into dst,
 * then fftscale in place on dst.
 */
void ifftuc(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, complex float* dst, const complex float* src)
{
	ifftc(D, dimensions, flags, dst, src);
	fftscale(D, dimensions, flags, dst, dst);
}


/*
 * Unitary and centered FFT with explicit strides: fftc2 from src into dst,
 * then fftscale2 in place on dst (using the output strides).
 */
void fftuc2(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], complex float* dst, const long istrides[D], const complex float* src)
{
	fftc2(D, dimensions, flags, ostrides, dst, istrides, src);
	fftscale2(D, dimensions, flags, ostrides, dst, ostrides, dst);
}


/*
 * Unitary and centered inverse FFT with explicit strides: ifftc2 from src
 * into dst, then fftscale2 in place on dst (using the output strides).
 */
void ifftuc2(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], complex float* dst, const long istrides[D], const complex float* src)
{
	ifftc2(D, dimensions, flags, ostrides, dst, istrides, src);
	fftscale2(D, dimensions, flags, ostrides, dst, ostrides, dst);
}


// set once fftwf_init_threads() has been called by fft_set_num_threads()
bool fft_threads_init = false;


/*
 * Pass the requested number of threads n to FFTW.
 *
 * With FFTWTHREADS defined, the first call runs fftwf_init_threads()
 * (tracked by fft_threads_init) and every call forwards n to
 * fftwf_plan_with_nthreads(); both steps are wrapped in omp critical
 * sections. Without FFTWTHREADS the function does nothing.
 */
void fft_set_num_threads(unsigned int n)
{
#ifdef FFTWTHREADS
	#pragma omp critical
	if (!fft_threads_init) {

		fft_threads_init = true;
		fftwf_init_threads();
	}

	#pragma omp critical
	fftwf_plan_with_nthreads(n);
#else
	UNUSED(n);	// n is only needed for the threaded build
#endif
}

bart-0.4.02/src/num/fft.h000066400000000000000000000132521320577655200150620ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
*
 * Authors:
 * 2011, 2013 Martin Uecker
 */

#ifndef __FFT_H
#define __FFT_H

// NOTE(review): the header name of the following #include is missing in this copy of the file
#include

#include "misc/cppwrap.h"

/*
 * Naming scheme used below: the plain variants take only dimensions and
 * flags, the variants ending in "2" additionally take separate output and
 * input strides.
 */

// similar to fftshift but modulates in the transform domain
extern void fftmod(unsigned int N, const long dims[__VLA(N)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void fftmod2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);

// fftmod for ifft
extern void ifftmod(unsigned int N, const long dims[__VLA(N)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void ifftmod2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);

// apply scaling necessary for unitarity
extern void fftscale(unsigned int N, const long dims[__VLA(N)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void fftscale2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);

// fftshift
extern void fftshift(unsigned int N, const long dims[__VLA(N)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void fftshift2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);

// ifftshift
extern void ifftshift(unsigned int N, const long dims[__VLA(N)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void ifftshift2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);

// FFT
extern void fft(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void ifft(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void fft2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);
extern void ifft2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);

// centered (fftmod / ifftmod applied before and after the transform)
extern void fftc(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void ifftc(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void fftc2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);
extern void ifftc2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);

// unitary (transform followed by fftscale)
extern void fftu(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void ifftu(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void fftu2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);
extern void ifftu2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);

// unitary and centered
extern void fftuc(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void ifftuc(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, _Complex float* dst, const _Complex float* src);
extern void fftuc2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);
extern void ifftuc2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src);

// opaque here; only pointers to it are passed around by this interface
struct operator_s;

// plan creation; the returned operator is used with fft_exec and released with fft_free
extern const struct operator_s* fft_create(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, _Complex float* dst, const _Complex float* src, _Bool backwards);
extern const struct operator_s* fft_create2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long ostrides[__VLA(D)], _Complex float* dst, const long istrides[__VLA(D)], const _Complex float* src, _Bool backwards);
extern const struct operator_s* fft_measure_create(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, _Bool inplace, _Bool backwards);

// interface using a plan
extern void fft_exec(const struct operator_s* plan, _Complex float* dst, const _Complex float* src);
extern void fft_free(const struct operator_s* plan);

// forwards n to the FFTW thread setup (no effect unless built with FFTWTHREADS)
extern void fft_set_num_threads(unsigned int n);

#include "misc/cppwrap.h"

#endif

bart-0.4.02/src/num/filter.c000066400000000000000000000164761320577655200155760ustar00rootroot00000000000000/* Copyright 2015-2017. The Regents of the University of California.
 * Copyright 2016-2017. Martin Uecker.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
* * Authors: * 2012-2017 Martin Uecker * 2017 Jon Tamir */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/loop.h" #include "misc/misc.h" #include "filter.h" static int cmp_float(const void* a, const void* b) { return (*(float*)a - *(float*)b > 0.) ? 1. : -1.; } static int cmp_complex_float(const void* a, const void* b) // gives sign for 0. (not 0) { return (cabsf(*(complex float*)a) - cabsf(*(complex float*)b) > 0.) ? 1. : -1.; } static void sort_floats(int N, float ar[N]) { qsort((void*)ar, N, sizeof(float), cmp_float); } static void sort_complex_floats(int N, complex float ar[N]) { qsort((void*)ar, N, sizeof(complex float), cmp_complex_float); } float median_float(int N, const float ar[N]) { float tmp[N]; memcpy(tmp, ar, N * sizeof(float)); sort_floats(N, tmp); return (1 == N % 2) ? tmp[(N - 1) / 2] : ((tmp[(N - 1) / 2 + 0] + tmp[(N - 1) / 2 + 1]) / 2.); } complex float median_complex_float(int N, const complex float ar[N]) { complex float tmp[N]; memcpy(tmp, ar, N * sizeof(complex float)); sort_complex_floats(N, tmp); return (1 == N % 2) ? 
tmp[(N - 1) / 2] : ((tmp[(N - 1) / 2 + 0] + tmp[(N - 1) / 2 + 1]) / 2.); } struct median_s { long length; long stride; }; static void nary_medianz(void* _data, void* ptr[]) { struct median_s* data = (struct median_s*)_data; complex float tmp[data->length]; for (long i = 0; i < data->length; i++) tmp[i] = *((complex float*)(ptr[1] + i * data->stride)); *(complex float*)ptr[0] = median_complex_float(data->length, tmp); } void md_medianz2(int D, int M, const long dim[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr) { assert(M < D); const long* nstr[2] = { ostr, istr }; void* nptr[2] = { optr, (void*)iptr }; struct median_s data = { dim[M], istr[M] }; long dim2[D]; for (int i = 0; i < D; i++) dim2[i] = dim[i]; dim2[M] = 1; md_nary(2, D, dim2, nstr, nptr, (void*)&data, &nary_medianz); } void md_medianz(int D, int M, const long dim[D], complex float* optr, const complex float* iptr) { assert(M < D); long dim2[D]; for (int i = 0; i < D; i++) dim2[i] = dim[i]; dim2[M] = 1; long istr[D]; long ostr[D]; md_calc_strides(D, istr, dim, 8); md_calc_strides(D, ostr, dim2, 8); md_medianz2(D, M, dim, ostr, optr, istr, iptr); } void centered_gradient(unsigned int N, const long dims[N], const complex float grad[N], complex float* out) { md_zgradient(N, dims, out, grad); long dims0[N]; md_singleton_dims(N, dims0); long strs0[N]; md_calc_strides(N, strs0, dims0, CFL_SIZE); complex float cn = 0.; for (unsigned int n = 0; n < N; n++) cn -= grad[n] * (float)dims[n] / 2.; long strs[N]; md_calc_strides(N, strs, dims, CFL_SIZE); md_zadd2(N, dims, strs, out, strs, out, strs0, &cn); } void linear_phase(unsigned int N, const long dims[N], const float pos[N], complex float* out) { complex float grad[N]; for (unsigned int n = 0; n < N; n++) grad[n] = 2.i * M_PI * (float)(pos[n]) / ((float)dims[n]); centered_gradient(N, dims, grad, out); md_zmap(N, dims, out, out, cexpf); } void klaplace_scaled(unsigned int N, const long dims[N], unsigned int flags, const 
float sc[N], complex float* out) { unsigned int flags2 = flags; complex float* tmp = md_alloc(N, dims, CFL_SIZE); md_clear(N, dims, out, CFL_SIZE); for (unsigned int i = 0; i < bitcount(flags); i++) { unsigned int lsb = ffs(flags2) - 1; flags2 = MD_CLEAR(flags2, lsb); complex float grad[N]; for (unsigned int j = 0; j < N; j++) grad[j] = 0.; grad[lsb] = sc[lsb]; centered_gradient(N, dims, grad, tmp); md_zspow(N, dims, tmp, tmp, 2.); md_zadd(N, dims, out, out, tmp); } md_free(tmp); } void klaplace(unsigned int N, const long dims[N], unsigned int flags, complex float* out) { float sc[N]; for (unsigned int j = 0; j < N; j++) sc[j] = 1. / (float)dims[j]; klaplace_scaled(N, dims, flags, sc, out); } static void nary_zwindow(const long N, const float alpha, const float beta, complex float* ptr) { if (1 == N) { ptr[0] = 1.; return; } #pragma omp parallel for for (long i = 0; i < N; i++) ptr[i] = alpha - beta * cosf(2. * M_PI * i / (N - 1)); } static void nary_zhamming(const long N, complex float* ptr) { #if 0 const float alpha = 0.53836; const float beta = 0.46164; #else const float alpha = 0.54; const float beta = 0.46; #endif return nary_zwindow(N, alpha, beta, ptr); } static void nary_zhann(const long N, complex float* ptr) { const float alpha = 0.5; const float beta = 0.5; return nary_zwindow(N, alpha, beta, ptr); } enum window_type { WINDOW_HAMMING, WINDOW_HANN }; static void md_zwindow2(unsigned int D, const long dims[D], unsigned int flags, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr, enum window_type wt) { if (0 == flags) { md_copy2(D, dims, ostrs, optr, istrs, iptr, CFL_SIZE); return; } // process first flagged dimension unsigned int lsb = ffs(flags) - 1; long win_dims[D]; long win_strs[D]; md_select_dims(D, MD_BIT(lsb), win_dims, dims); md_calc_strides(D, win_strs, win_dims, CFL_SIZE); complex float* win = md_alloc_sameplace(D, win_dims, CFL_SIZE, iptr); switch (wt) { case WINDOW_HAMMING: nary_zhamming(dims[lsb], win); 
break; case WINDOW_HANN: nary_zhann(dims[lsb], win); break; }; md_zmul2(D, dims, ostrs, optr, istrs, iptr, win_strs, win); md_free(win); flags = MD_CLEAR(flags, lsb); // process other dimensions if (0 != flags) md_zwindow2(D, dims, flags, ostrs, optr, ostrs, optr, wt); return; } #if 0 static void md_zwindow(const unsigned int D, const long dims[D], const long flags, complex float* optr, const complex float* iptr, bool hamming) { long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); md_zwindow2(D, dims, flags, strs, optr, strs, iptr, hamming); } #endif /* * Apply Hamming window to iptr along flags */ void md_zhamming(const unsigned int D, const long dims[D], const long flags, complex float* optr, const complex float* iptr) { long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); return md_zhamming2(D, dims, flags, strs, optr, strs, iptr); } /* * Apply Hamming window to iptr along flags (with strides) */ void md_zhamming2(const unsigned int D, const long dims[D], const long flags, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr) { return md_zwindow2(D, dims, flags, ostrs, optr, istrs, iptr, WINDOW_HAMMING); } /* * Apply Hann window to iptr along flags */ void md_zhann(const unsigned int D, const long dims[D], const long flags, complex float* optr, const complex float* iptr) { long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); return md_zhann2(D, dims, flags, strs, optr, strs, iptr); } /* * Apply Hann window to iptr along flags (with strides) */ void md_zhann2(const unsigned int D, const long dims[D], const long flags, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr) { return md_zwindow2(D, dims, flags, ostrs, optr, istrs, iptr, WINDOW_HANN); } bart-0.4.02/src/num/filter.h000066400000000000000000000031001320577655200155570ustar00rootroot00000000000000 #include #include "misc/cppwrap.h" extern float median_float(int N, const float ar[N]); extern complex float 
median_complex_float(int N, const complex float ar[N]); extern void md_medianz2(int D, int M, const long dim[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr); extern void md_medianz(int D, int M, const long dim[D], complex float* optr, const complex float* iptr); extern void linear_phase(unsigned int N, const long dims[__VLA(N)], const float pos[__VLA(N)], _Complex float* out); extern void centered_gradient(unsigned int N, const long dims[__VLA(N)], const _Complex float grad[__VLA(N)], _Complex float* out); extern void klaplace(unsigned int N, const long dims[__VLA(N)], unsigned int flags, _Complex float* out); void klaplace_scaled(unsigned int N, const long dims[N], unsigned int flags, const float sc[N], complex float* out); extern void md_zhamming(const unsigned int D, const long dims[__VLA(D)], const long flags, complex float* optr, const complex float* iptr); extern void md_zhamming2(const unsigned int D, const long dims[__VLA(D)], const long flags, const long ostr[__VLA(D)], complex float* optr, const long istr[__VLA(D)], const complex float* iptr); extern void md_zhann(const unsigned int D, const long dims[__VLA(D)], const long flags, complex float* optr, const complex float* iptr); extern void md_zhann2(const unsigned int D, const long dims[__VLA(D)], const long flags, const long ostr[__VLA(D)], complex float* optr, const long istr[__VLA(D)], const complex float* iptr); #include "misc/cppwrap.h" bart-0.4.02/src/num/flpmath.c000066400000000000000000002714301320577655200157350ustar00rootroot00000000000000/* Copyright 2013-2015 The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * Copyright 2017. University of Oxford. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*
 * Authors:
 * 2012-2017 Martin Uecker
 * 2013 Dara Bahri
 * 2014 Frank Ong
 * 2014-2015 Jonathan Tamir
 * 2016 Siddharth Iyer
 * 2017 Sofia Dimoudi
 *
 *
 * Operations on arrays of complex single-precision floating
 * point numbers. Most functions come in two flavours:
 *
 * 1. A basic version which takes the number of dimensions, an array
 * of long integers specifying the size of each dimension, the pointers
 * to the data, and the size of each element and other required parameters.
 *
 * 2. An extended version which takes an array of long integers which
 * specifies the strides for each argument.
 *
 * All functions should work on CPU and GPU.
 *
 */

#include
#include
#include
#include
#include

#include "num/multind.h"
#include "num/flpmath.h"
#include "num/vecops.h"
#include "num/optimize.h"
#include "num/blas.h"

#include "misc/misc.h"
#include "misc/types.h"
#include "misc/debug.h"

#ifdef USE_CUDA
#include "num/gpuops.h"
/*
 * including gpukrnls.h so that I can directly call cuda_zreal.
 * this can be removed after md_zreal is optimized for GPU.
*/
#include "num/gpukrnls.h"
#endif


/*
 * Function pointer types for the strided multi-dimensional operations:
 * "2op" = output and one input, "3op" = output and two inputs,
 * "z" prefix = complex arguments, "d" suffix = double-precision output,
 * "f" suffix = double-precision input with single-precision output.
 */
typedef void (*md_2op_t)(unsigned int D, const long dims[D], const long ostrs[D], float* optr, const long istrs1[D], const float* iptr1);
typedef void (*md_z2op_t)(unsigned int D, const long dims[D], const long ostrs[D], complex float* optr, const long istrs1[D], const complex float* iptr1);
typedef void (*md_2opf_t)(unsigned int D, const long dims[D], const long ostrs[D], float* optr, const long istrs1[D], const double* iptr1);
typedef void (*md_2opd_t)(unsigned int D, const long dims[D], const long ostrs[D], double* optr, const long istrs1[D], const float* iptr1);
typedef void (*md_z2opf_t)(unsigned int D, const long dims[D], const long ostrs[D], complex float* optr, const long istrs1[D], const complex double* iptr1);
typedef void (*md_z2opd_t)(unsigned int D, const long dims[D], const long ostrs[D], complex double* optr, const long istrs1[D], const complex float* iptr1);


typedef void (*md_3op_t)(unsigned int D, const long dims[D], const long ostrs[D], float* optr, const long istrs1[D], const float* iptr1, const long istrs2[D], const float* iptr2);
typedef void (*md_z3op_t)(unsigned int D, const long dims[D], const long ostrs[D], complex float* optr, const long istrs1[D], const complex float* iptr1, const long istrs2[D], const complex float* iptr2);
typedef void (*md_3opd_t)(unsigned int D, const long dims[D], const long ostrs[D], double* optr, const long istrs1[D], const float* iptr1, const long istrs2[D], const float* iptr2);
typedef void (*md_z3opd_t)(unsigned int D, const long dims[D], const long ostrs[D], complex double* optr, const long istrs1[D], const complex float* iptr1, const long istrs2[D], const complex float* iptr2);


// disabled forward declarations (always_inline variants of the helpers in this file)
#if 0
static void optimized_twoop(unsigned int D, const long dim[D], const long ostr[D], void* optr, const long istr1[D], void* iptr1, size_t sizes[2], md_nary_fun_t too, void* data_ptr) __attribute__((always_inline));

static void optimized_twoop_oi(unsigned int D, const long dim[D], const long ostr[D], void* optr, const long istr1[D], const void* iptr1, size_t sizes[2], md_nary_fun_t too, void* data_ptr) __attribute__((always_inline));

static void optimized_threeop(unsigned int D, const long dim[D], const long ostr[D], void* optr, const long istr1[D], void* iptr1, const long istr2[D], void* iptr2, size_t sizes[3], md_nary_fun_t too, void* data_ptr) __attribute__((always_inline));

static void optimized_threeop_oii(unsigned int D, const long dim[D], const long ostr[D], void* optr, const long istr1[D], const void* iptr1, const long istr2[D], const void* iptr2, size_t sizes[3], md_nary_fun_t too, void* data_ptr) __attribute__((always_inline));

static void make_z3op_simple(md_z3op_t fun, unsigned int D, const long dims[D], complex float* optr, const complex float* iptr1, const complex float* iptr2) __attribute__((always_inline));

static void make_3op_simple(md_3op_t fun, unsigned int D, const long dims[D], float* optr, const float* iptr1, const float* iptr2) __attribute__((always_inline));

static void make_z3op(size_t offset, unsigned int D, const long dim[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2) __attribute__((always_inline));

static void make_3opd_simple(md_3opd_t fun, unsigned int D, const long dims[D], double* optr, const float* iptr1, const float* iptr2) __attribute__((always_inline));

static void make_z2op_simple(md_z2op_t fun, unsigned int D, const long dims[D], complex float* optr, const complex float* iptr1) __attribute__((always_inline));

static void make_2op_simple(md_2op_t fun, unsigned int D, const long dims[D], float* optr, const float* iptr1) __attribute__((always_inline));
#endif

/**
 * Optimized two-op wrapper.
Use when input is constant * * @param D number of dimensions * @param dim dimensions * @param ostr output strides * @param optr output * @param istr1 input 1 strides * @param iptr1 input 1 (constant) * @param size size of data structures, e.g. complex float * @param too two-op multiply function * @param data_ptr pointer to additional data used by too */ static void optimized_twoop_oi(unsigned int D, const long dim[D], const long ostr[D], void* optr, const long istr1[D], const void* iptr1, size_t sizes[2], md_nary_opt_fun_t too, void* data_ptr) { const long (*nstr[2])[D] = { (const long (*)[D])ostr, (const long (*)[D])istr1 }; void *nptr[2] = { optr, (void*)iptr1 }; unsigned int io = 1 + ((iptr1 == optr) ? 2 : 0); optimized_nop(2, io, D, dim, nstr, nptr, sizes, too, data_ptr); } /** * Optimized threeop wrapper. Use when inputs are constants * * @param D number of dimensions * @param dim dimensions * @param ostr output strides * @param optr output * @param istr1 input 1 strides * @param iptr1 input 1 (constant) * @param istr2 input 2 strides * @param iptr2 input 2 (constant) * @param size size of data structures, e.g. complex float * @param too three-op multiply function * @param data_ptr pointer to additional data used by too */ static void optimized_threeop_oii(unsigned int D, const long dim[D], const long ostr[D], void* optr, const long istr1[D], const void* iptr1, const long istr2[D], const void* iptr2, size_t sizes[3], md_nary_opt_fun_t too, void* data_ptr) { const long (*nstr[3])[D] = { (const long (*)[D])ostr, (const long (*)[D])istr1, (const long (*)[D])istr2 }; void *nptr[3] = { optr, (void*)iptr1, (void*)iptr2 }; unsigned int io = 1 + ((iptr1 == optr) ? 2 : 0) + ((iptr2 == optr) ? 4 : 0); optimized_nop(3, io, D, dim, nstr, nptr, sizes, too, data_ptr); } /* HELPER FUNCTIONS * * The following functions, typedefs, and macros are used internally in flpmath.c * to simplify implementation of many similar functions. 
*/ typedef void (*r2op_t)(long N, float* dst, const float* src1); typedef void (*z2op_t)(long N, complex float* dst, const complex float* src1); typedef void (*r3op_t)(long N, float* dst, const float* src1, const float* src2); typedef void (*z3op_t)(long N, complex float* dst, const complex float* src1, const complex float* src2); typedef void (*r2opd_t)(long N, double* dst, const float* src1); typedef void (*z2opd_t)(long N, complex double* dst, const complex float* src1); typedef void (*r3opd_t)(long N, double* dst, const float* src1, const float* src2); typedef void (*z3opd_t)(long N, complex double* dst, const complex float* src1, const complex float* src2); typedef void (*r2opf_t)(long N, float* dst, const double* src1); typedef void (*z2opf_t)(long N, complex float* dst, const complex double* src1); static void make_z3op_simple(md_z3op_t fun, unsigned int D, const long dims[D], complex float* optr, const complex float* iptr1, const complex float* iptr2) { long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); fun(D, dims, strs, optr, strs, iptr1, strs, iptr2); } static void make_3op_simple(md_3op_t fun, unsigned int D, const long dims[D], float* optr, const float* iptr1, const float* iptr2) { long strs[D]; md_calc_strides(D, strs, dims, FL_SIZE); fun(D, dims, strs, optr, strs, iptr1, strs, iptr2); } static void make_z3opd_simple(md_z3opd_t fun, unsigned int D, const long dims[D], complex double* optr, const complex float* iptr1, const complex float* iptr2) { long strs_single[D]; long strs_double[D]; md_calc_strides(D, strs_single, dims, CFL_SIZE); md_calc_strides(D, strs_double, dims, CDL_SIZE); fun(D, dims, strs_double, optr, strs_single, iptr1, strs_single, iptr2); } static void make_3opd_simple(md_3opd_t fun, unsigned int D, const long dims[D], double* optr, const float* iptr1, const float* iptr2) { long strs_single[D]; long strs_double[D]; md_calc_strides(D, strs_single, dims, FL_SIZE); md_calc_strides(D, strs_double, dims, DL_SIZE); fun(D, dims, 
strs_double, optr, strs_single, iptr1, strs_single, iptr2); } static void make_z2op_simple(md_z2op_t fun, unsigned int D, const long dims[D], complex float* optr, const complex float* iptr1) { long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); fun(D, dims, strs, optr, strs, iptr1); } static void make_2op_simple(md_2op_t fun, unsigned int D, const long dims[D], float* optr, const float* iptr1) { long strs[D]; md_calc_strides(D, strs, dims, FL_SIZE); fun(D, dims, strs, optr, strs, iptr1); } static void make_z2opd_simple(md_z2opd_t fun, unsigned int D, const long dims[D], complex double* optr, const complex float* iptr1) { long strs_single[D]; long strs_double[D]; md_calc_strides(D, strs_single, dims, CFL_SIZE); md_calc_strides(D, strs_double, dims, CDL_SIZE); fun(D, dims, strs_double, optr, strs_single, iptr1); } static void make_2opd_simple(md_2opd_t fun, unsigned int D, const long dims[D], double* optr, const float* iptr1) { long strs_single[D]; long strs_double[D]; md_calc_strides(D, strs_single, dims, FL_SIZE); md_calc_strides(D, strs_double, dims, DL_SIZE); fun(D, dims, strs_double, optr, strs_single, iptr1); } static void nary_z3op(struct nary_opt_data_s* data, void* ptr[]) { size_t offset = *(size_t*)data->data_ptr; (*(z3op_t*)(((char*)data->ops) + offset))(data->size, ptr[0], ptr[1], ptr[2]); } static void make_z3op(size_t offset, unsigned int D, const long dim[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2) { optimized_threeop_oii(D, dim, ostr, optr, istr1, iptr1, istr2, iptr2, (size_t[3]){ [0 ... 
2] = CFL_SIZE }, nary_z3op, &offset); } static void nary_3op(struct nary_opt_data_s* data, void* ptr[]) { size_t offset = *(size_t*)data->data_ptr; (*(r3op_t*)(((char*)data->ops) + offset))(data->size, ptr[0], ptr[1], ptr[2]); } static void make_3op(size_t offset, unsigned int D, const long dim[D], const long ostr[D], float* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2) { optimized_threeop_oii(D, dim, ostr, optr, istr1, iptr1, istr2, iptr2, (size_t[3]){ [0 ... 2] = FL_SIZE }, nary_3op, &offset); } static void nary_z3opd(struct nary_opt_data_s* data, void* ptr[]) { size_t offset = *(size_t*)data->data_ptr; (*(z3opd_t*)(((char*)data->ops) + offset))(data->size, ptr[0], ptr[1], ptr[2]); } static void make_z3opd(size_t offset, unsigned int D, const long dim[D], const long ostr[D], complex double* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2) { optimized_threeop_oii(D, dim, ostr, optr, istr1, iptr1, istr2, iptr2, (size_t[3]){ CDL_SIZE, CFL_SIZE, CFL_SIZE }, nary_z3opd, &offset); } static void nary_3opd(struct nary_opt_data_s* data, void* ptr[]) { size_t offset = *(size_t*)data->data_ptr; (*(r3opd_t*)(((char*)data->ops) + offset))(data->size, ptr[0], ptr[1], ptr[2]); } static void make_3opd(size_t offset, unsigned int D, const long dim[D], const long ostr[D], double* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2) { optimized_threeop_oii(D, dim, ostr, optr, istr1, iptr1, istr2, iptr2, (size_t[3]){ DL_SIZE, FL_SIZE, FL_SIZE }, nary_3opd, &offset); } static void nary_z2op(struct nary_opt_data_s* data, void* ptr[]) { size_t offset = *(size_t*)data->data_ptr; (*(z2op_t*)(((char*)data->ops) + offset))(data->size, ptr[0], ptr[1]); } static void make_z2op(size_t offset, unsigned int D, const long dim[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1) { optimized_twoop_oi(D, dim, ostr, optr, istr1, 
iptr1, (size_t[2]){ CFL_SIZE, CFL_SIZE }, nary_z2op, &offset); } static void nary_2op(struct nary_opt_data_s* data, void* ptr[]) { size_t offset = *(size_t*)data->data_ptr; (*(r2op_t*)(((char*)data->ops) + offset))(data->size, ptr[0], ptr[1]); } static void make_2op(size_t offset, unsigned int D, const long dim[D], const long ostr[D], float* optr, const long istr1[D], const float* iptr1) { optimized_twoop_oi(D, dim, ostr, optr, istr1, iptr1, (size_t[2]){ FL_SIZE, FL_SIZE }, nary_2op, &offset); } static void nary_z2opd(struct nary_opt_data_s* data, void* ptr[]) { size_t offset = *(size_t*)data->data_ptr; (*(z2opd_t*)(((char*)data->ops) + offset))(data->size, ptr[0], ptr[1]); } __attribute__((unused)) static void make_z2opd(size_t offset, unsigned int D, const long dim[D], const long ostr[D], complex double* optr, const long istr1[D], const complex float* iptr1) { size_t sizes[2] = { sizeof(complex double), sizeof(complex float) }; optimized_twoop_oi(D, dim, ostr, optr, istr1, iptr1, sizes, nary_z2opd, &offset); } static void nary_2opd(struct nary_opt_data_s* data, void* ptr[]) { size_t offset = *(size_t*)data->data_ptr; (*(r2opd_t*)(((char*)data->ops) + offset))(data->size, ptr[0], ptr[1]); } static void make_2opd(size_t offset, unsigned int D, const long dim[D], const long ostr[D], double* optr, const long istr1[D], const float* iptr1) { optimized_twoop_oi(D, dim, ostr, optr, istr1, iptr1, (size_t[2]){ DL_SIZE, FL_SIZE }, nary_2opd, &offset); } static void nary_z2opf(struct nary_opt_data_s* data, void* ptr[]) { size_t offset = *(size_t*)data->data_ptr; (*(z2opf_t*)(((char*)data->ops) + offset))(data->size, ptr[0], ptr[1]); } static void make_z2opf(size_t offset, unsigned int D, const long dim[D], const long ostr[D], complex float* optr, const long istr1[D], const complex double* iptr1) { size_t sizes[2] = { sizeof(complex float), sizeof(complex double) }; optimized_twoop_oi(D, dim, ostr, optr, istr1, iptr1, sizes, nary_z2opf, &offset); } void* unused2 = make_z2opf; 
/*
 * Trampoline for real two-operand kernels with single-precision output.
 *
 * data_ptr carries the byte offset of the r2opf_t member inside the ops
 * table; that member is called with ptr[0] and ptr[1].
 */
static void nary_2opf(struct nary_opt_data_s* data, void* ptr[])
{
	size_t offset = *(size_t*)data->data_ptr;

	(*(r2opf_t*)(((char*)data->ops) + offset))(data->size, ptr[0], ptr[1]);
}

/*
 * Run an r2opf_t kernel (float output, double input) selected by its
 * byte offset in the ops table.
 */
static void make_2opf(size_t offset, unsigned int D, const long dim[D], const long ostr[D], float* optr, const long istr1[D], const double* iptr1)
{
	optimized_twoop_oi(D, dim, ostr, optr, istr1, iptr1, (size_t[2]){ FL_SIZE, DL_SIZE }, nary_2opf, &offset);
}

/*
 * Call a strided complex double -> complex float function `fun` with
 * strides computed from `dims` (CFL_SIZE elements for the output,
 * CDL_SIZE elements for the input).
 */
static void make_z2opf_simple(md_z2opf_t fun, unsigned int D, const long dims[D], complex float* optr, const complex double* iptr1)
{
	long strs_single[D];
	long strs_double[D];

	md_calc_strides(D, strs_single, dims, CFL_SIZE);
	md_calc_strides(D, strs_double, dims, CDL_SIZE);

	fun(D, dims, strs_single, optr, strs_double, iptr1);
}

/*
 * Call a strided double -> float function `fun` with strides computed
 * from `dims` (FL_SIZE elements for the output, DL_SIZE for the input).
 */
static void make_2opf_simple(md_2opf_t fun, unsigned int D, const long dims[D], float* optr, const double* iptr1)
{
	long strs_single[D];
	long strs_double[D];

	md_calc_strides(D, strs_single, dims, FL_SIZE);
	md_calc_strides(D, strs_double, dims, DL_SIZE);

	fun(D, dims, strs_single, optr, strs_double, iptr1);
}

#ifdef USE_CUDA
/*
 * Pass a single element of `size` bytes to md_gpu_move (dims = { 1 }).
 * Presumably this yields a device copy of the constant - confirm in
 * md_gpu_move. Callers below release the result with md_free().
 */
static void* gpu_constant(const void* vp, size_t size)
{
	return md_gpu_move(1, (long[1]){ 1 }, vp, size);
}
#endif

/*
 * Apply a complex three-operand function `fun` with a scalar as the
 * second input: fun(optr, iptr, val).
 *
 * The scalar is passed as an array using the strides produced by
 * md_singleton_strides. If optr is on the device, a temporary obtained
 * from gpu_constant() is used instead of the local and freed afterwards.
 */
static void make_z3op_scalar(md_z3op_t fun, unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr, complex float val)
{
	complex float* valp = &val;

#ifdef USE_CUDA
	if (cuda_ondevice(optr))
		valp = gpu_constant(&val, CFL_SIZE);
#endif

	long strs1[D];
	md_singleton_strides(D, strs1);

	fun(D, dims, ostr, optr, istr, iptr, strs1, valp);

#ifdef USE_CUDA
	if (cuda_ondevice(optr))
		md_free(valp);
#endif
}

/*
 * Real-valued counterpart of make_z3op_scalar: apply `fun` with the
 * scalar `val` as the second input.
 */
static void make_3op_scalar(md_3op_t fun, unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr[D], const float* iptr, float val)
{
	float* valp = &val;

#ifdef USE_CUDA
	if (cuda_ondevice(optr))
		valp = gpu_constant(&val, FL_SIZE);
#endif

	long strs1[D];
	md_singleton_strides(D, strs1);

	fun(D, dims, ostr, optr, istr, iptr, strs1, valp);

#ifdef USE_CUDA
	if (cuda_ondevice(optr))
		md_free(valp);
#endif
}

/*
 * Dimensions of the real view of a complex array: a new leading
 * dimension of size 2 followed by the D original dimensions.
 */
static void real_from_complex_dims(unsigned int D, long odims[D + 1], const long idims[D])
{
	odims[0] = 2;
	md_copy_dims(D, odims + 1, idims);
}

/*
 * Strides of the real view of a complex array: FL_SIZE for the new
 * leading dimension followed by the D original strides.
 */
static void real_from_complex_strides(unsigned int D, long ostrs[D + 1], const long istrs[D])
{
	ostrs[0] = FL_SIZE;
	md_copy_dims(D, ostrs + 1, istrs);	// works for strides too
}

/*
 * Apply a real three-operand kernel to complex arrays by reinterpreting
 * them as float arrays with D + 1 dimensions (see real_from_complex_*).
 */
static void make_z3op_from_real(size_t offset, unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
	long rdims[D + 1];
	long rostr[D + 1];
	long ristr1[D + 1];
	long ristr2[D + 1];

	real_from_complex_dims(D, rdims, dims);
	real_from_complex_strides(D, rostr, ostr);
	real_from_complex_strides(D, ristr1, istr1);
	real_from_complex_strides(D, ristr2, istr2);

	make_3op(offset, D + 1, rdims, rostr, (float*)optr, ristr1, (const float*)iptr1, ristr2, (const float*)iptr2);
}

/*
 * Apply a real float -> double kernel to complex arrays through their
 * real views with D + 1 dimensions.
 *
 * NOTE(review): real_from_complex_strides sets the leading stride to
 * FL_SIZE for the double-precision output as well - confirm this is
 * what the kernel dispatch expects.
 */
static void make_z2opd_from_real(size_t offset, unsigned int D, const long dims[D], const long ostr[D], complex double* optr, const long istr1[D], const complex float* iptr1)
{
	long rdims[D + 1];
	long rostr[D + 1];
	long ristr1[D + 1];

	real_from_complex_dims(D, rdims, dims);
	real_from_complex_strides(D, rostr, ostr);
	real_from_complex_strides(D, ristr1, istr1);

	make_2opd(offset, D + 1, rdims, rostr, (double*)optr, ristr1, (const float*)iptr1);
}

/*
 * Apply a real double -> float kernel to complex arrays through their
 * real views with D + 1 dimensions.
 *
 * NOTE(review): real_from_complex_strides sets the leading stride to
 * FL_SIZE for the double-precision input as well - confirm this is
 * what the kernel dispatch expects.
 */
static void make_z2opf_from_real(size_t offset, unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr1[D], const complex double* iptr1)
{
	long rdims[D + 1];
	long rostr[D + 1];
	long ristr1[D + 1];

	real_from_complex_dims(D, rdims, dims);
	real_from_complex_strides(D, rostr, ostr);
	real_from_complex_strides(D, ristr1, istr1);

	make_2opf(offset, D + 1, rdims, rostr, (float*)optr, ristr1, (const double*)iptr1);
}

// type safe
// Each macro applies TYPE_CHECK to cpu_ops.fun with the expected kernel
// type and then passes the member's offset within struct vec_ops to the
// matching make_* helper.
#define MAKE_3OP(fun, ...)	((void)TYPE_CHECK(r3op_t, cpu_ops.fun), make_3op(offsetof(struct vec_ops, fun), __VA_ARGS__))
#define MAKE_Z3OP(fun, ...)	((void)TYPE_CHECK(z3op_t, cpu_ops.fun), make_z3op(offsetof(struct vec_ops, fun), __VA_ARGS__))
#define MAKE_2OP(fun, ...)	((void)TYPE_CHECK(r2op_t, cpu_ops.fun), make_2op(offsetof(struct vec_ops, fun), __VA_ARGS__))
#define MAKE_Z2OP(fun, ...)	((void)TYPE_CHECK(z2op_t, cpu_ops.fun), make_z2op(offsetof(struct vec_ops, fun), __VA_ARGS__))
#define MAKE_2OPD(fun, ...)	((void)TYPE_CHECK(r2opd_t, cpu_ops.fun), make_2opd(offsetof(struct vec_ops, fun), __VA_ARGS__))
#define MAKE_Z2OPD(fun, ...)	((void)TYPE_CHECK(z2opd_t, cpu_ops.fun), make_z2opd(offsetof(struct vec_ops, fun), __VA_ARGS__))
#define MAKE_2OPF(fun, ...)	((void)TYPE_CHECK(r2opf_t, cpu_ops.fun), make_2opf(offsetof(struct vec_ops, fun), __VA_ARGS__))
#define MAKE_Z2OPF(fun, ...)	((void)TYPE_CHECK(z2opf_t, cpu_ops.fun), make_z2opf(offsetof(struct vec_ops, fun), __VA_ARGS__))
#define MAKE_3OPD(fun, ...)	((void)TYPE_CHECK(r3opd_t, cpu_ops.fun), make_3opd(offsetof(struct vec_ops, fun), __VA_ARGS__))
#define MAKE_Z3OPD(fun, ...)	((void)TYPE_CHECK(z3opd_t, cpu_ops.fun), make_z3opd(offsetof(struct vec_ops, fun), __VA_ARGS__))
#define MAKE_Z3OP_FROM_REAL(fun, ...) \
	((void)TYPE_CHECK(r3op_t, cpu_ops.fun), make_z3op_from_real(offsetof(struct vec_ops, fun), __VA_ARGS__))
#define MAKE_Z2OPD_FROM_REAL(fun, ...) \
	((void)TYPE_CHECK(r2opd_t, cpu_ops.fun), make_z2opd_from_real(offsetof(struct vec_ops, fun), __VA_ARGS__))
#define MAKE_Z2OPF_FROM_REAL(fun, ...) \
	((void)TYPE_CHECK(r2opf_t, cpu_ops.fun), make_z2opf_from_real(offsetof(struct vec_ops, fun), __VA_ARGS__))



/* The section with exported functions starts here.
*/



/**
 * Multiply two complex arrays and save to output (with strides)
 *
 * optr = iptr1 * iptr2
 */
void md_zmul2(unsigned int D, const long dim[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
	MAKE_Z3OP(zmul, D, dim, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Multiply two complex arrays and save to output (without strides)
 *
 * optr = iptr1 * iptr2
 */
void md_zmul(unsigned int D, const long dim[D], complex float* optr, const complex float* iptr1, const complex float* iptr2)
{
	make_z3op_simple(md_zmul2, D, dim, optr, iptr1, iptr2);
}



/**
 * Multiply two scalar arrays and save to output (with strides)
 *
 * optr = iptr1 * iptr2
 */
void md_mul2(unsigned int D, const long dim[D], const long ostr[D], float* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2)
{
	MAKE_3OP(mul, D, dim, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Multiply two scalar arrays and save to output (without strides)
 *
 * optr = iptr1 * iptr2
 */
void md_mul(unsigned int D, const long dims[D], float* optr, const float* iptr1, const float* iptr2)
{
	make_3op_simple(md_mul2, D, dims, optr, iptr1, iptr2);
}



/**
 * Multiply real and imaginary parts of two complex arrays separately and save to output (with strides)
 *
 * real(optr) = real(iptr1) * real(iptr2)
 *
 * imag(optr) = imag(iptr1) * imag(iptr2)
 *
 * Implemented by running the real kernel "mul" on the real views of the
 * complex arrays.
 */
void md_zrmul2(unsigned int D, const long dim[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
	MAKE_Z3OP_FROM_REAL(mul, D, dim, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Multiply real and imaginary parts of two complex arrays separately and save to output (without strides)
 *
 * real(optr) = real(iptr1) * real(iptr2)
 *
 * imag(optr) = imag(iptr1) * imag(iptr2)
 */
void md_zrmul(unsigned int D, const long dim[D], complex float* optr, const complex float* iptr1, const complex float* iptr2)
{
	make_z3op_simple(md_zrmul2, D, dim, optr, iptr1, iptr2);
}



/**
 * Multiply complex array with a scalar and save to output (with strides)
 *
 * optr = iptr * val
 *
 * If the imaginary part of val is zero, the arrays are reinterpreted as
 * real arrays with an extra leading dimension and md_smul2 is called
 * with crealf(val) instead.
 */
void md_zsmul2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr, complex float val)
{
	if (0. == cimagf(val)) { // strength reduction: complex to real multiplication

		long dimsR[D + 1];
		long ostrR[D + 1];
		long istrR[D + 1];

		real_from_complex_dims(D, dimsR, dims);
		real_from_complex_strides(D, ostrR, ostr);
		real_from_complex_strides(D, istrR, istr);

		md_smul2(D + 1, dimsR, ostrR, (float*)optr, istrR, (const float*)iptr, crealf(val));
		return;
	}

	make_z3op_scalar(md_zmul2, D, dims, ostr, optr, istr, iptr, val);
}



/**
 * Multiply complex array with a scalar and save to output (without strides)
 *
 * optr = iptr * var
 */
void md_zsmul(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr, complex float var)
{
	long strs[D];
	md_calc_strides(D, strs, dims, CFL_SIZE);

	md_zsmul2(D, dims, strs, optr, strs, iptr, var);
}



/**
 * Multiply scalar array with a scalar and save to output (with strides)
 *
 * optr = iptr * var
 *
 * With USE_CUDA and iptr on the device, gpu_ops.axpy is used when both
 * md_calc_blockdim checks return D; otherwise the generic path through
 * make_3op_scalar is taken.
 */
void md_smul2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr[D], const float* iptr, float var)
{
#ifdef USE_CUDA
	if (cuda_ondevice(iptr)) {

		assert(cuda_ondevice(optr));

		if (md_calc_blockdim(D, dims, ostr, FL_SIZE) != D)
			goto fallback;

		if (md_calc_blockdim(D, dims, istr, FL_SIZE) != D)
			goto fallback;

		// in-place case: presumably axpy computes optr += (var - 1) * iptr,
		// which equals optr * var when iptr == optr
		if (iptr == optr) {

			gpu_ops.axpy(md_calc_size(D, dims), optr, var - 1., iptr);
			return;
		}

		// no strides needed because of checks above
		md_clear(D, dims, optr, FL_SIZE);
		// or call md_zaxpy
		gpu_ops.axpy(md_calc_size(D, dims), optr, var, iptr);
		return;
	}
fallback:
#endif
	make_3op_scalar(md_mul2, D, dims, ostr, optr, istr, iptr, var);
}



/**
 * Multiply scalar array with a scalar and save to output (without strides)
 *
 * optr = iptr * var
 */
void md_smul(unsigned int D, const long dims[D], float* optr, const float* iptr, float var)
{
	long strs[D];
	md_calc_strides(D, strs, dims, FL_SIZE);

	md_smul2(D, dims, strs, optr, strs, iptr, var);
}



/**
 * Multiply the first complex array with the conjugate of the second complex array and save to output (with strides)
 *
 * optr = iptr1 * conj(iptr2)
 */
void md_zmulc2(unsigned int D, const long dim[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
	MAKE_Z3OP(zmulc, D, dim, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Multiply the first complex array with the conjugate of the second complex array and save to output (without strides)
 *
 * optr = iptr1 * conj(iptr2)
 */
void md_zmulc(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr1, const complex float* iptr2)
{
	make_z3op_simple(md_zmulc2, D, dims, optr, iptr1, iptr2);
}



/**
 * Divide the first complex array by the second complex array and save to output (with strides)
 *
 * optr = iptr1 / iptr2
 */
void md_zdiv2(unsigned int D, const long dim[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
	MAKE_Z3OP(zdiv, D, dim, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Divide the first complex array by the second complex array and save to output (without strides)
 *
 * optr = iptr1 / iptr2
 */
void md_zdiv(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr1, const complex float* iptr2)
{
	make_z3op_simple(md_zdiv2, D, dims, optr, iptr1, iptr2);
}



/**
 * Divide the first scalar array by the second scalar array and save to output (with strides)
 *
 * optr = iptr1 / iptr2
 */
void md_div2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2)
{
	MAKE_3OP(div, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 *
Divide the first scalar array by the second scalar array and save to output (without strides)
 *
 * optr = iptr1 / iptr2
 */
void md_div(unsigned int D, const long dims[D], float* optr, const float* iptr1, const float* iptr2)
{
	make_3op_simple(md_div2, D, dims, optr, iptr1, iptr2);
}



/**
 * Take the first complex array to the power of the second complex array and save to output (with strides)
 *
 * optr = iptr1 ^ iptr2
 *
 * With USE_CUDA, asserts that none of the three arrays is on the device.
 */
void md_zpow2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
#ifdef USE_CUDA
	// FIXME: something is broken with the cuda implementation of zpow
	assert(!(cuda_ondevice(optr) || cuda_ondevice(iptr1) || cuda_ondevice(iptr2)));
#endif
	MAKE_Z3OP(zpow, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Take the first complex array to the power of the second complex array and save to output (without strides)
 *
 * optr = iptr1 ^ iptr2
 */
void md_zpow(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr1, const complex float* iptr2)
{
	make_z3op_simple(md_zpow2, D, dims, optr, iptr1, iptr2);
}



/**
 * Take the first scalar array to the power of the second scalar array and save to output (with strides)
 *
 * optr = iptr1 ^ iptr2
 */
void md_pow2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2)
{
	MAKE_3OP(pow, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Take the first scalar array to the power of the second scalar array and save to output (without strides)
 *
 * optr = iptr1 ^ iptr2
 */
void md_pow(unsigned int D, const long dims[D], float* optr, const float* iptr1, const float* iptr2)
{
	make_3op_simple(md_pow2, D, dims, optr, iptr1, iptr2);
}



/**
 * Take square root of scalar array and save to output (with strides)
 *
 * optr = sqrt(iptr)
 */
void md_sqrt2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr[D], const float* iptr)
{
	MAKE_2OP(sqrt, D, dims, ostr, optr, istr, iptr);
}



/**
 * Take square root of scalar array and save to output (without strides)
 *
 * optr = sqrt(iptr)
 */
void md_sqrt(unsigned int D, const long dims[D], float* optr, const float* iptr)
{
	make_2op_simple(md_sqrt2, D, dims, optr, iptr);
}



/**
 * Take square root of complex array and save to output (with strides)
 *
 * optr = sqrt(iptr)
 *
 * Implemented as md_zspow2 with exponent 0.5, so it goes through
 * md_zpow2 and its restrictions.
 */
void md_zsqrt2(unsigned int D, const long dims[D], const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr)
{
	md_zspow2(D, dims, ostrs, optr, istrs, iptr, 0.5);
}



/**
 * Take square root of complex array and save to output (without strides)
 *
 * optr = sqrt(iptr)
 */
void md_zsqrt(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr)
{
	make_z2op_simple(md_zsqrt2, D, dims, optr, iptr);
}



/**
 * Raise complex array to the power of a scalar and save to output (without strides)
 *
 * optr = pow(iptr, scalar)
 */
void md_zspow(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr, complex float val)
{
	long strs[D];
	md_calc_strides(D, strs, dims, CFL_SIZE);

	md_zspow2(D, dims, strs, optr, strs, iptr, val);
}



/**
 * Raise complex array to the power of a scalar and save to output (with strides)
 *
 * optr = pow(iptr, scalar)
 */
void md_zspow2(unsigned int D, const long dims[D], const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr, complex float val)
{
	make_z3op_scalar(md_zpow2, D, dims, ostrs, optr, istrs, iptr, val);
}



/**
 * Convert float array to double array (with strides)
 *
 * dst = (double)src
 */
void md_float2double2(unsigned int D, const long dims[D], const long ostr[D], double* dst, const long istr[D], const float* src)
{
	MAKE_2OPD(float2double, D, dims, ostr, dst, istr, src);
}



/**
 * Convert float array to double array (without strides)
 *
 * dst = (double)src
 */
void md_float2double(unsigned int D, const long dims[D], double* dst, const float* src)
{
	make_2opd_simple(md_float2double2, D, dims, dst, src);
}



/**
 * Convert double array to float array (with strides)
 *
 * dst = (float)src
 */
void md_double2float2(unsigned int D, const long dims[D], const long ostr[D], float* dst, const long istr[D], const double* src)
{
	MAKE_2OPF(double2float, D, dims, ostr, dst, istr, src);
}



/**
 * Convert double array to float array (without strides)
 *
 * dst = (float)src
 */
void md_double2float(unsigned int D, const long dims[D], float* dst, const double* src)
{
	make_2opf_simple(md_double2float2, D, dims, dst, src);
}



/**
 * Convert complex double array to complex float array (with strides)
 *
 * dst = (complex float)src
 */
void md_zdouble2float2(unsigned int D, const long dims[D], const long ostr[D], complex float* dst, const long istr[D], const complex double* src)
{
	MAKE_Z2OPF_FROM_REAL(double2float, D, dims, ostr, dst, istr, src);
}



/**
 * Convert complex double array to complex float array (without strides)
 *
 * dst = (complex float)src
 */
void md_zdouble2float(unsigned int D, const long dims[D], complex float* dst, const complex double* src)
{
	make_z2opf_simple(md_zdouble2float2, D, dims, dst, src);
}



/**
 * Convert complex float array to complex double array (with strides)
 *
 * dst = (complex double)src
 */
void md_zfloat2double2(unsigned int D, const long dims[D], const long ostr[D], complex double* dst, const long istr[D], const complex float* src)
{
	MAKE_Z2OPD_FROM_REAL(float2double, D, dims, ostr, dst, istr, src);
}



/**
 * Convert complex float array to complex double array (without strides)
 *
 * dst = (complex double)src
 */
void md_zfloat2double(unsigned int D, const long dims[D], complex double* dst, const complex float* src)
{
	make_z2opd_simple(md_zfloat2double2, D, dims, dst, src);
}



/*
 * A A A ok
 * A A 1 ok
 * A 1 A ok
 * 1 A A ok
 * A 1 1 !
 * 1 A 1 !
 * 1 1 A !
* 1 1 1 ok */ void md_tenmul_dims(unsigned int D, long max_dims[D], const long out_dims[D], const long in1_dims[D], const long in2_dims[D]) { md_max_dims(D, ~0u, max_dims, in1_dims, out_dims); long max2_dims[D]; md_max_dims(D, ~0u, max2_dims, in2_dims, out_dims); assert(md_check_compat(D, 0u, max_dims, max2_dims)); } static bool detect_matrix(const long dims[3], const long ostrs[3], const long mstrs[3], const long istrs[3]) { return ( (0 == ostrs[1]) && (0 == mstrs[2]) && (0 == istrs[0]) && ((CFL_SIZE == ostrs[0]) && (ostrs[0] * dims[0] == ostrs[2])) && ((CFL_SIZE == mstrs[0]) && (mstrs[0] * dims[0] == mstrs[1])) && ((CFL_SIZE == istrs[1]) && (istrs[1] * dims[1] == istrs[2]))); } static bool simple_matmul(unsigned int N, const long max_dims[N], const long ostrs[N], complex float* out, const long mstrs[N], const complex float* mat, const long istrs[N], const complex float* in) { long dims[N]; md_copy_dims(N, dims, max_dims); long ostrs2[N]; md_copy_strides(N, ostrs2, ostrs); long mstrs2[N]; md_copy_strides(N, mstrs2, mstrs); long istrs2[N]; md_copy_strides(N, istrs2, istrs); long (*strs[3])[N] = { &ostrs2, &istrs2, &mstrs2 }; unsigned int ND = simplify_dims(3, N, dims, strs); long C = dims[0]; long B = dims[1]; long A = dims[2]; if ((3 == ND) && detect_matrix(dims, ostrs2, istrs2, mstrs2)) { debug_printf(DP_DEBUG4, "matmul: matrix multiplication (1).\n"); #if 0 // num/linalg.h mat_mul(A, B, C, *(complex float (*)[A][C])out, *(const complex float (*)[A][B])mat, *(const complex float (*)[B][C])in); #else blas_matrix_multiply(C, A, B, *(complex float (*)[A][C])out, *(const complex float (*)[B][C])in, *(const complex float (*)[A][B])mat); #endif return true; } if ((3 == ND) && detect_matrix(dims, ostrs2, mstrs2, istrs2)) { debug_printf(DP_DEBUG4, "matmul: matrix multiplication (2).\n"); #if 0 // num/linalg.h mat_mul(A, B, C, *(complex float (*)[A][C])out, *(const complex float (*)[A][B])in, *(const complex float (*)[B][C])mat); #else blas_matrix_multiply(C, A, B, 
*(complex float (*)[A][C])out, *(const complex float (*)[B][C])mat, *(const complex float (*)[A][B])in); #endif return true; } return false; } /* * tenmul (tensor multiplication) family of functions are revised * versions of the matmul functions. */ void md_ztenmul2(unsigned int D, const long max_dims[D], const long out_strs[D], complex float* out, const long in1_strs[D], const complex float* in1, const long in2_strs[D], const complex float* in2) { if (simple_matmul(D, max_dims, out_strs, out, in2_strs, in2, in1_strs, in1)) return; md_clear2(D, max_dims, out_strs, out, CFL_SIZE); md_zfmac2(D, max_dims, out_strs, out, in1_strs, in1, in2_strs, in2); } void md_ztenmulc2(unsigned int D, const long max_dims[D], const long out_strs[D], complex float* out, const long in1_strs[D], const complex float* in1, const long in2_strs[D], const complex float* in2) { md_clear2(D, max_dims, out_strs, out, CFL_SIZE); md_zfmacc2(D, max_dims, out_strs, out, in1_strs, in1, in2_strs, in2); } void md_ztenmul(unsigned int D, const long out_dims[D], complex float* out, const long in1_dims[D], const complex float* in1, const long in2_dims[D], const complex float* in2) { long max_dims[D]; md_tenmul_dims(D, max_dims, out_dims, in1_dims, in2_dims); md_ztenmul2(D, max_dims, MD_STRIDES(D, out_dims, CFL_SIZE), out, MD_STRIDES(D, in1_dims, CFL_SIZE), in1, MD_STRIDES(D, in2_dims, CFL_SIZE), in2); } void md_ztenmulc(unsigned int D, const long out_dims[D], complex float* out, const long in1_dims[D], const complex float* in1, const long in2_dims[D], const complex float* in2) { long max_dims[D]; md_tenmul_dims(D, max_dims, out_dims, in1_dims, in2_dims); md_ztenmulc2(D, max_dims, MD_STRIDES(D, out_dims, CFL_SIZE), out, MD_STRIDES(D, in1_dims, CFL_SIZE), in1, MD_STRIDES(D, in2_dims, CFL_SIZE), in2); } /* * matmul family of functions is deprecated - use tenmul instead */ static void md_zmatmul2_priv(unsigned int D, const long out_dims[D], const long out_strs[D], complex float* dst, const long mat_dims[D], 
const long mat_strs[D], const complex float* mat, const long in_dims[D], const long in_strs[D], const complex float* src, bool conj) { long max_dims[D]; md_tenmul_dims(D, max_dims, out_dims, mat_dims, in_dims); if ((!conj) && simple_matmul(D, max_dims, out_strs, dst, mat_strs, mat, in_strs, src)) return; md_clear2(D, out_dims, out_strs, dst, CFL_SIZE); (conj ? md_zfmacc2 : md_zfmac2)(D, max_dims, out_strs, dst, in_strs, src, mat_strs, mat); } /** * Matrix conjugate multiplication (with strides) * FIXME simplify interface? use macros? */ void md_zmatmulc2(unsigned int D, const long out_dims[D], const long out_strs[D], complex float* dst, const long mat_dims[D], const long mat_strs[D], const complex float* mat, const long in_dims[D], const long in_strs[D], const complex float* src) { md_zmatmul2_priv(D, out_dims, out_strs, dst, mat_dims, mat_strs, mat, in_dims, in_strs, src, true); } /** * Matrix conjugate multiplication (without strides) */ void md_zmatmulc(unsigned int D, const long out_dims[D], complex float* dst, const long mat_dims[D], const complex float* mat, const long in_dims[D], const complex float* src) { md_zmatmulc2(D, out_dims, MD_STRIDES(D, out_dims, CFL_SIZE), dst, mat_dims, MD_STRIDES(D, mat_dims, CFL_SIZE), mat, in_dims, MD_STRIDES(D, in_dims, CFL_SIZE), src); } /** * Matrix multiplication (with strides) * FIXME simplify interface? 
* FIXME: implementation assumes strides == 0 for dims == 1
 */
void md_zmatmul2(unsigned int D, const long out_dims[D], const long out_strs[D], complex float* dst, const long mat_dims[D], const long mat_strs[D], const complex float* mat, const long in_dims[D], const long in_strs[D], const complex float* src)
{
	md_zmatmul2_priv(D, out_dims, out_strs, dst, mat_dims, mat_strs, mat, in_dims, in_strs, src, false);
}



/**
 * Matrix multiplication (without strides)
 *
 * Strides are computed from the dimensions of each array.
 */
void md_zmatmul(unsigned int D, const long out_dims[D], complex float* dst, const long mat_dims[D], const complex float* mat, const long in_dims[D], const complex float* src)
{
	md_zmatmul2(D, out_dims, MD_STRIDES(D, out_dims, CFL_SIZE), dst,
		    mat_dims, MD_STRIDES(D, mat_dims, CFL_SIZE), mat,
		    in_dims, MD_STRIDES(D, in_dims, CFL_SIZE), src);
}



/**
 * Multiply two complex arrays and add to output (with strides)
 *
 * optr = optr + iptr1 * iptr2
 */
void md_zfmac2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
	MAKE_Z3OP(zfmac, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Multiply two complex arrays and add to output (without strides)
 *
 * optr = optr + iptr1 * iptr2
 */
void md_zfmac(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr1, const complex float* iptr2)
{
	make_z3op_simple(md_zfmac2, D, dims, optr, iptr1, iptr2);
}



/**
 * Multiply two complex arrays and add to double-precision output (with strides)
 *
 * optr = optr + iptr1 * iptr2
 */
void md_zfmacD2(unsigned int D, const long dims[D], const long ostr[D], complex double* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
	MAKE_Z3OPD(zfmac2, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Multiply two complex arrays and add to double-precision output (without strides)
 *
 * optr = optr + iptr1 * iptr2
 */
void md_zfmacD(unsigned int D, const long dims[D], complex double* optr, const complex float* iptr1, const complex float* iptr2)
{
	make_z3opd_simple(md_zfmacD2, D, dims, optr, iptr1, iptr2);
}



/**
 * Multiply two scalar arrays and add to output (with strides)
 *
 * optr = optr + iptr1 * iptr2
 */
void md_fmac2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2)
{
	MAKE_3OP(fmac, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Multiply two scalar arrays and add to output (without strides)
 *
 * optr = optr + iptr1 * iptr2
 */
void md_fmac(unsigned int D, const long dims[D], float* optr, const float* iptr1, const float* iptr2)
{
	make_3op_simple(md_fmac2, D, dims, optr, iptr1, iptr2);
}



/**
 * Multiply two scalar arrays and add to double-precision output (with strides)
 *
 * optr = optr + iptr1 * iptr2
 */
void md_fmacD2(unsigned int D, const long dims[D], const long ostr[D], double* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2)
{
	MAKE_3OPD(fmac2, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Multiply two scalar arrays and add to double-precision output (without strides)
 *
 * optr = optr + iptr1 * iptr2
 */
void md_fmacD(unsigned int D, const long dims[D], double* optr, const float* iptr1, const float* iptr2)
{
	make_3opd_simple(md_fmacD2, D, dims, optr, iptr1, iptr2);
}



/**
 * Multiply the first complex array with the conjugate of the second complex array and add to output (with strides)
 *
 * optr = optr + iptr1 * conj(iptr2)
 */
void md_zfmacc2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
	MAKE_Z3OP(zfmacc, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Multiply the first complex array with the conjugate of the second complex array and add to output (without strides)
 *
 * optr = optr + iptr1 * conj(iptr2)
 */
void md_zfmacc(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr1, const complex float* iptr2)
{
	make_z3op_simple(md_zfmacc2, D, dims, optr, iptr1, iptr2);
}



/**
 * Multiply the first complex array with the conjugate of the second complex array and add to double-precision output (with strides)
 *
 * optr = optr + iptr1 * conj(iptr2)
 */
void md_zfmaccD2(unsigned int D, const long dims[D], const long ostr[D], complex double* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
	MAKE_Z3OPD(zfmacc2, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Multiply the first complex array with the conjugate of the second complex array and add to double-precision output (without strides)
 *
 * optr = optr + iptr1 * conj(iptr2)
 */
void md_zfmaccD(unsigned int D, const long dims[D], complex double* optr, const complex float* iptr1, const complex float* iptr2)
{
	make_z3opd_simple(md_zfmaccD2, D, dims, optr, iptr1, iptr2);
}



/**
 * Multiply complex array with a scalar and add to output (with strides)
 *
 * optr = optr + iptr * val
 *
 * If the imaginary part of val is zero, the arrays are reinterpreted as
 * real arrays with an extra leading dimension and md_axpy2 is called
 * with crealf(val) instead.
 */
void md_zaxpy2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, complex float val, const long istr[D], const complex float* iptr)
{
	if (0. == cimagf(val)) { // strength reduction: complex to real multiplication

		long dimsR[D + 1];
		long ostrR[D + 1];
		long istrR[D + 1];

		real_from_complex_dims(D, dimsR, dims);
		real_from_complex_strides(D, ostrR, ostr);
		real_from_complex_strides(D, istrR, istr);

		md_axpy2(D + 1, dimsR, ostrR, (float*)optr, crealf(val), istrR, (const float*)iptr);
		return;
	}

	make_z3op_scalar(md_zfmac2, D, dims, ostr, optr, istr, iptr, val);
}



/**
 * Max of inputs (without strides)
 *
 * optr = max(iptr1, iptr2)
 */
void md_max(unsigned int D, const long dims[D], float* optr, const float* iptr1, const float* iptr2)
{
	long strs[D];
	md_calc_strides(D, strs, dims, FL_SIZE);

	md_max2(D, dims, strs, optr, strs, iptr1, strs, iptr2);
}



/**
 * Max of inputs (with strides)
 *
 * optr = max(iptr1, iptr2)
 */
void md_max2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2)
{
	MAKE_3OP(max, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Min of inputs (without strides)
 *
 * optr = min(iptr1, iptr2)
 */
void md_min(unsigned int D, const long dims[D], float* optr, const float* iptr1, const float* iptr2)
{
	long strs[D];
	md_calc_strides(D, strs, dims, FL_SIZE);

	md_min2(D, dims, strs, optr, strs, iptr1, strs, iptr2);
}



/**
 * Min of inputs (with strides)
 *
 * optr = min(iptr1, iptr2)
 */
void md_min2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2)
{
	MAKE_3OP(min, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Multiply complex array with a scalar and add to output (without strides)
 *
 * optr = optr + iptr * val
 */
void md_zaxpy(unsigned int D, const long dims[D], complex float* optr, complex float val, const complex float* iptr)
{
	long strs[D];
	md_calc_strides(D, strs, dims, CFL_SIZE);

	md_zaxpy2(D, dims, strs, optr, val, strs, iptr);
}



/**
 * Multiply scalar array with a scalar and add to output (with
strides)
 *
 * optr = optr + iptr * val
 *
 * val == 0 returns without touching optr; val == 1 is forwarded to
 * md_add2. With USE_CUDA and iptr on the device, gpu_ops.axpy is used
 * when both md_calc_blockdim checks return D and iptr != optr.
 * All other cases go through make_3op_scalar with md_fmac2.
 */
void md_axpy2(unsigned int D, const long dims[D], const long ostr[D], float* optr, float val, const long istr[D], const float* iptr)
{
	if (0. == val)
		return;

	// strength reduction
	if (1. == val) {

		md_add2(D, dims, ostr, optr, ostr, optr, istr, iptr);
		return;
	}

#ifdef USE_CUDA
	if (cuda_ondevice(iptr)) {

		assert(cuda_ondevice(optr));

		if (md_calc_blockdim(D, dims, ostr, FL_SIZE) != D)
			goto fallback;

		if (md_calc_blockdim(D, dims, istr, FL_SIZE) != D)
			goto fallback;

		// in-place operation is left to the generic path
		if (iptr == optr)
			goto fallback;

		gpu_ops.axpy(md_calc_size(D, dims), optr, val, iptr);
		return;
	}
fallback:
#endif
	make_3op_scalar(md_fmac2, D, dims, ostr, optr, istr, iptr, val);
}



/**
 * Multiply scalar array with a scalar and add to output (without strides)
 *
 * optr = optr + iptr * val
 */
void md_axpy(unsigned int D, const long dims[D], float* optr, float val, const float* iptr)
{
	long strs[D];
	md_calc_strides(D, strs, dims, FL_SIZE);

	md_axpy2(D, dims, strs, optr, val, strs, iptr);
}



/**
 * Add two complex arrays and save to output (with strides)
 *
 * optr = iptr1 + iptr2
 *
 * Implemented by running the real kernel "add" on the real views of the
 * complex arrays.
 */
void md_zadd2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
	MAKE_Z3OP_FROM_REAL(add, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}



/**
 * Add two complex arrays and save to output (without strides)
 *
 * optr = iptr1 + iptr2
 */
void md_zadd(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr1, const complex float* iptr2)
{
	make_z3op_simple(md_zadd2, D, dims, optr, iptr1, iptr2);
}



/**
 * Add scalar to complex array (with strides)
 *
 * optr = iptr + val
 */
void md_zsadd2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr, complex float val)
{
	make_z3op_scalar(md_zadd2, D, dims, ostr, optr, istr, iptr, val);
}



/**
 * Add scalar to complex array (without strides)
 *
 * optr
= iptr + val
 *
 * Strides for both arrays are computed from dims with element size CFL_SIZE.
 */
void md_zsadd(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr, complex float val)
{
	long strs[D];
	md_calc_strides(D, strs, dims, CFL_SIZE);

	md_zsadd2(D, dims, strs, optr, strs, iptr, val);
}


/**
 * Subtract the second complex array from the first complex array and save to output (with strides)
 *
 * optr = iptr1 - iptr2
 */
void md_zsub2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
	MAKE_Z3OP_FROM_REAL(sub, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}


/**
 * Subtract the second complex array from the first complex array and save to output (without strides)
 *
 * optr = iptr1 - iptr2
 */
void md_zsub(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr1, const complex float* iptr2)
{
	make_z3op_simple(md_zsub2, D, dims, optr, iptr1, iptr2);
}


/**
 * Add two scalar arrays and save to output (with strides)
 *
 * optr = iptr1 + iptr2
 */
void md_add2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2)
{
	MAKE_3OP(add, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}


/**
 * Add two scalar arrays and save to output (without strides)
 *
 * optr = iptr1 + iptr2
 */
void md_add(unsigned int D, const long dims[D], float* optr, const float* iptr1, const float* iptr2)
{
	make_3op_simple(md_add2, D, dims, optr, iptr1, iptr2);
}


/**
 * Add scalar to scalar array (with strides)
 *
 * optr = iptr + val
 */
void md_sadd2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr[D], const float* iptr, float val)
{
	make_3op_scalar(md_add2, D, dims, ostr, optr, istr, iptr, val);
}


/**
 * Add scalar to scalar array (without strides)
 *
 * optr = iptr + val
 *
 * Strides for both arrays are computed from dims with element size FL_SIZE.
 */
void md_sadd(unsigned int D, const long dims[D], float* optr, const float* iptr, float val)
{
	long strs[D];
	md_calc_strides(D, strs, dims, FL_SIZE);

	md_sadd2(D, dims, strs, optr, strs, iptr, val);
}


/**
 * Subtract the second scalar array from the first scalar array and save to output (with strides)
 *
 * optr = iptr1 - iptr2
 */
void md_sub2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2)
{
	MAKE_3OP(sub, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}


/**
 * Subtract the second scalar array from the first scalar array and save to output (without strides)
 *
 * optr = iptr1 - iptr2
 */
void md_sub(unsigned int D, const long dims[D], float* optr, const float* iptr1, const float* iptr2)
{
	make_3op_simple(md_sub2, D, dims, optr, iptr1, iptr2);
}


/**
 * Take complex conjugate of complex array and save to output (with strides)
 *
 * optr = conj(iptr)
 */
void md_zconj2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr)
{
	MAKE_Z2OP(zconj, D, dims, ostr, optr, istr, iptr);
}


/**
 * Take complex conjugate of complex array and save to output (without strides)
 *
 * optr = conj(iptr)
 */
void md_zconj(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr)
{
	make_z2op_simple(md_zconj2, D, dims, optr, iptr);
}


/**
 * Take the real part of complex array and save to output (with strides)
 *
 * optr = real(iptr)
 *
 * Implemented by applying md_zrmul2 to iptr and the scalar 1.
 */
void md_zreal2(unsigned int D, const long dim[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr)
{
	make_z3op_scalar(md_zrmul2, D, dim, ostr, optr, istr, iptr, 1.);
}


/**
 * Take the real part of complex array and save to output (without strides)
 *
 * optr = real(iptr)
 *
 * With USE_CUDA, device pointers are handled directly by cuda_zreal;
 * otherwise the strided version is used.
 */
void md_zreal(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr)
{
#ifdef USE_CUDA
	if (cuda_ondevice(iptr)) {

		assert(cuda_ondevice(optr));
		cuda_zreal(md_calc_size(D, dims), optr, iptr);
		return;
	}
#endif
	make_z2op_simple(md_zreal2, D, dims, optr, iptr);
}


/**
 * Take the imaginary part of complex array and save to output (with strides)
 *
 * optr = imag(iptr)
 *
 * Implemented by applying md_zrmul2 to iptr and the scalar 1.i.
 * NOTE(review): whether the imaginary part ends up in the real or in the
 * imaginary component of optr depends on md_zrmul2, which is not visible
 * here -- confirm against its definition.
 */
void md_zimag2(unsigned int D, const long dim[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr)
{
	make_z3op_scalar(md_zrmul2, D, dim, ostr, optr, istr, iptr, 1.i);
}


/**
 * Take the imaginary part of complex array and save to output (without strides)
 *
 * optr = imag(iptr)
 */
void md_zimag(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr)
{
	make_z2op_simple(md_zimag2, D, dims, optr, iptr);
}


/**
 * Compare two complex arrays (with strides)
 *
 * optr = iptr1 == iptr2
 */
void md_zcmp2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr1[D], const complex float* iptr1, const long istr2[D], const complex float* iptr2)
{
	MAKE_Z3OP(zcmp, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}


/**
 * Compare two complex arrays (without strides)
 *
 * optr = iptr1 == iptr2
 */
void md_zcmp(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr1, const complex float* iptr2)
{
	make_z3op_simple(md_zcmp2, D, dims, optr, iptr1, iptr2);
}


/**
 * Elementwise less than or equal to (with strides)
 *
 * optr = (iptr1 <= iptr2)
 */
void md_lessequal2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2)
{
	MAKE_3OP(le, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}


/**
 * Elementwise less than or equal to (without strides)
 *
 * optr = (iptr1 <= iptr2)
 */
void md_lessequal(unsigned int D, const long dims[D], float* optr, const float* iptr1, const float* iptr2)
{
	make_3op_simple(md_lessequal2, D, dims, optr, iptr1, iptr2);
}


/**
 * Elementwise less than or equal to scalar (with strides)
 *
 * optr = (iptr <= val)
 */
void md_slessequal2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr[D], const float* iptr, float val)
{
	make_3op_scalar(md_lessequal2, D, dims, ostr, optr, istr, iptr, val);
}


/**
 * Elementwise less than or equal to scalar (without strides)
 *
 * optr = (iptr <= val)
 *
 * Strides for both arrays are computed from dims with element size FL_SIZE.
 */
void md_slessequal(unsigned int D, const long dims[D], float* optr, const float* iptr, float val)
{
	long strs[D];
	md_calc_strides(D, strs, dims, FL_SIZE);

	md_slessequal2(D, dims, strs, optr, strs, iptr, val);
}


/**
 * Elementwise greater than or equal to (with strides)
 *
 * optr = (iptr1 >= iptr2)
 */
void md_greatequal2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr1[D], const float* iptr1, const long istr2[D], const float* iptr2)
{
	MAKE_3OP(ge, D, dims, ostr, optr, istr1, iptr1, istr2, iptr2);
}


/**
 * Elementwise greater than or equal to (without strides)
 *
 * optr = (iptr1 >= iptr2)
 */
void md_greatequal(unsigned int D, const long dims[D], float* optr, const float* iptr1, const float* iptr2)
{
	make_3op_simple(md_greatequal2, D, dims, optr, iptr1, iptr2);
}


/**
 * Elementwise greater than or equal to scalar (with strides)
 *
 * optr = (iptr >= val)
 */
void md_sgreatequal2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr[D], const float* iptr, float val)
{
	make_3op_scalar(md_greatequal2, D, dims, ostr, optr, istr, iptr, val);
}


/**
 * Elementwise greater than or equal to scalar (without strides)
 *
 * optr = (iptr >= val)
 *
 * Strides for both arrays are computed from dims with element size FL_SIZE.
 */
void md_sgreatequal(unsigned int D, const long dims[D], float* optr, const float* iptr, float val)
{
	long strs[D];
	md_calc_strides(D, strs, dims, FL_SIZE);

	md_sgreatequal2(D, dims, strs, optr, strs, iptr, val);
}


/**
 * Extract unit-norm complex exponentials from complex arrays (with strides)
 *
 * optr = iptr / abs(iptr)
 */
void md_zphsr2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr)
{
	MAKE_Z2OP(zphsr, D, dims, ostr, optr, istr, iptr);
}


/**
 * Extract unit-norm complex exponentials from complex arrays (without strides)
 *
 * optr = iptr /
abs(iptr) */ void md_zphsr(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr) { make_z2op_simple(md_zphsr2, D, dims, optr, iptr); } /** * Get complex exponential with phase = complex arrays (with strides) * * optr = zexp(j * iptr) */ void md_zexpj2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr) { MAKE_Z2OP(zexpj, D, dims, ostr, optr, istr, iptr); } /** * Get complex exponential with phase = complex arrays (without strides) * * optr = zexp(j * iptr) */ void md_zexpj(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr) { make_z2op_simple(md_zexpj2, D, dims, optr, iptr); } /** * Get argument of complex arrays (with strides) * * optr = zarg(iptr) */ void md_zarg2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr) { MAKE_Z2OP(zarg, D, dims, ostr, optr, istr, iptr); } /** * Get argument of complex arrays (without strides) * * optr = zarg(iptr) */ void md_zarg(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr) { make_z2op_simple(md_zarg2, D, dims, optr, iptr); } /** * Calculate inner product between two scalar arrays (with strides) * * return iptr1^T * iptr2 */ float md_scalar2(unsigned int D, const long dim[D], const long str1[D], const float* ptr1, const long str2[D], const float* ptr2) { #if 1 if ( (D == md_calc_blockdim(D, dim, str1, FL_SIZE)) && (D == md_calc_blockdim(D, dim, str2, FL_SIZE))) { #ifdef USE_CUDA if (cuda_ondevice(ptr1)) { assert(cuda_ondevice(ptr2)); return gpu_ops.dot(md_calc_size(D, dim), ptr1, ptr2); } #endif return cpu_ops.dot(md_calc_size(D, dim), ptr1, ptr2); } #endif double ret = 0.; double* retp = &ret; #ifdef USE_CUDA if (cuda_ondevice(ptr1)) retp = gpu_constant(&ret, DL_SIZE); #endif long stro[D]; md_singleton_strides(D, stro); // Because this might lose precision for large data sets // we use double 
precision to accumlate result // (Kahan summation formula would be another option) md_fmacD2(D, dim, stro, retp, str1, ptr1, str2, ptr2); #ifdef USE_CUDA if (cuda_ondevice(ptr1)) { md_copy(1, (long[1]){ 1 }, &ret, retp, DL_SIZE); md_free(retp); } #endif return ret; } /** * Calculate inner product between two scalar arrays (without strides) * * return iptr1^T * iptr2 */ float md_scalar(unsigned int D, const long dim[D], const float* ptr1, const float* ptr2) { long str[D]; md_calc_strides(D, str, dim, FL_SIZE); return md_scalar2(D, dim, str, ptr1, str, ptr2); } /** * Calculate l2 norm of scalar array (with strides) * * return sqrt(iptr^T * iptr) */ float md_norm2(unsigned int D, const long dim[D], const long str[D], const float* ptr) { return sqrtf(md_scalar2(D, dim, str, ptr, str, ptr)); } /** * Calculate l2 norm of scalar array (without strides) * * return sqrt(iptr^T * iptr) */ float md_norm(unsigned int D, const long dim[D], const float* ptr) { return sqrtf(md_scalar(D, dim, ptr, ptr)); } /** * Calculate root-mean-square of complex array * * return sqrt(in^H * in / length(in)) */ float md_zrms(unsigned int D, const long dim[D], const complex float* in) { return md_znorm(D, dim, in) / sqrtl(md_calc_size(D, dim)); } /** * Calculate root-mean-square error between two complex arrays * * return sqrt((in1 - in2)^2 / length(in)) */ float md_zrmse(unsigned int D, const long dim[D], const complex float* in1, const complex float* in2) { complex float* err = md_alloc_sameplace(D, dim, CFL_SIZE, in1); md_zsub(D, dim, err, in1, in2); float val = md_zrms(D, dim, err); md_free(err); return val; } /** * Calculate normalized root-mean-square error between two complex arrays * * return RMSE(ref,in) / RMS(in) */ float md_znrmse(unsigned int D, const long dim[D], const complex float* ref, const complex float* in) { return md_zrmse(D, dim, ref, in) / md_zrms(D, dim, ref); } /** * Calculate l2 norm error between two complex arrays * * return sqrt(sum(in1 - in2)^2) */ float 
md_znorme(unsigned int D, const long dim[D], const complex float* in1, const complex float* in2) { complex float* err = md_alloc_sameplace(D, dim, CFL_SIZE, in1); md_zsub(D, dim, err, in1, in2); float val = md_znorm(D, dim, err); md_free(err); return val; } /** * Calculate relative l2 norm error of two complex arrays * * return norm(ref - in) / norm(ref) */ float md_zrnorme(unsigned int D, const long dim[D], const complex float* ref, const complex float* in) { return md_znorme(D, dim, ref, in) / md_znorm(D, dim, ref); } /** * Calculate inner product between two complex arrays (with strides) * * return iptr1^H * iptr2 */ complex float md_zscalar2(unsigned int D, const long dim[D], const long str1[D], const complex float* ptr1, const long str2[D], const complex float* ptr2) { complex double ret = 0.; complex double* retp = &ret; #ifdef USE_CUDA if (cuda_ondevice(ptr1)) retp = gpu_constant(&ret, CDL_SIZE); #endif long stro[D]; md_singleton_strides(D, stro); // Because this might lose precision for large data sets // we use double precision to accumlate result // (Kahan summation formula would be another option) md_zfmaccD2(D, dim, stro, retp, str1, ptr1, str2, ptr2); #ifdef USE_CUDA if (cuda_ondevice(ptr1)) { md_copy(1, (long[1]){ 1 }, &ret, retp, CDL_SIZE); md_free(retp); } #endif return (complex float)ret; } /** * Calculate inner product between two complex arrays (without strides) * * return iptr1^H * iptr2 */ complex float md_zscalar(unsigned int D, const long dim[D], const complex float* ptr1, const complex float* ptr2) { long str[D]; md_calc_strides(D, str, dim, CFL_SIZE); return md_zscalar2(D, dim, str, ptr1, str, ptr2); } /** * Calculate real part of the inner product between two complex arrays (with strides) * * return iptr1^H * iptr2 */ float md_zscalar_real2(unsigned int D, const long dims[D], const long strs1[D], const complex float* ptr1, const long strs2[D], const complex float* ptr2) { long dimsR[D + 1]; long strs1R[D + 1]; long strs2R[D + 1]; 
real_from_complex_dims(D, dimsR, dims); real_from_complex_strides(D, strs1R, strs1); real_from_complex_strides(D, strs2R, strs2); return md_scalar2(D + 1, dimsR, strs1R, (const float*)ptr1, strs2R, (const float*)ptr2); } /** * Calculate real part of the inner product between two complex arrays (without strides) * * return iptr1^H * iptr2 */ float md_zscalar_real(unsigned int D, const long dims[D], const complex float* ptr1, const complex float* ptr2) { long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); return md_zscalar_real2(D, dims, strs, ptr1, strs, ptr2); } /** * Calculate l2 norm of complex array (with strides) * * return sqrt(iptr^H * iptr) */ float md_znorm2(unsigned int D, const long dim[D], const long str[D], const complex float* ptr) { return sqrtf(md_zscalar_real2(D, dim, str, ptr, str, ptr)); // return sqrtf(crealf(md_zscalar2(D, dim, str, ptr, str, ptr))); } /** * Calculate l2 norm of complex array (without strides) * * return sqrt(iptr^H * iptr) */ float md_znorm(unsigned int D, const long dim[D], const complex float* ptr) { return sqrtf(md_zscalar_real(D, dim, ptr, ptr)); // return sqrtf(crealf(md_zscalar(D, dim, ptr, ptr))); } /** * Calculate absolute value. * */ void md_abs2(unsigned int D, const long dims[D], const long ostr[D], float* optr, const long istr[D], const float* iptr) { assert(optr != iptr); md_clear2(D, dims, ostr, optr, FL_SIZE); md_fmac2(D, dims, ostr, optr, istr, iptr, istr, iptr); // FIXME: should be cheaper md_sqrt2(D, dims, ostr, optr, ostr, optr); } /** * Calculate absolute value. * */ void md_abs(unsigned int D, const long dims[D], float* optr, const float* iptr) { make_2op_simple(md_abs2, D, dims, optr, iptr); } /** * Calculate absolute value. 
* */ void md_zabs2(unsigned int D, const long dims[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr) { // FIXME: special case of md_rss assert(optr != iptr); md_clear2(D, dims, ostr, optr, CFL_SIZE); md_zfmacc2(D, dims, ostr, optr, istr, iptr, istr, iptr); #if 1 long dimsR[D + 1]; long strsR[D + 1]; real_from_complex_dims(D, dimsR, dims); real_from_complex_strides(D, strsR, ostr); //md_sqrt2(D, dimsR + 1, strsR + 1, (float*)optr, strsR + 1, (const float*)optr); // skipping imaginary part is expensive md_sqrt2(D + 1, dimsR, strsR, (float*)optr, strsR, (const float*)optr); #else md_zsqrt2(D, dims, ostr, optr, ostr, optr); #endif } /** * Calculate absolute value. * */ void md_zabs(unsigned int D, const long dims[D], complex float* optr, const complex float* iptr) { make_z2op_simple(md_zabs2, D, dims, optr, iptr); } /** * Calculate sum of absolute values. * */ float md_asum2(unsigned int D, const long dims[D], const long strs[D], const float* ptr) { #if 1 if (md_calc_blockdim(D, dims, strs, FL_SIZE) == D) { #ifdef USE_CUDA if (cuda_ondevice(ptr)) return gpu_ops.asum(md_calc_size(D, dims), ptr); #endif return cpu_ops.asum(md_calc_size(D, dims), ptr); } #endif float* tmp = md_alloc_sameplace(D, dims, FL_SIZE, ptr); long strs1[D]; md_calc_strides(D, strs1, dims, FL_SIZE); md_abs2(D, dims, strs1, tmp, strs, ptr); float ret = 0.; float* retp = &ret; #ifdef USE_CUDA if (cuda_ondevice(ptr)) retp = gpu_constant(&ret, FL_SIZE); #endif long dims0[D]; md_singleton_dims(D, dims0); md_axpy2(D, dims, MD_STRIDES(D, dims0, FL_SIZE), retp, 1., strs1, tmp); #ifdef USE_CUDA if (cuda_ondevice(ptr)) { md_copy(D, dims0, &ret, retp, FL_SIZE); md_free(retp); } #endif md_free(tmp); return ret; } /** * Calculate sum of absolute values. 
* */ float md_asum(unsigned int D, const long dims[D], const float* ptr) { return md_asum2(D, dims, MD_STRIDES(D, dims, FL_SIZE), ptr); } /** * Calculate sum of absolute values of complex numbers * where real and imaginary are separate elements of the sum. * (similar to BLAS L1 function). * */ float md_zasum2(unsigned int D, const long dims[D], const long strs[D], const complex float* ptr) { long dimsR[D + 1]; real_from_complex_dims(D, dimsR, dims); long strsR[D + 1]; real_from_complex_strides(D, strsR, strs); return md_asum2(D + 1, dimsR, strsR, (const float*)ptr); } /** * Calculate sum of absolute values of complex numbers * where real and imaginary are separate elements of the sum. * (similar to BLAS L1 function). * */ float md_zasum(unsigned int D, const long dims[D], const complex float* ptr) { return md_zasum2(D, dims, MD_STRIDES(D, dims, CFL_SIZE), ptr); } /** * Calculate l1 norm of complex array (with strides) */ float md_z1norm2(unsigned int D, const long dims[D], const long strs[D], const complex float* ptr) { complex float* tmp = md_alloc_sameplace(D, dims, CFL_SIZE, ptr); md_zabs2(D, dims, MD_STRIDES(D, dims, CFL_SIZE), tmp, strs, ptr); float val = md_zasum(D, dims, tmp); md_free(tmp); return val; } /** * Calculate l1 norm of complex array (without strides) */ float md_z1norm(unsigned int D, const long dim[D], const complex float* ptr) { return md_z1norm2(D, dim, MD_STRIDES(D, dim, CFL_SIZE), ptr); } /** * Root of sum of squares along selected dimensions * * @param dims -- full dimensions of src image * @param flags -- bitmask for applying the root of sum of squares, ie the dimensions that will not stay */ void md_rss(unsigned int D, const long dims[D], unsigned int flags, float* dst, const float* src) { long str1[D]; long str2[D]; long dims2[D]; md_select_dims(D, ~flags, dims2, dims); md_calc_strides(D, str1, dims, FL_SIZE); md_calc_strides(D, str2, dims2, FL_SIZE); md_clear(D, dims2, dst, FL_SIZE); md_fmac2(D, dims, str2, dst, str1, src, str1, src); 
md_sqrt(D, dims2, dst, dst); } /** * Sum of squares along selected dimensions * * @param dims -- full dimensions of src image * @param flags -- bitmask for applying the root of sum of squares, i.e. the dimensions that will not stay */ void md_zss(unsigned int D, const long dims[D], unsigned int flags, complex float* dst, const complex float* src) { long str1[D]; long str2[D]; long dims2[D]; md_select_dims(D, ~flags, dims2, dims); md_calc_strides(D, str1, dims, CFL_SIZE); md_calc_strides(D, str2, dims2, CFL_SIZE); md_clear(D, dims2, dst, CFL_SIZE); md_zfmacc2(D, dims, str2, dst, str1, src, str1, src); } /** * Root of sum of squares along selected dimensions * * @param dims -- full dimensions of src image * @param flags -- bitmask for applying the root of sum of squares, i.e. the dimensions that will not stay */ void md_zrss(unsigned int D, const long dims[D], unsigned int flags, complex float* dst, const complex float* src) { long dims2[D]; md_select_dims(D, ~flags, dims2, dims); #if 1 md_zss(D, dims, flags, dst, src); #if 1 long dims2R[D + 1]; real_from_complex_dims(D, dims2R, dims2); md_sqrt(D + 1, dims2R, (float*)dst, (const float*)dst); #else md_zsqrt(D, dims2, dst, dst); #endif #else long dimsR[D + 1]; real_from_complex_dims(D, dimsR, dims); md_rrss(D + 1, dimsR, (flags << 1), (float*)dst, (const float*)src); #endif } /** * Compute variance or standard deviation along selected dimensions (with strides) * * @param dims -- full dimensions of src image * @param flags -- bitmask for calculating var/std, i.e. 
the dimensions that will not stay * @param variance -- true if computing variance, false if computing standard deviation */ static void md_zvarstd2(unsigned int D, const long dims[D], unsigned int flags, const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr, bool variance) { long odims[D]; long fdims[D]; md_select_dims(D, ~flags, odims, dims); md_select_dims(D, flags, fdims, dims); complex float* tmp = md_alloc_sameplace(D, dims, CFL_SIZE, iptr); md_zavg2(D, dims, flags, ostr, optr, istr, iptr); md_zsub2(D, dims, istr, tmp, istr, iptr, ostr, optr); double scale = variance ? md_calc_size(D, fdims) - 1. : sqrtf(md_calc_size(D, fdims) - 1.); (variance ? md_zss : md_zrss)(D, dims, flags, optr, tmp); md_zsmul2(D, odims, ostr, optr, ostr, optr, 1. / scale); md_free(tmp); } /** * Compute variance along selected dimensions (without strides) * * @param dims -- full dimensions of src image * @param flags -- bitmask for calculating variance, i.e. the dimensions that will not stay */ void md_zvar(unsigned int D, const long dims[D], unsigned int flags, complex float* optr, const complex float* iptr) { long odims[D]; md_select_dims(D, ~flags, odims, dims); md_zvar2(D, dims, flags, MD_STRIDES(D, odims, CFL_SIZE), optr, MD_STRIDES(D, dims, CFL_SIZE), iptr); } /** * Compute variance along selected dimensions (with strides) * * @param dims -- full dimensions of src image * @param flags -- bitmask for calculating variance, i.e. the dimensions that will not stay */ void md_zvar2(unsigned int D, const long dims[D], unsigned int flags, const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr) { md_zvarstd2(D, dims, flags, ostr, optr, istr, iptr, true); } /** * Compute standard deviation along selected dimensions (without strides) * * @param dims -- full dimensions of src image * @param flags -- bitmask for calculating standard deviation, i.e. 
the dimensions that will not stay */ void md_zstd(unsigned int D, const long dims[D], unsigned int flags, complex float* optr, const complex float* iptr) { long odims[D]; md_select_dims(D, ~flags, odims, dims); md_zvarstd2(D, dims, flags, MD_STRIDES(D, odims, CFL_SIZE), optr, MD_STRIDES(D, dims, CFL_SIZE), iptr, false); } /** * Compute standard deviation along selected dimensions (with strides) * * @param dims -- full dimensions of src image * @param flags -- bitmask for calculating standard deviation, i.e. the dimensions that will not stay */ void md_zstd2(unsigned int D, const long dims[D], unsigned int flags, const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr) { md_zvarstd2(D, dims, flags, ostr, optr, istr, iptr, false); } /** * Average along flagged dimensions (without strides) * * @param dims -- full dimensions of iptr * @param flags -- bitmask for applying the average, i.e. the dimensions that will not stay */ void md_zavg(unsigned int D, const long dims[D], unsigned int flags, complex float* optr, const complex float* iptr) { long odims[D]; md_select_dims(D, ~flags, odims, dims); md_zavg2(D, dims, flags, MD_STRIDES(D, odims, CFL_SIZE), optr, MD_STRIDES(D, dims, CFL_SIZE), iptr); } /** * Average along flagged dimensions (with strides) * * @param dims -- full dimensions of iptr * @param flags -- bitmask for applying the average, i.e. 
the dimensions that will not stay */ void md_zavg2(unsigned int D, const long dims[D], unsigned int flags, const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr) { long odims[D]; md_select_dims(D, ~flags, odims, dims); md_clear(D, odims, optr, CFL_SIZE); //FIXME: this is faster #if 1 complex float* o = md_alloc_sameplace(1, MD_DIMS(1), CFL_SIZE, optr); md_zfill(1, MD_DIMS(1), o, 1.); long ss[D]; md_singleton_strides(D, ss); md_zfmac2(D, dims, ostr, optr, istr, iptr, ss, o); md_free(o); #else md_zaxpy2(D, dims, ostr, optr, 1., istr, iptr); #endif long sdims[D]; md_select_dims(D, flags, sdims, dims); long scale = md_calc_size(D, sdims); if (scale != 0.) md_zsmul(D, odims, optr, optr, 1. / scale); } /** * Weighted average along flagged dimensions (without strides) * * @param dims -- full dimensions of iptr * @param flags -- bitmask for applying the weighted average, i.e. the dimensions that will not stay */ void md_zwavg(unsigned int D, const long dims[D], unsigned int flags, complex float* optr, const complex float* iptr) { long odims[D]; md_select_dims(D, ~flags, odims, dims); md_zwavg2(D, dims, flags, MD_STRIDES(D, odims, CFL_SIZE), optr, MD_STRIDES(D, dims, CFL_SIZE), iptr); } /** * Weighted average along flagged dimensions (with strides) * * @param dims -- full dimensions of iptr * @param flags -- bitmask for applying the weighted average, i.e. 
the dimensions that will not stay */ void md_zwavg2(unsigned int D, const long dims[D], unsigned int flags, const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr) { long odims[D]; md_select_dims(D, ~flags, odims, dims); complex float* weights = md_alloc_sameplace(D, odims, CFL_SIZE, iptr); md_zwavg2_core1(D, dims, flags, ostr, weights, istr, iptr); md_zwavg2_core2(D, dims, flags, ostr, optr, weights, istr, iptr); md_free(weights); } /** * Compute weights for weighted average * * @param iptr input array to be averaged * @param weights output weights */ void md_zwavg2_core1(unsigned int D, const long dims[D], unsigned int flags, const long ostr[D], complex float* weights, const long istr[D], const complex float* iptr) { long odims[D]; md_select_dims(D, ~flags, odims, dims); complex float* pattern = md_alloc_sameplace(D, dims, CFL_SIZE, iptr); long onestrs[D]; md_singleton_strides(D, onestrs); md_zcmp2(D, dims, istr, pattern, istr, iptr, onestrs, &(complex float){ 0. }); md_zsub2(D, dims, istr, pattern, onestrs, &(complex float){ 1. }, istr, pattern); md_clear2(D, odims, ostr, weights, CFL_SIZE); md_zaxpy2(D, dims, ostr, weights, 1., istr, pattern); md_free(pattern); } /** * Weighted average along flagged dimensions with given weights * * @param weights precomputed weights for averaging * @param optr output array after averaging */ void md_zwavg2_core2(unsigned int D, const long dims[D], unsigned int flags, const long ostr[D], complex float* optr, const complex float* weights, const long istr[D], const complex float* iptr) { long odims[D]; md_select_dims(D, ~flags, odims, dims); md_clear2(D, odims, ostr, optr, CFL_SIZE); md_zaxpy2(D, dims, ostr, optr, 1., istr, iptr); md_zdiv(D, odims, optr, optr, weights); } /** * Fill complex array with value (with strides). 
*
 * Forwards to md_fill2 with element size CFL_SIZE.
 */
void md_zfill2(unsigned int D, const long dim[D], const long str[D], complex float* ptr, complex float val)
{
	md_fill2(D, dim, str, ptr, &val, CFL_SIZE);
}


/**
 * Fill complex array with value (without strides).
 *
 * Forwards to md_fill with element size CFL_SIZE.
 */
extern void md_zfill(unsigned int D, const long dim[D], complex float* ptr, complex float val)
{
	md_fill(D, dim, ptr, &val, CFL_SIZE);
}


/**
 * Callback for optimized_twoop_oi: calls the zsoftthresh_half operation
 * on one block of a complex array.
 *
 * data->data_ptr points to the threshold (a float);
 * ptr[0] and ptr[1] are passed on in this order.
 */
static void nary_zsoftthresh_half(struct nary_opt_data_s* data, void* ptr[])
{
	data->ops->zsoftthresh_half(data->size, *(float*)data->data_ptr, ptr[0], ptr[1]);
}


/**
 * Step (2) of Soft Thresholding multi-dimensional arrays, y = ST(x, lambda)
 * 2) computes resid = MAX( (abs(x) - lambda)/abs(x), 0 ) (with strides)
 *
 * @param D number of dimensions
 * @param dim dimensions of input/output
 * @param lambda threshold parameter
 * @param ostr output strides
 * @param optr pointer to output, y
 * @param istr input strides
 * @param iptr pointer to input, abs(x)
 */
void md_zsoftthresh_half2(unsigned int D, const long dim[D], float lambda, const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr)
{
	optimized_twoop_oi(D, dim, ostr, optr, istr, iptr, (size_t[2]){ CFL_SIZE, CFL_SIZE }, nary_zsoftthresh_half, &lambda);
}


/**
 * Callback for optimized_twoop_oi: calls the softthresh_half operation
 * on one block of a real array.
 *
 * data->data_ptr points to the threshold (a float);
 * ptr[0] and ptr[1] are passed on in this order.
 */
static void nary_softthresh_half(struct nary_opt_data_s* data, void* ptr[])
{
	data->ops->softthresh_half(data->size, *(float*)data->data_ptr, ptr[0], ptr[1]);
}


/**
 * Step (2) of Soft Thresholding multi-dimensional arrays, y = ST(x, lambda)
 * 2) computes resid = MAX( (abs(x) - lambda)/abs(x), 0 ) (with strides)
 *
 * @param D number of dimensions
 * @param dim dimensions of input/output
 * @param lambda threshold parameter
 * @param ostr output strides
 * @param optr pointer to output, y
 * @param istr input strides
 * @param iptr pointer to input, abs(x)
 */
void md_softthresh_half2(unsigned int D, const long dim[D], float lambda, const long ostr[D], float* optr, const long istr[D], const float* iptr)
{
	optimized_twoop_oi(D, dim, ostr, optr, istr, iptr, (size_t[2]){ FL_SIZE, FL_SIZE }, nary_softthresh_half, &lambda);
}


/**
 * Step (2) of Soft Thresholding multi-dimensional arrays, y = ST(x, lambda)
 * 2) computes resid = MAX( (abs(x) - lambda)/abs(x), 0 ) (without strides)
 *
 * Strides for both arrays are computed from dim with element size CFL_SIZE.
 *
 * @param D number of dimensions
 * @param dim dimensions of input/output
 * @param lambda threshold parameter
 * @param optr pointer to output, y
 * @param iptr pointer to input, x
 */
void md_zsoftthresh_half(unsigned int D, const long dim[D], float lambda, complex float* optr, const complex float* iptr)
{
	long str[D];
	md_calc_strides(D, str, dim, CFL_SIZE);

	md_zsoftthresh_half2(D, dim, lambda, str, optr, str, iptr);
}


/**
 * Joint soft thresholding of a real array with a caller-provided buffer for the norm
 *
 * 1) tmp_norm = root of sum of squares of iptr along the flagged dimensions
 * 2) tmp_norm is replaced in place by md_softthresh_half2
 * 3) optr = tmp_norm * iptr
 *
 * NOTE(review): md_rss is the version without strides, so step 1 does not
 * use istrs -- confirm that callers pass densely packed input.
 *
 * @param D number of dimensions
 * @param dims dimensions of input/output
 * @param lambda threshold parameter
 * @param flags jointly thresholded dimensions
 * @param tmp_norm buffer with the non-flagged dimensions of dims
 * @param ostrs output strides
 * @param optr destination
 * @param istrs input strides
 * @param iptr source
 */
void md_softthresh_core2(unsigned int D, const long dims[D], float lambda, unsigned int flags, float* tmp_norm, const long ostrs[D], float* optr, const long istrs[D], const float* iptr)
{
	long norm_dims[D];
	long norm_strs[D];

	md_select_dims(D, ~flags, norm_dims, dims);
	md_calc_strides(D, norm_strs, norm_dims, FL_SIZE);

	md_rss(D, dims, flags, tmp_norm, iptr);
	md_softthresh_half2(D, norm_dims, lambda, norm_strs, tmp_norm, norm_strs, tmp_norm);
	md_mul2(D, dims, ostrs, optr, norm_strs, tmp_norm, istrs, iptr);
}


/**
 * Callback for optimized_twoop_oi: calls the softthresh operation
 * on one block of a real array.
 *
 * data->data_ptr points to the threshold (a float);
 * ptr[0] and ptr[1] are passed on in this order.
 */
static void nary_softthresh(struct nary_opt_data_s* data, void* ptr[])
{
	data->ops->softthresh(data->size, *(float*)data->data_ptr, ptr[0], ptr[1]);
}


/**
 * Soft Thresholding for floats (with strides)
 *
 * optr = ST(iptr, lambda)
 *
 * Without flags the softthresh operation is applied directly; otherwise
 * a temporary for the norm is allocated and md_softthresh_core2 is used.
 */
void md_softthresh2(unsigned int D, const long dims[D], float lambda, unsigned int flags, const long ostrs[D], float* optr, const long istrs[D], const float* iptr)
{
	if (0 == flags) {

		optimized_twoop_oi(D, dims, ostrs, optr, istrs, iptr, (size_t[2]){ FL_SIZE, FL_SIZE }, nary_softthresh, &lambda);
		return;
	}

	long norm_dims[D];
	md_select_dims(D, ~flags, norm_dims, dims);

	float* tmp_norm = md_alloc_sameplace(D, norm_dims, FL_SIZE, iptr);

	md_softthresh_core2(D, dims, lambda, flags, tmp_norm, ostrs, optr, istrs, iptr);

	md_free(tmp_norm);
}


/**
 * Soft Thresholding for floats (without strides)
 *
 * optr = ST(iptr, lambda)
 *
 * Strides for both arrays are computed from dims with element size FL_SIZE.
 */
void md_softthresh(unsigned int D, const long dims[D], float lambda, unsigned int flags, float* optr, const float* iptr)
{
	long str[D];
	md_calc_strides(D, str, dims, FL_SIZE);

	md_softthresh2(D, dims, lambda, flags, str, optr, str, iptr);
}


/**
 * Joint soft thresholding of a complex array with a caller-provided buffer for the norm
 *
 * 1) tmp_norm = root of sum of squares of iptr along the flagged dimensions
 * 2) tmp_norm is replaced in place by md_zsoftthresh_half2
 * 3) optr = tmp_norm * iptr
 *
 * NOTE(review): md_zrss is the version without strides, so step 1 does not
 * use istrs -- confirm that callers pass densely packed input.
 */
void md_zsoftthresh_core2(unsigned int D, const long dims[D], float lambda, unsigned int flags, complex float* tmp_norm, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr)
{
	long norm_dims[D];
	long norm_strs[D];

	md_select_dims(D, ~flags, norm_dims, dims);
	md_calc_strides(D, norm_strs, norm_dims, CFL_SIZE);

	md_zrss(D, dims, flags, tmp_norm, iptr);
	md_zsoftthresh_half2(D, norm_dims, lambda, norm_strs, tmp_norm, norm_strs, tmp_norm);
	md_zmul2(D, dims, ostrs, optr, norm_strs, tmp_norm, istrs, iptr);
}


/**
 * Callback for optimized_twoop_oi: calls the zsoftthresh operation
 * on one block of a complex array.
 *
 * data->data_ptr points to the threshold (a float);
 * ptr[0] and ptr[1] are passed on in this order.
 */
static void nary_zsoftthresh(struct nary_opt_data_s* data, void* ptr[])
{
	data->ops->zsoftthresh(data->size, *(float*)data->data_ptr, ptr[0], ptr[1]);
}


/**
 * Soft thresholding using norm along arbitrary dimension (with strides)
 *
 * y = ST(x, lambda)
 * 1) computes resid = MAX((norm(x) - lambda) / norm(x), 0)
 * 2) multiplies y = resid * x
 *
 * @param D number of dimensions
 * @param dims dimensions of input/output
 * @param lambda threshold parameter
 * @param flags jointly thresholded dimensions
 * @param optr destination -- soft thresholded values
 * @param iptr source -- values to be soft thresholded
 */
void md_zsoftthresh2(unsigned int D, const long dims[D], float lambda, unsigned int flags, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr)
{
	if (0 == flags) {

		optimized_twoop_oi(D, dims, ostrs, optr, istrs, iptr, (size_t[2]){ CFL_SIZE, CFL_SIZE }, nary_zsoftthresh, &lambda);
		return;
	}

	long norm_dims[D];
	md_select_dims(D, ~flags, norm_dims, dims);

	complex float* tmp_norm =
md_alloc_sameplace(D, norm_dims, CFL_SIZE, iptr); md_zsoftthresh_core2(D, dims, lambda, flags, tmp_norm, ostrs, optr, istrs, iptr); md_free(tmp_norm); } /** * Soft thresholding using norm along arbitrary dimension (without strides) * * y = ST(x, lambda) * 1) computes resid = MAX((norm(x) - lambda) / norm(x), 0) * 2) multiplies y = resid * x * * @param D number of dimensions * @param dims dimensions of input/output * @param lambda threshold parameter * @param flags jointly thresholded dimensions * @param optr destination -- soft thresholded values * @param iptr source -- values to be soft thresholded */ void md_zsoftthresh(unsigned int D, const long dims[D], float lambda, unsigned int flags, complex float* optr, const complex float* iptr) { long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); md_zsoftthresh2(D, dims, lambda, flags, strs, optr, strs, iptr); } /** * Hard Thresholding mask complex array (nonzero support of k-largest elements) * Writes to the output a mask of the non-zero elements in the input * * return HardThreshSupp(ptr) */ static void nary_zhardthresh_mask(struct nary_opt_data_s* data, void* ptr[]) { data->ops->zhardthresh_mask(data->size, (*(unsigned int*)data->data_ptr), ptr[0], ptr[1]); } /** * Produces a mask (1s and 0s) of the non-zero support of a hard thresholded input vector * Multi-dimensional operation with strides * Hard thresholding is performed by selection of the k largest elements in input. 
* * @param D number of dimensions * @param dim dimensions of input/output * @param k threshold parameter * @param flags flags for joint operation * @param ostr output strides * @param optr pointer to output * @param istr input strides * @param iptr pointer to input */ void md_zhardthresh_mask2(unsigned int D, const long dim[D], unsigned int k, unsigned int flags, complex float* tmp_norm, const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr) { if (0 == flags) { optimized_twoop_oi(D, dim, ostr, optr, istr, iptr, (size_t[2]){ CFL_SIZE, CFL_SIZE }, nary_zhardthresh_mask, &k); return; } long norm_dims[D]; long norm_strs[D]; md_select_dims(D, ~flags, norm_dims, dim); md_calc_strides(D, norm_strs, norm_dims, CFL_SIZE); md_zrss(D, dim, flags, tmp_norm, iptr); optimized_twoop_oi(D, norm_dims, norm_strs, tmp_norm, norm_strs, tmp_norm, (size_t[2]){ CFL_SIZE, CFL_SIZE }, nary_zhardthresh_mask, &k); md_copy2(D, dim, ostr, optr, norm_strs, tmp_norm, CFL_SIZE); } /** * Produces a mask (1s and 0s) of the non-zero support of a hard thresholded input vector * Multi-dimensional operation using the same strides for input and output. * Hard thresholding is performed by selection of the k largest elements in input. 
* * @param D number of dimensions * @param dim dimensions of input/output * @param k threshold parameter * @param optr pointer to output * @param iptr pointer to input */ void md_zhardthresh_mask(unsigned int D, const long dim[D], unsigned int k, unsigned int flags, complex float* optr, const complex float* iptr) { long str[D]; md_calc_strides(D, str, dim, CFL_SIZE); long norm_dims[D]; md_select_dims(D, ~flags, norm_dims, dim); complex float* tmp_norm = md_alloc_sameplace(D, norm_dims, CFL_SIZE, iptr); md_zhardthresh_mask2(D, dim, k, flags, tmp_norm, str, optr, str, iptr); md_free(tmp_norm); } /** * Joint Hard thresholding (with strides) * Performs hard thresholding to the norm along dimension specified by flags * Applies the support of thresholded norm to every vector along that dimension * Hard thresholding refers to the selection of the k largest elements in vector. * * @param D number of dimensions * @param dims dimensions of input/output * @param k threshold (sorted) index * @param flags jointly thresholded dimensions * @param tmp_norm temporary array for joint operation * @param ostrs destination strides * @param optr destination -- thresholded values * @param istrs source strides * @param iptr source -- values to be thresholded */ void md_zhardthresh_joint2(unsigned int D, const long dims[D], unsigned int k, unsigned int flags, complex float* tmp_norm, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr) { long norm_dims[D]; long norm_strs[D]; md_select_dims(D, ~flags, norm_dims, dims); md_calc_strides(D, norm_strs, norm_dims, CFL_SIZE); md_zrss(D, dims, flags, tmp_norm, iptr); optimized_twoop_oi(D, norm_dims, norm_strs, tmp_norm, norm_strs, tmp_norm, (size_t[2]){ CFL_SIZE, CFL_SIZE }, nary_zhardthresh_mask, &k); md_zmul2(D, dims, ostrs, optr, norm_strs, tmp_norm, istrs, iptr); } /** * Hard Thresholding complex array (select k-largest elements) * * return HardThresh(ptr) */ static void nary_zhardthresh(struct 
nary_opt_data_s* data, void* ptr[]) { data->ops->zhardthresh(data->size, (*(unsigned int*)data->data_ptr), ptr[0], ptr[1]); } /** * Hard thresholding (with strides) * * y = HT(x, k), selects k largest elements of x * computes y = x * (abs(x) > t(k)), * k = threshold index of sorted x, t(k)= value of sorted x at k * * @param D number of dimensions * @param dims dimensions of input/output * @param k threshold (sorted) index * @param flags jointly thresholded dimensions * @param tmp_norm temporary array for joint operation * @param ostrs destination strides * @param optr destination -- thresholded values * @param istrs source strides * @param iptr source -- values to be thresholded */ void md_zhardthresh2(unsigned int D, const long dims[D], unsigned int k, unsigned int flags, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr) { if (0 == flags) { optimized_twoop_oi(D, dims, ostrs, optr, istrs, iptr, (size_t[2]){ CFL_SIZE, CFL_SIZE }, nary_zhardthresh, &k); return; } long norm_dims[D]; md_select_dims(D, ~flags, norm_dims, dims); complex float* tmp_norm = md_alloc_sameplace(D, norm_dims, CFL_SIZE, iptr); md_zhardthresh_joint2(D, dims, k, flags, tmp_norm, ostrs, optr, istrs,iptr); md_free(tmp_norm); } /** * Hard thresholding (without strides) * * y = HT(x, k), select k largest elements. 
* * @param D number of dimensions * @param dims dimensions of input/output * @param k threshold parameter * @param flags jointly thresholded dimensions * @param optr destination -- thresholded values * @param iptr source -- values to be thresholded */ void md_zhardthresh(unsigned int D, const long dims[D], unsigned int k, unsigned int flags, complex float* optr, const complex float* iptr) { long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); md_zhardthresh2(D, dims, k, flags, strs, optr, strs, iptr); } /** * Elementwise minimum of input and scalar (with strides) * * optr = min(val, iptr) */ void md_smin2(unsigned int D, const long dim[D], const long ostr[D], float* optr, const long istr[D], const float* iptr, float val) { float* tmp = md_alloc_sameplace(D, dim, FL_SIZE, iptr); md_slessequal2(D, dim, ostr, tmp, istr, iptr, val); md_mul2(D, dim, ostr, optr, istr, iptr, istr, tmp); md_free(tmp); } /** * Elementwise minimum of input and scalar (without strides) * * optr = min(val, iptr) */ void md_smin(unsigned int D, const long dim[D], float* optr, const float* iptr, float val) { long str[D]; md_calc_strides(D, str, dim, FL_SIZE); md_smin2(D, dim, str, optr, str, iptr, val); } /** * Elementwise maximum of input and scalar (with strides) * * optr = max(val, iptr) */ void md_smax2(unsigned int D, const long dim[D], const long ostr[D], float* optr, const long istr[D], const float* iptr, float val) { #if 0 float* tmp = md_alloc_sameplace(D, dim, FL_SIZE, iptr); md_sgreatequal2(D, dim, ostr, tmp, istr, iptr, val); md_mul2(D, dim, ostr, optr, istr, iptr, istr, tmp); md_free(tmp); #else make_3op_scalar(md_max2, D, dim, ostr, optr, istr, iptr, val); #endif } /** * Elementwise minimum of input and scalar (without strides) * * optr = max(val, iptr) */ void md_smax(unsigned int D, const long dim[D], float* optr, const float* iptr, float val) { long str[D]; md_calc_strides(D, str, dim, FL_SIZE); md_smax2(D, dim, str, optr, str, iptr, val); } static void 
md_fdiff_core2(unsigned int D, const long dims[D], unsigned int d, bool dir, const long ostr[D], float* out, const long istr[D], const float* in) { long pos[D]; md_set_dims(D, pos, 0); pos[d] = dir ? 1 : -1; md_circ_shift2(D, dims, pos, ostr, out, istr, in, FL_SIZE); md_sub2(D, dims, ostr, out, istr, in, ostr, out); } /** * Compute finite (forward) differences along selected dimensions. * */ void md_fdiff2(unsigned int D, const long dims[D], unsigned int d, const long ostr[D], float* out, const long istr[D], const float* in) { md_fdiff_core2(D, dims, d, true, ostr, out, istr, in); } /** * Compute finite differences along selected dimensions. * */ void md_fdiff(unsigned int D, const long dims[D], unsigned int d, float* out, const float* in) { long strs[D]; md_calc_strides(D, strs, dims, FL_SIZE); md_fdiff2(D, dims, d, strs, out, strs, in); } /** * Compute finite (backward) differences along selected dimensions. * */ void md_fdiff_backwards2(unsigned int D, const long dims[D], unsigned int d, const long ostr[D], float* out, const long istr[D], const float* in) { md_fdiff_core2(D, dims, d, false, ostr, out, istr, in); } /** * Compute finite (backward) differences along selected dimensions. * */ void md_fdiff_backwards(unsigned int D, const long dims[D], unsigned int d, float* out, const float* in) { long strs[D]; md_calc_strides(D, strs, dims, FL_SIZE); md_fdiff_backwards2(D, dims, d, strs, out, strs, in); } static void md_zfdiff_core2(unsigned int D, const long dims[D], unsigned int d, bool dir, const long ostr[D], complex float* out, const long istr[D], const complex float* in) { // we could also implement in terms of md_fdiff2 long pos[D]; md_set_dims(D, pos, 0); pos[d] = dir ? 1 : -1; md_circ_shift2(D, dims, pos, ostr, out, istr, in, CFL_SIZE); md_zsub2(D, dims, ostr, out, istr, in, ostr, out); } /** * Compute finite (forward) differences along selected dimensions. 
* */ void md_zfdiff2(unsigned int D, const long dims[D], unsigned int d, const long ostr[D], complex float* out, const long istr[D], const complex float* in) { md_zfdiff_core2(D, dims, d, true, ostr, out, istr, in); } /** * Compute finite (backward) differences along selected dimensions. * */ void md_zfdiff_backwards2(unsigned int D, const long dims[D], unsigned int d, const long ostr[D], complex float* out, const long istr[D], const complex float* in) { md_zfdiff_core2(D, dims, d, false, ostr, out, istr, in); } /** * Compute finite (forward) differences along selected dimensions. * */ void md_zfdiff(unsigned int D, const long dims[D], unsigned int d, complex float* out, const complex float* in) { long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); md_zfdiff2(D, dims, d, strs, out, strs, in); } /** * Compute finite (backward) differences along selected dimensions. * */ void md_zfdiff_backwards(unsigned int D, const long dims[D], unsigned int d, complex float* out, const complex float* in) { long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); md_zfdiff_backwards2(D, dims, d, strs, out, strs, in); } struct zfftmod_s { double phase; bool inv; unsigned int N; }; static void nary_zfftmod(struct nary_opt_data_s* data, void* ptr[]) { struct zfftmod_s* mdata = (struct zfftmod_s*)data->data_ptr; data->ops->zfftmod(data->size, ptr[0], ptr[1], mdata->N, mdata->inv, mdata->phase); } // DO NOT USE DIRECTLY - this is used internally by fftmod from fft.[ch] void md_zfftmod2(unsigned int D, const long dims[D], const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr, bool inv, double phase) { assert(D > 0); assert((CFL_SIZE == ostrs[0]) && (CFL_SIZE == istrs[0])); unsigned int N = dims[0]; optimized_twoop_oi(D - 1, dims + 1, ostrs + 1, optr, istrs + 1, iptr, (size_t[2]){ N * CFL_SIZE, N * CFL_SIZE }, nary_zfftmod, &(struct zfftmod_s){ phase, inv, N }); } void md_zfftmod(unsigned int D, const long dims[D], complex float* optr, const complex 
float* iptr, bool inv, double phase) { long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); md_zfftmod2(D, dims, strs, optr, strs, iptr, inv, phase); } bart-0.4.02/src/num/flpmath.h000066400000000000000000000670571320577655200157520ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * Copyright 2015-2017. Martin Uecker. * Copyright 2017. University of Oxford. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifndef __FLPMATH #define __FLPMATH #include "misc/cppwrap.h" #define CFL_SIZE sizeof(_Complex float) #define FL_SIZE sizeof(float) #define CDL_SIZE sizeof(_Complex double) #define DL_SIZE sizeof(double) extern void md_mul2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr1[__VLA(D)], const float* iptr1, const long istr2[__VLA(D)], const float* iptr2); extern void md_mul(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr1, const float* iptr2); extern void md_zrmul2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], const _Complex float* iptr2); extern void md_zrmul(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr1, const _Complex float* iptr2); extern void md_zmul2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], const _Complex float* iptr2); extern void md_zmul(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr1, const _Complex float* iptr2); extern void md_zdiv2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], 
const _Complex float* iptr2); extern void md_zdiv(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr1, const _Complex float* iptr2); extern void md_zdiv_reg2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], const _Complex float* iptr2, _Complex float lambda); extern void md_zdiv_reg(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr1, const _Complex float* iptr2, _Complex float lambda); extern void md_div2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr1[__VLA(D)], const float* iptr1, const long istr2[__VLA(D)], const float* iptr2); extern void md_div(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr1, const float* iptr2); extern void md_zmulc2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], const _Complex float* iptr2); extern void md_zmulc(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr1, const _Complex float* iptr2); extern void md_zsmul2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr, _Complex float val); extern void md_zsmul(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr, _Complex float val); extern void md_smul2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr[__VLA(D)], const float* iptr, float val); extern void md_smul(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr, float val); extern void md_zpow2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long 
istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], const _Complex float* iptr2); extern void md_zpow(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr1, const _Complex float* iptr2); extern void md_pow2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr1[__VLA(D)], const float* iptr1, const long istr2[__VLA(D)], const float* iptr2); extern void md_pow(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr1, const float* iptr2); extern void md_sqrt2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr[__VLA(D)], const float* iptr); extern void md_sqrt(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr); extern void md_zsqrt2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr1[__VLA(D)], const _Complex float* iptr1); extern void md_zsqrt(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr1); extern void md_zspow2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr, _Complex float val); extern void md_zspow(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr, _Complex float val); extern void md_zaxpy2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, _Complex float val, const long istr1[__VLA(D)], const _Complex float* iptr1); extern void md_zaxpy(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, _Complex float val, const _Complex float* iptr1); extern void md_axpy2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, float val, const long istr1[__VLA(D)], const float* iptr1); extern void md_axpy(unsigned int D, const long dim[__VLA(D)], float* optr, float val, const 
float* iptr);

extern void md_zfmac2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], const _Complex float* iptr2);
extern void md_zfmac(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr1, const _Complex float* iptr2);

// tensor products: the *2 variants take the joint dimensions and explicit
// strides, the others take the dimensions of the output and both inputs
extern void md_tenmul_dims(unsigned int D, long max_dims[__VLA(D)], const long out_dims[__VLA(D)], const long in1_dims[__VLA(D)], const long in2_dims[__VLA(D)]);
extern void md_ztenmul2(unsigned int D, const long max_dims[__VLA(D)], const long out_strs[__VLA(D)], _Complex float* out, const long in1_strs[__VLA(D)], const _Complex float* in1, const long in2_strs[__VLA(D)], const _Complex float* in2);
extern void md_ztenmul(unsigned int D, const long out_dims[__VLA(D)], _Complex float* out, const long in1_dims[__VLA(D)], const _Complex float* in1, const long in2_dims[__VLA(D)], const _Complex float* in2);
extern void md_ztenmulc2(unsigned int D, const long max_dims[__VLA(D)], const long out_strs[__VLA(D)], _Complex float* out, const long in1_strs[__VLA(D)], const _Complex float* in1, const long in2_strs[__VLA(D)], const _Complex float* in2);
extern void md_ztenmulc(unsigned int D, const long out_dims[__VLA(D)], _Complex float* out, const long in1_dims[__VLA(D)], const _Complex float* in1, const long in2_dims[__VLA(D)], const _Complex float* in2);

// matrix products
extern void md_matmul_dims(unsigned int D, long max_dims[__VLA(D)], const long out_dims[__VLA(D)], const long mat_dims[__VLA(D)], const long in_dims[__VLA(D)]);
extern void md_zmatmul2(unsigned int D, const long out_dims[__VLA(D)], const long out_strs[__VLA(D)], _Complex float* dst, const long mat_dims[__VLA(D)], const long mat_strs[__VLA(D)], const _Complex float* mat, const long in_dims[__VLA(D)], const long in_strs[__VLA(D)], const _Complex float* src);
extern void md_zmatmul(unsigned int D, const long out_dims[__VLA(D)], _Complex float* dst,
const long mat_dims[__VLA(D)], const _Complex float* mat, const long in_dims[__VLA(D)], const _Complex float* src);
extern void md_zmatmulc2(unsigned int D, const long out_dims[__VLA(D)], const long out_strs[__VLA(D)], _Complex float* dst, const long mat_dims[__VLA(D)], const long mat_strs[__VLA(D)], const _Complex float* mat, const long in_dims[__VLA(D)], const long in_strs[__VLA(D)], const _Complex float* src);
extern void md_zmatmulc(unsigned int D, const long out_dims[__VLA(D)], _Complex float* dst, const long mat_dims[__VLA(D)], const _Complex float* mat, const long in_dims[__VLA(D)], const _Complex float* src);

extern void md_fmac2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr1[__VLA(D)], const float* iptr1, const long istr2[__VLA(D)], const float* iptr2);
extern void md_fmac(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr1, const float* iptr2);

extern void md_zfmacc2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], const _Complex float* iptr2);
extern void md_zfmacc(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr1, const _Complex float* iptr2);

// variants with a double precision output array
extern void md_zfmacD2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex double* optr, const long istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], const _Complex float* iptr2);
extern void md_zfmacD(unsigned int D, const long dim[__VLA(D)], _Complex double* optr, const _Complex float* iptr1, const _Complex float* iptr2);

extern void md_fmacD2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], double* optr, const long istr1[__VLA(D)], const float* iptr1, const long istr2[__VLA(D)], const float* iptr2);
extern void md_fmacD(unsigned int D, const long dim[__VLA(D)], double* optr, const float* iptr1, const float* iptr2);

// md_zfmaccD is declared once, next to its strided counterpart
extern void md_zfmaccD2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex double* optr, const long istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], const _Complex float* iptr2);
extern void md_zfmaccD(unsigned int D, const long dim[__VLA(D)], _Complex double* optr, const _Complex float* iptr1, const _Complex float* iptr2);

extern void md_zadd2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], const _Complex float* iptr2);
extern void md_zadd(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr1, const _Complex float* iptr2);

extern void md_zsadd2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr, _Complex float val);
extern void md_zsadd(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr, _Complex float val);

extern void md_zsub2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], const _Complex float* iptr2);
extern void md_zsub(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr1, const _Complex float* iptr2);

extern void md_add2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr1[__VLA(D)], const float* iptr1, const long istr2[__VLA(D)], const float* iptr2);
extern void md_add(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr1, const float* iptr2);

extern void md_sadd2(unsigned int D, const long dim[__VLA(D)], const long
ostr[__VLA(D)], float* optr, const long istr[__VLA(D)], const float* iptr, float val); extern void md_sadd(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr, float val); extern void md_sub2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr1[__VLA(D)], const float* iptr1, const long istr2[__VLA(D)], const float* iptr2); extern void md_sub(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr1, const float* iptr2); extern void md_zphsr(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr); extern void md_zphsr2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_abs(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr); extern void md_abs2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr[__VLA(D)], const float* iptr); extern void md_zabs(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr); extern void md_zabs2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_max(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr1, const float* iptr2); extern void md_max2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr1[__VLA(D)], const float* iptr1, const long istr2[__VLA(D)], const float* iptr2); extern void md_min(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr1, const float* iptr2); extern void md_min2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr1[__VLA(D)], const float* iptr1, const long istr2[__VLA(D)], const float* iptr); extern void md_zsoftthresh_half2(unsigned int D, const 
long dim[__VLA(D)], float lambda, const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_zsoftthresh_half(unsigned int D, const long dim[__VLA(D)], float lambda, _Complex float* optr, const _Complex float* iptr); extern void md_softthresh_half2(unsigned int D, const long dim[__VLA(D)], float lambda, const long ostr[__VLA(D)], float* optr, const long istr[__VLA(D)], const float* iptr); extern void md_softthresh_half(unsigned int D, const long dim[__VLA(D)], float lambda, float* optr, const float* iptr); extern void md_softthresh2(unsigned int D, const long dim[__VLA(D)], float lambda, unsigned int flags, const long ostr[__VLA(D)], float* optr, const long istr[__VLA(D)], const float* iptr); extern void md_softthresh(unsigned int D, const long dim[__VLA(D)], float lambda, unsigned int flags, float* optr, const float* iptr); extern void md_softthresh_core2(unsigned int D, const long dims[__VLA(D)], float lambda, unsigned int flags, float* tmp_norm, const long ostrs[__VLA(D)], float* optr, const long istrs[__VLA(D)], const float* iptr); extern void md_zsoftthresh_core2(unsigned int D, const long dims[__VLA(D)], float lambda, unsigned int flags, _Complex float* tmp_norm, const long ostrs[__VLA(D)], _Complex float* optr, const long istrs[__VLA(D)], const _Complex float* iptr); extern void md_zsoftthresh2(unsigned int D, const long dim[__VLA(D)], float lambda, unsigned int flags, const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_zsoftthresh(unsigned int D, const long dim[__VLA(D)], float lambda, unsigned int flags, _Complex float* optr, const _Complex float* iptr); void md_zhardthresh_mask2(unsigned int D, const long dim[__VLA(D)], unsigned int k, unsigned int flags, _Complex float* tmp_norm, const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_zhardthresh_mask(unsigned int 
D, const long dim[__VLA(D)], unsigned int k, unsigned int flags, _Complex float* optr, const _Complex float* iptr); extern void md_zhardthresh_joint2(unsigned int D, const long dims[__VLA(D)], unsigned int k, unsigned int flags, _Complex float* tmp_norm, const long ostrs[__VLA(D)], _Complex float* optr, const long istrs[__VLA(D)], const _Complex float* iptr); extern void md_zhardthresh2(unsigned int D, const long dims[__VLA(D)], unsigned int k, unsigned int flags, const long ostrs[__VLA(D)], _Complex float* optr, const long istrs[__VLA(D)], const _Complex float* iptr); extern void md_zhardthresh(unsigned int D, const long dims[__VLA(D)], unsigned int k, unsigned int flags, _Complex float* optr, const _Complex float* iptr); extern void md_zconj(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr); extern void md_zconj2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_zreal(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr); extern void md_zreal2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_zimag(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr); extern void md_zimag2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_zcmp(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr1, const _Complex float* iptr2); extern void md_zcmp2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr1[__VLA(D)], const _Complex float* iptr1, const long istr2[__VLA(D)], const _Complex float* iptr2); extern void md_zexpj(unsigned int D, const long 
dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr); extern void md_zexpj2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_zarg(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr); extern void md_zarg2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_lessequal(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr1, const float* iptr2); extern void md_lessequal2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr1[__VLA(D)], const float* iptr1, const long istr2[__VLA(D)], const float* iptr2); extern void md_slessequal(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr, float val); extern void md_slessequal2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr[__VLA(D)], const float* iptr, float val); extern void md_greatequal(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr1, const float* iptr2); extern void md_greatequal2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr1[__VLA(D)], const float* iptr1, const long istr2[__VLA(D)], const float* iptr2); extern void md_sgreatequal(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr, float val); extern void md_sgreatequal2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr[__VLA(D)], const float* iptr, float val); extern float md_znorm2(unsigned int D, const long dim[__VLA(D)], const long str[__VLA(D)], const _Complex float* ptr); extern float md_znorm(unsigned int D, const long dim[__VLA(D)], const _Complex float* ptr); extern _Complex float md_zscalar2(unsigned int D, const 
long dim[__VLA(D)], const long str1[__VLA(D)], const _Complex float* ptr1, const long str2[__VLA(D)], const _Complex float* ptr2);
extern _Complex float md_zscalar(unsigned int D, const long dim[__VLA(D)], const _Complex float* ptr1, const _Complex float* ptr2);
extern float md_zscalar_real2(unsigned int D, const long dim[__VLA(D)], const long str1[__VLA(D)], const _Complex float* ptr1, const long str2[__VLA(D)], const _Complex float* ptr2);
extern float md_zscalar_real(unsigned int D, const long dim[__VLA(D)], const _Complex float* ptr1, const _Complex float* ptr2);

/* Each prototype below is declared exactly once. */
extern float md_asum2(unsigned int D, const long dim[__VLA(D)], const long str[__VLA(D)], const float* ptr);
extern float md_asum(unsigned int D, const long dim[__VLA(D)], const float* ptr);

extern float md_zasum2(unsigned int D, const long dim[__VLA(D)], const long str[__VLA(D)], const _Complex float* ptr);
extern float md_zasum(unsigned int D, const long dim[__VLA(D)], const _Complex float* ptr);

extern float md_z1norm2(unsigned int D, const long dim[__VLA(D)], const long str[__VLA(D)], const _Complex float* ptr);
extern float md_z1norm(unsigned int D, const long dim[__VLA(D)], const _Complex float* ptr);

extern float md_norm2(unsigned int D, const long dim[__VLA(D)], const long str[__VLA(D)], const float* ptr);
extern float md_norm(unsigned int D, const long dim[__VLA(D)], const float* ptr);
extern float md_scalar2(unsigned int D, const long dim[__VLA(D)], const long str1[__VLA(D)], const float* ptr1, const long str2[__VLA(D)], const float* ptr2); extern float md_scalar(unsigned int D, const long dim[__VLA(D)], const float* ptr1, const float* ptr2); extern void md_rss(unsigned int D, const long dims[__VLA(D)], unsigned int flags, float* dst, const float* src); extern void md_zrss(unsigned int D, const long dims[__VLA(D)], unsigned int flags, _Complex float* dst, const _Complex float* src); extern void md_zss(unsigned int D, const long dims[__VLA(D)], unsigned int flags, _Complex float* dst, const _Complex float* src); extern void md_zstd(unsigned int D, const long dims[__VLA(D)], unsigned int flags, _Complex float* optr, const _Complex float* iptr); extern void md_zstd2(unsigned int D, const long dims[__VLA(D)], unsigned int flags, const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_zvar(unsigned int D, const long dims[__VLA(D)], unsigned int flags, _Complex float* optr, const _Complex float* iptr); extern void md_zvar2(unsigned int D, const long dims[__VLA(D)], unsigned int flags, const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_zavg(unsigned int D, const long dims[__VLA(D)], unsigned int flags, _Complex float* optr, const _Complex float* iptr); extern void md_zavg2(unsigned int D, const long dims[__VLA(D)], unsigned int flags, const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_zwavg(unsigned int D, const long dims[__VLA(D)], unsigned int flags, _Complex float* optr, const _Complex float* iptr); extern void md_zwavg2(unsigned int D, const long dims[__VLA(D)], unsigned int flags, const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr); extern void md_zwavg2_core1(unsigned int D, const long 
dims[__VLA(D)], unsigned int flags, const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* weights); extern void md_zwavg2_core2(unsigned int D, const long dims[__VLA(D)], unsigned int flags, const long ostr[__VLA(D)], _Complex float* optr, const _Complex float* weights, const long istr[__VLA(D)], const _Complex float* iptr); extern float md_zrms(unsigned int D, const long dim[__VLA(D)], const _Complex float* in); extern float md_zrmse(unsigned int D, const long dim[__VLA(D)], const _Complex float* in1, const _Complex float* in2); extern float md_znrmse(unsigned int D, const long dim[__VLA(D)], const _Complex float* ref, const _Complex float* in); extern float md_znorme(unsigned int D, const long dim[__VLA(D)], const _Complex float* in1, const _Complex float* in2); extern float md_zrnorme(unsigned int D, const long dim[__VLA(D)], const _Complex float* ref, const _Complex float* in); extern void md_zdouble2float(unsigned int D, const long dims[__VLA(D)], _Complex float* dst, const _Complex double* src); extern void md_zfloat2double(unsigned int D, const long dims[__VLA(D)], _Complex double* dst, const _Complex float* src); extern void md_float2double(unsigned int D, const long dims[__VLA(D)], double* dst, const float* src); extern void md_double2float(unsigned int D, const long dims[__VLA(D)], float* dst, const double* src); extern void md_zdouble2float2(unsigned int D, const long dims[__VLA(D)], const long ostr[__VLA(D)], _Complex float* dst, const long istr[__VLA(D)], const _Complex double* src); extern void md_zfloat2double2(unsigned int D, const long dims[__VLA(D)], const long ostr[__VLA(D)], _Complex double* dst, const long istr[__VLA(D)], const _Complex float* src); extern void md_float2double2(unsigned int D, const long dims[__VLA(D)], const long ostr[__VLA(D)], double* dst, const long istr[__VLA(D)], const float* src); extern void md_double2float2(unsigned int D, const long dims[__VLA(D)], const long ostr[__VLA(D)], 
float* dst, const long istr[__VLA(D)], const double* src); extern void md_zfill2(unsigned int D, const long dim[__VLA(D)], const long str[__VLA(D)], _Complex float* ptr, _Complex float val); extern void md_zfill(unsigned int D, const long dim[__VLA(D)], _Complex float* ptr, _Complex float val); extern void md_smin2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr[__VLA(D)], const float* iptr, float val); extern void md_smin(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr, float val); extern void md_smax2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], float* optr, const long istr[__VLA(D)], const float* iptr, float val); extern void md_smax(unsigned int D, const long dim[__VLA(D)], float* optr, const float* iptr, float val); extern void md_fdiff2(unsigned int D, const long dims[__VLA(D)], unsigned int d, const long ostr[__VLA(D)], float* out, const long istr[__VLA(D)], const float* in); extern void md_fdiff(unsigned int D, const long dims[__VLA(D)], unsigned int d, float* out, const float* in); extern void md_fdiff_backwards2(unsigned int D, const long dims[__VLA(D)], unsigned int d, const long ostr[__VLA(D)], float* out, const long istr[__VLA(D)], const float* in); extern void md_fdiff_backwards(unsigned int D, const long dims[__VLA(D)], unsigned int d, float* out, const float* in); extern void md_zfdiff2(unsigned int D, const long dims[__VLA(D)], unsigned int d, const long ostr[__VLA(D)], _Complex float* out, const long istr[__VLA(D)], const _Complex float* in); extern void md_zfdiff(unsigned int D, const long dims[__VLA(D)], unsigned int d, _Complex float* out, const _Complex float* in); extern void md_zfdiff_backwards2(unsigned int D, const long dims[__VLA(D)], unsigned int d, const long ostr[__VLA(D)], _Complex float* out, const long istr[__VLA(D)], const _Complex float* in); extern void md_zfdiff_backwards(unsigned int D, const long dims[__VLA(D)], unsigned int d, 
_Complex float* out, const _Complex float* in); extern void md_zfftmod(unsigned int D, const long dim[__VLA(D)], _Complex float* optr, const _Complex float* iptr, _Bool inv, double phase); extern void md_zfftmod2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], _Complex float* optr, const long istr[__VLA(D)], const _Complex float* iptr, _Bool inv, double phase); #include "misc/cppwrap.h" #endif bart-0.4.02/src/num/gpukrnls.cu000066400000000000000000000522331320577655200163320ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-03-24 Martin Uecker * 2015,2017 Jon Tamir * * * This file defines basic operations on vectors of floats/complex floats * for operations on the GPU. See the CPU version (vecops.c) for more * information. */ #include #include #include #include #include #include #include "num/gpukrnls.h" #if 1 // see Dara's src/calib/calibcu.cu for how to get // runtime info // limited by hardware to 1024 on most devices // should be a multiple of 32 (warp size) #define BLOCKSIZE 1024 static int blocksize(int N) { return BLOCKSIZE; } static int gridsize(int N) { return (N + BLOCKSIZE - 1) / BLOCKSIZE; } #else // http://stackoverflow.com/questions/5810447/cuda-block-and-grid-size-efficiencies #define WARPSIZE 32 #define MAXBLOCKS (16 * 8) // 16 multi processor times 8 blocks #define MIN(x, y) ((x < y) ? (x) : (y)) #define MAX(x, y) ((x > y) ? 
(x) : (y)) static int blocksize(int N) { int warps_total = (N + WARPSIZE - 1) / WARPSIZE; int warps_block = MAX(1, MIN(4, warps_total)); return WARPSIZE * warps_block; } static int gridsize(int N) { int warps_total = (N + WARPSIZE - 1) / WARPSIZE; int warps_block = MAX(1, MIN(4, warps_total)); return MIN(MAXBLOCKS, MAX(1, warps_total / warps_block)); } #endif __global__ void kern_float2double(int N, double* dst, const float* src) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = src[i]; } extern "C" void cuda_float2double(long N, double* dst, const float* src) { kern_float2double<<>>(N, dst, src); } __global__ void kern_double2float(int N, float* dst, const double* src) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = src[i]; } extern "C" void cuda_double2float(long N, float* dst, const double* src) { kern_double2float<<>>(N, dst, src); } __global__ void kern_xpay(int N, float beta, float* dst, const float* src) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = dst[i] * beta + src[i]; } extern "C" void cuda_xpay(long N, float beta, float* dst, const float* src) { kern_xpay<<>>(N, beta, dst, src); } __global__ void kern_axpbz(int N, float* dst, const float a1, const float* src1, const float a2, const float* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = a1 * src1[i] + a2 * src2[i]; } extern "C" void cuda_axpbz(long N, float* dst, const float a1, const float* src1, const float a2, const float* src2) { kern_axpbz<<>>(N, dst, a1, src1, a2, src2); } __global__ void kern_smul(int N, float alpha, float* dst, const float* src) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * 
gridDim.x; for (int i = start; i < N; i += stride) dst[i] = alpha * src[i]; } extern "C" void cuda_smul(long N, float alpha, float* dst, const float* src) { kern_smul<<>>(N, alpha, dst, src); } typedef void (*cuda_3op_f)(int N, float* dst, const float* src1, const float* src2); extern "C" void cuda_3op(cuda_3op_f krn, int N, float* dst, const float* src1, const float* src2) { krn<<>>(N, dst, src1, src2); } __global__ void kern_add(int N, float* dst, const float* src1, const float* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = src1[i] + src2[i]; } extern "C" void cuda_add(long N, float* dst, const float* src1, const float* src2) { cuda_3op(kern_add, N, dst, src1, src2); } __global__ void kern_sub(int N, float* dst, const float* src1, const float* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = src1[i] - src2[i]; } extern "C" void cuda_sub(long N, float* dst, const float* src1, const float* src2) { cuda_3op(kern_sub, N, dst, src1, src2); } __global__ void kern_mul(int N, float* dst, const float* src1, const float* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = src1[i] * src2[i]; } extern "C" void cuda_mul(long N, float* dst, const float* src1, const float* src2) { cuda_3op(kern_mul, N, dst, src1, src2); } __global__ void kern_div(int N, float* dst, const float* src1, const float* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = src1[i] / src2[i]; } extern "C" void cuda_div(long N, float* dst, const float* src1, const float* src2) { cuda_3op(kern_div, N, dst, src1, src2); } __global__ void kern_fmac(int N, float* dst, const float* src1, const float* src2) { int start = threadIdx.x + 
blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] += src1[i] * src2[i]; } extern "C" void cuda_fmac(long N, float* dst, const float* src1, const float* src2) { cuda_3op(kern_fmac, N, dst, src1, src2); } __global__ void kern_fmac2(int N, double* dst, const float* src1, const float* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] += src1[i] * src2[i]; } extern "C" void cuda_fmac2(long N, double* dst, const float* src1, const float* src2) { kern_fmac2<<>>(N, dst, src1, src2); } __global__ void kern_zmul(int N, cuFloatComplex* dst, const cuFloatComplex* src1, const cuFloatComplex* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = cuCmulf(src1[i], src2[i]); } extern "C" void cuda_zmul(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2) { kern_zmul<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src1, (const cuFloatComplex*)src2); } __global__ void kern_zdiv(int N, cuFloatComplex* dst, const cuFloatComplex* src1, const cuFloatComplex* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = cuCdivf(src1[i], src2[i]); } extern "C" void cuda_zdiv(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2) { kern_zdiv<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src1, (const cuFloatComplex*)src2); } __global__ void kern_zfmac(int N, cuFloatComplex* dst, const cuFloatComplex* src1, const cuFloatComplex* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = cuCaddf(dst[i], cuCmulf(src1[i], src2[i])); } extern "C" void cuda_zfmac(long N, _Complex float* dst, const _Complex float* src1, 
const _Complex float* src2) { kern_zfmac<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src1, (const cuFloatComplex*)src2); } __global__ void kern_zfmac2(int N, cuDoubleComplex* dst, const cuFloatComplex* src1, const cuFloatComplex* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = cuCadd(dst[i], cuComplexFloatToDouble(cuCmulf(src1[i], src2[i]))); } extern "C" void cuda_zfmac2(long N, _Complex double* dst, const _Complex float* src1, const _Complex float* src2) { kern_zfmac2<<>>(N, (cuDoubleComplex*)dst, (const cuFloatComplex*)src1, (const cuFloatComplex*)src2); } __global__ void kern_zmulc(int N, cuFloatComplex* dst, const cuFloatComplex* src1, const cuFloatComplex* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = cuCmulf(src1[i], cuConjf(src2[i])); } extern "C" void cuda_zmulc(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2) { kern_zmulc<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src1, (const cuFloatComplex*)src2); } __global__ void kern_zfmacc(int N, cuFloatComplex* dst, const cuFloatComplex* src1, const cuFloatComplex* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = cuCaddf(dst[i], cuCmulf(src1[i], cuConjf(src2[i]))); } extern "C" void cuda_zfmacc(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2) { kern_zfmacc<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src1, (const cuFloatComplex*)src2); } __global__ void kern_zfmacc2(int N, cuDoubleComplex* dst, const cuFloatComplex* src1, const cuFloatComplex* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = cuCadd(dst[i], 
cuComplexFloatToDouble(cuCmulf(src1[i], cuConjf(src2[i])))); } extern "C" void cuda_zfmacc2(long N, _Complex double* dst, const _Complex float* src1, const _Complex float* src2) { kern_zfmacc2<<>>(N, (cuDoubleComplex*)dst, (const cuFloatComplex*)src1, (const cuFloatComplex*)src2); } __global__ void kern_pow(int N, float* dst, const float* src1, const float* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = powf(src1[i], src2[i]); } extern "C" void cuda_pow(long N, float* dst, const float* src1, const float* src2) { cuda_3op(kern_pow, N, dst, src1, src2); } __device__ cuDoubleComplex zexpD(cuDoubleComplex x) { double sc = exp(cuCreal(x)); double si; double co; sincos(cuCimag(x), &si, &co); return make_cuDoubleComplex(sc * co, sc * si); } __device__ cuFloatComplex zexp(cuFloatComplex x) { float sc = expf(cuCrealf(x)); float si; float co; sincosf(cuCimagf(x), &si, &co); return make_cuFloatComplex(sc * co, sc * si); } __device__ float zarg(cuFloatComplex x) { return atan2(cuCimagf(x), cuCrealf(x)); } __device__ cuFloatComplex zlog(cuFloatComplex x) { return make_cuFloatComplex(log(cuCabsf(x)), zarg(x)); } // x^y = e^{y ln(x)} = e^{y __device__ cuFloatComplex zpow(cuFloatComplex x, cuFloatComplex y) { return zexp(cuCmulf(y, zlog(x))); } __global__ void kern_zpow(int N, cuFloatComplex* dst, const cuFloatComplex* src1, const cuFloatComplex* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = zpow(src1[i], src2[i]); } extern "C" void cuda_zpow(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2) { kern_zpow<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src1, (const cuFloatComplex*)src2); } __global__ void kern_sqrt(int N, float* dst, const float* src) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i 
= start; i < N; i += stride) dst[i] = sqrtf(fabs(src[i])); } extern "C" void cuda_sqrt(long N, float* dst, const float* src) { kern_sqrt<<>>(N, dst, src); } __global__ void kern_zconj(int N, cuFloatComplex* dst, const cuFloatComplex* src) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = cuConjf(src[i]); } extern "C" void cuda_zconj(long N, _Complex float* dst, const _Complex float* src) { kern_zconj<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src); } __global__ void kern_zcmp(int N, cuFloatComplex* dst, const cuFloatComplex* src1, const cuFloatComplex* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = make_cuFloatComplex(((cuCrealf(src1[i]) == cuCrealf(src2[i])) && (cuCimagf(src1[i]) == cuCimagf(src2[i]))) ? 1. : 0, 0.); } extern "C" void cuda_zcmp(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2) { kern_zcmp<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src1, (const cuFloatComplex*)src2); } __global__ void kern_zdiv_reg(int N, cuFloatComplex* dst, const cuFloatComplex* src1, const cuFloatComplex* src2, cuFloatComplex lambda) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = cuCdivf(src1[i], cuCaddf(src2[i], lambda)); } extern "C" void cuda_zdiv_reg(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2, _Complex float lambda) { kern_zdiv_reg<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src1, (const cuFloatComplex*)src2, make_cuFloatComplex(__real(lambda), __imag(lambda))); } __global__ void kern_zphsr(int N, cuFloatComplex* dst, const cuFloatComplex* src) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) { float abs = 
cuCabsf(src[i]); // moved out, otherwise it triggers a compiler error in nvcc dst[i] = (0. == abs) ? make_cuFloatComplex(1., 0.) : (cuCdivf(src[i], make_cuFloatComplex(abs, 0.))); } } extern "C" void cuda_zphsr(long N, _Complex float* dst, const _Complex float* src) { kern_zphsr<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src); } __global__ void kern_zexpj(int N, cuFloatComplex* dst, const cuFloatComplex* src) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) { float abs = cuCabsf(src[i]); // moved out, otherwise it triggers a compiler error in nvcc dst[i] = zexp(make_cuFloatComplex(0., abs)); } } extern "C" void cuda_zexpj(long N, _Complex float* dst, const _Complex float* src) { kern_zexpj<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src); } __global__ void kern_zarg(int N, cuFloatComplex* dst, const cuFloatComplex* src) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = make_cuFloatComplex(zarg(src[i]), 0.); } extern "C" void cuda_zarg(long N, _Complex float* dst, const _Complex float* src) { kern_zarg<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src); } /** * (GPU) Step (1) of soft thesholding, y = ST(x, lambda). * Only computes the residual, resid = MAX( (abs(x) - lambda)/abs(x)), 0 ) * * @param N number of elements * @param lambda threshold parameter * @param d pointer to destination, resid * @param x pointer to input */ __global__ void kern_zsoftthresh_half(int N, float lambda, cuFloatComplex* d, const cuFloatComplex* x) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) { float norm = cuCabsf(x[i]); float red = norm - lambda; //d[i] = (red > 0.) ? (cuCmulf(make_cuFloatComplex(red / norm, 0.), x[i])) : make_cuFloatComplex(0., 0.); d[i] = (red > 0.) ? make_cuFloatComplex(red / norm, 0.) 
: make_cuFloatComplex(0., 0.); } } extern "C" void cuda_zsoftthresh_half(long N, float lambda, _Complex float* d, const _Complex float* x) { kern_zsoftthresh_half<<>>(N, lambda, (cuFloatComplex*)d, (const cuFloatComplex*)x); } __global__ void kern_zsoftthresh(int N, float lambda, cuFloatComplex* d, const cuFloatComplex* x) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) { float norm = cuCabsf(x[i]); float red = norm - lambda; d[i] = (red > 0.) ? (cuCmulf(make_cuFloatComplex(red / norm, 0.), x[i])) : make_cuFloatComplex(0., 0.); } } extern "C" void cuda_zsoftthresh(long N, float lambda, _Complex float* d, const _Complex float* x) { kern_zsoftthresh<<>>(N, lambda, (cuFloatComplex*)d, (const cuFloatComplex*)x); } __global__ void kern_softthresh_half(int N, float lambda, float* d, const float* x) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) { float norm = fabsf(x[i]); float red = norm - lambda; d[i] = (red > 0.) ? (red / norm) : 0.; } } extern "C" void cuda_softthresh_half(long N, float lambda, float* d, const float* x) { kern_softthresh_half<<>>(N, lambda, d, x); } __global__ void kern_softthresh(int N, float lambda, float* d, const float* x) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) { float norm = fabsf(x[i]); float red = norm - lambda; d[i] = (red > 0.) ? 
(red / norm * x[i]) : 0.; } } extern "C" void cuda_softthresh(long N, float lambda, float* d, const float* x) { kern_softthresh<<>>(N, lambda, d, x); } __global__ void kern_zreal(int N, cuFloatComplex* dst, const cuFloatComplex* src) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = make_cuFloatComplex(cuCrealf(src[i]), 0.); } extern "C" void cuda_zreal(long N, _Complex float* dst, const _Complex float* src) { kern_zreal<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src); } __global__ void kern_le(int N, float* dst, const float* src1, const float* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = (src1[i] <= src2[i]); } extern "C" void cuda_le(long N, float* dst, const float* src1, const float* src2) { kern_le<<>>(N, dst, src1, src2); } __global__ void kern_ge(int N, float* dst, const float* src1, const float* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = (src1[i] >= src2[i]); } extern "C" void cuda_ge(long N, float* dst, const float* src1, const float* src2) { kern_ge<<>>(N, dst, src1, src2); } __device__ cuFloatComplex cuDouble2Float(cuDoubleComplex x) { return make_cuFloatComplex(cuCreal(x), cuCimag(x)); } __device__ cuDoubleComplex cuFloat2Double(cuFloatComplex x) { return make_cuDoubleComplex(cuCrealf(x), cuCimagf(x)); } // identical copy in num/fft.c __device__ double fftmod_phase(long length, int j) { long center1 = length / 2; double shift = (double)center1 / (double)length; return ((double)j - (double)center1 / 2.) * shift; } __device__ cuDoubleComplex fftmod_phase2(long n, int j, bool inv, double phase) { phase += fftmod_phase(n, j); double rem = phase - floor(phase); double sgn = inv ? -1. : 1.; #if 1 if (rem == 0.) 
return make_cuDoubleComplex(1., 0.); if (rem == 0.5) return make_cuDoubleComplex(-1., 0.); if (rem == 0.25) return make_cuDoubleComplex(0., sgn); if (rem == 0.75) return make_cuDoubleComplex(0., -sgn); #endif return zexpD(make_cuDoubleComplex(0., M_PI * 2. * sgn * rem)); } __global__ void kern_zfftmod(int N, cuFloatComplex* dst, const cuFloatComplex* src, unsigned int n, _Bool inv, double phase) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) for (int j = 0; j < n; j++) dst[i * n + j] = cuDouble2Float(cuCmul(fftmod_phase2(n, j, inv, phase), cuFloat2Double(src[i * n + j]))); } extern "C" void cuda_zfftmod(long N, _Complex float* dst, const _Complex float* src, unsigned int n, _Bool inv, double phase) { kern_zfftmod<<>>(N, (cuFloatComplex*)dst, (const cuFloatComplex*)src, n, inv, phase); } #define MAX(x, y) (((x) > (y)) ? (x) : (y)) #define MIN(x, y) (((x) < (y)) ? (x) : (y)) __global__ void kern_max(int N, float* dst, const float* src1, const float* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = MAX(src1[i], src2[i]); } extern "C" void cuda_max(long N, float* dst, const float* src1, const float* src2) { kern_max<<>>(N, dst, src1, src2); } __global__ void kern_min(int N, float* dst, const float* src1, const float* src2) { int start = threadIdx.x + blockDim.x * blockIdx.x; int stride = blockDim.x * gridDim.x; for (int i = start; i < N; i += stride) dst[i] = MIN(src1[i], src2[i]); } extern "C" void cuda_min(long N, float* dst, const float* src1, const float* src2) { kern_min<<>>(N, dst, src1, src2); } bart-0.4.02/src/num/gpukrnls.h000066400000000000000000000071441320577655200161530ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifdef __cplusplus extern "C" { #endif extern void cuda_float2double(long size, double* dst, const float* src); extern void cuda_double2float(long size, float* dst, const double* src); extern void cuda_sxpay(long size, float* y, float alpha, const float* src); extern void cuda_xpay(long N, float beta, float* dst, const float* src); extern void cuda_axpbz(long N, float* dst, const float a, const float* x, const float b, const float* z); extern void cuda_smul(long N, float alpha, float* dst, const float* src); extern void cuda_mul(long N, float* dst, const float* src1, const float* src2); extern void cuda_div(long N, float* dst, const float* src1, const float* src2); extern void cuda_add(long N, float* dst, const float* src1, const float* src2); extern void cuda_sub(long N, float* dst, const float* src1, const float* src2); extern void cuda_fmac(long N, float* dst, const float* src1, const float* src2); extern void cuda_fmac2(long N, double* dst, const float* src1, const float* src2); extern void cuda_zmul(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2); extern void cuda_zdiv(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2); extern void cuda_zfmac(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2); extern void cuda_zfmac2(long N, _Complex double* dst, const _Complex float* src1, const _Complex float* src2); extern void cuda_zmulc(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2); extern void cuda_zfmacc(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2); extern void cuda_zfmacc2(long N, _Complex double* dst, const _Complex float* src1, const _Complex float* src2); extern void cuda_pow(long N, float* dst, const float* src1, const float* src2); extern void cuda_zpow(long N, _Complex float* dst, 
const _Complex float* src1, const _Complex float* src2); extern void cuda_sqrt(long N, float* dst, const float* src); extern void cuda_zconj(long N, _Complex float* dst, const _Complex float* src); extern void cuda_zphsr(long N, _Complex float* dst, const _Complex float* src); extern void cuda_zexpj(long N, _Complex float* dst, const _Complex float* src); extern void cuda_zarg(long N, _Complex float* dst, const _Complex float* src); extern void cuda_zsoftthresh_half(long N, float lambda, _Complex float* d, const _Complex float* x); extern void cuda_zsoftthresh(long N, float lambda, _Complex float* d, const _Complex float* x); extern void cuda_softthresh_half(long N, float lambda, float* d, const float* x); extern void cuda_softthresh(long N, float lambda, float* d, const float* x); extern void cuda_zreal(long N, _Complex float* dst, const _Complex float* src); extern void cuda_zcmp(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2); extern void cuda_zdiv_reg(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2, _Complex float lambda); extern void cuda_le(long N, float* dst, const float* src1, const float* src2); extern void cuda_ge(long N, float* dst, const float* src1, const float* src2); extern void cuda_zfftmod(long N, _Complex float* dst, const _Complex float* src, unsigned int n, _Bool inv, double phase); extern void cuda_max(long N, float* dst, const float* src1, const float* src2); extern void cuda_min(long N, float* dst, const float* src1, const float* src2); #ifdef __cplusplus } #endif bart-0.4.02/src/num/gpuops.c000066400000000000000000000203441320577655200156130ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * Copyright 2014. Joseph Y Cheng. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
/**
 * Select the GPU with the largest amount of free memory.
 *
 * Only probes the devices if more than one is present; otherwise nothing
 * is selected here and 0 is returned.
 *
 * @returns index of the selected device (0 if there is at most one device)
 */
int cuda_init_memopt(void)
{
	int num_devices = cuda_devices();
	int max_device = 0;

	if (num_devices > 1) {

		size_t mem_max = 0;

		for (int device = 0; device < num_devices; device++) {

			size_t mem_free;
			size_t mem_total;

			// make 'device' current, then query its free memory
			cuda_init(device);
			CUDA_ERROR(cudaMemGetInfo(&mem_free, &mem_total));

			if (mem_max < mem_free) {

				mem_max = mem_free;
				max_device = device;
			}
		}

		// go through cuda_init (not a bare cudaSetDevice) so that
		// last_init matches the device that is actually selected
		cuda_init(max_device);
	}

	return max_device;
}
/**
 * Release cached device memory and tear down the CUDA state of the
 * current device.
 */
void cuda_exit(void)
{
	cuda_memcache_clear();

	// cudaThreadExit() is deprecated; cudaDeviceReset() is its replacement
	CUDA_ERROR(cudaDeviceReset());
}
See end of: http://www.alexstjohn.com/WP/2014/04/28/cuda-6-0-first-look/ */ cudaGetLastError(); return false; } return (cudaMemoryTypeDevice == attr.memoryType); } #endif bool cuda_ondevice(const void* ptr) { return mem_ondevice(ptr); } bool cuda_accessible(const void* ptr) { #if 1 return mem_device_accessible(ptr); #else struct cudaPointerAttributes attr; //CUDA_ERROR(cudaPointerGetAttributes(&attr, ptr)); if (cudaSuccess != (cudaPointerGetAttributes(&attr, ptr))) return false; return true; #endif } void cuda_free(void* ptr) { mem_device_free(ptr, cuda_free_wrapper); } static void* cuda_malloc_wrapper(size_t size) { void* ptr; CUDA_ERROR(cudaMalloc(&ptr, size)); return ptr; } void* cuda_malloc(long size) { return mem_device_malloc(last_init, size, cuda_malloc_wrapper); } #if 0 void* cuda_hostalloc(long N) { void* ptr; if (cudaSuccess != cudaHostAlloc(&ptr, N, cudaHostAllocDefault)) abort(); insert(ptr, N, false); return ptr; } void cuda_hostfree(void* ptr) { struct cuda_mem_s* nptr = search(ptr, true); assert(nptr->ptr == ptr); assert(!nptr->device); free(nptr); cudaFreeHost(ptr); } #endif static float* cuda_float_malloc(long size) { return (float*)cuda_malloc(size * sizeof(float)); } static void cuda_float_free(float* x) { cuda_free((void*)x); } static double cuda_sdot(long size, const float* src1, const float* src2) { assert(cuda_ondevice(src1)); assert(cuda_ondevice(src2)); // printf("SDOT %x %x %ld\n", src1, src2, size); return cublasSdot(size, src1, 1, src2, 1); } static double cuda_norm(long size, const float* src1) { #if 1 // cublasSnrm2 produces NaN in some situations // e.g. 
nlinv -g -i8 utests/data/und2x2 o // git rev: ab28a9a953a80d243511640b23501f964a585349 // printf("cublas: %f\n", cublasSnrm2(size, src1, 1)); // printf("GPU norm (sdot: %f)\n", sqrt(cuda_sdot(size, src1, src1))); return sqrt(cuda_sdot(size, src1, src1)); #else return cublasSnrm2(size, src1, 1); #endif } static double cuda_asum(long size, const float* src) { return cublasSasum(size, src, 1); } static void cuda_saxpy(long size, float* y, float alpha, const float* src) { // printf("SAXPY %x %x %ld\n", y, src, size); cublasSaxpy(size, alpha, src, 1, y, 1); } static void cuda_swap(long size, float* a, float* b) { cublasSswap(size, a, 1, b, 1); } const struct vec_ops gpu_ops = { .float2double = cuda_float2double, .double2float = cuda_double2float, .dot = cuda_sdot, .asum = cuda_asum, .zl1norm = NULL, .add = cuda_add, .sub = cuda_sub, .mul = cuda_mul, .div = cuda_div, .fmac = cuda_fmac, .fmac2 = cuda_fmac2, .axpy = cuda_saxpy, .pow = cuda_pow, .sqrt = cuda_sqrt, .le = cuda_le, .ge = cuda_ge, .zmul = cuda_zmul, .zdiv = cuda_zdiv, .zfmac = cuda_zfmac, .zfmac2 = cuda_zfmac2, .zmulc = cuda_zmulc, .zfmacc = cuda_zfmacc, .zfmacc2 = cuda_zfmacc2, .zpow = cuda_zpow, .zphsr = cuda_zphsr, .zconj = cuda_zconj, .zexpj = cuda_zexpj, .zarg = cuda_zarg, .zcmp = cuda_zcmp, .zdiv_reg = cuda_zdiv_reg, .zfftmod = cuda_zfftmod, .max = cuda_max, .min = cuda_min, .zsoftthresh = cuda_zsoftthresh, .zsoftthresh_half = cuda_zsoftthresh_half, .softthresh = cuda_softthresh, .softthresh_half = cuda_softthresh_half, .zhardthresh = NULL, }; // defined in iter/vec.h struct vec_iter_s { float* (*allocate)(long N); void (*del)(float* x); void (*clear)(long N, float* x); void (*copy)(long N, float* a, const float* x); void (*swap)(long N, float* a, float* x); double (*norm)(long N, const float* x); double (*dot)(long N, const float* x, const float* y); void (*sub)(long N, float* a, const float* x, const float* y); void (*add)(long N, float* a, const float* x, const float* y); void (*smul)(long N, float 
alpha, float* a, const float* x); void (*xpay)(long N, float alpha, float* a, const float* x); void (*axpy)(long N, float* a, float alpha, const float* x); void (*axpbz)(long N, float* out, const float a, const float* x, const float b, const float* z); }; extern const struct vec_iter_s gpu_iter_ops; const struct vec_iter_s gpu_iter_ops = { .allocate = cuda_float_malloc, .del = cuda_float_free, .clear = cuda_float_clear, .copy = cuda_float_copy, .dot = cuda_sdot, .norm = cuda_norm, .axpy = cuda_saxpy, .xpay = cuda_xpay, .axpbz = cuda_axpbz, .smul = cuda_smul, .add = cuda_add, .sub = cuda_sub, .swap = cuda_swap, }; #endif bart-0.4.02/src/num/gpuops.h000066400000000000000000000020161320577655200156140ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifdef __cplusplus extern "C" { #endif extern const struct vec_ops gpu_ops; extern _Bool cuda_ondevice(const void* ptr); extern _Bool cuda_accessible(const void* ptr); extern void cuda_clear(long size, void* ptr); extern void cuda_memcpy(long size, void* dst, const void* src); extern void cuda_hostfree(void*); extern void* cuda_hostalloc(long N); extern void* cuda_malloc(long N); extern void cuda_memcpy_strided(const long dims[2], long ostr, void* dst, long istr, const void* src); extern void cuda_free(void*); extern void cuda_init(int device); extern int cuda_init_memopt(void); extern void cuda_p2p_table(int n, _Bool table[n][n]); extern void cuda_p2p(int a, int b); extern void cuda_exit(void); extern int cuda_devices(void); extern void cuda_memcache_off(void); extern void cuda_memcache_clear(void); #ifdef __cplusplus } #endif bart-0.4.02/src/num/init.c000066400000000000000000000041221320577655200152350ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013 Martin Uecker * * * Initialization routines. */ #define _GNU_SOURCE #include #include #include #include #ifdef _OPENMP #include #endif #include "misc/debug.h" #include "num/fft.h" #ifdef USE_CUDA #include "num/gpuops.h" #endif #ifdef USE_CULA #include #endif #include "init.h" extern unsigned long num_chunk_size; // num/optimize.c void num_init(void) { #ifdef __linux__ // feenableexcept(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW); #endif #if 0 // set stack limit if (-1 == setrlimit(RLIMIT_STACK, &(struct rlimit){ 500000000, 500000000 })) debug_printf(DP_WARN, "error setting stack size\n"); // FIXME: should also set openmp stack size #endif #ifdef _OPENMP int p = omp_get_num_procs(); if (NULL == getenv("OMP_NUM_THREADS")) omp_set_num_threads(p); p = omp_get_max_threads(); // omp_set_nested(1); #else int p = 2; #endif #ifdef FFTWTHREADS fft_set_num_threads(p); #endif const char* chunk_str; if (NULL != (chunk_str = getenv("BART_PARALLEL_CHUNK_SIZE"))) { long chunk_size = strtoul(chunk_str, NULL, 10); if (0 < chunk_size) { num_chunk_size = chunk_size; } else { debug_printf(DP_WARN, "invalid chunk size\n"); } } } void num_init_gpu(void) { num_init(); // don't call cuda_init so that GPU can get assigned by driver #ifdef USE_CULA culaInitialize(); #endif } void num_init_gpu_device(int device) { num_init(); #ifdef USE_CUDA cuda_init(device); #else (void)device; assert(0); #endif #ifdef USE_CULA culaInitialize(); #endif } void num_init_gpu_memopt(void) { num_init(); #ifdef USE_CUDA cuda_init_memopt(); #else assert(0); #endif #ifdef USE_CULA culaInitialize(); #endif } void num_set_num_threads(int n) { #ifdef _OPENMP omp_set_num_threads(n); #endif #ifdef FFTWTHREADS fft_set_num_threads(n); #endif } bart-0.4.02/src/num/init.h000066400000000000000000000007531320577655200152500ustar00rootroot00000000000000/* Copyright 2013. 
The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ extern void num_init(void); extern void num_init_gpu(void); /** * selects the GPU with the maximum available memory * (if there are more than one on the system) */ extern void num_init_gpu_memopt(void); extern void num_init_gpu_device(int device); extern void num_set_num_threads(int n); bart-0.4.02/src/num/iovec.c000066400000000000000000000033711320577655200154040ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013 Martin Uecker */ #include #include "misc/misc.h" #include "misc/debug.h" #include "num/multind.h" #include "num/flpmath.h" #include "iovec.h" #if 1 void debug_print_iovec(int level, const struct iovec_s* vec) { debug_printf(level, "iovec:\n"); debug_printf(level, " N = %d\n", vec->N); debug_printf(level, " dims = \t"); debug_print_dims(level, vec->N, vec->dims); debug_printf(level, " strs = \t"); debug_print_dims(level, vec->N, vec->strs); } #endif const struct iovec_s* iovec_create2(unsigned int N, const long dims[N], const long strs[N], size_t size) { PTR_ALLOC(struct iovec_s, n); n->N = N; PTR_ALLOC(long[N], ndims); memcpy(*ndims, dims, N * sizeof(long)); n->dims = *PTR_PASS(ndims); PTR_ALLOC(long[N], nstrs); memcpy(*nstrs, strs, N * sizeof(long)); n->strs = *PTR_PASS(nstrs); n->size = size; return PTR_PASS(n); } const struct iovec_s* iovec_create(unsigned int N, const long dims[N], size_t size) { long strs[N]; md_calc_strides(N, strs, dims, size); return iovec_create2(N, dims, strs, size); } void iovec_free(const struct iovec_s* x) { free((void*)x->dims); free((void*)x->strs); free((void*)x); } bool iovec_check(const struct iovec_s* iov, unsigned int N, const long dims[N], const long strs[N]) { bool 
/*
 * Call LAPACKE_<x> with column-major layout and report a non-zero
 * return code through error().
 *
 * The expansion is wrapped in do { } while (0) so that it is exactly one
 * statement: 'LAPACKE(...);' can then be used as the body of an if/else
 * without capturing the else branch.
 */
#define LAPACKE(x, ...)								\
	do {									\
		if (0 != LAPACKE_##x(LAPACK_COL_MAJOR, __VA_ARGS__))		\
			error("LAPACK: " # x " failed.");			\
	} while (0)
* * LAPACK svd destroys its input matrix **/ void lapack_eig(long N, float eigenval[N], complex float matrix[N][N]) { LAPACKE(cheev, 'V', 'U', N, &matrix[0][0], N, eigenval); } void lapack_svd(long M, long N, complex float U[M][M], complex float VH[N][N], float S[(N > M) ? M : N], complex float A[N][M]) { LAPACKE(cgesdd, 'A', M, N, &A[0][0], M, S, &U[0][0], M, &VH[0][0], N); } void lapack_svd_econ(long M, long N, complex float U[M][(N > M) ? M : N], complex float VH[(N > M) ? M : N][N], float S[(N > M) ? M : N], complex float A[N][M]) { PTR_ALLOC(float[MIN(M, N) - 1], superb); LAPACKE(cgesvd, 'S', 'S', M, N, &A[0][0], M, S, &U[0][0], M, &VH[0][0], MIN(M, N), *superb); PTR_FREE(superb); } void lapack_eig_double(long N, double eigenval[N], complex double matrix[N][N]) { LAPACKE(zheev, 'V', 'U', N, &matrix[0][0], N, eigenval); } void lapack_svd_double(long M, long N, complex double U[M][M], complex double VH[N][N], double S[(N > M) ? M : N], complex double A[N][M]) { LAPACKE(zgesdd, 'A', M, N, &A[0][0], M, S, &U[0][0], M, &VH[0][0], N); } static void lapack_cholesky_UL(long N, char UL, complex float A[N][N]) { LAPACKE(cpotrf, UL, N, &A[0][0], N); } void lapack_cholesky(long N, complex float A[N][N]) { lapack_cholesky_UL(N, 'U', A); } void lapack_cholesky_lower(long N, complex float A[N][N]) { lapack_cholesky_UL(N, 'L', A); } static void lapack_trimat_inverse_UL(long N, char UL, complex float A[N][N]) { LAPACKE(ctrtri, UL, 'N', N, &A[0][0], N); } void lapack_trimat_inverse(long N, complex float A[N][N]) { lapack_trimat_inverse_UL(N, 'U', A); } void lapack_trimat_inverse_lower(long N, complex float A[N][N]) { lapack_trimat_inverse_UL(N, 'L', A); } bart-0.4.02/src/num/lapack.h000066400000000000000000000025021320577655200155320ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #include #ifdef __cplusplus #error This file does not support C++ #endif extern void lapack_eig(long N, float eigenval[N], complex float matrix[N][N]); extern void lapack_svd(long M, long N, complex float U[M][M], complex float VH[N][N], float S[(N > M) ? M : N], complex float A[N][M]); extern void lapack_svd_econ(long M, long N, complex float U[M][(N > M) ? M : N], complex float VH[(N > M) ? M : N][N], float S[(N > M) ? M : N], complex float A[N][M]); extern void lapack_eig_double(long N, double eigenval[N], complex double matrix[N][N]); extern void lapack_svd_double(long M, long N, complex double U[M][M], complex double VH[N][N], double S[(N > M) ? M : N], complex double A[N][M]); extern void lapack_matrix_multiply(long M, long N, long K, complex float C[M][N], const complex float A[M][K], const complex float B[K][N]); extern void lapack_cholesky(long N, complex float A[N][N]); extern void lapack_cholesky_lower(long N, complex float A[N][N]); extern void lapack_trimat_inverse(long N, complex float A[N][N]); extern void lapack_trimat_inverse_lower(long N, complex float A[N][N]); bart-0.4.02/src/num/linalg.c000066400000000000000000000267731320577655200155600ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2016 Martin Uecker * 2013 Dara Bahri * * * Simple linear algebra functions. 
/*
 * Fill the A x B matrix x with ones on the main diagonal and zeros
 * everywhere else.
 */
void mat_identity(int A, int B, complex float x[A][B])
{
	for (int row = 0; row < A; row++) {

		for (int col = 0; col < B; col++)
			x[row][col] = 0.;

		// the diagonal entry exists only while row is a valid column
		if (row < B)
			x[row][row] = 1.;
	}
}
/*
 * Invert a square complex matrix by Gauss-Jordan elimination with
 * partial (row) pivoting on the augmented matrix [ in | 1 ].
 *
 * N	matrix dimension
 * out	receives the inverse; only written if true is returned
 * in	matrix to invert (copied first, so out may alias in)
 *
 * Returns true on success and false if no non-zero pivot is found in
 * some column, i.e. the matrix is singular.
 *
 * ATTENTION: slow, and the N x 2N work array lives on the stack.
 */
bool (mat_inverse)(unsigned int N, complex float out[N][N], const complex float in[N][N])
{
#ifdef MAT_USE_LAPACK
//	return blas_matrix_inverse(N, out, in);
	UNUSED(in);
	UNUSED(out);
	assert(0);
	return false;	// not reached; keeps the function well-defined with NDEBUG
#else
	if (0 == N)
		return true;	// nothing to do; also avoids a zero-length VLA

	complex float aug[N][2 * N];

	for (unsigned int i = 0; i < N; i++) {

		for (unsigned int j = 0; j < N; j++) {

			aug[i][j] = in[i][j];
			aug[i][N + j] = (i == j) ? 1. : 0.;
		}
	}

	for (unsigned int i = 0; i < N; i++) {

		// pivot search: row at or below i with the largest entry in
		// column i (|re| + |im| is enough to rank the candidates)
		unsigned int piv = i;
		float best = 0.;

		for (unsigned int r = i; r < N; r++) {

			float re = crealf(aug[r][i]);
			float im = cimagf(aug[r][i]);
			float mag = ((re < 0.) ? -re : re) + ((im < 0.) ? -im : im);

			if (mag > best) {

				best = mag;
				piv = r;
			}
		}

		if (0. == best)
			return false;	// whole column is zero: singular

		if (piv != i) {

			for (unsigned int j = 0; j < 2 * N; j++) {

				complex float swp = aug[i][j];
				aug[i][j] = aug[piv][j];
				aug[piv][j] = swp;
			}
		}

		// normalize the pivot row
		complex float diag = aug[i][i];

		for (unsigned int j = 0; j < 2 * N; j++)
			aug[i][j] /= diag;

		// eliminate column i from every other row
		for (unsigned int r = 0; r < N; r++) {

			if (r == i)
				continue;

			complex float factor = aug[r][i];

			for (unsigned int j = 0; j < 2 * N; j++)
				aug[r][j] -= factor * aug[i][j];
		}
	}

	// the right half of the augmented matrix now holds the inverse
	for (unsigned int i = 0; i < N; i++)
		for (unsigned int j = 0; j < N; j++)
			out[i][j] = aug[i][N + j];

	return true;
#endif
}
/*
 * Scaled copy: x[k] = alpha * y[k] for k = 0 .. N-1. The previous
 * contents of x are overwritten.
 *
 * FIXME: this is not axpy - nothing is accumulated into x.
 * vec_saxpy computes x += alpha * y.
 */
void vec_axpy(long N, complex float x[N], complex float alpha, const complex float y[N])
{
	const complex float* src = y;
	complex float* dst = x;

	for (long left = N; left > 0; left--)
		*dst++ = alpha * *src++;
}
/*
 * Conjugate transpose: dst (B x A) receives the complex conjugate of
 * src (A x B) with rows and columns exchanged.
 */
void (mat_adjoint)(int A, int B, complex float dst[B][A], const complex float src[A][B])
{
	for (int col = 0; col < B; col++) {

		for (int row = 0; row < A; row++) {

			complex float val = src[row][col];

			dst[col][row] = conjf(val);
		}
	}
}
/*
 * Solve a linear system in two substitution passes over the same
 * matrix L: backsubst_lower computes the intermediate vector y from b,
 * then backsubst_upper computes x from y.
 *
 * N	dimension of the system
 * x	output: solution vector
 * L	N x N factor, passed unchanged to both passes
 *	NOTE(review): presumably the output of cholesky() - confirm against callers
 * b	right-hand side
 */
void (cholesky_solve)(int N, complex float x[N], const complex float L[N][N], const complex float b[N])
{
	// intermediate result of the first pass (variable-length array on the stack)
	complex float y[N];

	backsubst_lower(N, y, L, b);
	backsubst_upper(N, x, L, y);
}
/*
 * Restrict-qualified outermost dimension for matrix parameters, e.g.
 * 'complex float z[MVLA(B)][C]'. The dimension is taken from the macro
 * argument; it must not be hard-wired to A, otherwise every parameter
 * is declared with A rows regardless of what the caller wrote.
 */
#define MVLA(x) __restrict__ (x)
mat_vec(unsigned int A, unsigned int B, complex float out[A * B], const complex float in[A][B]); extern void vec_mat(unsigned int A, unsigned int B, complex float out[A][B], const complex float in[A * B]); // extern complex double vec_dot(int N, const complex float x[N], const complex float y[N]); extern complex float vec_dot(int N, const complex float x[N], const complex float y[N]); extern void vec_saxpy(int N, complex float x[N], complex float alpha, const complex float y[N]); extern void gram_matrix(int N, complex float cov[N][N], int L, const complex float data[N][L]); extern void gram_schmidt(int M, int N, float val[N], complex float vecs[M][N]); extern void gram_matrix2(int N, complex float cov[N * (N + 1) / 2], int L, const complex float data[N][L]); extern void pack_tri_matrix(int N, complex float cov[N * (N + 1) / 2], const complex float m[N][N]); extern void unpack_tri_matrix(int N, complex float m[N][N], const complex float cov[N * (N + 1) / 2]); extern void orthiter_noinit(int M, int N, int iter, float vals[M], complex float out[M][N], const complex float matrix[N][N]); extern void orthiter(int M, int N, int iter, float vals[M], complex float out[M][N], const complex float matrix[N][N]); extern void cholesky(int N, complex float A[N][N]); extern void cholesky_solve(int N, complex float x[N], const complex float L[N][N], const complex float b[N]); extern void cholesky_double(int N, complex double A[N][N]); extern void cholesky_solve_double(int N, complex double x[N], const complex double L[N][N], const complex double b[N]); extern complex float vec_mean(long D, const complex float src[D]); extern void vec_axpy(long N, complex float x[N], complex float alpha, const complex float y[N]); extern void vec_sadd(long D, complex float alpha, complex float dst[D], const complex float src[D]); extern void thomas_algorithm(int N, complex float f[N], const complex float A[N][3], const complex float d[N]); #if __GNUC__ < 5 #include "misc/pcaa.h" #define mat_mul(A, 
B, C, x, y, z) \ mat_mul(A, B, C, x, AR2D_CAST(complex float, A, B, y), AR2D_CAST(complex float, B, C, z)) #define mat_add(A, B, x, y, z) \ mat_add(A, B, x, AR2D_CAST(complex float, A, B, y), AR2D_CAST(complex float, A, B, z)) #define mat_muladd(A, B, C, x, y, z) \ mat_muladd(A, B, C, x, AR2D_CAST(complex float, A, B, y), AR2D_CAST(complex float, B, C, z)) #define mat_copy(A, B, x, y) \ mat_copy(A, B, x, AR2D_CAST(complex float, A, B, y)) #define mat_transpose(A, B, x, y) \ mat_transpose(A, B, x, AR2D_CAST(complex float, A, B, y)) #define mat_adjoint(A, B, x, y) \ mat_adjoint(A, B, x, AR2D_CAST(complex float, A, B, y)) #define mat_inverse(N, x, y) \ mat_inverse(N, x, AR2D_CAST(complex float, N, N, y)) #define mat_vecmul(A, B, x, y, z) \ mat_vecmul(A, B, x, AR2D_CAST(complex float, A, B, y), z) #define mat_kron(A, B, C, D, x, y, z) \ mat_kron(A, B, C, D, x, AR2D_CAST(complex float, A, B, y), AR2D_CAST(complex float, C, D, z)) #define mat_vec(A, B, x, y) \ mat_vec(A, B, x, AR2D_CAST(complex float, A, B, y)) #define pack_tri_matrix(N, cov, m) \ pack_tri_matrix(N, cov, AR2D_CAST(complex float, N, N, m)) #define orthiter(M, N, iter, vals, out, matrix) \ orthiter(M, N, iter, vals, out, AR2D_CAST(complex float, N, N, matrix)) #define orthiter_noinit(M, N, iter, vals, out, matrix) \ orthiter_noinit(M, N, iter, vals, out, AR2D_CAST(complex float, N, N, matrix)) #define gram_matrix2(N, cov, L, data) \ gram_matrix2(N, cov, L, AR2D_CAST(complex float, N, L, data)) #define cholesky_solve(N, x, L, b) \ cholesky_solve(N, x, AR2D_CAST(complex float, N, N, L), b) #endif bart-0.4.02/src/num/loop.c000066400000000000000000000056511320577655200152530ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013 Martin Uecker * * * various functions built around md_loop * No GPU support at the moment! 
*/ #include #include "num/multind.h" #include "loop.h" // typedef complex float (*sample_fun_t)(void* _data, const long pos[]); struct sample_data { unsigned int N; const long* strs; complex float* out; void* data; sample_fun_t fun; }; static void sample_kernel(void* _data, const long pos[]) { struct sample_data* data = _data; data->out[md_calc_offset(data->N, data->strs, pos)] = data->fun(data->data, pos); } void md_zsample(unsigned int N, const long dims[N], complex float* out, void* data, sample_fun_t fun) { struct sample_data sdata; sdata.N = N; long strs[N]; md_calc_strides(N, strs, dims, 1); // we use size = 1 here sdata.strs = strs; sdata.out = out; sdata.data = data; sdata.fun = fun; md_loop(N, dims, &sdata, sample_kernel); } void md_parallel_zsample(unsigned int N, const long dims[N], complex float* out, void* data, sample_fun_t fun) { struct sample_data sdata; sdata.N = N; long strs[N]; md_calc_strides(N, strs, dims, 1); // we use size = 1 here sdata.strs = strs; sdata.out = out; sdata.data = data; sdata.fun = fun; md_parallel_loop(N, dims, ~0u, &sdata, sample_kernel); } struct map_data { unsigned int N; const long* strs; const complex float* in; void* data; map_fun_data_t fun; }; static complex float map_kernel(void* _data, const long pos[]) { struct map_data* data = _data; return data->fun(data->data, data->in[md_calc_offset(data->N, data->strs, pos)]); } static void md_zmap_const(unsigned int N, const long dims[N], complex float* out, const complex float* in, void* data, map_fun_data_t fun) { struct map_data sdata; sdata.N = N; long strs[N]; md_calc_strides(N, strs, dims, 1); // we use size = 1 here sdata.strs = strs; sdata.in = in; sdata.data = data; sdata.fun = fun; md_zsample(N, dims, out, &sdata, map_kernel); } static complex float map_data_kernel(void* _data, complex float arg) { map_fun_t fun = _data; return fun(arg); } void md_zmap(unsigned int N, const long dims[N], complex float* out, const complex float* in, map_fun_t fun) { md_zmap_const(N, 
dims, out, in, (void*)fun, map_data_kernel); } struct gradient_data { unsigned int N; const complex float* grad; }; static complex float gradient_kernel(void* _data, const long pos[]) { struct gradient_data* data = _data; complex float val = 0.; for (unsigned int i = 0; i < data->N; i++) val += pos[i] * data->grad[i]; return val; } void md_zgradient(unsigned int N, const long dims[N], complex float* out, const complex float grad[N]) { struct gradient_data data = { N, grad }; md_zsample(N, dims, out, &data, gradient_kernel); } bart-0.4.02/src/num/loop.h000066400000000000000000000021321320577655200152470ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include #include "misc/cppwrap.h" typedef complex float (*sample_fun_t)(void* _data, const long pos[]); extern void md_zsample(unsigned int N, const long dims[__VLA(N)], complex float* z, void* data, sample_fun_t fun); extern void md_parallel_zsample(unsigned int N, const long dims[__VLA(N)], complex float* z, void* data, sample_fun_t fun); extern void md_zgradient(unsigned int N, const long dims[__VLA(N)], complex float* out, const complex float grad[__VLA(N)]); typedef complex float (*map_fun_data_t)(void* _data, complex float arg); typedef complex float (*map_fun_t)(complex float arg); extern void md_zmap_data(unsigned int N, const long dims[__VLA(N)], complex float* out, const complex float* in, void* data, map_fun_data_t fun); extern void md_zmap(unsigned int N, const long dims[__VLA(N)], complex float* out, const complex float* in, map_fun_t fun); #include "misc/cppwrap.h" bart-0.4.02/src/num/mdfft.c000066400000000000000000000161141320577655200153760ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Author: * 2015 Martin Uecker */ #define _GNU_SOURCE #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "misc/misc.h" #include "mdfft.h" static void rot45z2(unsigned int D, unsigned int M, const long dim[D], const long ostr[D], complex float* optr, const long istr[D], const complex float* iptr) { assert(M < D); assert(2 == dim[M]); assert(optr != iptr); long dims2[D]; md_copy_dims(D, dims2, dim); dims2[M] = 1; long ostr2[D]; md_copy_strides(D, ostr2, ostr); ostr2[M] *= 2; long istr2[D]; md_copy_strides(D, istr2, istr); istr2[M] *= 2; md_zadd2(D, dims2, ostr2, optr , istr2, iptr, istr2, ((void*)iptr) + istr[M]); md_zsub2(D, dims2, ostr2, ((void*)optr) + ostr[M], istr2, iptr, istr2, ((void*)iptr) + istr[M]); } static unsigned int find_bit(unsigned long N) { return ffsl(N) - 1; } static unsigned int next_powerof2(unsigned int x) { x--; for (unsigned int i = 0, n = 1; i < 6; i++, n *= 2) x = (x >> n) | x; return x + 1; } static void compute_chirp(unsigned int L, bool dir, unsigned int M, complex float krn[M]) { krn[0] = 1.; for (unsigned int i = 1; i < M; i++) krn[i] = 0.; for (unsigned int i = 1; i < M; i++) { if (i < L) { complex float v = cexpf((dir ? -1. : 1.) * M_PI * 1.i * (float)(i * i) / (float)L); krn[i] = v; krn[M - i] = v; } } } static void bluestein(unsigned int N, const long dims[N], unsigned long flags, unsigned long dirs, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* in) { unsigned int D = find_bit(flags); unsigned int M = next_powerof2(2 * dims[D] - 1); assert(M >= 2 * dims[D] - 1); assert(0 == MD_CLEAR(flags, D)); /* Bluestein * * Transform DFT into convolution according to * * \ksi_N^{nk} = \ksi_N^{-(k-n)^2/2 + n^2/2 + k^2/2} * * ... and use fft of different size to implement it. 
*/ long kdims[N]; md_singleton_dims(N, kdims); kdims[D] = M; long kstrs[N]; md_calc_strides(N, kstrs, kdims, CFL_SIZE); complex float* xkrn = md_alloc(N, kdims, CFL_SIZE); compute_chirp(dims[D], MD_IS_SET(dirs, D), M, xkrn); complex float* krn = md_alloc_sameplace(N, kdims, CFL_SIZE, dst); md_copy(N, kdims, krn, xkrn, CFL_SIZE); md_free(xkrn); complex float* fkrn = md_alloc_sameplace(N, kdims, CFL_SIZE, dst); md_fft(N, kdims, MD_BIT(D), MD_FFT_FORWARD, fkrn, krn); long bdims[N]; md_copy_dims(N, bdims, dims); bdims[D] = M; long bstrs[N]; md_calc_strides(N, bstrs, bdims, CFL_SIZE); complex float* btmp = md_alloc_sameplace(N, bdims, CFL_SIZE, dst); md_clear(N, bdims, btmp, CFL_SIZE); md_zmulc2(N, dims, bstrs, btmp, istrs, in, kstrs, krn); md_fft2(N, bdims, MD_BIT(D), MD_FFT_INVERSE, bstrs, btmp, bstrs, btmp); md_zmul2(N, bdims, bstrs, btmp, bstrs, btmp, kstrs, fkrn); md_fft2(N, bdims, MD_BIT(D), MD_FFT_FORWARD, bstrs, btmp, bstrs, btmp); md_zsmul(1, MD_DIMS(M), krn, krn, 1. / (float)M); md_zmulc2(N, dims, ostrs, dst, bstrs, btmp, kstrs, krn); md_free(fkrn); md_free(krn); md_free(btmp); } static void compute_twiddle(unsigned int n, unsigned int m, complex float t[n][m]) { for (unsigned int i = 0; i < n; i++) for (unsigned int j = 0; j < m; j++) t[i][j] = cexpf(-2.i * M_PI * (float)(i * j) / (float)(n * m)); } static void cooley_tukey(unsigned int N, const long dims[N], unsigned int D, unsigned int a, unsigned int b, unsigned long flags, unsigned long dirs, const long ostr[N], complex float* dst, const long istr[N], const complex float* in) { /* Cooley-Tukey * * With N = A * B, \ksi_N^N = 1, split into smaller FFTs: * * \ksi_N^{(B * i + j)(l + A * k)} * = \ksi_N^{B * i * l + A * j * k + j * l} * = \ksi_N^{B * i * l} \ksi_N^{A * j * k} \ksi_N^{j * l} * = \ksi_A^{i * l} \ksi_N^{j * l} \ksi_B^{j * k} */ long xdims[N + 1]; md_copy_dims(N, xdims, dims); xdims[D] = a; xdims[N] = b; long astr[N + 1]; md_copy_strides(N, astr, istr); astr[D] = istr[D] * 1; astr[N] = istr[D] * a; 
long bstr[N + 1]; md_copy_strides(N, bstr, ostr); bstr[D] = ostr[D] * b; bstr[N] = ostr[D] * 1; unsigned long flags1 = 0; unsigned long flags2 = MD_CLEAR(flags, D); long tdims[N + 1]; long tstrs[N + 1]; md_select_dims(N + 1, MD_BIT(D) | MD_BIT(N), tdims, xdims); md_calc_strides(N + 1, tstrs, tdims, CFL_SIZE); complex float (*xtw)[b][a] = xmalloc(a * b * CFL_SIZE); compute_twiddle(b, a, *xtw); complex float* tw = md_alloc_sameplace(N + 1, tdims, CFL_SIZE, dst); md_copy(N + 1, tdims, tw, &(*xtw)[0][0], CFL_SIZE); free(xtw); md_fft2(N + 1, xdims, MD_SET(flags1, N), dirs, bstr, dst, astr, in); (MD_IS_SET(dirs, D) ? md_zmulc2 : md_zmul2)(N + 1, xdims, bstr, dst, bstr, dst, tstrs, tw); md_fft2(N + 1, xdims, MD_SET(flags2, D), dirs, bstr, dst, bstr, dst); md_free(tw); } static bool check_strides(unsigned int N, const long ostr[N], const long istr[N]) { bool ret = true; for (unsigned int i = 0; i < N; i++) ret = ret & (ostr[i] == istr[i]); return ret; } static unsigned int find_factor(unsigned int N) { for (unsigned int i = 2; i < N; i++) if (0 == N % i) return i; return N; } void md_fft2(unsigned int N, const long dims[N], unsigned long flags, unsigned long dirs, const long ostr[N], complex float* dst, const long istr[N], const complex float* in) { if (0 == flags) { if (dst == in) { if (!check_strides(N, ostr, istr)) { // detect and use inplace transpose? 
long strs[N]; md_calc_strides(N, strs, dims, CFL_SIZE); complex float* tmp = md_alloc_sameplace(N, dims, CFL_SIZE, dst); md_copy2(N, dims, strs, tmp, istr, in, CFL_SIZE); md_copy2(N, dims, ostr, dst, strs, tmp, CFL_SIZE); md_free(tmp); } return; } md_copy2(N, dims, ostr, dst, istr, in, CFL_SIZE); return; } unsigned int D = find_bit(flags); if (1 == dims[D]) { md_fft2(N, dims, MD_CLEAR(flags, D), dirs, ostr, dst, istr, in); return; } if (2 == dims[D]) { if (dst == in) { long strs[N]; md_calc_strides(N, strs, dims, CFL_SIZE); complex float* tmp = md_alloc_sameplace(N, dims, CFL_SIZE, dst); md_fft2(N, dims, MD_CLEAR(flags, D), dirs, strs, tmp, istr, in); rot45z2(N, D, dims, ostr, dst, strs, tmp); md_free(tmp); } else { // the nufft may do the transpose rot45z2(N, D, dims, ostr, dst, istr, in); md_fft2(N, dims, MD_CLEAR(flags, D), dirs, ostr, dst, ostr, dst); } return; } unsigned int a = find_factor(dims[D]); unsigned int b = dims[D] / a; if (1 == b) { // prime bluestein(N, dims, MD_BIT(D), dirs, ostr, dst, istr, in); md_fft2(N, dims, MD_CLEAR(flags, D), dirs, ostr, dst, ostr, dst); } else { cooley_tukey(N, dims, D, a, b, flags, dirs, ostr, dst, istr, in); } } void md_fft(unsigned int N, const long dims[N], unsigned long flags, unsigned long dirs, complex float* dst, const complex float* in) { long strs[N]; md_calc_strides(N, strs, dims, CFL_SIZE); md_fft2(N, dims, flags, dirs, strs, dst, strs, in); } bart-0.4.02/src/num/mdfft.h000066400000000000000000000007061320577655200154030ustar00rootroot00000000000000 #ifndef _MD_FFT_H #define _MD_FFT_H 1 #define MD_FFT_FORWARD 0u #define MD_FFT_INVERSE (~0u) extern void md_fft2(unsigned int N, const long dims[N], unsigned long flags, unsigned long dirs, const long ostr[N], complex float* dst, const long istr[N], const complex float* in); extern void md_fft(unsigned int N, const long dims[N], unsigned long flags, unsigned long dirs, complex float* dst, const complex float* in); #endif // _MD_FFT_H 
bart-0.4.02/src/num/mem.c000066400000000000000000000076001320577655200150540ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2016 Martin Uecker * */ #include #include #ifdef _OPENMP #include #endif #include "misc/misc.h" #include "misc/debug.h" #include "mem.h" bool memcache = true; void memcache_off(void) { memcache = false; } struct mem_s { const void* ptr; size_t len; bool device; bool free; int device_id; int thread_id; struct mem_s* next; }; static struct mem_s* mem_list = NULL; static bool inside_p(const struct mem_s* rptr, const void* ptr) { return (ptr >= rptr->ptr) && (ptr < rptr->ptr + rptr->len); } static struct mem_s* search(const void* ptr, bool remove) { struct mem_s* rptr = NULL; #pragma omp critical { struct mem_s** nptr = &mem_list; while (true) { rptr = *nptr; if (NULL == rptr) break; if (inside_p(rptr, ptr)) { if (remove) *nptr = rptr->next; break; } nptr = &(rptr->next); } } return rptr; } static bool free_check_p(const struct mem_s* rptr, size_t size, int dev, int tid) { return (rptr->free && (rptr->device_id == dev) && (rptr->len >= size) && ((-1 == tid) || (rptr->thread_id == tid))); } static struct mem_s** find_free_unsafe(size_t size, int dev, int tid) { struct mem_s* rptr = NULL; struct mem_s** nptr = &mem_list; while (true) { rptr = *nptr; if (NULL == rptr) break; if (free_check_p(rptr, size, dev, tid)) break; nptr = &(rptr->next); } return nptr; } static struct mem_s* find_free(size_t size, int dev) { struct mem_s* rptr = NULL; #pragma omp critical { rptr = *find_free_unsafe(size, dev, -1); if (NULL != rptr) rptr->free = false; } return rptr; } static void insert(const void* ptr, size_t len, bool device, int dev) { PTR_ALLOC(struct mem_s, nptr); nptr->ptr = ptr; nptr->len = len; nptr->device = device; 
nptr->device_id = dev; #ifdef _OPENMP nptr->thread_id = omp_get_thread_num(); #else nptr->thread_id = -1; #endif nptr->free = false; #pragma omp critical { nptr->next = mem_list; mem_list = PTR_PASS(nptr); } } void memcache_clear(int dev, void (*device_free)(const void*x)) { struct mem_s* nptr = NULL; if (!memcache) return; do { #pragma omp critical { #ifdef _OPENMP int tid = omp_get_thread_num(); #else int tid = -1; #endif struct mem_s** rptr = find_free_unsafe(0, dev, tid); nptr = *rptr; // remove from list if (NULL != nptr) *rptr = nptr->next; } if (NULL != nptr) { assert(nptr->device); debug_printf(DP_DEBUG3, "Freeing %ld bytes. (DID: %d TID: %d)\n\n", nptr->len, nptr->device_id, nptr->thread_id); device_free(nptr->ptr); free(nptr); } } while (NULL != nptr); } bool mem_ondevice(const void* ptr) { if (NULL == ptr) return false; struct mem_s* p = search(ptr, false); bool r = ((NULL != p) && p->device); return r; } bool mem_device_accessible(const void* ptr) { struct mem_s* p = search(ptr, false); return (NULL != p); } void mem_device_free(void* ptr, void (*device_free)(const void* ptr)) { struct mem_s* nptr = search(ptr, !memcache); assert(NULL != nptr); assert(nptr->ptr == ptr); assert(nptr->device); if (memcache) { assert(!nptr->free); nptr->free = true; } else { device_free(ptr); free(nptr); } } void* mem_device_malloc(int device, long size, void* (*device_alloc)(size_t)) { if (memcache) { struct mem_s* nptr = find_free(size, device); if (NULL != nptr) { assert(nptr->device); assert(!nptr->free); #ifdef _OPENMP nptr->thread_id = omp_get_thread_num(); #else nptr->thread_id = -1; #endif return (void*)(nptr->ptr); } } void* ptr = device_alloc(size); insert(ptr, size, true, device); return ptr; } bart-0.4.02/src/num/mem.h000066400000000000000000000005651320577655200150640ustar00rootroot00000000000000 extern void memcache_off(void); extern void memcache_clear(int device, void (*device_free)(const void* x)); extern _Bool mem_ondevice(const void* ptr); extern _Bool 
mem_device_accessible(const void* ptr); extern void mem_device_free(void* ptr, void (*device_free)(const void* x)); extern void* mem_device_malloc(int device, long size, void* (*device_alloc)(size_t)); bart-0.4.02/src/num/multind.c000066400000000000000000001142011320577655200157460ustar00rootroot00000000000000/* Copyright 2013-2015 The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * Copyright 2017. Intel Corporation. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2017 Martin Uecker * 2013 Frank Ong * 2017 Michael J. Anderson * * Generic operations on multi-dimensional arrays. Most functions * come in two flavours: * * 1. A basic version which takes the number of dimensions, an array * of long integers specifing the size of each dimension, the pointers * to the data, and the size of each element and other required parameters. * The data is assumed to be stored in column-major format. * * 2. An extended version which takes an array of long integers which * specifies the strides for each argument. * * All functions should work on CPU and GPU and md_copy can be used * to copy between CPU and GPU. * */ #define _GNU_SOURCE #include #include #include #include #include #include "misc/misc.h" #include "misc/types.h" #include "misc/debug.h" #include "num/optimize.h" #ifdef USE_CUDA #include "num/gpuops.h" #endif #include "multind.h" /** * Generic functions which loops over all dimensions of a set of * multi-dimensional arrays and calls a given function for each position. 
*/ void md_nary(unsigned int C, unsigned int D, const long dim[D], const long* str[C], void* ptr[C], void* data, md_nary_fun_t fun) { if (0 == D) { fun(data, ptr); return; } for (long i = 0; i < dim[D - 1]; i++) { void* moving_ptr[C]; for (unsigned int j = 0; j < C; j++) moving_ptr[j] = ptr[j] + i * str[j][D - 1]; md_nary(C, D - 1, dim, str, moving_ptr, data, fun); } } /** * Generic functions which loops over all dimensions of a set of * multi-dimensional arrays and calls a given function for each position. * This functions tries to parallelize over the dimensions indicated * with flags. */ void md_parallel_nary(unsigned int C, unsigned int D, const long dim[D], unsigned long flags, const long* str[C], void* ptr[C], void* data, md_nary_fun_t fun) { if (0 == flags) { md_nary(C, D, dim, str, ptr, data, fun); return; } long dimc[D]; md_select_dims(D, ~flags, dimc, dim); // Collect all parallel dimensions int nparallel = 0; int parallel_b[D]; long parallel_dim[D]; long total_iterations = 1L; while (0 != flags) { int b = ffsl(flags & -flags) - 1; assert(MD_IS_SET(flags, b)); flags = MD_CLEAR(flags, b); debug_printf(DP_DEBUG4, "Parallelize: %d\n", dim[b]); parallel_b[nparallel] = b; parallel_dim[nparallel] = dim[b]; total_iterations *= parallel_dim[nparallel]; nparallel++; } #pragma omp parallel for for (long i = 0; i < total_iterations; i++) { // Recover place in parallel iteration space long iter_i[D]; long ii = i; for (int p = nparallel - 1; p >= 0; p--) { iter_i[p] = ii % parallel_dim[p]; ii /= parallel_dim[p]; } void* moving_ptr[C]; for (unsigned int j = 0; j < C; j++) { moving_ptr[j] = ptr[j]; for(int p = 0; p < nparallel; p++) moving_ptr[j] += iter_i[p] * str[j][parallel_b[p]]; } md_nary(C, D, dimc, str, moving_ptr, data, fun); } } static void md_parallel_loop_r(unsigned int D, unsigned int N, const long dim[static N], unsigned int flags, const long pos[static N], void* data, md_loop_fun_t fun) { if (0 == D) { fun(data, pos); return; } D--; // we need to make a 
copy because firstprivate needs to see // an array instead of a pointer long pos_copy[N]; for (unsigned int i = 0; i < N; i++) pos_copy[i] = pos[i]; #pragma omp parallel for firstprivate(pos_copy) if ((1 < dim[D]) && (flags & (1 << D))) for (int i = 0; i < dim[D]; i++) { pos_copy[D] = i; md_parallel_loop_r(D, N, dim, flags, pos_copy, data, fun); } } /** * Generic function which loops over all dimensions and calls a given * function passing the current indices as argument. * * Runs fun(data, position) for all position in dim * */ void md_parallel_loop(unsigned int D, const long dim[static D], unsigned long flags, void* data, md_loop_fun_t fun) { long pos[D]; md_parallel_loop_r(D, D, dim, flags, pos, data, fun); } static void md_loop_r(unsigned int D, const long dim[D], long pos[D], void* data, md_loop_fun_t fun) { if (0 == D) { fun(data, pos); return; } D--; for (pos[D] = 0; pos[D] < dim[D]; pos[D]++) md_loop_r(D, dim, pos, data, fun); } /** * Generic function which loops over all dimensions and calls a given * function passing the current indices as argument. * * Runs fun( data, position ) for all position in dim * */ void md_loop(unsigned int D, const long dim[D], void* data, md_loop_fun_t fun) { long pos[D]; md_loop_r(D, dim, pos, data, fun); } /** * Computes the next position. Returns true until last index. */ bool md_next(unsigned int D, const long dims[D], unsigned long flags, long pos[D]) { if (0 == D--) return false; if (md_next(D, dims, flags, pos)) return true; if (MD_IS_SET(flags, D)) { assert((0 <= pos[D]) && (pos[D] < dims[D])); if (++pos[D] < dims[D]) return true; pos[D] = 0; } return false; } /** * Returns offset for position in a multidimensional array * * return pos[0]*strides[0] + ... 
+ pos[D-1]*strides[D-1] * * @param D number of dimensions * @param dim dimensions array */ long md_calc_offset(unsigned int D, const long strides[D], const long position[D]) { long pos = 0; for (unsigned int i = 0; i < D; i++) pos += strides[i] * position[i]; return pos; } static long md_calc_size_r(unsigned int D, const long dim[D], size_t size) { if (0 == D) return size; return md_calc_size_r(D - 1, dim, size * dim[D - 1]); } /** * Returns the number of elements * * return dim[0]*dim[1]*...*dim[D-1] * * @param D number of dimensions * @param dim dimensions array */ long md_calc_size(unsigned int D, const long dim[D]) { return md_calc_size_r(D, dim, 1); } /** * Computes the number of smallest dimensions which are stored * contineously, i.e. can be accessed as a block of memory. * */ unsigned int md_calc_blockdim(unsigned int D, const long dim[D], const long str[D], size_t size) { long dist = size; unsigned int i = 0; for (i = 0; i < D; i++) { if (!((str[i] == dist) || (dim[i] == 1))) break; dist *= dim[i]; } return i; } /** * Copy dimensions specified by flags and set remaining dimensions to 1 * * odims = [ 1 idims[1] idims[2] 1 1 idims[5] ] * * @param D number of dimensions * @param flags bitmask specifying which dimensions to copy * @param odims output dimensions * @param idims input dimensions */ void md_select_dims(unsigned int D, unsigned long flags, long odims[D], const long idims[D]) { md_copy_dims(D, odims, idims); for (unsigned int i = 0; i < D; i++) if (!MD_IS_SET(flags, i)) odims[i] = 1; } /** * Copy dimensions * * odims[i] = idims[i] */ void md_copy_dims(unsigned int D, long odims[D], const long idims[D]) { memcpy(odims, idims, D * sizeof(long)); } /** * Copy strides * * ostrs[i] = istrs[i] */ void md_copy_strides(unsigned int D, long ostrs[D], const long istrs[D]) { memcpy(ostrs, istrs, D * sizeof(long)); } /** * Set all dimensions to value * * dims[i] = val */ void md_set_dims(unsigned int D, long dims[D], long val) { for (unsigned int i = 0; i < D; 
i++) dims[i] = val; } /** * returns whether or not @param pos is a valid index of an array of dimension @param dims */ bool md_is_index(unsigned int D, const long pos[D], const long dims[D]) { if (D == 0) return true; return ((pos[0] >= 0) && (pos[0] < dims[0]) && md_is_index(D - 1, pos + 1, dims + 1)); } /** * return whether some other dimensions are >1 */ bool md_check_dimensions(unsigned int N, const long dims[N], unsigned int flags) { long d[N]; md_select_dims(N, ~flags, d, dims); return (1 != md_calc_size(N, d)); } /* * compute non-trivial (> 1) dims */ unsigned long md_nontriv_dims(unsigned int D, const long dims[D]) { unsigned long flags = 0; for (unsigned int i = 0; i < D; i++) if (dims[i] > 1) flags = MD_SET(flags, i); return flags; } /** * Set all dimensions to one * * dims[i] = 1 */ void md_singleton_dims(unsigned int D, long dims[D]) { for (unsigned int i = 0; i < D; i++) dims[i] = 1; } /** * Set all strides to one * * dims[i] = 1 */ void md_singleton_strides(unsigned int D, long strs[D]) { for (unsigned int i = 0; i < D; i++) strs[i] = 0; } /** * Check dimensions for compatibility. Dimensions must be equal or * where indicated by a set bit in flags one must be equal to one * in atleast one of the arguments. */ bool md_check_compat(unsigned int D, unsigned long flags, const long dim1[D], const long dim2[D]) { if (0 == D) return true; D--; if ((dim1[D] == dim2[D]) || (MD_IS_SET(flags, D) && ((1 == dim1[D]) || (1 == dim2[D])))) return md_check_compat(D, flags, dim1, dim2); return false; } void md_merge_dims(unsigned int N, long out_dims[N], const long dims1[N], const long dims2[N]) { assert(md_check_compat(N, ~0, dims1, dims2)); for (unsigned int i = 0; i < N; i++) out_dims[i] = (1 == dims1[i]) ? 
dims2[i] : dims1[i]; } /** * dim1 must be bounded by dim2 where a bit is set */ bool md_check_bounds(unsigned int D, unsigned long flags, const long dim1[D], const long dim2[D]) { if (0 == D--) return true; if (!MD_IS_SET(flags, D) || (dim1[D] <= dim2[D])) return md_check_bounds(D, flags, dim1, dim2); return false; } /** * Set the output's flagged dimensions to the minimum of the two input dimensions * * odims = [ MIN(idims1[0],idims2[0] ... MIN(idims1[D-1],idims2[D-1]) ] * * @param D number of dimensions * @param flags bitmask specifying which dimensions to minimize * @param odims output dimensions * @param idims1 input 1 dimensions * @param idims2 input 2 dimensions */ void md_min_dims(unsigned int D, unsigned long flags, long odims[D], const long idims1[D], const long idims2[D]) { for (unsigned int i = 0; i < D; i++) if (MD_IS_SET(flags, i)) odims[i] = MIN(idims1[i], idims2[i]); } /** * Set the output's flagged dimensions to the maximum of the two input dimensions * * odims = [ MAX(idims1[0],idims2[0] ... 
MAX(idims1[D-1],idims2[D-1]) ] * * @param D number of dimensions * @param flags bitmask specifying which dimensions to maximize * @param odims output dimensions * @param idims1 input 1 dimensions * @param idims2 input 2 dimensions */ void md_max_dims(unsigned int D, unsigned long flags, long odims[D], const long idims1[D], const long idims2[D]) { for (unsigned int i = 0; i < D; i++) if (MD_IS_SET(flags, i)) odims[i] = MAX(idims1[i], idims2[i]); } struct data_s { size_t size; #ifdef USE_CUDA bool use_gpu; #endif }; static void nary_clear(struct nary_opt_data_s* opt_data, void* ptr[]) { struct data_s* data = opt_data->data_ptr; size_t size = data->size * opt_data->size; #ifdef USE_CUDA if (data->use_gpu) { cuda_clear(size, ptr[0]); return; } #endif memset(ptr[0], 0, size); } /** * Zero out array (with strides) * * ptr[i] = 0 */ void md_clear2(unsigned int D, const long dim[D], const long str[D], void* ptr, size_t size) { const long (*nstr[1])[D] = { (const long (*)[D])str }; #ifdef USE_CUDA struct data_s data = { size, cuda_ondevice(ptr) }; #else struct data_s data = { size }; #endif unsigned long flags = 0; for (unsigned int i = 0; i < D; i++) if (0 == str[i]) flags |= MD_BIT(i); long dim2[D]; md_select_dims(D, ~flags, dim2, dim); optimized_nop(1, MD_BIT(0), D, dim2, nstr, (void*[1]){ ptr }, (size_t[1]){ size }, nary_clear, &data); } /** * Calculate strides in column-major format * (smallest index is sequential) * * @param D number of dimensions * @param array of calculates strides * @param dim array of dimensions * @param size of a single element */ long* md_calc_strides(unsigned int D, long str[D], const long dim[D], size_t size) { long old = size; for (unsigned int i = 0; i < D; i++) { str[i] = (1 == dim[i]) ? 
0 : old; old *= dim[i]; } return str; } /** * Zero out array (without strides) * * ptr[i] = 0 * * @param D number of dimensions * @param dim dimensions array * @param ptr pointer to data to clear * @param size sizeof() */ void md_clear(unsigned int D, const long dim[D], void* ptr, size_t size) { md_clear2(D, dim, MD_STRIDES(D, dim, size), ptr, size); } struct strided_copy_s { long sizes[2]; long ostr; long istr; }; #ifdef USE_CUDA static void nary_strided_copy(void* _data, void* ptr[]) { struct strided_copy_s* data = _data; // printf("CUDA 2D copy %ld %ld %ld %ld %ld %ld\n", data->sizes[0], data->sizes[1], data->ostr, data->istr, (long)ptr[0], (long)ptr[1]); cuda_memcpy_strided(data->sizes, data->ostr, ptr[0], data->istr, ptr[1]); } #endif static void nary_copy(struct nary_opt_data_s* opt_data, void* ptr[]) { struct data_s* data = opt_data->data_ptr; size_t size = data->size * opt_data->size; #ifdef USE_CUDA if (data->use_gpu) { cuda_memcpy(size, ptr[0], ptr[1]); return; } #endif memcpy(ptr[0], ptr[1], size); } /** * Copy array (with strides) * * optr[i] = iptr[i] */ void md_copy2(unsigned int D, const long dim[D], const long ostr[D], void* optr, const long istr[D], const void* iptr, size_t size) { #if 0 // this is for a fun comparison between our copy engine and FFTW extern void fft2(unsigned int D, const long dim[D], unsigned int flags, const long ostr[D], void* optr, const long istr[D], const void* iptr); if (sizeof(complex float) == size) fft2(D, dim, 0, ostr, optr, istr, iptr); #endif #ifndef USE_CUDA struct data_s data = { size }; #else struct data_s data = { size, cuda_ondevice(optr) || cuda_ondevice(iptr) }; #if 1 long tostr[D]; long tistr[D]; long tdims[D]; md_copy_strides(D, tostr, ostr); md_copy_strides(D, tistr, istr); md_copy_dims(D, tdims, dim); long (*nstr2[2])[D] = { &tostr, &tistr }; int ND = optimize_dims(2, D, tdims, nstr2); size_t sizes[2] = { size, size }; int skip = min_blockdim(2, ND, tdims, nstr2, sizes); if (data.use_gpu && (ND - skip == 
1)) { // FIXME: the test was > 0 which would optimize transpose // but failes in the second cuda_memcpy_strided call // probably because of alignment restrictions const long* nstr[2] = { *nstr2[0] + skip, *nstr2[1] + skip }; void* nptr[2] = { optr, (void*)iptr }; long sizes[2] = { md_calc_size(skip, tdims) * size, tdims[skip] }; struct strided_copy_s data = { { sizes[0], sizes[1] } , (*nstr2[0])[skip], (*nstr2[1])[skip] }; skip++; md_nary(2, ND - skip, tdims + skip , nstr, nptr, &data, &nary_strided_copy); return; } #endif #endif const long (*nstr[2])[D] = { (const long (*)[D])ostr, (const long (*)[D])istr }; optimized_nop(2, MD_BIT(0), D, dim, nstr, (void*[2]){ optr, (void*)iptr }, (size_t[2]){ size, size }, nary_copy, &data); } /** * Copy array (without strides) * * optr[i] = iptr[i] */ void md_copy(unsigned int D, const long dim[D], void* optr, const void* iptr, size_t size) { long str[D]; md_calc_strides(D, str, dim, size); md_copy2(D, dim, str, optr, str, iptr, size); } #ifdef USE_CUDA // copied from flpmath.c static void* gpu_constant(const void* vp, size_t size) { return md_gpu_move(1, (long[1]){ 1 }, vp, size); } #endif /** * Fill array with value pointed by pointer (with strides) * * ptr[i] = iptr[0] */ void md_fill2(unsigned int D, const long dim[D], const long str[D], void* ptr, const void* iptr, size_t size) { #ifdef USE_CUDA if (cuda_ondevice(ptr) && (!cuda_ondevice(iptr))) { void* giptr = gpu_constant(iptr, size); md_fill2(D, dim, str, ptr, giptr, size); md_free(giptr); return; } #endif long istr[D]; md_singleton_strides(D, istr); md_copy2(D, dim, str, ptr, istr, iptr, size); } /** * Fill array with value pointed by pointer (without strides) * * ptr[i] = iptr[0] */ void md_fill(unsigned int D, const long dim[D], void* ptr, const void* iptr, size_t size) { md_fill2(D, dim, MD_STRIDES(D, dim, size), ptr, iptr, size); } struct swap_s { unsigned int M; size_t size; }; static void nary_swap(struct nary_opt_data_s* opt_data, void* ptr[]) { const struct 
swap_s* data = opt_data->data_ptr; size_t size = data->size * opt_data->size; unsigned int M = data->M; char* tmp = (size < 32) ? alloca(size) : xmalloc(size); #ifdef USE_CUDA assert(!cuda_ondevice(ptr[0])); assert(!cuda_ondevice(ptr[1])); #endif memcpy(tmp, ptr[0], size); for (unsigned int i = 0; i < M - 1; i++) memcpy(ptr[i], ptr[i + 1], size); memcpy(ptr[M - 1], tmp, size); if (size >= 32) xfree(tmp); } /** * Swap values between a number of arrays (with strides) */ void md_circular_swap2(unsigned int M, unsigned int D, const long dims[D], const long* strs[M], void* ptr[M], size_t size) { size_t sizes[M]; for (unsigned int i = 0; i < M; i++) sizes[i] = size; struct swap_s data = { M, size }; const long (*nstrs[M])[D]; for (unsigned int i = 0; i < M; i++) nstrs[i] = (const long (*)[D])strs[i]; optimized_nop(M, (1 << M) - 1, D, dims, nstrs, ptr, sizes, nary_swap, &data); } /** * Swap values between a number of arrays */ void md_circular_swap(unsigned M, unsigned int D, const long dims[D], void* ptr[M], size_t size) { long strs[M][D]; md_calc_strides(D, strs[0], dims, size); const long* strp[M]; strp[0] = strs[0]; for (unsigned int i = 1; i < M; i++) { md_copy_strides(D, strs[i], strs[0]); strp[i] = strs[i]; } md_circular_swap2(M, D, dims, strp, ptr, size); } /** * Swap values between two arrays (with strides) * * iptr[i] = optr[i] and optr[i] = iptr[i] */ void md_swap2(unsigned int D, const long dim[D], const long ostr[D], void* optr, const long istr[D], void* iptr, size_t size) { md_circular_swap2(2, D, dim, (const long*[2]){ ostr, istr }, (void*[2]){ optr, iptr }, size); } /** * Swap values between two arrays (without strides) * * iptr[i] = optr[i] and optr[i] = iptr[i] */ void md_swap(unsigned int D, const long dim[D], void* optr, void* iptr, size_t size) { long str[D]; md_calc_strides(D, str, dim, size); md_swap2(D, dim, str, optr, str, iptr, size); } /** * Move a block from an array to another array (with strides) * */ void md_move_block2(unsigned int D, const 
long dim[D], const long opos[D], const long odim[D], const long ostr[D], void* optr, const long ipos[D], const long idim[D], const long istr[D], const void* iptr, size_t size) { for (unsigned int i = 0; i < D; i++) { assert(dim[i] <= odim[i]); assert(dim[i] <= idim[i]); assert((0 <= opos[i]) && (opos[i] <= odim[i] - dim[i])); assert((0 <= ipos[i]) && (ipos[i] <= idim[i] - dim[i])); } long ioff = md_calc_offset(D, istr, ipos); long ooff = md_calc_offset(D, ostr, opos); md_copy2(D, dim, ostr, optr + ooff, istr, iptr + ioff, size); } /** * Move a block from an array to another array (without strides) * */ void md_move_block(unsigned int D, const long dim[D], const long opos[D], const long odim[D], void* optr, const long ipos[D], const long idim[D], const void* iptr, size_t size) { md_move_block2(D, dim, opos, odim, MD_STRIDES(D, odim, size), optr, ipos, idim, MD_STRIDES(D, idim, size), iptr, size); } /** * Copy a block from an array to another array (with strides) * * Block dimensions are min(idim , odim) * * if idim[d] > odim[d], then optr[i] = iptr[pos + i] for 0 <= i < odim[d] * * if idim[d] < odim[d], then optr[pos + i] = iptr[i] for 0 <= i < idim[d] * */ void md_copy_block2(unsigned int D, const long pos[D], const long odim[D], const long ostr[D], void* optr, const long idim[D], const long istr[D], const void* iptr, size_t size) { long dim[D]; long ipos[D]; long opos[D]; for (unsigned int i = 0; i < D; i++) { assert((idim[i] != odim[i]) || (0 == pos[i])); dim[i] = MIN(odim[i], idim[i]); ipos[i] = 0; opos[i] = 0; if (idim[i] != dim[i]) ipos[i] = pos[i]; if (odim[i] != dim[i]) opos[i] = pos[i]; } md_move_block2(D, dim, opos, odim, ostr, optr, ipos, idim, istr, iptr, size); } /** * Copy a block from an array to another array (without strides) * * Block dimensions are min(idim , odim) * * if idim[d] > odim[d], then optr[i] = iptr[pos + i] for 0 <= i < odim[d] * * if idim[d] < odim[d], then optr[pos + i] = iptr[i] for 0 <= i < idim[d] * */ void md_copy_block(unsigned 
int D, const long pos[D], const long odim[D], void* optr, const long idim[D], const void* iptr, size_t size) { md_copy_block2(D, pos, odim, MD_STRIDES(D, odim, size), optr, idim, MD_STRIDES(D, idim, size), iptr, size); } /** * Resize an array by zero-padding or by truncation at the end. * * optr = [iptr 0 0 0 0] * */ void md_resize(unsigned int D, const long odim[D], void* optr, const long idim[D], const void* iptr, size_t size) { long pos[D]; memset(pos, 0, D * sizeof(long)); md_clear(D, odim, optr, size); md_copy_block(D, pos, odim, optr, idim, iptr, size); } /** * Resize an array by zero-padding or by truncation at both ends symmetrically. * * optr = [0 0 iptr 0 0] * */ void md_resize_center(unsigned int D, const long odim[D], void* optr, const long idim[D], const void* iptr, size_t size) { // the definition of the center position corresponds // to the one used in the FFT. long pos[D]; for (unsigned int i = 0; i < D; i++) pos[i] = labs((odim[i] / 2) - (idim[i] / 2)); md_clear(D, odim, optr, size); md_copy_block(D, pos, odim, optr, idim, iptr, size); } /** * Extract slice from array specified by flags (with strides) * * optr = iptr(pos[0], :, pos[2], :, :) * */ void md_slice2(unsigned int D, unsigned long flags, const long pos[D], const long dim[D], const long ostr[D], void* optr, const long istr[D], const void* iptr, size_t size) { long odim[D]; md_select_dims(D, ~flags, odim, dim); md_copy_block2(D, pos, odim, ostr, optr, dim, istr, iptr, size); } /** * Extract slice from array specified by flags (with strides) * * optr = iptr(pos[0], :, pos[2], :, :) * */ void md_slice(unsigned int D, unsigned long flags, const long pos[D], const long dim[D], void* optr, const void* iptr, size_t size) { long odim[D]; md_select_dims(D, ~flags, odim, dim); md_slice2(D, flags, pos, dim, MD_STRIDES(D, odim, size), optr, MD_STRIDES(D, dim, size), iptr, size); } /** * Permute array (with strides) * * optr[order[i]] = iptr[i] * */ void md_permute2(unsigned int D, const unsigned int 
order[D], const long odims[D], const long ostr[D], void* optr, const long idims[D], const long istr[D], const void* iptr, size_t size) { unsigned int flags = 0; long ostr2[D]; for (unsigned int i = 0; i < D; i++) { assert(order[i] < D); assert(odims[i] == idims[order[i]]); flags = MD_SET(flags, order[i]); ostr2[order[i]] = ostr[i]; } assert(MD_BIT(D) == flags + 1); md_copy2(D, idims, ostr2, optr, istr, iptr, size); } /** * Permute array (without strides) * * optr[order[i]] = iptr[i] * */ void md_permute(unsigned int D, const unsigned int order[D], const long odims[D], void* optr, const long idims[D], const void* iptr, size_t size) { md_permute2(D, order, odims, MD_STRIDES(D, odims, size), optr, idims, MD_STRIDES(D, idims, size), iptr, size); } /** * Permute dimensions * * */ void md_permute_dims(unsigned int D, const unsigned int order[D], long odims[D], const long idims[D]) { for (unsigned int i = 0; i < D; i++) odims[i] = idims[order[i]]; } static void md_transpose_order(unsigned int D, unsigned int order[D], unsigned int dim1, unsigned int dim2) { assert(dim1 < D); assert(dim2 < D); for (unsigned int i = 0; i < D; i++) order[i] = i; order[dim1] = dim2; order[dim2] = dim1; } /** * Transpose dimensions * * */ void md_transpose_dims(unsigned int D, unsigned int dim1, unsigned int dim2, long odims[D], const long idims[D]) { unsigned int order[D]; md_transpose_order(D, order, dim1, dim2); md_permute_dims(D, order, odims, idims); } /** * Tranpose array (with strides) * * optr[dim2] = iptr[dim1] * * optr[dim1] = iptr[dim2] * */ void md_transpose2(unsigned int D, unsigned int dim1, unsigned int dim2, const long odims[D], const long ostr[D], void* optr, const long idims[D], const long istr[D], const void* iptr, size_t size) { for (unsigned int i = 0; i < D; i++) if ((i != dim1) && (i != dim2)) assert(odims[i] == idims[i]); assert(odims[dim1] == idims[dim2]); assert(odims[dim2] == idims[dim1]); unsigned int order[D]; md_transpose_order(D, order, dim1, dim2); 
md_permute2(D, order, odims, ostr, optr, idims, istr, iptr, size); } /** * Tranpose array (without strides) * * optr[dim2] = iptr[dim1] * * optr[dim1] = iptr[dim2] * */ void md_transpose(unsigned int D, unsigned int dim1, unsigned int dim2, const long odims[D], void* optr, const long idims[D], const void* iptr, size_t size) { md_transpose2(D, dim1, dim2, odims, MD_STRIDES(D, odims, size), optr, idims, MD_STRIDES(D, idims, size), iptr, size); } static void md_flip_inpl2(unsigned int D, const long dims[D], unsigned long flags, const long str[D], void* ptr, size_t size); /** * Swap input and output while flipping selected dimensions * at the same time. */ void md_swap_flip2(unsigned int D, const long dims[D], unsigned long flags, const long ostr[D], void* optr, const long istr[D], void* iptr, size_t size) { #if 1 int i; for (i = D - 1; i >= 0; i--) if ((1 != dims[i]) && MD_IS_SET(flags, i)) break; if (-1 == i) { md_swap2(D, dims, ostr, optr, istr, iptr, size); return; } assert(1 < dims[i]); assert(ostr[i] != 0); assert(istr[i] != 0); long dims2[D]; md_copy_dims(D, dims2, dims); dims2[i] = dims[i] / 2; long off = (dims[i] + 1) / 2; assert(dims2[i] + off == dims[i]); md_swap_flip2(D, dims2, flags, ostr, optr, istr, iptr + off * istr[i], size); md_swap_flip2(D, dims2, flags, ostr, optr + off * ostr[i], istr, iptr, size); // odd, swap center plane // (we should split in three similar sized chunks instead) dims2[i] = 1; if (1 == dims[i] % 2) md_swap_flip2(D, dims2, flags, ostr, optr + (off - 1) * ostr[i], istr, iptr + (off - 1) * istr[i], size); #else // simpler, but more swaps md_swap2(D, dims, ostr, optr, istr, iptr, size); md_flip_inpl2(D, dims, flags, ostr, optr, size); md_flip_inpl2(D, dims, flags, istr, iptr, size); #endif } /** * Swap input and output while flipping selected dimensions * at the same time. 
*/ void md_swap_flip(unsigned int D, const long dims[D], unsigned long flags, void* optr, void* iptr, size_t size) { long strs[D]; md_calc_strides(D, strs, dims, size); md_swap_flip2(D, dims, flags, strs, optr, strs, iptr, size); } static void md_flip_inpl2(unsigned int D, const long dims[D], unsigned long flags, const long str[D], void* ptr, size_t size) { int i; for (i = D - 1; i >= 0; i--) if ((1 != dims[i]) && MD_IS_SET(flags, i)) break; if (-1 == i) return; assert(1 < dims[i]); assert(str[i] != 0); long dims2[D]; md_copy_dims(D, dims2, dims); dims2[i] = dims[i] / 2; long off = str[i] * (0 + (dims[i] + 1) / 2); md_swap_flip2(D, dims2, flags, str, ptr, str, ptr + off, size); } /** * Flip array (with strides) * * optr[dims[D] - 1 - i] = iptr[i] * */ void md_flip2(unsigned int D, const long dims[D], unsigned long flags, const long ostr[D], void* optr, const long istr[D], const void* iptr, size_t size) { if (optr == iptr) { assert(ostr == istr); md_flip_inpl2(D, dims, flags, ostr, optr, size); return; } long off = 0; long ostr2[D]; for (unsigned int i = 0; i < D; i++) { ostr2[i] = ostr[i]; if (MD_IS_SET(flags, i)) { ostr2[i] = -ostr[i]; off += (dims[i] - 1) * ostr[i]; } } md_copy2(D, dims, ostr2, optr + off, istr, iptr, size); } /** * Flip array (without strides) * * optr[dims[D] - 1 - i] = iptr[i] * */ void md_flip(unsigned int D, const long dims[D], unsigned long flags, void* optr, const void* iptr, size_t size) { long str[D]; md_calc_strides(D, str, dims, size); md_flip2(D, dims, flags, str, optr, str, iptr, size); } struct compare_s { bool eq; size_t size; }; static void nary_cmp(struct nary_opt_data_s* opt_data, void* ptrs[]) { struct compare_s* data = opt_data->data_ptr; size_t size = data->size * opt_data->size; bool eq = (0 == memcmp(ptrs[0], ptrs[1], size)); #pragma omp critical data->eq &= eq; } bool md_compare2(unsigned int D, const long dims[D], const long str1[D], const void* src1, const long str2[D], const void* src2, size_t size) { struct compare_s 
data = { true, size }; const long (*nstr[2])[D] = { (const long (*)[D])str1, (const long (*)[D])str2 }; optimized_nop(2, 0u, D, dims, nstr, (void*[2]){ (void*)src1, (void*)src2 }, (size_t[2]){ size, size }, nary_cmp, &data); return data.eq; } bool md_compare(unsigned int D, const long dims[D], const void* src1, const void* src2, size_t size) { long str[D]; md_calc_strides(D, str, dims, size); return md_compare2(D, dims, str, src1, str, src2, size); } struct septrafo_s { long N; long str; void* data; md_trafo_fun_t fun; }; static void nary_septrafo(void* _data, void* ptr[]) { struct septrafo_s* data = (struct septrafo_s*)_data; data->fun(data->data, data->N, data->str, ptr[0]); } static void md_septrafo_r(unsigned int D, unsigned int R, long dimensions[D], unsigned long flags, const long strides[D], void* ptr, md_trafo_fun_t fun, void* _data) { if (0 == R--) return; md_septrafo_r(D, R, dimensions, flags, strides, ptr, fun, _data); if (MD_IS_SET(flags, R)) { struct septrafo_s data = { dimensions[R], strides[R], _data, fun }; void* nptr[1] = { ptr }; const long* nstrides[1] = { strides }; dimensions[R] = 1; // we made a copy in md_septrafo2 //md_nary_parallel(1, D, dimensions, nstrides, nptr, &data, nary_septrafo); md_nary(1, D, dimensions, nstrides, nptr, &data, nary_septrafo); dimensions[R] = data.N; } } /** * Apply a separable transformation along selected dimensions. * */ void md_septrafo2(unsigned int D, const long dimensions[D], unsigned long flags, const long strides[D], void* ptr, md_trafo_fun_t fun, void* _data) { long dimcopy[D]; md_copy_dims(D, dimcopy, dimensions); md_septrafo_r(D, D, dimcopy, flags, strides, ptr, fun, _data); } /** * Apply a separable transformation along selected dimensions. 
*
 */
void md_septrafo(unsigned int D, const long dims[D], unsigned long flags, void* ptr, size_t size, md_trafo_fun_t fun, void* _data)
{
	md_septrafo2(D, dims, flags, MD_STRIDES(D, dims, size), ptr, fun, _data);
}



/**
 * Copy diagonals from array specified by flags (with strides)
 *
 * dst(i, i, :, i, :) = src(i, i, :, i, :)
 *
 * All flagged dimensions have to have the same length.
 * If no dimension is flagged, nothing is copied.
 */
void md_copy_diag2(unsigned int D, const long dims[D], unsigned long flags, const long str1[D], void* dst, const long str2[D], const void* src, size_t size)
{
	long stride1 = 0;
	long stride2 = 0;
	long count = -1;

	for (unsigned int i = 0; i < D; i++) {

		if (MD_IS_SET(flags, i)) {

			if (count < 0)
				count = dims[i];

			assert(dims[i] == count);

			// one step along the diagonal advances all flagged dimensions
			stride1 += str1[i];
			stride2 += str2[i];
		}
	}

	long xdims[D];
	md_select_dims(D, ~flags, xdims, dims);

	for (long i = 0; i < count; i++)
		md_copy2(D, xdims, str1, dst + i * stride1, str2, src + i * stride2, size);
}



/**
 * Copy diagonals from array specified by flags (without strides)
 *
 * dst(i ,i ,: ,i , :) = src(i ,i ,: ,i ,:)
 *
 */
void md_copy_diag(unsigned int D, const long dims[D], unsigned long flags, void* dst, const void* src, size_t size)
{
	long str[D];
	md_calc_strides(D, str, dims, size);

	md_copy_diag2(D, dims, flags, str, dst, str, src, size);
}



/**
 * Fill diagonals specified by flags with value (without strides)
 *
 * dst(i, i, :, i, :) = src[0]
 *
 */
void md_fill_diag(unsigned int D, const long dims[D], unsigned long flags, void* dst, const void* src, size_t size)
{
	long str2[D];
	md_singleton_strides(D, str2);

	md_copy_diag2(D, dims, flags, MD_STRIDES(D, dims, size), dst, str2, src, size);
}



/**
 * Circularly shift array in place (with strides).
 *
 * Currently implemented with a temporary copy of the whole array
 * allocated next to dst (see md_alloc_sameplace).
 */
static void md_circ_shift_inpl2(unsigned int D, const long dims[D], const long center[D], const long strs[D], void* dst, size_t size)
{
#if 0
	long dims1[D];
	long dims2[D];

	md_copy_dims(D, dims1, dims);
	md_copy_dims(D, dims2, dims);

	unsigned int i;

	for (i = 0; i < D; i++) {

		if (0 != center[i]) {

			dims1[i] = center[i];
			dims2[i] = dims[i] - center[i];
			break;
		}
	}

	if (i == D)
		return;

	long off = strs[i] * center[i];

	// cool but slow, instead we want to have a chain of swaps

	md_flip2(D, dims, MD_BIT(i), strs, dst, strs, dst, size);
	md_flip2(D, dims1, MD_BIT(i), strs, dst, strs, dst, size);
	md_flip2(D, dims2, MD_BIT(i), strs, dst + off, strs, dst + off, size);

	// also not efficient, we want to merge the chain of swaps

	long center2[D];
	md_copy_dims(D, center2, center);
	center2[i] = 0;

	md_circ_shift_inpl2(D, dims, center2, strs, dst, size);
#else
	// use tmp for now

	unsigned int i;

	for (i = 0; i < D; i++)
		if (0 != center[i])
			break;

	// all shifts are zero: nothing to do
	if (i == D)
		return;

	long tmp_strs[D];
	md_calc_strides(D, tmp_strs, dims, size);

	void* tmp = md_alloc_sameplace(D, dims, size, dst);

	md_copy2(D, dims, tmp_strs, tmp, strs, dst, size);

	md_circ_shift2(D, dims, center, strs, dst, tmp_strs, tmp, size);

	md_free(tmp);
#endif
}



/**
 * Circularly shift array (with strides)
 *
 * dst[mod(i + center)] = src[i]
 *
 * The shift is reduced modulo the dimension, so negative shifts and
 * shifts larger than the dimension are both accepted.
 */
void md_circ_shift2(unsigned int D, const long dimensions[D], const long center[D], const long str1[D], void* dst, const long str2[D], const void* src, size_t size)
{
	long pos[D];

	for (unsigned int i = 0; i < D; i++) {

		pos[i] = center[i];

		// reduce the shift to 0 <= pos[i] < dimensions[i];
		// the guard avoids a division by zero for empty dimensions
		if (0 < dimensions[i]) {

			pos[i] %= dimensions[i];

			if (pos[i] < 0)
				pos[i] += dimensions[i];
		}
	}

	unsigned int i = 0;	// FIXME: maybe we should search the other way?

	while ((i < D) && (0 == pos[i]))
		i++;

	// no shift left: plain copy
	if (D == i) {

		md_copy2(D, dimensions, str1, dst, str2, src, size);
		return;
	}

	if (dst == src) {

		assert(str1 == str2);

		md_circ_shift_inpl2(D, dimensions, pos, str1, dst, size);
		return;
	}

	long shift = pos[i];

	assert(shift != 0);

	// split dimension i in two parts and shift the
	// remaining dimensions of both parts recursively
	long dim1[D];
	long dim2[D];

	md_copy_dims(D, dim1, dimensions);
	md_copy_dims(D, dim2, dimensions);

	dim1[i] = shift;
	dim2[i] = dimensions[i] - shift;

	assert((dim1[i] >= 0) && (dim2[i] >= 0));

	pos[i] = 0;

	//printf("%d: %ld %ld %d\n", i, dim1[i], dim2[i], sizeof(dimensions));
	md_circ_shift2(D, dim1, pos, str1, dst, str2, src + dim2[i] * str2[i], size);
	md_circ_shift2(D, dim2, pos, str1, dst + dim1[i] * str1[i], str2, src, size);
}



/**
 * Circularly shift array (without strides)
 *
 * dst[mod(i + center)] = src[i]
 *
 */
void md_circ_shift(unsigned int D, const long dimensions[D], const long center[D], void* dst, const void* src, size_t size)
{
	long strides[D];
	md_calc_strides(D, strides, dimensions, size);

	md_circ_shift2(D, dimensions, center, strides, dst, strides, src, size);
}



/**
 * Circularly extend array (with strides)
 *
 * dims1 (output) may exceed dims2 (input) in each dimension
 * by at most dims2[i]; the extension is filled with the
 * beginning of the input.
 */
void md_circ_ext2(unsigned int D, const long dims1[D], const long strs1[D], void* dst, const long dims2[D], const long strs2[D], const void* src, size_t size)
{
	long ext[D];

	for (unsigned int i = 0; i < D; i++) {

		ext[i] = dims1[i] - dims2[i];

		assert(ext[i] >= 0);
		assert(ext[i] <= dims2[i]);
	}

	unsigned int i = 0;	// FIXME: maybe we should search the other way?

	while ((i < D) && (0 == ext[i]))
		i++;

	// nothing to extend: plain copy
	if (D == i) {

		md_copy2(D, dims1, strs1, dst, strs2, src, size);
		return;
	}

	long dims1_crop[D];
	long dims2_crop[D];
	long ext_dims[D];

	md_copy_dims(D, dims1_crop, dims1);
	md_copy_dims(D, dims2_crop, dims2);
	md_copy_dims(D, ext_dims, dims1);

	dims1_crop[i] = dims2[i];
	dims2_crop[i] = ext[i];
	ext_dims[i] = ext[i];

	ext[i] = 0;

	//printf("%d: %ld %ld %d\n", i, dim1[i], dim2[i], sizeof(dimensions));
	md_circ_ext2(D, dims1_crop, strs1, dst, dims2, strs2, src, size);
	md_circ_ext2(D, ext_dims, strs1, dst + dims2[i] * strs1[i], dims2_crop, strs2, src, size);
}



/**
 * Circularly extend array (without strides)
 *
 */
void md_circ_ext(unsigned int D, const long dims1[D], void* dst, const long dims2[D], const void* src, size_t size)
{
	md_circ_ext2(D, dims1, MD_STRIDES(D, dims1, size), dst,
			dims2, MD_STRIDES(D, dims2, size), src, size);
}



/**
 * Periodically extend array (with strides)
 *
 * dims1[i] (output) has to be a multiple of dims2[i] (input).
 * Every dimension is split in two: the block of length dims2[i]
 * and the number of its periodic repetitions, which reads the
 * input with stride 0.
 */
void md_periodic2(unsigned int D, const long dims1[D], const long strs1[D], void* dst, const long dims2[D], const long strs2[D], const void* src, size_t size)
{
	long dims1B[2 * D];
	long strs1B[2 * D];
	long strs2B[2 * D];

	for (unsigned int i = 0; i < D; i++) {

		assert(0 == dims1[i] % dims2[i]);

		// blocks
		dims1B[2 * i + 0] = dims2[i];
		strs1B[2 * i + 0] = strs1[i];
		strs2B[2 * i + 0] = strs2[i];

		// periodic copies (stored in the odd slots so that they
		// do not overwrite the block entries above)
		dims1B[2 * i + 1] = dims1[i] / dims2[i];
		strs1B[2 * i + 1] = strs1[i] * dims2[i];
		strs2B[2 * i + 1] = 0;
	}

	// the split arrays have 2 * D dimensions
	md_copy2(2 * D, dims1B, strs1B, dst, strs2B, src, size);
}



/**
 * Periodically extend array (without strides)
 *
 */
void md_periodic(unsigned int D, const long dims1[D], void* dst, const long dims2[D], const void* src, size_t size)
{
	md_periodic2(D, dims1, MD_STRIDES(D, dims1, size), dst,
			dims2, MD_STRIDES(D, dims2, size), src, size);
}



/**
 * Allocate CPU memory
 *
 * return pointer to CPU memory
 */
void* md_alloc(unsigned int D, const long dimensions[D], size_t size)
{
	return xmalloc(md_calc_size(D, dimensions) * size);
}



/**
 * Allocate CPU memory and clear
 *
 *
return pointer to CPU memory */ void* md_calloc(unsigned int D, const long dimensions[D], size_t size) { void* ptr = md_alloc(D, dimensions, size); md_clear(D, dimensions, ptr, size); return ptr; } #ifdef USE_CUDA /** * Allocate GPU memory * * return pointer to GPU memory */ void* md_alloc_gpu(unsigned int D, const long dimensions[D], size_t size) { return cuda_malloc(md_calc_size(D, dimensions) * size); } /** * Allocate GPU memory and copy from CPU pointer * * return pointer to GPU memory */ void* md_gpu_move(unsigned int D, const long dims[D], const void* ptr, size_t size) { if (NULL == ptr) return NULL; void* gpu_ptr = md_alloc_gpu(D, dims, size); md_copy(D, dims, gpu_ptr, ptr, size); return gpu_ptr; } #endif /** * Allocate memory on the same device (CPU/GPU) place as ptr * * return pointer to CPU memory if ptr is in CPU or to GPU memory if ptr is in GPU */ void* md_alloc_sameplace(unsigned int D, const long dimensions[D], size_t size, const void* ptr) { #ifdef USE_CUDA return (cuda_ondevice(ptr) ? md_alloc_gpu : md_alloc)(D, dimensions, size); #else assert(0 != ptr); return md_alloc(D, dimensions, size); #endif } /** * Free CPU/GPU memory * */ void md_free(const void* ptr) { #ifdef USE_CUDA if (cuda_ondevice(ptr)) cuda_free((void*)ptr); else #endif xfree(ptr); } bart-0.4.02/src/num/multind.h000066400000000000000000000267471320577655200157740ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
*/ #ifndef __MULTIND_H #define __MULTIND_H 1 #include #include #include #include #include "misc/cppwrap.h" typedef void (*md_nary_fun_t)(void* data, void* ptr[]); typedef void (*md_trafo_fun_t)(void* data, long N, long str, void* ptr); typedef void (*md_loop_fun_t)(void* data, const long* pos); extern void md_nary(unsigned int C, unsigned int D, const long dim[__VLA(D)], const long* str[__VLA(C)], void* ptr[__VLA(C)], void* data, md_nary_fun_t fun); extern void md_parallel_nary(unsigned int C, unsigned int D, const long dim[__VLA(D)], unsigned long flags, const long* str[__VLA(C)], void* ptr[__VLA(C)], void* data, md_nary_fun_t fun); extern void md_parallel_loop(unsigned int D, const long dim[__VLA(D)], unsigned long flags, void* data, md_loop_fun_t fun); extern void md_loop(unsigned int D, const long dim[__VLA(D)], void* data, md_loop_fun_t fun); extern void md_septrafo2(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, const long strides[__VLA(D)], void* ptr, md_trafo_fun_t fun, void* _data); extern void md_septrafo(unsigned int D, const long dimensions[__VLA(D)], unsigned long flags, void* ptr, size_t size, md_trafo_fun_t fun, void* _data); extern void md_clear2(unsigned int D, const long dim[__VLA(D)], const long str[__VLA(D)], void* ptr, size_t size); extern void md_clear(unsigned int D, const long dim[__VLA(D)], void* ptr, size_t size); extern void md_swap2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], void* optr, const long istr[__VLA(D)], void* iptr, size_t size); extern void md_swap(unsigned int D, const long dim[__VLA(D)], void* optr, void* iptr, size_t size); extern void md_circular_swap2(unsigned M, unsigned int D, const long dims[__VLA(D)], const long* strs[__VLA(M)], void* ptr[__VLA(M)], size_t size); extern void md_circular_swap(unsigned M, unsigned int D, const long dims[__VLA(D)], void* ptr[__VLA(M)], size_t size); extern void md_copy2(unsigned int D, const long dim[__VLA(D)], const long ostr[__VLA(D)], 
void* optr, const long istr[__VLA(D)], const void* iptr, size_t size); extern void md_copy(unsigned int D, const long dim[__VLA(D)], void* optr, const void* iptr, size_t size); extern void md_copy_block2(unsigned int D, const long pos[__VLA(D)], const long odim[__VLA(D)], const long ostr[__VLA(D)], void* optr, const long idim[__VLA(D)], const long istr[__VLA(D)], const void* iptr, size_t size); extern void md_copy_block(unsigned int D, const long pos[__VLA(D)], const long odim[__VLA(D)], void* optr, const long idim[__VLA(D)], const void* iptr, size_t size); extern void md_move_block2(unsigned int D, const long dim[__VLA(D)], const long opos[__VLA(D)], const long odim[__VLA(D)], const long ostr[__VLA(D)], void* optr, const long ipos[__VLA(D)], const long idim[__VLA(D)], const long istr[__VLA(D)], const void* iptr, size_t size); extern void md_move_block(unsigned int D, const long dim[__VLA(D)], const long opos[__VLA(D)], const long odim[__VLA(D)], void* optr, const long ipos[__VLA(D)], const long idim[__VLA(D)], const void* iptr, size_t size); extern void md_resize(unsigned int D, const long odim[__VLA(D)], void* optr, const long idim[__VLA(D)], const void* iptr, size_t size); extern void md_resize_center(unsigned int D, const long odim[__VLA(D)], void* optr, const long idim[__VLA(D)], const void* iptr, size_t size); extern void md_fill2(unsigned int D, const long dim[__VLA(D)], const long str[__VLA(D)], void* ptr, const void* iptr, size_t size); extern void md_fill(unsigned int D, const long dim[__VLA(D)], void* ptr, const void* iptr, size_t size); extern void md_slice2(unsigned int D, unsigned long flags, const long pos[__VLA(D)], const long dim[__VLA(D)], const long ostr[__VLA(D)], void* optr, const long istr[__VLA(D)], const void* iptr, size_t size); extern void md_slice(unsigned int D, unsigned long flags, const long pos[__VLA(D)], const long dim[__VLA(D)], void* optr, const void* iptr, size_t size); extern void md_transpose2(unsigned int D, unsigned int dim1, 
unsigned int dim2, const long odims[__VLA(D)], const long ostr[__VLA(D)], void* optr, const long idims[__VLA(D)], const long istr[__VLA(D)], const void* iptr, size_t size); extern void md_transpose(unsigned int D, unsigned int dim1, unsigned int dim2, const long odims[__VLA(D)], void* optr, const long idims[__VLA(D)], const void* iptr, size_t size); extern void md_permute2(unsigned int D, const unsigned int order[__VLA(D)], const long odims[__VLA(D)], const long ostr[__VLA(D)], void* optr, const long idims[__VLA(D)], const long istr[__VLA(D)], const void* iptr, size_t size); extern void md_permute(unsigned int D, const unsigned int order[__VLA(D)], const long odims[__VLA(D)], void* optr, const long idims[__VLA(D)], const void* iptr, size_t size); extern void md_flip2(unsigned int D, const long dims[__VLA(D)], unsigned long flags, const long ostr[__VLA(D)], void* optr, const long istr[__VLA(D)], const void* iptr, size_t size); extern void md_flip(unsigned int D, const long dims[__VLA(D)], unsigned long flags, void* optr, const void* iptr, size_t size); extern void md_swap_flip2(unsigned int D, const long dims[__VLA(D)], unsigned long flags, const long ostr[__VLA(D)], void* optr, const long istr[__VLA(D)], void* iptr, size_t size); extern void md_swap_flip(unsigned int D, const long dims[__VLA(D)], unsigned long flags, void* optr, void* iptr, size_t size); extern void md_copy_diag2(unsigned int D, const long dims[__VLA(D)], unsigned long flags, const long str1[__VLA(D)], void* dst, const long str2[__VLA(D)], const void* src, size_t size); extern void md_copy_diag(unsigned int D, const long dims[__VLA(D)], unsigned long flags, void* dst, const void* src, size_t size); extern void md_fill_diag(unsigned int D, const long dims[__VLA(D)], unsigned long flags, void* dst, const void* src, size_t size); extern void md_circ_shift2(unsigned int D, const long dim[__VLA(D)], const long center[__VLA(D)], const long str1[__VLA(D)], void* dst, const long str2[__VLA(D)], const void* 
src, size_t size); extern void md_circ_shift(unsigned int D, const long dim[__VLA(D)], const long center[__VLA(D)], void* dst, const void* src, size_t size); extern void md_circ_ext2(unsigned int D, const long dims1[__VLA(D)], const long strs1[__VLA(D)], void* dst, const long dims2[__VLA(D)], const long strs2[__VLA(D)], const void* src, size_t size); extern void md_circ_ext(unsigned int D, const long dims1[__VLA(D)], void* dst, const long dims2[__VLA(D)], const void* src, size_t size); extern void md_periodic2(unsigned int D, const long dims1[__VLA(D)], const long strs1[__VLA(D)], void* dst, const long dims2[__VLA(D)], const long strs2[__VLA(D)], const void* src, size_t size); extern void md_periodic(unsigned int D, const long dims1[__VLA(D)], void* dst, const long dims2[__VLA(D)], const void* src, size_t size); extern _Bool md_compare2(unsigned int D, const long dims[__VLA(D)], const long str1[__VLA(D)], const void* src1, const long str2[__VLA(D)], const void* src2, size_t size); extern _Bool md_compare(unsigned int D, const long dims[__VLA(D)], const void* src1, const void* src2, size_t size); typedef void* (*md_alloc_fun_t)(unsigned int D, const long dimensions[__VLA(D)], size_t size); extern void* md_alloc(unsigned int D, const long dimensions[__VLA(D)], size_t size); extern void* md_calloc(unsigned int D, const long dimensions[__VLA(D)], size_t size); #ifdef USE_CUDA extern void* md_alloc_gpu(unsigned int D, const long dimensions[__VLA(D)], size_t size); extern void* md_gpu_move(unsigned int D, const long dims[__VLA(D)], const void* ptr, size_t size); #endif extern void* md_alloc_sameplace(unsigned int D, const long dimensions[__VLA(D)], size_t size, const void* ptr); extern void md_free(const void* p); extern long md_calc_size(unsigned int D, const long dimensions[__VLA(D)]); extern long* md_calc_strides(unsigned int D, long str[__VLA(D)], const long dim[__VLA(D)], size_t size); extern long md_calc_offset(unsigned int D, const long strides[__VLA(D)], const 
long position[__VLA(D)]); extern unsigned int md_calc_blockdim(unsigned int D, const long dim[__VLA(D)], const long str[__VLA(D)], size_t size); extern void md_select_dims(unsigned int D, unsigned long flags, long odims[__VLA(D)], const long idims[__VLA(D)]); extern void md_copy_dims(unsigned int D, long odims[__VLA(D)], const long idims[__VLA(D)]); extern void md_copy_strides(unsigned int D, long odims[__VLA(D)], const long idims[__VLA(D)]); extern void md_merge_dims(unsigned int D, long odims[__VLA(D)], const long dims1[__VLA(D)], const long dims2[__VLA(D)]); extern _Bool md_check_compat(unsigned int D, unsigned long flags, const long dim1[__VLA(D)], const long dim2[__VLA(D)]); extern _Bool md_check_bounds(unsigned int D, unsigned long flags, const long dim1[__VLA(D)], const long dim2[__VLA(D)]); extern void md_singleton_dims(unsigned int D, long dims[__VLA(D)]); extern void md_singleton_strides(unsigned int D, long strs[__VLA(D)]); extern void md_set_dims(unsigned int D, long dims[__VLA(D)], long val); extern void md_min_dims(unsigned int D, unsigned long flags, long odims[__VLA(D)], const long idims1[__VLA(D)], const long idims2[__VLA(D)]); extern void md_max_dims(unsigned int D, unsigned long flags, long odims[__VLA(D)], const long idims1[__VLA(D)], const long idims2[__VLA(D)]); extern _Bool md_is_index(unsigned int D, const long pos[__VLA(D)], const long dims[__VLA(D)]); extern _Bool md_check_dimensions(unsigned int N, const long dims[__VLA(N)], unsigned int flags); extern void md_permute_dims(unsigned int D, const unsigned int order[__VLA(D)], long odims[__VLA(D)], const long idims[__VLA(D)]); extern void md_transpose_dims(unsigned int D, unsigned int dim1, unsigned int dim2, long odims[__VLA(D)], const long idims[__VLA(D)]); extern _Bool md_next(unsigned int D, const long dims[__VLA(D)], unsigned long flags, long pos[__VLA(D)]); extern unsigned long md_nontriv_dims(unsigned int D, const long dims[__VLA(D)]); #define MD_INIT_ARRAY(x, y) { [ 0 ... 
((x) - 1) ] = (y) } #define MD_MAKE_ARRAY(T, ...) ((T[]){ __VA_ARGS__ }) #define MD_DIMS(...) MD_MAKE_ARRAY(long, __VA_ARGS__) #define MD_BIT(x) (1ul << (x)) #define MD_IS_SET(x, y) ((x) & MD_BIT(y)) #define MD_CLEAR(x, y) ((x) & ~MD_BIT(y)) #define MD_SET(x, y) ((x) | MD_BIT(y)) #define MD_CAST_ARRAY2_PTR(T, N, dims, x, a, b) \ (assert(((a) < (b)) && !md_check_dimensions((N), (dims), (1 << (a)) | (1 << (b)))), \ (T (*)[(dims)[b]][(dims)[a]])(x)) #define MD_CAST_ARRAY3_PTR(T, N, dims, x, a, b, c) \ (assert(((a) < (b)) && ((b) < (c)) && !md_check_dimensions((N), (dims), (1 << (a)) | (1 << (b) | (1 << (c))))), \ (T (*)[(dims)[c]][(dims)[b]][(dims)[a]])(x)) #define MD_CAST_ARRAY2(T, N, dims, x, a, b) (*MD_CAST_ARRAY2_PTR(T, N, dims, x, a, b)) #define MD_CAST_ARRAY3(T, N, dims, x, a, b, c) (*MD_CAST_ARRAY3_PTR(T, N, dims, x, a, b, c)) #define MD_ACCESS(N, strs, pos, x) ((x)[md_calc_offset((N), (strs), (pos)) / sizeof((x)[0])]) #define MD_STRIDES(N, dims, elsize) (md_calc_strides(N, alloca(N * sizeof(long)), dims, elsize)) #define MD_SINGLETON_DIMS(N) \ ({ \ unsigned int _N = (N); \ long* _dims = alloca(_N * sizeof(long)); \ md_singleton_dims(_N, _dims); \ _dims; \ }) #define MD_SINGLETON_STRS(N) \ ({ \ unsigned int _N = (N); \ long* _dims = alloca(_N * sizeof(long)); \ md_singleton_strides(_N, _dims); \ _dims; \ }) #include "misc/cppwrap.h" #endif // __MULTIND_H bart-0.4.02/src/num/ops.c000066400000000000000000000620201320577655200150740ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2013-2017 Martin Uecker * 2014 Jonathan Tamir * 2014 Frank Ong * * operator expressions working on multi-dimensional arrays */ #include #include #include #include #include #include #include #include "num/multind.h" #include "num/iovec.h" #include "misc/misc.h" #include "misc/types.h" #include "misc/debug.h" #include "misc/shrdptr.h" #include "ops.h" #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif #ifndef FL_SIZE #define FL_SIZE sizeof(float) #endif struct operator_s { unsigned int N; unsigned int io_flags; const struct iovec_s** domain; operator_data_t* data; void (*apply)(const operator_data_t* data, unsigned int N, void* args[N]); void (*del)(const operator_data_t* data); struct shared_obj_s sptr; }; static void operator_del(const struct shared_obj_s* sptr) { const struct operator_s* x = CONTAINER_OF(sptr, const struct operator_s, sptr); if (NULL != x->del) x->del(x->data); for (unsigned int i = 0; i < x->N; i++) iovec_free(x->domain[i]); xfree(x->domain); xfree(x); } /** * Create an operator (with strides) */ const struct operator_s* operator_generic_create2(unsigned int N, unsigned int io_flags, const unsigned int D[N], const long* dims[N], const long* strs[N], operator_data_t* data, operator_fun_t apply, operator_del_t del) { PTR_ALLOC(struct operator_s, op); PTR_ALLOC(const struct iovec_s*[N], dom); for (unsigned int i = 0; i < N; i++) (*dom)[i] = iovec_create2(D[i], dims[i], strs[i], CFL_SIZE); op->N = N; op->io_flags = io_flags; op->domain = *PTR_PASS(dom); op->data = data; op->apply = apply; op->del = del; shared_obj_init(&op->sptr, operator_del); return PTR_PASS(op); } /** * Create an operator (without strides) */ const struct operator_s* operator_generic_create(unsigned int N, unsigned int io_flags, const unsigned int D[N], const long* dims[N], operator_data_t* data, operator_fun_t apply, operator_del_t del) { const long* strs[N]; for (unsigned int i = 0; i < N; i++) strs[i] = MD_STRIDES(D[i], dims[i], CFL_SIZE); return 
operator_generic_create2(N, io_flags, D, dims, strs, data, apply, del);
}



/**
 * Create an operator (with strides)
 *
 * Convenience wrapper around operator_generic_create2() for an operator
 * with exactly two arguments: argument 0 is described by the output
 * dimensions/strides and argument 1 by the input dimensions/strides.
 * The io_flags passed on are MD_BIT(0), i.e. only argument 0 is flagged.
 *
 * @param ON number of output dimensions
 * @param out_dims dimensions of output
 * @param out_strs strides of output
 * @param IN number of input dimensions
 * @param in_dims dimensions of input
 * @param in_strs strides of input
 * @param data data for applying the operation
 * @param apply function that applies the operation
 * @param del function that frees the data
 */
const struct operator_s* operator_create2(unsigned int ON, const long out_dims[ON], const long out_strs[ON],
			unsigned int IN, const long in_dims[IN], const long in_strs[IN],
			operator_data_t* data, operator_fun_t apply, operator_del_t del)
{
	return operator_generic_create2(2, MD_BIT(0), (unsigned int[2]){ ON, IN },
				(const long* [2]){ out_dims, in_dims }, (const long* [2]){ out_strs, in_strs },
				data, apply, del);
}



/**
 * Create an operator (without strides)
 *
 * The strides are computed with MD_STRIDES() from the dimensions and
 * CFL_SIZE before delegating to operator_create2().
 *
 * @param ON number of output dimensions
 * @param out_dims dimensions of output
 * @param IN number of input dimensions
 * @param in_dims dimensions of input
 * @param data data for applying the operation
 * @param apply function that applies the operation
 * @param del function that frees the data
 */
const struct operator_s* operator_create(unsigned int ON, const long out_dims[ON],
		unsigned int IN, const long in_dims[IN],
		operator_data_t* data, operator_fun_t apply, operator_del_t del)
{
	return operator_create2(ON, out_dims, MD_STRIDES(ON, out_dims, CFL_SIZE),
				IN, in_dims, MD_STRIDES(IN, in_dims, CFL_SIZE),
				data, apply, del);
}



/**
 * Increment the reference count of an operator
 *
 * A NULL pointer is accepted and simply returned.
 *
 * @param x operator
 * @returns x, to allow use in expressions
 */
const struct operator_s* operator_ref(const struct operator_s* x)
{
	if (NULL != x)
		shared_obj_ref(&x->sptr);

	return x;
}



/**
 * Return the data of the associated operator
 *
 * @param x operator (must not be NULL, it is dereferenced unconditionally)
 */
operator_data_t* operator_get_data(const struct operator_s* x)
{
	return x->data;
}



/**
 * Free the operator struct
 * Note: also frees the data if the operator's reference count is zero
 *
 * A NULL pointer is ignored. The actual release is delegated to
 * shared_obj_destroy() on the embedded shared object.
 *
 * @param x operator
 */
void operator_free(const struct operator_s* x)
{
	if (NULL == x)
		return;

	shared_obj_destroy(&x->sptr);
}



/**
 * Return the number of args
 *
 * @param op operator
 */
unsigned int operator_nr_args(const struct operator_s* op)
{
	return op->N;
}



/**
 * Return the iovec of arg n
 *
 * @param op operator
 * @param n arg
number */ const struct iovec_s* operator_arg_domain(const struct operator_s* op, unsigned int n) { assert(n < op->N); return op->domain[n]; } /** * Return the dimensions and strides of the domain of an operator * * @param op operator */ const struct iovec_s* operator_domain(const struct operator_s* op) { return operator_arg_domain(op, 1); } /** * Return the dimensions and strides of the codomain of an operator * * @param op operator */ const struct iovec_s* operator_codomain(const struct operator_s* op) { return operator_arg_domain(op, 0); } struct operator_p_s { struct operator_s op; }; const struct operator_p_s* operator_p_ref(const struct operator_p_s* x) { if (NULL != x) operator_ref(&x->op); return x; } /** * Return the dimensions and strides of the domain of an operator_p * * @param op operator_p */ const struct iovec_s* operator_p_domain(const struct operator_p_s* op) { assert(3 == op->op.N); return op->op.domain[2]; } /** * Return the dimensions and strides of the codomain of an operator_p * * @param op operator_p */ const struct iovec_s* operator_p_codomain(const struct operator_p_s* op) { assert(3 == op->op.N); return op->op.domain[1]; } void operator_p_free(const struct operator_p_s* x) { if (NULL != x) operator_free(&x->op); } struct op_p_data_s { INTERFACE(operator_data_t); operator_data_t* data; operator_p_fun_t apply; operator_del_t del; }; static DEF_TYPEID(op_p_data_s); static void op_p_apply(const operator_data_t* _data, unsigned int N, void* args[N]) { const struct op_p_data_s* data = CAST_DOWN(op_p_data_s, _data); assert(3 == N); data->apply(data->data, *((float*)args[0]), args[1], args[2]); } static void op_p_del(const operator_data_t* _data) { const struct op_p_data_s* data = CAST_DOWN(op_p_data_s, _data); data->del(data->data); free((void*)data); } operator_data_t* operator_p_get_data(const struct operator_p_s* _data) { const struct op_p_data_s* data = CAST_DOWN(op_p_data_s, operator_get_data(&_data->op)); return data->data; } /** * Create an 
operator with one parameter (with strides)
 *
 * The resulting operator has three arguments: argument 0 is the scalar
 * float parameter (one dimension of size 1, stride 0, element size
 * FL_SIZE), argument 1 is the output and argument 2 is the input.
 * The io_flags are MD_BIT(1), i.e. only argument 1 is flagged.
 *
 * @param ON number of output dimensions
 * @param out_dims dimensions of output
 * @param out_strs strides of output
 * @param IN number of input dimensions
 * @param in_dims dimensions of input
 * @param in_strs strides of input
 * @param data data for applying the operation
 * @param apply function that applies the operation
 * @param del function that frees the data (a warning is printed if NULL)
 */
const struct operator_p_s* operator_p_create2(unsigned int ON, const long out_dims[ON], const long out_strs[ON],
		unsigned int IN, const long in_dims[IN], const long in_strs[IN],
		operator_data_t* data, operator_p_fun_t apply, operator_del_t del)
{
	PTR_ALLOC(struct operator_p_s, o);
	PTR_ALLOC(struct op_p_data_s, op);
	SET_TYPEID(op_p_data_s, op);

	// adapter which forwards the generic call to the user's function
	op->data = data;
	op->apply = apply;
	op->del = del;

	PTR_ALLOC(const struct iovec_s*[3], dom);

	(*dom)[0] = iovec_create2(1, MD_DIMS(1), MD_DIMS(0), FL_SIZE);
	(*dom)[1] = iovec_create2(ON, out_dims, out_strs, CFL_SIZE);
	(*dom)[2] = iovec_create2(IN, in_dims, in_strs, CFL_SIZE);

	o->op.N = 3;
	o->op.io_flags = MD_BIT(1);
	o->op.domain = *PTR_PASS(dom);
	o->op.data = CAST_UP(PTR_PASS(op));
	o->op.apply = op_p_apply;
	o->op.del = op_p_del;

	shared_obj_init(&o->op.sptr, operator_del);

	if (NULL == del)
		debug_printf(DP_WARN, "Warning: no delete function specified for operator_p_create! Possible memory leak.\n");

	return PTR_PASS(o);
}


/**
 * Create an operator with one parameter (without strides)
 *
 * The strides are computed with MD_STRIDES() from the dimensions and
 * CFL_SIZE before delegating to operator_p_create2().
 *
 * @param ON number of output dimensions
 * @param out_dims dimensions of output
 * @param IN number of input dimensions
 * @param in_dims dimensions of input
 * @param data data for applying the operation
 * @param apply function that applies the operation
 * @param del function that frees the data
 */
const struct operator_p_s* operator_p_create(unsigned int ON, const long out_dims[ON],
		unsigned int IN, const long in_dims[IN],
		operator_data_t* data, operator_p_fun_t apply, operator_del_t del)
{
	return operator_p_create2(ON, out_dims, MD_STRIDES(ON, out_dims, CFL_SIZE),
				IN, in_dims, MD_STRIDES(IN, in_dims, CFL_SIZE),
				data, apply, del);
}


/**
 * Return the generic three-argument operator embedded in an operator_p.
 * No new reference is taken.
 *
 * @param op operator_p
 */
const struct operator_s* operator_p_upcast(const struct operator_p_s* op)
{
	return &op->op;
}



/**
 * Data of the identity operator: descriptions of its input (domain)
 * and output (codomain) arrays.
 */
struct identity_s {

	INTERFACE(operator_data_t);

	const struct iovec_s* domain;
	const struct iovec_s* codomain;
};

static DEF_TYPEID(identity_s);

static void identity_apply(const
operator_data_t* _data, unsigned int N, void* args[N])
{
	// Identity: copy the input (args[1], domain strides) to the
	// output (args[0], codomain strides).
	const struct identity_s* d = CAST_DOWN(identity_s, _data);

	assert(2 == N);

	md_copy2(d->domain->N, d->domain->dims, d->codomain->strs, args[0], d->domain->strs, args[1], d->domain->size);
}


/**
 * Free the data of an identity operator (both iovecs and the struct)
 */
static void identity_free(const operator_data_t* _data)
{
	const struct identity_s* d = CAST_DOWN(identity_s, _data);

	iovec_free(d->domain);
	iovec_free(d->codomain);

	free((void*)d);
}


/**
 * Create an Identity operator with explicit strides
 *
 * Input and output share the same dimensions but may use different
 * strides; applying the operator copies the input to the output.
 *
 * @param N number of dimensions
 * @param dims dimensions of input and output
 * @param ostrs strides of the output
 * @param istrs strides of the input
 */
const struct operator_s* operator_identity_create2(unsigned int N, const long dims[N],
					const long ostrs[N], const long istrs[N])
{
	PTR_ALLOC(struct identity_s, data);
	SET_TYPEID(identity_s, data);

	data->domain = iovec_create2(N, dims, istrs, CFL_SIZE);
	data->codomain = iovec_create2(N, dims, ostrs, CFL_SIZE);

	return operator_create2(N, dims, ostrs, N, dims, istrs, CAST_UP(PTR_PASS(data)), identity_apply, identity_free);
}


/**
 * Create an Identity operator: I x
 * @param N number of dimensions
 * @param dims dimensions of input (domain)
 */
const struct operator_s* operator_identity_create(unsigned int N, const long dims[N])
{
	long strs[N];
	md_calc_strides(N, strs, dims, CFL_SIZE);

	// the same strides are used for input and output
	return operator_identity_create2(N, dims, strs, strs);
}



/**
 * Data of a chained operator: a is applied first, then b.
 */
struct operator_chain_s {

	INTERFACE(operator_data_t);

	const struct operator_s* a;
	const struct operator_s* b;
};

static DEF_TYPEID(operator_chain_s);


/**
 * Apply a chained operator: args[0] = b(a(args[1]))
 *
 * The intermediate result is stored in a temporary array which has the
 * shape of the output of a and is allocated with md_alloc_sameplace()
 * using args[0] as reference pointer.
 */
static void chain_apply(const operator_data_t* _data, unsigned int N, void* args[N])
{
	const struct operator_chain_s* data = CAST_DOWN(operator_chain_s, _data);

	assert(2 == N);
	assert(2 == data->a->N);
	assert(2 == data->b->N);

	const struct iovec_s* iovec = data->a->domain[0];
	complex float* tmp = md_alloc_sameplace(iovec->N, iovec->dims, iovec->size, args[0]);

	operator_apply_unchecked(data->a, tmp, args[1]);
	operator_apply_unchecked(data->b, args[0], tmp);

	md_free(tmp);
}


/*
 * Free data associated with chained operator
 */
static void chain_free(const operator_data_t* _data)
{
	const struct operator_chain_s* data = CAST_DOWN(operator_chain_s, _data);

operator_free(data->a); operator_free(data->b); free((void*)data); } /** * Create a new operator that first applies a, then applies b: * c(x) = b(a(x)) */ const struct operator_s* operator_chain(const struct operator_s* a, const struct operator_s* b) { PTR_ALLOC(struct operator_chain_s, c); SET_TYPEID(operator_chain_s, c); // check compatibility debug_printf(DP_DEBUG4, "operator chain:\n"); debug_print_dims(DP_DEBUG4, a->domain[0]->N, a->domain[0]->dims); debug_print_dims(DP_DEBUG4, b->domain[1]->N, b->domain[1]->dims); assert((2 == a->N) && (2 == b->N)); assert((MD_BIT(0) == a->io_flags) && (MD_BIT(0) == b->io_flags)); assert(a->domain[0]->N == b->domain[1]->N); assert(md_calc_size(a->domain[0]->N, a->domain[0]->dims) == md_calc_size(b->domain[1]->N, b->domain[1]->dims)); // check whether intermediate storage can be simple assert(a->domain[0]->N == md_calc_blockdim(a->domain[0]->N, a->domain[0]->dims, a->domain[0]->strs, a->domain[0]->size)); assert(b->domain[1]->N == md_calc_blockdim(b->domain[1]->N, b->domain[1]->dims, b->domain[1]->strs, b->domain[1]->size)); c->a = operator_ref(a); c->b = operator_ref(b); const struct iovec_s* dom = a->domain[1]; const struct iovec_s* cod = b->domain[0]; return operator_create2(cod->N, cod->dims, cod->strs, dom->N, dom->dims, dom->strs, CAST_UP(PTR_PASS(c)), chain_apply, chain_free); } const struct operator_s* operator_chainN(unsigned int N, const struct operator_s* ops[N]) { assert(N > 0); const struct operator_s* s = operator_identity_create(ops[0]->domain[0]->N, ops[0]->domain[1]->dims); for (unsigned int i = 0; i < N; i++) s = operator_chain(s, ops[i]); return s; } struct operator_stack_s { INTERFACE(operator_data_t); const struct operator_s* a; const struct operator_s* b; long dst_offset; long src_offset; }; static DEF_TYPEID(operator_stack_s); static void stack_apply(const operator_data_t* _data, unsigned int N, void* args[N]) { const struct operator_stack_s* data = CAST_DOWN(operator_stack_s, _data); assert(2 == N); 
operator_apply_unchecked(data->a, args[0], args[1]); operator_apply_unchecked(data->b, args[0] + data->dst_offset, args[1] + data->src_offset); } static void stack_free(const operator_data_t* _data) { const struct operator_stack_s* data = CAST_DOWN(operator_stack_s, _data); operator_free(data->a); operator_free(data->b); free((void*)data); } static bool stack_compatible(unsigned int D, const struct iovec_s* a, const struct iovec_s* b) { if (a->N != b->N) return false; unsigned int N = a->N; for (unsigned int i = 0; i < N; i++) if ((D != i) && ((a->dims[i] != b->dims[i] || (a->strs[i] != b->strs[i])))) return false; if ((1 != a->dims[D]) || (1 != b->dims[D])) return false; return true; } static void stack_dims(unsigned int N, long dims[N], long strs[N], unsigned int D, const struct iovec_s* a, const struct iovec_s* b) { md_copy_dims(N, dims, a->dims); md_copy_strides(N, strs, a->strs); UNUSED(b); strs[D] = md_calc_size(N, a->dims) * CFL_SIZE; // FIXME dims[D] = 2; } /** * Create a new operator that stacks a and b along dimension D */ const struct operator_s* operator_stack(unsigned int D, unsigned int E, const struct operator_s* a, const struct operator_s* b) { PTR_ALLOC(struct operator_stack_s, c); SET_TYPEID(operator_stack_s, c); assert(stack_compatible(D, a->domain[0], b->domain[0])); assert(stack_compatible(E, a->domain[1], b->domain[1])); c->a = operator_ref(a); c->b = operator_ref(b); unsigned int cod_N = a->domain[0]->N; long cod_dims[cod_N]; long cod_strs[cod_N]; stack_dims(cod_N, cod_dims, cod_strs, D, a->domain[0], b->domain[0]); unsigned int dom_N = a->domain[1]->N; long dom_dims[dom_N]; long dom_strs[dom_N]; stack_dims(dom_N, dom_dims, dom_strs, E, a->domain[1], b->domain[1]); assert(dom_N == cod_N); c->dst_offset = cod_strs[D]; c->src_offset = dom_strs[D]; return operator_create2(cod_N, cod_dims, cod_strs, dom_N, dom_dims, dom_strs, CAST_UP(PTR_PASS(c)), stack_apply, stack_free); } void operator_generic_apply_unchecked(const struct operator_s* op, 
unsigned int N, void* args[N]) { debug_trace("ENTER %p\n", op->apply); op->apply(op->data, N, args); debug_trace("LEAVE %p\n", op->apply); } void operator_apply_unchecked(const struct operator_s* op, complex float* dst, const complex float* src) { operator_generic_apply_unchecked(op, 2, (void*[2]){ (void*)dst, (void*)src }); } void operator_apply2(const struct operator_s* op, unsigned int ON, const long odims[ON], const long ostrs[ON], complex float* dst, const long IN, const long idims[IN], const long istrs[ON], const complex float* src) { assert(2 == op->N); assert(iovec_check(op->domain[1], IN, idims, istrs)); assert(iovec_check(op->domain[0], ON, odims, ostrs)); operator_apply_unchecked(op, dst, src); } void operator_apply(const struct operator_s* op, unsigned int ON, const long odims[ON], complex float* dst, const long IN, const long idims[IN], const complex float* src) { operator_apply2(op, ON, odims, MD_STRIDES(ON, odims, CFL_SIZE), dst, IN, idims, MD_STRIDES(IN, idims, CFL_SIZE), src); } void operator_p_apply2(const struct operator_p_s* op, float mu, unsigned int ON, const long odims[ON], const long ostrs[ON], complex float* dst, const long IN, const long idims[IN], const long istrs[IN], const complex float* src) { assert(3 == op->op.N); assert(iovec_check(op->op.domain[2], IN, idims, istrs)); assert(iovec_check(op->op.domain[1], ON, odims, ostrs)); operator_p_apply_unchecked(op, mu, dst, src); } void operator_p_apply(const struct operator_p_s* op, float mu, unsigned int ON, const long odims[ON], complex float* dst, const long IN, const long idims[IN], const complex float* src) { operator_p_apply2(op, mu, ON, odims, MD_STRIDES(ON, odims, CFL_SIZE), dst, IN, idims, MD_STRIDES(IN, idims, CFL_SIZE), src); } void operator_p_apply_unchecked(const struct operator_p_s* op, float mu, complex float* dst, const complex float* src) { op->op.apply(op->op.data, 3, (void*[3]){ &mu, (void*)dst, (void*)src }); } struct op_bind_s { INTERFACE(operator_data_t); unsigned int 
D; unsigned int arg; const struct operator_s* op; void* ptr; }; static DEF_TYPEID(op_bind_s); static void op_bind_apply(const operator_data_t* _data, unsigned int N, void* args[N]) { const struct op_bind_s* data = CAST_DOWN(op_bind_s, _data); assert(data->D == N + 1); void* n_args[N + 1]; for (unsigned int i = 0, j = 0; i < N; i++, j++) { // insert bound argument if (data->arg == i) n_args[j++] = data->ptr; n_args[j] = args[i]; } operator_generic_apply_unchecked(data->op, N + 1, n_args); } static void op_bind_del(const operator_data_t* _data) { const struct op_bind_s* data = CAST_DOWN(op_bind_s, _data); operator_free(data->op); } /** * Create a new operator that binds argument 'arg'. */ const struct operator_s* operator_bind2(const struct operator_s* op, unsigned int arg, unsigned int N, const long dims[N], const long strs[N], void* ptr) { unsigned int D = operator_nr_args(op); assert(arg < D); assert(!MD_IS_SET(op->io_flags, arg)); assert(iovec_check(operator_arg_domain(op, arg), N, dims, strs)); unsigned int nn[D - 1]; const long* ndims[D - 1]; const long* nstrs[D - 1]; unsigned int n_flags = 0u; for (unsigned int i = 0, j = 0; i < D; i++) { if (arg == i) continue; nn[j] = operator_arg_domain(op, i)->N; ndims[j] = operator_arg_domain(op, i)->dims; nstrs[j] = operator_arg_domain(op, i)->strs; if (MD_IS_SET(op->io_flags, i)) n_flags |= MD_BIT(j); j++; } PTR_ALLOC(struct op_bind_s, data); SET_TYPEID(op_bind_s, data); data->D = D; data->arg = arg; data->ptr = ptr; data->op = op; return operator_generic_create2(D - 1, n_flags, nn, ndims, nstrs, CAST_UP(PTR_PASS(data)), op_bind_apply, op_bind_del); } struct op_loop_s { INTERFACE(operator_data_t); unsigned int N; unsigned int D; const long** strs; const long** dims; const long* dims0; const struct operator_s* op; }; static DEF_TYPEID(op_loop_s); static void op_loop_del(const operator_data_t* _data) { const struct op_loop_s* data = CAST_DOWN(op_loop_s, _data); operator_free(data->op); for (unsigned int i = 0; i < 
data->N; i++) { free((void*)data->dims[i]); free((void*)data->strs[i]); } free((void*)data->strs); free((void*)data->dims); free((void*)data->dims0); free((void*)data); } static void op_loop_nary(void* _data, void* ptr[]) { const struct op_loop_s* data = _data; operator_generic_apply_unchecked(data->op, data->N, ptr); } static void op_loop_fun(const operator_data_t* _data, unsigned int N, void* args[N]) { const struct op_loop_s* data = CAST_DOWN(op_loop_s, _data); assert(N == data->N); md_nary(N, data->D, data->dims0, data->strs, args, (void*)data, op_loop_nary); } static void merge_dims(unsigned int D, long odims[D], const long idims1[D], const long idims2[D]) { md_copy_dims(D, odims, idims1); for (unsigned int i = 0; i < D; i++) { assert((1 == odims[i]) | (1 == idims2[i])); if (1 == odims[i]) odims[i] = idims2[i]; } } const struct operator_s* (operator_loop2)(unsigned int N, const unsigned int D, const long dims[D], const long (*strs)[D], const struct operator_s* op) { assert(N == operator_nr_args(op)); unsigned int D2[N]; PTR_ALLOC(long[D], dims0); md_copy_dims(D, *dims0, dims); PTR_ALLOC(const long*[N], dims2); PTR_ALLOC(const long*[N], strs2); // TODO: we should have a flag and ignore args with flag for (unsigned int i = 0; i < N; i++) { const struct iovec_s* io = operator_arg_domain(op, i); assert(D == io->N); for (unsigned int j = 0; j < D; j++) { assert((0 == io->strs[j]) || (io->strs[j] == strs[i][j])); assert((1 == io->dims[j]) == (0 == io->strs[j])); } D2[i] = D; PTR_ALLOC(long[D], tdims); merge_dims(D, *tdims, dims, io->dims); PTR_ALLOC(long[D], tstrs); md_copy_strides(D, *tstrs, strs[i]); (*dims2)[i] = *PTR_PASS(tdims); (*strs2)[i] = *PTR_PASS(tstrs); } PTR_ALLOC(struct op_loop_s, data); SET_TYPEID(op_loop_s, data); data->N = N; data->D = D; data->op = op; data->dims0 = *PTR_PASS(dims0); data->dims = */*PTR_PASS*/(dims2); data->strs = */*PTR_PASS*/(strs2); const struct operator_s* rop = operator_generic_create2(N, op->io_flags, D2, *dims2, *strs2, 
CAST_UP(PTR_PASS(data)), op_loop_fun, op_loop_del);

	PTR_PASS(dims2);
	PTR_PASS(strs2);

	return rop;
}


/**
 * Create a new operator which applies op in a loop over the
 * dimensions dims (without strides).
 *
 * For every argument the strides are computed with md_calc_strides()
 * from the loop dimensions merged with the argument's own dimensions,
 * using the argument's element size.
 *
 * @param D number of dimensions
 * @param dims loop dimensions
 * @param op operator
 */
const struct operator_s* operator_loop(unsigned int D, const long dims[D], const struct operator_s* op)
{
	unsigned int N = operator_nr_args(op);
	long strs[N][D];

	for (unsigned int i = 0; i < N; i++) {

		long tdims[D];
		merge_dims(D, tdims, dims, operator_arg_domain(op, i)->dims);
		md_calc_strides(D, strs[i], tdims, operator_arg_domain(op, i)->size);
	}

	return operator_loop2(N, D, dims, strs, op);
}


/**
 * Data of the copy wrapper: the wrapped operator, its number of
 * arguments, and the strides of the caller's arrays for every argument.
 */
struct copy_data_s {

	INTERFACE(operator_data_t);

	const struct operator_s* op;

	unsigned int N;
	const long** strs;
};

static DEF_TYPEID(copy_data_s);


/**
 * Apply the wrapped operator on temporary copies of the arguments.
 *
 * For every argument a temporary array is allocated with md_alloc().
 * Arguments not flagged in io_flags are copied from the caller's array
 * (strides data->strs[i]) into the temporary array (operator's strides)
 * before the call; flagged arguments are copied back afterwards.
 */
static void copy_fun(const operator_data_t* _data, unsigned int N, void* args[N])
{
	const struct copy_data_s* data = CAST_DOWN(copy_data_s, _data);
	const struct operator_s* op = data->op;
	void* ptr[N];

	assert(N == operator_nr_args(op));

	for (unsigned int i = 0; i < N; i++) {

		const struct iovec_s* io = operator_arg_domain(op, i);

		ptr[i] = md_alloc(io->N, io->dims, io->size);

		// copy in everything which is not flagged
		if (!MD_IS_SET(op->io_flags, i))
			md_copy2(io->N, io->dims, io->strs, ptr[i], data->strs[i], args[i], io->size);
	}

	operator_generic_apply_unchecked(op, N, ptr);

	for (unsigned int i = 0; i < N; i++) {

		const struct iovec_s* io = operator_arg_domain(op, i);

		// copy the flagged arguments back to the caller's arrays
		if (MD_IS_SET(op->io_flags, i))
			md_copy2(io->N, io->dims, data->strs[i], args[i], io->strs, ptr[i], io->size);

		md_free(ptr[i]);
	}
}


/**
 * Free the data of the copy wrapper: drops the reference to the
 * wrapped operator and releases the stored strides.
 */
static void copy_del(const operator_data_t* _data)
{
	const struct copy_data_s* data = CAST_DOWN(copy_data_s, _data);

	operator_free(data->op);

	for (unsigned int i = 0; i < data->N; i++)
		xfree(data->strs[i]);

	xfree(data->strs);
	xfree(data);
}


/**
 * Create a new operator which accepts arrays with the given strides
 * and applies op on temporary copies.
 *
 * NOTE(review): op is stored without operator_ref() (see the
 * commented-out line below) but released in copy_del(), so the wrapper
 * appears to take over the caller's reference - confirm with callers.
 *
 * @param N number of arguments of op
 * @param strs strides of the caller's arrays for every argument
 * @param op operator
 */
const struct operator_s* operator_copy_wrapper(unsigned int N, const long* strs[N], const struct operator_s* op)
{
	assert(N == operator_nr_args(op));

	// op = operator_ref(op);
	PTR_ALLOC(struct copy_data_s, data);
	SET_TYPEID(copy_data_s, data);
	data->op = op;

	unsigned int D[N];
	const long* dims[N];
	const long* (*strs2)[N] = TYPE_ALLOC(const
long*[N]); for (unsigned int i = 0; i < N; i++) { const struct iovec_s* io = operator_arg_domain(op, i); D[i] = io->N; dims[i] = io->dims; long (*strsx)[io->N] = TYPE_ALLOC(long[io->N]); md_copy_strides(io->N, *strsx, strs[i]); (*strs2)[i] = *strsx; long tstrs[io->N]; md_calc_strides(io->N, tstrs, io->dims, CFL_SIZE); for (unsigned int i = 0; i < io->N; i++) assert(io->strs[i] == tstrs[i]); } data->N = N; data->strs = *strs2; return operator_generic_create2(N, op->io_flags, D, dims, *strs2, CAST_UP(PTR_PASS(data)), copy_fun, copy_del); } struct gpu_data_s { INTERFACE(operator_data_t); const struct operator_s* op; }; static DEF_TYPEID(gpu_data_s); static void gpuwrp_fun(const operator_data_t* _data, unsigned int N, void* args[N]) { #ifdef USE_CUDA const struct operator_s* op = CAST_DOWN(gpu_data_s, _data)->op; void* gpu_ptr[N]; assert(N == operator_nr_args(op)); debug_printf(DP_DEBUG1, "GPU start.\n"); for (unsigned int i = 0; i < N; i++) { const struct iovec_s* io = operator_arg_domain(op, i); if (MD_IS_SET(op->io_flags, i)) gpu_ptr[i] = md_alloc_gpu(io->N, io->dims, io->size); else gpu_ptr[i] = md_gpu_move(io->N, io->dims, args[i], io->size); } operator_generic_apply_unchecked(op, N, gpu_ptr); for (unsigned int i = 0; i < N; i++) { const struct iovec_s* io = operator_arg_domain(op, i); if (MD_IS_SET(op->io_flags, i)) md_copy(io->N, io->dims, args[i], gpu_ptr[i], io->size); md_free(gpu_ptr[i]); } debug_printf(DP_DEBUG1, "GPU end.\n"); #else UNUSED(_data); UNUSED(N); UNUSED(args); assert(0); #endif } static void gpuwrp_del(const operator_data_t* _data) { const struct gpu_data_s* data = CAST_DOWN(gpu_data_s, _data); operator_free(data->op); free((void*)data); } const struct operator_s* operator_gpu_wrapper(const struct operator_s* op) { unsigned int N = operator_nr_args(op); unsigned int D[N]; const long* dims[N]; const long* strs[N]; for (unsigned int i = 0; i < N; i++) { const struct iovec_s* io = operator_arg_domain(op, i); D[i] = io->N; dims[i] = io->dims; 
strs[i] = io->strs; } // op = operator_ref(op); PTR_ALLOC(struct gpu_data_s, data); SET_TYPEID(gpu_data_s, data); data->op = op; return operator_generic_create2(N, op->io_flags, D, dims, strs, CAST_UP(PTR_PASS(data)), gpuwrp_fun, gpuwrp_del); } bart-0.4.02/src/num/ops.h000066400000000000000000000141701320577655200151040ustar00rootroot00000000000000/* Copyright 2014-2015. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifndef __OPS_H #define __OPS_H #include "misc/cppwrap.h" #include "misc/types.h" typedef struct operator_data_s { TYPEID* TYPEID; } operator_data_t; typedef void (*operator_fun_t)(const operator_data_t* _data, unsigned int N, void* args[__VLA(N)]); typedef void (*operator_p_fun_t)(const operator_data_t* _data, float mu, _Complex float* _dst, const _Complex float* _src); typedef void (*operator_del_t)(const operator_data_t* _data); struct operator_s; struct operator_p_s; // create functions extern const struct operator_s* operator_create(unsigned int ON, const long out_dims[__VLA(ON)], unsigned int IN, const long in_dims[__VLA(IN)], operator_data_t* data, operator_fun_t apply, operator_del_t del); extern const struct operator_s* operator_create2(unsigned int ON, const long out_dims[__VLA(ON)], const long out_strs[__VLA(ON)], unsigned int IN, const long in_dims[__VLA(IN)], const long in_strs[__VLA(IN)], operator_data_t* data, operator_fun_t apply, operator_del_t del); extern const struct operator_p_s* operator_p_create(unsigned int ON, const long out_dims[__VLA(ON)], unsigned int IN, const long in_dims[__VLA(IN)], operator_data_t* data, operator_p_fun_t apply, operator_del_t del); extern const struct operator_p_s* operator_p_create2(unsigned int ON, const long out_dims[__VLA(ON)], const long out_strs[__VLA(ON)], unsigned int IN, const long in_dims[__VLA(IN)], const long in_strs[__VLA(IN)], 
operator_data_t* data, operator_p_fun_t apply, operator_del_t del); extern const struct operator_s* operator_generic_create(unsigned int N, unsigned int io_flags, const unsigned int D[__VLA(N)], const long* out_dims[__VLA(N)], operator_data_t* data, operator_fun_t apply, operator_del_t del); extern const struct operator_s* operator_generic_create2(unsigned int N, unsigned int io_flags, const unsigned int D[__VLA(N)], const long* out_dims[__VLA(N)], const long* out_strs[__VLA(N)], operator_data_t* data, operator_fun_t apply, operator_del_t del); extern const struct operator_s* operator_identity_create(unsigned int N, const long dims[__VLA(N)]); extern const struct operator_s* operator_identity_create2(unsigned int N, const long dims[__VLA(N)], const long ostr[__VLA(N)], const long istr[__VLA(N)]); extern const struct operator_s* operator_chain(const struct operator_s* a, const struct operator_s* b); extern const struct operator_s* operator_chainN(unsigned int N, const struct operator_s* ops[__VLA(N)]); //extern const struct operator_s* operator_mul(const struct operator_s* a, const struct operator_s* b); //extern const struct operator_s* operator_sum(const struct operator_s* a, const struct operator_s* b); extern const struct operator_s* operator_stack(unsigned int D, unsigned int E, const struct operator_s* a, const struct operator_s* b); extern const struct operator_s* operator_bind2(const struct operator_s* op, unsigned int arg, unsigned int N, const long dims[__VLA(N)], const long strs[__VLA(N)], void* ptr); // del functions extern void operator_free(const struct operator_s* x); extern void operator_p_free(const struct operator_p_s* x); extern const struct operator_s* operator_ref(const struct operator_s* x); extern const struct operator_p_s* operator_p_ref(const struct operator_p_s* x); // apply functions extern void operator_generic_apply_unchecked(const struct operator_s* op, unsigned int N, void* args[__VLA(N)]); extern void operator_apply(const struct 
operator_s* op, unsigned int ON, const long odims[__VLA(ON)], _Complex float* dst, const long IN, const long idims[__VLA(IN)], const _Complex float* src); extern void operator_apply2(const struct operator_s* op, unsigned int ON, const long odims[__VLA(ON)], const long ostrs[__VLA(ON)], _Complex float* dst, const long IN, const long idims[__VLA(IN)], const long istrs[__VLA(IN)], const _Complex float* src); extern void operator_p_apply(const struct operator_p_s* op, float mu, unsigned int ON, const long odims[__VLA(ON)], _Complex float* dst, const long IN, const long idims[__VLA(IN)], const _Complex float* src); extern void operator_p_apply2(const struct operator_p_s* op, float mu, unsigned int ON, const long odims[__VLA(ON)], const long ostrs[__VLA(ON)], _Complex float* dst, const long IN, const long idims[__VLA(IN)], const long istrs[__VLA(IN)], const _Complex float* src); extern void operator_apply_unchecked(const struct operator_s* op, _Complex float* dst, const _Complex float* src); extern void operator_p_apply_unchecked(const struct operator_p_s* op, float mu, _Complex float* dst, const _Complex float* src); // get functions struct iovec_s; extern unsigned int operator_nr_args(const struct operator_s* op); extern const struct iovec_s* operator_arg_domain(const struct operator_s* op, unsigned int n); extern const struct iovec_s* operator_domain(const struct operator_s* op); extern const struct iovec_s* operator_codomain(const struct operator_s* op); extern const struct iovec_s* operator_p_domain(const struct operator_p_s* op); extern const struct iovec_s* operator_p_codomain(const struct operator_p_s* op); extern operator_data_t* operator_get_data(const struct operator_s* op); extern operator_data_t* operator_p_get_data(const struct operator_p_s* x); extern const struct operator_s* operator_p_upcast(const struct operator_p_s* op); extern const struct operator_s* operator_copy_wrapper(unsigned int N, const long* strs[N], const struct operator_s* op); extern const 
struct operator_s* operator_gpu_wrapper(const struct operator_s* op); extern const struct operator_s* operator_loop2(unsigned int N, const unsigned int D, const long dims[D], const long (*strs)[D], const struct operator_s* op); #if __GNUC__ < 5 #include "misc/pcaa.h" #define operator_loop2(N, D, dims, strs, op) \ operator_loop2(N, D, dims, AR2D_CAST(long, N, D, strs), op) #endif extern const struct operator_s* operator_loop(unsigned int D, const long dims[D], const struct operator_s* op); #include "misc/cppwrap.h" #endif bart-0.4.02/src/num/optimize.c000066400000000000000000000313471320577655200161430ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013-2017 Martin Uecker * * * Optimization framework for operations on multi-dimensional arrays. * */ #include #include #include #include #include #include #include "misc/misc.h" #include "misc/debug.h" #include "num/multind.h" #include "num/vecops.h" #ifdef USE_CUDA #include "num/gpuops.h" #endif #include "num/simplex.h" #include "optimize.h" /* * Helper functions: * * 1. detect aliasing * 2. detect if dimensions can be merged * 3. 
compute memory footprint * */ #if 0 static bool regular(long dim, long str) { return (dim > 0) && (str > 0); } static bool singular(long dim, long str) { assert(dim > 0); return (1 == dim) || (0 == str); } static bool enclosed(const long dims[2], const long strs[2]) { assert(regular(dims[0], strs[0])); assert(regular(dims[1], strs[1])); return (strs[1] >= dims[0] * strs[0]); } // assumes no overlap static long memory_footprint(int N, const long dims[N], const long strs[N]) { unsigned int flags = 0; for (int i = 0; i < N; i++) flags |= (0 == strs[i]); long dims2[N]; md_select_dims(N, ~flags, dims2, dims); return md_calc_size(N, dims2); } #endif /* * Generic optimizations strategy: * * 1. ordering of dimensions by stride * 2. merging of dimensions * 3. splitting and ordering (cache-oblivious algorithms) * 4. parallelization * */ /* strategies: - cache-oblivous algorithms (e.g. transpose) - use of accelerators - parallelization - vectorization - reordering of memory access - temporaries - loop merging - splitting */ /* * Each parameter is either input or output. The pointers must valid * and all accesses using any position inside the range given by * dimensions and using corresponding strides must be inside of the * adressed memory region. Pointers pointing inside the same region * can be passed multipe times. 
*/ void merge_dims(unsigned int D, unsigned int N, long dims[N], long (*ostrs[D])[N]) { for (int i = N - 2; i >= 0; i--) { bool domerge = true; for (unsigned int j = 0; j < D; j++) // mergeable domerge &= (*ostrs[j])[i + 1] == dims[i] * (*ostrs[j])[i]; if (domerge) { for (unsigned int j = 0; j < D; j++) (*ostrs[j])[i + 1] = 0; dims[i + 0] *= dims[i + 1]; dims[i + 1] = 1; } } } unsigned int remove_empty_dims(unsigned int D, unsigned int N, long dims[N], long (*ostrs[D])[N]) { unsigned int o = 0; for (unsigned int i = 0; i < N; i++) { if (1 != dims[i]) { dims[o] = dims[i]; for (unsigned int j = 0; j < D; j++) (*ostrs[j])[o] = (*ostrs[j])[i]; o++; } } return o; } static int cmp_strides(const void* _data, unsigned int a, unsigned int b) { const long* strs = _data; long d = strs[a] - strs[b]; if (d > 0) return 1; if (d < 0) return -1; return 0; } static void compute_permutation(unsigned int N, unsigned int ord[N], const long strs[N]) { for (unsigned int i = 0; i < N; i++) ord[i] = i; quicksort(N, ord, (const void*)strs, cmp_strides); } static void reorder_long(unsigned int N, unsigned int ord[N], long x[N]) { long tmp[N]; memcpy(tmp, x, N * sizeof(long)); for (unsigned int i = 0; i < N; i++) x[i] = tmp[ord[i]]; } /* * Jim Demmel's generic blocking theorem */ static void demmel_factors(unsigned int D, unsigned int N, float blocking[N], long (*strs[D])[N]) { float delta[D][N]; for (unsigned int d = 0; d < D; d++) for (unsigned int n = 0; n < N; n++) delta[d][n] = (0 != (*strs[d])[n]) ? 1. : 0.; // now maximize 1^T x subject to Delta x <= 1 // M^{x_n} yields blocking factors where M is cache size (maybe needs to be devided by D?) float ones[MAX(N, D)]; for (unsigned int n = 0; n < MAX(N, D); n++) ones[n] = 1.; simplex(D, N, blocking, ones, ones, (const float (*)[N])delta); } static long find_factor(long x, float blocking) { //long m = (long)(1. + sqrt((double)x)); long m = (long)(1. 
+ pow((double)x, blocking)); for (long i = m; i > 1; i--) if (0 == x % i) return (x / i); return 1; } static bool split_dims(unsigned int D, unsigned int N, long dims[N + 1], long (*ostrs[D])[N + 1], float blocking[N]) { if (0 == N) return false; long f; if ((dims[N - 1] > 1024) && (1 < (f = find_factor(dims[N - 1], blocking[N - 1])))) { #if 1 dims[N - 1] = dims[N - 1] / f; dims[N] = f; for (unsigned int j = 0; j < D; j++) (*ostrs[j])[N] = (*ostrs[j])[N - 1] * dims[N - 1]; blocking[N - 1] = blocking[N - 1]; blocking[N] = blocking[N - 1]; #else dims[N] = 1; for (unsigned int j = 0; j < D; j++) (*ostrs[j])[N] = 0; #endif return true; } // could not split, make room and try lower dimensions dims[N] = dims[N - 1]; blocking[N] = blocking[N - 1]; for (unsigned int j = 0; j < D; j++) (*ostrs[j])[N] = (*ostrs[j])[N - 1]; if (split_dims(D, N - 1, dims, ostrs, blocking)) return true; dims[N - 1] = dims[N]; for (unsigned int j = 0; j < D; j++) (*ostrs[j])[N - 1] = (*ostrs[j])[N]; blocking[N - 1] = blocking[N]; return false; } unsigned int simplify_dims(unsigned int D, unsigned int N, long dims[N], long (*strs[D])[N]) { merge_dims(D, N, dims, strs); unsigned int ND = remove_empty_dims(D, N, dims, strs); if (0 == ND) { // atleast return a single dimension dims[0] = 1; for (unsigned int j = 0; j < D; j++) (*strs[j])[0] = 0; ND = 1; } return ND; } unsigned int optimize_dims(unsigned int D, unsigned int N, long dims[N], long (*strs[D])[N]) { unsigned int ND = simplify_dims(D, N, dims, strs); debug_print_dims(DP_DEBUG4, ND, dims); float blocking[N]; // actually those are not the blocking factors // as used below but relative to fast memory //demmel_factors(D, ND, blocking, strs); UNUSED(demmel_factors); #if 0 debug_printf(DP_DEBUG4, "DB: "); for (unsigned int i = 0; i < ND; i++) debug_printf(DP_DEBUG4, "%f\t", blocking[i]); debug_printf(DP_DEBUG4, "\n"); #endif #if 1 for (unsigned int i = 0; i < ND; i++) blocking[i] = 0.5; // blocking[i] = 1.; #endif // try to split dimensions 
according to blocking factors // use space up to N bool split = false; do { if (N == ND) break; split = split_dims(D, ND, dims, strs, blocking); if (split) ND++; } while(split); // printf("Split %c :", split ? 'y' : 'n'); // print_dims(ND, dims); long max_strides[ND]; for (unsigned int i = 0; i < ND; i++) { max_strides[i] = 0; for (unsigned int j = 0; j < D; j++) max_strides[i] = MAX(max_strides[i], (*strs[j])[i]); } unsigned int ord[ND]; compute_permutation(ND, ord, max_strides); // for (unsigned int i = 0; i < ND; i++) // printf("%d: %ld %d\n", i, max_strides[i], ord[i]); #if 1 for (unsigned int j = 0; j < D; j++) reorder_long(ND, ord, *strs[j]); reorder_long(ND, ord, dims); #endif #if 0 printf("opt dims\n"); print_dims(ND, dims); if (D > 0) print_dims(ND, *strs[0]); if (D > 1) print_dims(ND, *strs[1]); if (D > 2) print_dims(ND, *strs[2]); #endif return ND; } /** * compute minimal dimension of largest contiguous block(s) * */ unsigned int min_blockdim(unsigned int D, unsigned int N, const long dims[N], long (*strs[D])[N], size_t size[D]) { unsigned int mbd = N; for (unsigned int i = 0; i < D; i++) mbd = MIN(mbd, md_calc_blockdim(N, dims, *strs[i], size[i])); return mbd; } static void compute_enclosures(unsigned int N, bool matrix[N][N], const long dims[N], const long strides[N]) { long ext[N]; for (unsigned int i = 0; i < N; i++) ext[i] = dims[i] * labs(strides[i]); for (unsigned int i = 0; i < N; i++) for (unsigned int j = 0; j < N; j++) matrix[i][j] = (ext[i] <= labs(strides[j])); } /** * compute set of parallelizable dimensions * */ static unsigned long parallelizable(unsigned int D, unsigned int io, unsigned int N, const long dims[N], long (*strs[D])[N], size_t size[D]) { // we assume no input / output overlap // (i.e. 
inputs which are also outputs have to be marked as output) // a dimension is parallelizable if all output operations // for that dimension are independent // for all output operations: // check - all other dimensions have strides greater or equal // the extend of this dimension or have an extend smaller or // equal the stride of this dimension // no overlap: [222] // [111111111111] // [333333333] // overlap: [222] // [1111111111111111] // [333333333] unsigned long flags = (1 << N) - 1; for (unsigned int d = 0; d < D; d++) { if (MD_IS_SET(io, d)) { bool m[N][N]; compute_enclosures(N, m, dims, *strs[d]); // print_dims(N, dims); // print_dims(N, *strs[d]); for (unsigned int i = 0; i < N; i++) { unsigned int a = 0; for (unsigned int j = 0; j < N; j++) if (m[i][j] || m[j][i]) a++; // printf("%d %d %d\n", d, i, a); if ((a != N - 1) || ((size_t)labs((*strs[d])[i]) < size[d])) flags = MD_CLEAR(flags, i); } } } return flags; } extern long num_chunk_size; long num_chunk_size = 32 * 1024; /** * compute set of dimensions to parallelize * */ unsigned long dims_parallel(unsigned int D, unsigned int io, unsigned int N, const long dims[N], long (*strs[D])[N], size_t size[D]) { unsigned long flags = parallelizable(D, io, N, dims, strs, size); unsigned int i = N; long reps = md_calc_size(N, dims); unsigned long oflags = 0; while (i-- > 0) { if (MD_IS_SET(flags, i)) { reps /= dims[i]; if (reps < num_chunk_size) break; oflags = MD_SET(oflags, i); } } return oflags; } #ifdef USE_CUDA static bool use_gpu(int p, void* ptr[p]) { bool gpu = false; for (int i = 0; i < p; i++) gpu |= cuda_ondevice(ptr[i]); for (int i = 0; i < p; i++) gpu &= cuda_accessible(ptr[i]); #if 0 // FIXME: fails for copy if (!gpu) { for (int i = 0; i < p; i++) assert(!cuda_ondevice(ptr[i])); } #endif return gpu; } #endif extern double md_flp_total_time; double md_flp_total_time = 0.; // automatic parallelization extern bool num_auto_parallelize; bool num_auto_parallelize = true; struct nary_opt_s { md_nary_opt_fun_t 
fun; struct nary_opt_data_s* data; }; static void nary_opt(void* _data, void* ptr[]) { struct nary_opt_s* data = _data; data->fun(data->data, ptr); } /** * Optimized n-op. * * @param N number of arguments ' @param io bitmask indicating input/output * @param D number of dimensions * @param dim dimensions * @param nstr strides for arguments and dimensions * @param nptr argument pointers * @param sizes size of data for each argument, e.g. complex float * @param too n-op function * @param data_ptr pointer to additional data used by too */ void optimized_nop(unsigned int N, unsigned int io, unsigned int D, const long dim[D], const long (*nstr[N])[D], void* const nptr[N], size_t sizes[N], md_nary_opt_fun_t too, void* data_ptr) { assert(N > 0); if (0 == D) { long dim1[1] = { 1 }; long tstrs[N][1]; long (*nstr1[N])[1]; for (unsigned int i = 0; i < N; i++) { tstrs[i][0] = 0; nstr1[i] = &tstrs[i]; } optimized_nop(N, io, 1, dim1, (void*)nstr1, nptr, sizes, too, data_ptr); return; } long tdims[D]; md_copy_dims(D, tdims, dim); long tstrs[N][D]; long (*nstr1[N])[D]; void* nptr1[N]; for (unsigned int i = 0; i < N; i++) { md_copy_strides(D, tstrs[i], *nstr[i]); nstr1[i] = &tstrs[i]; nptr1[i] = nptr[i]; } int ND = optimize_dims(N, D, tdims, nstr1); int skip = min_blockdim(N, ND, tdims, nstr1, sizes); unsigned long flags = 0; debug_printf(DP_DEBUG4, "MD-Fun. Io: %d Input: ", io); debug_print_dims(DP_DEBUG4, D, dim); #ifdef USE_CUDA if (num_auto_parallelize && !use_gpu(N, nptr1)) { #else if (num_auto_parallelize) { #endif flags = dims_parallel(N, io, ND, tdims, nstr1, sizes); while ((0 != flags) && (ffs(flags) <= skip)) skip--; flags = flags >> skip; } const long* nstr2[N]; for (unsigned int i = 0; i < N; i++) nstr2[i] = *nstr1[i] + skip; #ifdef USE_CUDA debug_printf(DP_DEBUG4, "This is a %s call\n.", use_gpu(N, nptr1) ? "gpu" : "cpu"); struct nary_opt_data_s data = { md_calc_size(skip, tdims), use_gpu(N, nptr1) ? 
&gpu_ops : &cpu_ops, data_ptr }; #else struct nary_opt_data_s data = { md_calc_size(skip, tdims), &cpu_ops, data_ptr }; #endif debug_printf(DP_DEBUG4, "Vec: %d (%ld) Opt.: ", skip, data.size); debug_print_dims(DP_DEBUG4, ND, tdims); double start = timestamp(); md_parallel_nary(N, ND - skip, tdims + skip, flags, nstr2, nptr1, &(struct nary_opt_s){ too, &data }, nary_opt); double end = timestamp(); #pragma omp critical md_flp_total_time += end - start; debug_printf(DP_DEBUG4, "MD time: %f\n", end - start); } bart-0.4.02/src/num/optimize.h000066400000000000000000000025311320577655200161410ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifdef __cplusplus #error This file does not support C++ #endif #include extern void merge_dims(unsigned int D, unsigned int N, long dims[N], long (*ostrs[D])[N]); extern unsigned int remove_empty_dims(unsigned int D, unsigned int N, long dims[N], long (*ostrs[D])[N]); extern unsigned int simplify_dims(unsigned int D, unsigned int N, long dims[N], long (*strs[D])[N]); extern unsigned int optimize_dims(unsigned int D, unsigned int N, long dims[N], long (*strs[D])[N]); extern unsigned int min_blockdim(unsigned int D, unsigned int N, const long dims[N], long (*strs[D])[N], size_t size[D]); extern unsigned long dims_parallel(unsigned int D, unsigned int io, unsigned int N, const long dims[N], long (*strs[D])[N], size_t size[D]); struct vec_ops; struct nary_opt_data_s { long size; const struct vec_ops* ops; void* data_ptr; }; typedef void (*md_nary_opt_fun_t)(struct nary_opt_data_s* data, void* ptr[]); extern void optimized_nop(unsigned int N, unsigned int io, unsigned int D, const long dim[D], const long (*nstr[N])[D], void* const nptr[N], size_t sizes[N], md_nary_opt_fun_t too, void* data_ptr); 
bart-0.4.02/src/num/polynom.c000066400000000000000000000071571320577655200160020ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Martin Uecker */ #include #include #include #include "polynom.h" complex double polynom_eval(complex double x, int N, const complex double coeff[N + 1]) { // Horner's method: a_0 + x * (a_1 + x * (a_2 + ...) return coeff[0] + ((0 == N) ? 0. : (x * polynom_eval(x, N - 1, coeff + 1))); } void (polynom_derivative)(int N, complex double out[N], const complex double in[N + 1]) { for (int i = 0; i < N; i++) out[i] = (i + 1) * in[i + 1]; } void polynom_integral(int N, complex double out[N + 2], const complex double in[N + 1]) { out[0] = 0.; for (int i = 0; i <= N; i++) out[i + 1] = in[i] / (i + 1); } complex double polynom_integrate(complex double st, complex double end, int N, const complex double coeff[N + 1]) { complex double int_coeff[N + 2]; polynom_integral(N, int_coeff, coeff); return polynom_eval(end, N + 1, int_coeff) - polynom_eval(st, N + 1, int_coeff); } void polynom_monomial(int N, complex double coeff[N + 1], int O) { for (int i = 0; i <= N; i++) coeff[i] = (O == i) ? 1. 
: 0.; } void polynom_from_roots(int N, complex double coeff[N + 1], const complex double root[N]) { // Vieta's formulas for (int i = 0; i <= N; i++) coeff[i] = 0.; // assert N < for (unsigned long b = 0; b < (1u << N); b++) { complex double prod = 1.; int count = 0; for (int i = 0; i < N; i++) { if (b & (1 << i)) { prod *= -root[i]; count++; } } coeff[N - count] += prod; } } void polynom_scale(int N, complex double out[N + 1], complex double scale, const complex double in[N + 1]) { complex double prod = 1.; for (int i = 0; i <= N; i++) { out[i] = prod * in[i]; prod *= scale; } } void polynom_shift(int N, complex double out[N + 1], complex double shift, const complex double in[N + 1]) { // Taylor shift (there are faster FFT-based methods) for (int i = 0; i <= N; i++) out[i] = 0.; complex double tmp[N + 1]; for (int i = 0; i <= N; i++) tmp[i] = in[i]; complex double prod = 1.; for (int i = 0; i <= N; i++) { for (int j = 0; j <= (N - i); j++) out[j] += prod * tmp[j]; polynom_derivative(N - i, tmp, tmp); prod *= shift; prod /= (i + 1); } } void quadratic_formula(complex double x[2], complex double coeff[3]) { complex double c = coeff[0]; complex double b = coeff[1]; complex double a = coeff[2]; assert(0. != a); complex double t = csqrt(cpow(b, 2.) - 4. * a * c); x[0] = (-b + t) / (2. * a); x[1] = (-b - t) / (2. * a); // FIXME: precision // Citardauq Formula // x[1] = 2. * c / (-b + s * t); } void cubic_formula(complex double x[3], complex double coeff[4]) { complex double a = coeff[3]; complex double b = coeff[2]; complex double c = coeff[1]; complex double d = coeff[0]; assert(0. != a); // depressed form t^3 + p t + q with t = -b / (3 a) complex double p = (3. * a * c - cpow(b, 2.)) / (3. * cpow(a, 2.)); complex double q = (2. * cpow(b, 3.) - 9. * a * b * c + 27. * cpow(a, 2.) * d) / (27. * cpow(a, 3.)); // Vieta's substitution: quadratic in w^3 with t = w - p / (3 w) complex double qp[3] = { -cpow(p, 3.) / 27., q, 1. 
}; complex double qw[2]; quadratic_formula(qw, qp); if (0. == qw[0]) qw[0] = qw[1]; complex double w1 = cpow(qw[0], 1. / 3.); complex double ksi = 0.5 * (-1. + sqrt(3.) * 1.i); for (int i = 0; i < 3; i++) { complex double wi = cpow(ksi, i) * w1; complex double ti = (0. == wi) ? 0. : (wi - p / (3. * wi)); x[i] = ti - b / (3. * a); } } bart-0.4.02/src/num/polynom.h000066400000000000000000000017311320577655200157770ustar00rootroot00000000000000 #include extern complex double polynom_eval(complex double x, int N, const complex double coeff[N + 1]); extern void polynom_derivative(int N, complex double out[N], const complex double in[N + 1]); extern void polynom_integral(int N, complex double out[N + 2], const complex double in[N + 1]); extern complex double polynom_integrate(complex double st, complex double end, int N, const complex double coeff[N + 1]); extern void polynom_monomial(int N, complex double coeff[N + 1], int O); extern void polynom_from_roots(int N, complex double coeff[N + 1], const complex double root[N]); extern void polynom_scale(int N, complex double out[N + 1], complex double scale, const complex double in[N + 1]); extern void polynom_shift(int N, complex double out[N + 1], complex double shift, const complex double in[N + 1]); extern void quadratic_formula(complex double x[2], complex double coeff[3]); extern void cubic_formula(complex double x[3], complex double coeff[4]); bart-0.4.02/src/num/qform.c000066400000000000000000000022201320577655200154130ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. */ #include #include #include "num/linalg.h" #include "qform.h" float gradient_form(const float qf[3], float phi) { float x = cosf(phi); float y = sinf(phi); return x * x * qf[0] + 2. * x * y * qf[2] + y * y * qf[1]; } void fit_quadratic_form(float qf[3], unsigned int N, const float phi[N], const float v[N]) { complex float lhs[3] = { 0., 0., 0. }; complex float mat[3][3] = { { 0. 
} }; for (unsigned int i = 0; i < N; i++) { float x = cosf(phi[i]); float y = sinf(phi[i]); lhs[0] += x * x * v[i]; lhs[1] += y * y * v[i]; lhs[2] += 2. * x * y * v[i]; mat[0][0] += x * x * x * x; mat[0][1] += x * x * y * y; mat[0][2] += x * x * 2. * x * y; mat[1][0] += y * y * x * x; mat[1][1] += y * y * y * y; mat[1][2] += y * y * 2. * x * y; mat[2][0] += 2. * x * y * x * x; mat[2][1] += 2. * x * y * y * y; mat[2][2] += 2. * x * y * 2. * x * y; } complex float inv[3][3]; complex float out[3]; mat_inverse(3, inv, mat); mat_vecmul(3, 3, out, inv, lhs); qf[0] = out[0]; qf[1] = out[1]; qf[2] = out[2]; } bart-0.4.02/src/num/qform.h000066400000000000000000000002371320577655200154260ustar00rootroot00000000000000 extern float gradient_form(const float qf[3], float phi); extern void fit_quadratic_form(float qf[3], unsigned int N, const float phi[N], const float v[N]); bart-0.4.02/src/num/rand.c000066400000000000000000000026671320577655200152320ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013 Martin Uecker * 2013 Dara Bahri */ #define _GNU_SOURCE #include #include #include #include "num/multind.h" #ifdef USE_CUDA #include "num/gpuops.h" #endif #include "rand.h" unsigned int num_rand_seed = 123; void num_rand_init(unsigned int seed) { num_rand_seed = seed; } double uniform_rand(void) { double ret; #pragma omp critical ret = rand_r(&num_rand_seed) / (double)RAND_MAX; return ret; } /** * Box-Muller */ complex double gaussian_rand(void) { double u1, u2, s; do { u1 = 2. * uniform_rand() - 1.; u2 = 2. * uniform_rand() - 1.; s = u1 * u1 + u2 * u2; } while (s > 1.); double re = sqrt(-2. * log(s) / s) * u1; double im = sqrt(-2. 
* log(s) / s) * u2; return re + 1.i * im; } void md_gaussian_rand(unsigned int D, const long dims[D], complex float* dst) { #ifdef USE_CUDA if (cuda_ondevice(dst)) { complex float* tmp = md_alloc(D, dims, sizeof(complex float)); md_gaussian_rand(D, dims, tmp); md_copy(D, dims, dst, tmp, sizeof(complex float)); md_free(tmp); return; } #endif //#pragma omp parallel for for (long i = 0; i < md_calc_size(D, dims); i++) dst[i] = (complex float)gaussian_rand(); } bart-0.4.02/src/num/rand.h000066400000000000000000000007261320577655200152310ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include "misc/cppwrap.h" extern double uniform_rand(void); extern _Complex double gaussian_rand(void); extern void md_gaussian_rand(unsigned int D, const long dims[__VLA(D)], _Complex float* dst); extern void num_rand_init(unsigned int seed); #include "misc/cppwrap.h" bart-0.4.02/src/num/shuffle.c000066400000000000000000000060511320577655200157310ustar00rootroot00000000000000 #include "num/multind.h" #include "misc/debug.h" #include "misc/misc.h" #include "shuffle.h" #if 0 void md_shuffle2(unsigned int N, const long dims[N], const long factors[N], const long ostrs[N], void* out, const long istrs[N], const void* in, size_t size) { long dims2[2 * N]; long ostrs2[2 * N]; long istrs2[2 * N]; for (unsigned int i = 0; i < N; i++) { assert(0 == dims[i] % factors[i]); long f2 = dims[i] / factors[i]; dims2[0 * N + i] = f2; dims2[1 * N + i] = factors[i]; ostrs2[1 * N + i] = ostrs[i]; ostrs2[0 * N + i] = ostrs[i] * f2; istrs2[0 * N + i] = istrs[i] * factors[i]; istrs2[1 * N + i] = istrs[i]; } md_copy2(2 * N, dims2, ostrs2, out, istrs2, in, size); } void md_shuffle(unsigned int N, const long dims[N], const long factors[N], void* out, const void* in, size_t size) { long strs[N]; md_calc_strides(N, strs, dims, size); md_shuffle2(N, 
dims, factors, strs, out, strs, in, size); } #endif static void decompose_dims(unsigned int N, long dims2[2 * N], long ostrs2[2 * N], long istrs2[2 * N], const long factors[N], const long odims[N + 1], const long ostrs[N + 1], const long idims[N], const long istrs[N]) { long prod = 1; for (unsigned int i = 0; i < N; i++) { long f2 = idims[i] / factors[i]; assert(0 == idims[i] % factors[i]); assert(odims[i] == idims[i] / factors[i]); dims2[1 * N + i] = factors[i]; dims2[0 * N + i] = f2; istrs2[0 * N + i] = istrs[i] * factors[i]; istrs2[1 * N + i] = istrs[i]; ostrs2[0 * N + i] = ostrs[i]; ostrs2[1 * N + i] = ostrs[N] * prod; prod *= factors[i]; } assert(odims[N] == prod); } void md_decompose2(unsigned int N, const long factors[N], const long odims[N + 1], const long ostrs[N + 1], void* out, const long idims[N], const long istrs[N], const void* in, size_t size) { long dims2[2 * N]; long ostrs2[2 * N]; long istrs2[2 * N]; decompose_dims(N, dims2, ostrs2, istrs2, factors, odims, ostrs, idims, istrs); md_copy2(2 * N, dims2, ostrs2, out, istrs2, in, size); } void md_decompose(unsigned int N, const long factors[N], const long odims[N + 1], void* out, const long idims[N], const void* in, size_t size) { long ostrs[N + 1]; md_calc_strides(N + 1, ostrs, odims, size); long istrs[N]; md_calc_strides(N, istrs, idims, size); md_decompose2(N, factors, odims, ostrs, out, idims, istrs, in, size); } void md_recompose2(unsigned int N, const long factors[N], const long odims[N], const long ostrs[N], void* out, const long idims[N + 1], const long istrs[N + 1], const void* in, size_t size) { long dims2[2 * N]; long ostrs2[2 * N]; long istrs2[2 * N]; decompose_dims(N, dims2, istrs2, ostrs2, factors, idims, istrs, odims, ostrs); md_copy2(2 * N, dims2, ostrs2, out, istrs2, in, size); } void md_recompose(unsigned int N, const long factors[N], const long odims[N], void* out, const long idims[N + 1], const void* in, size_t size) { long ostrs[N]; md_calc_strides(N, ostrs, odims, size); long 
istrs[N + 1]; md_calc_strides(N + 1, istrs, idims, size); md_recompose2(N, factors, odims, ostrs, out, idims, istrs, in, size); } bart-0.4.02/src/num/shuffle.h000066400000000000000000000023261320577655200157370ustar00rootroot00000000000000 #include #include "misc/cppwrap.h" extern void md_shuffle2(unsigned int N, const long dims[__VLA(N)], const long factors[__VLA(N)], const long ostrs[__VLA(N)], void* out, const long istrs[__VLA(N)], const void* in, size_t size); extern void md_shuffle(unsigned int N, const long dims[__VLA(N)], const long factors[__VLA(N)], void* out, const void* in, size_t size); extern void md_decompose2(unsigned int N, const long factors[__VLA(N)], const long odims[__VLA(N + 1)], const long ostrs[__VLA(N + 1)], void* out, const long idims[__VLA(N)], const long istrs[__VLA(N)], const void* in, size_t size); extern void md_decompose(unsigned int N, const long factors[__VLA(N)], const long odims[__VLA(N + 1)], void* out, const long idims[__VLA(N)], const void* in, size_t size); extern void md_recompose2(unsigned int N, const long factors[__VLA(N)], const long odims[__VLA(N)], const long ostrs[__VLA(N)], void* out, const long idims[__VLA(N + 1)], const long istrs[__VLA(N + 1)], const void* in, size_t size); extern void md_recompose(unsigned int N, const long factors[__VLA(N)], const long odims[__VLA(N)], void* out, const long idims[__VLA(N + 1)], const void* in, size_t size); #include "misc/cppwrap.h" bart-0.4.02/src/num/simplex.c000066400000000000000000000104371320577655200157610ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2013-2014 Martin Uecker */ #include #include #include #include "simplex.h" /* * transform matrix so that (d, n) = 1 and (:, n) = 0 */ static void trafo(unsigned int D, unsigned int N, float A[D][N], unsigned int d, unsigned int n) { float mul = A[d][n]; for (unsigned int k = 0; k < N; k++) A[d][k] /= mul; for (unsigned int l = 0; l < D; l++) { if (l != d) { mul = A[l][n]; for (unsigned int k = 0; k < N; k++) A[l][k] -= mul * A[d][k]; } } } static bool feasible_p(unsigned int D, unsigned int N, const float x[N], /*const*/ float A[D + 1][N + 1]) { bool ok = true; for (unsigned int j = 0; j < D; j++) { float sum = 0.; for (unsigned int i = 0; i < N; i++) sum += A[1 + j][i] * x[i]; ok &= (0 == A[1 + j][N] - sum); } return ok; } static void solution(unsigned int D, unsigned int N, float x[N], /*const*/ float A[D + 1][N + 1]) { // this is needed to deel with duplicate columns bool used[D]; for (unsigned int i = 0; i < D; i++) used[i] = false; for (unsigned int i = 0; i < N; i++) { x[i] = -1.; int pos = -1; for (unsigned int j = 0; j < D; j++) { if (0. == A[1 + j][i]) continue; if ((1. == A[1 + j][i]) && (-1. == x[i]) && !used[j]) { x[i] = A[1 + j][N]; pos = j; used[j] = true; } else { x[i] = -1.; break; } } if (-1. == x[i]) { // non-basic x[i] = 0.; if (-1 != pos) used[pos] = false; } } //assert(feasible_p(D, N, x, A)); } extern void print_tableaux(unsigned int D, unsigned int N, /*const*/ float A[D + 1][N + 1]); void print_tableaux(unsigned int D, unsigned int N, /*const*/ float A[D + 1][N + 1]) { float x[N]; solution(D, N, x, A); float y[D]; for (unsigned int j = 0; j < D; j++) { y[j] = 0.; for (unsigned int i = 0; i < N; i++) y[j] += A[1 + j][i] * x[i]; } printf(" "); for (unsigned int i = 0; i < N; i++) printf("x%d ", i); printf("\nSolution: "); for (unsigned int i = 0; i < N; i++) printf(" %0.2f ", x[i]); printf("(%s)\n", (feasible_p(D, N, x, A)) ? 
"feasible" : "infeasible"); printf(" Max "); for (unsigned int i = 0; i < N; i++) printf("%+0.2f ", A[0][i]); printf(" %+0.2f s.t.:\n", A[0][N]); for (unsigned int j = 0; j < D; j++) { printf(" "); for (unsigned int i = 0; i < N; i++) printf("%+0.2f ", A[1 + j][i]); printf("= %+0.2f | %+0.2f\n", A[1 + j][N], y[j]); } printf("Objective: %0.2f\n", A[0][N]); } /* * maximize c^T x subject to Ax = b and x >= 0 * * inplace, b is last column of A, c first row */ static void simplex2(unsigned int D, unsigned int N, float A[D + 1][N + 1]) { // 2. Loop over all columns // print_tableaux(D, N, A); while (true) { unsigned int i = 0; for (i = 0; i < N; i++) if (A[0][i] < 0.) break; if (i == N) break; // 3. find pivot element // Bland's rule int pivot_index = -1; float pivot_value = 0.; for (unsigned int j = 1; j < D + 1; j++) { if (0. < A[j][i]) { float nval = A[j][N] / A[j][i]; if ((-1 == pivot_index) || (nval < pivot_value)) { pivot_value = nval; pivot_index = j; } } } if (-1 == pivot_index) break; // printf("PI %dx%d\n", pivot_index, i); trafo(D + 1, N + 1, A, pivot_index, i); // print_tableaux(D, N, A); float x[N]; solution(D, N, x, A); assert(feasible_p(D, N, x, A)); } // print_tableaux(D, N, A); } /* * maximize c^T x subject to Ax <= b and x >= 0 */ void (simplex)(unsigned int D, unsigned int N, float x[N], const float c[N], const float b[D], const float A[D][N]) { // 1. Step: slack variables // max c^T x Ax + z = b x,z >= 0 float A2[D + 1][N + D + 1]; for (unsigned int i = 0; i < N + D + 1; i++) { A2[0][i] = (i < N) ? -c[i] : 0.; for (unsigned int j = 0; j < D; j++) { if (i < N) A2[1 + j][i] = A[j][i]; else if (i == N + D) A2[1 + j][i] = b[j]; else A2[1 + j][i] = (i - N == j) ? 1. 
: 0.; } } simplex2(D, N + D, A2); // extract results: float x2[D + N]; solution(D, D + N, x2, A2); for (unsigned int i = 0; i < N; i++) x[i] = x2[i]; } bart-0.4.02/src/num/simplex.h000066400000000000000000000004041320577655200157570ustar00rootroot00000000000000 extern void simplex(unsigned int D, unsigned int N, float x[N], const float c[N], const float b[D], const float A[D][N]); #if __GNUC__ < 5 #include "misc/pcaa.h" #define simplex(D, N, x, c, b, A) \ simplex(D, N, x, c, b, AR2D_CAST(float, D, N, A)) #endif bart-0.4.02/src/num/specfun.c000066400000000000000000000023361320577655200157420ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * Martin Uecker */ #include #include "misc/misc.h" #include "num/chebfun.h" #include "specfun.h" /* FIXME: improve precision * (but should be good enough for our purposes...) */ static const float coeff_0to8[] = { 0.143432, 0.144372, 0.147260, 0.152300, 0.159883, 0.170661, 0.185731, 0.207002, 0.238081, 0.286336, 0.366540, 0.501252, 0.699580, 0.906853, 1.000000, }; static const float coeff_8toinf[] = { 0.405687, 0.405664, 0.405601, 0.405494, 0.405349, 0.405164, 0.404945, 0.404692, 0.404413, 0.404107, 0.403782, 0.403439, 0.403086, 0.402724, 0.402359, 0.401995, 0.401637, 0.401287, 0.400951, 0.400631, 0.400332, 0.400055, 0.399805, 0.399582, 0.399391, 0.399231, 0.399106, 0.399012, 0.398998, 0.399001 }; /* * modified bessel function */ double bessel_i0(double x) { if (x < 0.) return bessel_i0(-x); if (x < 8.) return exp(x) * chebeval(x / 4. - 1., ARRAY_SIZE(coeff_0to8), coeff_0to8); return exp(x) * chebeval(16. 
/ x - 1., ARRAY_SIZE(coeff_8toinf), coeff_8toinf) / sqrt(x); } bart-0.4.02/src/num/specfun.h000066400000000000000000000000471320577655200157440ustar00rootroot00000000000000 extern double bessel_i0(double x); bart-0.4.02/src/num/splines.c000066400000000000000000000214701320577655200157540ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Martin Uecker */ #include #include #include "splines.h" static long binomial(unsigned int n, unsigned int k) { long result = 1; for (unsigned int i = 1; i <= k; i++) result *= (n + 1 - i); for (unsigned int i = 1; i <= k; i++) result /= i; return result; } /* basis */ double bernstein(unsigned int n, unsigned int v, double x) { assert(v <= n); return binomial(n, v) * pow(x, v) * pow(1. - x, n - v); } static double lerp(double t, double a, double b) { return (1. - t) * a + t * b; } static void de_casteljau_step(unsigned int N, double out[static N], double t, const double coeff[static N + 1]) { for (unsigned int i = 0; i < N; i++) out[i] = lerp(t, coeff[i], coeff[i + 1]); } static double de_casteljau(double t, unsigned int N, const double coeff[static N + 1]) { if (0 == N) return coeff[0]; double coeff2[N]; de_casteljau_step(N, coeff2, t, coeff); return de_casteljau(t, N - 1, coeff2); } static void de_casteljau_split(double t, unsigned int N, double coeffA[static N + 1], double coeffB[static N + 1], const double coeff[static N + 1]) { coeffA[0] = coeff[0]; coeffB[N] = coeff[N]; if (0 == N) return; double coeff2[N]; de_casteljau_step(N, coeff2, t, coeff); de_casteljau_split(t, N - 1, coeffA + 1, coeffB, coeff2); } void bezier_split(double t, unsigned int N, double coeffA[static N + 1], double coeffB[static N + 1], const double coeff[static N + 1]) { de_casteljau_split(t, N, coeffA, coeffB, coeff); } double bezier_curve(double u, unsigned int N, const double k[static N + 1]) { return 
de_casteljau(u, N, k); } void bezier_increase_degree(unsigned int N, double coeff2[static N + 2], const double coeff[static N + 1]) { coeff2[0] = coeff[0]; for (unsigned int i = 1; i <= N; i++) coeff2[i] = lerp(i / (1. + N), coeff[i], coeff[i - 1]); coeff2[N + 1] = coeff[N]; } double bezier_surface(double u, double v, unsigned int N, unsigned int M, const double k[static N + 1][M + 1]) { double coeff[N + 1]; for (unsigned int i = 0; i <= N; i++) coeff[i] = bezier_curve(u, M, k[i]); return bezier_curve(v, N, coeff); } double bezier_patch(double u, double v, const double k[4][4]) { return bezier_surface(u, v, 3, 3, k); } static void cspline2bezier(double out[4], const double in[4]) { const double m[4][4] = { { 1., 1., 0., 0., }, { 0., 1./ 3., 0., 0., }, { 0., 0., 1., 1., }, { 0., 0., -1./3., 0., }, }; for (int i = 0; i < 4; i++) { out[i] = 0.; for (int j = 0; j < 4; j++) out[i] += m[j][i] * in[j]; } } // cubic hermite spline double cspline(double t, const double coeff[4]) { double coeff2[4]; cspline2bezier(coeff2, coeff); return bezier_curve(t, 3, coeff2); } static double frac(double u, double v) { if (0. == v) { // assert(0. == u); return 0.; } return u / v; } /* * bspline blending function of order p with n + 1 knots. i enumerates the basis. */ double bspline(unsigned int n, unsigned int i, unsigned int p, const double tau[static n + 1], double u) { assert(i + p < n); assert(tau[i] <= tau[i + 1]); assert((tau[0] <= u) && (u <= tau[n])); if (0 == p) return ((tau[i] <= u) && (u < tau[i + 1])) ? 1. 
: 0.; assert(tau[i] <= tau[i + p + 1]); double a = frac(u - tau[i], tau[i + p] - tau[i]); double b = frac(tau[i + p + 1] - u, tau[i + p + 1] - tau[i + 1]); return a * bspline(n, i, p - 1, tau, u) + b * bspline(n, i + 1, p - 1, tau, u); } double bspline_derivative(unsigned int n, unsigned int i, unsigned int p, const double tau[static n + 1], double x) { assert(p > 0); double a = frac(p, tau[i + p] - tau[i]); double b = frac(p, tau[i + p + 1] - tau[i + 1]); return a * bspline(n, i, p - 1, tau, x) - b * bspline(n, i + 1, p - 1, tau, x); } double nurbs(unsigned int n, unsigned int p, const double tau[static n + 1], const double coord[static n - p], const double w[static n - p], double x) { #if 0 double sum = 0.; double nrm = 0.; for (unsigned int i = 0; i < n + 0 - p; i++) { double b = bspline(n, i, p, tau, x); sum += w[i] * coord[i] * b; nrm += w[i] * b; } #else double coordw[n - p]; for (unsigned int i = 0; i < n + 0 - p; i++) coordw[i] = w[i] * coord[i]; double sum = bspline_curve(n, p, tau, coordw, x); double nrm = bspline_curve(n, p, tau, w, x); #endif return sum / nrm; } static void cox_deboor_step(unsigned int N, double out[static N], double x, unsigned int p, const double tau[static N + p + 1], const double coeff[static N + 1]) { unsigned int k = p - N + 1; for (unsigned int s = 0; s < N; s++) { double t = frac(x - tau[s + k], tau[s + p + 1] - tau[s + k]); out[s] = lerp(t, coeff[s], coeff[s + 1]); } } static double cox_deboor_i(double x, unsigned int N, unsigned int p, const double tau[static N + 1], const double coeff[static N + 1]) { if (0 == N) return coeff[0]; double coeff2[N]; cox_deboor_step(N, coeff2, x, p, tau, coeff); return cox_deboor_i(x, N - 1, p, tau, coeff2); } #if 0 static double cox_deboor_r(unsigned int n, unsigned int p, unsigned int k, unsigned int s, const double t2[static n + 1], const double v2[static n + 1 - p], double x) { if (0 == k) return v2[s]; double t = (x - t2[s]) / (t2[s + p - k + 1] - t2[s]); double a = cox_deboor_r(n, p, k - 
1, s - 1, t2, v2, x); double b = cox_deboor_r(n, p, k - 1, s - 0, t2, v2, x); return lerp(t, a, b); } #endif static unsigned int find_span(unsigned int n, const double t[static n + 1], double x) { assert(x >= t[0]); unsigned int i = 0; while (x >= t[i]) i++; i--; return i; } static double cox_deboor(unsigned int n, unsigned int p, const double t[static n + 1], const double v[static n + 1 - p], double x) { int i = find_span(n, t, x); // return cox_deboor_r(n, p, p, p, t + i - p, v + i - p, x); return cox_deboor_i(x, p, p, t + i - p, v + i - p); } double bspline_curve(unsigned int n, unsigned int p, const double t[static n + 1], const double v[static n - p], double x) { return cox_deboor(n, p, t, v, x); } static void bspline_coeff_derivative(unsigned int n, unsigned int p, double t2[static n - 1], double v2[static n - p - 1], const double t[static n + 1], const double v[static n - p]) { for (unsigned int i = 1; i < n; i++) t2[i - 1] = t[i]; for (unsigned int i = 0; i < n - p - 1; i++) v2[i] = (float)p / (t[i + p + 1] - t[i + 1]) * (v[i + 1] - v[i]); } void bspline_coeff_derivative_n(unsigned int k, unsigned int n, unsigned int p, double t2[static n + 1 - 2 * k], double v2[static n - p - k], const double t[static n + 1], const double v[static n - p]) { if (0 == k) { for (unsigned int i = 0; i < n + 1; i++) t2[i] = t[i]; for (unsigned int i = 0; i < n - p; i++) v2[i] = v[i]; } else { double t1[n - 1]; double v1[n - p - 1]; bspline_coeff_derivative(n, p, t1, v1, t, v); bspline_coeff_derivative_n(k - 1, n - 1, p - 1, t2, v2, t1, v1); } } double bspline_curve_derivative(unsigned int k, unsigned int n, unsigned int p, const double t[static n + 1], const double v[static n - p], double x) { double t2[n + 1 - 2 * k]; double v2[n - p - k]; bspline_coeff_derivative_n(k, n, p, t2, v2, t, v); return cox_deboor(n - 2 * k, p - k, t2, v2, x); } static double newton_raphson(int iter, double x0, void* data, double (*fun)(void* data, double x), double (*der)(void* data, double x)) { 
return (0 == iter) ? x0 : newton_raphson(iter - 1, x0 - fun(data, x0) / der(data, x0), data, fun, der); } struct bspline_s { unsigned int n; unsigned int p; const double* t; const double* v; }; static double n_fun(void* _data, double x) { struct bspline_s* data = _data; return bspline_curve(data->n, data->p, data->t, data->v, x); } static double n_der(void* _data, double x) { struct bspline_s* data = _data; return bspline_curve_derivative(1, data->n, data->p, data->t, data->v, x); } double bspline_curve_zero(unsigned int n, unsigned int p, const double tau[static n + 1], const double v[static n - p]) { return newton_raphson(20, (tau[n] + tau[0]) / 2., &(struct bspline_s){ n, p, tau, v }, n_fun, n_der); } void bspline_knot_insert(double x, unsigned int n, unsigned int p, double t2[static n + 2], double v2[n - p + 1], const double tau[static n + 1], const double v[static n - p]) { unsigned int k = find_span(n, tau, x); // knots for (unsigned int i = 0; i <= k; i++) t2[i] = tau[i]; t2[k + 1] = x; for (unsigned int i = k + 1; i < n; i++) t2[i + 1] = tau[i]; unsigned int r = k - p + 1; unsigned int s = k; for (unsigned int i = 0; i < r; i++) v2[i] = v[i]; for (unsigned int i = r; i <= s; i++) { double a = (x - tau[i]) / (tau[i + p] - tau[i]); v2[i] = (1. 
- a) * v[i - 1] + a * v[i]; } for (unsigned int i = s; i < n - p; i++) v2[i + 1] = v[i]; } bart-0.4.02/src/num/splines.h000066400000000000000000000035001320577655200157530ustar00rootroot00000000000000 extern double bernstein(unsigned int n, unsigned int v, double x); extern double bezier_curve(double u, unsigned int N, const double k[static N + 1]); extern void bezier_split(double t, unsigned int N, double coeffA[static N + 1], double coeffB[static N + 1], const double coeff[static N + 1]); extern void bezier_increase_degree(unsigned int N, double coeff2[static N + 2], const double coeff[static N + 1]); extern double cspline(double t, const double coeff[4]); extern double bezier_surface(double u, double v, unsigned int N, unsigned int M, const double k[static N + 1][M + 1]); extern double bezier_patch(double u, double v, const double k[4][4]); extern double bspline(unsigned int n, unsigned int i, unsigned int p, const double tau[static n + 1], double x); extern double bspline_derivative(unsigned int n, unsigned int i, unsigned int p, const double tau[static n + 1], double x); extern double bspline_curve(unsigned int n, unsigned int p, const double t[static n + 1], const double v[static n - p], double x); extern double bspline_curve_derivative(unsigned int k, unsigned int n, unsigned int p, const double t[static n + 1], const double v[static n - p], double x); extern void bspline_coeff_derivative_n(unsigned int k, unsigned int n, unsigned int p, double t2[static n - 1], double v2[n - p - 2], const double t[static n + 1], const double v[static n - p]); extern double bspline_curve_zero(unsigned int n, unsigned int p, const double tau[static n + 1], const double v[static n - p]); extern void bspline_knot_insert(double x, unsigned int n, unsigned int p, double t2[static n + 2], double v2[n - p + 1], const double tau[static n + 1], const double v[static n - p]); extern double nurbs(unsigned int n, unsigned int p, const double tau[static n + 1], const double coord[static 
n - p], const double w[static n - p], double x); bart-0.4.02/src/num/vec3.c000066400000000000000000000025421320577655200151360ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Martin Uecker */ #include #include "vec3.h" void vec3_saxpy(vec3_t dst, const vec3_t src1, float alpha, const vec3_t src2) { for (unsigned int i = 0; i < 3; i++) dst[i] = src1[i] + alpha * src2[i]; } void vec3_sub(vec3_t dst, const vec3_t src1, const vec3_t src2) { vec3_saxpy(dst, src1, -1., src2); } void vec3_add(vec3_t dst, const vec3_t src1, const vec3_t src2) { vec3_saxpy(dst, src1, +1., src2); } void vec3_copy(vec3_t dst, const vec3_t src) { vec3_saxpy(dst, src, 0., src); } void vec3_clear(vec3_t dst) { vec3_saxpy(dst, dst, -1., dst); } float vec3_sdot(const vec3_t a, const vec3_t b) { float ret = 0.; for (unsigned int i = 0; i < 3; i++) ret += a[i] * b[i]; return ret; } float vec3_norm(const vec3_t x) { return sqrtf(vec3_sdot(x, x)); } void vec3_rot(vec3_t dst, const vec3_t src1, const vec3_t src2) { vec3_t tmp; tmp[0] = src1[1] * src2[2] - src1[2] * src2[1]; tmp[1] = src1[2] * src2[0] - src1[0] * src2[2]; tmp[2] = src1[0] * src2[1] - src1[1] * src2[0]; vec3_copy(dst, tmp); } void vec3_smul(vec3_t dst, const vec3_t src, float alpha) { vec3_saxpy(dst, (vec3_t){ 0., 0., 0. 
}, alpha, src); } bart-0.4.02/src/num/vec3.h000066400000000000000000000011071320577655200151370ustar00rootroot00000000000000 typedef float vec3_t[3]; extern void vec3_saxpy(vec3_t dst, const vec3_t src1, float alpha, const vec3_t src2); extern void vec3_sub(vec3_t dst, const vec3_t src1, const vec3_t src2); extern void vec3_add(vec3_t dst, const vec3_t src1, const vec3_t src2); extern void vec3_copy(vec3_t dst, const vec3_t src); extern void vec3_clear(vec3_t dst); extern float vec3_sdot(const vec3_t a, const vec3_t b); extern float vec3_norm(const vec3_t x); extern void vec3_rot(vec3_t dst, const vec3_t src1, const vec3_t src2); extern void vec3_smul(vec3_t dst, const vec3_t src, float alpha); bart-0.4.02/src/num/vecops.c000066400000000000000000000332271320577655200156010ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * Copyright 2016. Martin Uecker. * Copyright 2017. University of Oxford. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2011-2016 Martin Uecker * 2014 Frank Ong * 2014-2017 Jon Tamir * 2017 Sofia Dimoudi * * * This file defines basic operations on vectors of floats/complex floats * for operations on the CPU which are are used by higher level code * (mainly num/flpmath.c and num/italgos.c) to implement more complex * operations. The functions are exported by pointers stored in the * global variable cpu_ops of type struct vec_ops. Identical functions * are implemented for the GPU in gpukrnls.c. * */ #include #include #include #include #ifdef _OPENMP #include #endif #include "misc/misc.h" #include "misc/debug.h" #include "vecops.h" /** * Allocate memory for array of floats. 
* Note: be sure to pass 2*N if allocating for complex float * * @param N number of elements */ static float* allocate(long N) { assert(N >= 0); return xmalloc((size_t)N * sizeof(float)); } static void del(float* vec) { free(vec); } static void copy(long N, float* dst, const float* src) { for (long i = 0; i < N; i++) dst[i] = src[i]; } static void float2double(long N, double* dst, const float* src) { for (long i = 0; i < N; i++) dst[i] = src[i]; } static void double2float(long N, float* dst, const double* src) { for (long i = 0; i < N; i++) dst[i] = src[i]; } /* * Set vector to all-zeros * * @param N vector length * @param vec vector */ static void clear(long N, float* vec) { for (long i = 0; i < N; i++) vec[i] = 0.; } static double dot(long N, const float* vec1, const float* vec2) { double res = 0.; for (long i = 0; i < N; i++) res += vec1[i] * vec2[i]; //res = fma((double)vec1[i], (double)vec2[i], res); return res; } /** * Compute l2 norm of vec * * @param N vector length * @param vec vector */ static double norm(long N, const float* vec) { double res = 0.; for (long i = 0; i < N; i++) res += vec[i] * vec[i]; //res = fma((double)vec[i], (double)vec[i], res); return sqrt(res); } /** * Compute l1 norm of vec * * @param N vector length * @param vec vector */ static double asum(long N, const float* vec) { double res = 0.; for (long i = 0; i < N; i++) res += fabsf(vec[i]); return res; } /** * Compute l1 norm of complex vec * * @param N vector length * @param vec vector */ static double zl1norm(long N, const complex float* vec) { double res = 0.; for (long i = 0; i < N; i++) res += cabsf(vec[i]); return res; } static void axpbz(long N, float* dst, const float a1, const float* src1, const float a2, const float* src2) { for (long i = 0; i < N; i++) dst[i] = a1 * src1[i] + a2 * src2[i]; } static void axpy(long N, float* dst, float alpha, const float* src) { axpbz(N, dst, 1., dst, alpha, src); //dst[i] = fmaf(alpha, src[i], dst[i]); } static void xpay(long N, float beta, 
float* dst, const float* src) { axpbz(N, dst, beta, dst, 1., src); //dst[i] = fmaf(beta, dst[i], src[i]); } static void smul(long N, float alpha, float* dst, const float* src) { axpbz(N, dst, 0., src, alpha, src); //dst[i] = fmaf(alpha, src[i], 0.f); } static void add(long N, float* dst, const float* src1, const float* src2) { #if 1 if (dst == src1) { for (long i = 0; i < N; i++) dst[i] += src2[i]; } else #endif for (long i = 0; i < N; i++) dst[i] = src1[i] + src2[i]; } static void sub(long N, float* dst, const float* src1, const float* src2) { for (long i = 0; i < N; i++) dst[i] = src1[i] - src2[i]; } static void mul(long N, float* dst, const float* src1, const float* src2) { for (long i = 0; i < N; i++) dst[i] = src1[i] * src2[i]; } static void vec_div(long N, float* dst, const float* src1, const float* src2) { for (long i = 0; i < N; i++) //dst[i] = src1[i] / src2[i]; dst[i] = (src2[i] == 0) ? 0.f : src1[i] / src2[i]; } static void fmac(long N, float* dst, const float* src1, const float* src2) { for (long i = 0; i < N; i++) dst[i] += src1[i] * src2[i]; //dst[i] = fmaf(src1[i], src2[i], dst[i]); } static void fmac2(long N, double* dst, const float* src1, const float* src2) { for (long i = 0; i < N; i++) dst[i] += src1[i] * src2[i]; } static void zmul(long N, complex float* dst, const complex float* src1, const complex float* src2) { for (long i = 0; i < N; i++) dst[i] = src1[i] * src2[i]; } static void zdiv(long N, complex float* dst, const complex float* src1, const complex float* src2) { for (long i = 0; i < N; i++) dst[i] = (src2[i] == 0) ? 
0.f : src1[i] / src2[i]; } static void zpow(long N, complex float* dst, const complex float* src1, const complex float* src2) { for (long i = 0; i < N; i++) dst[i] = cpowf(src1[i], src2[i]); } static void zfmac(long N, complex float* dst, const complex float* src1, const complex float* src2) { for (long i = 0; i < N; i++) dst[i] += src1[i] * src2[i]; } static void zfmac2(long N, complex double* dst, const complex float* src1, const complex float* src2) { for (long i = 0; i < N; i++) dst[i] += src1[i] * src2[i]; } static void zmulc(long N, complex float* dst, const complex float* src1, const complex float* src2) { for (long i = 0; i < N; i++) dst[i] = src1[i] * conjf(src2[i]); } static void zfmacc(long N, complex float* dst, const complex float* src1, const complex float* src2) { for (long i = 0; i < N; i++) dst[i] += src1[i] * conjf(src2[i]); } static void zfmacc2(long N, complex double* dst, const complex float* src1, const complex float* src2) { for (long i = 0; i < N; i++) dst[i] += src1[i] * conjf(src2[i]); } static void zconj(long N, complex float* dst, const complex float* src) { for (long i = 0; i < N; i++) dst[i] = conjf(src[i]); } static void zcmp(long N, complex float* dst, const complex float* src1, const complex float* src2) { for (long i = 0; i < N; i++) dst[i] = (src1[i] == src2[i]) ? 1. : 0.; } static void zdiv_reg(long N, complex float* dst, const complex float* src1, const complex float* src2, complex float lambda) { for (long i = 0; i < N; i++) dst[i] = (src2[i] == 0) ? 0.f : src1[i] / (lambda + src2[i]); } static void zphsr(long N, complex float* dst, const complex float* src) { for (long i = 0; i < N; i++) { float s = cabsf(src[i]); /* Note: the comparison (0 == src[i]) is not enough with `--fast-math` * with gcc 4.4.3 (but seems to work for 4.7.3, different computer) * Test: * complex float a = FLT_MIN; * complex float c = a / cabsf(a); * assert(!(isnan(creal(c)) || isnan(cimag(c)))); */ dst[i] = (0. == s) ? 1. 
: (src[i] / s); } } static void zexpj(long N, complex float* dst, const complex float* src) { for (long i = 0; i < N; i++) dst[i] = cexpf(1.I * src[i]); } static void zarg(long N, complex float* dst, const complex float* src) { for (long i = 0; i < N; i++) dst[i] = cargf(src[i]); } static void max(long N, float* dst, const float* src1, const float* src2) { for (long i = 0; i < N; i++) dst[i] = MAX(src1[i], src2[i]); } static void min(long N, float* dst, const float* src1, const float* src2) { for (long i = 0; i < N; i++) dst[i] = MIN(src1[i], src2[i]); } static void vec_pow(long N, float* dst, const float* src1, const float* src2) { for (long i = 0; i < N; i++) dst[i] = powf(src1[i], src2[i]); } static void vec_sqrt(long N, float* dst, const float* src) { for (long i = 0; i < N; i++) dst[i] = sqrtf(src[i]); } static void vec_le(long N, float* dst, const float* src1, const float* src2) { for (long i = 0; i < N; i++) dst[i] = (src1[i] <= src2[i]); } static void vec_ge(long N, float* dst, const float* src1, const float* src2) { for (long i = 0; i < N; i++) dst[i] = (src1[i] >= src2[i]); } /** * Step (1) of soft thesholding, y = ST(x, lambda). * Only computes the residual, resid = MAX( (abs(x) - lambda)/abs(x)), 0 ) * * @param N number of elements * @param lambda threshold parameter * @param d pointer to destination, resid * @param x pointer to input */ static void zsoftthresh_half(long N, float lambda, complex float* d, const complex float* x) { for (long i = 0; i < N; i++) { float norm = cabsf(x[i]); float red = norm - lambda; d[i] = (red > 0.) ? (red / norm) : 0.; } } static void zsoftthresh(long N, float lambda, complex float* d, const complex float* x) { for (long i = 0; i < N; i++) { float norm = cabsf(x[i]); float red = norm - lambda; d[i] = (red > 0.) ? (red / norm) * x[i]: 0.; } } static void softthresh_half(long N, float lambda, float* d, const float* x) { for (long i = 0; i < N; i++) { float norm = fabsf(x[i]); float red = norm - lambda; d[i] = (red > 0.) ? 
(red / norm) : 0.; } } static void softthresh(long N, float lambda, float* d, const float* x) { for (long i = 0; i < N; i++) { float norm = fabsf(x[i]); float red = norm - lambda; d[i] = (red > 0.) ? (red / norm) * x[i] : 0.; } } /** * Return the absolute value of the kth largest array element * To be used for hard thresholding * * @param N number of elements * @param k the sorted element index to pick * @param ar the input complex array * * @returns the absolute value of the kth largest array element. * */ static float klargest_complex_partsort( unsigned int N, unsigned int k, const complex float* ar) { assert(k <= N); complex float* tmp = (complex float*)xmalloc(N * sizeof(complex float)); copy(2 * N, (float*)tmp, (float*)ar); float thr = quickselect_complex(tmp, N, k); xfree(tmp); return thr; } /** * Hard thesholding, y = HT(x, thr). * computes the thresholded vector, y = x * (abs(x) >= t(kmax)) * * @param N number of elements * @param k threshold parameter, index of kth largest element of sorted x * @param d pointer to destination, y * @param x pointer to input */ static void zhardthresh(long N, unsigned int k, complex float* d, const complex float* x) { float thr = klargest_complex_partsort(N, k, x); for (long i = 0; i < N; i++) { float norm = cabsf(x[i]); d[i] = (norm > thr) ? x[i] : 0.; } } /** * Hard thesholding mask, m = HS(x, thr). * computes the non-zero complex support vector, m = 1.0 * (abs(x) >= t(kmax)) * This mask should be applied by complex multiplication. * * @param N number of elements * @param k threshold parameter, index of kth largest element of sorted x * @param d pointer to destination * @param x pointer to input */ static void zhardthresh_mask(long N, unsigned int k, complex float* d, const complex float* x) { float thr = klargest_complex_partsort(N, k, x); for (long i = 0; i < N; i++) { float norm = cabsf(x[i]); d[i] = (norm > thr) ? 1. 
: 0.; } } static void swap(long N, float* a, float* b) { for (long i = 0; i < N; i++) { float tmp = a[i]; a[i] = b[i]; b[i] = tmp; } } // identical copy in num/fft.c static double fftmod_phase(long length, int j) { long center1 = length / 2; double shift = (double)center1 / (double)length; return ((double)j - (double)center1 / 2.) * shift; } static complex double fftmod_phase2(long n, int j, bool inv, double phase) { phase += fftmod_phase(n, j); double rem = phase - floor(phase); double sgn = inv ? -1. : 1.; #if 1 if (rem == 0.) return 1.; if (rem == 0.5) return -1.; if (rem == 0.25) return 1.i * sgn; if (rem == 0.75) return -1.i * sgn; #endif return cexp(M_PI * 2.i * sgn * rem); } static void zfftmod(long N, complex float* dst, const complex float* src, unsigned int n, bool inv, double phase) { for (long i = 0; i < N; i++) for (unsigned int j = 0; j < n; j++) dst[i * n + j] = src[i * n + j] * fftmod_phase2(n, j, inv, phase); } /* * If you add functions here, please also add to gpuops.c/gpukrnls.cu */ const struct vec_ops cpu_ops = { .float2double = float2double, .double2float = double2float, .dot = dot, .asum = asum, .zl1norm = zl1norm, .add = add, .sub = sub, .mul = mul, .div = vec_div, .fmac = fmac, .fmac2 = fmac2, .axpy = axpy, .pow = vec_pow, .sqrt = vec_sqrt, .le = vec_le, .ge = vec_ge, .zmul = zmul, .zdiv = zdiv, .zfmac = zfmac, .zfmac2 = zfmac2, .zmulc = zmulc, .zfmacc = zfmacc, .zfmacc2 = zfmacc2, .zpow = zpow, .zphsr = zphsr, .zconj = zconj, .zexpj = zexpj, .zarg = zarg, .zcmp = zcmp, .zdiv_reg = zdiv_reg, .zfftmod = zfftmod, .max = max, .min = min, .zsoftthresh = zsoftthresh, .zsoftthresh_half = zsoftthresh_half, .softthresh = softthresh, .softthresh_half = softthresh_half, .zhardthresh = zhardthresh, .zhardthresh_mask = zhardthresh_mask, }; // defined in iter/vec.h struct vec_iter_s { float* (*allocate)(long N); void (*del)(float* x); void (*clear)(long N, float* x); void (*copy)(long N, float* a, const float* x); void (*swap)(long N, float* a, float* 
x); double (*norm)(long N, const float* x); double (*dot)(long N, const float* x, const float* y); void (*sub)(long N, float* a, const float* x, const float* y); void (*add)(long N, float* a, const float* x, const float* y); void (*smul)(long N, float alpha, float* a, const float* x); void (*xpay)(long N, float alpha, float* a, const float* x); void (*axpy)(long N, float* a, float alpha, const float* x); void (*axpbz)(long N, float* out, const float a, const float* x, const float b, const float* z); void (*zmul)(long N, complex float* dst, const complex float* src1, const complex float* src2); }; extern const struct vec_iter_s cpu_iter_ops; const struct vec_iter_s cpu_iter_ops = { .allocate = allocate, .del = del, .clear = clear, .copy = copy, .dot = dot, .norm = norm, .axpy = axpy, .xpay = xpay, .axpbz = axpbz, .smul = smul, .add = add, .sub = sub, .swap = swap, .zmul = zmul }; bart-0.4.02/src/num/vecops.h000066400000000000000000000071641320577655200156070ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * Copyright 2016. Martin Uecker. * Copyright 2017. University of Oxford. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
/*
 * Table of function pointers for vector kernels operating on flat arrays.
 * An instance named cpu_ops is declared alongside this struct.
 *
 * In every entry the leading `long N` is passed together with the array
 * pointers; it is presumably the element count -- TODO confirm against the
 * implementations, which are not visible here.  The per-group notes below
 * are inferred from the member names and signatures only and should be
 * verified before being relied upon.
 */
struct vec_ops {

	/* precision conversion between float and double arrays */
	void (*float2double)(long N, double* dst, const float* src);
	void (*double2float)(long N, float* dst, const double* src);

	/* reductions returning a double (names suggest dot product / norms -- verify) */
	double (*dot)(long N, const float* vec1, const float* vec2);
	double (*asum)(long N, const float* vec);
	double (*zl1norm)(long N, const _Complex float* vec);

	/* scaled accumulation; note that axpy writes through its `a` argument */
	void (*axpy)(long N, float* a, float alpha, const float* x);
	void (*axpbz)(long N, float* out, const float a, const float* x, const float b, const float* z);

	/* real-valued kernels: dst written from one or two float sources */
	void (*pow)(long N, float* dst, const float* src1, const float* src2);
	void (*sqrt)(long N, float* dst, const float* src);

	void (*le)(long N, float* dst, const float* src1, const float* src2);
	void (*ge)(long N, float* dst, const float* src1, const float* src2);

	void (*add)(long N, float* dst, const float* src1, const float* src2);
	void (*sub)(long N, float* dst, const float* src1, const float* src2);
	void (*mul)(long N, float* dst, const float* src1, const float* src2);
	void (*div)(long N, float* dst, const float* src1, const float* src2);
	void (*fmac)(long N, float* dst, const float* src1, const float* src2);
	/* same argument layout as fmac, but with a double destination */
	void (*fmac2)(long N, double* dst, const float* src1, const float* src2);

	/* complex-valued kernels; the *2 variants take a _Complex double destination */
	void (*zmul)(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2);
	void (*zdiv)(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2);
	void (*zfmac)(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2);
	void (*zfmac2)(long N, _Complex double* dst, const _Complex float* src1, const _Complex float* src2);
	void (*zmulc)(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2);
	void (*zfmacc)(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2);
	void (*zfmacc2)(long N, _Complex double* dst, const _Complex float* src1, const _Complex float* src2);

	void (*zpow)(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2);
	void (*zphsr)(long N, _Complex float* dst, const _Complex float* src);
	void (*zconj)(long N, _Complex float* dst, const _Complex float* src);
	void (*zexpj)(long N, _Complex float* dst, const _Complex float* src);
	void (*zarg)(long N, _Complex float* dst, const _Complex float* src);

	void (*zcmp)(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2);

	/* like zdiv but with an additional complex parameter lambda */
	void (*zdiv_reg)(long N, _Complex float* dst, const _Complex float* src1, const _Complex float* src2, _Complex float lambda);
	void (*zfftmod)(long N, _Complex float* dst, const _Complex float* src, unsigned int n, _Bool inv, double phase);

	void (*max)(long N, float* dst, const float* src1, const float* src2);
	void (*min)(long N, float* dst, const float* src1, const float* src2);

	/* thresholding kernels; here lambda precedes the dst/src pointers */
	void (*zsoftthresh_half)(long N, float lambda, _Complex float* dst, const _Complex float* src);
	void (*zsoftthresh)(long N, float lambda, _Complex float* dst, const _Complex float* src);
	void (*softthresh_half)(long N, float lambda, float* dst, const float* src);
	void (*softthresh)(long N, float lambda, float* dst, const float* src);
//	void (*swap)(long N, float* a, float* b);	(disabled entry, kept for reference)

	/* hard-thresholding kernels taking an unsigned parameter k */
	void (*zhardthresh)(long N, unsigned int k, _Complex float* d, const _Complex float* x);
	void (*zhardthresh_mask)(long N, unsigned int k, _Complex float* d, const _Complex float* x);
};
/* Lifting coefficients: a[0] and a[2] are used by the predict steps,
 * a[1] and a[3] by the update steps (see cdf97 / icdf97 below). */
const float a[4] = { -1.586134342, -0.05298011854, 0.8829110762, 0.4435068522 };

/* Final normalisation: even samples are multiplied by scale, odd samples by 1 / scale. */
const float scale = 1.149604398;


/**
 * Lifting "predict" step: every odd sample gets a times the sum of its
 * two even neighbours added to it.
 *
 * This is designed to work for every n.
 * For odd n, we split n = a + b with
 * a = b + 1 where a is the number of
 * coarse coefficients. This splitting is
 * implicit by putting the first
 * value into the coarse coefficients.
 *
 * @param n   number of samples (n < 2 is a no-op)
 * @param a   lifting coefficient
 * @param str distance between consecutive samples, in floats
 * @param x   samples, modified in place
 */
static void predict(long n, float a, long str, float* x)
{
	// nothing to predict from: without this guard n == 0 would index x[-str]
	if (n < 2)
		return;

	for (long i = 1; i < n - 1; i += 2)
		x[i * str] += a * (x[(i - 1) * str] + x[(i + 1) * str]);

	// for even n the last odd sample has no right neighbour:
	// wrap around to x[0] (periodic boundary)
	if (0 == n % 2)
		x[(n - 1) * str] += a * (x[(n - 2) * str] + x[0]);
}


/**
 * Lifting "update" step: every even sample gets a times the sum of its
 * two odd neighbours added to it.
 *
 * Parameters as for predict().  For even n the first sample wraps around
 * to the last one (periodic); for odd n both end samples only have one
 * neighbour, which is counted twice.
 */
static void update(long n, float a, long str, float* x)
{
	// a single sample has no odd neighbours at all; the odd-n branch
	// below would otherwise touch x[str] and x[-str]
	if (n < 2)
		return;

	for (long i = 2; i < n - 1; i += 2)
		x[i * str] += a * (x[(i - 1) * str] + x[(i + 1) * str]);

	if (0 == n % 2) {	// +-+-+-

		x[0] += a * (x[(n - 1) * str] + x[1 * str]);	// periodic

	} else {		// +-+-+

		x[0] += 2. * a * x[1 * str];
		x[(n - 1) * str] += 2. * a * x[(n - 2) * str];
	}
}


/**
 * Forward transform of one line, in place: four lifting steps followed
 * by the normalisation.  Coarse and detail values stay interleaved
 * (even / odd positions).
 */
static void cdf97(long n, long str, float* x)
{
	predict(n, a[0], str, x);
	update(n, a[1], str, x);
	predict(n, a[2], str, x);
	update(n, a[3], str, x);

	for (long i = 0; i < n; i++)
		x[i * str] *= (0 == i % 2) ? scale : (1. / scale);
}


/**
 * Inverse of cdf97(): undoes the normalisation, then runs the lifting
 * steps in reverse order with negated coefficients.
 */
static void icdf97(long n, long str, float* x)
{
	for (long i = 0; i < n; i++)
		x[i * str] *= (0 == i % 2) ? (1. / scale) : scale;

	update(n, -a[3], str, x);
	predict(n, -a[2], str, x);
	update(n, -a[1], str, x);
	predict(n, -a[0], str, x);
}


/* Number of odd-position (detail) samples in a line of length n. */
static long num_coeff(long n)
{
	return n / 2;
}


/* Number of even-position (coarse) samples in a line of length n. */
static long num_scale(long n)
{
	return n - num_coeff(n);
}


/**
 * De-interleave a line in place: all even-position samples first,
 * followed by all odd-position samples.
 */
static void resort(long n, long str, float* src)
{
	// also keeps the VLA below from being declared with a non-positive size
	if (n < 1)
		return;

	float tmp[n];

	for (long i = 0; i < num_scale(n); i++)
		tmp[i] = src[(i * 2 + 0) * str];

	for (long i = 0; i < num_coeff(n); i++)
		tmp[num_scale(n) + i] = src[(i * 2 + 1) * str];

	for (long i = 0; i < n; i++)
		src[i * str] = tmp[i];
}


/**
 * Inverse of resort(): re-interleave the leading num_scale(n) samples
 * (to even positions) with the trailing num_coeff(n) samples (to odd
 * positions).
 */
static void iresort(long n, long str, float* src)
{
	if (n < 1)
		return;

	float tmp[n];

	for (long i = 0; i < num_scale(n); i++)
		tmp[i * 2 + 0] = src[i * str];

	for (long i = 0; i < num_coeff(n); i++)
		tmp[i * 2 + 1] = src[(num_scale(n) + i) * str];

	for (long i = 0; i < n; i++)
		src[i * str] = tmp[i];
}


/*
 * Line callbacks.  They receive the stride in bytes and convert it to a
 * stride in floats; _data must be NULL.
 */

/* Forward transform followed by de-interleaving. */
static void cdf97_line(void* _data, long n, long str, void* ptr)
{
	assert(NULL == _data);

	long fstr = str / (long)sizeof(float);

	cdf97(n, fstr, ptr);
	resort(n, fstr, ptr);
}


/* Re-interleaving followed by the inverse transform. */
static void icdf97_line(void* _data, long n, long str, void* ptr)
{
	assert(NULL == _data);

	long fstr = str / (long)sizeof(float);

	iresort(n, fstr, ptr);
	icdf97(n, fstr, ptr);
}


/* Forward transform leaving the samples interleaved; dispatches to the
 * CUDA implementation for device pointers when that is compiled in. */
static void cdf97_line_nosort(void* _data, long n, long str, void* ptr)
{
	assert(NULL == _data);

#ifdef USE_CUDA
	if (cuda_ondevice(ptr))
#ifdef BERKELEY_SVN
		cuda_cdf97(n, str / 4, ptr);
#else
		assert(0);
#endif
	else
#endif
	cdf97(n, str / (long)sizeof(float), ptr);
}


/* Inverse transform on interleaved samples; CUDA dispatch as above. */
static void icdf97_line_nosort(void* _data, long n, long str, void* ptr)
{
	assert(NULL == _data);

#ifdef USE_CUDA
	if (cuda_ondevice(ptr))
#ifdef BERKELEY_SVN
		cuda_icdf97(n, str / 4, ptr);
#else
		assert(0);
#endif
	else
#endif
	icdf97(n, str / (long)sizeof(float), ptr);
}
if (rec) { long dims2[D]; md_select_dims(D, ~0, dims2, dims); for (int i = 0; i < D; i++) if (MD_IS_SET(flags, i)) dims2[i] = num_scale(dims[i]); long strs2[D]; md_copy_strides(D, strs2, strs); for (int i = 0; i < D; i++) if (nosort && (MD_IS_SET(flags, i))) strs2[i] *= 2; md_wavtrafo2(D, dims2, flags, strs2, ptr, fun, inv, nosort); } if (inv) md_septrafo2(D, dims, flags, strs, ptr, fun, NULL); //md_parallel_septrafo2(D, dims, flags, strs, ptr, fun, NULL); } void md_wavtrafo(int D, const long dims[D], unsigned int flags, void* ptr, size_t size, md_trafo_fun_t fun, bool inv, bool nosort) { long strs[D]; md_calc_strides(D, strs, dims, size); md_wavtrafo2(D, dims, flags, strs, ptr, fun, inv, nosort); } void md_wavtrafoz2(int D, const long dims[D], unsigned int flags, const long strs[D], complex float* x, md_trafo_fun_t fun, bool inv, bool nosort) { long dims2[D + 1]; dims2[0] = 2; // complex float md_copy_dims(D, dims2 + 1, dims); long strs2[D + 1]; strs2[0] = sizeof(float); md_copy_strides(D, strs2 + 1, strs); md_wavtrafo2(D + 1, dims2, flags << 1, strs2, (void*)x, fun, inv, nosort); } void md_wavtrafoz(int D, const long dims[D], unsigned int flags, complex float* ptr, md_trafo_fun_t fun, bool inv, bool nosort) { long strs[D]; md_calc_strides(D, strs, dims, sizeof(complex float)); md_wavtrafoz2(D, dims, flags, strs, ptr, fun, inv, nosort); } void md_cdf97z(int D, const long dims[D], unsigned int flags, complex float* data) { md_wavtrafoz(D, dims, flags, data, cdf97_line_nosort, false, true); } void md_icdf97z(int D, const long dims[D], unsigned int flags, complex float* data) { md_wavtrafoz(D, dims, flags, data, icdf97_line_nosort, true, true); } void md_cdf97z2(int D, const long dims[D], unsigned int flags, const long strs[D], complex float* data) { md_wavtrafoz2(D, dims, flags, strs, data, cdf97_line_nosort, false, true); } void md_icdf97z2(int D, const long dims[D], unsigned int flags, const long strs[D], complex float* data) { md_wavtrafoz2(D, dims, flags, strs, 
data, icdf97_line_nosort, true, true); } // FIXME: slow void md_resortz(int D, const long dims[D], unsigned int flags, complex float* data) { md_wavtrafoz(D, dims, flags, data, icdf97_line_nosort, true, true); md_wavtrafoz(D, dims, flags, data, cdf97_line, false, false); } void md_iresortz(int D, const long dims[D], unsigned int flags, complex float* data) { md_wavtrafoz(D, dims, flags, data, icdf97_line, true, false); md_wavtrafoz(D, dims, flags, data, cdf97_line_nosort, false, true); } #if 0 const float d4[6] = { -sqrt(3.), sqrt(3.) / 4., -(2. - sqrt(3.)) / 4., 1., sqrt(2. + sqrt(3.)), sqrt(2. - sqrt(3.)) }; static void d4update(int n, float x[n]) { } void deb4 #endif bart-0.4.02/src/num/wavelet.h000066400000000000000000000030331320577655200157460ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include "num/multind.h" #include "misc/cppwrap.h" extern void md_wavtrafo2(int D, const long dims[__VLA(D)], unsigned int flags, const long strs[__VLA(D)], void* ptr, md_trafo_fun_t fun, _Bool inv, _Bool nosort); extern void md_wavtrafo(int D, const long dims[__VLA(D)], unsigned int flags, void* ptr, size_t size, md_trafo_fun_t fun, _Bool inv, _Bool nosort); extern void md_wavtrafoz2(int D, const long dims[__VLA(D)], unsigned int flags, const long strs[__VLA(D)], _Complex float* x, md_trafo_fun_t fun, _Bool inv, _Bool nosort); extern void md_wavtrafoz(int D, const long dims[__VLA(D)], unsigned int flags, _Complex float* ptr, md_trafo_fun_t fun, _Bool inv, _Bool nosort); extern void md_cdf97z(int D, const long dims[__VLA(D)], unsigned int flags, _Complex float* data); extern void md_icdf97z(int D, const long dims[__VLA(D)], unsigned int flags, _Complex float* data); extern void md_cdf97z2(int D, const long dims[__VLA(D)], unsigned int flags, const long strs[__VLA(D)], _Complex float* data); extern void 
md_icdf97z2(int D, const long dims[__VLA(D)], unsigned int flags, const long strs[__VLA(D)], _Complex float* data); extern void md_resortz(int D, const long dims[__VLA(D)], unsigned int flags, _Complex float* data); extern void md_iresortz(int D, const long dims[__VLA(D)], unsigned int flags, _Complex float* data); #include "misc/cppwrap.h" bart-0.4.02/src/ones.c000066400000000000000000000021041320577655200144350ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012, 2014 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/io.h" #include "misc/misc.h" static const char usage_str[] = "dims dim1 ... dimn name"; static const char help_str[] = "Create an array filled with ones with {dims} dimensions of size {dim1} to {dimn}.\n"; int main_ones(int argc, char* argv[]) { mini_cmdline(&argc, argv, -3, usage_str, help_str); num_init(); int N = atoi(argv[1]); assert(N >= 0); assert(argc == 3 + N); long dims[N]; for (int i = 0; i < N; i++) { dims[i] = atoi(argv[2 + i]); assert(dims[i] >= 1); } complex float* x = create_cfl(argv[2 + N], N, dims); md_zfill(N, dims, x, 1.); unmap_cfl(N, dims, x); exit(0); } bart-0.4.02/src/pattern.c000066400000000000000000000024531320577655200151550ustar00rootroot00000000000000/* Copyright 2013, 2016. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012-2013 Martin Uecker * 2016 Jonathan Tamir */ #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/mri.h" #include "misc/misc.h" #include "misc/opts.h" static const char usage_str[] = " "; static const char help_str[] = "Compute sampling pattern from kspace\n"; int main_pattern(int argc, char* argv[]) { unsigned int flags = COIL_FLAG; const struct opt_s opts[] = { OPT_UINT('s', &flags, "bitmask", "Squash dimensions selected by bitmask"), }; cmdline(&argc, argv, 2, 2, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); unsigned int N = DIMS; long in_dims[N]; long out_dims[N]; complex float* kspace = load_cfl(argv[1], N, in_dims); md_select_dims(N, ~flags, out_dims, in_dims); complex float* pattern = create_cfl(argv[2], N, out_dims); estimate_pattern(N, in_dims, flags, pattern, kspace); unmap_cfl(N, in_dims, kspace); unmap_cfl(N, out_dims, pattern); exit(0); } bart-0.4.02/src/phantom.c000066400000000000000000000050451320577655200151460ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2015. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013, 2015 Martin Uecker */ #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mri.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #include "simu/phantom.h" static const char usage_str[] = ""; static const char help_str[] = "Image and k-space domain phantoms."; int main_phantom(int argc, char* argv[]) { bool kspace = false; bool d3 = false; int sens = 0; int osens = -1; int xdim = -1; bool out_sens = false; bool tecirc = false; bool circ = false; const char* traj = NULL; long dims[DIMS] = { [0 ... 
DIMS - 1] = 1 }; dims[0] = 128; dims[1] = 128; dims[2] = 1; const struct opt_s opts[] = { OPT_INT('s', &sens, "nc", "nc sensitivities"), OPT_INT('S', &osens, "", "Output nc sensitivities"), OPT_SET('k', &kspace, "k-space"), OPT_STRING('t', &traj, "file", "trajectory"), OPT_SET('c', &circ, "()"), OPT_SET('m', &tecirc, "()"), OPT_INT('x', &xdim, "n", "dimensions in y and z"), OPT_SET('3', &d3, "3D"), }; cmdline(&argc, argv, 1, 1, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); if (tecirc) { circ = true; dims[TE_DIM] = 32; } if (-1 != osens) { out_sens = true; sens = osens; } if (-1 != xdim) dims[0] = dims[1] = xdim; if (d3) dims[2] = dims[0]; long sdims[DIMS]; complex float* samples = NULL; if (NULL != traj) { samples = load_cfl(traj, DIMS, sdims); dims[0] = 1; dims[1] = sdims[1]; dims[2] = sdims[2]; } if (sens) dims[3] = sens; complex float* out = create_cfl(argv[1], DIMS, dims); if (out_sens) { assert(NULL == traj); assert(!kspace); calc_sens(dims, out); } else if (circ) { assert(NULL == traj); if (1 < dims[TE_DIM]) { assert(!d3); calc_moving_circ(dims, out, kspace); } else { (d3 ? calc_circ3d : calc_circ)(dims, out, kspace); // calc_ring(dims, out, kspace); } } else { //assert(1 == dims[COIL_DIM]); if (NULL == samples) { (d3 ? calc_phantom3d : calc_phantom)(dims, out, kspace); } else { dims[0] = 3; (d3 ? calc_phantom3d_noncart : calc_phantom_noncart)(dims, out, samples); dims[0] = 1; } } if (NULL != traj) free((void*)traj); if (NULL != samples) unmap_cfl(3, sdims, samples); unmap_cfl(DIMS, dims, out); exit(0); } bart-0.4.02/src/pics.c000066400000000000000000000423111320577655200144330ustar00rootroot00000000000000/* Copyright 2013-2017. The Regents of the University of California. * Copyright 2015-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012-2017 Martin Uecker * 2014-2016 Frank Ong * 2014-2017 Jon Tamir * */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/init.h" #include "num/ops.h" #include "iter/misc.h" #include "linops/linop.h" #include "linops/sampling.h" #include "linops/someops.h" #include "noncart/nufft.h" #include "sense/recon.h" #include "sense/model.h" #include "sense/optcom.h" #include "misc/debug.h" #include "misc/mri.h" #include "misc/utils.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #include "grecon/optreg.h" static const char usage_str[] = " "; static const char help_str[] = "Parallel-imaging compressed-sensing reconstruction."; static const struct linop_s* sense_nc_init(const long max_dims[DIMS], const long map_dims[DIMS], const complex float* maps, const long ksp_dims[DIMS], const long traj_dims[DIMS], const complex float* traj, struct nufft_conf_s conf, const complex float* weights, struct operator_s** precond_op, bool sms) { long coilim_dims[DIMS]; long img_dims[DIMS]; md_select_dims(DIMS, ~MAPS_FLAG, coilim_dims, max_dims); md_select_dims(DIMS, ~COIL_FLAG, img_dims, max_dims); const struct linop_s* fft_op = nufft_create(DIMS, ksp_dims, coilim_dims, traj_dims, traj, weights, conf); const struct linop_s* maps_op = maps2_create(coilim_dims, map_dims, img_dims, maps); if (sms) { /** * Apply Fourier encoding in image space (after coil * sensitivity weighting but before NUFFT). 
*/ const struct linop_s* fft_slice = linop_fft_create(DIMS, map_dims, SLICE_FLAG); fft_op = linop_chain(fft_slice, fft_op); linop_free(fft_slice); } const struct linop_s* lop = linop_chain(maps_op, fft_op); //precond_op[0] = (struct operator_s*) nufft_precond_create( fft_op ); precond_op[0] = NULL; linop_free(maps_op); linop_free(fft_op); return lop; } int main_pics(int argc, char* argv[]) { // Initialize default parameters struct sense_conf conf = sense_defaults; float bpsense_eps = -1.; bool randshift = true; unsigned int maxiter = 30; float step = -1.; // Start time count double start_time = timestamp(); // Read input options struct nufft_conf_s nuconf = nufft_conf_defaults; nuconf.toeplitz = true; float restrict_fov = -1.; const char* pat_file = NULL; const char* traj_file = NULL; bool scale_im = false; bool eigen = false; float scaling = 0.; // Simultaneous Multi-Slice bool sms = false; unsigned int llr_blk = 8; const char* image_truth_file = NULL; bool im_truth = false; const char* image_start_file = NULL; bool warm_start = false; bool dynamic_rho = false; bool hogwild = false; bool fast = false; float admm_rho = iter_admm_defaults.rho; unsigned int admm_maxitercg = iter_admm_defaults.maxitercg; unsigned int gpun = 0; struct opt_reg_s ropts; opt_reg_init(&ropts); unsigned int loop_flags = 0u; const struct opt_s opts[] = { { 'l', true, opt_reg, &ropts, "1/-l2\t\ttoggle l1-wavelet or l2 regularization." }, OPT_FLOAT('r', &ropts.lambda, "lambda", "regularization parameter"), { 'R', true, opt_reg, &ropts, " :A:B:C\tgeneralized regularization options (-Rh for help)" }, OPT_SET('c', &conf.rvc, "real-value constraint"), OPT_FLOAT('s', &step, "step", "iteration stepsize"), OPT_UINT('i', &maxiter, "iter", "max. 
number of iterations"), OPT_STRING('t', &traj_file, "file", "k-space trajectory"), OPT_CLEAR('n', &randshift, "disable random wavelet cycle spinning"), OPT_SET('g', &conf.gpu, "use GPU"), OPT_UINT('G', &gpun, "gpun", "use GPU device gpun"), OPT_STRING('p', &pat_file, "file", "pattern or weights"), OPT_SELECT('I', enum algo_t, &ropts.algo, IST, "select IST"), OPT_UINT('b', &llr_blk, "blk", "Lowrank block size"), OPT_SET('e', &eigen, "Scale stepsize based on max. eigenvalue"), OPT_SET('H', &hogwild, "(hogwild)"), OPT_SET('D', &dynamic_rho, "(dynamic_rho)"), OPT_SET('F', &fast, "(fast)"), OPT_STRING('T', &image_truth_file, "file", "(truth file)"), OPT_STRING('W', &image_start_file, "", "Warm start with "), OPT_INT('d', &debug_level, "level", "Debug level"), OPT_INT('O', &conf.rwiter, "rwiter", "(reweighting)"), OPT_FLOAT('o', &conf.gamma, "gamma", "(reweighting)"), OPT_FLOAT('u', &admm_rho, "rho", "ADMM rho"), OPT_UINT('C', &admm_maxitercg, "iter", "ADMM max. CG iterations"), OPT_FLOAT('q', &conf.cclambda, "cclambda", "(cclambda)"), OPT_FLOAT('f', &restrict_fov, "rfov", "restrict FOV"), OPT_SELECT('m', enum algo_t, &ropts.algo, ADMM, "select ADMM"), OPT_FLOAT('w', &scaling, "val", "inverse scaling of the data"), OPT_SET('S', &scale_im, "re-scale the image after reconstruction"), OPT_UINT('B', &loop_flags, "flags", "batch-mode"), OPT_SET('K', &nuconf.pcycle, "randshift for NUFFT"), OPT_FLOAT('P', &bpsense_eps, "eps", "Basis Pursuit formulation, || y- Ax ||_2 <= eps"), OPT_SELECT('a', enum algo_t, &ropts.algo, PRIDU, "select Primal Dual"), OPT_SET('M', &sms, "Simultaneous Multi-Slice reconstruction"), }; cmdline(&argc, argv, 3, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); if (NULL != image_truth_file) im_truth = true; if (NULL != image_start_file) warm_start = true; if (0 <= bpsense_eps) conf.bpsense = true; long max_dims[DIMS]; long map_dims[DIMS]; long pat_dims[DIMS]; long img_dims[DIMS]; long coilim_dims[DIMS]; long ksp_dims[DIMS]; long traj_dims[DIMS]; // load 
kspace and maps and get dimensions complex float* kspace = load_cfl(argv[1], DIMS, ksp_dims); if (sms) { debug_printf(DP_INFO, "SMS reconstruction: MB = %ld\n", ksp_dims[SLICE_DIM]); nuconf.toeplitz = false; // no longer toeplitz-shaped because of chaining of operators (see later)?! } complex float* maps = load_cfl(argv[2], DIMS, map_dims); unsigned int map_flags = md_nontriv_dims(DIMS, map_dims); map_flags |= FFT_FLAGS | SENS_FLAGS; complex float* traj = NULL; if (NULL != traj_file) traj = load_cfl(traj_file, DIMS, traj_dims); md_copy_dims(DIMS, max_dims, ksp_dims); md_copy_dims(5, max_dims, map_dims); md_select_dims(DIMS, ~COIL_FLAG, img_dims, max_dims); md_select_dims(DIMS, ~MAPS_FLAG, coilim_dims, max_dims); if (!md_check_compat(DIMS, ~(MD_BIT(MAPS_DIM)|FFT_FLAGS), img_dims, map_dims)) error("Dimensions of image and sensitivities do not match!\n"); assert(1 == ksp_dims[MAPS_DIM]); if (conf.gpu) num_init_gpu_device(gpun); else num_init(); // print options if (conf.gpu) debug_printf(DP_INFO, "GPU reconstruction\n"); if (map_dims[MAPS_DIM] > 1) debug_printf(DP_INFO, "%ld maps.\nESPIRiT reconstruction.\n", map_dims[MAPS_DIM]); if (conf.bpsense) debug_printf(DP_INFO, "Basis Pursuit formulation\n"); if (hogwild) debug_printf(DP_INFO, "Hogwild stepsize\n"); if (dynamic_rho) debug_printf(DP_INFO, "ADMM Dynamic stepsize\n"); if (im_truth) debug_printf(DP_INFO, "Compare to truth\n"); assert(!((conf.rwiter > 1) && (nuconf.toeplitz || conf.bpsense))); // initialize sampling pattern complex float* pattern = NULL; if (NULL != pat_file) { pattern = load_cfl(pat_file, DIMS, pat_dims); assert(md_check_compat(DIMS, COIL_FLAG, ksp_dims, pat_dims)); } else { md_select_dims(DIMS, ~COIL_FLAG, pat_dims, ksp_dims); pattern = md_alloc(DIMS, pat_dims, CFL_SIZE); estimate_pattern(DIMS, ksp_dims, COIL_FLAG, pattern, kspace); } if (NULL != traj_file) { if (NULL == pat_file) { md_free(pattern); pattern = NULL; } else { long ksp_strs[DIMS]; md_calc_strides(DIMS, ksp_strs, ksp_dims, 
CFL_SIZE); long pat_strs[DIMS]; md_calc_strides(DIMS, pat_strs, pat_dims, CFL_SIZE); md_zmul2(DIMS, ksp_dims, ksp_strs, kspace, ksp_strs, kspace, pat_strs, pattern); } } else { // print some statistics long T = md_calc_size(DIMS, pat_dims); long samples = (long)pow(md_znorm(DIMS, pat_dims, pattern), 2.); debug_printf(DP_INFO, "Size: %ld Samples: %ld Acc: %.2f\n", T, samples, (float)T / (float)samples); } if (NULL == traj_file) { fftmod(DIMS, ksp_dims, FFT_FLAGS, kspace, kspace); fftmod(DIMS, map_dims, FFT_FLAGS, maps, maps); } // apply fov mask to sensitivities if (-1. != restrict_fov) { float restrict_dims[DIMS] = { [0 ... DIMS - 1] = 1. }; restrict_dims[0] = restrict_fov; restrict_dims[1] = restrict_fov; restrict_dims[2] = restrict_fov; apply_mask(DIMS, map_dims, maps, restrict_dims); } // initialize forward_op and precond_op const struct linop_s* forward_op = NULL; const struct operator_s* precond_op = NULL; if (NULL == traj_file) forward_op = sense_init(max_dims, map_flags, maps); else forward_op = sense_nc_init(max_dims, map_dims, maps, ksp_dims, traj_dims, traj, nuconf, pattern, (struct operator_s**) &precond_op, sms); // apply scaling if (0. == scaling) { if (NULL == traj_file) { scaling = estimate_scaling(ksp_dims, NULL, kspace); } else { complex float* adj = md_alloc(DIMS, img_dims, CFL_SIZE); linop_adjoint(forward_op, DIMS, img_dims, adj, DIMS, ksp_dims, kspace); scaling = estimate_scaling_norm(1., md_calc_size(DIMS, img_dims), adj, false); md_free(adj); } } if (0. == scaling ) { debug_printf(DP_WARN, "Estimated scale is zero. Set to one."); scaling = 1.; } else { debug_printf(DP_DEBUG1, "Inverse scaling of the data: %f\n", scaling); md_zsmul(DIMS, ksp_dims, kspace, kspace, 1. 
/ scaling); if (conf.bpsense) { bpsense_eps /= scaling; debug_printf(DP_DEBUG1, "scaling basis pursuit eps: %.3f\n", bpsense_eps); } } complex float* image = create_cfl(argv[3], DIMS, img_dims); md_clear(DIMS, img_dims, image, CFL_SIZE); long img_truth_dims[DIMS]; complex float* image_truth = NULL; if (im_truth) { image_truth = load_cfl(image_truth_file, DIMS, img_truth_dims); //md_zsmul(DIMS, img_dims, image_truth, image_truth, 1. / scaling); xfree(image_truth_file); } long img_start_dims[DIMS]; complex float* image_start = NULL; if (warm_start) { debug_printf(DP_DEBUG1, "Warm start: %s\n", image_start_file); image_start = load_cfl(image_start_file, DIMS, img_start_dims); assert(md_check_compat(DIMS, 0u, img_start_dims, img_dims)); xfree(image_start_file); // if rescaling at the end, assume the input has also been rescaled if (scale_im && (scaling != 0.)) md_zsmul(DIMS, img_dims, image_start, image_start, 1. / scaling); } assert((0u == loop_flags) || (NULL == image_start)); assert((0u == loop_flags) || (NULL == traj_file)); assert(!(loop_flags & COIL_FLAG)); const complex float* image_start1 = image_start; long loop_dims[DIMS]; md_select_dims(DIMS, loop_flags, loop_dims, max_dims); long img1_dims[DIMS]; md_select_dims(DIMS, ~loop_flags, img1_dims, img_dims); long ksp1_dims[DIMS]; md_select_dims(DIMS, ~loop_flags, ksp1_dims, ksp_dims); long max1_dims[DIMS]; md_select_dims(DIMS, ~loop_flags, max1_dims, max_dims); long pat1_dims[DIMS]; md_select_dims(DIMS, ~loop_flags, pat1_dims, pat_dims); complex float* pattern1 = NULL; if (NULL != pattern) { pattern1 = md_alloc(DIMS, pat1_dims, CFL_SIZE); md_slice(DIMS, loop_flags, (const long[DIMS]){ [0 ... 
DIMS - 1] = 0 }, pat_dims, pattern1, pattern, CFL_SIZE); } // FIXME: re-initialize forward_op and precond_op if ((NULL == traj_file) && !sms) { forward_op = sense_init(max1_dims, map_flags, maps); // basis pursuit requires the full forward model to add as a linop constraint if (conf.bpsense) { const struct linop_s* sample_op = linop_sampling_create(max1_dims, pat1_dims, pattern1); struct linop_s* tmp = linop_chain(forward_op, sample_op); linop_free(sample_op); linop_free(forward_op); forward_op = tmp; } } // initialize prox functions const struct operator_p_s* thresh_ops[NUM_REGS] = { NULL }; const struct linop_s* trafos[NUM_REGS] = { NULL }; opt_reg_configure(DIMS, img1_dims, &ropts, thresh_ops, trafos, llr_blk, randshift, conf.gpu); if (conf.bpsense) opt_bpursuit_configure(&ropts, thresh_ops, trafos, forward_op, kspace, bpsense_eps); int nr_penalties = ropts.r; struct reg_s* regs = ropts.regs; enum algo_t algo = ropts.algo; // initialize algorithm italgo_fun2_t italgo = iter2_call_iter; struct iter_call_s iter2_data; SET_TYPEID(iter_call_s, &iter2_data); iter_conf* iconf = CAST_UP(&iter2_data); struct iter_conjgrad_conf cgconf = iter_conjgrad_defaults; struct iter_fista_conf fsconf = iter_fista_defaults; struct iter_ist_conf isconf = iter_ist_defaults; struct iter_admm_conf mmconf = iter_admm_defaults; struct iter_niht_conf ihconf = iter_niht_defaults; struct iter_chambolle_pock_conf pdconf = iter_chambolle_pock_defaults; if ((CG == algo) && (1 == nr_penalties) && (L2IMG != regs[0].xform)) algo = FISTA; if (conf.bpsense) assert(ADMM == algo || PRIDU == algo); else if (nr_penalties > 1) algo = ADMM; if ((IST == algo) || (FISTA == algo)) { // For non-Cartesian trajectories, the default // will usually not work. TODO: The same is true // for sensitivities which are not normalized, but // we do not detect this case. if ((NULL != traj_file) && (-1. == step) && !eigen) debug_printf(DP_WARN, "No step size specified.\n"); if (-1. 
== step) step = 0.95; } if ((CG == algo) || (ADMM == algo)) if (-1. != step) debug_printf(DP_INFO, "Stepsize ignored.\n"); if (eigen) { double maxeigen = estimate_maxeigenval(forward_op->normal); debug_printf(DP_INFO, "Maximum eigenvalue: %.2e\n", maxeigen); step /= maxeigen; } switch (algo) { case CG: debug_printf(DP_INFO, "conjugate gradients\n"); assert((0 == nr_penalties) || ((1 == nr_penalties) && (L2IMG == regs[0].xform))); cgconf = iter_conjgrad_defaults; cgconf.maxiter = maxiter; cgconf.l2lambda = (0 == nr_penalties) ? 0. : regs[0].lambda; iter2_data.fun = iter_conjgrad; iter2_data._conf = CAST_UP(&cgconf); nr_penalties = 0; break; case IST: debug_printf(DP_INFO, "IST\n"); assert(1 == nr_penalties); isconf = iter_ist_defaults; isconf.maxiter = maxiter; isconf.step = step; isconf.hogwild = hogwild; iter2_data.fun = iter_ist; iter2_data._conf = CAST_UP(&isconf); break; case ADMM: debug_printf(DP_INFO, "ADMM\n"); mmconf = iter_admm_defaults; mmconf.maxiter = maxiter; mmconf.maxitercg = admm_maxitercg; mmconf.rho = admm_rho; mmconf.hogwild = hogwild; mmconf.fast = fast; mmconf.dynamic_rho = dynamic_rho; mmconf.ABSTOL = 0.; mmconf.RELTOL = 0.; italgo = iter2_admm; iconf = CAST_UP(&mmconf); break; case PRIDU: debug_printf(DP_INFO, "Primal Dual\n"); assert(2 == nr_penalties); pdconf = iter_chambolle_pock_defaults; pdconf.maxiter = maxiter; pdconf.sigma = 1. * scaling; pdconf.tau = 1. / pdconf.sigma; pdconf.theta = 1; pdconf.decay = (hogwild ? 
.95 : 1); pdconf.tol = 1E-4; italgo = iter2_chambolle_pock; iconf = CAST_UP(&pdconf); break; case FISTA: debug_printf(DP_INFO, "FISTA\n"); assert(1 == nr_penalties); fsconf = iter_fista_defaults; fsconf.maxiter = maxiter; fsconf.step = step; fsconf.hogwild = hogwild; iter2_data.fun = iter_fista; iter2_data._conf = CAST_UP(&fsconf); break; case NIHT: debug_printf(DP_INFO, "NIHT\n"); ihconf = iter_niht_defaults; ihconf.maxiter = maxiter; ihconf.do_warmstart=warm_start; italgo = iter2_niht; iconf = CAST_UP(&ihconf); conf.gpu = false; // gpu not implemented, disable break; default: assert(0); } bool trafos_cond = ((PRIDU == algo) || (ADMM == algo) || ((NIHT == algo) && (regs[0].xform == NIHTWAV))); const struct operator_s* op = sense_recon_create(&conf, max1_dims, forward_op, pat1_dims, (NULL != traj_file) ? NULL : (conf.bpsense ? NULL : pattern1), italgo, iconf, image_start1, nr_penalties, thresh_ops, trafos_cond ? trafos : NULL, precond_op); long strsx[2][DIMS]; const long* strs[2] = { strsx[0], strsx[1] }; md_calc_strides(DIMS, strsx[0], img_dims, CFL_SIZE); md_calc_strides(DIMS, strsx[1], ksp_dims, CFL_SIZE); for (unsigned int i = 0; i < DIMS; i++) { if (MD_IS_SET(loop_flags, i)) { strsx[0][i] = 0; strsx[1][i] = 0; } } if (0 != loop_flags) { op = operator_copy_wrapper(2, strs, op); op = operator_loop(DIMS, loop_dims, op); } operator_apply(op, DIMS, img_dims, image, DIMS, conf.bpsense ? img_dims : ksp_dims, conf.bpsense ? 
NULL : kspace); operator_free(op); opt_reg_free(&ropts, thresh_ops, trafos); if (scale_im) md_zsmul(DIMS, img_dims, image, image, scaling); // clean up if (NULL != pat_file) unmap_cfl(DIMS, pat_dims, pattern); else md_free(pattern); if (NULL != pattern1) md_free(pattern1); unmap_cfl(DIMS, map_dims, maps); unmap_cfl(DIMS, ksp_dims, kspace); unmap_cfl(DIMS, img_dims, image); if (NULL != traj) unmap_cfl(DIMS, traj_dims, traj); if (im_truth) unmap_cfl(DIMS, img_dims, image_truth); if (image_start) unmap_cfl(DIMS, img_dims, image_start); double end_time = timestamp(); debug_printf(DP_INFO, "Total Time: %f\n", end_time - start_time); exit(0); } bart-0.4.02/src/pocsense.c000066400000000000000000000115751320577655200153240ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2016 Martin Uecker * 2014 Jonathan Tamir */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/init.h" #include "num/ops.h" #include "num/iovec.h" #include "linops/someops.h" #include "linops/linop.h" #include "linops/waveop.h" #include "iter/iter.h" #include "iter/prox.h" #include "iter/thresh.h" #include "sense/pocs.h" #include "sense/optcom.h" #include "misc/mri.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/opts.h" static const char usage_str[] = " "; static const char help_str[] = "Perform POCSENSE reconstruction."; int main_pocsense(int argc, char* argv[]) { float alpha = 0.; int maxiter = 50; bool l1wav = false; float lambda = -1.; bool use_gpu = false; bool use_admm = false; float admm_rho = -1.; int l1type = 2; const struct opt_s opts[] = { OPT_INT('i', &maxiter, "iter", "max. 
number of iterations"), OPT_FLOAT('r', &alpha, "alpha", "regularization parameter"), OPT_INT('l', &l1type, "1/-l2", "toggle l1-wavelet or l2 regularization"), OPT_SET('g', &use_gpu, "()"), OPT_FLOAT('o', &lambda, "", "()"), OPT_FLOAT('m', &admm_rho, "", "()"), }; cmdline(&argc, argv, 3, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); if (1 == l1type) l1wav = true; else if (2 == l1type) l1wav = false; else error("Unknown regularization type."); unsigned int N = DIMS; long dims[N]; long ksp_dims[N]; complex float* kspace_data = load_cfl(argv[1], N, ksp_dims); complex float* sens_maps = load_cfl(argv[2], N, dims); for (int i = 0; i < 4; i++) // sizes2[4] may be > 1 if (ksp_dims[i] != dims[i]) error("Dimensions of kspace and sensitivities do not match!\n"); assert(1 == ksp_dims[MAPS_DIM]); num_init(); long dims1[N]; md_select_dims(N, ~(COIL_FLAG|MAPS_FLAG), dims1, dims); // ----------------------------------------------------------- // memory allocation complex float* result = create_cfl(argv[3], N, ksp_dims); complex float* pattern = md_alloc(N, dims1, CFL_SIZE); // ----------------------------------------------------------- // pre-process data float scaling = estimate_scaling(ksp_dims, NULL, kspace_data); md_zsmul(N, ksp_dims, kspace_data, kspace_data, 1. / scaling); estimate_pattern(N, ksp_dims, COIL_FLAG, pattern, kspace_data); // ----------------------------------------------------------- // l1-norm threshold operator const struct operator_p_s* thresh_op = NULL; const struct linop_s* wave_op = NULL; if (l1wav) { long minsize[DIMS] = { [0 ... 
DIMS - 1] = 1 }; minsize[0] = MIN(ksp_dims[0], 16); minsize[1] = MIN(ksp_dims[1], 16); minsize[2] = MIN(ksp_dims[2], 16); long strs[DIMS]; md_calc_strides(DIMS, strs, ksp_dims, CFL_SIZE); wave_op = linop_wavelet_create(DIMS, FFT_FLAGS, ksp_dims, strs, minsize, false); thresh_op = prox_unithresh_create(DIMS, wave_op, alpha, COIL_FLAG, use_gpu); } #if 0 else { thresh_op = prox_leastsquares_create(DIMS, ksp_dims, alpha, NULL); } #endif // ----------------------------------------------------------- // italgo interface italgo_fun2_t italgo = NULL; iter_conf* iconf = NULL; struct iter_pocs_conf pconf = iter_pocs_defaults; pconf.maxiter = maxiter; struct iter_admm_conf mmconf = iter_admm_defaults; mmconf.maxiter = maxiter; mmconf.rho = admm_rho; struct linop_s* eye = linop_identity_create(DIMS, ksp_dims); struct linop_s* ops[3] = { eye, eye, eye }; struct linop_s** ops2 = NULL; if (use_admm) { italgo = iter2_admm; iconf = CAST_UP(&mmconf); ops2 = ops; } else { italgo = iter2_pocs; iconf = CAST_UP(&pconf); } // ----------------------------------------------------------- // pocsense recon debug_printf(DP_INFO, "Reconstruction...\n"); fftmod(N, ksp_dims, FFT_FLAGS, kspace_data, kspace_data); if (use_gpu) #ifdef USE_CUDA pocs_recon_gpu2(italgo, iconf, (const struct linop_s**)ops2, dims, thresh_op, alpha, lambda, result, sens_maps, pattern, kspace_data); #else assert(0); #endif else pocs_recon2(italgo, iconf, (const struct linop_s**)ops2, dims, thresh_op, alpha, lambda, result, sens_maps, pattern, kspace_data); ifftmod(N, ksp_dims, FFT_FLAGS, result, result); debug_printf(DP_INFO, "Done.\n"); md_zsmul(N, ksp_dims, result, result, scaling); linop_free(eye); md_free(pattern); if (NULL != thresh_op) operator_p_free(thresh_op); if (NULL != wave_op) linop_free(wave_op); unmap_cfl(N, ksp_dims, result); unmap_cfl(N, ksp_dims, kspace_data); unmap_cfl(N, dims, sens_maps); exit(0); } 
bart-0.4.02/src/poisson.c000066400000000000000000000137101320577655200151700ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2015. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * 2013, 2015 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/rand.h" #include "num/init.h" #include "misc/misc.h" #include "misc/mmio.h" #include "misc/pd.h" #include "misc/opts.h" static void random_point(int D, float p[static D]) { for (int i = 0; i < D; i++) p[i] = uniform_rand(); } static float dist(int D, const float a[static D], const float b[static D]) { float r = 0.; for (int i = 0; i < D; i++) r += powf(a[i] - b[i], 2.); return sqrtf(r); } static float maxn(int D, const float a[static D], const float b[static D]) { float r = 0.; for (int i = 0; i < D; i++) r = MAX(fabsf(a[i] - b[i]), r); return r; } static const char usage_str[] = ""; static const char help_str[] = "Computes Poisson-disc sampling pattern."; int main_poisson(int argc, char* argv[]) { int yy = 128; int zz = 128; bool cutcorners = false; float vardensity = 0.; bool vd_def = false; int T = 1; int rnd = 0; int randseed = 11235; bool msk = true; int points = -1; float mindist = 1. 
/ 1.275; float yscale = 1.; float zscale = 1.; unsigned int calreg = 0; const struct opt_s opts[] = { OPT_INT('Y', &yy, "size", "size dimension 1"), OPT_INT('Z', &zz, "size", "size dimension 2"), OPT_FLOAT('y', &yscale, "acc", "acceleration dim 1"), OPT_FLOAT('z', &zscale, "acc", "acceleration dim 2"), OPT_UINT('C', &calreg, "size", "size of calibration region"), OPT_SET('v', &vd_def, "variable density"), OPT_FLOAT('V', &vardensity, "", "(variable density)"), OPT_SET('e', &cutcorners, "elliptical scanning"), OPT_FLOAT('D', &mindist, "", "()"), OPT_INT('T', &T, "", "()"), OPT_CLEAR('m', &msk, "()"), OPT_INT('R', &points, "", "()"), OPT_INT('s', &randseed, "seed", "random seed"), }; cmdline(&argc, argv, 1, 1, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); num_rand_init(randseed); if (vd_def && (0. == vardensity)) vardensity = 20.; if (-1 != points) rnd = 1; assert((yscale >= 1.) && (zscale >= 1.)); // compute mindest and scaling float kspext = MAX(yy, zz); int Pest = T * (int)(1.2 * powf(kspext, 2.) / (yscale * zscale)); mindist /= kspext; yscale *= (float)kspext / (float)yy; zscale *= (float)kspext / (float)zz; if (vardensity != 0.) { // TODO } long dims[5] = { 1, yy, zz, T, 1 }; complex float (*mask)[T][zz][yy] = NULL; if (msk) { mask = MD_CAST_ARRAY3_PTR(complex float, 5, dims, create_cfl(argv[1], 5, dims), 1, 2, 3); md_clear(5, dims, &(*mask)[0][0][0], sizeof(complex float)); } int M = rnd ? 
(points + 1) : Pest; int P; while (true) { PTR_ALLOC(float[M][2], points); PTR_ALLOC(int[M], kind); // int (*kind)[M] = TYPE_ALLOC(int[M]); (*kind)[0] = 0; if (!rnd) { (*points)[0][0] = 0.5; (*points)[0][1] = 0.5; if (1 == T) { P = poissondisc(2, M, 1, vardensity, mindist, *points); } else { float (*delta)[T][T] = TYPE_ALLOC(float[T][T]); float dd[T]; for (int i = 0; i < T; i++) dd[i] = mindist; mc_poisson_rmatrix(2, T, *delta, dd); P = poissondisc_mc(2, T, M, 1, vardensity, *delta, *points, *kind); } } else { // random pattern P = M - 1; for (int i = 0; i < P; i++) random_point(2, (*points)[i]); } if (P < M) { for (int i = 0; i < P; i++) { (*points)[i][0] = ((*points)[i][0] - 0.5) * yscale + 0.5; (*points)[i][1] = ((*points)[i][1] - 0.5) * zscale + 0.5; } // throw away points outside float center[2] = { 0.5, 0.5 }; int j = 0; for (int i = 0; i < P; i++) { if ((cutcorners ? dist : maxn)(2, center, (*points)[i]) <= 0.5) { (*points)[j][0] = (*points)[i][0]; (*points)[j][1] = (*points)[i][1]; j++; } } P = j; if (msk) { // rethink module here for (int i = 0; i < P; i++) { int yy = (int)floorf((*points)[i][0] * dims[1]); int zz = (int)floorf((*points)[i][1] * dims[2]); if ((yy < 0) || (yy >= dims[1]) || (zz < 0) || (zz >= dims[2])) continue; if (1 == T) (*mask)[0][zz][yy] = 1.;//cexpf(2.i * M_PI * (float)(*kind)[i] / (float)T); else (*mask)[(*kind)[i]][zz][yy] = 1.;//cexpf(2.i * M_PI * (float)(*kind)[i] / (float)T); } } else { #if 1 long sdims[2] = { 3, P }; //complex float (*samples)[P][3] = (void*)create_cfl(argv[1], 2, sdims); complex float (*samples)[P][3] = MD_CAST_ARRAY2_PTR(complex float, 2, sdims, create_cfl(argv[1], 2, sdims), 0, 1); for (int i = 0; i < P; i++) { (*samples)[i][0] = 0.; (*samples)[i][1] = ((*points)[i][0] - 0.5) * dims[1]; (*samples)[i][2] = ((*points)[i][1] - 0.5) * dims[2]; // printf("%f %f\n", creal(samples[3 * i + 0]), creal(samples[3 * i + 1])); } unmap_cfl(2, sdims, &(*samples)[0][0]); #endif } PTR_FREE(points); PTR_FREE(kind); break; } // 
repeat with more points M *= 2; PTR_FREE(points); PTR_FREE(kind); } // calibration region assert((mask != NULL) || (0 == calreg)); assert((calreg <= dims[1]) && (calreg <= dims[2])); for (unsigned int i = 0; i < calreg; i++) { for (unsigned int j = 0; j < calreg; j++) { int y = (dims[1] - calreg) / 2 + i; int z = (dims[2] - calreg) / 2 + j; for (int k = 0; k < T; k++) { if (0. == (*mask)[k][z][y]) { (*mask)[k][z][y] = 1.; P++; } } } } printf("points: %d", P); if (1 != T) printf(", classes: %d", T); if (NULL != mask) { float f = cutcorners ? (M_PI / 4.) : 1.; printf(", grid size: %ldx%ld%s = %ld (R = %f)", dims[1], dims[2], cutcorners ? "x(pi/4)" : "", (long)(f * dims[1] * dims[2]), f * T * dims[1] * dims[2] / (float)P); unmap_cfl(5, dims, &(*mask)[0][0][0]); } printf("\n"); exit(0); } bart-0.4.02/src/repmat.c000066400000000000000000000026361320577655200147730ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Author: * 2012-2014 Martin Uecker */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #define DIMS 16 static const char usage_str[] = "dimension repetitions "; static const char help_str[] = "Repeat input array multiple times along a certain dimension.\n"; int main_repmat(int argc, char* argv[]) { mini_cmdline(&argc, argv, 4, usage_str, help_str); num_init(); long in_dims[DIMS]; long out_dims[DIMS]; complex float* in_data = load_cfl(argv[3], DIMS, in_dims); int dim = atoi(argv[1]); int rep = atoi(argv[2]); assert(dim < DIMS); assert(rep >= 0); assert(1 == in_dims[dim]); md_copy_dims(DIMS, out_dims, in_dims); out_dims[dim] = rep; complex float* out_data = create_cfl(argv[4], DIMS, out_dims); long in_strs[DIMS]; long out_strs[DIMS]; md_calc_strides(DIMS, in_strs, in_dims, CFL_SIZE); md_calc_strides(DIMS, out_strs, out_dims, CFL_SIZE); md_copy2(DIMS, out_dims, out_strs, out_data, in_strs, in_data, CFL_SIZE); unmap_cfl(DIMS, out_dims, out_data); unmap_cfl(DIMS, in_dims, in_data); exit(0); } bart-0.4.02/src/reshape.c000066400000000000000000000034211320577655200151230ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * Copyright 2015. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2015 Martin Uecker */ #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #define DIMS 16 #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif static const char usage_str[] = "flags dim1 ... 
dimN "; static const char help_str[] = "Reshape selected dimensions.\n"; int main_reshape(int argc, char* argv[]) { cmdline(&argc, argv, 3, 100, usage_str, help_str, 0, NULL); num_init(); unsigned int flags = atoi(argv[1]); unsigned int n = bitcount(flags); assert((int)n + 3 == argc - 1); long in_dims[DIMS]; long in_strs[DIMS]; long out_dims[DIMS]; long out_strs[DIMS]; complex float* in_data = load_cfl(argv[n + 2], DIMS, in_dims); md_calc_strides(DIMS, in_strs, in_dims, CFL_SIZE); md_copy_dims(DIMS, out_dims, in_dims); unsigned int j = 0; for (unsigned int i = 0; i < DIMS; i++) if (MD_IS_SET(flags, i)) out_dims[i] = atoi(argv[j++ + 2]); assert(j == n); assert(md_calc_size(DIMS, in_dims) == md_calc_size(DIMS, out_dims)); md_calc_strides(DIMS, out_strs, out_dims, CFL_SIZE); for (unsigned int i = 0; i < DIMS; i++) if (!(MD_IS_SET(flags, i) || (in_strs[i] == out_strs[i]))) error("Dimensions are not consistent at index %d.\n", i); complex float* out_data = create_cfl(argv[n + 3], DIMS, out_dims); md_copy(DIMS, in_dims, out_data, in_data, CFL_SIZE); unmap_cfl(DIMS, in_dims, in_data); unmap_cfl(DIMS, out_dims, out_data); exit(0); } bart-0.4.02/src/resize.c000066400000000000000000000032771320577655200150060ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2016 Martin Uecker * 2014 Jonathan Tamir */ #include #include #include "num/multind.h" #include "num/init.h" #include "misc/resize.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif static const char usage_str[] = "dim1 size1 ... 
dimn sizen "; static const char help_str[] = "Resizes an array along dimensions to sizes by truncating or zero-padding."; int main_resize(int argc, char* argv[]) { bool center = false; const struct opt_s opts[] = { OPT_SET('c', ¢er, "center"), }; cmdline(&argc, argv, 4, 1000, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); unsigned int N = DIMS; int count = argc - 3; assert((count > 0) && (count % 2 == 0)); long in_dims[N]; long out_dims[N]; void* in_data = load_cfl(argv[argc - 2], N, in_dims); md_copy_dims(N, out_dims, in_dims); for (int i = 0; i < count; i += 2) { unsigned int dim = atoi(argv[i + 1]); unsigned int size = atoi(argv[i + 2]); assert(dim < N); assert(size >= 1); out_dims[dim] = size; } void* out_data = create_cfl(argv[argc - 1], N, out_dims); (center ? md_resize_center : md_resize)(N, out_dims, out_data, in_dims, in_data, CFL_SIZE); unmap_cfl(N, in_dims, in_data); unmap_cfl(N, out_dims, out_data); exit(0); } bart-0.4.02/src/rof.c000066400000000000000000000041071320577655200142640ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014 Martin Uecker * * * Rudin LI, Osher S, Fatemi E. 
Nonlinear total variation based * noise removal algorithms, Physica D: Nonlinear Phenomena * 60:259-268 (1992) * */ #include #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/iovec.h" #include "num/ops.h" #include "num/init.h" #include "linops/linop.h" #include "linops/someops.h" #include "linops/grad.h" #include "misc/mmio.h" #include "misc/misc.h" #include "iter/prox.h" #include "iter/thresh.h" #include "iter/iter2.h" #include "iter/iter.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Perform total variation denoising along dims .\n"; int main_rof(int argc, char* argv[]) { mini_cmdline(&argc, argv, 4, usage_str, help_str); num_init(); long dims[DIMS]; float lambda = atof(argv[1]); int flags = atoi(argv[2]); complex float* in_data = load_cfl(argv[3], DIMS, dims); complex float* out_data = create_cfl(argv[4], DIMS, dims); // TV operator const struct linop_s* tv_op = linop_grad_create(DIMS, dims, flags); // const struct linop_s* tv_op = linop_identity_create(DIMS, dims); struct iter_admm_conf conf = iter_admm_defaults; conf.maxiter = 50; conf.rho = .1; const struct operator_p_s* thresh_prox = prox_thresh_create(DIMS + 1, linop_codomain(tv_op)->dims, lambda, MD_BIT(DIMS), false); iter2_admm(CAST_UP(&conf), linop_identity_create(DIMS, dims)->forward, 1, MAKE_ARRAY(thresh_prox), MAKE_ARRAY(tv_op), NULL, NULL, 2 * md_calc_size(DIMS, dims), (float*)out_data, (const float*)in_data, NULL); linop_free(tv_op); operator_p_free(thresh_prox); unmap_cfl(DIMS, dims, in_data); unmap_cfl(DIMS, dims, out_data); exit(0); } bart-0.4.02/src/rss.c000066400000000000000000000022011320577655200142760ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012 Martin Uecker #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "bitmask "; static const char help_str[] = "Calculates root of sum of squares along selected dimensions.\n"; int main_rss(int argc, char* argv[argc]) { mini_cmdline(&argc, argv, 3, usage_str, help_str); num_init(); long dims[DIMS]; complex float* data = load_cfl(argv[2], DIMS, dims); int flags = atoi(argv[1]); assert(0 <= flags); long odims[DIMS]; md_select_dims(DIMS, ~flags, odims, dims); complex float* out = create_cfl(argv[3], DIMS, odims); md_zrss(DIMS, dims, flags, out, data); unmap_cfl(DIMS, dims, data); unmap_cfl(DIMS, odims, out); exit(0); } bart-0.4.02/src/sake.c000066400000000000000000000040231320577655200144160ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * Copyright 2015. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013, 2015 Martin Uecker * * * Peter J. Shin, Peder E.Z. Larson, Michael A. Ohliger, Michael Elad, * John M. Pauly, Daniel B. Vigneron and Michael Lustig, Calibrationless * Parallel Imaging Reconstruction Based on Structured Low-Rank Matrix * Completion, 2013, accepted to Magn Reson Med. * * Zhongyuan Bi, Martin Uecker, Dengrong Jiang, Michael Lustig, and Kui Ying. * Robust Low-rank Matrix Completion for sparse motion correction in auto * calibration PI. Annual Meeting ISMRM, Salt Lake City 2013, * In Proc. Intl. Soc. Mag. Recon. 
Med 21; 2584 (2013) */ #include #include #include "num/init.h" #include "num/multind.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/mri.h" #include "misc/opts.h" #include "sake/sake.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Use SAKE algorithm to recover a full k-space from undersampled\n" "data using low-rank matrix completion."; int main_sake(int argc, char* argv[]) { float alpha = 0.22; int iter = 50; float lambda = 1.; const struct opt_s opts[] = { OPT_INT('i', &iter, "iter", "tnumber of iterations"), OPT_FLOAT('s', &alpha, "size", "rel. size of the signal subspace"), OPT_FLOAT('o', &lambda, "", "()"), }; cmdline(&argc, argv, 2, 2, usage_str, help_str, ARRAY_SIZE(opts), opts); assert((0. <= alpha) && (alpha <= 1.)); assert(iter >= 0); assert((0. <= lambda) && (lambda <= 1.)); long dims[DIMS]; num_init(); complex float* in_data = load_cfl(argv[1], DIMS, dims); complex float* out_data = create_cfl(argv[2], DIMS, dims); lrmc(alpha, iter, lambda, DIMS, dims, out_data, in_data); unmap_cfl(DIMS, dims, out_data); unmap_cfl(DIMS, dims, in_data); exit(0); } bart-0.4.02/src/sake/000077500000000000000000000000001320577655200142535ustar00rootroot00000000000000bart-0.4.02/src/sake/sake.c000066400000000000000000000141341320577655200153450ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013 Martin Uecker * 2014 Jonathan Tamir * * * Peter J. Shin, Peder E.Z. Larson, Michael A. Ohliger, Michael Elad, * John M. Pauly, Daniel B. Vigneron and Michael Lustig, Calibrationless * Parallel Imaging Reconstruction Based on Structured Low-Rank Matrix * Completion, Magn Reson Med. Epub (2014) * * Zhongyuan Bi, Martin Uecker, Dengrong Jiang, Michael Lustig, and Kui Ying. 
* Robust Low-rank Matrix Completion for sparse motion correction in auto * calibration PI. Annual Meeting ISMRM, Salt Lake City 2013, * In Proc. Intl. Soc. Mag. Recon. Med 21; 2584 (2013) */ #include #include #include #include #include "num/lapack.h" #include "num/linalg.h" #include "num/multind.h" #include "num/flpmath.h" #include "num/casorati.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/mri.h" #include "sake.h" #undef DIMS // FIXME #define DIMS 16 #if 0 static float thresh(float lambda, float x) { float norm = fabs(x); float red = norm - lambda; return (red > 0.) ? ((red / norm) * (x)) : 0.; } #endif static complex float cthresh(float lambda, complex float x) { float norm = cabsf(x); float red = norm - lambda; return (red > 0.) ? ((red / norm) * x) : 0.; } static void robust_consistency(float lambda, const long dims[5], complex float* dst, const complex float* pattern, const complex float* kspace) { assert(1 == dims[4]); size_t size = md_calc_size(5, dims); for (unsigned int i = 0; i < size; i++) if (1. 
== pattern[i % (size / dims[3])]) dst[i] = kspace[i] + cthresh(lambda, dst[i] - kspace[i]); } #if 1 #define RAVINE #endif #ifdef RAVINE static void ravine(unsigned int N, const long dims[N], float* ftp, complex float* xa, complex float* xb) { float ft = *ftp; float tfo = ft; ft = (1.f + sqrtf(1.f + 4.f * ft * ft)) / 2.f; *ftp = ft; md_swap(N, dims, xa, xb, CFL_SIZE); complex float val = (1.f - tfo) / ft - 1.f; long dims1[N]; md_singleton_dims(N, dims1); long strs1[N]; long strs[N]; md_calc_strides(N, strs1, dims1, CFL_SIZE); md_calc_strides(N, strs, dims, CFL_SIZE); md_zfmac2(N, dims, strs, xa, strs1, &val, strs, xa); val *= -1.; md_zfmac2(N, dims, strs, xa, strs1, &val, strs, xb); } #endif static void lowrank(float alpha, int D, const long dims[D], complex float* matrix) { assert(1 == dims[MAPS_DIM]); debug_printf(DP_DEBUG3, "mat_dims = \t"); debug_print_dims(DP_DEBUG3, D, dims); long kern_min[4] = { 6, 6, 6, dims[COIL_DIM] }; long kern_dims[D]; md_set_dims(D, kern_dims, 1); md_min_dims(4, ~0u, kern_dims, kern_min, dims); debug_printf(DP_DEBUG3, "kern_dims = \t"); debug_print_dims(DP_DEBUG3, D, kern_dims); long calmat_dims[2]; casorati_dims(D, calmat_dims, kern_dims, dims); debug_printf(DP_DEBUG3, "calmat_dims = \t"); debug_print_dims(DP_DEBUG3, 2, calmat_dims); complex float* calmat = md_alloc(2, calmat_dims, CFL_SIZE); long str[D]; md_calc_strides(D, str, dims, CFL_SIZE); casorati_matrix(D, kern_dims, calmat_dims, calmat, dims, str, matrix); int N = calmat_dims[0]; int M = calmat_dims[1]; debug_printf(DP_INFO, "%dx%d\n", N, M); if (-1. 
!= alpha) { long dimsU[2] = { N, N }; long dimsV[2] = { M, M }; complex float* U = md_alloc(2, dimsU, CFL_SIZE); complex float* VT = md_alloc(2, dimsV, CFL_SIZE); // complex float* U = create_cfl("U", 2, dimsU); // complex float* VT = create_cfl("VT", 2, dimsV); float* S = xmalloc(MIN(N, M) * sizeof(float)); debug_printf(DP_INFO, "SVD..\n"); //lapack_svd(N, M, (complex float (*)[M])U, (complex float (*)[N])VT, S, (complex float (*)[N])calmat); lapack_svd_econ(N, M, (complex float (*)[M])U, (complex float (*)[N])VT, S, (complex float (*)[N])calmat); // CHECK debug_printf(DP_INFO, "done.\n"); // put it back together long dimU2[2] = { N, MIN(N, M) }; long dimV2[2] = { MIN(N, M), M }; complex float* U2 = md_alloc(2, dimU2, CFL_SIZE); complex float* V2 = md_alloc(2, dimV2, CFL_SIZE); md_resize(2, dimU2, U2, dimsU, U, CFL_SIZE); md_resize(2, dimV2, V2, dimsV, VT, CFL_SIZE); for (int i = 0; i < M; i++) { // printf("%f\t", S[i]); for (int j = 0; j < MIN(N, M); j++) //V2[i * MIN(N, M) + j] *= thresh(alpha, S[j]); V2[i * MIN(N, M) + j] *= (j < alpha * (float)MIN(N, M)) ? S[j] : 0.; // thresh(alpha, S[j]); } mat_mul(M, MIN(M, N), N, (complex float (*)[N])calmat, (const complex float (*)[MIN(M, N)])V2, (const complex float (*)[N])U2); md_free(U); md_free(U2); md_free(VT); md_free(V2); free(S); } md_clear(D, dims, matrix, CFL_SIZE); casorati_matrixH(D, kern_dims, dims, str, matrix, calmat_dims, calmat); md_zsmul(D, dims, matrix, matrix, 1. 
/ (double)md_calc_size(3, kern_dims)); // FIXME: not right at the border md_free(calmat); } void lrmc(float alpha, int iter, float lambda, int N, const long dims[N], complex float* out, const complex float* in) { long dims1[N]; md_select_dims(N, ~COIL_FLAG, dims1, dims); md_copy(N, dims, out, in, CFL_SIZE); complex float* pattern = md_alloc(N, dims1, CFL_SIZE); //assert(5 == N); estimate_pattern(N, dims, COIL_FLAG, pattern, in); complex float* comp = md_alloc(N, dims1, CFL_SIZE); md_zfill(N, dims1, comp, 1.); lowrank(-1., N, dims1, comp); #ifdef RAVINE complex float* o = md_alloc(N, dims, CFL_SIZE); md_clear(N, dims, o, CFL_SIZE); float fl = 1.; #endif long strs1[N]; md_calc_strides(N, strs1, dims1, CFL_SIZE); long strs[N]; md_calc_strides(N, strs, dims, CFL_SIZE); for (int i = 0; i < iter; i++) { debug_printf(DP_INFO, "%d\n", i); if (-1. != lambda) robust_consistency(lambda, dims, out, pattern, in); else data_consistency(dims, out, pattern, in, out); lowrank(alpha, N, dims, out); md_zdiv2(N, dims, strs, out, strs, out, strs1, comp); #ifdef RAVINE ravine(N, dims, &fl, out, o); #endif } debug_printf(DP_INFO, "Done.\n"); #ifdef RAVINE md_free(o); #endif md_free(comp); md_free(pattern); } bart-0.4.02/src/sake/sake.h000066400000000000000000000002771320577655200153550ustar00rootroot00000000000000 #include "misc/cppwrap.h" extern void lrmc(float alpha, int iter, float lambda, int N, const long dims[__VLA(N)], _Complex float* out, const _Complex float* in); #include "misc/cppwrap.h" bart-0.4.02/src/saxpy.c000066400000000000000000000025721320577655200146460ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2013 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "scale "; static const char help_str[] = "Multiply input1 with scale factor and add input2.\n"; int main_saxpy(int argc, char* argv[]) { mini_cmdline(&argc, argv, 4, usage_str, help_str); num_init(); complex float scale; if (0 != parse_cfl(&scale, argv[1])) { fprintf(stderr, "ERROR: %s is not a complex number.\n", argv[1]); exit(1); } const int N = DIMS; long dims1[N]; long dims2[N]; complex float* data1 = load_cfl(argv[2], N, dims1); complex float* data2 = load_cfl(argv[3], N, dims2); for (int i = 0; i < N; i++) assert(dims1[i] == dims2[i]); complex float* out = create_cfl(argv[4], N, dims2); #pragma omp parallel for for (long i = 0; i < md_calc_size(N, dims1); i++) out[i] = scale * data1[i] + data2[i]; unmap_cfl(N, dims1, data1); unmap_cfl(N, dims2, data2); unmap_cfl(N, dims2, out); exit(0); } bart-0.4.02/src/scale.c000066400000000000000000000023051320577655200145630ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "factor "; static const char help_str[] = "Scale array by {factor}. 
The scale factor can be a complex number.\n"; int main_scale(int argc, char* argv[]) { mini_cmdline(&argc, argv, 3, usage_str, help_str); num_init(); complex float scale; // = atof(argv[1]); if (0 != parse_cfl(&scale, argv[1])) { fprintf(stderr, "ERROR: scale factor %s is not a number.\n", argv[1]); exit(1); } const int N = DIMS; long dims[N]; complex float* idata = load_cfl(argv[2], N, dims); complex float* odata = create_cfl(argv[3], N, dims); md_zsmul(N, dims, odata, idata, scale); unmap_cfl(N, dims, idata); unmap_cfl(N, dims, odata); exit(0); } bart-0.4.02/src/sdot.c000066400000000000000000000023531320577655200144500ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * Copyright 2015. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012, 2015 Martin Uecker */ #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Compute dot product along selected dimensions."; int main_sdot(int argc, char* argv[]) { cmdline(&argc, argv, 2, 2, usage_str, help_str, 0, NULL); num_init(); int N = DIMS; long in1_dims[N]; long in2_dims[N]; complex float* in1_data = load_cfl(argv[1], N, in1_dims); complex float* in2_data = load_cfl(argv[2], N, in2_dims); for (int i = 0; i < N; i++) assert(in1_dims[i] == in2_dims[i]); // compute scalar product complex float value = md_zscalar(N, in1_dims, in1_data, in2_data); printf("%+e%+ei\n", crealf(value), cimagf(value)); unmap_cfl(N, in1_dims, in1_data); unmap_cfl(N, in2_dims, in2_data); exit(0); } 
bart-0.4.02/src/sense/000077500000000000000000000000001320577655200144455ustar00rootroot00000000000000bart-0.4.02/src/sense/bprecon.h000066400000000000000000000031661320577655200162540ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifndef __SENSE_H #define __SENSE_H #ifdef __cplusplus extern "C" { #endif #include "misc/mri.h" struct iter_conf_s; struct operator_p_s; struct linop_s; /** * configuration parameters for basis pursuit sense reconstruction * * @param iter_admm_conf configuration struct for admm algorithm * @param rvc TRUE for real-valued constraint * @param lambda l2 regularization penalty * @param eps data consistency error * @param linop_obj linear operator for computing the objective value */ struct bpsense_conf { struct iter_conf_s* iconf; _Bool rvc; float lambda; float eps; const struct linop_s* l1op_obj; }; extern const struct bpsense_conf bpsense_defaults; #ifdef USE_CUDA extern void bpsense_recon_gpu(struct bpsense_conf* conf, const long dims[DIMS], _Complex float* image, const _Complex float* maps, const long pat_dims[DIMS], const _Complex float* pattern, const struct linop_s* l1op, const struct operator_p_s* l1prox, const long ksp_dims[DIMS], const _Complex float* kspace, const _Complex float* image_truth); #endif extern void bpsense_recon(struct bpsense_conf* conf, const long dims[DIMS], _Complex float* image, const _Complex float* maps, const long pat_dims[DIMS], const _Complex float* pattern, const struct linop_s* l1op, const struct operator_p_s* l1prox, const long ksp_dims[DIMS], const _Complex float* kspace, const _Complex float* image_truth); #ifdef __cplusplus } #endif #endif bart-0.4.02/src/sense/model.c000066400000000000000000000172021320577655200157130ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. 
* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2016 Martin Uecker * 2014 Frank Ong * * * Ra JB, Rim CY. Fast imaging using subencoding data sets from multiple detectors. * Magn Reson Med 1993; 30:142-145. * * Pruessmann KP, Weiger M, Scheidegger MB, Boesiger P. SENSE: Sensitivity encoding for fast * MRI. Magn Reson Med 1999; 42:952-962. * * Pruessmann KP, Weiger M, Boernert P, Boesiger P. Advances in sensitivity * encoding with arbitrary k-space trajectories. * Magn Reson Med 2001; 46:638-651. * * Uecker M, Lai P, Murphy MJ, Virtue P, Elad M, Pauly JM, Vasanawala SS, Lustig M. * ESPIRiT - An Eigenvalue Approach to Autocalibrating Parallel MRI: Where SENSE * meets GRAPPA. Magn Reson Med 2014; 71:990-1001. * */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/ops.h" #ifdef USE_CUDA #include "num/gpuops.h" #endif #include "linops/linop.h" #include "linops/someops.h" #include "misc/misc.h" #include "misc/mri.h" #include "misc/debug.h" #include "model.h" /** * data structure for holding the sense data. 
* * @param max_dims maximal dimensions * @param dims_mps maps dimensions * @param dims_ksp kspace dimensions * @param img_dims final image dimensions * @param strs_mps strides for maps * @param strs_ksp strides for kspace * @param strs_img strides for image * @param sens sensitivity maps */ struct maps_data { INTERFACE(linop_data_t); long max_dims[DIMS]; long mps_dims[DIMS]; long ksp_dims[DIMS]; long img_dims[DIMS]; long strs_mps[DIMS]; long strs_ksp[DIMS]; long strs_img[DIMS]; /*const*/ complex float* sens; #ifdef USE_CUDA const complex float* gpu_sens; #endif complex float* norm; }; static DEF_TYPEID(maps_data); #ifdef USE_CUDA static const complex float* get_sens(const struct maps_data* data, bool gpu) { const complex float* sens = data->sens; if (gpu) { if (NULL == data->gpu_sens) ((struct maps_data*)data)->gpu_sens = md_gpu_move(DIMS, data->mps_dims, data->sens, CFL_SIZE); sens = data->gpu_sens; } return sens; } #endif static void maps_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct maps_data* data = CAST_DOWN(maps_data, _data); #ifdef USE_CUDA const complex float* sens = get_sens(data, cuda_ondevice(src)); #else const complex float* sens = data->sens; #endif md_clear(DIMS, data->ksp_dims, dst, CFL_SIZE); md_zfmac2(DIMS, data->max_dims, data->strs_ksp, dst, data->strs_img, src, data->strs_mps, sens); } static void maps_apply_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct maps_data* data = CAST_DOWN(maps_data, _data); #ifdef USE_CUDA const complex float* sens = get_sens(data, cuda_ondevice(src)); #else const complex float* sens = data->sens; #endif // dst = sum( conj(sens) .* tmp ) md_clear(DIMS, data->img_dims, dst, CFL_SIZE); md_zfmacc2(DIMS, data->max_dims, data->strs_img, dst, data->strs_ksp, src, data->strs_mps, sens); } static void maps_init_normal(struct maps_data* data) { if (NULL != data->norm) return; // FIXME: gpu/cpu mixed use data->norm = 
md_alloc_sameplace(DIMS, data->img_dims, CFL_SIZE, data->sens); md_zrss(DIMS, data->mps_dims, COIL_FLAG, data->norm, data->sens); md_zmul(DIMS, data->img_dims, data->norm, data->norm, data->norm); } static void maps_apply_normal(const linop_data_t* _data, complex float* dst, const complex float* src) { struct maps_data* data = CAST_DOWN(maps_data, _data); maps_init_normal(data); md_zmul(DIMS, data->img_dims, dst, src, data->norm); } /* * ( AT A + lambda I) x = b */ static void maps_apply_pinverse(const linop_data_t* _data, float lambda, complex float* dst, const complex float* src) { struct maps_data* data = CAST_DOWN(maps_data, _data); maps_init_normal(data); md_zsadd(DIMS, data->img_dims, data->norm, data->norm, lambda); md_zdiv(DIMS, data->img_dims, dst, src, data->norm); md_zsadd(DIMS, data->img_dims, data->norm, data->norm, -lambda); } static void maps_free_data(const linop_data_t* _data) { const struct maps_data* data = CAST_DOWN(maps_data, _data); md_free((void*)data->sens); if (NULL != data->norm) { md_free((void*)data->norm); } #ifdef USE_CUDA if (NULL != data->gpu_sens) { md_free((void*)data->gpu_sens); } #endif free((void*)data); } static struct maps_data* maps_create_data(const long max_dims[DIMS], unsigned int sens_flags, const complex float* sens) { PTR_ALLOC(struct maps_data, data); SET_TYPEID(maps_data, data); // maximal dimensions md_copy_dims(DIMS, data->max_dims, max_dims); // sensitivity dimensions md_select_dims(DIMS, sens_flags, data->mps_dims, max_dims); md_calc_strides(DIMS, data->strs_mps, data->mps_dims, CFL_SIZE); md_select_dims(DIMS, ~MAPS_FLAG, data->ksp_dims, max_dims); md_calc_strides(DIMS, data->strs_ksp, data->ksp_dims, CFL_SIZE); md_select_dims(DIMS, ~COIL_FLAG, data->img_dims, max_dims); md_calc_strides(DIMS, data->strs_img, data->img_dims, CFL_SIZE); complex float* nsens = md_alloc(DIMS, data->mps_dims, CFL_SIZE); md_copy(DIMS, data->mps_dims, nsens, sens, CFL_SIZE); data->sens = nsens; #ifdef USE_CUDA data->gpu_sens = NULL; 
#endif data->norm = NULL; return PTR_PASS(data); } /** * Create maps operator, m = S x * * @param max_dims maximal dimensions across all data structures * @param sens_flags active map dimensions * @param sens sensitivities */ struct linop_s* maps_create(const long max_dims[DIMS], unsigned int sens_flags, const complex float* sens) { struct maps_data* data = maps_create_data(max_dims, sens_flags, sens); // scale the sensitivity maps by the FFT scale factor fftscale(DIMS, data->mps_dims, FFT_FLAGS, data->sens, data->sens); return linop_create(DIMS, data->ksp_dims, DIMS, data->img_dims, CAST_UP(data), maps_apply, maps_apply_adjoint, maps_apply_normal, maps_apply_pinverse, maps_free_data); } struct linop_s* maps2_create(const long coilim_dims[DIMS], const long maps_dims[DIMS], const long img_dims[DIMS], const complex float* maps) { long max_dims[DIMS]; unsigned int sens_flags = 0; for (unsigned int i = 0; i < DIMS; i++) if (1 != maps_dims[i]) sens_flags = MD_SET(sens_flags, i); assert(1 == coilim_dims[MAPS_DIM]); assert(1 == img_dims[COIL_DIM]); assert(maps_dims[COIL_DIM] == coilim_dims[COIL_DIM]); assert(maps_dims[MAPS_DIM] == img_dims[MAPS_DIM]); for (unsigned int i = 0; i < DIMS; i++) max_dims[i] = MAX(coilim_dims[i], MAX(maps_dims[i], img_dims[i])); struct maps_data* data = maps_create_data(max_dims, sens_flags, maps); return linop_create(DIMS, coilim_dims, DIMS, img_dims, CAST_UP(data), maps_apply, maps_apply_adjoint, maps_apply_normal, maps_apply_pinverse, maps_free_data); } /** * Create sense operator, y = F S x, * where F is the Fourier transform and S is the sensitivity maps * * @param max_dims maximal dimensions across all data structures * @param sens_flags active map dimensions * @param sens sensitivities */ struct linop_s* sense_init(const long max_dims[DIMS], unsigned int sens_flags, const complex float* sens) { long ksp_dims[DIMS]; md_select_dims(DIMS, ~MAPS_FLAG, ksp_dims, max_dims); struct linop_s* fft = linop_fft_create(DIMS, ksp_dims, FFT_FLAGS); 
struct linop_s* maps = maps_create(max_dims, sens_flags, sens); struct linop_s* sense_op = linop_chain(maps, fft); linop_free(fft); linop_free(maps); return sense_op; } bart-0.4.02/src/sense/model.h000066400000000000000000000013551320577655200157220ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifdef __cplusplus extern "C" { #endif #include "misc/mri.h" struct operator_s; struct vec_ops; extern struct linop_s* sense_init(const long max_dims[DIMS], unsigned int sens_flags, const complex float* sens); extern struct linop_s* maps_create(const long max_dims[DIMS], unsigned int sens_flags, const complex float* sens); extern struct linop_s* maps2_create(const long coilim_dims[DIMS], const long maps_dims[DIMS], const long img_dims[DIMS], const complex float* maps); #ifdef __cplusplus } #endif bart-0.4.02/src/sense/optcom.c000066400000000000000000000141121320577655200161110ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Author: * 2012 Martin Uecker */ #include #include #include #include #include "sense/model.h" #include "linops/linop.h" #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "misc/mri.h" #include "misc/misc.h" #include "misc/debug.h" #include "optcom.h" /** * Optimally combine coil images. * * Assumptions: * @param data fully sampled data * @param sens physical (unormalized) coil sensitivites * @param alpha the best estimator has alpha > 0. 
*/ void optimal_combine(const long dims[DIMS], float alpha, complex float* image, const complex float* sens, const complex float* data) { long dims_one[DIMS]; long dims_img[DIMS]; long dims_cim[DIMS]; md_select_dims(DIMS, ~(COIL_FLAG|MAPS_FLAG), dims_one, dims); md_select_dims(DIMS, ~(COIL_FLAG), dims_img, dims); md_select_dims(DIMS, ~(MAPS_FLAG), dims_cim, dims); const struct linop_s* sense_data = sense_init(dims, FFT_FLAGS|COIL_FLAG|MAPS_FLAG, sens); linop_adjoint(sense_data, DIMS, dims_img, image, DIMS, dims_cim, data); linop_free(sense_data); complex float* norm = md_alloc(DIMS, dims_img, CFL_SIZE); md_zrss(DIMS, dims, COIL_FLAG, norm, sens); long imsize = md_calc_size(DIMS, dims_img); for (unsigned int i = 0; i < imsize; i++) image[i] /= (powf(cabsf(norm[i]), 2.) + alpha); md_free(norm); } void rss_combine(const long dims[DIMS], complex float* image, const complex float* data) { complex float* tmp = md_alloc_sameplace(DIMS, dims, CFL_SIZE, data); ifft(DIMS, dims, FFT_FLAGS, tmp, data); fftscale(DIMS, dims, FFT_FLAGS, tmp, tmp); md_zrss(DIMS, dims, COIL_FLAG, image, tmp); md_free(tmp); } static int compare_cmpl_magn(const void* a, const void* b) { return (int)copysignf(1., (cabsf(*(complex float*)a) - cabsf(*(complex float*)b))); } float estimate_scaling_norm(float rescale, long imsize, complex float* tmpnorm, bool compat) { qsort(tmpnorm, (size_t)imsize, sizeof(complex float), compare_cmpl_magn); float median = cabsf(tmpnorm[imsize / 2]) / rescale; //median float p90 = cabsf(tmpnorm[(int)trunc(imsize * 0.9)]) / rescale; float max = cabsf(tmpnorm[imsize - 1]) / rescale; float scale = ((max - p90) < 2 * (p90 - median)) ? p90 : max; if (compat) scale = median; debug_printf(DP_DEBUG1, "Scaling: %f%c (max = %f/p90 = %f/median = %f)\n", scale, (scale == max) ? '!' 
: ' ', max, p90, median); return scale; } extern float estimate_scaling_cal(const long dims[DIMS], const complex float* sens, const long cal_dims[DIMS], const complex float* cal_data, bool compat) { long img_dims[DIMS]; md_select_dims(DIMS, ~COIL_FLAG, img_dims, cal_dims); long imsize = md_calc_size(DIMS, img_dims); complex float* tmp1 = md_alloc(DIMS, img_dims, CFL_SIZE); float rescale = sqrtf((float)dims[0] / (float)cal_dims[0]) * sqrtf((float)dims[1] / (float)cal_dims[1]) * sqrtf((float)dims[2] / (float)cal_dims[2]); if (NULL == sens) { rss_combine(cal_dims, tmp1, cal_data); } else { optimal_combine(cal_dims, 0., tmp1, sens, cal_data); } float scale = estimate_scaling_norm(rescale, imsize, tmp1, compat); md_free(tmp1); return scale; } static float estimate_scaling_internal(const long dims[DIMS], const complex float* sens, const long strs[DIMS], const complex float* data, bool compat) { assert(1 == dims[MAPS_DIM]); long small_dims[DIMS]; long cal_size[3] = { 32, 32, 32 }; // maybe we should just extract a fixed-sized block here? 
complex float* tmp = extract_calib2(small_dims, cal_size, dims, strs, data, false); float scaling = estimate_scaling_cal(dims, sens, small_dims, tmp, compat); md_free(tmp); return scaling; } float estimate_scaling2(const long dims[DIMS], const complex float* sens, const long strs[DIMS], const complex float* data2) { return estimate_scaling_internal(dims, sens, strs, data2, false); } float estimate_scaling(const long dims[DIMS], const complex float* sens, const complex float* data2) { long strs[DIMS]; md_calc_strides(DIMS, strs, dims, CFL_SIZE); return estimate_scaling2(dims, sens, strs, data2); } float estimate_scaling_old2(const long dims[DIMS], const complex float* sens, const long strs[DIMS], const complex float* data) { return estimate_scaling_internal(dims, sens, strs, data, true); } void fake_kspace(const long dims[DIMS], complex float* kspace, const complex float* sens, const complex float* image) { long dims_one[DIMS]; long dims_img[DIMS]; long dims_ksp[DIMS]; md_select_dims(DIMS, ~(COIL_FLAG | MAPS_FLAG), dims_one, dims); md_select_dims(DIMS, ~COIL_FLAG, dims_img, dims); md_select_dims(DIMS, ~MAPS_FLAG, dims_ksp, dims); const struct linop_s* sense_data = sense_init(dims, FFT_FLAGS|COIL_FLAG|MAPS_FLAG, sens); linop_forward(sense_data, DIMS, dims_ksp, kspace, DIMS, dims_img, image); linop_free(sense_data); } void replace_kspace(const long dims[DIMS], complex float* out, const complex float* kspace, const complex float* sens, const complex float* image) { long dims_one[DIMS]; long dims_img[DIMS]; long dims_ksp[DIMS]; md_select_dims(DIMS, ~(COIL_FLAG|MAPS_FLAG), dims_one, dims); md_select_dims(DIMS, ~(COIL_FLAG), dims_img, dims); md_select_dims(DIMS, ~(MAPS_FLAG), dims_ksp, dims); complex float* data = md_alloc(DIMS, dims_ksp, CFL_SIZE); fake_kspace(dims, data, sens, image); complex float* pattern = md_alloc(DIMS, dims_one, CFL_SIZE); estimate_pattern(DIMS, dims_ksp, COIL_FLAG, pattern, kspace); data_consistency(dims_ksp, out, pattern, kspace, data); 
md_free(pattern); md_free(data); } void replace_kspace2(const long dims[DIMS], complex float* out, const complex float* kspace, const complex float* sens, const complex float* image) { long dims_ksp[DIMS]; md_select_dims(DIMS, ~MAPS_FLAG, dims_ksp, dims); complex float* data = md_alloc(DIMS, dims_ksp, CFL_SIZE); replace_kspace(dims, data, kspace, sens, image); rss_combine(dims, out, data); // optimal_combine(dims, 0.1, out, sens, data); md_free(data); } bart-0.4.02/src/sense/optcom.h000066400000000000000000000031561320577655200161240ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifdef __cplusplus extern "C" { #endif #include #include "misc/mri.h" extern void rss_combine(const long dims[DIMS], _Complex float* image, const _Complex float* data); extern void optimal_combine(const long dims[DIMS], float alpha, _Complex float* image, const _Complex float* sens, const _Complex float* data); extern float estimate_scaling_norm(float rescale, long imsize, _Complex float* tmpnorm, bool compat); extern float estimate_scaling(const long dims[DIMS], const _Complex float* sens, const _Complex float* data); extern float estimate_scaling2(const long dims[DIMS], const _Complex float* sens, const long strs[DIMS], const _Complex float* data); extern float estimate_scaling_old2(const long dims[DIMS], const _Complex float* sens, const long strs[DIMS], const _Complex float* data); extern void fake_kspace(const long dims[DIMS], _Complex float* kspace, const _Complex float* sens, const _Complex float* image); extern void replace_kspace(const long dims[DIMS], _Complex float* out, const _Complex float* kspace, const _Complex float* sens, const _Complex float* image); extern void replace_kspace2(const long dims[DIMS], _Complex float* out, const _Complex float* kspace, const _Complex float* sens, const _Complex float* 
image); extern float estimate_scaling_cal(const long dims[DIMS], const _Complex float* sens, const long cal_dims[DIMS], const _Complex float* cal_data, _Bool compat); #ifdef __cplusplus } #endif bart-0.4.02/src/sense/pocs.c000066400000000000000000000213771320577655200155670ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012, 2016 Martin Uecker * 2014 Jonathan Tamir * * * Samsonov AA, Kholmovski EG, Parker DL, Johnson CR. POCSENSE: POCS-based * reconstruction for sensitivity encoded magnetic resonance imaging. * Magn Reson Med 2004; 52:1397–1406. * */ #include #include #include #include #include "misc/misc.h" #include "misc/mri.h" #include "misc/debug.h" #include "misc/types.h" #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/gpuops.h" #include "num/ops.h" #include "linops/linop.h" #include "iter/iter.h" #include "iter/prox.h" #include "iter/monitor.h" #include "sense/model.h" #include "pocs.h" struct data { INTERFACE(operator_data_t); const struct linop_s* sense_op; complex float* tmp; float alpha; // l1 or l2 regularization float lambda; // robust consistency const struct operator_p_s* thresh; const complex float* kspace; const complex float* pattern; const complex float* fftmod_mat; long dims_ksp[DIMS]; long dims_pat[DIMS]; long strs_ksp[DIMS]; long strs_pat[DIMS]; }; static DEF_TYPEID(data); static void xupdate_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { const struct data* data = CAST_DOWN(data, _data); UNUSED(mu); md_zsmul(DIMS, data->dims_ksp, dst, src, 1. / (data->alpha == 0 ? 2. : 3.)); } static complex float cthresh(float lambda, complex float x) { float norm = cabsf(x); float red = norm - lambda; return (red > 0.) ? 
((red / norm) * x) : 0.; } static void robust_consistency(float lambda, const long dims[DIMS], complex float* dst, const complex float* pattern, const complex float* kspace) { assert(1 == dims[MAPS_DIM]); size_t size = md_calc_size(DIMS, dims); for (unsigned int i = 0; i < size; i++) if (1. == pattern[i % (size / dims[COIL_DIM])]) dst[i] = kspace[i] + cthresh(lambda, dst[i] - kspace[i]); } static void sparsity_proj_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { const struct data* data = CAST_DOWN(data, _data); const long* dims = data->dims_ksp; ifft(DIMS, dims, FFT_FLAGS, dst, src); // FIXME fftmod is slow #if 0 fftscale(DIMS, dims, FFT_FLAGS, dst, dst); ifftmod(DIMS, dims, FFT_FLAGS, dst, dst); #else md_zmulc2(DIMS, dims, data->strs_ksp, dst, data->strs_ksp, dst, data->strs_pat, data->fftmod_mat); #endif operator_p_apply(data->thresh, mu, DIMS, dims, dst, DIMS, dims, dst); #if 0 fftmod(DIMS, dims, FFT_FLAGS, dst, dst); fftscale(DIMS, dims, FFT_FLAGS, dst, dst); #else md_zmul2(DIMS, dims, data->strs_ksp, dst, data->strs_ksp, dst, data->strs_pat, data->fftmod_mat); #endif fft(DIMS, dims, FFT_FLAGS, dst, dst); } static void data_consistency_proj_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { UNUSED(mu); const struct data* data = CAST_DOWN(data, _data); if (-1. 
!= data->lambda) robust_consistency(data->lambda, data->dims_ksp, dst, data->pattern, data->kspace); else data_consistency(data->dims_ksp, dst, data->pattern, data->kspace, src); } static void sense_proj_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src) { UNUSED(mu); const struct data* data = CAST_DOWN(data, _data); // assumes normalized sensitivities linop_adjoint_unchecked(data->sense_op, data->tmp, src); linop_forward_unchecked(data->sense_op, dst, data->tmp); } static void proj_del(const operator_data_t* _data) { UNUSED(_data); } static float compute_norm(const void* _data, const float* ksp) { const struct data* data = _data; float norm = md_znorm(DIMS, data->dims_ksp, (complex float*)ksp); //assert(isnormal(norm)); return norm; } void pocs_recon(const long dims[DIMS], const struct operator_p_s* thresh, int maxiter, float alpha, float lambda, complex float* result, const complex float* maps, const complex float* pattern, const complex float* kspace) { struct iter_pocs_conf pconf = iter_pocs_defaults; pconf.maxiter = maxiter; pocs_recon2(iter2_pocs, &pconf, NULL, dims, thresh, alpha, lambda, result, maps, pattern, kspace); } void pocs_recon2(italgo_fun2_t italgo, void* iconf, const struct linop_s* ops[3], const long dims[DIMS], const struct operator_p_s* thresh_op, float alpha, float lambda, complex float* result, const complex float* maps, const complex float* pattern, const complex float* kspace) { #ifdef USE_CUDA bool use_gpu = cuda_ondevice(kspace); #else bool use_gpu = false; #endif long dims_pat[DIMS]; long dims_img[DIMS]; long dims_ksp[DIMS]; md_select_dims(DIMS, ~(COIL_FLAG | MAPS_FLAG), dims_pat, dims); md_select_dims(DIMS, ~(MAPS_FLAG), dims_ksp, dims); md_select_dims(DIMS, ~(COIL_FLAG), dims_img, dims); long strs_pat[DIMS]; long strs_ksp[DIMS]; md_calc_strides(DIMS, strs_pat, dims_pat, CFL_SIZE); md_calc_strides(DIMS, strs_ksp, dims_ksp, CFL_SIZE); struct data data; SET_TYPEID(data, &data); data.pattern = 
pattern; data.kspace = kspace; data.lambda = lambda; data.alpha = alpha; md_copy_dims(DIMS, data.dims_ksp, dims_ksp); md_copy_dims(DIMS, data.dims_pat, dims_pat); md_copy_strides(DIMS, data.strs_ksp, strs_ksp); md_copy_strides(DIMS, data.strs_pat, strs_pat); data.sense_op = sense_init(dims, FFT_FLAGS|MAPS_FLAG|COIL_FLAG, maps); data.thresh = thresh_op; #ifdef USE_CUDA data.tmp = (use_gpu ? md_alloc_gpu : md_alloc)(DIMS, dims_img, CFL_SIZE); #else assert(!use_gpu); data.tmp = md_alloc(DIMS, dims_img, CFL_SIZE); #endif complex float* fftmod_mat = md_alloc_sameplace(DIMS, dims_pat, CFL_SIZE, kspace); complex float one[1] = { 1. }; md_fill(DIMS, dims_pat, fftmod_mat, one, CFL_SIZE ); fftscale(DIMS, dims_pat, FFT_FLAGS, fftmod_mat, fftmod_mat); fftmod(DIMS, dims_pat, FFT_FLAGS, fftmod_mat, fftmod_mat); data.fftmod_mat = fftmod_mat; const struct operator_p_s* sense_proj = operator_p_create(DIMS, dims_ksp, DIMS, dims_ksp, CAST_UP(&data), sense_proj_apply, proj_del); const struct operator_p_s* data_consistency_proj = operator_p_create(DIMS, dims_ksp, DIMS, dims_ksp, CAST_UP(&data), data_consistency_proj_apply, proj_del); const struct operator_p_s* sparsity_proj = NULL; if (NULL != thresh_op) sparsity_proj = operator_p_create(DIMS, dims_ksp, DIMS, dims_ksp, CAST_UP(&data), sparsity_proj_apply, proj_del); else sparsity_proj = prox_leastsquares_create(DIMS, dims_ksp, alpha, NULL); const struct operator_p_s* prox_ops[3] = { data_consistency_proj, sense_proj, sparsity_proj }; //const struct operator_p_s* prox_ops[3] = { data_consistency_proj, sense_proj, thresh_op }; const struct operator_p_s* xupdate_op = operator_p_create(DIMS, dims_ksp, DIMS, dims_ksp, CAST_UP(&data), xupdate_apply, proj_del); long size = 2 * md_calc_size(DIMS, dims_ksp); md_clear(DIMS, dims_ksp, result, CFL_SIZE); italgo(iconf, NULL, (alpha == 0.) ? 
2 : 3, prox_ops, ops, NULL, xupdate_op, size, (float*)result, NULL, create_monitor(size, NULL, (void*)&data, compute_norm)); debug_printf(DP_INFO, "Done\n"); md_free(data.tmp); md_free(fftmod_mat); linop_free(data.sense_op); operator_p_free(sense_proj); operator_p_free(data_consistency_proj); operator_p_free(sparsity_proj); operator_p_free(xupdate_op); } #ifdef USE_CUDA void pocs_recon_gpu(const long dims[DIMS], const struct operator_p_s* thresh, int maxiter, float alpha, float lambda, complex float* result, const complex float* maps, const complex float* pattern, const complex float* kspace) { struct iter_pocs_conf pconf = iter_pocs_defaults; pconf.maxiter = maxiter; pocs_recon_gpu2(iter2_pocs, &pconf, NULL, dims, thresh, alpha, lambda, result, maps, pattern, kspace); } void pocs_recon_gpu2(italgo_fun2_t italgo, void* iconf, const struct linop_s** ops, const long dims[DIMS], const struct operator_p_s* thresh, float alpha, float lambda, complex float* result, const complex float* maps, const complex float* pattern, const complex float* kspace) { long dims_pat[DIMS]; long dims_ksp[DIMS]; md_select_dims(DIMS, ~(COIL_FLAG | MAPS_FLAG), dims_pat, dims); md_select_dims(DIMS, ~MAPS_FLAG, dims_ksp, dims); complex float* gpu_maps = md_gpu_move(DIMS, dims, maps, CFL_SIZE); complex float* gpu_pat = md_gpu_move(DIMS, dims_pat, pattern, CFL_SIZE); complex float* gpu_ksp = md_gpu_move(DIMS, dims_ksp, kspace, CFL_SIZE); complex float* gpu_result = md_gpu_move(DIMS, dims_ksp, result, CFL_SIZE); pocs_recon2(italgo, iconf, ops, dims, thresh, alpha, lambda, gpu_result, gpu_maps, gpu_pat, gpu_ksp); md_copy(DIMS, dims_ksp, result, gpu_result, CFL_SIZE); md_free(gpu_result); md_free(gpu_pat); md_free(gpu_ksp); md_free(gpu_maps); } #endif bart-0.4.02/src/sense/pocs.h000066400000000000000000000026441320577655200155700ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include "misc/mri.h" #include "iter/iter2.h" #ifdef __cplusplus extern "C" { #endif struct operator_p_s; struct linop_s; extern void pocs_recon(const long dims[DIMS], const struct operator_p_s* thresh_data, int maxiter, float alpha, float lambda, _Complex float* result, const _Complex float* maps, const _Complex float* pattern, const _Complex float* kspace); extern void pocs_recon2(italgo_fun2_t italgo, void* iconf, const struct linop_s** ops, const long dims[DIMS], const struct operator_p_s* thresh_data, float alpha, float lambda, _Complex float* result, const _Complex float* maps, const _Complex float* pattern, const _Complex float* kspace); #ifdef USE_CUDA extern void pocs_recon_gpu(const long dims[DIMS], const struct operator_p_s* thresh, int maxiter, float alpha, float lambda, _Complex float* result, const _Complex float* maps, const _Complex float* pattern, const _Complex float* kspace); extern void pocs_recon_gpu2(italgo_fun2_t italgo, void* iconf, const struct linop_s** ops, const long dims[DIMS], const struct operator_p_s* thresh, float alpha, float lambda, _Complex float* result, const _Complex float* maps, const _Complex float* pattern, const _Complex float* kspace); #endif #ifdef __cplusplus } #endif bart-0.4.02/src/sense/recon.c000066400000000000000000000131511320577655200157200ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2016 Martin Uecker * 2013-2014 Jonathan Tamir * 2014 Frank Ong * * * Ra JB, Rim CY. Fast imaging using subencoding data sets from multiple * detectors. Magn Reson Med 1993; 30:142-145. * * Pruessmann KP, Weiger M, Scheidegger MB, Boesiger P. SENSE: Sensitivity * encoding for fast MRI. 
Magn Reson Med 1999; 42:952-962. * * Pruessmann KP, Weiger M, Boernert P, Boesiger P. Advances in sensitivity * encoding with arbitrary k-space trajectories. * Magn Reson Med 2001; 46:638-651. * * Uecker M, Lai P, Murphy MJ, Virtue P, Elad M, Pauly JM, Vasanawala SS, * Lustig M. ESPIRiT - An Eigenvalue Approach to Autocalibrating Parallel MRI: * Where SENSE meets GRAPPA. Magn Reson Med 2014; 71:990-1001. * */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/iovec.h" #include "linops/linop.h" #include "linops/sampling.h" #include "linops/someops.h" #include "linops/realval.h" #include "iter/iter.h" #include "iter/lsqr.h" #include "iter/lad.h" #include "misc/debug.h" #include "misc/misc.h" #include "misc/mri.h" #include "sense/model.h" #include "recon.h" const struct sense_conf sense_defaults = { .rvc = false, .gpu = false, .rwiter = 1, .gamma = -1., .cclambda = 0., .bpsense = false, }; /** * Data structure for storing all relevant recon information * * @param pattern sampling mask * @param transfer_data optional data to be applied to transfer function * @param transfer optional transfer function to apply normal equations (e.g. weights) * @param sense_data data structure for holding sense information * @param tmp temporary storage in kspace domain * @param conf sense configuration * @param img_dims dimensions of image */ struct data { // Optional function to apply normal equations: // For example, used for sampling mask, weights /* const */ complex float* pattern; const struct operator_s* sense_data; complex float* tmp; const complex float* kspace; struct sense_conf* conf; long img_dims[DIMS]; long ksp_dims[DIMS]; long pat_dims[DIMS]; }; void debug_print_sense_conf(int level, const struct sense_conf* conf) { debug_printf(level, "sense conf:\n"); debug_printf(level, "\trvc: %s\n", conf->rvc ? 
"on" : "off"); debug_printf(level, "\trwiter: %d\n", conf->rwiter); debug_printf(level, "\tgamma: %f\n", conf->gamma); debug_printf(level, "\tcclambda: %f\n", conf->cclambda); debug_printf(level, "\n\n"); } // copied from flpmath.c (for md_sqrt below ) static void real_from_complex_dims(unsigned int D, long odims[D + 1], const long idims[D]) { odims[0] = 2; md_copy_dims(D, odims + 1, idims); } const struct operator_s* sense_recon_create(const struct sense_conf* conf, const long dims[DIMS], const struct linop_s* sense_op, const long pat_dims[DIMS], const complex float* pattern, italgo_fun2_t italgo, iter_conf* iconf, const complex float* init, unsigned int num_funs, const struct operator_p_s* thresh_op[num_funs], const struct linop_s* thresh_funs[num_funs], const struct operator_s* precond_op) { struct lsqr_conf lsqr_conf = { conf->cclambda, conf->gpu }; const struct operator_s* op = NULL; assert(DIMS == linop_domain(sense_op)->N); long img_dims[DIMS]; md_copy_dims(DIMS, img_dims, linop_domain(sense_op)->dims); long ksp_dims[DIMS]; md_copy_dims(DIMS, ksp_dims, linop_codomain(sense_op)->dims); if (conf->rvc) { assert(!conf->bpsense); // FIXME: add rvc as separate constraint or build into forward model earlier struct linop_s* rvc = linop_realval_create(DIMS, img_dims); struct linop_s* tmp_op = linop_chain(rvc, sense_op); linop_free(rvc); linop_free(sense_op); sense_op = tmp_op; } if (1 < conf->rwiter) { assert(!conf->bpsense); // not compatible struct linop_s* sampling = linop_sampling_create(dims, pat_dims, pattern); struct linop_s* tmp_op = linop_chain(sense_op, sampling); linop_free(sampling); linop_free(sense_op); sense_op = tmp_op; unsigned int flags = 0; for (unsigned int i = 0; i < DIMS; i++) if (pat_dims[i] > 1) flags = MD_SET(flags, i); const struct lad_conf lad_conf = { conf->rwiter, conf->gamma, flags, &lsqr_conf }; op = lad2_create(&lad_conf, italgo, iconf, (const float*)init, sense_op, num_funs, thresh_op, thresh_funs); } else if (NULL == pattern) { if 
(conf->bpsense) op = lsqr2_create(&lsqr_conf, italgo, iconf, (const float*)init, NULL, NULL, num_funs, thresh_op, thresh_funs, NULL); else op = lsqr2_create(&lsqr_conf, italgo, iconf, (const float*)init, sense_op, precond_op, num_funs, thresh_op, thresh_funs, NULL); } else { assert(!conf->bpsense); // pattern should be built into forward model complex float* weights = md_alloc(DIMS, pat_dims, CFL_SIZE); #if 0 // buggy // md_zsqrt(DIMS, pat_dims, weights, pattern); #else long dimsR[DIMS + 1]; real_from_complex_dims(DIMS, dimsR, pat_dims); md_sqrt(DIMS + 1, dimsR, (float*)weights, (const float*)pattern); #endif // FIXME: weights is never freed struct linop_s* weights_op = linop_cdiag_create(DIMS, ksp_dims, ~COIL_FLAG, weights); op = wlsqr2_create(&lsqr_conf, italgo, iconf, (const float*)init, sense_op, weights_op, precond_op, num_funs, thresh_op, thresh_funs, NULL); } return op; } bart-0.4.02/src/sense/recon.h000066400000000000000000000023571320577655200157330ustar00rootroot00000000000000/* Copyright 2013-2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #ifndef __SENSE_H #define __SENSE_H 1 #include "misc/mri.h" #include "iter/iter.h" #include "iter/iter2.h" #include "misc/cppwrap.h" /** * configuration parameters for sense reconstruction * * @param rvc TRUE for real-values constraints */ struct sense_conf { _Bool rvc; _Bool gpu; int rwiter; // should be moved into a recon_lad float gamma; // .. 
float cclambda; bool bpsense; }; extern const struct sense_conf sense_defaults; struct operator_s; struct operator_p_s; extern const struct operator_s* sense_recon_create(const struct sense_conf* conf, const long dims[DIMS], const struct linop_s* sense_op, const long pat_dims[DIMS], const complex float* pattern, italgo_fun2_t italgo, iter_conf* iconf, const complex float* init, unsigned int num_funs, const struct operator_p_s* thresh_op[num_funs], const struct linop_s* thresh_funs[num_funs], const struct operator_s* precond_op); extern void debug_print_sense_conf(int debug_level, const struct sense_conf* conf); #include "misc/cppwrap.h" #endif bart-0.4.02/src/show.c000066400000000000000000000046071320577655200144630ustar00rootroot00000000000000/* Copyright 2013-2016. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013, 2015-2016 Martin Uecker * 2015-2016 Jon Tamir */ #define _GNU_SOURCE #include #include #include #include #include "num/multind.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = ""; static const char help_str[] = "Outputs values or meta data."; static void print_cfl(unsigned int N, const long dims[N], const complex float* data, const char* fmt, const char* sep) { // find first non-trivial dimension unsigned int l = 0; while ((l < N - 1) && (1 == dims[l])) l++; long T = md_calc_size(N, dims); const char* allowed_fmts[] = { "%+e%+ei", "%+f%+fi", }; for (unsigned int i = 0; i < ARRAY_SIZE(allowed_fmts); i++) if (0 == strcmp(allowed_fmts[i], fmt)) goto ok; debug_printf(DP_ERROR, "Invalid format string.\n"); return; ok: for (long i = 0; i < T; i++) { printf(fmt, crealf(data[i]), cimagf(data[i])); printf("%s", (0 == (i + 1) % dims[l]) ? 
"\n" : sep); } } int main_show(int argc, char* argv[]) { bool meta = false; int showdim = -1; const char* sep = strdup("\t"); const char* fmt = strdup("%+e%+ei"); const struct opt_s opts[] = { OPT_SET('m', &meta, "show meta data"), OPT_INT('d', &showdim, "dim", "show size of dimension"), OPT_STRING('s', &sep, "sep", "use as the separator"), OPT_STRING('f', &fmt, "format", "use as the format. Default: \"\%+e\%+ei\""), }; cmdline(&argc, argv, 1, 1, usage_str, help_str, ARRAY_SIZE(opts), opts); unsigned int N = DIMS; long dims[N]; complex float* data = load_cfl(argv[1], N, dims); if (-1 != showdim) { assert((showdim >= 0) && (showdim < (int)N)); printf("%ld\n", dims[showdim]); goto out; } if (meta) { printf("Type: complex float\n"); printf("Dimensions: %d\n", N); printf("AoD:"); for (unsigned int i = 0; i < N; i++) printf("\t%ld", dims[i]); printf("\n"); goto out; } print_cfl(N, dims, data, fmt, sep); out: unmap_cfl(N, dims, data); xfree(sep); xfree(fmt); exit(0); } bart-0.4.02/src/simu/000077500000000000000000000000001320577655200143055ustar00rootroot00000000000000bart-0.4.02/src/simu/biot_savart.c000066400000000000000000000032521320577655200167700ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * 2017 Martin Uecker * * Biot-Savart law. * */ #include #include #include "misc/misc.h" #include "num/vec3.h" #include "biot_savart.h" typedef float vec3_t[3]; void biot_savart(vec3_t b, const vec3_t r, unsigned int N, const vec3_t curve[static N]) { double c = 1. / (4. * M_PI); /* mu_o */ vec3_clear(b); for (unsigned int i = 0; i < N; i++) { vec3_t l; vec3_sub(l, curve[(i + 1) % N], curve[i]); vec3_t d; vec3_sub(d, r, curve[i]); double n = vec3_norm(d); if (0. 
== n) continue; vec3_t x; vec3_rot(x, l, d); vec3_smul(x, x, c / pow(n, 3.)); //saxpy vec3_add(b, b, x); } } void vec3_ring(unsigned int N, vec3_t ring[N], const vec3_t c, const vec3_t n, float r) { assert(1.E-7 > fabsf(1.f - vec3_norm(n))); // compute vec orth to n vec3_t b1 = { 1., 1., 1. }; int d = 0; for (unsigned int i = 0; i < 3; i++) if (fabsf(n[d]) < fabsf(n[i])) d = i; b1[d] = -(n[0] + n[1] + n[2] - n[d]) / n[d]; vec3_smul(b1, b1, 1. / vec3_norm(b1)); assert(1.E-7 > fabsf(1.f - vec3_norm(b1))); assert(1.E-7 > fabsf(vec3_sdot(n, b1))); vec3_t b2; vec3_rot(b2, b1, n); assert(1.E-7 > fabsf(1.f - vec3_norm(b2))); assert(1.E-7 > fabsf(vec3_sdot(n, b2))); assert(1.E-7 > fabsf(vec3_sdot(b1, b2))); for (unsigned int i = 0; i < N; i++) { float x = sinf(2. * M_PI * i / N); float y = cosf(2. * M_PI * i / N); vec3_copy(ring[i], c); vec3_saxpy(ring[i], ring[i], r * x, b1); vec3_saxpy(ring[i], ring[i], r * y, b2); } } bart-0.4.02/src/simu/biot_savart.h000066400000000000000000000003361320577655200167750ustar00rootroot00000000000000 typedef float vec3_t[3]; extern void biot_savart(vec3_t x, const vec3_t r, unsigned int N, const vec3_t curve[static N]); extern void vec3_ring(unsigned int N, vec3_t ring[N], const vec3_t c, const vec3_t n, float r); bart-0.4.02/src/simu/coil.c000066400000000000000000000014141320577655200153770ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * 2017 Martin Uecker * * Simple coil using Biot-Savart law. * */ #include #include #include #include "misc/misc.h" #include "simu/biot_savart.h" #include "coil.h" complex float coil(float x[3], unsigned int N, unsigned int i) { assert(i < N); vec3_t c[50]; vec3_t nc1 = { 1.5 * sinf(2. * M_PI * i / N), 1.5 * cosf(2. * M_PI * i / N), 0. }; vec3_t nc2 = { sinf(2. * M_PI * i / N), cosf(2. * M_PI * i / N), 0. 
}; vec3_ring(50, c, nc1, nc2, 0.2); vec3_t b; biot_savart(b, x, 50, (const vec3_t*)c); return b[0] + 1.i * b[1]; } bart-0.4.02/src/simu/coil.h000066400000000000000000000001111320577655200153750ustar00rootroot00000000000000 extern complex float coil(float x[3], unsigned int N, unsigned int i); bart-0.4.02/src/simu/phantom.c000066400000000000000000000172011320577655200161200ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2015-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * 2012-2017 Martin Uecker * * Simple numerical phantom which simulates image-domain or * k-space data with multiple channels. * */ #include #include #include #include #include #include "num/multind.h" #include "num/loop.h" #include "misc/misc.h" #include "misc/mri.h" #include "simu/shepplogan.h" #include "simu/sens.h" #include "simu/coil.h" #include "phantom.h" #define MAX_COILS 8 #define COIL_COEFF 5 typedef complex float (*krn_t)(void* _data, const double mpos[3]); static complex float xsens(unsigned int c, double mpos[3], void* data, krn_t fun) { assert(c < MAX_COILS); #if 1 complex float val = 0.; long sh = (COIL_COEFF - 1) / 2; for (int i = 0; i < COIL_COEFF; i++) for (int j = 0; j < COIL_COEFF; j++) val += sens_coeff[c][i][j] * cexpf(-2.i * M_PI * ((i - sh) * mpos[0] + (j - sh) * mpos[1]) / 4.); #else float p[3] = { mpos[0], mpos[1], mpos[2] }; complex float val = coil(p, MAX_COILS, c); #endif return val * fun(data, mpos); } /* * To simulate channels, we simply convolve with a few Fourier coefficients * of the sensitivities. 
See: * * M Guerquin-Kern, L Lejeune, KP Pruessmann, and M Unser, * Realistic Analytical Phantoms for Parallel Magnetic Resonance Imaging * IEEE TMI 31:626-636 (2012) */ static complex float ksens(unsigned int c, double mpos[3], void* data, krn_t fun) { assert(c < MAX_COILS); complex float val = 0.; for (int i = 0; i < COIL_COEFF; i++) { for (int j = 0; j < COIL_COEFF; j++) { long sh = (COIL_COEFF - 1) / 2; double mpos2[3] = { mpos[0] + (double)(i - sh) / 4., mpos[1] + (double)(j - sh) / 4., mpos[2] }; val += sens_coeff[c][i][j] * fun(data, mpos2); } } return val; } static complex float nosens(unsigned int c, double mpos[3], void* data, krn_t fun) { UNUSED(c); return fun(data, mpos); } struct data1 { bool sens; const long dims[3]; void* data; krn_t fun; }; static complex float xkernel(void* _data, const long pos[]) { struct data1* data = _data; double mpos[3] = { (double)(pos[0] - data->dims[0] / 2) / (0.5 * (double)data->dims[0]), (double)(pos[1] - data->dims[1] / 2) / (0.5 * (double)data->dims[1]), (double)(pos[2] - data->dims[2] / 2) / (0.5 * (double)data->dims[2]) }; return (data->sens ? xsens : nosens)(pos[COIL_DIM], mpos, data->data, data->fun); } static complex float kkernel(void* _data, const long pos[]) { struct data1* data = _data; double mpos[3] = { (double)(pos[0] - data->dims[0] / 2) / 2., (double)(pos[1] - data->dims[1] / 2) / 2., (double)(pos[2] - data->dims[2] / 2) / 2. }; return (data->sens ? ksens : nosens)(pos[COIL_DIM], mpos, data->data, data->fun); } struct data2 { const complex float* traj; long istrs[DIMS]; bool sens; void* data; krn_t fun; }; static complex float nkernel(void* _data, const long pos[]) { struct data2* data = _data; double mpos[3]; mpos[0] = data->traj[md_calc_offset(3, data->istrs, pos) + 0] / 2.; mpos[1] = data->traj[md_calc_offset(3, data->istrs, pos) + 1] / 2.; mpos[2] = data->traj[md_calc_offset(3, data->istrs, pos) + 2] / 2.; return (data->sens ? 
ksens : nosens)(pos[COIL_DIM], mpos, data->data, data->fun); } struct krn_data { bool kspace; unsigned int N; const struct ellipsis_s* el; }; static complex float krn(void* _data, const double mpos[3]) { struct krn_data* data = _data; return phantom(data->N, data->el, mpos, data->kspace); } struct krn3d_data { bool kspace; unsigned int N; const struct ellipsis3d_s* el; }; static complex float krn3d(void* _data, const double mpos[3]) { struct krn3d_data* data = _data; return phantom3d(data->N, data->el, mpos, data->kspace); } static void sample(unsigned int N, const long dims[N], complex float* out, unsigned int D, const struct ellipsis_s* el, bool kspace) { struct data1 data = { .sens = (dims[COIL_DIM] > 1), .dims = { dims[0], dims[1], dims[2] }, .data = &(struct krn_data){ kspace, D, el }, .fun = krn, }; md_parallel_zsample(N, dims, out, &data, kspace ? kkernel : xkernel); } void calc_phantom(const long dims[DIMS], complex float* out, bool kspace) { sample(DIMS, dims, out, 10, shepplogan_mod, kspace); } static void sample3d(unsigned int N, const long dims[N], complex float* out, unsigned int D, const struct ellipsis3d_s* el, bool kspace) { struct data1 data = { .sens = (dims[COIL_DIM] > 1), .dims = { dims[0], dims[1], dims[2] }, .data = &(struct krn3d_data){ kspace, D, el }, .fun = krn3d, }; md_parallel_zsample(N, dims, out, &data, kspace ? 
kkernel : xkernel); } void calc_phantom3d(const long dims[DIMS], complex float* out, bool kspace) { sample3d(DIMS, dims, out, 10, shepplogan3d, kspace); } static void sample_noncart(const long dims[DIMS], complex float* out, const complex float* traj, unsigned int D, const struct ellipsis_s* el) { struct data2 data = { .traj = traj, .sens = (dims[COIL_DIM] > 1), .data = &(struct krn_data){ true, D, el }, .fun = krn, }; assert(3 == dims[0]); long odims[DIMS]; md_select_dims(DIMS, 2 + 4 + 8, odims, dims); long sdims[DIMS]; md_select_dims(DIMS, 1 + 2 + 4, sdims, dims); md_calc_strides(DIMS, data.istrs, sdims, 1); md_parallel_zsample(DIMS, odims, out, &data, nkernel); } static void sample3d_noncart(const long dims[DIMS], complex float* out, const complex float* traj, unsigned int D, const struct ellipsis3d_s* el) { struct data2 data = { .traj = traj, .sens = (dims[COIL_DIM] > 1), .data = &(struct krn3d_data){ true, D, el }, .fun = krn3d, }; assert(3 == dims[0]); long odims[DIMS]; md_select_dims(DIMS, 2 + 4 + 8, odims, dims); long sdims[DIMS]; md_select_dims(DIMS, 1 + 2 + 4, sdims, dims); md_calc_strides(DIMS, data.istrs, sdims, 1); md_parallel_zsample(DIMS, odims, out, &data, nkernel); } void calc_phantom_noncart(const long dims[DIMS], complex float* out, const complex float* traj) { sample_noncart(dims, out, traj, 10, shepplogan_mod); } void calc_phantom3d_noncart(const long dims[DIMS], complex float* out, const complex float* traj) { sample3d_noncart(dims, out, traj, 10, shepplogan3d); } static complex float cnst_one(void* _data, const double mpos[2]) { UNUSED(_data); UNUSED(mpos); return 1.; } void calc_sens(const long dims[DIMS], complex float* sens) { struct data1 data = { .sens = true, .dims = { dims[0], dims[1], dims[2] }, .data = NULL, .fun = cnst_one, }; md_parallel_zsample(DIMS, dims, sens, &data, xkernel); } void calc_circ(const long dims[DIMS], complex float* out, bool kspace) { sample(DIMS, dims, out, 1, phantom_disc, kspace); } void calc_circ3d(const long 
dims[DIMS], complex float* out, bool kspace) { sample3d(DIMS, dims, out, 1, phantom_disc3d, kspace); } void calc_ring(const long dims[DIMS], complex float* out, bool kspace) { sample(DIMS, dims, out, 4, phantom_ring, kspace); } void calc_moving_circ(const long dims[DIMS], complex float* out, bool kspace) { struct ellipsis_s disc[1] = { phantom_disc[0] }; disc[0].axis[0] /= 3; disc[0].axis[1] /= 3; long strs[DIMS]; md_calc_strides(DIMS, strs, dims, sizeof(complex float)); long dims1[DIMS]; md_select_dims(DIMS, ~MD_BIT(TE_DIM), dims1, dims); for (int i = 0; i < dims[TE_DIM]; i++) { disc[0].center[0] = 0.5 * sin(2. * M_PI * (float)i / (float)dims[TE_DIM]); disc[0].center[1] = 0.5 * cos(2. * M_PI * (float)i / (float)dims[TE_DIM]); sample(DIMS, dims1, (void*)out + strs[TE_DIM] * i, 1, disc, kspace); } } bart-0.4.02/src/simu/phantom.h000066400000000000000000000014161320577655200161260ustar00rootroot00000000000000 #include "misc/mri.h" extern void calc_phantom(const long dims[DIMS], complex float* out, _Bool ksp); extern void calc_phantom_noncart(const long dims[3], complex float* out, const complex float* traj); extern void calc_sens(const long dims[DIMS], complex float* sens); extern void calc_circ(const long dims[DIMS], complex float* img, _Bool ksp); extern void calc_ring(const long dims[DIMS], complex float* img, _Bool ksp); extern void calc_phantom3d(const long dims[DIMS], complex float* out, _Bool ksp); extern void calc_phantom3d_noncart(const long dims[3], complex float* out, const complex float* traj); extern void calc_circ3d(const long dims[DIMS], complex float* out, _Bool kspace); extern void calc_moving_circ(const long dims[DIMS], complex float* out, bool kspace); bart-0.4.02/src/simu/sens.c000066400000000000000000000144121320577655200154230ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * 2013 Martin Uecker */ #include #include "sens.h" const complex float sens_coeff[8][5][5] = { { // coil0 { +2.202641e+02+7.959795e+02i, +7.555631e+02-1.436550e+03i, +2.545977e+03+6.890254e+02i, -1.556657e+03+1.514510e+03i, +2.119276e+02-1.494780e+03i }, { +1.298826e+03-2.217249e+02i, +5.104937e+03-1.257278e+03i, +2.718070e+03+9.347223e+03i, -4.162577e+03+4.012845e+03i, -2.934868e+02-1.494939e+02i }, { -2.586698e+03+3.241368e+02i, +2.655842e+02-1.423245e+04i, +3.444878e+04-5.959087e-07i, +2.655845e+02+1.423245e+04i, -2.586697e+03-3.241368e+02i }, { -2.934868e+02+1.494939e+02i, -4.162577e+03-4.012845e+03i, +2.718070e+03-9.347224e+03i, +5.104937e+03+1.257278e+03i, +1.298826e+03+2.217249e+02i }, { +2.119276e+02+1.494780e+03i, -1.556657e+03-1.514511e+03i, +2.545977e+03-6.890254e+02i, +7.555631e+02+1.436550e+03i, +2.202641e+02-7.959794e+02i }, }, { // coil1 { -5.140192e+02+3.919716e+02i, +1.894585e+03-6.871808e+02i, -1.588401e+03-9.072462e+02i, +1.612868e+03+3.162505e+03i, -1.764654e+03-7.521263e+01i }, { +1.468621e+03+2.462891e+03i, +9.060663e+02-3.810722e+03i, +1.295797e+04+1.274518e+03i, +9.795861e+03+4.041410e+03i, +5.382768e+03+1.496270e+03i }, { +6.432245e+03-1.873246e+03i, -6.606911e+03+1.347433e+03i, +1.469823e+04-2.828712e+04i, +6.094570e+03-3.186246e+03i, +1.553516e+03-6.658626e+03i }, { +1.552164e+02-2.608411e+03i, -4.773840e+02+4.141277e+03i, -1.697275e+04-4.176371e+03i, +1.530776e+02-6.096529e+03i, -1.256617e+03-3.806721e+03i }, { +1.427317e+03-6.593576e+02i, +3.361672e+03+2.074898e+03i, -1.207303e+03+5.915274e+03i, -1.866361e+03+2.202192e+02i, -2.244576e+03+5.222404e+02i }, }, { // coil2 { +1.501215e+03+5.783638e+02i, -4.342622e+02-3.816680e+03i, +1.776472e+03+3.912266e+03i, +7.807321e+03-4.950209e+03i, -2.595354e+03-2.460056e+03i }, { +2.369316e+03-2.313158e+03i, +9.616589e+02-1.347896e+02i, +1.163739e+04-1.087814e+04i, +5.465111e+03-1.369369e+04i, -2.497076e+03-7.660470e+03i }, { -1.154424e+03-5.152002e+03i, +5.065459e+03+2.144815e+03i, 
-8.206305e+03-2.679990e+04i, -1.085187e+04-1.832581e+03i, -1.138738e+04+2.371703e+03i }, { -1.613378e+03+1.783068e+03i, +2.226849e+03-2.917332e+03i, -1.126524e+04+1.535570e+04i, -1.155414e+03+9.211537e+02i, +2.441054e+03+3.475593e+03i }, { +2.149000e+03-1.050538e+03i, +1.897796e+02-2.738874e+03i, +9.831870e+03+3.711983e+01i, +6.797585e+02+1.121489e+03i, +2.977299e+03-2.676358e+02i }, }, { // coil3 { +9.225188e+02-1.609105e+03i, +3.003085e+02-4.805967e+03i, +3.010118e+03-5.193128e+03i, -7.148681e+03-6.014095e+03i, -5.118819e+03+6.886482e+03i }, { -7.304860e+01-1.906521e+03i, +4.666944e+03-8.186537e+03i, -4.628897e+03-1.670177e+04i, -9.002204e+03-5.413904e+03i, -5.991946e+03+3.112899e+03i }, { -1.707291e+03+1.371777e+03i, +6.265779e+03-4.096543e+03i, -1.623247e+04-1.249279e+04i, -5.735404e+03+1.462694e+04i, +3.235129e+03+7.745793e+03i }, { +2.658034e+03-2.003479e+03i, -5.550209e+03-6.058204e+02i, +4.231377e+03+1.547026e+04i, +5.596798e+03-8.212248e+03i, -2.146848e+02-8.379209e+03i }, { +7.590588e+02-2.001688e+03i, +4.298658e+03-8.379312e+02i, +8.961587e+02-1.114809e+04i, -1.882075e+03+3.505513e+03i, -9.920860e+01-1.958216e+01i }, }, { // coil4 { +1.038110e+03+1.053870e+03i, +9.244695e+03+1.892697e+03i, -3.192582e+03-6.490971e+03i, -9.345550e+02-5.915621e+02i, -5.048445e+03-5.094557e+02i }, { +6.029613e+03+9.528574e+03i, +1.726803e+04+1.136792e+04i, +1.704588e+04-6.327428e+03i, -1.018518e+03-4.875073e+03i, -1.385726e+03-2.624488e+03i }, { -5.607942e+03+3.239110e+03i, -6.596133e+02+1.426120e+04i, +1.907908e+04+6.801775e+03i, -9.809665e+03-9.162689e+03i, +2.646640e+03+3.444627e+03i }, { +3.773911e+03-3.830415e+03i, -2.280376e+03-6.118903e+03i, -9.791950e+03-1.582865e+03i, +1.482043e+04+8.187234e+03i, +2.503851e+03-5.622277e+03i }, { -3.532414e+02+2.797883e+03i, +1.086923e+03+4.907002e+03i, +2.711129e+03-4.658596e+02i, -6.884295e+03+1.658655e+03i, +2.195509e+03+1.532364e+03i }, }, { // coil5 { -2.660406e+03+1.130288e+03i, -2.684663e+03-4.547482e+03i, 
-1.091036e+03+1.039176e+04i, +3.794462e+03-3.726057e+03i, -9.599513e+02+2.510410e+03i }, { +2.453693e+03-1.127183e+04i, +3.368645e+03-1.562005e+04i, -6.638387e+03-2.965991e+03i, +3.831320e+03+2.531652e+03i, -2.198729e+03-5.958389e+02i }, { +1.428178e+04+9.163301e+02i, +1.684055e+04-6.384269e+03i, +1.658437e+04-1.232798e+04i, -1.406222e+04+1.113432e+04i, +3.592359e+03-6.771321e+03i }, { -1.660563e+03+2.839105e+03i, -2.896235e+02+1.029472e+03i, -3.908763e+03+3.005041e+03i, +4.267630e+03-1.222684e+04i, +3.162250e+03+3.059668e+03i }, { +1.229621e+03+1.214108e+03i, +3.580651e+03+6.040717e+02i, +7.638826e+02+2.616962e+03i, +4.679096e+03+3.197829e+03i, -2.025938e+03+1.366729e+03i }, }, { // coil6 { +1.532993e+03+3.085543e+03i, +1.698160e+02-2.059932e+03i, +8.620593e+03+5.947357e+03i, -1.818730e+03-3.486291e+03i, +2.804918e+03+2.657346e+02i }, { -5.363040e+03-8.659919e+03i, -1.811624e+03-1.273996e+04i, +5.336066e+02+4.722509e+03i, +5.288265e+03-4.540653e+02i, -1.550763e+03+3.172655e+03i }, { +1.484031e+04-8.435797e+03i, +1.909462e+04-1.153338e+04i, +2.644743e+04+1.609464e+03i, -1.835121e+04+1.685986e+04i, -2.060884e+03-1.164399e+04i }, { +3.565514e+03+4.812732e+03i, -5.792477e+02-1.059542e+03i, -6.948085e+03+1.922669e+03i, -3.102303e+03-1.536498e+04i, +8.313277e+03-1.413219e+03i }, { +3.645370e+03+3.144318e+03i, +5.896468e+03-1.094847e+03i, +3.137289e+03+4.010566e+03i, +5.041110e+03-3.595689e+03i, +4.239519e+03+4.264271e+03i }, }, { // coil7 { +9.757094e+02-4.944947e+02i, -1.479025e+03-3.433955e+03i, +6.554174e+03-3.580007e+03i, -5.973520e+02+2.335976e+03i, -1.082375e+03-1.958220e+03i }, { -2.523083e+03+1.680365e+03i, -4.212411e+03-1.392797e+03i, +2.731082e+03+4.483586e+03i, -2.006126e+03-1.093595e+03i, +1.536401e+03-1.071117e+03i }, { -9.129410e+03-5.654936e+03i, -6.060043e+03-2.092658e+04i, +2.299950e+04-2.185407e+04i, +9.382429e+03+1.937377e+04i, -1.130825e+04+2.193837e+03i }, { +6.269709e+02-2.393860e+03i, -1.782260e+03-2.694656e+03i, +2.224400e+03-3.487669e+01i, 
-4.697312e+03+2.063353e+03i, -1.975369e+03-3.797778e+03i }, { +8.142875e+02-3.201539e+03i, +4.320247e+01-5.839946e+03i, +7.607719e+03-2.151906e+03i, -1.149005e+03+1.656691e+03i, -3.673436e+01-2.858548e+03i }, } }; bart-0.4.02/src/simu/sens.h000066400000000000000000000000631320577655200154250ustar00rootroot00000000000000 extern const complex float sens_coeff[8][5][5]; bart-0.4.02/src/simu/shepplogan.c000066400000000000000000000165441320577655200166230ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * 2013-2016 Martin Uecker */ #define _GNU_SOURCE #include #include #include #include #include "simu/shepplogan.h" #include "misc/misc.h" // AK Jain, 439. // modified Toft 199-200 multiply -0.98 -> 0.8 // multiply the rest by ten const struct ellipsis_s shepplogan[10] = { { 1., { .69, .92 }, { 0., 0. }, 0. }, { -.98, { .6624, .8740 }, { 0., -.0184 }, 0. }, { -.02, { .1100, .3100 }, { .22, 0. }, -18. / 360. * 2. * M_PI }, { -.02, { .1600, .4100 }, { -.22, 0 }, 18. / 360. * 2. * M_PI }, { .01, { .2100, .2500 }, { 0, .35 }, 0. }, { .01, { .0460, .0460 }, { 0, .1 }, 0. }, { .01, { .0460, .0460 }, { 0, -.1 }, 0. }, { .01, { .0460, .0230 }, { -.08, -.605 }, 0. }, { .01, { .0230, .0230 }, { 0, -.606 }, 0. }, { .01, { .0230, .0460 }, { .06, -.605 }, 0. } }; const struct ellipsis_s shepplogan_mod[10] = { { 1., { .69, .92 }, { 0., 0. }, 0. }, { -.8, { .6624, .8740 }, { 0., -.0184 }, 0. }, { -.2, { .1100, .3100 }, { .22, 0. }, -18. / 360. * 2. * M_PI }, { -.2, { .1600, .4100 }, { -.22, 0 }, 18. / 360. * 2. 
* M_PI }, { .1, { .2100, .2500 }, { 0, .35 }, 0 }, { .1, { .0460, .0460 }, { 0, .1 }, 0 }, { .1, { .0460, .0460 }, { 0, -.1 }, 0 }, { .1, { .0460, .0230 }, { -.08, -.605 }, 0 }, { .1, { .0230, .0230 }, { 0, -.606 }, 0 }, { .1, { .0230, .0460 }, { .06, -.605 }, 0 } }; const struct ellipsis_s phantom_disc[1] = { { 1., { 1., 1. }, { 0., 0. }, 0. } }; const struct ellipsis3d_s phantom_disc3d[1] = { { 1., { 1., 1., 1. }, { 0., 0., 0. }, 0. } }; // old: imaginary ring outside from 0.5 to 0.49 const struct ellipsis_s phantom_ring[4] = { { 1., { 0.75, 0.75 }, { 0., 0. }, 0. }, { -1. + 1.i, { 0.5, 0.5 }, { 0., 0. }, 0. }, { -1.i, { 0.48, 0.48 }, { 0., 0. }, 0. }, { 1., { 0.48, 0.48 }, { 0., 0. }, 0. }, // { 1., { 0.48, 0.48 }, { 0., 0. }, 0. }, }; /* Magnetic Resonance in Medicine 58:430--436 (2007) * Three-Dimensional Analytical Magnetic Resonance * Imaging Phantom in the Fourier Domain * Cheng Guan Koay, Joelle E. Sarlls, and Evren Özarslan */ const struct ellipsis3d_s shepplogan3d[10] = { { 2., { .6900, .9200, .9000 }, { .000, .000, .000 }, 0. }, { -.8, { .6624, .8740, .8800 }, { .000, .000, .000 }, 0. }, { -.2, { .4100, .1600, .2100 }, { -.220, .000, -.250 }, 3. * M_PI / 5. }, { -.2, { .3100, .1100, .2200 }, { .220, .000, -.250 }, 2. * M_PI / 5. }, { .2, { .2100, .2500, .5000 }, { .000, .350, -.250 }, 0. }, { .2, { .0460, .0460, .0460 }, { .000, .100, -.250 }, 0. }, { .1, { .0460, .0230, .0200 }, { -.080, -.650, -.250 }, 0. }, { .1, { .0460, .0230, .0200 }, { .060, -.650, -.250 }, M_PI / 2. }, { .2, { .0560, .0400, .1000 }, { .060, -.105, .625 }, M_PI / 2. }, { -.2, { .0560, .0560, .1000 }, { .000, .100, .625 }, 0. } }; static double sinc(double x) { return (0. == x) ? 1. : (sin(x) / x); } static double jinc(double x) { return (0. == x) ? 1. : (2. 
* j1(x) / x); } static void rot2d(double x[2], const double in[2], double angle) { x[0] = cos(angle) * in[0] + sin(angle) * in[1]; x[1] = sin(angle) * in[0] - cos(angle) * in[1]; } complex double xellipsis(const double center[2], const double axis[2], double angle, const double p[2]) { double p90[2]; p90[0] = -p[1]; p90[1] = p[0]; double pshift[2]; pshift[0] = p90[0] + center[0]; pshift[1] = p90[1] + center[1]; double prot[2]; rot2d(prot, pshift, angle); double radius = pow(prot[0] / axis[0], 2.) + pow(prot[1] / axis[1], 2.); return (radius <= 1.) ? 1. : 0.; } complex double kellipsis(const double center[2], const double axis[2], double angle, const double p[2]) { double p90[2]; p90[0] = -p[1]; p90[1] = p[0]; double prot[2]; rot2d(prot, p90, angle); double radius = sqrt(pow(prot[0] * axis[0], 2.) + pow(prot[1] * axis[1], 2.)); complex double res = jinc(2. * M_PI * radius) * (axis[0] * axis[1]); return res * cexp(2.i * M_PI * (p90[0] * center[0] + p90[1] * center[1])) / sqrtf(2. * M_PI) * 2.; } complex double xrectangle(const double center[2], const double axis[2], double angle, const double p[2]) { double p90[2]; p90[0] = -p[1]; p90[1] = p[0]; double pshift[2]; pshift[0] = p90[0] + center[0]; pshift[1] = p90[1] + center[1]; double prot[2]; rot2d(prot, pshift, angle); double radius = fabs(prot[0] / axis[0]) + fabs(prot[1] / axis[1]); return (radius <= 1.) ? 1. : 0.; } complex double krectangle(const double center[2], const double axis[2], double angle, const double p[2]) { double p90[2]; p90[0] = -p[1]; p90[1] = p[0]; double prot[2]; rot2d(prot, p90, angle); complex double res = sinc(2. * M_PI * prot[0] * axis[0]) * sinc(2. * M_PI * prot[1] * axis[1]) * (axis[0] * axis[1]); return res * cexp(2.i * M_PI * (p90[0] * center[0] + p90[1] * center[1])) / sqrtf(2. 
* M_PI) * 2.; } complex double phantom(unsigned int N, const struct ellipsis_s arr[N], const double pos[3], bool ksp) { complex double res = 0.; for (unsigned int i = 0; i < N; i++) res += arr[i].intensity * (ksp ? kellipsis : xellipsis)(arr[i].center, arr[i].axis, arr[i].angle, pos); return res; } complex double phantomX(unsigned int N, const struct ellipsis_s arr[N], const double pos[2], bool ksp) { complex double res = 0.; for (unsigned int i = 0; i < N; i++) res += arr[i].intensity * (ksp ? krectangle : xrectangle)(arr[i].center, arr[i].axis, arr[i].angle, pos); return res; } static double ksphere3(double x) { return (0. == x) ? (1. / 3.) : ((sin(x) - x * cos(x)) / pow(x, 3.)); } complex double xellipsis3d(const double center[3], const double axis[3], double angle, const double p[3]) { double p90[3]; p90[0] = -p[1]; p90[1] = p[0]; p90[2] = p[2]; double pshift[3]; pshift[0] = p90[0] + center[0]; pshift[1] = p90[1] + center[1]; pshift[2] = p90[2] + center[2]; double prot[3]; rot2d(prot, pshift, angle); prot[2] = pshift[2]; double radius = pow(prot[0] / axis[0], 2.) + pow(prot[1] / axis[1], 2.) + pow(prot[2] / axis[2], 2.); return (radius <= 1.) ? 1. : 0.; } complex double kellipsis3d(const double center[3], const double axis[3], double angle, const double p[3]) { double p90[3]; p90[0] = -p[1]; p90[1] = p[0]; p90[2] = p[2]; double pshift[3]; pshift[0] = p90[0] + center[0]; pshift[1] = p90[1] + center[1]; pshift[2] = p90[2] + center[2]; double prot[3]; rot2d(prot, pshift, angle); prot[2] = pshift[2]; double radius = sqrt(pow(prot[0] * axis[0], 2.) + pow(prot[1] * axis[1], 2.) + pow(prot[2] * axis[2], 2.)); complex double res = ksphere3(2. 
* M_PI * radius) * (axis[0] * axis[1] * axis[2]); return res * cexp(2.i * M_PI * (p90[0] * center[0] + p90[1] * center[1] + p90[2] * center[2])) / sqrtf(M_PI) * sqrtf(8.); } complex double phantom3d(unsigned int N, const struct ellipsis3d_s arr[N], const double pos[3], bool ksp) { complex double res = 0.; for (unsigned int i = 0; i < N; i++) res += arr[i].intensity * (ksp ? kellipsis3d : xellipsis3d)(arr[i].center, arr[i].axis, arr[i].angle, pos); return res; } bart-0.4.02/src/simu/shepplogan.h000066400000000000000000000035141320577655200166210ustar00rootroot00000000000000/* Copyright 2014-2015. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include #include "misc/cppwrap.h" struct ellipsis_s { complex double intensity; double axis[2]; double center[2]; double angle; }; struct ellipsis3d_s { complex double intensity; double axis[3]; double center[3]; double angle; }; extern const struct ellipsis_s shepplogan[10]; extern const struct ellipsis_s shepplogan_mod[10]; extern const struct ellipsis_s phantom_disc[1]; extern const struct ellipsis_s phantom_ring[4]; extern const struct ellipsis3d_s phantom_disc3d[1]; extern const struct ellipsis3d_s shepplogan3d[10]; extern complex double xellipsis(const double center[2], const double axis[2], double angle, const double p[2]); extern complex double kellipsis(const double center[2], const double axis[2], double angle, const double p[2]); extern complex double xellipsis3d(const double center[3], const double axis[3], double angle, const double p[3]); extern complex double kellipsis3d(const double center[3], const double axis[3], double angle, const double p[3]); extern complex double xrectangle(const double center[2], const double axis[2], double angle, const double p[2]); extern complex double krectangle(const double center[2], const double axis[2], double angle, const 
double p[2]); extern complex double phantom(unsigned int N, const struct ellipsis_s arr[__VLA(N)], const double pos[3], _Bool ksp); extern complex double phantomX(unsigned int N, const struct ellipsis_s arr[__VLA(N)], const double pos[3], _Bool ksp); extern complex double phantom3d(unsigned int N, const struct ellipsis3d_s arr[__VLA(N)], const double pos[3], _Bool ksp); #include "misc/cppwrap.h" bart-0.4.02/src/slice.c000066400000000000000000000026051320577655200145760ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012 Martin Uecker */ #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif static const char usage_str[] = "dimension position "; static const char help_str[] = "Extracts a slice from {position} along {dimension}.\n"; int main_slice(int argc, char* argv[]) { mini_cmdline(&argc, argv, 4, usage_str, help_str); num_init(); long in_dims[DIMS]; long out_dims[DIMS]; complex float* in_data = load_cfl(argv[3], DIMS, in_dims); int dim = atoi(argv[1]); int pos = atoi(argv[2]); assert(dim < DIMS); assert(pos >= 0); assert(pos < in_dims[dim]); for (int i = 0; i < DIMS; i++) out_dims[i] = in_dims[i]; out_dims[dim] = 1; complex float* out_data = create_cfl(argv[4], DIMS, out_dims); long pos2[DIMS] = { [0 ... DIMS - 1] = 0 }; pos2[dim] = pos; md_slice(DIMS, MD_BIT(dim), pos2, in_dims, out_data, in_data, CFL_SIZE); unmap_cfl(DIMS, out_dims, out_data); unmap_cfl(DIMS, in_dims, in_data); exit(0); } bart-0.4.02/src/spow.c000066400000000000000000000022721320577655200144670ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2014 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "exponent "; static const char help_str[] = "Raise array to the power of {exponent}. The exponent can be a complex number.\n"; int main_spow(int argc, char* argv[argc]) { mini_cmdline(&argc, argv, 3, usage_str, help_str); num_init(); complex float expo; if (0 != parse_cfl(&expo, argv[1])) { fprintf(stderr, "ERROR: exponent %s is not a number.\n", argv[1]); exit(1); } const int N = DIMS; long dims[N]; complex float* idata = load_cfl(argv[2], N, dims); complex float* odata = create_cfl(argv[3], N, dims); md_zspow(N, dims, odata, idata, expo); unmap_cfl(N, dims, idata); unmap_cfl(N, dims, odata); exit(0); } bart-0.4.02/src/sqpics.c000066400000000000000000000445671320577655200150160ustar00rootroot00000000000000/* Copyright 2013-2015. The Regents of the University of California. * Copyright 2015. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012-2015 Martin Uecker * 2014-2016 Frank Ong * 2014-2015 Jonathan Tamir * */ #include #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/init.h" #include "num/ops.h" #include "num/iovec.h" #include "iter/prox.h" #include "iter/thresh.h" #include "iter/misc.h" #include "linops/linop.h" #include "linops/someops.h" #include "linops/grad.h" #include "linops/sum.h" #include "linops/sampling.h" #include "iter/iter.h" #include "iter/iter2.h" #include "noncart/nufft.h" //#include "sense/recon.h" #include "sense/model.h" #include "sense/optcom.h" #include "wavelet/wavthresh.h" #include "lowrank/lrthresh.h" #include "misc/debug.h" #include "misc/mri.h" #include "misc/utils.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #define NUM_REGS 10 static const char usage_str[] = " "; static const char help_str[] = "Parallel-imaging compressed-sensing reconstruction."; static void help_reg(void) { printf( "Generalized regularization options (experimental)\n\n" "-R :A:B:C\t is regularization type (single letter),\n" "\t\tA is transform flags, B is joint threshold flags,\n" "\t\tand C is regularization value. 
Specify any number\n" "\t\tof regularization terms.\n\n" "-R Q:C \tl2-norm in image domain\n" "-R I:B:C \tl1-norm in image domain\n" "-R W:A:B:C\tl1-wavelet\n" "-R T:A:B:C\ttotal variation\n" "-R T:7:0:.01\t3D isotropic total variation with 0.01 regularization.\n" "-R L:7:7:.02\tLocally low rank with spatial decimation and 0.02 regularization.\n" "-R M:7:7:.03\tMulti-scale low rank with spatial decimation and 0.03 regularization.\n" ); } static const struct linop_s* sense_nc_init(const long max_dims[DIMS], const long map_dims[DIMS], const complex float* maps, const long ksp_dims[DIMS], const long traj_dims[DIMS], const complex float* traj, struct nufft_conf_s conf, struct operator_s** precond_op) { long coilim_dims[DIMS]; long img_dims[DIMS]; md_select_dims(DIMS, ~MAPS_FLAG, coilim_dims, max_dims); md_select_dims(DIMS, ~COIL_FLAG, img_dims, max_dims); const struct linop_s* fft_op = nufft_create(DIMS, ksp_dims, coilim_dims, traj_dims, traj, NULL, conf); const struct linop_s* maps_op = maps2_create(coilim_dims, map_dims, img_dims, maps); //precond_op[0] = (struct operator_s*) nufft_precond_create( fft_op ); precond_op[0] = NULL; const struct linop_s* lop = linop_chain(maps_op, fft_op); linop_free(maps_op); linop_free(fft_op); return lop; } struct reg_s { enum { L1WAV, TV, LLR, MLR, IMAGL1, IMAGL2, L1IMG, L2IMG } xform; unsigned int xflags; unsigned int jflags; float lambda; }; enum algo_t { CG, IST, FISTA, ADMM }; struct opt_reg_s { float lambda; enum algo_t algo; struct reg_s regs[NUM_REGS]; unsigned int r; }; static bool opt_reg(void* ptr, char c, const char* optarg) { struct opt_reg_s* p = ptr; struct reg_s* regs = p->regs; const int r = p->r; const float lambda = p->lambda; assert(r < NUM_REGS); char rt[5]; switch (c) { case 'R': { // first get transform type int ret = sscanf(optarg, "%4[^:]", rt); assert(1 == ret); // next switch based on transform type if (strcmp(rt, "W") == 0) { regs[r].xform = L1WAV; int ret = sscanf(optarg, "%*[^:]:%d:%d:%f", ®s[r].xflags, 
®s[r].jflags, ®s[r].lambda); assert(3 == ret); } else if (strcmp(rt, "L") == 0) { regs[r].xform = LLR; int ret = sscanf(optarg, "%*[^:]:%d:%d:%f", ®s[r].xflags, ®s[r].jflags, ®s[r].lambda); assert(3 == ret); } else if (strcmp(rt, "M") == 0) { regs[r].xform = regs[0].xform; regs[r].xflags = regs[0].xflags; regs[r].jflags = regs[0].jflags; regs[r].lambda = regs[0].lambda; regs[0].xform = MLR; int ret = sscanf(optarg, "%*[^:]:%d:%d:%f", ®s[0].xflags, ®s[0].jflags, ®s[0].lambda); assert(3 == ret); } else if (strcmp(rt, "T") == 0) { regs[r].xform = TV; int ret = sscanf(optarg, "%*[^:]:%d:%d:%f", ®s[r].xflags, ®s[r].jflags, ®s[r].lambda); assert(3 == ret); p->algo = ADMM; } else if (strcmp(rt, "R1") == 0) { regs[r].xform = IMAGL1; int ret = sscanf(optarg, "%*[^:]:%d:%f", ®s[r].jflags, ®s[r].lambda); assert(2 == ret); regs[r].xflags = 0u; p->algo = ADMM; } else if (strcmp(rt, "R2") == 0) { regs[r].xform = IMAGL2; int ret = sscanf(optarg, "%*[^:]:%d:%f", ®s[r].jflags, ®s[r].lambda); assert(2 == ret); regs[r].xflags = 0u; p->algo = ADMM; } else if (strcmp(rt, "I") == 0) { regs[r].xform = L1IMG; int ret = sscanf(optarg, "%*[^:]:%d:%f", ®s[r].jflags, ®s[r].lambda); assert(2 == ret); regs[r].xflags = 0u; } else if (strcmp(rt, "Q") == 0) { regs[r].xform = L2IMG; int ret = sscanf(optarg, "%*[^:]:%f", ®s[r].lambda); assert(1 == ret); regs[r].xflags = 0u; regs[r].jflags = 0u; } else if (strcmp(rt, "h") == 0) { help_reg(); exit(0); } else { error("Unrecognized regularization type: \"%s\" (-Rh for help).\n", rt); } p->r++; break; } case 'l': assert(r < NUM_REGS); regs[r].lambda = lambda; regs[r].xflags = 0u; regs[r].jflags = 0u; if (0 == strcmp("1", optarg)) { regs[r].xform = L1WAV; regs[r].xflags = 7u; } else if (0 == strcmp("2", optarg)) { regs[r].xform = L2IMG; } else { error("Unknown regularization type.\n"); } p->lambda = -1.; p->r++; break; } return false; } int main_sqpics(int argc, char* argv[]) { // Initialize default parameters bool use_gpu = false; bool randshift = true; 
unsigned int maxiter = 30; float step = -1.; // Start time count double start_time = timestamp(); // Read input options struct nufft_conf_s nuconf = nufft_conf_defaults; nuconf.toeplitz = false; float restrict_fov = -1.; const char* pat_file = NULL; const char* traj_file = NULL; bool scale_im = false; bool eigen = false; float scaling = 0.; unsigned int llr_blk = 8; const char* image_truth_file = NULL; bool im_truth = false; const char* image_start_file = NULL; bool warm_start = false; bool hogwild = false; bool fast = false; float admm_rho = iter_admm_defaults.rho; unsigned int admm_maxitercg = iter_admm_defaults.maxitercg; struct opt_reg_s ropts; ropts.r = 0; ropts.algo = CG; ropts.lambda = -1.; const struct opt_s opts[] = { { 'l', true, opt_reg, &ropts, "1/-l2\t\ttoggle l1-wavelet or l2 regularization." }, OPT_FLOAT('r', &ropts.lambda, "lambda", "regularization parameter"), { 'R', true, opt_reg, &ropts, " :A:B:C\tgeneralized regularization options (-Rh for help)" }, //OPT_SET('c', &conf.rvc, "real-value constraint"), OPT_FLOAT('s', &step, "step", "iteration stepsize"), OPT_UINT('i', &maxiter, "iter", "max. number of iterations"), OPT_STRING('t', &traj_file, "file", "k-space trajectory"), OPT_CLEAR('n', &randshift, "disable random wavelet cycle spinning"), OPT_SET('g', &use_gpu, "use GPU"), OPT_STRING('p', &pat_file, "file", "pattern or weights"), OPT_SELECT('I', enum algo_t, &ropts.algo, IST, "(select IST)"), OPT_UINT('b', &llr_blk, "blk", "Lowrank block size"), OPT_SET('e', &eigen, "Scale stepsize based on max. eigenvalue"), OPT_SET('H', &hogwild, "(hogwild)"), OPT_SET('F', &fast, "(fast)"), OPT_STRING('T', &image_truth_file, "file", "(truth file)"), OPT_STRING('W', &image_start_file, "", "Warm start with "), OPT_INT('d', &debug_level, "level", "Debug level"), OPT_FLOAT('u', &admm_rho, "rho", "ADMM rho"), OPT_UINT('C', &admm_maxitercg, "iter", "ADMM max. 
CG iterations"), OPT_FLOAT('f', &restrict_fov, "rfov", "restrict FOV"), OPT_SELECT('m', enum algo_t, &ropts.algo, ADMM, "Select ADMM"), OPT_FLOAT('w', &scaling, "val", "scaling"), OPT_SET('S', &scale_im, "Re-scale the image after reconstruction"), }; cmdline(&argc, argv, 3, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); if (NULL != image_truth_file) im_truth = true; if (NULL != image_start_file) warm_start = true; long max_dims[DIMS]; long map_dims[DIMS]; long pat_dims[DIMS]; long img_dims[DIMS]; long coilim_dims[DIMS]; long ksp_dims[DIMS]; long traj_dims[DIMS]; // load kspace and maps and get dimensions complex float* kspace = load_cfl(argv[1], DIMS, ksp_dims); complex float* maps = load_cfl(argv[2], DIMS, map_dims); complex float* traj = NULL; if (NULL != traj_file) traj = load_cfl(traj_file, DIMS, traj_dims); md_copy_dims(DIMS, max_dims, ksp_dims); md_copy_dims(5, max_dims, map_dims); md_select_dims(DIMS, ~COIL_FLAG, img_dims, max_dims); md_select_dims(DIMS, ~MAPS_FLAG, coilim_dims, max_dims); if (!md_check_compat(DIMS, ~(MD_BIT(MAPS_DIM)|FFT_FLAGS), img_dims, map_dims)) error("Dimensions of image and sensitivities do not match!\n"); assert(1 == ksp_dims[MAPS_DIM]); (use_gpu ? 
num_init_gpu : num_init)(); // print options if (use_gpu) debug_printf(DP_INFO, "GPU reconstruction\n"); if (map_dims[MAPS_DIM] > 1) debug_printf(DP_INFO, "%ld maps.\nESPIRiT reconstruction.\n", map_dims[MAPS_DIM]); if (hogwild) debug_printf(DP_INFO, "Hogwild stepsize\n"); if (im_truth) debug_printf(DP_INFO, "Compare to truth\n"); // initialize sampling pattern complex float* pattern = NULL; if (NULL != pat_file) { pattern = load_cfl(pat_file, DIMS, pat_dims); assert(md_check_compat(DIMS, COIL_FLAG, ksp_dims, pat_dims)); } else { md_select_dims(DIMS, ~COIL_FLAG, pat_dims, ksp_dims); pattern = md_alloc(DIMS, pat_dims, CFL_SIZE); estimate_pattern(DIMS, ksp_dims, COIL_FLAG, pattern, kspace); } if ((NULL != traj_file) && (NULL == pat_file)) { md_free(pattern); pattern = NULL; nuconf.toeplitz = true; } else { // print some statistics long T = md_calc_size(DIMS, pat_dims); long samples = (long)pow(md_znorm(DIMS, pat_dims, pattern), 2.); debug_printf(DP_INFO, "Size: %ld Samples: %ld Acc: %.2f\n", T, samples, (float)T / (float)samples); } if (NULL == traj_file) { fftmod(DIMS, ksp_dims, FFT_FLAGS, kspace, kspace); fftmod(DIMS, map_dims, FFT_FLAGS, maps, maps); } // apply fov mask to sensitivities if (-1. != restrict_fov) { float restrict_dims[DIMS] = { [0 ... DIMS - 1] = 1. }; restrict_dims[0] = restrict_fov; restrict_dims[1] = restrict_fov; restrict_dims[2] = restrict_fov; apply_mask(DIMS, map_dims, maps, restrict_dims); } // initialize forward_op and precond_op const struct linop_s* forward_op = NULL; const struct operator_s* precond_op = NULL; if (NULL == traj_file) forward_op = sense_init(max_dims, FFT_FLAGS|COIL_FLAG|MAPS_FLAG, maps); else forward_op = sense_nc_init(max_dims, map_dims, maps, ksp_dims, traj_dims, traj, nuconf, (struct operator_s**) &precond_op); // apply scaling if (scaling == 0.) 
{ if (NULL == traj_file) { scaling = estimate_scaling(ksp_dims, NULL, kspace); } else { complex float* adj = md_alloc(DIMS, img_dims, CFL_SIZE); linop_adjoint(forward_op, DIMS, img_dims, adj, DIMS, ksp_dims, kspace); scaling = estimate_scaling_norm(1., md_calc_size(DIMS, img_dims), adj, false); md_free(adj); } } else debug_printf(DP_DEBUG1, "Scaling: %f\n", scaling); if (scaling != 0.) md_zsmul(DIMS, ksp_dims, kspace, kspace, 1. / scaling); float lambda = ropts.lambda; if (-1. == lambda) lambda = 0.; // if no penalities specified but regularization // parameter is given, add a l2 penalty struct reg_s* regs = ropts.regs; if ((0 == ropts.r) && (lambda > 0.)) { regs[0].xform = L2IMG; regs[0].xflags = 0u; regs[0].jflags = 0u; regs[0].lambda = lambda; ropts.r = 1; } // initialize thresh_op const struct operator_p_s* thresh_ops[NUM_REGS] = { NULL }; const struct linop_s* trafos[NUM_REGS] = { NULL }; int nr_penalties = ropts.r; long blkdims[MAX_LEV][DIMS]; int levels; for (int nr = 0; nr < nr_penalties; nr++) { // fix up regularization parameter if (-1. == regs[nr].lambda) regs[nr].lambda = lambda; switch (regs[nr].xform) { case L1WAV: debug_printf(DP_INFO, "l1-wavelet regularization: %f\n", regs[nr].lambda); long minsize[DIMS] = { [0 ... 
DIMS - 1] = 1 }; minsize[0] = MIN(img_dims[0], 16); minsize[1] = MIN(img_dims[1], 16); minsize[2] = MIN(img_dims[2], 16); unsigned int wflags = 0; for (unsigned int i = 0; i < DIMS; i++) { if ((1 < img_dims[i]) && MD_IS_SET(regs[nr].xflags, i)) { wflags = MD_SET(wflags, i); minsize[i] = MIN(img_dims[i], 16); } } trafos[nr] = linop_identity_create(DIMS, img_dims); thresh_ops[nr] = prox_wavelet_thresh_create(DIMS, img_dims, wflags, regs[nr].jflags, minsize, regs[nr].lambda, randshift); break; case TV: debug_printf(DP_INFO, "TV regularization: %f\n", regs[nr].lambda); trafos[nr] = linop_grad_create(DIMS, img_dims, regs[nr].xflags); thresh_ops[nr] = prox_thresh_create(DIMS + 1, linop_codomain(trafos[nr])->dims, regs[nr].lambda, regs[nr].jflags | MD_BIT(DIMS), use_gpu); break; case LLR: debug_printf(DP_INFO, "lowrank regularization: %f\n", regs[nr].lambda); // add locally lowrank penalty levels = llr_blkdims(blkdims, regs[nr].jflags, img_dims, llr_blk); assert(1 == levels); img_dims[LEVEL_DIM] = levels; for(int l = 0; l < levels; l++) #if 0 blkdims[l][MAPS_DIM] = img_dims[MAPS_DIM]; #else blkdims[l][MAPS_DIM] = 1; #endif int remove_mean = 0; trafos[nr] = linop_identity_create(DIMS, img_dims); thresh_ops[nr] = lrthresh_create(img_dims, randshift, regs[nr].xflags, (const long (*)[DIMS])blkdims, regs[nr].lambda, false, remove_mean, use_gpu); break; case MLR: debug_printf(DP_INFO, "multi-scale lowrank regularization: %f\n", regs[nr].lambda); levels = multilr_blkdims(blkdims, regs[nr].jflags, img_dims, 8, 1); img_dims[LEVEL_DIM] = levels; max_dims[LEVEL_DIM] = levels; for(int l = 0; l < levels; l++) blkdims[l][MAPS_DIM] = 1; trafos[nr] = linop_identity_create(DIMS, img_dims); thresh_ops[nr] = lrthresh_create(img_dims, randshift, regs[nr].xflags, (const long (*)[DIMS])blkdims, regs[nr].lambda, false, 0, use_gpu); const struct linop_s* decom_op = linop_sum_create(img_dims, use_gpu); const struct linop_s* tmp_op = forward_op; forward_op = linop_chain(decom_op, forward_op); 
linop_free(decom_op); linop_free(tmp_op); break; case IMAGL1: debug_printf(DP_INFO, "l1 regularization of imaginary part: %f\n", regs[nr].lambda); trafos[nr] = linop_rdiag_create(DIMS, img_dims, 0, &(complex float){ 1.i }); thresh_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu); break; case IMAGL2: debug_printf(DP_INFO, "l2 regularization of imaginary part: %f\n", regs[nr].lambda); trafos[nr] = linop_rdiag_create(DIMS, img_dims, 0, &(complex float){ 1.i }); thresh_ops[nr] = prox_leastsquares_create(DIMS, img_dims, regs[nr].lambda, NULL); break; case L1IMG: debug_printf(DP_INFO, "l1 regularization: %f\n", regs[nr].lambda); trafos[nr] = linop_identity_create(DIMS, img_dims); thresh_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu); break; case L2IMG: debug_printf(DP_INFO, "l2 regularization: %f\n", regs[nr].lambda); trafos[nr] = linop_identity_create(DIMS, img_dims); thresh_ops[nr] = prox_leastsquares_create(DIMS, img_dims, regs[nr].lambda, NULL); break; } } int nr = nr_penalties; struct linop_s* sampling = linop_sampling_create(max_dims, pat_dims, pattern); struct linop_s* tmp_op = linop_chain(forward_op, sampling); linop_free(sampling); linop_free(forward_op); forward_op = tmp_op; trafos[nr] = forward_op; thresh_ops[nr] = prox_l2norm_create(DIMS, ksp_dims, 1.); nr_penalties++; const float** biases = xmalloc(sizeof(float*) * nr_penalties); for (int i = 0; i < nr_penalties - 1; i++) biases[i] = NULL; biases[nr] = (float*)kspace; complex float* image = create_cfl(argv[3], DIMS, img_dims); md_clear(DIMS, img_dims, image, CFL_SIZE); long img_truth_dims[DIMS]; complex float* image_truth = NULL; if (im_truth) { image_truth = load_cfl(image_truth_file, DIMS, img_truth_dims); //md_zsmul(DIMS, img_dims, image_truth, image_truth, 1. 
/ scaling); } long img_start_dims[DIMS]; complex float* image_start = NULL; if (warm_start) { debug_printf(DP_DEBUG1, "Warm start: %s\n", image_start_file); image_start = load_cfl(image_start_file, DIMS, img_start_dims); assert(md_check_compat(DIMS, 0u, img_start_dims, img_dims)); md_copy(DIMS, img_dims, image, image_start, CFL_SIZE); free((void*)image_start_file); unmap_cfl(DIMS, img_dims, image_start); // if rescaling at the end, assume the input has also been rescaled if (scale_im && scaling != 0.) md_zsmul(DIMS, img_dims, image, image, 1. / scaling); } // initialize algorithm struct iter_admm_conf mmconf; debug_printf(DP_INFO, "ADMM\n"); mmconf = iter_admm_defaults; mmconf.maxiter = maxiter; mmconf.maxitercg = admm_maxitercg; mmconf.rho = admm_rho; mmconf.hogwild = hogwild; mmconf.fast = fast; //mmconf.dynamic_rho = true; mmconf.ABSTOL = 0.; mmconf.RELTOL = 0.; long size = 2 * md_calc_size(DIMS, img_dims); iter2_admm(CAST_UP(&mmconf), NULL, nr_penalties, thresh_ops, trafos, biases, NULL, size, (float*)image, NULL, NULL); #if 0 if (use_gpu) #ifdef USE_CUDA sqpics_recon2_gpu(&conf, max_dims, image, forward_op, pat_dims, pattern, italgo, iconf, nr_penalties, thresh_ops, (ADMM == algo) ? trafos : NULL, ksp_dims, kspace, image_truth, precond_op); #else assert(0); #endif else sqpics_recon2(&conf, max_dims, image, forward_op, pat_dims, pattern, italgo, iconf, nr_penalties, thresh_ops, (ADMM == algo) ? 
trafos : NULL, ksp_dims, kspace, image_truth, precond_op); #endif if (scale_im) md_zsmul(DIMS, img_dims, image, image, scaling); // clean up if (NULL != pat_file) unmap_cfl(DIMS, pat_dims, pattern); else md_free(pattern); unmap_cfl(DIMS, map_dims, maps); unmap_cfl(DIMS, ksp_dims, kspace); unmap_cfl(DIMS, img_dims, image); if (NULL != traj) unmap_cfl(DIMS, traj_dims, traj); if (im_truth) { free((void*)image_truth_file); unmap_cfl(DIMS, img_dims, image_truth); } double end_time = timestamp(); debug_printf(DP_INFO, "Total Time: %f\n", end_time - start_time); exit(0); } bart-0.4.02/src/squeeze.c000066400000000000000000000022221320577655200151530ustar00rootroot00000000000000/* Copyright 2016. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Jon Tamir */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 32 #endif static const char usage_str[] = " "; static const char help_str[] = "Remove singleton dimensions of array.\n"; int main_squeeze(int argc, char* argv[]) { mini_cmdline(&argc, argv, 2, usage_str, help_str); num_init(); long idims[DIMS]; long odims[DIMS] = MD_INIT_ARRAY(DIMS, 1); complex float* idata = load_cfl(argv[1], DIMS, idims); unsigned int j = 0; for (unsigned int i = 0; i < DIMS; i++) if (1 < idims[i]) odims[j++] = idims[i]; if (0 == j) j = 1; complex float* odata = create_cfl(argv[2], j, odims); md_copy(DIMS, idims, odata, idata, CFL_SIZE); unmap_cfl(DIMS, idims, idata); unmap_cfl(j, odims, odata); exit(0); } bart-0.4.02/src/std.c000066400000000000000000000021441320577655200142670ustar00rootroot00000000000000/* Copyright 2017. The Regents of the University of California. * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Jon Tamir */ #include #include #include #include "num/multind.h" #include "num/init.h" #include "num/flpmath.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "bitmask "; static const char help_str[] = "Compute standard deviation along selected dimensions specified by the {bitmask}"; int main_std(int argc, char* argv[]) { mini_cmdline(&argc, argv, 3, usage_str, help_str); num_init(); long flags = atoi(argv[1]); long idims[DIMS]; long odims[DIMS]; complex float* in = load_cfl(argv[2], DIMS, idims); md_select_dims(DIMS, ~flags, odims, idims); complex float* out = create_cfl(argv[3], DIMS, odims); md_zstd(DIMS, idims, flags, out, in); unmap_cfl(DIMS, idims, in); unmap_cfl(DIMS, odims, out); exit(0); } bart-0.4.02/src/svd.c000066400000000000000000000032661320577655200142770ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * Copyright 2015. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013, 2015 Martin Uecker */ #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/lapack.h" #include "misc/misc.h" #include "misc/mmio.h" #include "misc/opts.h" static const char usage_str[] = " "; static const char help_str[] = "Compute singular-value-decomposition (SVD).\n"; int main_svd(int argc, char* argv[]) { bool econ = false; const struct opt_s opts[] = { OPT_SET('e', &econ, "econ"), }; cmdline(&argc, argv, 4, 4, usage_str, help_str, ARRAY_SIZE(opts), opts); int N = 2; long dims[N]; complex float* in = load_cfl(argv[1], N, dims); long dimsU[2] = { dims[0], econ ? MIN(dims[0], dims[1]) : dims[0] }; long dimsS[2] = { MIN(dims[0], dims[1]), 1 }; long dimsVH[2] = { econ ? 
MIN(dims[0], dims[1]) : dims[1], dims[1] }; complex float* U = create_cfl(argv[2], N, dimsU); complex float* S = create_cfl(argv[3], N, dimsS); complex float* VH = create_cfl(argv[4], N, dimsVH); float* SF = md_alloc(2, dimsS, FL_SIZE); (econ ? lapack_svd_econ : lapack_svd)(dims[0], dims[1], MD_CAST_ARRAY2(complex float, 2, dimsU, U, 0, 1), MD_CAST_ARRAY2(complex float, 2, dimsVH, VH, 0, 1), SF, MD_CAST_ARRAY2(complex float, 2, dims, in, 0, 1)); for (int i = 0 ; i < dimsS[0]; i++) S[i] = SF[i]; md_free(SF); unmap_cfl(N, dims, in); unmap_cfl(N, dimsU, U); unmap_cfl(N, dimsS, S); unmap_cfl(N, dimsVH, VH); exit(0); } bart-0.4.02/src/threshold.c000066400000000000000000000110151320577655200154660ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2015-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2013-2017 Martin Uecker * 2015-2016 Jon Tamir * 2015 Frank Ong */ #include #include #include "num/flpmath.h" #include "num/multind.h" #include "num/init.h" #include "iter/prox.h" #include "iter/thresh.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/opts.h" #include "lowrank/lrthresh.h" #include "linops/waveop.h" #include "dfwavelet/prox_dfwavelet.h" // FIXME: lowrank interface should not be coupled to mri.h -- it should take D as an input #ifndef DIMS #define DIMS 16 #endif // FIXME: consider moving this to a more accessible location? 
static void wthresh(unsigned int D, const long dims[D], float lambda, unsigned int flags, complex float* out, const complex float* in) { long minsize[D]; md_singleton_dims(D, minsize); long course_scale[3] = MD_INIT_ARRAY(3, 16); md_copy_dims(3, minsize, course_scale); unsigned int wflags = 7; // FIXME for (unsigned int i = 0; i < 3; i++) if (dims[i] < minsize[i]) wflags = MD_CLEAR(wflags, i); long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); const struct linop_s* w = linop_wavelet_create(D, wflags, dims, strs, minsize, false); const struct operator_p_s* p = prox_unithresh_create(D, w, lambda, flags, false); operator_p_apply(p, 1., D, dims, out, D, dims, in); operator_p_free(p); } static void lrthresh(unsigned int D, const long dims[D], int llrblk, float lambda, unsigned int flags, complex float* out, const complex float* in) { long blkdims[MAX_LEV][D]; int levels = llr_blkdims(blkdims, ~flags, dims, llrblk); UNUSED(levels); const struct operator_p_s* p = lrthresh_create(dims, false, ~flags, (const long (*)[])blkdims, lambda, false, false, false); operator_p_apply(p, 1., D, dims, out, D, dims, in); operator_p_free(p); } static void dfthresh(unsigned int D, const long dims[D], float lambda, complex float* out, const complex float* in) { long minsize[3]; md_singleton_dims(3, minsize); long coarse_scale[3] = MD_INIT_ARRAY(3, 16); md_min_dims(3, ~0u, minsize, dims, coarse_scale); complex float res[3]; res[0] = 1.; res[1] = 1.; res[2] = 1.; assert(3 == dims[TE_DIM]); const struct operator_p_s* p = prox_dfwavelet_create(dims, minsize, res, TE_DIM, lambda, false); operator_p_apply(p, 1., D, dims, out, D, dims, in); operator_p_free(p); } static void hard_thresh(unsigned int D, const long dims[D], float lambda, complex float* out, const complex float* in) { long size = md_calc_size(DIMS, dims) * 2; const float* inf = (const float*)in; float* outf = (float*)out; #pragma omp parallel for for (long i = 0; i < size; i++) outf[i] = inf[i] > lambda ? 
inf[i] : 0.; } static const char usage_str[] = "lambda "; static const char help_str[] = "Perform (soft) thresholding with parameter lambda."; int main_threshold(int argc, char* argv[]) { unsigned int flags = 0; enum th_type { NONE, WAV, LLR, DFW, MPDFW, HARD } th_type = NONE; int llrblk = 8; const struct opt_s opts[] = { OPT_SELECT('H', enum th_type, &th_type, HARD, "hard thresholding"), OPT_SELECT('W', enum th_type, &th_type, WAV, "daubechies wavelet soft-thresholding"), OPT_SELECT('L', enum th_type, &th_type, LLR, "locally low rank soft-thresholding"), OPT_SELECT('D', enum th_type, &th_type, DFW, "divergence-free wavelet soft-thresholding"), OPT_UINT('j', &flags, "bitmask", "joint soft-thresholding"), OPT_INT('b', &llrblk, "blocksize", "locally low rank block size"), }; cmdline(&argc, argv, 3, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); const int N = DIMS; long dims[N]; complex float* idata = load_cfl(argv[2], N, dims); complex float* odata = create_cfl(argv[3], N, dims); float lambda = atof(argv[1]); switch (th_type) { case WAV: wthresh(N, dims, lambda, flags, odata, idata); break; case LLR: lrthresh(N, dims, llrblk, lambda, flags, odata, idata); break; case DFW: dfthresh(N, dims, lambda, odata, idata); break; case HARD: hard_thresh(N, dims, lambda, odata, idata); break; default: md_zsoftthresh(N, dims, lambda, flags, odata, idata); } unmap_cfl(N, dims, idata); unmap_cfl(N, dims, odata); exit(0); } bart-0.4.02/src/toimg.c000066400000000000000000000064161320577655200146220ustar00rootroot00000000000000/* Copyright 2013-2015 The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2013, 2015 Martin Uecker * 2015 Jonathan Tamir */ #include #include #include #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/mmio.h" #include "misc/png.h" #include "misc/dicom.h" #ifndef DIMS #define DIMS 16 #endif #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif static const char usage_str[] = "[-h] "; static const char help_str[] = "Create magnitude images as png or proto-dicom.\n" "The first two non-singleton dimensions will\n" "be used for the image, and the other dimensions\n" "will be looped over.\n"; static void toimg(bool dicom, const char* name, long inum, float max, long h, long w, const complex float* data) { int len = strlen(name); assert(len >= 1); int nr_bytes = dicom ? 2 : 3; unsigned char (*buf)[h][w][nr_bytes] = TYPE_ALLOC(unsigned char[h][w][nr_bytes]); float max_val = dicom ? 65535. : 255.; for (int i = 0; i < h; i++) { for (int j = 0; j < w; j++) { unsigned int value = max_val * (cabsf(data[j * h + i]) / max); if (!dicom) { (*buf)[i][j][0] = value; (*buf)[i][j][1] = value; (*buf)[i][j][2] = value; } else { (*buf)[i][j][0] = (value >> 0) & 0xFF; (*buf)[i][j][2] = (value >> 8) & 0xFF; } } } (dicom ? dicom_write : png_write_rgb24)(name, w, h, inum, &(*buf)[0][0][0]); free(buf); } static void toimg_stack(const char* name, bool dicom, const long dims[DIMS], const complex float* data) { long data_size = md_calc_size(DIMS, dims); long sq_dims[DIMS] = { [0 ... DIMS - 1] = 1 }; int l = 0; for (int i = 0; i < DIMS; i++) if (1 != dims[i]) sq_dims[l++] = dims[i]; float max = 0.; for (long i = 0; i < data_size; i++) max = MAX(cabsf(data[i]), max); if (0. 
== max) max = 1.; int len = strlen(name); assert(len >= 1); long num_imgs = md_calc_size(DIMS - 2, sq_dims + 2); long img_size = md_calc_size(2, sq_dims); debug_printf(DP_INFO, "Writing %d image(s)...", num_imgs); #pragma omp parallel for for (long i = 0; i < num_imgs; i++) { char name_i[len + 10]; // extra space for ".0000.png" if (num_imgs > 1) sprintf(name_i, "%s-%04ld.%s", name, i, dicom ? "dcm" : "png"); else sprintf(name_i, "%s.%s", name, dicom ? "dcm" : "png"); toimg(dicom, name_i, i, max, sq_dims[0], sq_dims[1], data + i * img_size); } debug_printf(DP_INFO, "done.\n", num_imgs); } int main_toimg(int argc, char* argv[]) { bool dicom = mini_cmdline_bool(&argc, argv, 'd', 2, usage_str, help_str); num_init(); // -d option is deprecated char* ext = rindex(argv[2], '.'); if (NULL != ext) { assert(!dicom); if (0 == strcmp(ext, ".dcm")) dicom = true; else if (0 != strcmp(ext, ".png")) error("Unknown file extension."); *ext = '\0'; } long dims[DIMS]; complex float* data = load_cfl(argv[1], DIMS, dims); toimg_stack(argv[2], dicom, dims, data); unmap_cfl(DIMS, dims, data); exit(0); } bart-0.4.02/src/traj.c000066400000000000000000000132511320577655200144360ustar00rootroot00000000000000/* Copyright 2014-2015. The Regents of the University of California. * Copyright 2015-2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-2017 Martin Uecker */ #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/mri.h" #include "misc/opts.h" static const char usage_str[] = ""; static const char help_str[] = "Computes k-space trajectories."; static void euler(float dir[3], float phi, float psi) { dir[0] = cosf(phi) * cosf(psi); dir[1] = sinf(phi) * cosf(psi); dir[2] = sinf(psi); } /* We allow an arbitrary quadratic form to account for * non-physical coordinate systems. 
* Moussavi et al., MRM 71:308-312 (2014) */ static void gradient_delay(float d[3], float coeff[2][3], float phi, float psi) { float dir[3]; euler(dir, phi, psi); float mat[3][3] = { { coeff[0][0], coeff[0][2], coeff[1][1] }, { coeff[0][2], coeff[0][1], coeff[1][2] }, { coeff[1][1], coeff[1][2], coeff[1][0] }, }; for (unsigned int i = 0; i < 3; i++) { d[i] = 0.; for (unsigned int j = 0; j < 3; j++) d[i] += mat[i][j] * dir[j]; } } enum part_mode { REGULAR, LINEAR, ALIGNED }; static int remap(enum part_mode mode, int all, int turns, int mb, int n) { int spp = all / (turns * mb); int spt = all / turns; // int ind_sp = ((n % (spp * mb)) % spp) * mb * turns; int ind_sp = (n % spp) * mb * turns; int ind_fr = ((n % (turns * spp)) / spp) * mb; // int ind_pr = (n / (turns * spp)) * turns; switch (mode) { case REGULAR: return (n % spt) * turns + n / spt; case LINEAR: return ind_sp + ind_fr;// + ind_pr; case ALIGNED: return ind_sp + ind_fr; } assert(0); } int main_traj(int argc, char* argv[]) { int X = 128; int Y = 128; int mb = 0; int accel = 1; bool radial = false; bool golden = false; bool aligned = false; bool dbl = false; bool pGold = false; int turns = 1; bool d3d = false; bool transverse = false; float gdelays[2][3] = { { 0., 0., 0. }, { 0., 0., 0. 
} }; const struct opt_s opts[] = { OPT_INT('x', &X, "x", "readout samples"), OPT_INT('y', &Y, "y", "phase encoding lines"), OPT_INT('a', &accel, "a", "acceleration"), OPT_INT('t', &turns, "t", "turns"), OPT_INT('m', &mb, "mb", "SMS multiband factor"), OPT_SET('l', &aligned, "aligned partition angle"), OPT_SET('g', &pGold, "golden angle in partition direction"), OPT_SET('r', &radial, "radial"), OPT_SET('G', &golden, "golden-ratio sampling"), OPT_SET('D', &dbl, "double base angle"), OPT_FLVEC3('q', &gdelays[0], "delays", "gradient delays: x, y, xy"), OPT_FLVEC3('Q', &gdelays[1], "delays", "(gradient delays: z, xz, yz)"), OPT_SET('O', &transverse, "correct transverse gradient error for radial tajectories"), OPT_SET('3', &d3d, "3D"), }; cmdline(&argc, argv, 1, 1, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); int spp = Y; // spokes per partition if (0 != mb) Y = Y * mb * turns; // total number of spokes int N = X * Y / accel; long dims[DIMS] = { [0 ... DIMS - 1] = 1 }; dims[0] = 3; dims[1] = X; if (0 == mb) { mb = 1; } else { dims[TIME_DIM] = turns; dims[SLICE_DIM] = mb; } enum part_mode mode = LINEAR; if (golden) { radial = true; if ((turns != 1) || (mb != 1)) error("No turns and SMS implemented for golden angle!"); } else if (dbl || radial) { radial = true; if (d3d) error("3D radial trajectory not implemented yet!"); if ((mb != 1) && (turns != 1)) if (0 == turns % mb) error("'turns % multiband factor' must be nonzero!"); if (aligned || pGold) mode = ALIGNED; } else { if ((turns != 1) || (mb != 1)) error("No turns or spokes in Cartesian trajectories please!"); } dims[2] = (radial ? spp : (Y / accel)); complex float* samples = create_cfl(argv[1], DIMS, dims); int p = 0; for (int j = 0; j < Y; j += accel) { for (int i = 0; i < X; i++) { if (radial) { /* golden-ratio sampling * * Winkelmann S, Schaeffter T, Koehler T, Eggers H, Doessel O. * An optimal radial profile order based on the Golden Ratio * for time-resolved MRI. 
IEEE TMI 26:68--76 (2007) */ double golden_angle = 3. - sqrtf(5.); double base = golden ? ((2. - golden_angle) / 2.) : (1. / (float)Y); double angle = M_PI * (float)remap(mode, Y, turns, mb, j) * (dbl ? 2. : 1.) * base; double read = (float)i + 0.5 - (float)X / 2.; double angle2 = 0.; if (d3d) { int split = sqrtf(Y); angle2 = 2. * M_PI * j * split * base; } if (!(aligned || pGold)) { int pt_ind = j / (turns * spp); double angle_part = M_PI / (float)Y * turns; angle += pt_ind * angle_part; } if (pGold) { int part = (int)((j % (spp * mb)) / spp); // current partition angle += fmod(part * M_PI / spp * (sqrt(5.) - 1) / 2, M_PI / spp); } float d[3] = { 0., 0., 0 }; gradient_delay(d, gdelays, angle, angle2); float read_dir[3]; euler(read_dir, angle, angle2); if (!transverse) { // project to read direction float delay = 0.; for (unsigned int i = 0; i < 3; i++) delay += read_dir[i] * d[i]; for (unsigned int i = 0; i < 3; i++) d[i] = delay * read_dir[i]; } samples[p * 3 + 0] = d[1] + read * read_dir[1]; samples[p * 3 + 1] = d[0] + read * read_dir[0]; samples[p * 3 + 2] = d[2] + read * read_dir[2]; } else { samples[p * 3 + 0] = (i - X / 2); samples[p * 3 + 1] = (j - Y / 2); samples[p * 3 + 2] = 0; } p++; } } assert(p == N - 0); unmap_cfl(3, dims, samples); exit(0); } bart-0.4.02/src/transpose.c000066400000000000000000000023061320577655200155130ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2012 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/mri.h" #include "misc/misc.h" static const char usage_str[] = "dim1 dim2 "; static const char help_str[] = "Transpose dimensions {dim1} and {dim2}.\n"; int main_transpose(int argc, char* argv[]) { mini_cmdline(&argc, argv, 4, usage_str, help_str); num_init(); int N = DIMS; long idims[N]; int dim1 = atoi(argv[1]); int dim2 = atoi(argv[2]); assert((0 <= dim1) && (dim1 < N)); assert((0 <= dim2) && (dim2 < N)); complex float* idata = load_cfl(argv[3], N, idims); long odims[N]; md_transpose_dims(N, dim1, dim2, odims, idims); complex float* odata = create_cfl(argv[4], N, odims); md_transpose(N, dim1, dim2, odims, odata, idims, idata, sizeof(complex float)); unmap_cfl(N, idims, idata); unmap_cfl(N, odims, odata); exit(0); } bart-0.4.02/src/twixread.c000066400000000000000000000161631320577655200153320ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2014-2016 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "misc/misc.h" #include "misc/mri.h" #include "misc/mmio.h" #include "misc/debug.h" #include "misc/opts.h" #ifndef CFL_SIZE #define CFL_SIZE sizeof(complex float) #endif /* Information about twix files can be found here: * (Matlab code by Philipp Ehses and others) * https://github.com/cjohnevans/Gannet2.0/blob/master/mapVBVD.m */ struct hdr_s { uint32_t offset; uint32_t nscans; uint32_t measid; uint32_t fileid; uint64_t datoff; // uint64_t length; }; static void xread(int fd, void* buf, size_t size) { if (size != (size_t)read(fd, buf, size)) error("reading file"); } static void xseek(int fd, off_t pos) { if (-1 == lseek(fd, pos, SEEK_SET)) error("seeking"); } static bool siemens_meas_setup(int fd, struct hdr_s* hdr) { off_t start = 0; xseek(fd, start); xread(fd, hdr, sizeof(struct hdr_s)); // check for VD version bool vd = ((hdr->offset < 10000) && (hdr->nscans < 64)); if (vd) { debug_printf(DP_INFO, "VD Header. MeasID: %d FileID: %d Scans: %d\n", hdr->measid, hdr->fileid, hdr->nscans); start += hdr->datoff; xseek(fd, start); // reread offset xread(fd, &hdr->offset, sizeof(hdr->offset)); } else { debug_printf(DP_INFO, "VB Header.\n"); hdr->nscans = 1; } start += hdr->offset; xseek(fd, start); return vd; } struct mdh2 { // second part of mdh uint32_t evalinfo[2]; uint16_t samples; uint16_t channels; uint16_t sLC[14]; uint16_t dummy1[2]; uint16_t clmnctr; uint16_t dummy2[5]; uint16_t linectr; uint16_t partctr; }; static int siemens_bounds(bool vd, int fd, long min[DIMS], long max[DIMS]) { char scan_hdr[vd ? 192 : 0]; size_t size = sizeof(scan_hdr); if (size != (size_t)read(fd, scan_hdr, size)) return -1; long pos[DIMS] = { 0 }; for (pos[COIL_DIM] = 0; pos[COIL_DIM] < max[COIL_DIM]; pos[COIL_DIM]++) { char chan_hdr[vd ? 
32 : 128]; size_t size = sizeof(chan_hdr); if (size != (size_t)read(fd, chan_hdr, size)) return -1; struct mdh2 mdh; memcpy(&mdh, vd ? (scan_hdr + 40) : (chan_hdr + 20), sizeof(mdh)); if (0 == max[READ_DIM]) { max[READ_DIM] = mdh.samples; max[COIL_DIM] = mdh.channels; } if (max[READ_DIM] != mdh.samples) return -1; if (max[COIL_DIM] != mdh.channels) return -1; pos[PHS1_DIM] = mdh.sLC[0]; pos[AVG_DIM] = mdh.sLC[1]; pos[SLICE_DIM] = mdh.sLC[2]; pos[PHS2_DIM] = mdh.sLC[3]; pos[TE_DIM] = mdh.sLC[4]; pos[TIME_DIM] = mdh.sLC[6]; pos[TIME2_DIM] = mdh.sLC[7]; for (unsigned int i = 0; i < DIMS; i++) { max[i] = MAX(max[i], pos[i] + 1); min[i] = MIN(min[i], pos[i] + 0); } size = max[READ_DIM] * CFL_SIZE; char buf[size]; if (size != (size_t)read(fd, buf, size)) return -1; } return 0; } static int siemens_adc_read(bool vd, int fd, bool linectr, bool partctr, const long dims[DIMS], long pos[DIMS], complex float* buf) { char scan_hdr[vd ? 192 : 0]; xread(fd, scan_hdr, sizeof(scan_hdr)); for (pos[COIL_DIM] = 0; pos[COIL_DIM] < dims[COIL_DIM]; pos[COIL_DIM]++) { char chan_hdr[vd ? 32 : 128]; xread(fd, chan_hdr, sizeof(chan_hdr)); struct mdh2 mdh; memcpy(&mdh, vd ? (scan_hdr + 40) : (chan_hdr + 20), sizeof(mdh)); if (0 == pos[COIL_DIM]) { // TODO: rethink this pos[PHS1_DIM] = mdh.sLC[0] + (linectr ? mdh.linectr : 0); pos[AVG_DIM] = mdh.sLC[1]; pos[SLICE_DIM] = mdh.sLC[2]; pos[PHS2_DIM] = mdh.sLC[3] + (partctr ? 
mdh.partctr : 0); pos[TE_DIM] = mdh.sLC[4]; pos[TIME_DIM] = mdh.sLC[6]; pos[TIME2_DIM] = mdh.sLC[7]; } debug_print_dims(DP_DEBUG1, DIMS, pos); if (dims[READ_DIM] != mdh.samples) { debug_printf(DP_WARN, "Wrong number of samples: %d != %d.\n", dims[READ_DIM], mdh.samples); return -1; } if ((0 != mdh.channels) && (dims[COIL_DIM] != mdh.channels)) { debug_printf(DP_WARN, "Wrong number of channels: %d != %d.\n", dims[COIL_DIM], mdh.channels); return -1; } xread(fd, buf + pos[COIL_DIM] * dims[READ_DIM], dims[READ_DIM] * CFL_SIZE); } pos[COIL_DIM] = 0; return 0; } static const char usage_str[] = " "; // fprintf(fd, "Usage: %s [...] [-a A] \n", name); static const char help_str[] = "Read data from Siemens twix (.dat) files."; int main_twixread(int argc, char* argv[argc]) { long adcs = 0; bool autoc = false; bool linectr = false; bool partctr = false; long dims[DIMS]; md_singleton_dims(DIMS, dims); struct opt_s opts[] = { OPT_LONG('x', &(dims[READ_DIM]), "X", "number of samples (read-out)"), OPT_LONG('y', &(dims[PHS1_DIM]), "Y", "phase encoding steps"), OPT_LONG('z', &(dims[PHS2_DIM]), "Z", "partition encoding steps"), OPT_LONG('s', &(dims[SLICE_DIM]), "S", "number of slices"), OPT_LONG('v', &(dims[AVG_DIM]), "V", "number of averages"), OPT_LONG('c', &(dims[COIL_DIM]), "C", "number of channels"), OPT_LONG('n', &(dims[TIME_DIM]), "N", "number of repetitions"), OPT_LONG('a', &adcs, "A", "total number of ADCs"), OPT_SET('A', &autoc, "automatic [guess dimensions]"), OPT_SET('L', &linectr, "use linectr offset"), OPT_SET('P', &partctr, "use partctr offset"), }; cmdline(&argc, argv, 2, 2, usage_str, help_str, ARRAY_SIZE(opts), opts); if (0 == adcs) adcs = dims[PHS1_DIM] * dims[PHS2_DIM] * dims[SLICE_DIM] * dims[TIME_DIM]; debug_print_dims(DP_DEBUG1, DIMS, dims); int ifd; if (-1 == (ifd = open(argv[1], O_RDONLY))) error("error opening file."); struct hdr_s hdr; bool vd = siemens_meas_setup(ifd, &hdr); long off[DIMS] = { 0 }; if (autoc) { long max[DIMS] = { [COIL_DIM] = 1000 }; long 
min[DIMS] = { 0 }; // min is always 0 adcs = 0; while (true) { if (-1 == siemens_bounds(vd, ifd, min, max)) break; debug_print_dims(DP_DEBUG3, DIMS, max); adcs++; } for (unsigned int i = 0; i < DIMS; i++) { off[i] = -min[i]; dims[i] = max[i] + off[i]; } debug_printf(DP_DEBUG2, "Dimensions: "); debug_print_dims(DP_DEBUG2, DIMS, dims); debug_printf(DP_DEBUG2, "Offset: "); debug_print_dims(DP_DEBUG2, DIMS, off); siemens_meas_setup(ifd, &hdr); // reset } complex float* out = create_cfl(argv[2], DIMS, dims); md_clear(DIMS, dims, out, CFL_SIZE); long adc_dims[DIMS]; md_select_dims(DIMS, READ_FLAG|COIL_FLAG, adc_dims, dims); void* buf = md_alloc(DIMS, adc_dims, CFL_SIZE); while (adcs--) { long pos[DIMS] = { [0 ... DIMS - 1] = 0 }; if (-1 == siemens_adc_read(vd, ifd, linectr, partctr, dims, pos, buf)) { debug_printf(DP_WARN, "Stopping.\n"); break; } for (unsigned int i = 0; i < DIMS; i++) pos[i] += off[i]; debug_print_dims(DP_DEBUG1, DIMS, pos); if (!md_is_index(DIMS, pos, dims)) { debug_printf(DP_WARN, "Index out of bounds.\n"); continue; } md_copy_block(DIMS, pos, dims, out, adc_dims, buf, CFL_SIZE); } md_free(buf); unmap_cfl(DIMS, dims, out); exit(0); } bart-0.4.02/src/var.c000066400000000000000000000021321320577655200142620ustar00rootroot00000000000000/* Copyright 2017. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2017 Jon Tamir */ #include #include #include #include "num/multind.h" #include "num/init.h" #include "num/flpmath.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "bitmask "; static const char help_str[] = "Compute variance along selected dimensions specified by the {bitmask}"; int main_var(int argc, char* argv[]) { mini_cmdline(&argc, argv, 3, usage_str, help_str); num_init(); long flags = atoi(argv[1]); long idims[DIMS]; long odims[DIMS]; complex float* in = load_cfl(argv[2], DIMS, idims); md_select_dims(DIMS, ~flags, odims, idims); complex float* out = create_cfl(argv[3], DIMS, odims); md_zvar(DIMS, idims, flags, out, in); unmap_cfl(DIMS, idims, in); unmap_cfl(DIMS, odims, out); exit(0); } bart-0.4.02/src/vec.c000066400000000000000000000017131320577655200142530ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/io.h" #include "misc/misc.h" static const char usage_str[] = "val1 val2 ... valN name"; static const char help_str[] = "Create a vector of values.\n"; int main_vec(int argc, char* argv[]) { mini_cmdline(&argc, argv, -1, usage_str, help_str); num_init(); long dims[1] = { argc - 2 }; complex float* x = create_cfl(argv[argc - 1], 1, dims); for (int i = 0; i < argc - 2; i++) if (0 != parse_cfl(&x[i], argv[1 + i])) error("argument %d/%d is not a number: %s", i, argc - 2, argv[1 + i]); unmap_cfl(1, dims, x); exit(0); } bart-0.4.02/src/version.c000066400000000000000000000030211320577655200151550ustar00rootroot00000000000000/* Copyright 2015-2016. Martin Uecker * Copyright 2016. The Regents of the University of California. * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2015-2-16 Martin Uecker * 2016 Jonathan Tamir */ #include #include #include #include "misc/misc.h" #include "misc/opts.h" #include "misc/version.h" static const char usage_str[] = "[-h]"; static const char help_str[] = "Print BART version. The version string is of the form\n" "TAG or TAG-COMMITS-SHA as produced by 'git describe'. It\n" "specifies the last release (TAG), and (if git is used)\n" "the number of commits (COMMITS) since this release and\n" "the abbreviated hash of the last commit (SHA). If there\n" "are local changes '-dirty' is added at the end.\n"; int main_version(int argc, char* argv[]) { bool verbose = false; const struct opt_s opts[] = { OPT_SET('V', &verbose, "Output verbose info"), }; cmdline(&argc, argv, 0, 0, usage_str, help_str, ARRAY_SIZE(opts), opts); printf("%s\n", bart_version); if (verbose) { #ifdef __GNUC__ printf("GCC_VERSION=%s\n", __VERSION__); #endif printf("CUDA="); #ifdef USE_CUDA printf("1\n"); #else printf("0\n"); #endif printf("ACML="); #ifdef USE_ACML printf("1\n"); #else printf("0\n"); #endif printf("FFTWTHREADS="); #ifdef FFTWTHREADS printf("1\n"); #else printf("0\n"); #endif } exit(0); } bart-0.4.02/src/walsh.c000066400000000000000000000035651320577655200146230ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2015-2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2013-2016 Martin Uecker */ #include #include #include "misc/mmio.h" #include "misc/misc.h" #include "misc/mri.h" #include "misc/opts.h" #include "misc/debug.h" #include "num/multind.h" #include "num/fft.h" #include "calib/walsh.h" static const char usage_str[] = " "; static const char help_str[] = "Estimate coil sensitivities using walsh method (use with ecaltwo)."; int main_walsh(int argc, char* argv[]) { long bsize[3] = { 20, 20, 20 }; long calsize[3] = { 24, 24, 24 }; const struct opt_s opts[] = { OPT_VEC3('r', &calsize, "cal_size", "Limits the size of the calibration region."), OPT_VEC3('R', &calsize, "", "()"), OPT_VEC3('b', &bsize, "block_size", "Block size."), OPT_VEC3('B', &bsize, "", "()"), }; cmdline(&argc, argv, 2, 2, usage_str, help_str, ARRAY_SIZE(opts), opts); long dims[DIMS]; complex float* in_data = load_cfl(argv[1], DIMS, dims); assert((dims[0] == 1) || (calsize[0] < dims[0])); assert((dims[1] == 1) || (calsize[1] < dims[1])); assert((dims[2] == 1) || (calsize[2] < dims[2])); assert(1 == dims[MAPS_DIM]); long caldims[DIMS]; complex float* cal_data = extract_calib(caldims, calsize, dims, in_data, false); unmap_cfl(DIMS, dims, in_data); debug_printf(DP_INFO, "Calibration region %ldx%ldx%ld\n", caldims[0], caldims[1], caldims[2]); dims[COIL_DIM] = dims[COIL_DIM] * (dims[COIL_DIM] + 1) / 2; complex float* out_data = create_cfl(argv[2], DIMS, dims); walsh(bsize, dims, out_data, caldims, cal_data); debug_printf(DP_INFO, "Done.\n"); md_free(cal_data); unmap_cfl(DIMS, dims, out_data); exit(0); } bart-0.4.02/src/wave.c000066400000000000000000000144711320577655200144450ustar00rootroot00000000000000/* Copyright 2015. The Regents of the University of California. * Copyright 2015. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2015 Berkin Bilgic * 2015 Martin Uecker * * B Bilgic, BA Gagoski, SF Cauley, AP Fan, JR Polimeni, PE Grant, * LL Wald, and K Setsompop, Wave-CAIPI for highly accelerated 3D * imaging. Magn Reson Med (2014) doi: 10.1002/mrm.25347 */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/init.h" #include "iter/iter.h" #include "iter/lsqr.h" #include "linops/linop.h" #include "linops/sampling.h" #include "linops/someops.h" #include "misc/debug.h" #include "misc/mri.h" #include "misc/utils.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #include "sense/model.h" #include "sense/optcom.h" #include "wavelet/wavthresh.h" // create wavecaipi operator static struct linop_s* wavecaipi_create(const long dims[DIMS], long img_read, const complex float* wave) { // Wave-CAIPI linear operator created by chaining zero-padding, readout fft, // psf multiplication, ky-kz fft long img_dims[DIMS]; md_select_dims(DIMS, FFT_FLAGS|COIL_FLAG, img_dims, dims); img_dims[READ_DIM] = img_read; struct linop_s* fft_read = linop_fft_create(DIMS, dims, READ_FLAG); struct linop_s* fft_yz = linop_fft_create(DIMS, dims, PHS1_FLAG|PHS2_FLAG); struct linop_s* resize = linop_resize_create(DIMS, dims, img_dims); struct linop_s* wavemod = linop_cdiag_create(DIMS, dims, FFT_FLAGS, wave); struct linop_s* wc_op = linop_chain(linop_chain(linop_chain(resize, fft_read), wavemod), fft_yz); linop_free(fft_read); linop_free(fft_yz); linop_free(resize); linop_free(wavemod); return wc_op; } static const char usage_str[] = " "; static const char help_str[] = "Perform iterative wavecaipi reconstruction."; int main_wave(int argc, char* argv[]) { float lambda = 0.; float step = 0.95; bool l1wav = false; bool hogwild = false; bool randshift = true; bool adjoint = false; int maxiter = 50; const struct opt_s opts[] = { OPT_SET('l', &l1wav, "use L1 penalty"), OPT_SET('a', &adjoint, "adjoint"), OPT_INT('i', &maxiter, 
"iter", "max. iterations"), OPT_FLOAT('r', &lambda, "lambda", "regularization parameter"), }; cmdline(&argc, argv, 4, 4, usage_str, help_str, ARRAY_SIZE(opts), opts); long map_dims[DIMS]; long pat_dims[DIMS]; long img_dims[DIMS]; long ksp_dims[DIMS]; long max_dims[DIMS]; long wav_dims[DIMS]; // load kspace and maps and get dimensions complex float* kspace = load_cfl(argv[1], DIMS, ksp_dims); complex float* maps = load_cfl(argv[2], DIMS, map_dims); complex float* wave = load_cfl(argv[3], DIMS, wav_dims); md_copy_dims(DIMS, max_dims, ksp_dims); max_dims[MAPS_DIM] = map_dims[MAPS_DIM]; md_select_dims(DIMS, ~COIL_FLAG, pat_dims, ksp_dims); md_select_dims(DIMS, ~(COIL_FLAG|READ_FLAG), img_dims, max_dims); img_dims[READ_DIM] = map_dims[READ_DIM]; for (int i = 1; i < 4; i++) // sizes2[4] may be > 1 if (ksp_dims[i] != map_dims[i]) error("Dimensions of kspace and sensitivities do not match!\n"); // FIXME: add more sanity checking of dimensions assert(1 == ksp_dims[MAPS_DIM]); num_init(); // initialize sampling pattern complex float* pattern = md_alloc(DIMS, pat_dims, CFL_SIZE); estimate_pattern(DIMS, ksp_dims, COIL_FLAG, pattern, kspace); // print some statistics size_t T = md_calc_size(DIMS, pat_dims); long samples = (long)pow(md_znorm(DIMS, pat_dims, pattern), 2.); debug_printf(DP_INFO, "Size: %ld Samples: %ld Acc: %.2f\n", T, samples, (float)T / (float)samples); // apply scaling float scaling = estimate_scaling(ksp_dims, NULL, kspace); if (scaling != 0.) md_zsmul(DIMS, ksp_dims, kspace, kspace, 1. / scaling); const struct operator_p_s* thresh_op = NULL; // wavelet operator if (l1wav) { long minsize[DIMS] = { [0 ... 
DIMS - 1] = 1 }; minsize[0] = MIN(img_dims[0], 16); minsize[1] = MIN(img_dims[1], 16); minsize[2] = MIN(img_dims[2], 16); thresh_op = prox_wavelet_thresh_create(DIMS, img_dims, FFT_FLAGS, 0u, minsize, lambda, randshift); } complex float* image = create_cfl(argv[4], DIMS, img_dims); md_clear(DIMS, img_dims, image, CFL_SIZE); fftmod(DIMS, ksp_dims, FFT_FLAGS, kspace, kspace); fftmod(DIMS, map_dims, FFT_FLAGS, maps, maps); // initialize iterative algorithm italgo_fun_t italgo = NULL; iter_conf* iconf = NULL; struct iter_conjgrad_conf cgconf; struct iter_fista_conf fsconf; if (!l1wav) { // configuration for CG recon cgconf = iter_conjgrad_defaults; cgconf.maxiter = maxiter; // max no of iterations cgconf.l2lambda = lambda; // regularization parameter cgconf.tol = 1.E-3; // cg tolerance italgo = iter_conjgrad; iconf = CAST_UP(&cgconf); } else { // use FISTA for wavelet regularization fsconf = iter_fista_defaults; fsconf.maxiter = maxiter; fsconf.step = step; fsconf.hogwild = hogwild; italgo = iter_fista; iconf = CAST_UP(&fsconf); } // create sense maps operator // struct linop_s* mapsop = maps_create(map_dims, FFT_FLAGS|COIL_FLAG|MAPS_FLAG, maps, false); md_zsmul(DIMS, map_dims, maps, maps, 1. 
/ sqrt((double)(ksp_dims[0] * ksp_dims[1] * ksp_dims[2]))); struct linop_s* mapsop = maps2_create(map_dims, map_dims, img_dims, maps); // create wave caipi operator struct linop_s* waveop = wavecaipi_create(ksp_dims, img_dims[READ_DIM], wave); // create sense operator by chaining coil sens and wave operators struct linop_s* sense_op = linop_chain(mapsop, waveop); // create forward operator by adding sampling mask to sense operator struct linop_s* forward = linop_chain(sense_op, linop_sampling_create(ksp_dims, pat_dims, pattern)); struct lsqr_conf lsqr_conf = { 0., false }; // reconstruction with LSQR if (adjoint) linop_adjoint(forward, DIMS, img_dims, image, DIMS, ksp_dims, kspace); else lsqr(DIMS, &lsqr_conf, italgo, iconf, forward, thresh_op, img_dims, image, ksp_dims, kspace, NULL); unmap_cfl(DIMS, map_dims, maps); unmap_cfl(DIMS, wav_dims, wave); unmap_cfl(DIMS, ksp_dims, kspace); unmap_cfl(DIMS, img_dims, image); exit(0); } bart-0.4.02/src/wavelet.c000066400000000000000000000036401320577655200151460ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Martin Uecker */ #include #include #include #include "num/flpmath.h" #include "num/multind.h" #include "num/iovec.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/debug.h" #include "misc/opts.h" #include "linops/linop.h" #include "linops/waveop.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "flags [dims] "; static const char help_str[] = "Perform wavelet transform."; int main_wavelet(int argc, char* argv[]) { bool adj = false; const struct opt_s opts[] = { OPT_SET('a', &adj, "adjoint (specify dims)"), }; cmdline(&argc, argv, 3, 100, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); unsigned int flags = atof(argv[1]); unsigned int n = adj ? 
bitcount(flags) : 0; assert((int)n + 3 == argc - 1); const unsigned int N = DIMS; long idims[N]; complex float* idata = load_cfl(argv[n + 2], N, idims); long dims[N]; md_copy_dims(N, dims, idims); if (adj) { unsigned int j = 0; for (unsigned int i = 0; i < N; i++) if (MD_IS_SET(flags, i)) dims[i] = atoi(argv[j++ + 2]); assert(j == n); } long minsize[N]; for (unsigned int i = 0; i < N; i++) minsize[i] = MD_IS_SET(flags, i) ? 16 : dims[i]; long strs[N]; md_calc_strides(N, strs, dims, CFL_SIZE); const struct linop_s* w = linop_wavelet_create(N, flags, dims, strs, minsize, false); long odims[N]; md_copy_dims(N, odims, (adj ? linop_domain : linop_codomain)(w)->dims); complex float* odata = create_cfl(argv[n + 3], N, odims); (adj ? linop_adjoint : linop_forward)(w, N, odims, odata, N, idims, idata); unmap_cfl(N, idims, idata); unmap_cfl(N, odims, odata); exit(0); } bart-0.4.02/src/wavelet/000077500000000000000000000000001320577655200147775ustar00rootroot00000000000000bart-0.4.02/src/wavelet/wavelet.c000066400000000000000000000476371320577655200166330ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2013 Frank Ong * 2013-2017 Martin Uecker */ /* * md_*-based multi-dimensional wavelet implementation * * - 3 levels (1d, md, md-hierarchical) * - all higher-level code should work for GPU as well * * Bugs: * * - GPU version is not optimized * - memory use could possible be reduced * * Missing: * * - different boundary conditions * (symmetric, periodic, zero) */ #include #include #include #include #include #include "misc/misc.h" #include "misc/debug.h" #include "num/flpmath.h" #include "num/multind.h" #include "num/ops.h" #ifdef USE_CUDA #include "num/gpuops.h" #include "wavelet/wl3-cuda.h" #endif #include "wavelet.h" // layer 1 - 1-dimensional wavelet transform static unsigned int bandsize(unsigned int imsize, unsigned int flen) { return (imsize + flen - 1) / 2; } static complex float* access(const long str[3], complex float* x, long i, long j, long k) { return (void*)x + str[2] * i + str[1] * j + str[0] * k; } static const complex float* caccess(const long str[3], const complex float* x, long i, long j, long k) { return (const void*)x + str[2] * i + str[1] * j + str[0] * k; } static int coord(int l, int x, int flen, int k) { int n = 2 * l + 1 - (flen - 1) + k; if (n < 0) n = -n - 1; if (n >= x) n = x - 1 - (n - x); return n; } static void wavelet_down3(const long dims[3], const long out_str[3], complex float* out, const long in_str[3], const complex float* in, unsigned int flen, const float filter[flen]) { #pragma omp parallel for collapse(3) for (unsigned int i = 0; i < dims[2]; i++) for (unsigned int j = 0; j < bandsize(dims[1], flen); j++) for (unsigned int k = 0; k < dims[0]; k++) { *access(out_str, out, i, j, k) = 0.; for (unsigned int l = 0; l < flen; l++) { int n = coord(j, dims[1], flen, l); *access(out_str, out, i, j, k) += *(caccess(in_str, in, i, n, k)) * filter[flen - l - 1]; } } } static void wavelet_up3(const long dims[3], const long out_str[3], complex float* out, const long in_str[3], const complex float* in, unsigned int flen, const 
float filter[flen]) { // md_clear2(3, dims, out_str, out, CFL_SIZE); #pragma omp parallel for collapse(3) for (unsigned int i = 0; i < dims[2]; i++) for (unsigned int j = 0; j < dims[1]; j++) for (unsigned int k = 0; k < dims[0]; k++) { // *access(out_str, out, i, j, k) = 0.; for (unsigned int l = ((j + flen / 2 - 0) - (flen - 1)) % 2; l < flen; l += 2) { int n = ((j + flen / 2 - 0) - (flen - 1) + l) / 2; if ((0 <= n) && ((unsigned int)n < bandsize(dims[1], flen))) *access(out_str, out, i, j, k) += *caccess(in_str, in, i, n, k) * filter[flen - l - 1]; } } } void fwt1(unsigned int N, unsigned int d, const long dims[N], const long ostr[N], complex float* low, complex float* hgh, const long istr[N], const complex float* in, const long flen, const float filter[2][2][flen]) { debug_printf(DP_DEBUG4, "fwt1: %d/%d\n", d, N); debug_print_dims(DP_DEBUG4, N, dims); assert(dims[d] >= 2); long odims[N]; md_copy_dims(N, odims, dims); odims[d] = bandsize(dims[d], flen); debug_print_dims(DP_DEBUG4, N, odims); long o = d + 1; long u = N - o; // 0 1 2 3 4 5 6|7 // --d-- * --u--|N // ---o--- assert(d == md_calc_blockdim(d, dims + 0, istr + 0, CFL_SIZE)); assert(u == md_calc_blockdim(u, dims + o, istr + o, CFL_SIZE * md_calc_size(o, dims))); assert(d == md_calc_blockdim(d, odims + 0, ostr + 0, CFL_SIZE)); assert(u == md_calc_blockdim(u, odims + o, ostr + o, CFL_SIZE * md_calc_size(o, odims))); // merge dims long wdims[3] = { md_calc_size(d, dims), dims[d], md_calc_size(u, dims + o) }; long wistr[3] = { CFL_SIZE, istr[d], CFL_SIZE * md_calc_size(o, dims) }; long wostr[3] = { CFL_SIZE, ostr[d], CFL_SIZE * md_calc_size(o, odims) }; #ifdef USE_CUDA if (cuda_ondevice(in)) { assert(cuda_ondevice(low)); assert(cuda_ondevice(hgh)); float* flow = md_gpu_move(1, MD_DIMS(flen), filter[0][0], FL_SIZE); float* fhgh = md_gpu_move(1, MD_DIMS(flen), filter[0][1], FL_SIZE); wl3_cuda_down3(wdims, wostr, low, wistr, in, flen, flow); wl3_cuda_down3(wdims, wostr, hgh, wistr, in, flen, fhgh); 
md_free(flow); md_free(fhgh); return; } #endif // no clear needed wavelet_down3(wdims, wostr, low, wistr, in, flen, filter[0][0]); wavelet_down3(wdims, wostr, hgh, wistr, in, flen, filter[0][1]); } void iwt1(unsigned int N, unsigned int d, const long dims[N], const long ostr[N], complex float* out, const long istr[N], const complex float* low, const complex float* hgh, const long flen, const float filter[2][2][flen]) { debug_printf(DP_DEBUG4, "ifwt1: %d/%d\n", d, N); debug_print_dims(DP_DEBUG4, N, dims); assert(dims[d] >= 2); long idims[N]; md_copy_dims(N, idims, dims); idims[d] = bandsize(dims[d], flen); debug_print_dims(DP_DEBUG4, N, idims); long o = d + 1; long u = N - o; // 0 1 2 3 4 5 6|7 // --d-- * --u--|N // ---o--- assert(d == md_calc_blockdim(d, dims + 0, ostr + 0, CFL_SIZE)); assert(u == md_calc_blockdim(u, dims + o, ostr + o, CFL_SIZE * md_calc_size(o, dims))); assert(d == md_calc_blockdim(d, idims + 0, istr + 0, CFL_SIZE)); assert(u == md_calc_blockdim(u, idims + o, istr + o, CFL_SIZE * md_calc_size(o, idims))); long wdims[3] = { md_calc_size(d, dims), dims[d], md_calc_size(u, dims + o) }; long wistr[3] = { CFL_SIZE, istr[d], CFL_SIZE * md_calc_size(o, idims) }; long wostr[3] = { CFL_SIZE, ostr[d], CFL_SIZE * md_calc_size(o, dims) }; md_clear(3, wdims, out, CFL_SIZE); // we cannot clear because we merge outputs #ifdef USE_CUDA if (cuda_ondevice(out)) { assert(cuda_ondevice(low)); assert(cuda_ondevice(hgh)); float* flow = md_gpu_move(1, MD_DIMS(flen), filter[1][0], FL_SIZE); float* fhgh = md_gpu_move(1, MD_DIMS(flen), filter[1][1], FL_SIZE); wl3_cuda_up3(wdims, wostr, out, wistr, low, flen, flow); wl3_cuda_up3(wdims, wostr, out, wistr, hgh, flen, fhgh); md_free(flow); md_free(fhgh); return; } #endif wavelet_up3(wdims, wostr, out, wistr, low, flen, filter[1][0]); wavelet_up3(wdims, wostr, out, wistr, hgh, flen, filter[1][1]); } // layer 2 - multi-dimensional wavelet transform static void wavelet_dims_r(unsigned int N, unsigned int n, unsigned int flags, 
long odims[2 * N], const long dims[N], const long flen) { if (MD_IS_SET(flags, n)) { odims[0 + n] = bandsize(dims[n], flen); odims[N + n] = 2; } if (n > 0) wavelet_dims_r(N, n - 1, flags, odims, dims, flen); } void wavelet_dims(unsigned int N, unsigned int flags, long odims[2 * N], const long dims[N], const long flen) { md_copy_dims(N, odims, dims); md_singleton_dims(N, odims + N); wavelet_dims_r(N, N - 1, flags, odims, dims, flen); } void fwtN(unsigned int N, unsigned int flags, const long shifts[N], const long dims[N], const long ostr[2 * N], complex float* out, const long istr[N], const complex float* in, const long flen, const float filter[2][2][flen]) { long odims[2 * N]; wavelet_dims(N, flags, odims, dims, flen); assert(md_calc_size(2 * N, odims) >= md_calc_size(N, dims)); // FIXME one of these is unnecessary if we use the output complex float* tmpA = md_alloc_sameplace(2 * N, odims, CFL_SIZE, out); complex float* tmpB = md_alloc_sameplace(2 * N, odims, CFL_SIZE, out); long tidims[2 * N]; md_copy_dims(N, tidims, dims); md_singleton_dims(N, tidims + N); long tistrs[2 * N]; md_calc_strides(2 * N, tistrs, tidims, CFL_SIZE); long todims[2 * N]; md_copy_dims(2 * N, todims, tidims); long tostrs[2 * N]; // maybe we should push the randshift into lower levels //md_copy2(N, dims, tistrs, tmpA, istr, in, CFL_SIZE); md_circ_shift2(N, dims, shifts, tistrs, tmpA, istr, in, CFL_SIZE); for (unsigned int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { todims[0 + i] = odims[0 + i]; todims[N + i] = odims[N + i]; md_calc_strides(2 * N, tostrs, todims, CFL_SIZE); fwt1(2 * N, i, tidims, tostrs, tmpB, (void*)tmpB + tostrs[N + i], tistrs, tmpA, flen, filter); md_copy_dims(2 * N, tidims, todims); md_copy_dims(2 * N, tistrs, tostrs); complex float* swap = tmpA; tmpA = tmpB; tmpB = swap; } } md_copy2(2 * N, todims, ostr, out, tostrs, tmpA, CFL_SIZE); md_free(tmpA); md_free(tmpB); } void iwtN(unsigned int N, unsigned int flags, const long shifts[N], const long dims[N], const long 
/**
 * Count the decomposition levels of the hierarchical transform.
 *
 * @param N     number of dimensions
 * @param flags bitmask of dimensions to transform
 * @param dims  dimensions of the signal
 * @param min   per-dimension size below which a dimension is no longer split
 * @param flen  filter length
 * @return      1 for flags == 0, otherwise one more for every level that is
 *              computed until no dimension remains selected
 *
 * NOTE(review): a dimension is dropped only once its band size falls below
 * min[i]; bandsize() maps n to (n + flen - 1) / 2, which stops shrinking
 * around flen - 1, so this looks like it relies on min[i] being large
 * enough relative to flen to terminate - confirm against callers.
 */
long wavelet_num_levels(unsigned int N, unsigned int flags, const long dims[N], const long min[N], const long flen)
{
	long levels = 1;

	long cur[N];
	md_copy_dims(N, cur, dims);

	while (0 != flags) {

		long wdims[2 * N];
		wavelet_dims(N, flags, wdims, cur, flen);

		// drop dimensions that became too small for another split
		flags = wavelet_filter_flags(N, flags, wdims, min);

		// the next level operates on the band sizes of this one
		md_copy_dims(N, cur, wdims);

		levels++;
	}

	return levels;
}
long dims[N], const long min[N], const long flen) { long wdims[2 * N]; wavelet_dims(N, flags, wdims, dims, flen); long coeffs = md_calc_size(N, wdims); long bands = md_calc_size(N, wdims + N); assert((0 == flags) == (0 == levels)); if (0 == flags) return bands * coeffs; return coeffs * (bands - 1) + wavelet_coeffs_r(levels - 1, N, wavelet_filter_flags(N, flags, wdims, min), wdims, min, flen); } long wavelet_coeffs(unsigned int N, unsigned int flags, const long dims[N], const long min[N], const long flen) { unsigned int levels = wavelet_num_levels(N, flags, dims, min, flen); assert(levels > 0); return wavelet_coeffs_r(levels - 1, N, flags, dims, min, flen); } void wavelet_thresh(unsigned int N, float lambda, unsigned int flags, unsigned int jflags, const long shifts[N], const long dims[N], complex float* out, const complex float* in, const long minsize[N], long flen, const float filter[2][2][flen]) { assert(0 == (flags & jflags)); long wdims[N]; wavelet_coeffs2(N, flags, wdims, dims, minsize, flen); long wstr[N]; md_calc_strides(N, wstr, wdims, CFL_SIZE); complex float* tmp = md_alloc_sameplace(N, wdims, CFL_SIZE, out); long str[N]; md_calc_strides(N, str, dims, CFL_SIZE); fwt2(N, flags, shifts, wdims, wstr, tmp, dims, str, in, minsize, flen, filter); md_zsoftthresh(N, wdims, lambda, jflags, tmp, tmp); iwt2(N, flags, shifts, dims, str, out, wdims, wstr, tmp, minsize, flen, filter); md_free(tmp); } void wavelet_coeffs2(unsigned int N, unsigned int flags, long odims[N], const long dims[N], const long min[N], const long flen) { md_select_dims(N, ~flags, odims, dims); if (0 == flags) return; unsigned int levels = wavelet_num_levels(N, flags, dims, min, flen); assert(levels > 0); long wdims[N]; md_select_dims(N, flags, wdims, dims); // remove unmodified dims unsigned int b = ffs(flags) - 1; odims[b] = wavelet_coeffs_r(levels - 1, N, flags, wdims, min, flen); } static bool wavelet_check_dims(unsigned int N, unsigned int flags, const long dims[N], const long minsize[N]) { 
/**
 * Compute strides for an array whose transformed dimensions are packed
 * into the lowest transformed dimension b.
 *
 * @param N     number of dimensions
 * @param flags bitmask of transformed dimensions; at least one bit below N
 *              must be set
 * @param ostr  resulting strides
 * @param dims  dimensions of the array
 * @param str   strides of the enclosing array
 *
 * Selected dimensions get packed strides starting from str[b]; all other
 * dimensions keep their original stride.
 */
static void embed(unsigned int N, unsigned int flags, long ostr[N], const long dims[N], const long str[N])
{
	unsigned int b = ffs(flags) - 1;

	// ffs() returns 0 for flags == 0, which would wrap b to UINT_MAX and
	// index far outside str[]; a lowest bit >= N is out of range as well.
	assert(b < N);

	long dims1[N];
	md_select_dims(N, flags, dims1, dims);

	md_calc_strides(N, ostr, dims1, str[b]);

	for (unsigned int i = 0; i < N; i++)
		if (!MD_IS_SET(flags, i))
			ostr[i] = str[i];
}
/**
 * Hierarchical inverse wavelet transform with explicit strides.
 *
 * @param N       number of dimensions
 * @param flags   bitmask of dimensions to transform
 * @param shifts  shifts handed to iwtN for this level
 *                (recursive calls for coarser levels use zero shifts)
 * @param odims   dimensions of the reconstructed signal
 * @param ostr    output strides (bytes)
 * @param out     reconstructed signal
 * @param idims   dimensions of the coefficient array; must be compatible
 *                with what wavelet_coeffs2() computes for odims
 * @param istr    input strides (bytes)
 * @param in      wavelet coefficients
 * @param minsize minimal size per dimension for further decomposition
 * @param flen    filter length
 * @param filter  filter bank
 *
 * All coefficients are stored along the lowest transformed dimension b.
 * The bands of the current level occupy the last level_coeffs entries of
 * that dimension; whatever precedes them belongs to coarser levels and is
 * reconstructed first by the recursive call.
 */
void iwt2(unsigned int N, unsigned int flags, const long shifts[N], const long odims[N], const long ostr[N], complex float* out, const long idims[N], const long istr[N], const complex float* in, const long minsize[N], const long flen, const float filter[2][2][flen])
{
	assert(wavelet_check_dims(N, flags, odims, minsize));

	if (0 == flags) {	// note: recursion does *not* end here

		// nothing to transform: plain copy
		assert(md_check_compat(N, 0u, odims, idims));

		md_copy2(N, idims, ostr, out, istr, in, CFL_SIZE);

		return;
	}

	// check input dimensions

	long idims2[N];
	wavelet_coeffs2(N, flags, idims2, odims, minsize, flen);

	assert(md_check_compat(N, 0u, idims2, idims));

	// band layout (2N dims) of this level for the full output dimensions
	long wdims2[2 * N];
	wavelet_dims(N, flags, wdims2, odims, flen);

	// only consider transform dims...

	long dims1[N];
	md_select_dims(N, flags, dims1, odims);

	long wdims[2 * N];
	wavelet_dims(N, flags, wdims, dims1, flen);
	// number of coefficients (all bands) of this level
	long level_coeffs = md_calc_size(2 * N, wdims);

	// ... which get embedded in dimension b

	unsigned int b = ffs(flags) - 1;

	long istr2[2 * N];
	md_calc_strides(2 * N, istr2, wdims, istr[b]);

	// merge with original strides

	for (unsigned int i = 0; i < N; i++)
		if (!MD_IS_SET(flags, i))
			istr2[i] = istr[i];

	assert(idims[b] >= level_coeffs);

	// element offset of this level's bands: they sit at the end of dimension b
	long offset = (idims[b] - level_coeffs) * (istr[b] / CFL_SIZE);

	long bands = md_calc_size(N, wdims + N);
	long coeffs = md_calc_size(N, wdims + 0);

	// subtract coefficients in high band

	idims2[b] -= (bands - 1) * coeffs;

	assert(idims2[b] > 0);

	// NOTE(review): the last value divides an element offset by a byte
	// stride; it only affects this debug output.
	debug_printf(DP_DEBUG4, "ifwt2: flags:%d lcoeffs:%ld coeffs:%ld (space:%ld) bands:%ld str:%ld off:%ld\n",
			flags, level_coeffs, coeffs, idims2[b], bands, istr[b], offset / ostr[b]);

	// fix me we need temp storage
	// tmp holds all bands of this level in a packed 2N-dimensional layout
	complex float* tmp = md_alloc_sameplace(2 * N, wdims2, CFL_SIZE, out);

	long tstr[2 * N];
	md_calc_strides(2 * N, tstr, wdims2, CFL_SIZE);

	md_copy2(2 * N, wdims2, tstr, tmp, istr2, in + offset, CFL_SIZE);

	long shifts0[N];
	for (unsigned int i = 0; i < N; i++)
		shifts0[i] = 0;

	// dimensions that are still decomposed on the next coarser level
	unsigned int flags2 = wavelet_filter_flags(N, flags, wdims, minsize);

	// coefficients precede this level exactly when a coarser level exists
	assert((0 == offset) == (0u == flags2));

	if (0u != flags2) {

		long idims3[N];
		wavelet_coeffs2(N, flags2, idims3, wdims2, minsize, flen);

		long istr3[N];
		embed(N, flags, istr3, idims3, istr);

		// reconstruct the coarser levels into the first N dims of tmp
		iwt2(N, flags2, shifts0, wdims2, tstr, tmp, idims3, istr3, in, minsize, flen, filter);
	}

	// finally undo this level
	iwtN(N, flags, shifts, odims, ostr, out, tstr, tmp, flen, filter);

	md_free(tmp);
}
/*
 * Filter banks, indexed as filter[stage][band][tap]:
 *
 * - stage 0 is read by fwt1 (forward transform), stage 1 by iwt1 (inverse);
 * - band 0 belongs to the low band, band 1 to the high band;
 * - wavelet_down3 / wavelet_up3 read the taps in reverse order
 *   (filter[flen - l - 1]).
 *
 * The last array extent is the filter length (flen) to pass along.
 */

/* flen = 2; all taps have magnitude 0.7071067811865475 (about 1/sqrt(2)) */
const float wavelet_haar[2][2][2] = {
	{ { +0.7071067811865475, +0.7071067811865475 },
	  { -0.7071067811865475, +0.7071067811865475 }, },
	{ { +0.7071067811865475, +0.7071067811865475 },
	  { +0.7071067811865475, -0.7071067811865475 }, },
};

/* flen = 4; this is the filter wavelet_thresh_apply passes to wavelet_thresh */
const float wavelet_dau2[2][2][4] = {
	{ { -0.1294095225512603, +0.2241438680420134, +0.8365163037378077, +0.4829629131445341 },
	  { -0.4829629131445341, +0.8365163037378077, -0.2241438680420134, -0.1294095225512603 }, },
	{ { +0.4829629131445341, +0.8365163037378077, +0.2241438680420134, -0.1294095225512603 },
	  { -0.1294095225512603, -0.2241438680420134, +0.8365163037378077, -0.4829629131445341 }, },
};

/* flen = 10; the shorter filters are padded with zero taps */
const float wavelet_cdf44[2][2][10] = {
	{ { +0.00000000000000000, +0.03782845550726404 , -0.023849465019556843, -0.11062440441843718 , +0.37740285561283066,
	    +0.85269867900889385, +0.37740285561283066 , -0.11062440441843718 , -0.023849465019556843, +0.03782845550726404 },
	  { +0.00000000000000000, -0.064538882628697058, +0.040689417609164058, +0.41809227322161724 , -0.7884856164055829,
	    +0.41809227322161724, +0.040689417609164058, -0.064538882628697058, +0.00000000000000000 , +0.00000000000000000 }, },
	{ { +0.00000000000000000, -0.064538882628697058, -0.040689417609164058, +0.41809227322161724 , +0.7884856164055829,
	    +0.41809227322161724, -0.040689417609164058, -0.064538882628697058, +0.000000000000000000, +0.00000000000000000 },
	  { +0.00000000000000000, -0.03782845550726404 , -0.023849465019556843, +0.11062440441843718 , +0.37740285561283066,
	    -0.85269867900889385, +0.37740285561283066 , +0.11062440441843718 , -0.023849465019556843, -0.03782845550726404 }, },
};
bart-0.4.02/src/wavelet/wavelet.h000066400000000000000000000061011320577655200166150ustar00rootroot00000000000000/* Copyright 2014-2015. The Regents of the University of California. * Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. */ #include #include extern const float wavelet_haar[2][2][2]; extern const float wavelet_dau2[2][2][4]; extern const float wavelet_cdf44[2][2][10]; // layer 1 extern void fwt1(unsigned int N, unsigned int d, const long dims[N], const long ostr[N], complex float* low, complex float* hgh, const long istr[N], const complex float* in, const long flen, const float filter[2][2][flen]); extern void iwt1(unsigned int N, unsigned int d, const long dims[N], const long ostr[N], complex float* out, const long istr[N], const complex float* low, const complex float* hgh, const long flen, const float filter[2][2][flen]); // layer 2 extern void fwtN(unsigned int N, unsigned int flags, const long shifts[N], const long dims[N], const long ostr[2 * N], complex float* out, const long istr[N], const complex float* in, const long flen, const float filter[2][2][flen]); extern void iwtN(unsigned int N, unsigned int flags, const long shifts[N], const long dims[N], const long ostr[N], complex float* out, const long istr[2 * N], const complex float* in, const long flen, const float filter[2][2][flen]); extern void wavelet_dims(unsigned int N, unsigned int flags, long odims[2 * N], const long dims[N], const long flen); // layer 3 extern void fwt(unsigned int N, unsigned int flags, const long shifts[N], const long odims[N], complex float* out, const long idims[N], const complex float* in, const long minsize[N], const long flen, const float filter[2][2][flen]); extern void iwt(unsigned int N, unsigned int flags, const long shifts[N], const long odims[N], complex float* out, const long idims[N], const complex float* in, const long minsize[N], const long flen, 
const float filter[2][2][flen]); extern long wavelet_num_levels(unsigned int N, unsigned int flags, const long dims[N], const long min[N], const long flen); extern long wavelet_coeffs(unsigned int N, unsigned int flags, const long dims[N], const long min[N], const long flen); extern void wavelet_coeffs2(unsigned int N, unsigned int flags, long odims[N], const long dims[N], const long min[N], const long flen); extern void fwt2(unsigned int N, unsigned int flags, const long shifts[N], const long odims[N], const long ostr[N], complex float* out, const long idims[N], const long istr[N], const complex float* in, const long minsize[N], long flen, const float filter[2][2][flen]); extern void iwt2(unsigned int N, unsigned int flags, const long shifts[N], const long odims[N], const long ostr[N], complex float* out, const long idims[N], const long istr[N], const complex float* in, const long minsize[N], const long flen, const float filter[2][2][flen]); extern void wavelet_thresh(unsigned int N, float lambda, unsigned int flags, unsigned int jflags, const long shifts[N], const long dims[N], complex float* out, const complex float* in, const long minsize[N], long flen, const float filter[2][2][flen]); bart-0.4.02/src/wavelet/wavthresh.c000066400000000000000000000061271320577655200171640ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * Copyright 2016-2017. Martin Uecker * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
/**
 * Draw a pseudo-random integer in [0, limit] using rand_r().
 *
 * @param state seed/state for rand_r(), updated on every draw
 * @param limit inclusive upper bound, must be >= 0
 * @return      value r with 0 <= r <= limit
 *
 * Values are obtained by dividing the generator output into limit + 1
 * buckets and rejecting results above limit.
 */
static int rand_lim(unsigned int* state, int limit)
{
	// [0, limit] is empty for a negative limit; the division below would
	// then trap (limit == -1) or the loop would never terminate
	assert(limit >= 0);

	// limit + 1 would overflow or the bucket width would become zero;
	// every value rand_r() can produce is already <= limit in this case
	if (limit >= RAND_MAX)
		return rand_r(state);

	int divisor = RAND_MAX / (limit + 1);
	int retval;

	do {
		retval = rand_r(state) / divisor;

	} while (retval > limit);

	return retval;
}
prox_wavelet_thresh_create(unsigned int N, const long dims[N], unsigned int flags, unsigned int jflags, const long minsize[N], float lambda, bool randshift) { PTR_ALLOC(struct wavelet_thresh_s, data); SET_TYPEID(wavelet_thresh_s, data); data->N = N; long (*ndims)[N] = TYPE_ALLOC(long[N]); md_copy_dims(N, (*ndims), dims); data->dims = *ndims; long (*nminsize)[N] = TYPE_ALLOC(long[N]); md_copy_dims(N, (*nminsize), minsize); data->minsize = *nminsize; data->flags = flags; data->jflags = jflags; data->lambda = lambda; data->randshift = randshift; data->rand_state = 1; return operator_p_create(N, dims, N, dims, CAST_UP(PTR_PASS(data)), wavelet_thresh_apply, wavelet_thresh_del); } bart-0.4.02/src/wavelet/wavthresh.h000066400000000000000000000003321320577655200171610ustar00rootroot00000000000000 #include extern const struct operator_p_s* prox_wavelet_thresh_create(unsigned int N, const long dims[N], unsigned int flags, unsigned int jflags, const long minsize[N], float lambda, bool randshift); bart-0.4.02/src/wavelet/wl3-cuda.cu000066400000000000000000000072531320577655200167560ustar00rootroot00000000000000/* Copyright 2014. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
/**
 * Map one filter tap of one output coefficient to an input sample index,
 * mirroring the signal at both borders (the border sample is repeated).
 *
 * l: index of the output coefficient, x: number of input samples,
 * flen: filter length, k: tap index. Returns n with 0 <= n < x for x > 0.
 *
 * One reflection is not sufficient when the filter is long compared to
 * the signal (x = 3, flen = 10 used to give -2), so reflect until the
 * index is inside. Results are unchanged wherever one reflection was
 * enough before.
 */
__host__ __device__ int coord(int l, int x, int flen, int k)
{
	// no valid index exists for an empty axis; do not spin forever
	if (x <= 0)
		return 0;

	int n = 2 * l + 1 - (flen - 1) + k;

	while ((n < 0) || (n >= x)) {

		if (n < 0)
			n = -n - 1;		// mirror at the lower border

		if (n >= x)
			n = x - 1 - (n - x);	// mirror at the upper border
	}

	return n;
}
/**
 * Launch kern_down3: filter and down-sample along the middle dimension of
 * a 3D array.
 *
 * dims are the input dimensions, out_str / in_str are strides in bytes
 * (converted to element strides here), filter holds flen taps and is
 * handed to the kernel as is.
 */
extern "C" void wl3_cuda_down3(const long dims[3], const long out_str[3], _Complex float* out, const long in_str[3], const _Complex float* in, unsigned int flen, const float filter[__VLA(flen)])
{
	// the kernel indexes in elements, not bytes
	dim3 out_el(out_str[0] / CFL_SIZE, out_str[1] / CFL_SIZE, out_str[2] / CFL_SIZE);
	dim3 in_el(in_str[0] / CFL_SIZE, in_str[1] / CFL_SIZE, in_str[2] / CFL_SIZE);
	dim3 extent(dims[0], dims[1], dims[2]);

	// one thread per output sample; the middle dimension shrinks to the band size
	long band = bandsize(dims[1], flen);

	int edge = 8;
	dim3 threads(edge, edge, edge);
	dim3 blocks((dims[0] + edge - 1) / edge, (band + edge - 1) / edge, (dims[2] + edge - 1) / edge);

	kern_down3<<< blocks, threads >>>(extent, out_el, (cuFloatComplex*)out, in_el, (const cuFloatComplex*)in, flen, filter);
}
bart-0.4.02/src/wavepsf.c000066400000000000000000000122361320577655200151530ustar00rootroot00000000000000/* Copyright 2017. Massachusetts Institute of Technology. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Siddharth Iyer * * Bilgic B, Gagoski BA, Cauley SF, Fan AP, Polimeni JR, Grant PE, Wald LL, Setsompop K. * Wave‐CAIPI for highly accelerated 3D imaging. Magnetic resonance in medicine. * 2015 Jun 1;73(6):2152-62. */ #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/fft.h" #include "num/init.h" #include "misc/debug.h" #include "misc/mri.h" #include "misc/utils.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" // Larmor frequency in Hertz per Gauss #ifndef LARMOR #define LARMOR 4257.56 #endif static const char usage_str[] = ""; static const char help_str[] = "Generate the wave PSF in hybrid space.\n" "- Assumes the first dimension is the readout dimension.\n" "- Assumes Gy and Gz gradients have idential max\n" " amplitude and slew rate.\n" "Example:\n" "bart wavepsf -x 768 -y 256 -z 1 -p 0.1 -q 0.1 -a 3000 -t 0.00001 -g 4 -s 18000 -n 6 wave_psf\n"; int main_wavepsf(int argc, char* argv[]) { // Spatial dimensions. int sx = 512; // Number of readout points. Size of dimension 0. int sy = 128; // Number of phase encode 1 points. Size of dimension 1. int sz = 1; // Number of phase encode 2 points. Size of dimension 2. float dy = 0.1; // Resolution in the phase encode 1 direction in cm. float dz = 0.1; // Resolution in the phase encode 2 direction in cm. // ADC parameters. int adc = 3000; // Readout duration in microseconds. float dt = 1e-5; // ADC sampling rate in seconds. // Gradient parameters. float gmax = 4; // Maximum gradient amplitude in Gauss per centimeter. float smax = 18000; // Maximum slew rate in Gauss per centimeter per second. // Wave parameters. int ncyc = 6; // Number of gradient sine-cycles. 
const struct opt_s opts[] = { OPT_INT('x', &sx, "DIM_ro", "Number of readout points or numel(dim 0)"), OPT_INT('y', &sy, "DIM_pe1", "Number of phase encode 1 points or numel(dim 1)"), OPT_INT('z', &sz, "DIM_pe2", "Number of phase encode 2 points or numel(dim 2)"), OPT_FLOAT('p', &dy, "RES_pe1", "Resolution in phase encode 1 (centimeters)"), OPT_FLOAT('q', &dz, "RES_pe2", "Resolution in phase encode 2 (centimeters)"), OPT_INT('a', &adc, "ADC_duration", "Readout duration in microseconds."), OPT_FLOAT('t', &dt, "ADC_dt", "ADC sampling rate in seconds"), OPT_FLOAT('g', &gmax, "GRAD_maxamp", "Maximum gradient amplitude in Gauss/cm"), OPT_FLOAT('s', &smax, "GRAD_maxslew", "Maximum gradient slew rate in Gauss/cm/second"), OPT_INT('n', &ncyc, "WAVE_cycles", "Number of cycles in the gradient sine wave."), }; cmdline(&argc, argv, 1, 1, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); assert(0 == adc % 10); // Scanners require ADC_duration to be a multiple of 10. int wavepoints = adc/10; // Number of points in the gradient wave. float T = wavepoints * dt/ncyc; // Time period of the sine wave. float w = 2 * M_PI/T; // Frequency in radians per second. /* Calculating the sine-amplitude to use. It is ether slew limited or gradient amplitude limited. */ float gamp = (smax >= w * gmax) ? 
gmax : smax/w; float gwave[wavepoints]; for (int tdx = 0; tdx < wavepoints; tdx++) { gwave[tdx] = gamp * sin(w * tdx * dt); } complex float phasepercm[wavepoints]; float prephase = -2 * M_PI * LARMOR * gamp/w; float cumsum = 0; for (int tdx = 0; tdx < wavepoints; tdx++) { phasepercm[tdx] = 2 * M_PI * LARMOR * (cumsum + gwave[tdx]/2.0) * dt + prephase; cumsum = cumsum + gwave[tdx]; } // Interpolate to sx via sinc interpolation const long wavepoint_dims[1] = {wavepoints}; const long interp_dims[1] = {sx}; complex float k_phasepercm[wavepoints]; fftuc(1, wavepoint_dims, 1, k_phasepercm, phasepercm); complex float k_phasepercm_interp[sx]; md_resize_center(1, interp_dims, k_phasepercm_interp, wavepoint_dims, k_phasepercm, sizeof(complex float)); complex float phasepercm_interp_complex[sx]; ifftuc(1, interp_dims, 1, phasepercm_interp_complex, k_phasepercm_interp); complex float phasepercm_interp_real[sx]; md_zreal(1, interp_dims, phasepercm_interp_real, phasepercm_interp_complex); complex float phasepercm_interp[sx]; float scale = sqrt((float) sx/wavepoints); md_zsmul(1, interp_dims, phasepercm_interp, phasepercm_interp_real, scale); complex float psf[sz][sy][sx]; //Dimensions reversed to be consistent with cfl int midy = sy/2; int midz = sz/2; complex float phase[sx]; float val; for (int ydx = 0; ydx < sy; ydx++) { for (int zdx = 0; zdx < sz; zdx++) { val = -((ydx - midy) * dy + (zdx - midz) * dz); md_zsmul(1, interp_dims, phase, phasepercm_interp, val); md_zexpj(1, interp_dims, psf[zdx][ydx], phase); } } const long psf_dims[3] = {sx, sy, sz}; complex float* psf_cfl = create_cfl(argv[1], 3, psf_dims); md_copy(3, psf_dims, psf_cfl, psf, sizeof(complex float)); unmap_cfl(3, psf_dims, psf_cfl); exit(0); } bart-0.4.02/src/whiten.c000066400000000000000000000115441320577655200147770ustar00rootroot00000000000000/* Copyright 2017. The Regents of the University of California. * All rights reserved. 
Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Jon Tamir */ #include #include #include #include "num/multind.h" #include "num/init.h" #include "num/flpmath.h" #include "num/lapack.h" #include "misc/mri.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #include "misc/debug.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " [] []"; static const char help_str[] = "Apply multi-channel noise pre-whitening on using noise data .\n" "Optionally output whitening matrix and noise covariance matrix"; static void whiten(const long dims[DIMS], complex float* out, const long mat_dims[DIMS], const complex float* mat, const complex float* in) { long trp_dims[DIMS]; md_transpose_dims(DIMS, COIL_DIM, MAPS_DIM, trp_dims, dims); md_zmatmul(DIMS, trp_dims, out, mat_dims, mat, dims, in); } /* * Calculate noise covariance matrix. Assumes noise is zero-mean */ static void calc_covar(const long mat_dims[DIMS], complex float* covar, const long noise_dims[DIMS], const complex float* ndata) { long trp_dims[DIMS]; md_transpose_dims(DIMS, COIL_DIM, MAPS_DIM, trp_dims, noise_dims); md_zmatmulc(DIMS, mat_dims, covar, trp_dims, ndata, noise_dims, ndata); md_zsmul(DIMS, mat_dims, covar, covar, 1. 
/ (noise_dims[READ_DIM] - 1)); } /* * Calculate noise whitening matrix W = inv(L), where N = L * L^H is the Cholesky decomposition of noise N */ static void calc_optmat(const long mat_dims[DIMS], complex float* optmat, const complex float* covar) { long N = mat_dims[COIL_DIM]; complex float* chol = md_alloc(DIMS, mat_dims, CFL_SIZE); md_copy(DIMS, mat_dims, chol, covar, CFL_SIZE); lapack_cholesky_lower(N, MD_CAST_ARRAY2(complex float, DIMS, mat_dims, chol, COIL_DIM, MAPS_DIM)); lapack_trimat_inverse_lower(N, MD_CAST_ARRAY2(complex float, DIMS, mat_dims, chol, COIL_DIM, MAPS_DIM)); for (int i = 0; i < N; i++) for (int j = 0; j < i; j++) chol[i*N + j] = 0.; md_transpose(DIMS, COIL_DIM, MAPS_DIM, mat_dims, optmat, mat_dims, chol, CFL_SIZE); md_free(chol); } int main_whiten(int argc, char* argv[]) { const char* optmat_file = NULL; const char* covar_file = NULL; bool normalize = false; const struct opt_s opts[] = { OPT_STRING('o', &optmat_file, "", "use external whitening matrix "), OPT_STRING('c', &covar_file, "", "use external noise covariance matrix "), OPT_SET('n', &normalize, "normalize variance to 1 using noise data "), }; cmdline(&argc, argv, 3, 5, usage_str, help_str, ARRAY_SIZE(opts), opts); int num_args = argc - 1; num_init(); long dims[DIMS]; long noise_dims[DIMS]; long mat_dims[DIMS]; complex float* idata = load_cfl(argv[1], DIMS, dims); complex float* ndata = load_cfl(argv[2], DIMS, noise_dims); complex float* odata = create_cfl(argv[3], DIMS, dims); md_select_dims(DIMS, COIL_FLAG, mat_dims, noise_dims); mat_dims[MAPS_DIM] = mat_dims[COIL_DIM]; complex float* optmat_in = NULL; complex float* optmat_out = NULL; complex float* covar_in = NULL; complex float* covar_out = NULL; if (num_args > 3) optmat_out = create_cfl(argv[4], DIMS, mat_dims); else optmat_out = anon_cfl(NULL, DIMS, mat_dims); if (num_args > 4) covar_out = create_cfl(argv[5], DIMS, mat_dims); else covar_out = anon_cfl(NULL, DIMS, mat_dims); if (NULL != covar_file) { covar_in = 
load_cfl(covar_file, DIMS, mat_dims); md_copy(DIMS, mat_dims, covar_out, covar_in, CFL_SIZE); unmap_cfl(DIMS, mat_dims, covar_in); } else calc_covar(mat_dims, covar_out, noise_dims, ndata); if (NULL != optmat_file) { optmat_in = load_cfl(optmat_file, DIMS, mat_dims); md_copy(DIMS, mat_dims, optmat_out, optmat_in, CFL_SIZE); unmap_cfl(DIMS, mat_dims, optmat_in); } else { calc_optmat(mat_dims, optmat_out, covar_out); } whiten(dims, odata, mat_dims, optmat_out, idata); if (normalize) { long std_dims[DIMS]; md_singleton_dims(DIMS, std_dims); complex float* nwhite = md_alloc(DIMS, noise_dims, CFL_SIZE); complex float* nstdev = md_alloc(DIMS, std_dims, CFL_SIZE); // get scale factor by whitening the noise data and taking stdev whiten(noise_dims, nwhite, mat_dims, optmat_out, ndata); md_zstd(DIMS, noise_dims, READ_FLAG | COIL_FLAG, nstdev, nwhite); float stdev = md_zasum(DIMS, std_dims, nstdev); md_zsmul(DIMS, dims, odata, odata, 1. / stdev); debug_printf(DP_DEBUG1, "standard deviation scaling: %.6e\n", stdev); md_free(nwhite); md_free(nstdev); } unmap_cfl(DIMS, dims, idata); unmap_cfl(DIMS, noise_dims, ndata); unmap_cfl(DIMS, dims, odata); unmap_cfl(DIMS, mat_dims, optmat_out); unmap_cfl(DIMS, mat_dims, covar_out); exit(0); } bart-0.4.02/src/window.c000066400000000000000000000022651320577655200150100ustar00rootroot00000000000000/* Copyright 2017. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2017 Jon Tamir */ #include #include #include "num/multind.h" #include "num/filter.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #include "misc/opts.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = "flags "; static const char help_str[] = "Apply Hamming (Hann) window to along dimensions specified by flags"; int main_window(int argc, char* argv[]) { bool hamming = true; const struct opt_s opts[] = { OPT_CLEAR('H', &hamming, "Hann window"), }; cmdline(&argc, argv, 3, 3, usage_str, help_str, ARRAY_SIZE(opts), opts); num_init(); long dims[DIMS]; long flags = atoi(argv[1]); complex float* in_data = load_cfl(argv[2], DIMS, dims); complex float* out_data = create_cfl(argv[3], DIMS, dims); (hamming ? md_zhamming : md_zhann)(DIMS, dims, flags, out_data, in_data); unmap_cfl(DIMS, dims, in_data); unmap_cfl(DIMS, dims, out_data); exit(0); } bart-0.4.02/src/zeros.c000066400000000000000000000020711320577655200146360ustar00rootroot00000000000000/* Copyright 2013. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2012-06-04 Martin Uecker */ #include #include #include #include #include #include "num/multind.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/io.h" #include "misc/misc.h" static const char usage_str[] = "dims dim1 ... 
dimn name"; static const char help_str[] = "Create a zero-filled array with {dims} dimensions of size {dim1} to {dimn}.\n"; int main_zeros(int argc, char* argv[]) { mini_cmdline(&argc, argv, -3, usage_str, help_str); num_init(); int N = atoi(argv[1]); assert(N >= 0); assert(argc == 3 + N); long dims[N]; for (int i = 0; i < N; i++) { dims[i] = atoi(argv[2 + i]); assert(dims[i] >= 1); } complex float* x = create_cfl(argv[2 + N], N, dims); md_clear(N, dims, x, sizeof(complex float)); unmap_cfl(N, dims, x); exit(0); } bart-0.4.02/src/zexpj.c000066400000000000000000000017631320577655200146430ustar00rootroot00000000000000/* Copyright 2016. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Jon Tamir */ #include #include #include #include #include "num/multind.h" #include "num/flpmath.h" #include "num/init.h" #include "misc/mmio.h" #include "misc/misc.h" #ifndef DIMS #define DIMS 16 #endif static const char usage_str[] = " "; static const char help_str[] = "Complex exponential of array, exp(1j * ).\n"; int main_zexpj(int argc, char* argv[argc]) { mini_cmdline(&argc, argv, 2, usage_str, help_str); num_init(); long dims[DIMS]; complex float* in_data = load_cfl(argv[1], DIMS, dims); complex float* out_data = create_cfl(argv[2], DIMS, dims); md_zexpj(DIMS, dims, out_data, in_data); unmap_cfl(DIMS, dims, in_data); unmap_cfl(DIMS, dims, out_data); exit(0); } bart-0.4.02/startup.m000066400000000000000000000001701320577655200144170ustar00rootroot00000000000000% set Matlab path and TOOLBOX_PATH environment variable addpath(fullfile(pwd, 'matlab')); setenv('TOOLBOX_PATH', pwd); bart-0.4.02/startup.py000066400000000000000000000001771320577655200146220ustar00rootroot00000000000000import os import sys path = os.environ["TOOLBOX_PATH"] + "/python/"; sys.path.append(path); from bart import bart import cfl 
bart-0.4.02/startup.sh000066400000000000000000000001671320577655200146030ustar00rootroot00000000000000# set environment variables #export OMP_NUM_THREADS=23 export TOOLBOX_PATH=`pwd` export PATH=${TOOLBOX_PATH}:${PATH} bart-0.4.02/sub000077500000000000000000000003751320577655200132650ustar00rootroot00000000000000#!/bin/bash set -e if [ ! -f ${TOOLBOX_PATH}/$(basename $0) ] ; then echo "\$TOOLBOX_PATH is not set correctly!" exit 1 fi if [ $# -lt 3 ] ; then echo "Usage: $0 " exit 1 fi ${TOOLBOX_PATH}/bart saxpy -- -1. $1 $2 $3 bart-0.4.02/tests/000077500000000000000000000000001320577655200137035ustar00rootroot00000000000000bart-0.4.02/tests/casorati.mk000066400000000000000000000012001320577655200160320ustar00rootroot00000000000000 tests/test-casorati: ones noise casorati reshape avg slice squeeze extract nrmse set -e ; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 2 6 6 o1.ra ;\ $(TOOLDIR)/noise o1.ra o2.ra ;\ $(TOOLDIR)/casorati 1 2 o2.ra o3.ra ;\ $(TOOLDIR)/reshape 7 5 6 2 o3.ra o4.ra ;\ $(TOOLDIR)/avg 4 o4.ra o5.ra ;\ $(TOOLDIR)/slice 0 1 o5.ra o6.ra ;\ $(TOOLDIR)/squeeze o6.ra o7.ra ;\ $(TOOLDIR)/extract 1 1 3 o2.ra i2.ra ;\ $(TOOLDIR)/avg 2 i2.ra i3.ra ;\ $(TOOLDIR)/nrmse -t 0.001 i3.ra o7.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-casorati bart-0.4.02/tests/cc.mk000066400000000000000000000037151320577655200146270ustar00rootroot00000000000000 tests/test-cc-svd: cc resize nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/cc -S -p 8 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp-cc8.ra ;\ $(TOOLDIR)/cc -S -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp-cc4.ra ;\ $(TOOLDIR)/resize 3 8 ksp-cc4.ra ksp-cc-z.ra ;\ $(TOOLDIR)/nrmse -t 0.1 ksp-cc8.ra ksp-cc-z.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ tests/test-cc-geom: cc rss resize nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/cc -G -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp-cc4.ra ;\ $(TOOLDIR)/rss 11 ksp-cc4.ra ksp0-cc4.ra ;\ $(TOOLDIR)/rss 11 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp0.ra ;\ $(TOOLDIR)/nrmse -t 0.0001 ksp0.ra ksp0-cc4.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-cc-esp: cc rss resize nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/cc -E -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp-cc4.ra ;\ $(TOOLDIR)/resize 3 8 ksp-cc4.ra ksp-cc-z.ra ;\ $(TOOLDIR)/rss 11 ksp-cc4.ra ksp0-cc4.ra ;\ $(TOOLDIR)/rss 11 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp0.ra ;\ $(TOOLDIR)/nrmse -t 0.0001 ksp0.ra ksp0-cc4.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-cc-svd-matrix: cc extract fmac transpose nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/cc -S -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp-cc.ra ;\ $(TOOLDIR)/cc -M -S $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ;\ $(TOOLDIR)/extract 4 0 3 sccmat.ra sccmat-4.ra ;\ $(TOOLDIR)/fmac -C -s 8 $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat-4.ra ksp-cc-3.ra ;\ $(TOOLDIR)/transpose 3 4 ksp-cc-3.ra ksp-cc-4.ra ;\ $(TOOLDIR)/nrmse -t 0.001 ksp-cc.ra ksp-cc-4.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-cc-svd tests/test-cc-geom tests/test-cc-esp bart-0.4.02/tests/ccapply.mk000066400000000000000000000066331320577655200156770ustar00rootroot00000000000000 tests/test-ccapply-forward: cc ccapply nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/cc -S -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp-cc.ra ;\ $(TOOLDIR)/cc -M -S $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ;\ $(TOOLDIR)/ccapply -S -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ksp-cc-2.ra ;\ $(TOOLDIR)/nrmse -t 0.001 ksp-cc.ra ksp-cc-2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-ccapply-backward: cc ccapply extract fmac transpose nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/cc -M -S $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ;\ $(TOOLDIR)/ccapply -S -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ksp-cc.ra ;\ $(TOOLDIR)/ccapply -S -u ksp-cc.ra sccmat.ra ksp-2.ra ;\ $(TOOLDIR)/extract 4 0 4 sccmat.ra sccmat-4.ra ;\ $(TOOLDIR)/transpose 3 4 ksp-cc.ra ksp-ccT.ra ;\ $(TOOLDIR)/fmac -s 16 ksp-ccT.ra sccmat-4.ra ksp-3.ra ;\ $(TOOLDIR)/nrmse -t 0.08 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp-2.ra ;\ $(TOOLDIR)/nrmse -t 0.0001 ksp-2.ra ksp-3.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-ccapply-geom-forward: cc ccapply extract nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/cc -G -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp-cc.ra ;\ $(TOOLDIR)/cc -M -p 4 -G $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ;\ $(TOOLDIR)/ccapply -G -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ksp-cc-2.ra ;\ $(TOOLDIR)/extract 4 0 4 sccmat.ra sccmat-4.ra ;\ $(TOOLDIR)/nrmse -t 0.001 ksp-cc.ra ksp-cc-2.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ tests/test-ccapply-geom-backward: cc ccapply nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/cc -M -p4 -G $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ;\ $(TOOLDIR)/ccapply -G -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ksp-cc.ra ;\ $(TOOLDIR)/ccapply -G -u ksp-cc.ra sccmat.ra ksp-2.ra ;\ $(TOOLDIR)/nrmse -t 0.08 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp-2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-ccapply-esp-forward: cc ccapply extract nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/cc -E -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp-cc.ra ;\ $(TOOLDIR)/cc -M -p 4 -E $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ;\ $(TOOLDIR)/ccapply -E -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ksp-cc-2.ra ;\ $(TOOLDIR)/extract 4 0 4 sccmat.ra sccmat-4.ra ;\ $(TOOLDIR)/nrmse -t 0.001 ksp-cc.ra ksp-cc-2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-ccapply-esp-backward: cc ccapply nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/cc -M -p4 -E $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ;\ $(TOOLDIR)/ccapply -E -p 4 $(TESTS_OUT)/shepplogan_coil_ksp.ra sccmat.ra ksp-cc.ra ;\ $(TOOLDIR)/ccapply -E -u ksp-cc.ra sccmat.ra ksp-2.ra ;\ $(TOOLDIR)/nrmse -t 0.08 $(TESTS_OUT)/shepplogan_coil_ksp.ra ksp-2.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-ccapply-forward tests/test-ccapply-backward TESTS += tests/test-ccapply-geom-forward tests/test-ccapply-geom-backward TESTS += tests/test-ccapply-esp-forward tests/test-ccapply-esp-backward bart-0.4.02/tests/copy.mk000066400000000000000000000014551320577655200152130ustar00rootroot00000000000000 tests/test-copy-out: ones zeros copy resize nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 2 50 50 o.ra ;\ $(TOOLDIR)/resize -c 0 100 1 100 o.ra o0.ra ;\ $(TOOLDIR)/zeros 2 50 50 o1.ra ;\ $(TOOLDIR)/copy 0 25 1 25 o0.ra o1.ra ;\ $(TOOLDIR)/nrmse -t 0. o1.ra o.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-copy-in: ones zeros copy resize nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 2 50 50 o.ra ;\ $(TOOLDIR)/resize -c 0 100 1 100 o.ra o0.ra ;\ $(TOOLDIR)/zeros 2 100 100 o1.ra ;\ $(TOOLDIR)/copy 0 25 1 25 o.ra o1.ra ;\ $(TOOLDIR)/nrmse -t 0. o1.ra o0.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-copy-out tests/test-copy-in bart-0.4.02/tests/ecalib.mk000066400000000000000000000016501320577655200154550ustar00rootroot00000000000000 tests/test-ecalib: ecalib pocsense nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e ; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ecalib -m1 $(TESTS_OUT)/shepplogan_coil_ksp.ra coils.ra ;\ $(TOOLDIR)/pocsense -i1 $(TESTS_OUT)/shepplogan_coil_ksp.ra coils.ra proj.ra ;\ $(TOOLDIR)/nrmse -t 0.05 proj.ra $(TESTS_OUT)/shepplogan_coil_ksp.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ tests/test-ecalib-auto: ecalib pocsense nrmse noise $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e ; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/noise -n 100 $(TESTS_OUT)/shepplogan_coil_ksp.ra shepplogan_noise.ra ;\ $(TOOLDIR)/ecalib -m 1 -a -v 100 shepplogan_noise.ra coils.ra ;\ $(TOOLDIR)/pocsense -i 1 shepplogan_noise.ra coils.ra proj.ra ;\ $(TOOLDIR)/nrmse -t 0.05 proj.ra $(TESTS_OUT)/shepplogan_coil_ksp.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-ecalib tests/test-ecalib-auto bart-0.4.02/tests/estdelay.mk000066400000000000000000000006231320577655200160470ustar00rootroot00000000000000 tests/test-estdelay: estdelay traj phantom nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -D -q1.5:1:-0.5 -r -y8 t.ra ;\ $(TOOLDIR)/phantom -k -t t.ra k.ra ;\ $(TOOLDIR)/traj -D -q`$(TOOLDIR)/estdelay t.ra k.ra` -r -y8 t2.ra ;\ $(TOOLDIR)/nrmse -t 0.00001 t.ra t2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-estdelay bart-0.4.02/tests/fft.mk000066400000000000000000000031011320577655200150060ustar00rootroot00000000000000 # basic 2D FFT $(TESTS_OUT)/shepplogan_fft.ra: fft $(TESTS_OUT)/shepplogan.ra $(TOOLDIR)/fft 7 $(TESTS_OUT)/shepplogan.ra $@ tests/test-fft-basic: scale fft nrmse $(TESTS_OUT)/shepplogan_fft.ra $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/scale 16384 $(TESTS_OUT)/shepplogan.ra shepploganS.ra ;\ $(TOOLDIR)/fft -i 7 $(TESTS_OUT)/shepplogan_fft.ra shepplogan2.ra ;\ $(TOOLDIR)/nrmse -t 0.000001 shepploganS.ra shepplogan2.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ # unitary FFT $(TESTS_OUT)/shepplogan_fftu.ra: fft $(TESTS_OUT)/shepplogan.ra $(TOOLDIR)/fft -u 7 $(TESTS_OUT)/shepplogan.ra $@ tests/test-fft-unitary: fft nrmse $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/shepplogan_fftu.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/fft -u -i 7 $(TESTS_OUT)/shepplogan_fftu.ra shepplogan2u.ra ;\ $(TOOLDIR)/nrmse -t 0.000001 $(TESTS_OUT)/shepplogan.ra shepplogan2u.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ # uncentered FFT tests/test-fft-uncentered: fftmod fft nrmse $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/shepplogan_fftu.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/fftmod -i 7 $(TESTS_OUT)/shepplogan_fftu.ra shepplogan_fftu2.ra ;\ $(TOOLDIR)/fft -uni 7 shepplogan_fftu2.ra shepplogan2u.ra ;\ $(TOOLDIR)/fftmod -i 7 shepplogan2u.ra shepplogan3u.ra ;\ $(TOOLDIR)/nrmse -t 0.000001 $(TESTS_OUT)/shepplogan.ra shepplogan3u.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-fft-basic tests/test-fft-unitary tests/test-fft-uncentered bart-0.4.02/tests/flatten.mk000066400000000000000000000006331320577655200156730ustar00rootroot00000000000000 tests/test-flatten: ones reshape flatten noise nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 6 1 2 1 3 1 4 a0.ra ;\ $(TOOLDIR)/noise a0.ra a1.ra ;\ $(TOOLDIR)/flatten a1.ra a2.ra ;\ $(TOOLDIR)/reshape 63 24 1 1 1 1 1 a1.ra a3.ra ;\ $(TOOLDIR)/nrmse -t 0. a2.ra a3.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-flatten bart-0.4.02/tests/fmac.mk000066400000000000000000000006511320577655200151440ustar00rootroot00000000000000 tests/test-fmac-sum: ones fmac noise nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 3 2 1 3 a0.ra ;\ $(TOOLDIR)/noise a0.ra a1.ra ;\ $(TOOLDIR)/fmac -s 4 a1.ra a2.ra ;\ $(TOOLDIR)/ones 1 1 o.ra ;\ $(TOOLDIR)/fmac -s 4 a1.ra o.ra a3.ra ;\ $(TOOLDIR)/nrmse -t 0. a2.ra a3.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-fmac-sum bart-0.4.02/tests/gpu.mk000066400000000000000000000047251320577655200150370ustar00rootroot00000000000000 # tests/test-pics-gpu: pics nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/pics -g -S -r0.001 $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra reco1.ra ;\ $(TOOLDIR)/pics -S -r0.001 $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.000001 reco1.ra reco2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-pics-gpu-noncart: traj scale phantom ones pics nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -r -x256 -y64 traj.ra ;\ $(TOOLDIR)/scale 0.5 traj.ra traj2.ra ;\ $(TOOLDIR)/phantom -t traj2.ra ksp.ra ;\ $(TOOLDIR)/ones 3 128 128 1 o.ra ;\ $(TOOLDIR)/pics -S -r0.001 -t traj2.ra ksp.ra o.ra reco1.ra ;\ $(TOOLDIR)/pics -g -S -r0.001 -t traj2.ra ksp.ra o.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.001 reco1.ra reco2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-pics-gpu-weights: pics scale ones nrmse $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 2 128 128 weights.ra ;\ $(TOOLDIR)/scale 0.1 weights.ra weights2.ra ;\ $(TOOLDIR)/pics -S -r0.001 -p weights2.ra $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra reco1.ra ;\ $(TOOLDIR)/pics -g -S -r0.001 -p weights2.ra $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.000001 reco2.ra reco1.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ # similar to the non-gpu test this had to be relaxed to 0.01 tests/test-pics-gpu-noncart-weights: traj scale ones phantom pics nrmse $(TESTS_OUT)/coils.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -r -x256 -y32 traj.ra ;\ $(TOOLDIR)/scale 0.5 traj.ra traj2.ra ;\ $(TOOLDIR)/phantom -s8 -t traj2.ra ksp.ra ;\ $(TOOLDIR)/ones 4 1 256 32 1 weights.ra ;\ $(TOOLDIR)/scale 0.1 weights.ra weights2.ra ;\ $(TOOLDIR)/pics -S -r0.001 -p weights2.ra -t traj2.ra ksp.ra $(TESTS_OUT)/coils.ra reco1.ra ;\ $(TOOLDIR)/pics -g -S -r0.001 -p weights2.ra -t traj2.ra ksp.ra $(TESTS_OUT)/coils.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.010 reco1.ra reco2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS_GPU += tests/test-pics-gpu tests/test-pics-gpu-noncart TESTS_GPU += tests/test-pics-gpu-weights tests/test-pics-gpu-noncart-weights bart-0.4.02/tests/join.mk000066400000000000000000000013541320577655200151760ustar00rootroot00000000000000 tests/test-join: ones join nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ seq 1 300 | xargs -P 100 -n1 -I {} $(TOOLDIR)/ones 3 6 1 7 o-{}.ra ;\ $(TOOLDIR)/ones 3 6 300 7 o1.ra ;\ $(TOOLDIR)/join 1 o-*.ra o.ra ;\ $(TOOLDIR)/nrmse -t 0.00001 o.ra o1.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-join-append: ones zeros join nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 3 6 7 1 o ;\ $(TOOLDIR)/zeros 3 6 7 1 z ;\ $(TOOLDIR)/join 2 o z o j ;\ $(TOOLDIR)/join -a 2 z o o ;\ $(TOOLDIR)/nrmse -t 0.00001 o j ;\ rm *.cfl ; rm *.hdr ; cd .. 
; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-join tests/test-join-append bart-0.4.02/tests/nlinv.mk000066400000000000000000000007511320577655200153650ustar00rootroot00000000000000 tests/test-nlinv: normalize nlinv pocsense nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e ; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/nlinv $(TESTS_OUT)/shepplogan_coil_ksp.ra r.ra c.ra ;\ $(TOOLDIR)/normalize 8 c.ra c_norm.ra ;\ $(TOOLDIR)/pocsense -i1 $(TESTS_OUT)/shepplogan_coil_ksp.ra c_norm.ra proj.ra ;\ $(TOOLDIR)/nrmse -t 0.05 proj.ra $(TESTS_OUT)/shepplogan_coil_ksp.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-nlinv bart-0.4.02/tests/nrmse.mk000066400000000000000000000005761320577655200153700ustar00rootroot00000000000000 tests/test-nrmse-scale: nrmse scale $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/shepplogan_fftu.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/scale 2.i $(TESTS_OUT)/shepplogan.ra shepplogan_sc.ra ;\ $(TOOLDIR)/nrmse -s -t 0.000001 $(TESTS_OUT)/shepplogan.ra shepplogan_sc.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-nrmse-scale bart-0.4.02/tests/nufft.mk000066400000000000000000000107261320577655200153640ustar00rootroot00000000000000 # compare with FFT on a Cartesian grid tests/test-nudft-forward: traj nufft reshape nrmse $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/shepplogan_fft.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -x128 -y128 traj.ra ;\ $(TOOLDIR)/nufft -s traj.ra $(TESTS_OUT)/shepplogan.ra shepplogan_ksp2.ra ;\ $(TOOLDIR)/reshape 7 128 128 1 shepplogan_ksp2.ra shepplogan_ksp3.ra ;\ $(TOOLDIR)/nrmse -t 0.0001 $(TESTS_OUT)/shepplogan_fft.ra shepplogan_ksp3.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ # compare with FFT on a Cartesian grid tests/test-nufft-forward: traj nufft reshape nrmse $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/shepplogan_fftu.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -x128 -y128 traj.ra ;\ $(TOOLDIR)/nufft traj.ra $(TESTS_OUT)/shepplogan.ra shepplogan_ksp2.ra ;\ $(TOOLDIR)/reshape 7 128 128 1 shepplogan_ksp2.ra shepplogan_ksp3.ra ;\ $(TOOLDIR)/nrmse -t 0.01 $(TESTS_OUT)/shepplogan_fftu.ra shepplogan_ksp3.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ # compare nufft and nufdt tests/test-nufft-nudft: traj nufft reshape nrmse $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -r -x128 -y12 traj.ra ;\ $(TOOLDIR)/nufft traj.ra $(TESTS_OUT)/shepplogan.ra shepplogan_ksp1.ra ;\ $(TOOLDIR)/nufft -s traj.ra $(TESTS_OUT)/shepplogan.ra shepplogan_ksp2.ra ;\ $(TOOLDIR)/scale 128. shepplogan_ksp1.ra shepplogan_ksp3.ra ;\ $(TOOLDIR)/nrmse -t 0.002 shepplogan_ksp2.ra shepplogan_ksp3.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ # test adjoint using definition tests/test-nudft-adjoint: zeros noise reshape traj nufft fmac nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/zeros 3 64 64 1 z.ra ;\ $(TOOLDIR)/noise -s123 z.ra n1.ra ;\ $(TOOLDIR)/noise -s321 z.ra n2b.ra ;\ $(TOOLDIR)/reshape 7 1 64 64 n2b.ra n2.ra ;\ $(TOOLDIR)/traj -r -x64 -y64 traj.ra ;\ $(TOOLDIR)/nufft -s traj.ra n1.ra k.ra ;\ $(TOOLDIR)/nufft -s -a traj.ra n2.ra x.ra ;\ $(TOOLDIR)/fmac -C -s7 n1.ra x.ra s1.ra ;\ $(TOOLDIR)/fmac -C -s7 k.ra n2.ra s2.ra ;\ $(TOOLDIR)/nrmse -t 0.00001 s1.ra s2.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ # test adjoint using definition tests/test-nufft-adjoint: zeros noise reshape traj nufft fmac nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/zeros 3 128 128 1 z.ra ;\ $(TOOLDIR)/noise -s123 z.ra n1.ra ;\ $(TOOLDIR)/noise -s321 z.ra n2b.ra ;\ $(TOOLDIR)/reshape 7 1 128 128 n2b.ra n2.ra ;\ $(TOOLDIR)/traj -r -x128 -y128 traj.ra ;\ $(TOOLDIR)/nufft traj.ra n1.ra k.ra ;\ $(TOOLDIR)/nufft -a traj.ra n2.ra x.ra ;\ $(TOOLDIR)/fmac -C -s7 n1.ra x.ra s1.ra ;\ $(TOOLDIR)/fmac -C -s7 k.ra n2.ra s2.ra ;\ $(TOOLDIR)/nrmse -t 0.00001 s1.ra s2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ # test inverse using definition tests/test-nufft-inverse: traj scale phantom nufft nrmse set -e ; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -r -x256 -y201 traj.ra ;\ $(TOOLDIR)/scale 0.5 traj.ra traj2.ra ;\ $(TOOLDIR)/phantom -t traj2.ra ksp.ra ;\ $(TOOLDIR)/nufft -r -i traj2.ra ksp.ra reco.ra ;\ $(TOOLDIR)/nufft traj2.ra reco.ra k2.ra ;\ $(TOOLDIR)/nrmse -t 0.001 ksp.ra k2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ # test toeplitz by comparing to non-toeplitz tests/test-nufft-toeplitz: traj phantom nufft nrmse set -e ; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -r -x128 -y128 traj.ra ;\ $(TOOLDIR)/phantom -k -t traj.ra ksp.ra ;\ $(TOOLDIR)/nufft -l1. -i -r traj.ra ksp.ra reco1.ra ;\ $(TOOLDIR)/nufft -l1. -i -t traj.ra ksp.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.002 reco1.ra reco2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-nufft-gpu: traj phantom nufft nrmse set -e ; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -r -x128 -y128 traj.ra ;\ $(TOOLDIR)/phantom -k -t traj.ra ksp.ra ;\ $(TOOLDIR)/nufft -l1. -i -r traj.ra ksp.ra reco1.ra ;\ $(TOOLDIR)/nufft -l1. -g -i -t traj.ra ksp.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.002 reco1.ra reco2.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-nufft-forward tests/test-nufft-adjoint tests/test-nufft-inverse tests/test-nufft-toeplitz TESTS += tests/test-nufft-nudft tests/test-nudft-forward tests/test-nudft-adjoint TESTS_GPU += tests/test-nufft-gpu bart-0.4.02/tests/out/000077500000000000000000000000001320577655200145125ustar00rootroot00000000000000bart-0.4.02/tests/out/.gitignore000066400000000000000000000001071320577655200165000ustar00rootroot00000000000000# Ignore everything in this directory * # Except this file !.gitignore bart-0.4.02/tests/phantom.mk000066400000000000000000000037021320577655200157040ustar00rootroot00000000000000 $(TESTS_OUT)/shepplogan.ra: phantom $(TOOLDIR)/phantom $@ $(TESTS_OUT)/shepplogan_ksp.ra: phantom $(TOOLDIR)/phantom -k $@ $(TESTS_OUT)/shepplogan_coil.ra: phantom $(TOOLDIR)/phantom -s8 $@ $(TESTS_OUT)/shepplogan_coil_ksp.ra: phantom $(TOOLDIR)/phantom -s8 -k $@ $(TESTS_OUT)/coils.ra: phantom $(TOOLDIR)/phantom -S8 $@ tests/test-phantom-ksp: fft nrmse $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/shepplogan_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/fft -i 7 $(TESTS_OUT)/shepplogan_ksp.ra shepplogan_img.ra ;\ $(TOOLDIR)/nrmse -t 0.22 $(TESTS_OUT)/shepplogan.ra shepplogan_img.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-phantom-noncart: traj phantom reshape nrmse $(TESTS_OUT)/shepplogan_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj traj.ra ;\ $(TOOLDIR)/phantom -k -t traj.ra shepplogan_ksp2.ra ;\ $(TOOLDIR)/reshape 7 128 128 1 shepplogan_ksp2.ra shepplogan_ksp3.ra ;\ $(TOOLDIR)/nrmse -t 0.00001 $(TESTS_OUT)/shepplogan_ksp.ra shepplogan_ksp3.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-phantom-coil: fmac nrmse $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/shepplogan_coil.ra $(TESTS_OUT)/coils.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/fmac $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/coils.ra sl_coil2.ra ;\ $(TOOLDIR)/nrmse -t 0. 
$(TESTS_OUT)/shepplogan_coil.ra sl_coil2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-phantom-ksp-coil: fft nrmse $(TESTS_OUT)/shepplogan_coil.ra $(TESTS_OUT)/shepplogan_coil_ksp.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/fft -i 7 $(TESTS_OUT)/shepplogan_coil_ksp.ra shepplogan_cimg.ra ;\ $(TOOLDIR)/nrmse -t 0.22 $(TESTS_OUT)/shepplogan_coil.ra shepplogan_cimg.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-phantom-ksp tests/test-phantom-noncart tests/test-phantom-coil tests/test-phantom-ksp-coil bart-0.4.02/tests/pics.mk000066400000000000000000000167131320577655200152020ustar00rootroot00000000000000 tests/test-pics-pi: pics scale nrmse $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/pics -S -r0.001 $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra reco.ra ;\ $(TOOLDIR)/scale 128. reco.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.23 reco2.ra $(TESTS_OUT)/shepplogan.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-pics-noncart: traj scale phantom ones pics nufft nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -r -x256 -y64 traj.ra ;\ $(TOOLDIR)/scale 0.5 traj.ra traj2.ra ;\ $(TOOLDIR)/phantom -t traj2.ra ksp.ra ;\ $(TOOLDIR)/ones 3 128 128 1 o.ra ;\ $(TOOLDIR)/pics -S -r0.001 -t traj2.ra ksp.ra o.ra reco.ra ;\ $(TOOLDIR)/nufft traj2.ra reco.ra k2.ra ;\ $(TOOLDIR)/nrmse -t 0.002 ksp.ra k2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-pics-cs: traj scale phantom ones pics nrmse $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -r -x256 -y48 traj.ra ;\ $(TOOLDIR)/scale 0.5 traj.ra traj2.ra ;\ $(TOOLDIR)/phantom -t traj2.ra ksp.ra ;\ $(TOOLDIR)/ones 3 128 128 1 o.ra ;\ $(TOOLDIR)/pics -S -RT:7:0:0.001 -u0.1 -e -t traj2.ra ksp.ra o.ra reco.ra ;\ $(TOOLDIR)/scale 128. 
reco.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.22 reco2.ra $(TESTS_OUT)/shepplogan.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-pics-wavl1: traj scale phantom ones pics nrmse $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -r -x256 -y48 traj.ra ;\ $(TOOLDIR)/scale 0.5 traj.ra traj2.ra ;\ $(TOOLDIR)/phantom -t traj2.ra ksp.ra ;\ $(TOOLDIR)/ones 3 128 128 1 o.ra ;\ $(TOOLDIR)/pics -S -RW:3:0:0.001 -i150 -e -t traj2.ra ksp.ra o.ra reco.ra ;\ $(TOOLDIR)/scale 128. reco.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.23 reco2.ra $(TESTS_OUT)/shepplogan.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-pics-poisson-wavl1: poisson squeeze fft fmac ones pics nrmse $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/poisson -Y128 -Z128 -y1.2 -z1.2 -e -v -C24 p.ra ;\ $(TOOLDIR)/squeeze p.ra p2.ra ;\ $(TOOLDIR)/fft -u 7 $(TESTS_OUT)/shepplogan.ra ksp1.ra ;\ $(TOOLDIR)/fmac ksp1.ra p2.ra ksp.ra ;\ $(TOOLDIR)/ones 3 128 128 1 o.ra ;\ $(TOOLDIR)/pics -S -RW:3:0:0.01 -i50 ksp.ra o.ra reco.ra ;\ $(TOOLDIR)/nrmse -t 0.21 $(TESTS_OUT)/shepplogan.ra reco.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-pics-bpwavl1: scale fft noise fmac ones pics nrmse $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/scale 50 $(TESTS_OUT)/shepplogan.ra shepp.ra ;\ $(TOOLDIR)/fft -u 7 shepp.ra ksp1.ra ;\ $(TOOLDIR)/noise -s 1 -n 1 ksp1.ra ksp2.ra ;\ $(TOOLDIR)/ones 3 128 128 1 o.ra ;\ $(TOOLDIR)/pics -a -P 128 -w1. -RW:3:0:1. -i50 ksp2.ra o.ra reco.ra ;\ $(TOOLDIR)/pics -m -P 128 -w1. -RW:3:0:1. -i50 -u 2 ksp2.ra o.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.08 shepp.ra reco.ra ;\ $(TOOLDIR)/nrmse -t 0.08 shepp.ra reco2.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ tests/test-pics-joint-wavl1: poisson reshape fft fmac ones pics slice nrmse $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/poisson -Y128 -Z128 -y1.1 -z1.1 -e -v -C24 -T2 p.ra ;\ $(TOOLDIR)/reshape 63 128 128 1 1 1 2 p.ra p2.ra ;\ $(TOOLDIR)/fft -u 7 $(TESTS_OUT)/shepplogan.ra ksp1.ra ;\ $(TOOLDIR)/fmac ksp1.ra p2.ra ksp.ra ;\ $(TOOLDIR)/ones 3 128 128 1 o.ra ;\ $(TOOLDIR)/pics -S -RW:3:32:0.02 -i50 ksp.ra o.ra reco2.ra ;\ $(TOOLDIR)/slice 5 0 reco2.ra reco.ra ;\ $(TOOLDIR)/nrmse -t 0.22 $(TESTS_OUT)/shepplogan.ra reco.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-pics-pics: traj scale phantom pics nrmse $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/coils.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -r -x256 -y32 traj.ra ;\ $(TOOLDIR)/scale 0.5 traj.ra traj2.ra ;\ $(TOOLDIR)/phantom -s8 -t traj2.ra ksp.ra ;\ $(TOOLDIR)/pics -S -RT:7:0:0.001 -u1000000000. -e -t traj2.ra ksp.ra $(TESTS_OUT)/coils.ra reco.ra ;\ $(TOOLDIR)/scale 128. reco.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.22 reco2.ra $(TESTS_OUT)/shepplogan.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ # test that weights =0.5 have no effect tests/test-pics-weights: pics ones nrmse $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 2 128 128 weights.ra ;\ $(TOOLDIR)/scale 0.5 weights.ra weights2.ra ;\ $(TOOLDIR)/pics -S -r0.001 $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra reco1.ra ;\ $(TOOLDIR)/pics -S -r0.001 -p weights2.ra $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.000001 reco2.ra reco1.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ # test that weights =0.5 have no effect # FIXME: this was 0.005 before but fails on travis tests/test-pics-noncart-weights: traj scale ones phantom pics nrmse $(TESTS_OUT)/coils.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/traj -r -x256 -y32 traj.ra ;\ $(TOOLDIR)/scale 0.5 traj.ra traj2.ra ;\ $(TOOLDIR)/phantom -s8 -t traj2.ra ksp.ra ;\ $(TOOLDIR)/ones 4 1 256 32 1 weights.ra ;\ $(TOOLDIR)/scale 0.5 weights.ra weights2.ra ;\ $(TOOLDIR)/pics -S -r0.001 -p weights2.ra -t traj2.ra ksp.ra $(TESTS_OUT)/coils.ra reco1.ra ;\ $(TOOLDIR)/pics -S -r0.001 -t traj2.ra ksp.ra $(TESTS_OUT)/coils.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.010 reco1.ra reco2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-pics-warmstart: pics scale nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 3 128 128 1 o.ra ;\ $(TOOLDIR)/pics -i0 -Wo.ra $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra reco.ra ;\ $(TOOLDIR)/nrmse -t 0. o.ra reco.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-pics-batch: pics repmat nrmse $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/repmat 5 32 $(TESTS_OUT)/shepplogan_coil_ksp.ra kspaces.ra ;\ $(TOOLDIR)/pics -r0.01 -B32 kspaces.ra $(TESTS_OUT)/coils.ra reco1.ra ;\ $(TOOLDIR)/pics -r0.01 kspaces.ra $(TESTS_OUT)/coils.ra reco2.ra ;\ $(TOOLDIR)/nrmse -t 0.00001 reco1.ra reco2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-pics-tedim: phantom fmac fft pics nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/phantom -s4 -m coils.ra ;\ $(TOOLDIR)/phantom -m img.ra ;\ $(TOOLDIR)/fmac img.ra coils.ra cimg.ra ;\ $(TOOLDIR)/fft -u 7 cimg.ra ksp.ra ;\ $(TOOLDIR)/pics -i10 -w 1. -m ksp.ra coils.ra reco.ra ;\ $(TOOLDIR)/nrmse -t 0.01 img.ra reco.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-pics-pi tests/test-pics-noncart tests/test-pics-cs tests/test-pics-pics TESTS += tests/test-pics-wavl1 tests/test-pics-poisson-wavl1 tests/test-pics-joint-wavl1 tests/test-pics-bpwavl1 TESTS += tests/test-pics-weights tests/test-pics-noncart-weights TESTS += tests/test-pics-warmstart tests/test-pics-batch TESTS += tests/test-pics-tedim bart-0.4.02/tests/pythoncfl.mk000066400000000000000000000007641320577655200162510ustar00rootroot00000000000000 # test python reacfl and writecfl interface tests/test-python-cfl: $(TOOLDIR)/tests/pythoncfl.py nrmse flip $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/flip 0 $(TESTS_OUT)/shepplogan.ra shepplogan ;\ PYTHONPATH=$(TOOLDIR)/python $(TOOLDIR)/tests/pythoncfl.py shepplogan shepplogan2 ;\ $(TOOLDIR)/nrmse -t 0.000001 shepplogan shepplogan2 ;\ rm *.cfl *.hdr ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS_PYTHON += tests/test-python-cfl bart-0.4.02/tests/pythoncfl.py000077500000000000000000000010121320577655200162600ustar00rootroot00000000000000#!/usr/bin/env python from __future__ import print_function import cfl import sys # from http://stackoverflow.com/questions/5574702/how-to-print-to-stderr-in-python def errprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) def main(out_name, in_name): input = cfl.readcfl(in_name) #cfl.writecfl(input, out_name) cfl.writecfl(out_name, input) return 0 if __name__ == '__main__': if len(sys.argv) != 3: errprint('Usage:', sys.argv[0], ' ') exit(main(sys.argv[2], sys.argv[1])) bart-0.4.02/tests/slice.mk000066400000000000000000000006161320577655200153360ustar00rootroot00000000000000 tests/test-slice: ones slice resize nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 2 1 1 o.ra ;\ $(TOOLDIR)/resize -c 0 100 1 100 o.ra o0.ra ;\ $(TOOLDIR)/slice 1 50 o0.ra o1.ra ;\ $(TOOLDIR)/slice 0 50 o1.ra o2.ra ;\ $(TOOLDIR)/nrmse -t 0. o2.ra o.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-slice bart-0.4.02/tests/squeeze.mk000066400000000000000000000006531320577655200157210ustar00rootroot00000000000000 tests/test-squeeze: ones squeeze noise nrmse set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 6 1 2 1 3 1 4 a0.ra ;\ $(TOOLDIR)/ones 3 2 3 4 b0.ra ;\ $(TOOLDIR)/noise a0.ra a1.ra ;\ $(TOOLDIR)/noise b0.ra b1.ra ;\ $(TOOLDIR)/squeeze a1.ra a2.ra ;\ $(TOOLDIR)/nrmse -t 0. b1.ra a2.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-squeeze bart-0.4.02/tests/tmp/000077500000000000000000000000001320577655200145035ustar00rootroot00000000000000bart-0.4.02/tests/tmp/.gitignore000066400000000000000000000001071320577655200164710ustar00rootroot00000000000000# Ignore everything in this directory * # Except this file !.gitignore bart-0.4.02/tests/wave.mk000066400000000000000000000015431320577655200152010ustar00rootroot00000000000000tests/test-wave: wave wavepsf scale nrmse $(TESTS_OUT)/shepplogan.ra $(TESTS_OUT)/shepplogan_coil_ksp.ra $(TESTS_OUT)/coils.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/wavepsf -x 640 -y 128 -z 1 wave_psf.ra ;\ $(TOOLDIR)/fft -iu 7 $(TESTS_OUT)/shepplogan_coil_ksp.ra img.ra ;\ $(TOOLDIR)/resize -c 0 640 img.ra wave_zpad.ra ;\ $(TOOLDIR)/fft -u 1 wave_zpad.ra wave_hyb.ra ;\ $(TOOLDIR)/fmac wave_hyb.ra wave_psf.ra wave_acq.ra ;\ $(TOOLDIR)/fft -u 6 wave_acq.ra wave_ksp.ra ;\ $(TOOLDIR)/wave wave_ksp.ra $(TESTS_OUT)/coils.ra wave_psf.ra reco.ra ;\ $(TOOLDIR)/nrmse -t 0.23 -s reco.ra $(TESTS_OUT)/shepplogan.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-wave bart-0.4.02/tests/wavelet.mk000066400000000000000000000071531320577655200157110ustar00rootroot00000000000000 tests/test-wavelet: wavelet nrmse $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/wavelet 3 $(TESTS_OUT)/shepplogan.ra w.ra ;\ $(TOOLDIR)/wavelet -a 3 128 128 w.ra a.ra ;\ $(TOOLDIR)/nrmse -t 0.000001 $(TESTS_OUT)/shepplogan.ra a.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-wavelet-batch: ones resize circshift fft fmac wavelet nrmse $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 1 1 one.ra ;\ $(TOOLDIR)/resize -c 2 100 one.ra one2.ra ;\ $(TOOLDIR)/circshift 2 1 one2.ra one3.ra ;\ $(TOOLDIR)/fft 4 one3.ra ph.ra ;\ $(TOOLDIR)/fmac $(TESTS_OUT)/shepplogan.ra ph.ra ph2.ra ;\ $(TOOLDIR)/wavelet 3 ph2.ra w.ra ;\ $(TOOLDIR)/wavelet -a 3 128 128 w.ra a.ra ;\ $(TOOLDIR)/nrmse -t 0.000001 ph2.ra a.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-wavelet-batch1: ones resize circshift fft fmac wavelet slice nrmse $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 1 1 one.ra ;\ $(TOOLDIR)/resize -c 2 100 one.ra one2.ra ;\ $(TOOLDIR)/circshift 2 1 one2.ra one3.ra ;\ $(TOOLDIR)/fft 4 one3.ra ph.ra ;\ $(TOOLDIR)/fmac $(TESTS_OUT)/shepplogan.ra ph.ra ph2.ra ;\ $(TOOLDIR)/wavelet 3 ph2.ra w.ra ;\ $(TOOLDIR)/slice 2 80 w.ra w1.ra ;\ $(TOOLDIR)/slice 2 80 ph2.ra ph1.ra ;\ $(TOOLDIR)/wavelet 3 ph1.ra a.ra ;\ $(TOOLDIR)/nrmse -t 0.000001 w1.ra a.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ tests/test-wavelet-batch2: ones resize circshift fft fmac transpose wavelet nrmse $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 1 1 one.ra ;\ $(TOOLDIR)/resize -c 2 100 one.ra one2.ra ;\ $(TOOLDIR)/circshift 2 1 one2.ra one3.ra ;\ $(TOOLDIR)/fft 4 one3.ra ph.ra ;\ $(TOOLDIR)/fmac $(TESTS_OUT)/shepplogan.ra ph.ra ph2.ra ;\ $(TOOLDIR)/transpose 0 2 ph2.ra ph3.ra ;\ $(TOOLDIR)/wavelet 6 ph3.ra w.ra ;\ $(TOOLDIR)/wavelet -a 6 128 128 w.ra a.ra ;\ $(TOOLDIR)/nrmse -t 0.000001 ph3.ra a.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-wavelet-batch3: ones resize circshift fft fmac transpose wavelet nrmse $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 1 1 one.ra ;\ $(TOOLDIR)/resize -c 2 100 one.ra one2.ra ;\ $(TOOLDIR)/circshift 2 1 one2.ra one3.ra ;\ $(TOOLDIR)/fft 4 one3.ra ph.ra ;\ $(TOOLDIR)/fmac $(TESTS_OUT)/shepplogan.ra ph.ra ph2.ra ;\ $(TOOLDIR)/transpose 1 2 ph2.ra ph3.ra ;\ $(TOOLDIR)/wavelet 5 ph3.ra w.ra ;\ $(TOOLDIR)/wavelet -a 5 128 128 w.ra a.ra ;\ $(TOOLDIR)/nrmse -t 0.000001 ph3.ra a.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ tests/test-wavelet-batch4: ones resize circshift fft fmac transpose wavelet nrmse $(TESTS_OUT)/shepplogan.ra set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/ones 1 1 one.ra ;\ $(TOOLDIR)/resize -c 2 5 3 5 4 5 one.ra one2.ra ;\ $(TOOLDIR)/circshift 2 1 one2.ra one3.ra ;\ $(TOOLDIR)/fft 28 one3.ra ph.ra ;\ $(TOOLDIR)/fmac $(TESTS_OUT)/shepplogan.ra ph.ra ph2.ra ;\ $(TOOLDIR)/transpose 0 3 ph2.ra ph3.ra ;\ $(TOOLDIR)/wavelet 10 ph3.ra w.ra ;\ $(TOOLDIR)/wavelet -a 10 128 128 w.ra a.ra ;\ $(TOOLDIR)/nrmse -t 0.000001 ph3.ra a.ra ;\ rm *.ra ; cd .. 
; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-wavelet tests/test-wavelet-batch tests/test-wavelet-batch1 TESTS += tests/test-wavelet-batch2 tests/test-wavelet-batch3 tests/test-wavelet-batch4 bart-0.4.02/tests/whiten.mk000066400000000000000000000014621320577655200155350ustar00rootroot00000000000000 tests/test-whiten: zeros ones noise join whiten std nrmse show set -e; mkdir $(TESTS_TMP) ; cd $(TESTS_TMP) ;\ $(TOOLDIR)/zeros 1 4096 z.ra ;\ $(TOOLDIR)/noise -s 1 -n 1 z.ra n1.ra ;\ $(TOOLDIR)/noise -s 2 -n 2 z.ra n2.ra ;\ $(TOOLDIR)/noise -s 3 -n 3 z.ra n3.ra ;\ $(TOOLDIR)/join 3 n1.ra n2.ra n3.ra n.ra ;\ $(TOOLDIR)/ones 1 4096 o.ra ;\ $(TOOLDIR)/noise -s 1 -n 1 o.ra s1.ra ;\ $(TOOLDIR)/noise -s 2 -n 2 o.ra s2.ra ;\ $(TOOLDIR)/noise -s 3 -n 3 o.ra s3.ra ;\ $(TOOLDIR)/join 3 s1.ra s2.ra s3.ra s.ra ;\ $(TOOLDIR)/whiten s.ra n.ra w.ra ;\ $(TOOLDIR)/std 7 w.ra d.ra ;\ $(TOOLDIR)/ones 4 1 1 1 3 o.ra ;\ $(TOOLDIR)/nrmse -t 0.001 d.ra o.ra ;\ rm *.ra ; cd .. ; rmdir $(TESTS_TMP) touch $@ TESTS += tests/test-whiten bart-0.4.02/utests/000077500000000000000000000000001320577655200140705ustar00rootroot00000000000000bart-0.4.02/utests/test_batchsvd.c000066400000000000000000000025761320577655200171030ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Martin Uecker */ #include #include "lowrank/batchsvd.h" #include "num/flpmath.h" #include "misc/debug.h" #include "misc/misc.h" #include "utest.h" static bool test_batch_svthresh_tall(void) { complex float inout[1][5][3] = { { { 3., 0., 0. }, { 0., 2., 0. }, { 0., 0., 1. }, { 0., 0., 0. }, { 0., 0., 0. }, } }; batch_svthresh(3, 5, 1, 1., inout); const complex float ref[1][5][3] = { { { 2., 0., 0. }, { 0., 1., 0. }, { 0., 0., 0. }, { 0., 0., 0. }, { 0., 0., 0. 
}, } }; long dims[3] = { 3, 5, 1 }; UT_ASSERT(md_znrmse(3, dims, &ref[0][0][0], &inout[0][0][0]) < UT_TOL); return true; } static bool test_batch_svthresh_wide(void) { complex float inout[1][3][5] = { { { 3., 0., 0., 0., 0. }, { 0., 2., 0., 0., 0. }, { 0., 0., 1., 0., 0. }, } }; batch_svthresh(5, 3, 1, 1., inout); const complex float ref[1][3][5] = { { { 2., 0., 0., 0., 0. }, { 0., 1., 0., 0., 0. }, { 0., 0., 0., 0., 0. }, } }; long dims[3] = { 5, 3, 1 }; UT_ASSERT(md_znrmse(3, dims, &ref[0][0][0], &inout[0][0][0]) < UT_TOL); return true; } UT_REGISTER_TEST(test_batch_svthresh_tall); UT_REGISTER_TEST(test_batch_svthresh_wide); bart-0.4.02/utests/test_biot_savart.c000066400000000000000000000020741320577655200176130ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Martin Uecker */ #include #include #include #include "num/vec3.h" #include "simu/biot_savart.h" #include "utest.h" static bool test_vec3_ring(void) { unsigned int N = 10; vec3_t r[N]; vec3_t c = { 0., 0., 0. }; vec3_t n = { 1., 0., 0. }; vec3_ring(N, r, c, n, 0.33); bool ok = true; for (unsigned int i = 0; i < N; i++) ok &= (1.E-6 > fabs(0.33 - vec3_norm(r[i]))); for (unsigned int i = 0; i < N; i++) ok &= (0. == vec3_sdot(r[i], n)); return ok; } static bool test_biot_savart(void) { unsigned int N = 100; vec3_t r[N]; vec3_t c = { 0.6, 0.3, 0.1 }; vec3_t n = { 1., 0., 0. }; vec3_ring(N, r, c, n, 0.5); vec3_t x; biot_savart(x, c, N, (const vec3_t*)r); vec3_t d; vec3_sub(d, x, n); return (1.E-3 > vec3_norm(d)); } UT_REGISTER_TEST(test_vec3_ring); UT_REGISTER_TEST(test_biot_savart); bart-0.4.02/utests/test_blas.c000066400000000000000000000035661320577655200162260ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2017 Martin Uecker */ #include #include "num/multind.h" #include "num/flpmath.h" #include "num/blas.h" #include "num/rand.h" #include "misc/misc.h" #include "misc/debug.h" #include "utest.h" static void matrix_mult(int A, int B, int C, complex float (*dst)[A][C], const complex float (*src1)[A][B], const complex float (*src2)[B][C]) { for (int i = 0; i < A; i++) { for (int k = 0; k < C; k++) { (*dst)[i][k] = 0.; for (int j = 0; j < B; j++) (*dst)[i][k] += (*src1)[i][j] * (*src2)[j][k]; } } } static bool test_blas_matrix_mult(void) { int A = 10; int B = 20; int C = 30; long odims[3] = { A, 1, C }; long idims1[3] = { 1, B, C }; long idims2[3] = { A, B, 1 }; complex float* dst1 = md_alloc(3, odims, CFL_SIZE); complex float* dst2 = md_alloc(3, odims, CFL_SIZE); complex float* src1 = md_alloc(3, idims1, CFL_SIZE); complex float* src2 = md_alloc(3, idims2, CFL_SIZE); md_gaussian_rand(3, odims, dst1); md_gaussian_rand(3, odims, dst2); md_gaussian_rand(3, idims1, src1); md_gaussian_rand(3, idims2, src2); blas_matrix_multiply(A, C, B, MD_CAST_ARRAY2(complex float, 3, odims, dst1, 0, 2), MD_CAST_ARRAY2(const complex float, 3, idims2, src2, 0, 1), MD_CAST_ARRAY2(const complex float, 3, idims1, src1, 1, 2)); // (A^T B^T)^T = B A matrix_mult(C, B, A, &MD_CAST_ARRAY2(complex float, 3, odims, dst2, 0, 2), &MD_CAST_ARRAY2(const complex float, 3, idims1, src1, 1, 2), &MD_CAST_ARRAY2(const complex float, 3, idims2, src2, 0, 1)); double err = md_znrmse(3, odims, dst2, dst1); md_free(src1); md_free(src2); md_free(dst1); md_free(dst2); return (err < UT_TOL); } UT_REGISTER_TEST(test_blas_matrix_mult); bart-0.4.02/utests/test_flpmath.c000066400000000000000000000126761320577655200167420ustar00rootroot00000000000000/* Copyright 2016. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2016 Jonathan Tamir * 2016 Martin Uecker */ #include #include "num/flpmath.h" #include "num/multind.h" #include "num/rand.h" #include "misc/misc.h" #include "misc/debug.h" #include "utest.h" // include test data #include "test_flpmath_data.h" static bool test_md_zfmacc2_flags(unsigned int D, const long idims[D], unsigned int flags, const complex float* in1, const complex float* in2, const complex float* out_ref) { long odims[D]; md_select_dims(D, ~flags, odims, idims); complex float* out = md_calloc(D, odims, CFL_SIZE); long istr[D]; long ostr[D]; md_calc_strides(D, istr, idims, CFL_SIZE); md_calc_strides(D, ostr, odims, CFL_SIZE); md_zfmacc2(D, idims, ostr, out, istr, in1, istr, in2); float err = md_znrmse(D, odims, out_ref, out); md_free(out); UT_ASSERT(err < UT_TOL); return true; } /* * Tests based on previously generated data included in the header file */ static bool test_md_zfmacc2(void) { long idims[4] = { 3, 3, 3, 3 }; bool ret = true; for (unsigned int flags = 0u; flags < 16u; flags++) { debug_printf(DP_DEBUG1, "Testing md_zfmacc2_flags with flags=%d\n", flags); ret &= test_md_zfmacc2_flags(4, idims, flags, test_md_in0, test_md_in1, test_md_zfmacc2_out[flags]); } return ret; } static bool test_md_zavg_flags(unsigned int D, const long idims[D], unsigned int flags, const complex float* in, const complex float* out_ref, bool wavg) { long odims[D]; md_select_dims(D, ~flags, odims, idims); complex float* out = md_alloc(D, odims, CFL_SIZE); (wavg ? 
md_zwavg : md_zavg)(D, idims, flags, out, in); float err = md_znrmse(D, odims, out_ref, out); md_free(out); UT_ASSERT(err < UT_TOL); return true; } /* * Tests based on previously generated data included in the header file */ static bool test_md_zwavg(void) { long idims[4] = { 3, 3, 3, 3 }; bool wavg = true; bool ret = true; for (unsigned int flags = 0u; flags < 16u; flags++) { debug_printf(DP_DEBUG1, "Testing md_zwavg_flags with flags=%d\n", flags); ret &= test_md_zavg_flags(4, idims, flags, test_md_in0, test_md_zwavg_out[flags], wavg); } return ret; } static bool test_md_zavg(void) { long idims[4] = { 3, 3, 3, 3 }; bool wavg = false; bool ret = true; for (unsigned int flags = 0u; flags < 16u; flags++) { debug_printf(DP_DEBUG1, "Testing md_zavg_flags with flags=%d\n", flags); ret &= test_md_zavg_flags(4, idims, flags, test_md_in0, test_md_zavg_out[flags], wavg); } return ret; } static void matrix_mult(int A, int B, int C, complex float (*dst)[A][C], const complex float (*src1)[A][B], const complex float (*src2)[B][C]) { for (int i = 0; i < A; i++) { for (int k = 0; k < C; k++) { (*dst)[i][k] = 0.; for (int j = 0; j < B; j++) (*dst)[i][k] += (*src1)[i][j] * (*src2)[j][k]; } } } static bool test_md_zmatmul(void) { int A = 10; int B = 20; int C = 30; long odims[3] = { C, 1, A }; long idims1[3] = { 1, B, A }; long idims2[3] = { C, B, 1 }; complex float* dst1 = md_alloc(3, odims, CFL_SIZE); complex float* dst2 = md_alloc(3, odims, CFL_SIZE); complex float* src1 = md_alloc(3, idims1, CFL_SIZE); complex float* src2 = md_alloc(3, idims2, CFL_SIZE); md_gaussian_rand(3, odims, dst1); md_gaussian_rand(3, odims, dst2); md_gaussian_rand(3, idims1, src1); md_gaussian_rand(3, idims2, src2); md_zmatmul(3, odims, dst1, idims1, src1, idims2, src2); matrix_mult(A, B, C, &MD_CAST_ARRAY2(complex float, 3, odims, dst2, 0, 2), &MD_CAST_ARRAY2(const complex float, 3, idims1, src1, 1, 2), &MD_CAST_ARRAY2(const complex float, 3, idims2, src2, 0, 1)); double err = md_znrmse(3, odims, dst2, 
dst1); md_free(src1); md_free(src2); md_free(dst1); md_free(dst2); return (err < UT_TOL); } static bool test_md_zhardthresh(void) { complex float test_vec[] = { 1., 2., 3., 4., 5., 6., 7., 8., 9. }; unsigned int N = ARRAY_SIZE(test_vec); complex float test_out[N]; unsigned int k = 5; md_zhardthresh(1, (long[1]){ N }, k, 0, test_out, test_vec); bool ok = true; for (unsigned int i = 0; i < N - k; i++) ok &= (0. == test_out[i]); for (unsigned int i = N - k; i < N; i++) ok &= (test_vec[i] == test_out[i]); return ok; } static bool test_md_zvar(void) { const complex float test_vec[] = { 1 -6.j, 2 - 5.j, 3 - 4.j, 4 - 3.j, 5 - 2.j, 6 - 1.j }; const complex float ref[] = { 8., 8. }; long idims[2] = { 2, 3 }; long odims[2] = { 2, 1 }; complex float* out = md_alloc(2, odims, CFL_SIZE); md_zvar(2, idims, MD_BIT(1), out, test_vec); double err = md_znrmse(2, odims, ref, out); md_free(out); return (err < UT_TOL); } static bool test_md_zstd(void) { const complex float test_vec[] = { 1 -6.j, 2 - 5.j, 3 - 4.j, 4 - 3.j, 5 - 2.j, 6 - 1.j }; const complex float ref[] = { 1., 1., 1.}; long idims[2] = { 2, 3 }; long odims[2] = { 1, 3 }; complex float* out = md_alloc(2, odims, CFL_SIZE); md_zstd(2, idims, MD_BIT(0), out, test_vec); double err = md_znrmse(2, odims, ref, out); md_free(out); return (err < UT_TOL); } UT_REGISTER_TEST(test_md_zfmacc2); UT_REGISTER_TEST(test_md_zwavg); UT_REGISTER_TEST(test_md_zavg); UT_REGISTER_TEST(test_md_zmatmul); UT_REGISTER_TEST(test_md_zhardthresh); UT_REGISTER_TEST(test_md_zvar); UT_REGISTER_TEST(test_md_zstd); bart-0.4.02/utests/test_flpmath_data.h000066400000000000000000000631161320577655200177330ustar00rootroot00000000000000 const complex float test_md_in0[] = { +0.000000e+00+0.000000e+00i, +2.055377e+00+1.326829e-03i, +1.195222e+00-1.464677e-01i, +1.176603e+00-8.083734e-01i, +0.000000e+00+0.000000e+00i, -3.515362e-01-1.362136e+00i, +1.680448e+00-4.103929e-01i, +5.236822e-01-3.636895e-01i, +1.681717e+00-5.943724e-01i, +1.466989e+00-1.560116e+00i, 
+1.437756e+00-1.145047e-01i, +0.000000e+00+0.000000e+00i, +1.261929e+00+1.136487e-01i, +1.333536e+00+1.374940e+00i, +0.000000e+00+0.000000e+00i, +2.753761e-02-6.144895e-02i, +7.782375e-01+8.355249e-01i, +9.668950e-01-6.451524e-02i, +0.000000e+00+0.000000e+00i, +1.351041e+00-5.060037e-01i, +0.000000e+00+0.000000e+00i, +4.692753e-01-1.272591e-01i, +0.000000e+00+0.000000e+00i, +1.606379e+00+8.576965e-01i, +9.275984e-01+4.085222e-01i, +1.980575e+00-2.203636e-01i, +0.000000e+00+0.000000e+00i, +8.705365e-01-1.462714e-01i, +7.474442e-01+1.056995e-01i, +3.951547e-01+7.911025e-02i, +2.356944e-01+8.030261e-01i, +1.788536e+00-6.211302e-01i, +1.135170e+00+7.936571e-01i, +1.330269e+00-7.232582e-01i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +1.042533e+00-1.176756e+00i, +9.316891e-01-5.066474e-02i, +1.971207e+00-1.423560e-01i, +0.000000e+00+0.000000e+00i, +1.292203e+00-1.797395e+00i, +1.068649e+00-3.408340e-02i, +9.923827e-01-2.130693e-01i, +1.050540e+00+1.981200e+00i, -2.409087e-01+1.063292e+00i, +0.000000e+00+0.000000e+00i, +1.170681e+00+6.509815e-01i, +0.000000e+00+0.000000e+00i, -2.989733e-01+1.690195e+00i, +9.002356e-01+2.762856e-01i, +1.748728e+00-1.088211e+00i, +0.000000e+00+0.000000e+00i, +5.186851e-01+1.769070e+00i, +1.205285e+00+2.983286e-01i, +6.900309e-01+4.038573e-01i, +6.311213e-01-4.964017e-02i, -3.051573e-01-8.232310e-01i, +2.171920e+00+4.882384e-01i, +3.193713e-01+6.740723e-01i, +0.000000e+00+0.000000e+00i, +6.873003e-01-7.488998e-03i, +1.446579e-01-4.822526e-01i, +1.022734e+00+1.512557e-01i, +1.292160e+00-4.191149e-01i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +3.354388e-01-1.880779e-01i, +1.023323e+00+8.066830e-01i, -2.286107e-01-4.679730e-01i, +0.000000e+00+0.000000e+00i, +1.675541e+00+8.596825e-02i, +2.613676e-01-7.391413e-02i, +8.977595e-01+3.807217e-01i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +1.306031e+00-4.057826e-02i, +1.046151e+00+4.341971e-01i, +1.969883e+00-1.505297e+00i, 
+1.308183e+00+5.377352e-01i, +1.098623e+00+4.579275e-01i, +1.406074e+00+3.533691e-01i, }; const complex float test_md_in1[] = { -1.739342e+00+6.139934e-01i, +1.055377e+00+1.326829e-03i, +0.000000e+00+0.000000e+00i, +1.766032e-01-8.083734e-01i, +0.000000e+00+0.000000e+00i, -1.351536e+00-1.362136e+00i, +6.804481e-01-4.103929e-01i, -4.763178e-01-3.636895e-01i, +0.000000e+00+0.000000e+00i, +4.669890e-01-1.560116e+00i, +4.377561e-01-1.145047e-01i, +5.442010e-01+2.287491e-01i, +2.619285e-01+1.136487e-01i, +3.335362e-01+1.374940e+00i, +5.712720e-01-5.553765e-01i, -9.724624e-01-6.144895e-02i, -2.217625e-01+8.355249e-01i, -3.310501e-02-6.451524e-02i, -8.968572e-01-4.953450e-01i, +3.510412e-01-5.060037e-01i, -5.060645e-01+1.026288e-01i, -5.307246e-01-1.272591e-01i, -4.097993e-01+2.641491e-01i, +6.063793e-01+8.576965e-01i, -7.240165e-02+4.085222e-01i, +9.805748e-01-2.203636e-01i, -8.162820e-01+7.263730e-01i, -1.294635e-01-1.462714e-01i, -2.525558e-01+1.056995e-01i, +0.000000e+00+0.000000e+00i, -7.643057e-01+8.030261e-01i, +7.885358e-01-6.211302e-01i, +0.000000e+00+0.000000e+00i, +3.302687e-01-7.232582e-01i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +9.712067e-01-1.423560e-01i, -7.432643e-02-8.247252e-01i, +2.922028e-01-1.797395e+00i, +6.864856e-02-3.408340e-02i, +0.000000e+00+0.000000e+00i, +5.054002e-02+1.981200e+00i, -1.240909e+00+1.063292e+00i, -1.493490e-01+1.064104e+00i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, -1.298973e+00+1.690195e+00i, -9.976440e-02+2.762856e-01i, +0.000000e+00+0.000000e+00i, +3.488271e-03+1.817008e+00i, -4.813149e-01+1.769070e+00i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, -3.688787e-01-4.964017e-02i, -1.305157e+00-8.232310e-01i, +1.171920e+00+4.882384e-01i, -6.806287e-01+6.740723e-01i, +0.000000e+00+0.000000e+00i, -3.126998e-01-7.488998e-03i, -8.553421e-01-4.822526e-01i, +0.000000e+00+0.000000e+00i, +2.921597e-01-4.191149e-01i, 
+6.834013e-01+7.885104e-02i, +3.068649e-01-2.382343e+00i, -6.645612e-01-1.880779e-01i, +0.000000e+00+0.000000e+00i, -1.228611e+00-4.679730e-01i, +2.430212e-02-1.069518e+00i, +6.755413e-01+8.596825e-02i, -7.386324e-01-7.391413e-02i, +0.000000e+00+0.000000e+00i, -2.375880e-01+3.568163e-01i, +4.391219e-01+1.319193e-01i, +3.060309e-01-4.057826e-02i, +4.615126e-02+4.341971e-01i, +9.698826e-01-1.505297e+00i, +3.081829e-01+5.377352e-01i, +0.000000e+00+0.000000e+00i, +4.060740e-01+3.533691e-01i, }; const complex float test_md_zwavg_1_out[] = { +1.625299e+00-7.257044e-02i, +4.125334e-01-1.085255e+00i, +1.295282e+00-4.561516e-01i, +1.452373e+00-8.373104e-01i, +1.297733e+00+7.442943e-01i, +5.908900e-01+2.365202e-01i, +1.351041e+00-5.060037e-01i, +1.037827e+00+3.652187e-01i, +1.454087e+00+9.407929e-02i, +6.710451e-01+1.284612e-02i, +1.053133e+00+3.251843e-01i, +1.330269e+00-7.232582e-01i, +1.315143e+00-4.565923e-01i, +1.180426e+00-9.157392e-01i, +6.006714e-01+9.438076e-01i, +1.170681e+00+6.509815e-01i, +7.833301e-01+2.927565e-01i, +8.619850e-01+1.033699e+00i, +3.386649e-01-1.563379e-01i, +1.245646e+00+5.811554e-01i, +6.182307e-01-1.128286e-01i, +1.292160e+00-4.191149e-01i, +3.767170e-01+5.021071e-02i, +9.684543e-01+6.027058e-03i, +8.977595e-01+3.807217e-01i, +1.440688e+00-3.705594e-01i, +1.270960e+00+4.496773e-01i, }; const complex float test_md_zwavg_2_out[] = { +1.428525e+00-6.093832e-01i, +1.289530e+00-1.811813e-01i, +8.418009e-01-7.009920e-01i, +9.188186e-01-5.026388e-01i, +1.183177e+00+6.986534e-01i, +9.668950e-01-6.451524e-02i, +6.984369e-01+1.406315e-01i, +1.665808e+00-3.631836e-01i, +1.606379e+00+8.576965e-01i, +8.121666e-01-2.216784e-02i, +1.267990e+00-2.577154e-01i, +7.651623e-01+4.363837e-01i, +1.017458e+00-6.949127e-01i, +1.091477e+00+4.438007e-02i, +9.329824e-01+2.956175e-01i, -2.989733e-01+1.690195e+00i, +8.632006e-01+8.987792e-01i, +1.477006e+00-3.949412e-01i, +1.183084e+00+2.948689e-01i, +3.650502e-01+4.739318e-02i, +3.587884e-01-3.359876e-01i, 
+8.137994e-01-3.035964e-01i, +1.349432e+00+4.463256e-01i, +1.637845e-02-2.709436e-01i, +1.170658e+00+2.926262e-01i, +1.072387e+00+4.460623e-01i, +1.687979e+00-5.759639e-01i, }; const complex float test_md_zwavg_3_out[] = { +1.137359e+00-5.263007e-01i, +1.038983e+00+7.478981e-02i, +1.266974e+00+8.251847e-02i, +9.289721e-01+4.154758e-02i, +1.013537e+00-4.622906e-02i, +8.741069e-01+5.994416e-01i, +6.702473e-01+4.435137e-02i, +7.265366e-01-4.273811e-02i, +1.290386e+00+8.829648e-02i, }; const complex float test_md_zwavg_4_out[] = { +1.466989e+00-1.560116e+00i, +1.614725e+00-2.063939e-01i, +1.195222e+00-1.464677e-01i, +9.692690e-01-2.739946e-01i, +1.333536e+00+1.374940e+00i, +6.274214e-01-2.522198e-01i, +8.785281e-01-2.110656e-02i, +1.094165e+00+8.382394e-02i, +1.324306e+00-3.294438e-01i, +9.565347e-01-6.615137e-01i, +9.499381e-01+2.353387e-01i, +1.183181e+00-3.162287e-02i, -3.163945e-02+1.246611e+00i, +1.326992e+00-7.140799e-01i, +1.317516e+00-1.095458e-01i, +1.161326e+00-4.681638e-01i, +7.846125e-01+1.875135e+00i, +4.821881e-01+6.808103e-01i, +9.599834e-01+1.218214e-01i, +6.311213e-01-4.964017e-02i, -3.051573e-01-8.232310e-01i, +1.271130e+00+8.652741e-02i, +7.962818e-01+6.383175e-01i, +8.706361e-01-9.866350e-01i, +9.977416e-01+2.651231e-01i, +9.729407e-01+2.054772e-02i, +8.967251e-01+1.435702e-01i, }; const complex float test_md_zwavg_5_out[] = { +1.501277e+00-4.651531e-01i, +9.160311e-01+8.086115e-03i, +1.070836e+00-5.884194e-02i, +1.018464e+00-9.717958e-02i, +9.837804e-01+2.793014e-03i, +8.093755e-01+6.959271e-01i, +6.411828e-01-1.014814e-01i, +9.929384e-01+2.515808e-02i, +9.505602e-01+1.278250e-01i, }; const complex float test_md_zwavg_6_out[] = { +1.001483e+00-3.493457e-01i, +1.351458e+00+1.438900e-01i, +1.019735e+00-2.619590e-01i, +6.954071e-01+3.897768e-02i, +1.050002e+00+2.892558e-01i, +1.040469e+00+1.385339e-01i, +1.086103e+00+1.444116e-01i, +8.483983e-01+2.752793e-01i, +6.877151e-01-3.942984e-01i, }; const complex float test_md_zwavg_7_out[] = { 
+1.135224e+00-1.446307e-01i, +9.455114e-01+1.675072e-01i, +8.930431e-01+3.411703e-02i, }; const complex float test_md_zwavg_8_out[] = { +7.802837e-01+1.287929e-01i, +1.144647e+00+1.912872e-02i, +4.284065e-01-2.968628e-01i, +1.194739e+00+1.609637e-01i, +1.053954e+00+2.647105e-02i, +3.918169e-01-2.842394e-01i, +1.232672e+00-3.803800e-01i, +3.341700e-01-4.229711e-01i, +1.352226e+00-2.215583e-01i, +1.267227e+00-1.051996e+00i, +1.184723e+00-8.258472e-02i, +1.971207e+00-1.423560e-01i, +7.986839e-01-3.721460e-02i, +1.216354e+00+1.280760e-01i, +4.200192e-01-2.510282e-01i, +5.099602e-01-1.372591e-01i, +1.168106e+00+9.675643e-01i, +3.291180e-01+3.082876e-01i, +8.977595e-01+3.807217e-01i, +1.260861e+00+7.248890e-02i, +0.000000e+00+0.000000e+00i, +4.921110e-01+5.074525e-01i, +9.731933e-01+3.552414e-01i, +1.774997e+00-5.786039e-01i, +1.117891e+00+4.731287e-01i, +1.199294e+00+6.688780e-01i, +1.305680e+00+3.258488e-01i, }; const complex float test_md_zwavg_9_out[] = { +7.849661e-01-7.195204e-02i, +9.251084e-01-4.663655e-03i, +1.010116e+00-3.471713e-01i, +1.357056e+00-5.772520e-01i, +8.694955e-01-2.746537e-02i, +6.889491e-01+4.441297e-01i, +1.139827e+00+1.752332e-01i, +1.093464e+00+6.212860e-02i, +1.206432e+00+5.149413e-01i, }; const complex float test_md_zwavg_10_out[] = { +1.105350e+00-5.008288e-02i, +8.871700e-01-1.050877e-01i, +6.819006e-01-2.717406e-01i, +9.169958e-01-5.007049e-01i, +1.190353e+00+3.902190e-01i, +6.330999e-01+4.674171e-02i, +7.683123e-01+4.748894e-01i, +1.152285e+00+4.088706e-01i, +1.587270e+00-2.168228e-01i, }; const complex float test_md_zwavg_11_out[] = { +9.011952e-01-1.381119e-01i, +9.400189e-01-4.892006e-03i, +1.145123e+00+2.570732e-01i, }; const complex float test_md_zwavg_12_out[] = { +1.043335e+00-4.196132e-01i, +1.189301e+00+5.313500e-03i, +8.141066e-01-2.582361e-01i, +8.322397e-01+2.413524e-01i, +1.100479e+00+1.639504e-01i, +9.926661e-01-4.009067e-01i, +9.933884e-01-6.705729e-02i, +9.713178e-01+5.079231e-01i, +9.004521e-01+1.619205e-01i, }; const 
complex float test_md_zwavg_13_out[] = { +1.049503e+00-2.066723e-01i, +9.686334e-01+1.236934e-02i, +9.557921e-01+2.148831e-01i, }; const complex float test_md_zwavg_14_out[] = { +9.462690e-01-5.029815e-02i, +1.081773e+00+2.385560e-01i, +9.171251e-01-1.503249e-01i, }; const complex float test_md_zwavg_15_out[] = { +9.865393e-01+2.436269e-02i, }; const complex float* test_md_zwavg_out[] = { test_md_in0, test_md_zwavg_1_out, test_md_zwavg_2_out, test_md_zwavg_3_out, test_md_zwavg_4_out, test_md_zwavg_5_out, test_md_zwavg_6_out, test_md_zwavg_7_out, test_md_zwavg_8_out, test_md_zwavg_9_out, test_md_zwavg_10_out, test_md_zwavg_11_out, test_md_zwavg_12_out, test_md_zwavg_13_out, test_md_zwavg_14_out, test_md_zwavg_15_out }; const complex float test_md_zavg_1_out[] = { +1.083533e+00-4.838029e-02i, +2.750223e-01-7.235031e-01i, +1.295282e+00-4.561516e-01i, +9.682484e-01-5.582069e-01i, +8.651551e-01+4.961962e-01i, +5.908900e-01+2.365203e-01i, +4.503470e-01-1.686679e-01i, +6.918848e-01+2.434791e-01i, +9.693911e-01+6.271953e-02i, +6.710452e-01+1.284612e-02i, +1.053133e+00+3.251843e-01i, +4.434230e-01-2.410861e-01i, +1.315143e+00-4.565923e-01i, +7.869507e-01-6.104928e-01i, +6.006714e-01+9.438076e-01i, +3.902270e-01+2.169938e-01i, +7.833301e-01+2.927565e-01i, +5.746567e-01+6.891329e-01i, +3.386650e-01-1.563380e-01i, +8.304304e-01+3.874369e-01i, +6.182308e-01-1.128286e-01i, +4.307200e-01-1.397050e-01i, +3.767170e-01+5.021071e-02i, +6.456362e-01+4.018039e-03i, +2.992532e-01+1.269072e-01i, +1.440688e+00-3.705594e-01i, +1.270960e+00+4.496773e-01i, }; const complex float test_md_zavg_2_out[] = { +9.523503e-01-4.062555e-01i, +8.596864e-01-1.207876e-01i, +8.418009e-01-7.009920e-01i, +9.188186e-01-5.026388e-01i, +1.183177e+00+6.986534e-01i, +3.222983e-01-2.150508e-02i, +4.656246e-01+9.375437e-02i, +1.110539e+00-2.421224e-01i, +5.354597e-01+2.858988e-01i, +8.121666e-01-2.216784e-02i, +8.453268e-01-1.718103e-01i, +5.101082e-01+2.909225e-01i, +6.783053e-01-4.632751e-01i, 
+1.091477e+00+4.438007e-02i, +9.329824e-01+2.956176e-01i, -9.965777e-02+5.633984e-01i, +8.632006e-01+8.987791e-01i, +9.846710e-01-2.632942e-01i, +1.183084e+00+2.948689e-01i, +3.650502e-01+4.739318e-02i, +2.391922e-01-2.239918e-01i, +5.425329e-01-2.023976e-01i, +8.996214e-01+2.975504e-01i, +1.091897e-02-1.806290e-01i, +1.170658e+00+2.926262e-01i, +7.149247e-01+2.973749e-01i, +1.125319e+00-3.839760e-01i, }; const complex float test_md_zavg_3_out[] = { +8.846125e-01-4.093450e-01i, +8.080978e-01+5.816986e-02i, +7.038743e-01+4.584359e-02i, +7.225339e-01+3.231478e-02i, +9.009216e-01-4.109250e-02i, +5.827379e-01+3.996278e-01i, +5.957754e-01+3.942344e-02i, +4.843578e-01-2.849207e-02i, +1.003634e+00+6.867504e-02i, }; const complex float test_md_zavg_4_out[] = { +4.889964e-01-5.200387e-01i, +1.614725e+00-2.063939e-01i, +3.984073e-01-4.882257e-02i, +9.692690e-01-2.739946e-01i, +4.445120e-01+4.583133e-01i, +4.182809e-01-1.681465e-01i, +8.785281e-01-2.110656e-02i, +1.094165e+00+8.382394e-02i, +8.828707e-01-2.196292e-01i, +6.376898e-01-4.410091e-01i, +9.499382e-01+2.353387e-01i, +7.887872e-01-2.108192e-02i, -2.109297e-02+8.310737e-01i, +1.326992e+00-7.140799e-01i, +1.317516e+00-1.095458e-01i, +7.742172e-01-3.121092e-01i, +5.230750e-01+1.250090e+00i, +3.214588e-01+4.538736e-01i, +9.599835e-01+1.218214e-01i, +2.103738e-01-1.654672e-02i, -1.017191e-01-2.744103e-01i, +1.271130e+00+8.652741e-02i, +7.962818e-01+6.383175e-01i, +5.804241e-01-6.577567e-01i, +6.651611e-01+1.767487e-01i, +9.729406e-01+2.054772e-02i, +8.967252e-01+1.435702e-01i, }; const complex float test_md_zavg_5_out[] = { +8.340428e-01-2.584184e-01i, +6.106874e-01+5.390743e-03i, +9.518545e-01-5.230394e-02i, +7.921384e-01-7.558411e-02i, +8.744715e-01+2.482679e-03i, +5.395837e-01+4.639514e-01i, +3.562127e-01-5.637856e-02i, +8.826119e-01+2.236274e-02i, +8.449424e-01+1.136222e-01i, }; const complex float test_md_zavg_6_out[] = { +7.789312e-01-2.717133e-01i, +1.051134e+00+1.119145e-01i, +5.665196e-01-1.455328e-01i, 
+4.636047e-01+2.598512e-02i, +9.333349e-01+2.571163e-01i, +8.092539e-01+1.077486e-01i, +9.654248e-01+1.283658e-01i, +6.598654e-01+2.141062e-01i, +4.584768e-01-2.628656e-01i, }; const complex float test_md_zavg_7_out[] = { +7.988615e-01-1.017772e-01i, +7.353978e-01+1.302833e-01i, +6.945891e-01+2.653547e-02i, }; const complex float test_md_zavg_8_out[] = { +5.201892e-01+8.586197e-02i, +1.144647e+00+1.912872e-02i, +4.284065e-01-2.968628e-01i, +1.194739e+00+1.609637e-01i, +7.026358e-01+1.764737e-02i, +2.612113e-01-1.894930e-01i, +1.232672e+00-3.803800e-01i, +2.227800e-01-2.819807e-01i, +9.014837e-01-1.477056e-01i, +1.267227e+00-1.051996e+00i, +7.898151e-01-5.505648e-02i, +6.570690e-01-4.745200e-02i, +5.324559e-01-2.480973e-02i, +1.216354e+00+1.280760e-01i, +2.800128e-01-1.673521e-01i, +3.399734e-01-9.150609e-02i, +1.168106e+00+9.675643e-01i, +3.291180e-01+3.082876e-01i, +2.992532e-01+1.269072e-01i, +8.405740e-01+4.832594e-02i, +0.000000e+00+0.000000e+00i, +4.921110e-01+5.074525e-01i, +6.487955e-01+2.368276e-01i, +1.774997e+00-5.786039e-01i, +7.452605e-01+3.154191e-01i, +1.199294e+00+6.688780e-01i, +8.704531e-01+2.172326e-01i, }; const complex float test_md_zavg_9_out[] = { +6.977476e-01-6.395737e-02i, +7.195287e-01-3.627287e-03i, +7.856454e-01-2.700221e-01i, +9.047038e-01-3.848347e-01i, +6.762742e-01-2.136195e-02i, +6.123992e-01+3.947820e-01i, +3.799424e-01+5.841105e-02i, +9.719678e-01+5.522542e-02i, +9.383361e-01+4.005099e-01i, }; const complex float test_md_zavg_10_out[] = { +9.825337e-01-4.451812e-02i, +6.900211e-01-8.173487e-02i, +5.303672e-01-2.113538e-01i, +7.132189e-01-3.894371e-01i, +1.058092e+00+3.468613e-01i, +4.220666e-01+3.116114e-02i, +5.122082e-01+3.165930e-01i, +8.962214e-01+3.180105e-01i, +8.818166e-01-1.204571e-01i, }; const complex float test_md_zavg_11_out[] = { +7.343072e-01-1.125356e-01i, +7.311258e-01-3.804893e-03i, +7.634155e-01+1.713821e-01i, }; const complex float test_md_zavg_12_out[] = { +6.955566e-01-2.797422e-01i, 
+9.250122e-01+4.132722e-03i, +3.618251e-01-1.147716e-01i, +7.397687e-01+2.145355e-01i, +8.559285e-01+1.275170e-01i, +7.720736e-01-3.118163e-01i, +7.726355e-01-5.215567e-02i, +8.633936e-01+4.514872e-01i, +7.003516e-01+1.259382e-01i, }; const complex float test_md_zavg_13_out[] = { +6.607980e-01-1.301270e-01i, +7.892569e-01+1.007872e-02i, +7.787936e-01+1.750899e-01i, }; const complex float test_md_zavg_14_out[] = { +7.359870e-01-3.912078e-02i, +8.814446e-01+1.943790e-01i, +6.114167e-01-1.002166e-01i, }; const complex float test_md_zavg_15_out[] = { +7.429494e-01+1.834721e-02i, }; const complex float* test_md_zavg_out[] = { test_md_in0, test_md_zavg_1_out, test_md_zavg_2_out, test_md_zavg_3_out, test_md_zavg_4_out, test_md_zavg_5_out, test_md_zavg_6_out, test_md_zavg_7_out, test_md_zavg_8_out, test_md_zavg_9_out, test_md_zavg_10_out, test_md_zavg_11_out, test_md_zavg_12_out, test_md_zavg_13_out, test_md_zavg_14_out, test_md_zavg_15_out }; const complex float test_md_zfmacc2_0_out[] = { +0.000000e+00+0.000000e+00i, +2.169199e+00-1.326829e-03i, +0.000000e+00+0.000000e+00i, +8.612594e-01+8.083732e-01i, +0.000000e+00+0.000000e+00i, +2.330528e+00+1.362136e+00i, +1.311880e+00+4.103929e-01i, -1.171691e-01+3.636895e-01i, +0.000000e+00+0.000000e+00i, +3.119030e+00+1.560116e+00i, +6.424978e-01+1.145047e-01i, +0.000000e+00+0.000000e+00i, +3.434512e-01-1.136488e-01i, +2.335243e+00-1.374940e+00i, +0.000000e+00+0.000000e+00i, -2.300332e-02+6.144895e-02i, +5.255180e-01-8.355249e-01i, -2.784685e-02+6.451524e-02i, +0.000000e+00+0.000000e+00i, +7.303107e-01+5.060036e-01i, +0.000000e+00+0.000000e+00i, -2.328611e-01+1.272591e-01i, +0.000000e+00+0.000000e+00i, +1.709718e+00-8.576963e-01i, +9.973073e-02-4.085222e-01i, +1.990662e+00+2.203636e-01i, +0.000000e+00+0.000000e+00i, -9.130738e-02+1.462714e-01i, -1.775990e-01-1.056995e-01i, +0.000000e+00+0.000000e+00i, +4.647083e-01-8.030262e-01i, +1.796127e+00+6.211303e-01i, +0.000000e+00+0.000000e+00i, +9.624486e-01+7.232584e-01i, 
+0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +1.934715e+00+1.423560e-01i, +0.000000e+00+0.000000e+00i, +3.608214e+00+1.797395e+00i, +7.452290e-02+3.408341e-02i, +0.000000e+00+0.000000e+00i, +3.978248e+00-1.981200e+00i, +1.429536e+00-1.063292e+00i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +3.245117e+00-1.690194e+00i, -1.347774e-02-2.762856e-01i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +2.879958e+00-1.769070e+00i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, -2.303430e-01+4.964017e-02i, +1.075987e+00+8.232306e-01i, +2.783693e+00-4.882385e-01i, +2.370002e-01-6.740724e-01i, +0.000000e+00+0.000000e+00i, -2.148626e-01+7.488999e-03i, +1.088356e-01+4.822526e-01i, +0.000000e+00+0.000000e+00i, +5.531744e-01+4.191150e-01i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, -1.875463e-01+1.880779e-01i, +0.000000e+00+0.000000e+00i, +4.998724e-01+4.679731e-01i, +0.000000e+00+0.000000e+00i, +1.139288e+00-8.596823e-02i, -1.875913e-01+7.391413e-02i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +4.013324e-01+4.057826e-02i, +2.368083e-01-4.341970e-01i, +4.176474e+00+1.505298e+00i, +6.923188e-01-5.377352e-01i, +0.000000e+00+0.000000e+00i, +6.958398e-01-3.533691e-01i, }; const complex float test_md_zfmacc2_1_out[] = { +2.169199e+00-1.326829e-03i, +3.191788e+00+2.170509e+00i, +1.194711e+00+7.740824e-01i, +3.761528e+00+1.674621e+00i, +2.678694e+00-1.488589e+00i, +4.746678e-01-7.095608e-01i, +7.303107e-01+5.060036e-01i, +1.476857e+00-7.304372e-01i, +2.090393e+00-1.881585e-01i, -2.689064e-01+4.057188e-02i, +2.260836e+00-1.818959e-01i, +9.624486e-01+7.232584e-01i, +1.934715e+00+1.423560e-01i, +3.682737e+00+1.831479e+00i, +5.407784e+00-3.044492e+00i, +0.000000e+00+0.000000e+00i, +3.231639e+00-1.966480e+00i, +2.879958e+00-1.769070e+00i, +8.456444e-01+8.728708e-01i, +3.020694e+00-1.162311e+00i, 
-1.060270e-01+4.897416e-01i, +5.531744e-01+4.191150e-01i, +3.123260e-01+6.560510e-01i, +9.516963e-01-1.205409e-02i, +0.000000e+00+0.000000e+00i, +4.814615e+00+1.111679e+00i, +1.388159e+00-8.911043e-01i, }; const complex float test_md_zfmacc2_2_out[] = { +2.173140e+00+1.218766e+00i, +2.052030e+00+3.623627e-01i, +2.330528e+00+1.362136e+00i, +3.439478e+00+1.507916e+00i, +3.503258e+00-2.095960e+00i, -2.784685e-02+6.451524e-02i, -1.331303e-01-2.812631e-01i, +2.720973e+00+7.263672e-01i, +1.709718e+00-8.576963e-01i, +1.335850e+00+6.650358e-02i, +1.618528e+00+5.154309e-01i, +0.000000e+00+0.000000e+00i, +0.000000e+00+0.000000e+00i, +7.586462e+00-1.838046e-01i, +3.438773e+00-8.868529e-01i, +3.245117e+00-1.690194e+00i, +2.866480e+00-2.045356e+00i, +0.000000e+00+0.000000e+00i, +2.568831e+00-4.807495e-01i, +1.154928e-01-1.421796e-01i, +1.075987e+00+8.232306e-01i, +3.656281e-01+6.071929e-01i, +1.139288e+00-8.596823e-02i, +3.122811e-01+5.418873e-01i, +1.093651e+00-4.971570e-01i, +2.368083e-01-4.341970e-01i, +4.872314e+00+1.151928e+00i, }; const complex float test_md_zfmacc2_3_out[] = { +6.555697e+00+2.943265e+00i, +6.914890e+00-5.235287e-01i, +4.297561e+00-4.125923e-01i, +2.954378e+00+5.819345e-01i, +1.102523e+01-1.070657e+00i, +6.111597e+00-3.735550e+00i, +3.760310e+00+2.003016e-01i, +1.817197e+00+1.063112e+00i, +6.202774e+00+2.205745e-01i, }; const complex float test_md_zfmacc2_4_out[] = { +3.119030e+00+1.560116e+00i, +3.542008e+00+6.191815e-01i, +0.000000e+00+0.000000e+00i, +9.718496e-01+8.219835e-01i, +2.335243e+00-1.374940e+00i, +4.040246e+00+5.044395e-01i, +1.388608e+00+6.331962e-02i, +2.399011e+00-2.514718e-01i, -2.784685e-02+6.451524e-02i, -9.130738e-02+1.462714e-01i, -1.775990e-01-1.056995e-01i, +1.934715e+00+1.423560e-01i, +3.709826e+00-2.493221e+00i, +5.390864e+00+2.142240e+00i, +7.452290e-02+3.408341e-02i, +9.624486e-01+7.232584e-01i, +6.858206e+00-3.750270e+00i, +1.429536e+00-1.063292e+00i, +5.531744e-01+4.191150e-01i, -2.303430e-01+4.964017e-02i, 
+1.075987e+00+8.232306e-01i, +2.997479e+00-2.595823e-01i, +4.738085e-01-1.108269e+00i, +4.676347e+00+1.973271e+00i, +4.774562e-01-5.302463e-01i, +1.248123e+00+3.962843e-01i, +5.082486e-01-2.794550e-01i, }; const complex float test_md_zfmacc2_5_out[] = { +6.661037e+00+2.179297e+00i, +7.347339e+00-4.851699e-02i, +3.759772e+00-1.236369e-01i, +1.665808e+00+1.829279e-01i, +9.175213e+00-3.168970e-01i, +9.250190e+00-4.090304e+00i, +1.398819e+00+1.291986e+00i, +8.147635e+00+6.054189e-01i, +2.233828e+00-4.134169e-01i, }; const complex float test_md_zfmacc2_6_out[] = { +5.479487e+00+2.445419e+00i, +8.276260e+00-1.007230e+00i, +4.012400e+00+5.689547e-01i, +4.580967e+00-1.623691e+00i, +1.207147e+01-1.713729e+00i, +3.438773e+00-8.868529e-01i, +4.028110e+00-3.707135e-01i, +1.491589e+00-6.623448e-01i, +6.260583e+00+2.517046e+00i, }; const complex float test_md_zfmacc2_7_out[] = { +1.776814e+01+2.007144e+00i, +2.009121e+01-4.224273e+00i, +1.178028e+01+1.483988e+00i, }; const complex float test_md_zfmacc2_8_out[] = { -9.130738e-02+1.462714e-01i, +1.761257e+00-5.738616e-02i, +1.075987e+00+8.232306e-01i, +4.109661e+00-4.828914e-01i, +2.033128e+00-5.294201e-02i, +2.330528e+00+1.362136e+00i, +2.059466e+00+1.141140e+00i, -8.333489e-03+8.459420e-01i, +0.000000e+00+0.000000e+00i, +3.672204e+00+1.979231e+00i, +6.424978e-01+1.145047e-01i, +1.934715e+00+1.423560e-01i, +1.559049e-01+7.442913e-02i, +5.943457e+00+4.224555e-01i, +5.743952e-01+5.020566e-01i, -2.300332e-02+6.144895e-02i, +5.643053e+00-2.902693e+00i, +1.214097e+00-9.248629e-01i, +0.000000e+00+0.000000e+00i, +7.303107e-01+5.060036e-01i, +0.000000e+00+0.000000e+00i, +3.413589e+00-1.522357e+00i, +2.233306e-01-7.104826e-01i, +5.886192e+00+6.476014e-01i, +7.920495e-01-9.462575e-01i, +4.870620e+00-1.548706e+00i, +6.958398e-01-3.533691e-01i, }; const complex float test_md_zfmacc2_9_out[] = { +2.745937e+00+9.121158e-01i, +8.473317e+00+8.263025e-01i, +2.051133e+00+1.987082e+00i, +6.249417e+00+2.236092e+00i, +6.673758e+00+9.989411e-01i, 
+6.834147e+00-3.766107e+00i, +7.303107e-01+5.060036e-01i, +9.523111e+00-1.585239e+00i, +6.358510e+00-2.848333e+00i, }; const complex float test_md_zfmacc2_10_out[] = { +6.077820e+00+8.045202e-01i, +3.786051e+00+7.356139e-01i, +3.406516e+00+2.185366e+00i, +3.805106e+00+2.115109e+00i, +1.222901e+01-2.365733e+00i, +3.723207e+00-2.804503e-01i, +4.205638e+00-2.468614e+00i, +5.824261e+00-1.753185e+00i, +6.582032e+00+2.942323e-01i, }; const complex float test_md_zfmacc2_11_out[] = { +1.327039e+01+3.725501e+00i, +1.975732e+01-5.310741e-01i, +1.661193e+01-3.927568e+00i, }; const complex float test_md_zfmacc2_12_out[] = { +3.580897e+00+2.125503e+00i, +3.134065e+00+5.631222e-01i, +3.010702e+00+9.655867e-01i, +7.679154e+00-1.930819e+00i, +8.199914e+00-3.409690e-01i, +8.791117e+00+2.511794e+00i, +2.828512e+00+2.563318e-01i, +1.050534e+01-3.605457e+00i, +1.909937e+00-1.278232e+00i, }; const complex float test_md_zfmacc2_13_out[] = { +9.725664e+00+3.654212e+00i, +2.467019e+01+2.400053e-01i, +1.524379e+01-4.627357e+00i, }; const complex float test_md_zfmacc2_14_out[] = { +1.408856e+01+4.510146e-01i, +2.183932e+01-3.383304e+00i, +1.371176e+01+2.199148e+00i, }; const complex float test_md_zfmacc2_15_out[] = { +4.963963e+01-7.331403e-01i, }; const complex float* test_md_zfmacc2_out[] = { test_md_zfmacc2_0_out, test_md_zfmacc2_1_out, test_md_zfmacc2_2_out, test_md_zfmacc2_3_out, test_md_zfmacc2_4_out, test_md_zfmacc2_5_out, test_md_zfmacc2_6_out, test_md_zfmacc2_7_out, test_md_zfmacc2_8_out, test_md_zfmacc2_9_out, test_md_zfmacc2_10_out, test_md_zfmacc2_11_out, test_md_zfmacc2_12_out, test_md_zfmacc2_13_out, test_md_zfmacc2_14_out, test_md_zfmacc2_15_out, }; bart-0.4.02/utests/test_linalg.c000066400000000000000000000014511320577655200165420ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2016 Martin Uecker */ #include #include "num/linalg.h" #include "utest.h" static bool test_thomas_algorithm(void) { const complex float A[7][3] = { { 0. , 1., 0.5 }, { 0.5, 1., 0.5 }, { 0.5, 1., 0.5 }, { 0.5, 1., 0.5 }, { 0.5, 1., 0.5 }, { 0.5, 1., 0.5 }, { 0.5, 1., 0. }, }; const complex float d[7] = { 1., 0., 0., 0., 0., 0., 0. }; complex float x[7]; thomas_algorithm(7, x, A, d); bool ok = true; for (int i = 0; i < 7; i++) ok &= (cabsf(x[i] - 0.25f * (7 - i) * powf(-1., i)) < 1e-6); return ok; } UT_REGISTER_TEST(test_thomas_algorithm); bart-0.4.02/utests/test_linop_matrix.c000066400000000000000000000100041320577655200177730ustar00rootroot00000000000000/* Copyright 2017. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Martin Uecker */ #include #include "num/multind.h" #include "num/flpmath.h" #include "num/rand.h" #include "linops/someops.h" #include "linops/linop.h" #include "linops/lintest.h" #include "misc/misc.h" #include "misc/debug.h" #include "utest.h" static bool test_linop_matrix(void) { enum { N = 3 }; int A = 2; int B = 3; int C = 4; long odims[N] = { C, 1, A }; long idims1[N] = { 1, B, A }; long idims2[N] = { C, B, 1 }; complex float* dst1 = md_alloc(N, odims, CFL_SIZE); complex float* dst2 = md_alloc(N, odims, CFL_SIZE); complex float* src1 = md_alloc(N, idims1, CFL_SIZE); complex float* src2 = md_alloc(N, idims2, CFL_SIZE); md_gaussian_rand(N, odims, dst1); // test complete fill md_gaussian_rand(N, odims, dst2); // test complete fill md_gaussian_rand(N, idims1, src1); md_gaussian_rand(N, idims2, src2); struct linop_s* mat = linop_matrix_create(N, odims, idims2, idims1, src1); md_zmatmul(N, odims, dst1, idims1, src1, idims2, src2); linop_forward(mat, N, odims, dst2, N, idims2, src2); double err = md_znrmse(N, odims, dst2, dst1); linop_free(mat); md_free(src1); md_free(src2); md_free(dst1); md_free(dst2); return (err < UT_TOL); 
} static bool test_linop_matrix_adjoint(void) { enum { N = 3 }; int A = 2; int B = 3; int C = 4; long odims[N] = { C, 1, A }; long idims1[N] = { 1, B, A }; long idims2[N] = { C, B, 1 }; complex float* src1 = md_alloc(N, idims1, CFL_SIZE); md_gaussian_rand(N, idims1, src1); struct linop_s* mat = linop_matrix_create(N, odims, idims2, idims1, src1); float diff = linop_test_adjoint(mat); debug_printf(DP_DEBUG1, "adjoint diff: %f\n", diff); bool ret = (diff < 1.E-4f); linop_free(mat); return ret; } static bool test_linop_matrix_normal(void) { enum { N = 3 }; int A = 2; int B = 3; int C = 4; long odims[N] = { C, 1, A }; long idims1[N] = { 1, B, A }; long idims2[N] = { C, B, 1 }; complex float* src1 = md_alloc(N, idims1, CFL_SIZE); md_gaussian_rand(N, idims1, src1); struct linop_s* mat = linop_matrix_create(N, odims, idims2, idims1, src1); float nrmse = linop_test_normal(mat); debug_printf(DP_DEBUG1, "normal nrmse: %f\n", nrmse); bool ret = (nrmse < 1.E-6f); linop_free(mat); return ret; } static bool test_linop_matrix_chain(void) { int A = 9; int B = 7; int C = 3; int D = 2; int E = 5; enum { N = 8 }; long odims[N] = { D, C, 1, 1, 1, 1, C, D }; long idims0[N] = { D, 1, 1, A, E, 1, A, D }; long idims1[N] = { D, 1, B, A, 1, B, A, D }; long tdims[N] = { D, 1, B, 1, E, B, 1, D }; long idims2[N] = { D, C, B, 1, E, B, C, D }; complex float* dst1 = md_alloc(N, odims, CFL_SIZE); complex float* dst2 = md_alloc(N, odims, CFL_SIZE); complex float* src0 = md_alloc(N, idims0, CFL_SIZE); complex float* src1 = md_alloc(N, idims1, CFL_SIZE); complex float* src2 = md_alloc(N, idims2, CFL_SIZE); md_gaussian_rand(N, odims, dst1); // test complete fill md_gaussian_rand(N, odims, dst2); // test complete fill md_gaussian_rand(N, idims0, src0); md_gaussian_rand(N, idims1, src1); md_gaussian_rand(N, idims2, src2); struct linop_s* mat1 = linop_matrix_create(N, tdims, idims0, idims1, src1); struct linop_s* mat2 = linop_matrix_create(N, odims, tdims, idims2, src2); struct linop_s* matA = 
linop_chain(mat1, mat2); linop_forward(matA, N, odims, dst1, N, idims0, src0); linop_free(matA); struct linop_s* matB = linop_matrix_chain(mat1, mat2); linop_forward(matB, N, odims, dst2, N, idims0, src0); linop_free(matB); double err = md_znrmse(N, odims, dst2, dst1); linop_free(mat1); linop_free(mat2); md_free(src0); md_free(src1); md_free(src2); md_free(dst1); md_free(dst2); return (err < 1.E-5); } UT_REGISTER_TEST(test_linop_matrix); UT_REGISTER_TEST(test_linop_matrix_adjoint); UT_REGISTER_TEST(test_linop_matrix_normal); UT_REGISTER_TEST(test_linop_matrix_chain); bart-0.4.02/utests/test_multind.c000066400000000000000000000052741320577655200167570ustar00rootroot00000000000000 #include #include "num/multind.h" #include "num/rand.h" #include "utest.h" static bool test_md_copy(void) { enum { N = 4 }; long dims[N] = { 10, 10, 10, 10 }; complex float* a = md_alloc(N, dims, sizeof(complex float)); md_gaussian_rand(N, dims, a); complex float* b = md_alloc(N, dims, sizeof(complex float)); md_copy(N, dims, b, a, sizeof(complex float)); bool eq = md_compare(N, dims, a, b, sizeof(complex float)); md_free(a); md_free(b); return eq; } UT_REGISTER_TEST(test_md_copy); static bool test_md_transpose(void) { enum { N = 4 }; long dims[N] = { 10, 10, 10, 10 }; complex float* a = md_alloc(N, dims, sizeof(complex float)); md_gaussian_rand(N, dims, a); complex float* b = md_alloc(N, dims, sizeof(complex float)); complex float* c = md_alloc(N, dims, sizeof(complex float)); md_transpose(N, 0, 2, dims, b, dims, a, sizeof(complex float)); md_transpose(N, 0, 2, dims, c, dims, b, sizeof(complex float)); bool eq = md_compare(N, dims, a, c, sizeof(complex float)); md_free(a); md_free(b); md_free(c); return eq; } UT_REGISTER_TEST(test_md_transpose); static bool test_md_swap(void) { enum { N = 4 }; long dims[N] = { 10, 10, 10, 10 }; complex float* a = md_alloc(N, dims, sizeof(complex float)); complex float* b = md_alloc(N, dims, sizeof(complex float)); complex float* c = md_alloc(N, dims, 
sizeof(complex float)); md_gaussian_rand(N, dims, a); md_gaussian_rand(N, dims, b); md_gaussian_rand(N, dims, c); complex float* d = md_alloc(N, dims, sizeof(complex float)); complex float* e = md_alloc(N, dims, sizeof(complex float)); complex float* f = md_alloc(N, dims, sizeof(complex float)); md_copy(N, dims, d, a, sizeof(complex float)); md_copy(N, dims, e, b, sizeof(complex float)); md_copy(N, dims, f, c, sizeof(complex float)); md_circular_swap(3, N, dims, (void*[]){ a, b, c }, sizeof(complex float)); bool eq = true; eq &= md_compare(N, dims, c, d, sizeof(complex float)); eq &= md_compare(N, dims, a, e, sizeof(complex float)); eq &= md_compare(N, dims, b, f, sizeof(complex float)); md_free(a); md_free(b); md_free(c); md_free(d); md_free(e); md_free(f); return eq; } UT_REGISTER_TEST(test_md_swap); static bool test_md_flip(void) { enum { N = 4 }; long dims[N] = { 10, 10, 10, 10 }; complex float* a = md_alloc(N, dims, sizeof(complex float)); md_gaussian_rand(N, dims, a); complex float* b = md_alloc(N, dims, sizeof(complex float)); md_flip(N, dims, MD_BIT(0) | MD_BIT(2), b, a, sizeof(complex float)); md_flip(N, dims, MD_BIT(0) | MD_BIT(2), b, b, sizeof(complex float)); bool eq = md_compare(N, dims, a, b, sizeof(complex float)); md_free(a); md_free(b); return eq; } UT_REGISTER_TEST(test_md_flip); bart-0.4.02/utests/test_pattern.c000066400000000000000000000030041320577655200167450ustar00rootroot00000000000000/* Copyright 2016. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2016 Jonathan Tamir */ #include #include #include "num/flpmath.h" #include "num/multind.h" #include "misc/debug.h" #include "misc/misc.h" #include "misc/mri.h" #include "utest.h" static bool test_pattern_flags(unsigned int D, const long dims[D], unsigned int flags, const complex float* in, const complex float* ref) { long odims[D]; md_select_dims(D, ~flags, odims, dims); complex float* out = md_alloc(D, odims, CFL_SIZE); estimate_pattern(D, dims, flags, out, in); UT_ASSERT(md_znrmse(D, odims, ref, out) < UT_TOL); md_free(out); return true; } static bool test_pattern(void) { const complex float in[1][5][3] = { { { 3., 0., 0. }, { 0., 2., 0. }, { .2, 0., 0. }, { 0., 0., 0. }, { 0., 2., 0. }, } }; const complex float ref0[1][5][3] = { { { 1., 0., 0. }, { 0., 1., 0. }, { 1., 0., 0. }, { 0., 0., 0. }, { 0., 1., 0. }, } }; const complex float ref2[1][1][3] = { { { 1., 1., 0. }, } }; const complex float ref3[1][1][1] = { { { 1. }, } }; long idims[3] = { 3, 5, 1 }; return (test_pattern_flags(3, idims, 0, &in[0][0][0], &ref0[0][0][0]) && test_pattern_flags(3, idims, 2, &in[0][0][0], &ref2[0][0][0]) && test_pattern_flags(3, idims, 3, &in[0][0][0], &ref3[0][0][0])); } UT_REGISTER_TEST(test_pattern); bart-0.4.02/utests/test_polynom.c000066400000000000000000000065051320577655200167760ustar00rootroot00000000000000 #include "misc/misc.h" #include "num/polynom.h" #include "utest.h" static bool test_polynom_eval(void) { const complex double coeff[3] = { 1., 0., 1. }; bool ok = true; ok &= (1. == polynom_eval(0., 2, coeff)); ok &= (2. == polynom_eval(1., 2, coeff)); ok &= (2. == polynom_eval(-1., 2, coeff)); return ok; } UT_REGISTER_TEST(test_polynom_eval); static bool array_eq(int N, const complex double c1[N], const complex double c2[N], double eps) { return (0 == N) ? true : ((cabs(c1[0] - c2[0]) <= eps) && array_eq(N - 1, c1 + 1, c2 + 1, eps)); } static bool test_polynom_derivative(void) { const complex double coeff[3] = { 1., 0., 1. 
}; complex double coeff2[2]; polynom_derivative(2, coeff2, coeff); return array_eq(2, coeff2, (const complex double[]){ 0., 2. }, 0.); } UT_REGISTER_TEST(test_polynom_derivative); static bool test_polynom_integral(void) { const complex double coeff[3] = { 1., 0., 1. }; complex double coeff2[2]; complex double coeff3[3]; polynom_derivative(2, coeff2, coeff); polynom_integral(1, coeff3, coeff2); return ((0. == coeff3[1]) && (1. == coeff3[2])); } UT_REGISTER_TEST(test_polynom_integral); static bool test_polynom_integrate(void) { const complex double coeff2[2] = { 0., 1. }; return (0.5 == polynom_integrate(0., 1., 1, coeff2)); } UT_REGISTER_TEST(test_polynom_integrate); static bool test_polynom_from_roots1(void) { const complex double roots[2] = { 1., -2.i }; // (x - 1.) * (x + 2.i) == x^2 + (-1 + 2.i) x -2i. const complex double coeff0[3] = { -2.i, -1. + 2.i, 1. }; complex double coeff[3]; polynom_from_roots(2, coeff, roots); return array_eq(3, coeff0, coeff, 0.); } UT_REGISTER_TEST(test_polynom_from_roots1); static bool test_polynom_from_roots(void) { const complex double roots[3] = { 1., 2., 3. }; complex double coeff[4]; polynom_from_roots(3, coeff, roots); bool ok = true; for (unsigned int i = 0; i < ARRAY_SIZE(roots); i++) ok &= (0. == polynom_eval(roots[i], 3, coeff)); complex double prod = 1.; for (unsigned int i = 0; i < ARRAY_SIZE(roots); i++) prod *= -roots[i]; ok &= (prod == polynom_eval(0., 3, coeff)); return ok; } UT_REGISTER_TEST(test_polynom_from_roots); static bool test_polynom_scale(void) { const complex double coeff[3] = { 1., 0., 1. }; complex double coeff2[3]; polynom_scale(2, coeff2, 2., coeff); return (5. == polynom_eval(1., 2, coeff2)); } UT_REGISTER_TEST(test_polynom_scale); static bool test_polynom_shift(void) { const complex double coeff[3] = { 1., 0., 1. }; // 1. + (x + 1)^2 = 2 + 2 * x + x^2 complex double coeff2[3]; polynom_shift(2, coeff2, 1., coeff); return array_eq(3, coeff2, (const complex double[]){ 2., 2., 1. 
}, 0.); } UT_REGISTER_TEST(test_polynom_shift); static bool test_quadratic_formula(void) { const complex double roots[2] = { 1., -2.i }; // (x - 1.) * (x + 2.i) == x^2 + (-1 + 2.i) x -2i. complex double coeff[3] = { -2.i, -1. + 2.i, 1. }; complex double r2[2]; quadratic_formula(r2, coeff); return array_eq(2, r2, roots, 0.); } UT_REGISTER_TEST(test_quadratic_formula); static bool test_cubic_formula(void) { const complex double roots[3] = { 1., 0.5, -2.i }; complex double coeff[4]; polynom_from_roots(3, coeff, roots); complex double r2[3]; cubic_formula(r2, coeff); return array_eq(3, r2, roots, 1.E-15); } UT_REGISTER_TEST(test_cubic_formula); bart-0.4.02/utests/test_splines.c000066400000000000000000000137301320577655200167540ustar00rootroot00000000000000/* Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Martin Uecker */ #include #include #include "num/splines.h" #include "utest.h" static const double coords[5] = { 0., 1., 0.5, 1., 0.5 }; static bool test_bezier_curve(void) { bool ret = true; ret = ret && (coords[0] == bezier_curve(0., 4, coords)); ret = ret && (coords[4] == bezier_curve(1., 4, coords)); return ret; } UT_REGISTER_TEST(test_bezier_curve); static bool test_bezier_increase_degree(void) { double coords2[6]; bezier_increase_degree(4, coords2, coords); double err = 0.; for (double x = 0.; x < 1.; x += 0.01) { double a = bezier_curve(x, 4, coords); double b = bezier_curve(x, 5, coords2); err += pow(a - b, 2); } return (err < 1.E-28); } UT_REGISTER_TEST(test_bezier_increase_degree); static bool test_bezier_split(void) { double coords[5] = { 0., 1., 0.5, 1., 0.5 }; double coordsA[5]; double coordsB[5]; bezier_split(0.5, 4, coordsA, coordsB, coords); double err = 0.; for (double x = 0.; x < 1.; x += 0.01) { double a = bezier_curve(x, 4, coords); double b = (x <= 0.5) ? bezier_curve(2. * x, 4, coordsA) : bezier_curve(2. 
* (x - 0.5), 4, coordsB); err += pow(a - b, 2); } return (err < 1.E-28); } UT_REGISTER_TEST(test_bezier_split); static bool test_cspline(void) { const double coeff[4] = { 0., 1., 1., 1. }; bool ok = true; for (double x = 0.; x < 1.; x += 0.1) ok &= (fabs(x - cspline(x, coeff)) < 1.E-15); return ok; } UT_REGISTER_TEST(test_cspline); static bool test_bspline(void) { const double knots[11] = { 0., 0.0, 0.0, 0., 0.25, 0.5, 0.75, 1., 1., 1., 1. }; bool ok = true; for (int i = 0; i < 7; i++) { double coord[7] = { 0., 0., 0., 0., 0., 0., 0. }; coord[i] = 1.; double err = 0.; for (double x = 0.; x <= 1.; x += 0.01) { double a = bspline(10, i, 3, knots, x); double b = bspline_curve(10, 3, knots, coord, x); err += pow(a - b, 2); } ok &= (err < 1.E-28); } return ok; } UT_REGISTER_TEST(test_bspline); static bool test_bspline_knot_insert(void) { const double knots[11] = { 0., 0.0, 0.0, 0., 0.25, 0.5, 0.75, 1., 1., 1., 1. }; double coord[7] = { 0., 0., 0.75, 0.5, 0.25, 0., 0 }; double knots2[12]; double coord2[8]; bspline_knot_insert(0.6, 10, 3, knots2, coord2, knots, coord); double err = 0.; for (double x = 0.; x < 1.; x += 0.01) { double a = bspline_curve(10, 3, knots, coord, x); double b = bspline_curve(11, 3, knots2, coord2, x); err += pow(a - b, 2); } return (err < 1.E-28); } UT_REGISTER_TEST(test_bspline_knot_insert); static bool test_bspline_derivative(void) { const double knots[11] = { 0., 0.0, 0.0, 0., 0.25, 0.5, 0.75, 1., 1., 1., 1. }; bool ok = true; for (int i = 0; i < 7; i++) { double coord[7] = { 0., 0., 0., 0., 0., 0., 0. }; coord[i] = 1.; double err = 0.; for (double x = 0.; x <= 1.; x += 0.01) { double a = bspline_derivative(10, i, 3, knots, x); double b = bspline_curve_derivative(1, 10, 3, knots, coord, x); err += pow(a - b, 2); } ok &= (err < 1.E-1); } return ok; } UT_REGISTER_TEST(test_bspline_derivative); static bool test_bspline_zero(void) { const double knots[11] = { 0., 0.0, 0.0, 0., 0.25, 0.5, 0.75, 1., 1., 1., 1. 
}; const double z0[7] = { 0., 0., 0.75, 0.5, 0.25, 0., 0 }; bool ok = true; for (int i = 2; i < 5; i++) { // FIXME double coord[7] = { 0., 0., 0., 0., 0., 0., 0. }; coord[i] = 1.; double k2[9]; double c2[6]; bspline_coeff_derivative_n(1, 10, 3, k2, c2, knots, coord); double z = bspline_curve_zero(8, 2, k2, c2); ok &= (fabs(z - z0[i]) < 1.E-5); } return ok; } UT_REGISTER_TEST(test_bspline_zero); static bool test_nurbs(void) { const double knots[11] = { 0., 0.0, 0.0, 0., 0.25, 0.5, 0.75, 1., 1., 1., 1. }; bool ok = true; for (int i = 0; i < 7; i++) { double coord[7] = { 0., 0., 0., 0., 0., 0., 0. }; double weights[7] = { 1., 1., 1., 1., 1., 1., 1. }; coord[i] = 1.; double err = 0.; for (double x = 0.; x <= 1.; x += 0.01) { double a = nurbs(10, 3, knots, coord, weights, x); double b = bspline_curve(10, 3, knots, coord, x); double c = bspline_curve(10, 3, knots, weights, x); err += pow(a - b / c, 2); } ok &= (err < 1.E-28); } return ok; } UT_REGISTER_TEST(test_nurbs); static bool test_nurbs_arc(void) { const double knots[6] = { 0., 0., 0., 1., 1., 1. }; double coordx[3] = { 0., 1., 1. }; double coordy[3] = { 1., 1., 0. }; double weights[3] = { sqrt(2.), 1., sqrt(2.) }; bool ok = true; for (double t = 0.; t <= 1.; t += 0.01) { double x = nurbs(5, 2, knots, coordx, weights, t); double y = nurbs(5, 2, knots, coordy, weights, t); ok &= fabs(pow(x, 2.) + pow(y, 2.) - 1.) < 1.E-15; } return ok; } UT_REGISTER_TEST(test_nurbs_arc); static bool test_nurbs_circle(void) { const double knots[10] = { 0., 0., 0., 1., 1., 2., 2., 3., 3., 3. }; double coordx[7] = { cos(0. * 2. * M_PI / 3.) + cos(1. * 2. * M_PI / 3.), cos(1. * 2. * M_PI / 3.) * 2., cos(1. * 2. * M_PI / 3.) + cos(2. * 2. * M_PI / 3.), cos(2. * 2. * M_PI / 3.) * 2., cos(2. * 2. * M_PI / 3.) + cos(3. * 2. * M_PI / 3.), cos(3. * 2. * M_PI / 3.) * 2., cos(3. * 2. * M_PI / 3.) + cos(1. * 2. * M_PI / 3.), }; double coordy[7] = { sin(0. * 2. * M_PI / 3.) + sin(1. * 2. * M_PI / 3.), sin(1. * 2. * M_PI / 3.) * 2., sin(1. * 2. 
* M_PI / 3.) + sin(2. * 2. * M_PI / 3.), sin(2. * 2. * M_PI / 3.) * 2., sin(2. * 2. * M_PI / 3.) + sin(3. * 2. * M_PI / 3.), sin(3. * 2. * M_PI / 3.) * 2., sin(3. * 2. * M_PI / 3.) + sin(1. * 2. * M_PI / 3.), }; double weights[7] = { 1., 0.5, 1., 0.5, 1., 0.5, 1. }; bool ok = true; for (double t = 0.; t <= 3.; t += 0.01) { double x = nurbs(9, 2, knots, coordx, weights, t); double y = nurbs(9, 2, knots, coordy, weights, t); ok &= fabs(pow(x, 2.) + pow(y, 2.) - 1.) < 1.E-15; } return ok; } UT_REGISTER_TEST(test_nurbs_circle); bart-0.4.02/utests/test_window.c000066400000000000000000000041071320577655200166040ustar00rootroot00000000000000/* Copyright 2017. The Regents of the University of California. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2017 Jon Tamir */ #include #include "num/filter.h" #include "num/flpmath.h" #include "num/multind.h" #include "misc/misc.h" #include "utest.h" static bool test_window(unsigned int D, long dims[D], long flags, bool hamming, const complex float* ref) { complex float* in = md_alloc(3, dims, CFL_SIZE); md_zfill(3, dims, in, 1.); complex float* out = md_alloc(3, dims, CFL_SIZE); (hamming ? 
md_zhamming : md_zhann)(3, dims, flags, out, in); UT_ASSERT(md_znrmse(3, dims, &ref[0], out) < UT_TOL); md_free(out); md_free(in); return true; } static bool test_hamming(void) { // compare to Matlab: // >> z = permute(repmat(hamming(5)*hamming(4)', [1, 1, 2]), [3, 1, 2]); // >> z(:) const complex float ref[2 * 5 * 4] = { 0.0064, 0.0064, 0.0432, 0.0432, 0.0800, 0.0800, 0.0432, 0.0432, 0.0064, 0.0064, 0.0616, 0.0616, 0.4158, 0.4158, 0.7700, 0.7700, 0.4158, 0.4158, 0.0616, 0.0616, 0.0616, 0.0616, 0.4158, 0.4158, 0.7700, 0.7700, 0.4158, 0.4158, 0.0616, 0.0616, 0.0064, 0.0064, 0.0432, 0.0432, 0.0800, 0.0800, 0.0432, 0.0432, 0.0064, 0.0064, }; long dims[3] = { 2, 5, 4 }; return test_window(3, dims, MD_BIT(1) | MD_BIT(2), true, ref); } UT_REGISTER_TEST(test_hamming); static bool test_hann(void) { // compare to Matlab: // >> z = permute(repmat(hann(4)*hann(5)', [1, 1, 2]), [1, 3, 2]); // >> z(:) const complex float ref[4 * 2 * 5] = { 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.3750, 0.3750, 0.0000, 0.0000, 0.3750, 0.3750, 0.0000, 0.0000, 0.7500, 0.7500, 0.0000, 0.0000, 0.7500, 0.7500, 0.0000, 0.0000, 0.3750, 0.3750, 0.0000, 0.0000, 0.3750, 0.3750, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, }; long dims[3] = { 4, 2, 5 }; return test_window(3, dims, MD_BIT(0) | MD_BIT(2), false, ref); } UT_REGISTER_TEST(test_hann); bart-0.4.02/utests/utest.c000066400000000000000000000024771320577655200154120ustar00rootroot00000000000000/* Copyright 2016. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. * * Authors: * 2016 Jonathan Tamir * 2016 Martin Uecker */ #include #include "misc/debug.h" #include "misc/misc.h" #include "utest.h" #if 0 /* A linker script is used to assemble a list of all * registered unit tests and to set begin and end. 
*/ extern ut_test_f* _utests_begin; extern ut_test_f* _utests_end; #else /* A shell script called by make is used to create * the list of registered unit tests in UTESTS. * This also works on MacOS X. */ extern ut_test_f UTESTS dummy; ut_test_f* ut_tests[] = { UTESTS }; #define _utests_begin (ut_tests[0]) #define _utests_end (ut_tests[ARRAY_SIZE(ut_tests)]) #endif int main(int argc, char* argv[]) { UNUSED(argc); int num_tests_run = 0; int num_tests_pass = 0; for (ut_test_f** ptr = &_utests_begin; ptr != &_utests_end; ptr++) UNUSED((num_tests_run++, (**ptr)()) && num_tests_pass++); bool good = (num_tests_pass == num_tests_run); debug_printf(good ? DP_INFO : DP_ERROR, "%20s: %2d/%2d passed.\n", argv[0], num_tests_pass, num_tests_run); exit(good ? 0 : 1); } bart-0.4.02/utests/utest.h000066400000000000000000000015721320577655200154120ustar00rootroot00000000000000/* Copyright 2016. The Regents of the University of California. * Copyright 2016. Martin Uecker. * All rights reserved. Use of this source code is governed by * a BSD-style license which can be found in the LICENSE file. 
* * Authors: * 2016 Jonathan Tamir * 2016 Martin Uecker */ #ifndef _UTEST_H #define _UTEST_H #include #include "misc/debug.h" #include "misc/misc.h" #define UT_ASSERT(test) \ return ((test) || (debug_printf(DP_ERROR, "%s:%d assertion `%s` failed.\n", __func__, __LINE__, #test), false)) #define UT_TOL 1E-6 typedef bool ut_test_f(void); #if 0 #define UT_REGISTER_TEST(x) \ ut_test_f* ptr_ ## x __attribute__((section(".utest"))) = &x; #else #define UT_REGISTER_TEST(x) \ extern bool call_ ## x(void); \ extern bool call_ ## x(void) { return x(); }; #endif #endif bart-0.4.02/utests/utests-collect.sh000077500000000000000000000001651320577655200174030ustar00rootroot00000000000000#!/bin/sh UTESTS=$(grep UT_REGISTER $1 | cut -f2 -d'(' | cut -f1 -d')') for i in $UTESTS; do echo "call_$i," done bart-0.4.02/utests/utests.ld000066400000000000000000000002121320577655200157330ustar00rootroot00000000000000 SECTIONS { . = ALIGN(8); .utests_array : { _utests_begin = .; KEEP( *(.utest)) _utests_end = .; } } INSERT AFTER .rodata; bart-0.4.02/version.txt000066400000000000000000000000101320577655200147560ustar00rootroot00000000000000v0.4.01