pax_global_header00006660000000000000000000000064142141353060014511gustar00rootroot0000000000000052 comment=993d5984abf1e57d775a1e077fe49b3be3dd7dc6 vdt-0.4.4/000077500000000000000000000000001421413530600123135ustar00rootroot00000000000000vdt-0.4.4/CMakeDefaults.txt000066400000000000000000000017421421413530600155300ustar00rootroot00000000000000# Useful Macros to be used later macro(change_option NAME NEWVAL) unset(${NAME} CACHE) set(${NAME} ${NEWVAL} CACHE BOOL "Value changed by cmake - probably due to compiler incompatibilities") endmacro(change_option) macro(add_exe_and_link EXENAME FILENAME ADDITIONAL_LIB) # no name is lib not present set (ADDITIONAL_LIB_NAME "") if (NOT APPLE) # in this case u have the framework carbon if (${ADDITIONAL_LIB} MATCHES "None") else (${ADDITIONAL_LIB} MATCHES "None") set (ADDITIONAL_LIB_NAME ${ADDITIONAL_LIB}) endif (${ADDITIONAL_LIB} MATCHES "None") endif (NOT APPLE) #link Vc if present if(USE_VC) add_executable( ${EXENAME} ${FILENAME}) target_link_libraries(${EXENAME} vdt VcWrapper ${ADDITIONAL_LIB_NAME}) else(USE_VC) add_executable( ${EXENAME} ${FILENAME}) target_link_libraries(${EXENAME} vdt ${ADDITIONAL_LIB_NAME}) endif(USE_VC) endmacro(add_exe_and_link) # End macro definitions vdt-0.4.4/CMakeLists.txt000066400000000000000000000161351421413530600150610ustar00rootroot00000000000000# VDT Math Library cmake_minimum_required(VERSION 3.2 FATAL_ERROR) project (Vdt) #------------------------------------------------------------------------------- # Include the defaults include ( CMakeDefaults.txt ) #------------------------------------------------------------------------------- # configuration options -- you may change them when running cmake ============== # with 'cmake -D = .' 
# ==============================================================================
# VDT build configuration: user options, compiler checks, SIMD/optimisation
# flags, optional Vc support, assembled flag strings and common directories.
# The variables set here (LIB_FLAGS, DIAG_FLAGS, SRC_DIR, ...) are ordinary
# directory-scope variables, visible to the sub-directories added afterwards.
# ==============================================================================

# --- User options (override with 'cmake -D <OPTION>=<VALUE> .') ---------------
option( DIAG "Build in diagnostic mode - all diagnostic exes (default cache entry: OFF)" OFF)
option( AVX "Use AVX instruction set (default cache entry: OFF)" OFF)
option( AVX2 "Use AVX2 instruction set (default cache entry: OFF)" OFF)
option( FMA "Use FMA instruction set (default cache entry: OFF)" OFF)
# USERFLAGS holds a free-form string of compiler flags, so it is declared as a
# STRING cache entry instead of a boolean option(). A value passed with
# -D USERFLAGS="..." is kept; the empty default is false in if(USERFLAGS).
set( USERFLAGS "" CACHE STRING "Pass arbitrary flags to the compiler")
option( SSE "Use SSE instruction set (default cache entry: ON)" ON)
option( NEON "Use NEON instruction set (default cache entry: OFF)" OFF)
option( BUILD_SHARED_LIBS "Build libraries as SHARED instead of STATIC (default cache entry: ON)" ON)
option( PRELOAD "Create in the library the symbols to preload the library (default cache entry: OFF)" OFF)
option( USE_VC "Use Vc library - requires symlink to Vc from ${CMAKE_SOURCE_DIR} (default cache entry: OFF)" OFF)
option( DEBUG "Compile library with debug symbols (default is OFF)" OFF)

# --- Compiler identification and minimum versions -----------------------------
# Quoted so that an empty compiler ID still yields one (empty) argument.
message("${CMAKE_CXX_COMPILER_ID}")

# Exactly one COMP_IS_* flag is set for a supported compiler. The flag is set
# AFTER the version check: a too-old compiler aborts at FATAL_ERROR, anything
# else falls through and is tagged.
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
    message(FATAL_ERROR "VDT requires GCC version >= 4.8")
  endif()
  set(COMP_IS_GCC TRUE)
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
    message(FATAL_ERROR "VDT requires AppleClang version >= 5.0")
  endif()
  set(COMP_IS_CLANG TRUE)
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.3)
    message(FATAL_ERROR "VDT requires Clang version >= 3.3")
  endif()
  set(COMP_IS_CLANG TRUE)
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0)
    message(FATAL_ERROR "VDT requires ICC >= 15.0")
  endif()
  set(COMP_IS_ICC TRUE)
else()
  message(WARNING "You are using an unsupported compiler! 
Compilation has only been tested with Clang, ICC and GCC.")
endif()

# --- SIMD and FMA instruction sets --------------------------------------------
# NEON excludes the x86 choices. Otherwise the widest requested set wins:
# AVX2 over AVX over SSE. ICC uses its own -x<set> spelling for AVX/AVX2.
if(NEON)
  message(STATUS "Using NEON instructions!")
  set(PACKED_INSTR "-mfpu=neon ")
else()
  if(SSE AND (NOT (AVX OR AVX2)))
    message(STATUS "Using SSE instructions!")
    set(PACKED_INSTR "-msse")
  endif()
  if(AVX AND (NOT AVX2))
    message(STATUS "Using AVX instructions!")
    set(PACKED_INSTR "-mavx")
    if(COMP_IS_ICC)
      set(PACKED_INSTR "-xavx")
    endif()
  endif()
  if(AVX2)
    message(STATUS "Using AVX2 instructions!")
    set(PACKED_INSTR "-mavx2")
    if(COMP_IS_ICC)
      set(PACKED_INSTR "-xavx2")
    endif()
  endif()
  if(FMA)
    message(STATUS "Using FMA instructions!")
    set(FMA_INSTR "-mfma")
  endif()
endif()

# --- To use svml at CERN ------------------------------------------------------
# SVML is not declared as an option above; enable it with -D SVML=ON.
# NOTE(review): the -L path below is a hard-coded, site-specific AFS location.
set(INTEL_SVML_FLAGS "")
if(SVML)
  message(STATUS "Linking SVML library")
  set(INTEL_SVML_FLAGS "-mveclibabi=svml -L/afs/cern.ch/sw/IntelSoftware/linux/x86_64/Compiler/11.1/072/lib/intel64/ -lsvml -lirc")
endif()

# --- Vc setup -----------------------------------------------------------------
# Vc is used only if both the static library and the umbrella header are found
# below ${CMAKE_SOURCE_DIR}/Vc; otherwise USE_VC is switched off in the cache
# via change_option() (macro from CMakeDefaults.txt).
if(USE_VC)
  message(STATUS "VC usage is turned on now, if you do not intend to use it, run 'cmake -D USE_VC=0 .'")
  set(VC_SYMLINK_MSG "To use Vc you must have a (symlink) 'Vc' leading to the Vc rootdir in your ${CMAKE_SOURCE_DIR}")
  # check for files
  set(VC_LIB_NAME "${CMAKE_SOURCE_DIR}/Vc/libVc.a")
  set(VC_HEADER_NAME "${CMAKE_SOURCE_DIR}/Vc/include/Vc/Vc")
  if(NOT EXISTS "${VC_LIB_NAME}")
    message(STATUS "Vc lib not found at ${VC_LIB_NAME}, turning off Vc usage")
    message(STATUS "${VC_SYMLINK_MSG}")
    change_option(USE_VC 0)
  elseif(NOT EXISTS "${VC_HEADER_NAME}")
    message(STATUS "Vc header not found at ${VC_HEADER_NAME}, turning off Vc usage")
    message(STATUS "${VC_SYMLINK_MSG}")
    change_option(USE_VC 0)
  endif()
  # Add the link directory only if Vc survived the checks above.
  if(USE_VC)
    link_directories("${CMAKE_SOURCE_DIR}/Vc")
  endif()
endif()

# set compiler options =========================================================
if(DIAG)
  # Library for time measurement: macOS and Linux
  set(LIBTIMING "rt")
  # Older Apple toolchains additionally get the Carbon framework.
  if(APPLE AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0)
    set(LIBTIMINGAPPLE "-framework Carbon")
  endif()
endif()

#-------------------------------------------------------------------------------
# Compiler optimisations
set(VECT_OPT "-Ofast")
if(COMP_IS_ICC)
  set(VECT_OPT "")
endif()

if(COMP_IS_GCC)
  set(VECTORIZER_VERBOSITY "-ftree-vectorizer-verbose=0")
  set(INLINE_OPT " --param vect-max-version-for-alias-checks=50 --param inline-unit-growth=150")
endif()

set(CPP11_OPT "-std=c++11")
set(VERBOSITY_OPT "-Winline")

# set it for clang until it understands __always_inline
set(CLANG_INLINE_DEFINE "")
if(COMP_IS_CLANG)
  set(CLANG_INLINE_DEFINE "-D__extern_always_inline=inline")
endif()

# compiler dependent changes ---------------------------------------------------
if(COMP_IS_ICC)
  set(VECTORIZER_VERBOSITY "")
  set(INLINE_OPT "")
endif()

set(WARNING_FLAGS "-W -Wall -Werror -Wno-error=unused-parameter")

if(DEBUG)
  set(DEBUG_FLAGS " -g")
  message(STATUS "Adding debugging symbols")
endif()

# Flag strings assembled from the pieces above. Pieces that were never set
# expand to nothing.
set(COMMON_FLAGS "${CPP11_OPT} ${INTEL_SVML_FLAGS} ${PACKED_INSTR} ${FMA_INSTR} ${INLINE_OPT} ${WARNING_FLAGS} ${DEBUG_FLAGS} ${CLANG_INLINE_DEFINE}")
if(USERFLAGS)
  set(COMMON_FLAGS "${COMMON_FLAGS} ${USERFLAGS}")
endif()

set(LIB_FLAGS "${VERBOSITY_OPT} ${VECT_OPT} ${VECTORIZER_VERBOSITY} ${COMMON_FLAGS}")
set(DIAG_FLAGS "${LIBTIMINGAPPLE} ${VECT_OPT} ${COMMON_FLAGS}")

# Locations ====================================================================

# Location of executables
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/bin )

# Location of sources
set( SRC_DIR ${CMAKE_SOURCE_DIR}/src )

# Location of library
set( LIB_DIR ${CMAKE_SOURCE_DIR}/lib )

# Common Includes
set (INC_DIR ${CMAKE_SOURCE_DIR}/include )
#------------------------------------------------------------------------------- add_subdirectory( src ) add_subdirectory( lib ) if (DIAG) message("DIAG option is now on, building diagnostic programs") add_subdirectory( progs ) add_subdirectory( progs/units ) else(DIAG) message("DIAG option is now off, building library only") endif(DIAG) #------------------------------------------------------------------------------- # Installation # Install location INSTALL(FILES include/asin.h include/atan.h include/tanh.h include/atan2.h include/cos.h include/exp.h include/identity.h include/inv.h include/log.h include/sincos.h include/sin.h include/sqrt.h include/tan.h include/vdtcore_common.h include/vdtMath.h DESTINATION include/vdt) vdt-0.4.4/Licence.txt000066400000000000000000000011301421413530600144110ustar00rootroot00000000000000VDT is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser Public License for more details. You should have received a copy of the GNU Lesser Public License along with this program. If not, see . vdt-0.4.4/ReadMe.md000066400000000000000000000140151421413530600137730ustar00rootroot00000000000000![VDT Logo](https://svnweb.cern.ch/trac/vdt/raw-attachment/ticket/3/VDTlogo.png) # The **vdt** mathematical library **v**ectorise**d** ma**t**h * A collection of **fast** and **inline** implementations of mathematical functions. * The functions can be used in autovectorised loops. * **Double and single precision** implementations are available. * No overhead present, no intrinsics used. * A scalar (`T(T)`) and array signature (`void(const unsigned int,T*,T*)`) are provided. 
Born and developed at [CERN](http://www.cern.ch), it is used, among others, by LHC experiments and the [Geant4](http://geant4.cern.ch/) simulation toolkit. Much of the VDT code is inspired by the well-known [Cephes](http://www.netlib.org/cephes) mathematical library. ## How to get, compile and install it The **vdt** functions are **inline** and contained in header files: they are ready to be used without compilation of an external library. In any case, there is the possibility to compile a shared library containing the array signatures `void(const unsigned int,T*,T*)`. The build system chosen for **vdt** is [CMake](http://www.cmake.org). ```bash export INSTALLDIR=/path/to/mydir git clone https://github.com/dpiparo/vdt.git cd vdt cmake -DCMAKE_INSTALL_PREFIX=$INSTALLDIR . make make install ``` ## How to use it Good examples of vdt functions usage are located in the **progs** and **progs/units** directories. ### The vdt functions All **vdt** functions live in the `vdt` namespace. Their names are structured as follows: ```cpp vdt::fast_<function name>[f][v] ``` Where: * The function name is one of those listed in the table below. * The `f` letter stands for the single precision function (`float`). * The `v` letter identifies the array function. You may wonder why the names are prefixed with `fast_`. This is done to allow the user to decide where a fast and approximate implementation of the function is to be used. It is not always obvious where the accuracy can be reduced: all the flexibility is needed. 
These are the available functions: | Function | Scalar double precision | Scalar single precision | Array double precision | Array single precision | | -------- | ----------------------- | ----------------------- | ---------------------- | ---------------------- | | exponential | fast_exp | fast_expf | fast_expv | fast_expfv | | sine | fast_sin | fast_sinf | fast_sinv | fast_sinfv | | cosine | fast_cos | fast_cosf | fast_cosv | fast_cosfv | | tangent | fast_tan | fast_tanf | fast_tanv | fast_tanfv | | hyperbolic tangent | fast_tanh | fast_tanhf | fast_tanhv | fast_tanhfv | | logarithm | fast_log | fast_logf | fast_logv | fast_logfv | | arcsine | fast_asin | fast_asinf | fast_asinv | fast_asinfv | | arccosine | fast_acos | fast_acosf | fast_acosv | fast_acosfv | | arctangent | fast_atan | fast_atanf | fast_atanv | fast_atanfv | | inverse square root (1/sqrt) | fast_isqrt | fast_isqrtf | fast_isqrtv | fast_isqrtfv | ## Other CMake options Other options for CMake are available to steer the creation of the makefile: * Enable AVX extensions `-D AVX=1` * Enable NEON extensions on ARM `-D NEON=1` * Benchmarking tools and unit tests (requires C++11 support by the compiler) `-D DIAG=1` * Build static library `-D BUILD_SHARED_LIBS=0` * Prepare the library to be pre-loaded in order to replace the calls to the default math lib at runtime `-D PRELOAD=1` ## Supported Compilers The **vdt** functions can be used with every compiler (icc and gcc were tested). To compile the benchmarking tools, at least **gcc4.7** (icc12) is needed because C++11 support is required. To vectorise the functions, at least **gcc4.7** (icc12) is needed. ## Benchmarks This section is for experts who want to study the details of the functions provided and/or to compare them to other implementations. **vdt** comes with a complete benchmark suite both for accuracy and speed measurements. To measure the speed of the functions, you should use `vdtPerfBenchmark`. 
To dump to disk the ASCII files summarising the accuracy of the functions, you should use `vdtArithmBenchmark`, while the tool to compare them is `vdtArithmComparison`. In order to produce the plots of the different bits as a function of input, the script to be used is `diffhisto.py` (which depends on [ROOT](http://root.cern.ch) to produce plots). ## Example of Performance Double precision, Intel® Core™ i7-3930K CPU @ 3.20GHz running Scientific Linux 6. Operative interval of the input: [-5000,5000] ((0,5000] for isqrt and [-1,1] for Asin and Acos). Time in ns per call. |Function| libm | VDT | VDT SSE | VDT AVX | |--------| ---- | --- | ------- | ------- | |Exp|16.7|6.1|3.8|2.9| |Log|34.9|12.5|5.7|4.2| |Sin|33.7|16.2|6|5.7| |Cos|34.4|13.4|5.4|5.1| |Tan|46.6|12.5|6.3|5.6| |Asin|23|10.3|8.6|8.1| |Acos|23.7|11|8.2|8.1| |Atan|19.7|11|8.3|8.3| |Isqrt| 9.3|6.7|3|2.1| ### Accuracy Accuracy measured in terms of least significant bit. Average difference with respect to libm. |Function|AVG vdt| |--------| ------| |Acos|0.39| |Asin|0.32| |Atan|0.33| |Cos|0.25| |Exp|0.14| |Isqrt|0.45| |Log|0.42| |Sin|0.25| |Tan|0.35| ## Related Documents: * For a full characterisation of the accuracies, please refer to this [presentation](http://indico.cern.ch/contributionDisplay.py?contribId=4&sessionId=9&confId=202688). ## Reference If you want to cite vdt, please use this reference: [D. Piparo, V.Innocente and T.Hauth 2014 J. Phys.: Conf. Ser. 513 052027 "Speeding up HEP experiment software with a library of fast and auto-vectorisable mathematical functions"](http://iopscience.iop.org/1742-6596/513/5/052027) ## Mailing List The VDT project has a mailing list: VDTlibrary-talk at cern ch, linked to an e-group with the same name. The infrastructure used is the one provided by CERN IT. If you don't have a CERN account, you can obtain an external one [here](https://simba3.web.cern.ch/simba3/SelfSubscription.aspx?groupName=your-e-group-name). 
Alternatively, feel free to contact Danilo Piparo (danilo_dot_piparo_at_cern_dot_ch). ## Licence The VDT mathematical library is licenced under the LGPL3 licence ![LGPL3](https://svnweb.cern.ch/trac/vdt/raw-attachment/ticket/2/LGPL-3-Logo-mini.png) vdt-0.4.4/ReleaseNotes.txt000066400000000000000000000041721421413530600154510ustar00rootroot00000000000000V0.4.1 o Simplify the CMake infrastructure o Introduce minimal compiler versions: GCC 4.8, AppleClang 5.0, Clang 3.3, ICC 15.0 V0.4.0 o Introduce experiment NumPy building and gcc x86_64 multi-target fat library (not built by default) o Adding explicit inline to struct constructor o Adding tanh V0.3.6 o Fixed suppport for Clang on Mac o Simpler compiler detection system V0.3.5 o Added support for Clang in CMake V0.3.4 o Added regex support to perfbenchmark to profile only groups of functions o Removed fpe rising in atan2, both single and double precision, when x argument was 0. V0.3.3 o Minor Bugfixes V0.3.2 o Added the possibility to prepare the lib for preload. The CMake command is -DPRELOAD=1. If set, symbols identical to the Libm ones but containing a vdt call will be generated in the lib. If preloaded, vdt calls will be performed instead of libm ones. V0.3.1 o Use NEON instructions on ARM (-DNEON=1 cmake option) V0.3.0 o Constants * Add ULL to unsigned long long constants o CMake * Removed -pedantic * Add possibility to disable SSE for ARM or other archs * More readable constructs o Can now add debugging symbols to build with -D DEBUG=1 . o randomPool: * Removed long doubles * Size is now a uint64_t * Refactored: added abstract interface to random generator * Added 2D random generator and test o Removed redundancy of includes in diagnostic files o Removed fake program deleteme.cpp o fcnResponse,fcnComparison: * Added interface * Added 2D version * Added test o printFuncDiff: added T(T,T) and void(uint32_t,T*,T*,T*) versions o Atan2: * Added tests for response. 
* Integrated in the accuracy tests * Double and Single precision V0.2.3 o Fixed the installation of atan2.h V0.2.2 o Removed typos in the documentation (thanks to A. Neumann for spotting this!) o Fixed CMake to make the install working also on macOS V0.2.1 o Shared library built by default instead of static one. o make install now supported. To specify the install dir use the variable -DCMAKE_INSTALL_PREFIX= whith cmake. o Vector signatures are now void (const unsigned int, T const *, T*) o Petulant compilation flags (picky with warnings) vdt-0.4.4/include/000077500000000000000000000000001421413530600137365ustar00rootroot00000000000000vdt-0.4.4/include/asin.h000066400000000000000000000136631421413530600150520ustar00rootroot00000000000000/* * aasin.h * The basic idea is to exploit Pade' polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code. * The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . 
*/ #ifndef ASIN_H_ #define ASIN_H_ #include "vdtcore_common.h" namespace vdt{ namespace details{ const double RX1asin = 2.967721961301243206100E-3; const double RX2asin = -5.634242780008963776856E-1; const double RX3asin = 6.968710824104713396794E0; const double RX4asin = -2.556901049652824852289E1; const double RX5asin = 2.853665548261061424989E1; const double SX1asin = -2.194779531642920639778E1; const double SX2asin = 1.470656354026814941758E2; const double SX3asin = -3.838770957603691357202E2; const double SX4asin = 3.424398657913078477438E2; const double PX1asin = 4.253011369004428248960E-3; const double PX2asin = -6.019598008014123785661E-1; const double PX3asin = 5.444622390564711410273E0; const double PX4asin = -1.626247967210700244449E1; const double PX5asin = 1.956261983317594739197E1; const double PX6asin = -8.198089802484824371615E0; const double QX1asin = -1.474091372988853791896E1; const double QX2asin = 7.049610280856842141659E1; const double QX3asin = -1.471791292232726029859E2; const double QX4asin = 1.395105614657485689735E2; const double QX5asin = -4.918853881490881290097E1; inline double getRX(const double x){ double rx = RX1asin; rx*= x; rx+= RX2asin; rx*= x; rx+= RX3asin; rx*= x; rx+= RX4asin; rx*= x; rx+= RX5asin; return rx; } inline double getSX(const double x){ double sx = x; sx+= SX1asin; sx*= x; sx+= SX2asin; sx*= x; sx+= SX3asin; sx*= x; sx+= SX4asin; return sx; } inline double getPX(const double x){ double px = PX1asin; px*= x; px+= PX2asin; px*= x; px+= PX3asin; px*= x; px+= PX4asin; px*= x; px+= PX5asin; px*= x; px+= PX6asin; return px; } inline double getQX(const double x){ double qx = x; qx+= QX1asin; qx*= x; qx+= QX2asin; qx*= x; qx+= QX3asin; qx*= x; qx+= QX4asin; qx*= x; qx+= QX5asin; return qx; } } } namespace vdt{ // asin double precision -------------------------------------------------------- /// Double Precision asin inline double fast_asin(double x){ const uint64_t sign_mask = details::getSignMask(x); x = std::fabs(x); 
const double a = x; double zz = 1.0 - a; double px = details::getRX(zz); double qx = details::getSX(zz); const double p = zz * px/qx; zz = std::sqrt(zz+zz); double z = details::PIO4 - zz; zz = zz * p - details::MOREBITS; z -= zz; z += details::PIO4; if( a < 0.625 ){ zz = a * a; px = details::getPX(zz); qx = details::getQX(zz); z = zz*px/qx; z = a * z + a; } // Linear approx, not sooo needed but seable. Price is cheap though double res = a < 1e-8? a : z ; // Restore Sign return details::dpORuint64(res,sign_mask); } //------------------------------------------------------------------------------ /// Single Precision asin inline float fast_asinf(float x){ uint32_t flag=0; const uint32_t sign_mask = details::getSignMask(x); const float a = std::fabs(x); float z; if( a > 0.5f ) { z = 0.5f * (1.0f - a); x = sqrtf( z ); flag = 1; } else { x = a; z = x * x; } z = (((( 4.2163199048E-2f * z + 2.4181311049E-2f) * z + 4.5470025998E-2f) * z + 7.4953002686E-2f) * z + 1.6666752422E-1f) * z * x + x; // if( flag != 0 ) // { // z = z + z; // z = PIO2F - z; // } // No branch with the two coefficients float tmp = z + z; tmp = details::PIO2F - tmp; // Linear approx, not sooo needed but seable. Price is cheap though float res = a < 1e-4f? 
a : tmp * flag + (1-flag) * z ; // Restore Sign return details::spORuint32(res,sign_mask); } //------------------------------------------------------------------------------ // The cos is in this file as well inline double fast_acos( double x ){return details::PIO2 - fast_asin(x);} //------------------------------------------------------------------------------ inline float fast_acosf( float x ){return details::PIO2F - fast_asinf(x);} //------------------------------------------------------------------------------ // Vector signatures void asinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_asinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void asinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_asinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void acosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_acosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void acosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_acosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); } //vdt namespace #endif /* ASIN_H_ */ vdt-0.4.4/include/atan.h000066400000000000000000000101431421413530600150310ustar00rootroot00000000000000/* * atan.h * The basic idea is to exploit Pade polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code. 
* The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #ifndef ATAN_H_ #define ATAN_H_ #include "vdtcore_common.h" namespace vdt{ namespace details{ const double T3PO8 = 2.41421356237309504880; const double MOREBITSO2 = MOREBITS * 0.5; inline double get_atan_px(const double x2){ const double PX1atan = -8.750608600031904122785E-1; const double PX2atan = -1.615753718733365076637E1; const double PX3atan = -7.500855792314704667340E1; const double PX4atan = -1.228866684490136173410E2; const double PX5atan = -6.485021904942025371773E1; double px = PX1atan; px *= x2; px += PX2atan; px *= x2; px += PX3atan; px *= x2; px += PX4atan; px *= x2; px += PX5atan; return px; } inline double get_atan_qx(const double x2){ const double QX1atan = 2.485846490142306297962E1; const double QX2atan = 1.650270098316988542046E2; const double QX3atan = 4.328810604912902668951E2; const double QX4atan = 4.853903996359136964868E2; const double QX5atan = 1.945506571482613964425E2; double qx=x2; qx += QX1atan; qx *=x2; qx += QX2atan; qx *=x2; qx += QX3atan; qx *=x2; qx += QX4atan; qx *=x2; qx += QX5atan; return qx; } } /// Fast Atan implementation double precision inline double fast_atan(double x){ /* make argument positive and save the sign */ const uint64_t sign_mask = details::getSignMask(x); 
x=std::fabs(x); /* range reduction */ const double originalx=x; double y = details::PIO4; double factor = details::MOREBITSO2; x = (x-1.0) / (x+1.0); if( originalx > details::T3PO8 ) { y = details::PIO2; factor = details::MOREBITS; x = -1.0 / originalx ; } if ( originalx <= 0.66 ) { y = 0.; factor = 0.; x = originalx; } const double x2 = x * x; const double px = details::get_atan_px(x2); const double qx = details::get_atan_qx(x2); //double res = y +x * x2 * px / qx + x +factor; const double poq=px / qx; double res = x * x2 * poq + x; res+=y; res+=factor; return details::dpORuint64(res,sign_mask); } //------------------------------------------------------------------------------ /// Fast Atan implementation single precision inline float fast_atanf( float xx ) { const uint32_t sign_mask = details::getSignMask(xx); float x= std::fabs(xx); const float x0=x; float y=0.0f; /* range reduction */ if( x0 > 0.4142135623730950f ){ // * tan pi/8 x = (x0-1.0f)/(x0+1.0f); y = details::PIO4F; } if( x0 > 2.414213562373095f ){ // tan 3pi/8 x = -( 1.0f/x0 ); y = details::PIO2F; } const float x2 = x * x; y += ((( 8.05374449538e-2f * x2 - 1.38776856032E-1f) * x2 + 1.99777106478E-1f) * x2 - 3.33329491539E-1f) * x2 * x + x; return details::spORuint32(y,sign_mask); } //------------------------------------------------------------------------------ // Vector signatures void atanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_atanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void atanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_atanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); }// end of vdt #endif // end of atan vdt-0.4.4/include/atan2.h000066400000000000000000000100511421413530600151110ustar00rootroot00000000000000/* * atan2.h * The basic idea is to exploit Pade polynomials. 
* A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code. * The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Sept 20, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #ifndef ATAN2_H_ #define ATAN2_H_ #include "vdtcore_common.h" #include "atan.h" namespace vdt{ inline double fast_atan2( double y, double x ) { // move in first octant double xx = std::fabs(x); double yy = std::fabs(y); double tmp (0.0); if (yy>xx) { tmp = yy; yy=xx; xx=tmp; tmp=1.; } // To avoid the fpe, we protect against /0. const double oneIfXXZero = (xx==0.); double t=yy/(xx+oneIfXXZero); double z=t; double s = details::PIO4; double factor = details::MOREBITSO2; t = (t-1.0) / (t+1.0); if( z > details::T3PO8 ) { s = details::PIO2; factor = details::MOREBITS; t = -1.0 / z ; } if ( z <= 0.66 ) { s = 0.; factor = 0.; t = z; } const double t2 = t * t; const double px = details::get_atan_px(t2); const double qx = details::get_atan_qx(t2); //double res = y +x * x2 * px / qx + x +factor; const double poq=px / qx; double ret = t * t2 * poq + t; ret+=s; ret+=factor; // Here we put the result to 0 if xx was 0, if not nothing happens! 
ret*= (1.-oneIfXXZero); // move back in place if (y==0) ret=0.0; if (tmp!=0) ret = details::PIO2 - ret; if (x<0) ret = details::PI - ret; if (y<0) ret = -ret; return ret; } inline float fast_atan2f( float y, float x ) { // move in first octant float xx = std::fabs(x); float yy = std::fabs(y); float tmp (0.0f); if (yy>xx) { tmp = yy; yy=xx; xx=tmp; tmp =1.f; } // To avoid the fpe, we protect against /0. const float oneIfXXZero = (xx==0.f); float t=yy/(xx/*+oneIfXXZero*/); float z=t; if( t > 0.4142135623730950f ) // * tan pi/8 z = (t-1.0f)/(t+1.0f); //printf("%e %e %e %e\n",yy,xx,t,z); float z2 = z * z; float ret =(((( 8.05374449538e-2f * z2 - 1.38776856032E-1f) * z2 + 1.99777106478E-1f) * z2 - 3.33329491539E-1f) * z2 * z + z ); // Here we put the result to 0 if xx was 0, if not nothing happens! ret*= (1.f - oneIfXXZero); // move back in place if (y==0.f) ret=0.f; if( t > 0.4142135623730950f ) ret += details::PIO4F; if (tmp!=0) ret = details::PIO2F - ret; if (x<0.f) ret = details::PIF - ret; if (y<0.f) ret = -ret; return ret; } //------------------------------------------------------------------------------ // Vector signatures void atan2v(const uint32_t size, double const * __restrict__ iarray, double const * __restrict__ iarray2, double* __restrict__ oarray); void fast_atan2v(const uint32_t size, double const * __restrict__ iarray, double const * __restrict__ iarray2, double* __restrict__ oarray); void atan2fv(const uint32_t size, float const * __restrict__ iarray, float const * __restrict__ iarray2, float* __restrict__ oarray); void fast_atan2fv(const uint32_t size, float const * __restrict__ iarray, float const * __restrict__ iarray2, float* __restrict__ oarray); } // end namespace vdt #endif vdt-0.4.4/include/cos.h000066400000000000000000000024441421413530600146770ustar00rootroot00000000000000/* * cos.h * The basic idea is to exploit Pade polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. 
Moshier * moshier@na-net.ornl.gov) as well as actual code. * The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ #ifndef COS_H_ #define COS_H_ #include "sincos.h" namespace vdt{ // Cos double precision -------------------------------------------------------- /// Double precision cosine: just call sincos. inline double fast_cos(double x){double s,c;fast_sincos(x,s,c);return c;} //------------------------------------------------------------------------------ inline float fast_cosf(float x){float s,c;fast_sincosf(x,s,c);return c;} //------------------------------------------------------------------------------ void cosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_cosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void cosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_cosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); } //vdt namespace #endif /* COS_H_ */ vdt-0.4.4/include/diagnostic/000077500000000000000000000000001421413530600160625ustar00rootroot00000000000000vdt-0.4.4/include/diagnostic/vdtdiag_fcnComparison.h000066400000000000000000000422561421413530600225470ustar00rootroot00000000000000/** * This file contains the classes to store and compare the * arithmetical performance of the mathematical functions. * * Author Danilo Piparo **/ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #ifndef _VDT_COMPARISON_ #define _VDT_COMPARISON_ #include #include "vdtdiag_helper.h" #include "vdtdiag_filePersistence.h" #include "vdtdiag_interfaces.h" /** * Class that represents the comparison of the response of two mathematical * functions. It is initialised with input values and outputs of the two * functions or with an ascii file. It dumps on disk its status as ascii file * as well. Methods to fetch Mean, RMS, min and MAX of the differences between * functions outputs are provided. **/ template class fcnComparison1D:public IfcnComparison{ using vectT=std::vector; public: //------------------------------------------------------------------------------ fcnComparison1D(const std::string& name, const vectT& input, const vectT& out1, const vectT& out2): IfcnComparison(name,out1,out2), m_input(input){}; //------------------------------------------------------------------------------ fcnComparison1D(const std::string& input_filename): IfcnComparison(input_filename){ std::ifstream ifile ( input_filename ); std::string line; //skip the 2 header lines but read the func name from resp for (uint16_t i=0;i<5;++i) std::getline(ifile,line); //read data from file //read stats: fpFromHex mean, rms; ifile >> IfcnComparison::m_max >> IfcnComparison::m_min >> mean >> rms; IfcnComparison::m_mean = mean.getValue(); IfcnComparison::m_RMS = rms.getValue(); //read rest of file fpFromHex in_val, out1_val, out2_val; uint16_t tmp_diff; T dummy; //input value for Python, now useless while(ifile >> in_val >> out1_val >> out2_val >> tmp_diff >> dummy) { m_input.push_back(in_val.getValue()); IfcnComparison::m_out1.push_back(out1_val.getValue()); IfcnComparison::m_out2.push_back(out2_val.getValue()); IfcnComparison::m_diff_bitv.push_back(tmp_diff); } }; 
//------------------------------------------------------------------------------ ~fcnComparison1D(){}; //------------------------------------------------------------------------------ /// Print to screen the information void print(){ // Loop over all numbers uint32_t counter=0; const uint32_t size=m_input.size(); std::cout << "Function Performance Comparison:\n"; for (uint32_t i=0;i::digits10 +2; // Patchwork, but it's ok to read! const uint32_t dec_repr_w=width+7; std::cout << std::setprecision(width); std::cout << counter++ << "/" << size << " " << IfcnComparison::m_name; std::cout.setf(std::ios_base::scientific); std::cout << "( " << std::setw(dec_repr_w) << m_input[i] << " ) = " << std::setw(dec_repr_w) << IfcnComparison::m_out1[i] << " " << std::setw(dec_repr_w) << IfcnComparison::m_out2[i]; //<< "\t" std::cout.unsetf(std::ios_base::scientific); std::cout.setf(std::ios_base::showbase); std::cout << std::setbase(16) << " "<< vdt::details::fp2uint(IfcnComparison::m_out1[i]) << " " << vdt::details::fp2uint(IfcnComparison::m_out2[i]) << " "<< std::setbase(10) << IfcnComparison::m_diff_bitv[i] << std::endl; std::cout.unsetf(std::ios_base::showbase); } // now the stats IfcnComparison::printStats(); } //------------------------------------------------------------------------------ /// Dump on ascii file void writeFile(const std::string& output_filename){ const std::string preamble("VDT function arithmetics performance comparison file (the first 5 lines are the header)\n"); std::ofstream ofile ( output_filename ); // Copy the input file if the object was created from file if (IfcnComparison::m_from_file){ std::string line; std::ifstream ifile ( IfcnComparison::m_ifile_name ); getline(ifile,line); ofile << "Dumped by an object initialised by " << IfcnComparison::m_ifile_name << " - " << preamble; ofile << ifile.rdbuf() ; } else{ // Write an header and the numbers in the other case ofile << preamble; if (sizeof(T)==8) // some kind of RTTC ofile << "Double Precision\n"; 
else ofile << "Single Precision\n"; ofile << "Comparison specs/function name = " << IfcnComparison::m_name << std::endl << "Format: input out1 out2 diffbit (decimal)input\nFirst line are stats: Max Min 0xMean 0xRMS\n"; // Do not write dec, but HEX! // First line are stats ofile << IfcnComparison::m_max << " " << IfcnComparison::m_min << " " << fpToHex(IfcnComparison::m_mean) << fpToHex(IfcnComparison::m_RMS) << std::endl; // Now the rest of file ofile.precision(std::numeric_limits::digits10); for (uint32_t i=0;i(m_input[i]) << fpToHex(IfcnComparison::m_out1[i]) << fpToHex(IfcnComparison::m_out2[i]) << IfcnComparison::m_diff_bitv[i] << " " << std::fixed << m_input[i] < class fcnComparison2D:public IfcnComparison{ using vectT=std::vector; public: //------------------------------------------------------------------------------ fcnComparison2D(const std::string& name, const vectT& input1, const vectT& input2, const vectT& out1, const vectT& out2): IfcnComparison(name,out1,out2), m_input1(input1), m_input2(input2){}; //------------------------------------------------------------------------------ fcnComparison2D(const std::string& input_filename): IfcnComparison(input_filename){ std::ifstream ifile ( input_filename ); std::string line; //skip the 2 header lines but read the func name from resp for (uint16_t i=0;i<5;++i) std::getline(ifile,line); //read data from file //read stats: fpFromHex mean, rms; ifile >> IfcnComparison::m_max >> IfcnComparison::m_min >> mean >> rms; IfcnComparison::m_mean = mean.getValue(); IfcnComparison::m_RMS = rms.getValue(); //read rest of file fpFromHex in_val1, in_val2, out1_val, out2_val; uint16_t tmp_diff; T dummy1, dummy2; //input value for Python, now useless while(ifile >> in_val1 >> in_val2 >> out1_val >> out2_val >> tmp_diff >> dummy1 >> dummy2) { m_input1.push_back(in_val1.getValue()); m_input2.push_back(in_val2.getValue()); IfcnComparison::m_out1.push_back(out1_val.getValue()); IfcnComparison::m_out2.push_back(out2_val.getValue()); 
IfcnComparison::m_diff_bitv.push_back(tmp_diff); } }; //------------------------------------------------------------------------------ ~fcnComparison2D(){}; //------------------------------------------------------------------------------ /// Print to screen the information void print(){ // Loop over all numbers uint32_t counter=0; const uint32_t size=m_input1.size(); std::cout << "Function Performance Comparison:\n"; for (uint32_t i=0;i::digits10 +2; // Patchwork, but it's ok to read! const uint32_t dec_repr_w=width+7; std::cout << std::setprecision(width); std::cout << counter++ << "/" << size << " " << IfcnComparison::m_name; std::cout.setf(std::ios_base::scientific); std::cout << "( " << std::setw(dec_repr_w) << m_input1[i] << ", "<< m_input2[i]<< " ) = " << std::setw(dec_repr_w) << IfcnComparison::m_out1[i] << " " << std::setw(dec_repr_w) << IfcnComparison::m_out2[i]; //<< "\t" std::cout.unsetf(std::ios_base::scientific); std::cout.setf(std::ios_base::showbase); std::cout << std::setbase(16) << " "<< vdt::details::fp2uint(IfcnComparison::m_out1[i]) << " " << vdt::details::fp2uint(IfcnComparison::m_out2[i]) << " "<< std::setbase(10) << IfcnComparison::m_diff_bitv[i] << std::endl; std::cout.unsetf(std::ios_base::showbase); } // now the stats IfcnComparison::printStats(); } //------------------------------------------------------------------------------ /// Dump on ascii file void writeFile(const std::string& output_filename){ const std::string preamble("VDT function arithmetics performance comparison file (the first 5 lines are the header)\n"); std::ofstream ofile ( output_filename ); // Copy the input file if the object was created from file if (IfcnComparison::m_from_file){ std::string line; std::ifstream ifile ( IfcnComparison::m_ifile_name ); getline(ifile,line); ofile << "Dumped by an object initialised by " << IfcnComparison::m_ifile_name << " - " << preamble; ofile << ifile.rdbuf() ; } else{ // Write an header and the numbers in the other case ofile << 
preamble; if (sizeof(T)==8) // some kind of RTTC ofile << "Double Precision\n"; else ofile << "Single Precision\n"; ofile << "Comparison specs/function name = " << IfcnComparison::m_name << std::endl << "Format: input out1 out2 diffbit (decimal)input\nFirst line are stats: Max Min 0xMean 0xRMS\n"; // Do not write dec, but HEX! // First line are stats ofile << IfcnComparison::m_max << " " << IfcnComparison::m_min << " " << fpToHex(IfcnComparison::m_mean) << fpToHex(IfcnComparison::m_RMS) << std::endl; // Now the rest of file ofile.precision(std::numeric_limits::digits10); for (uint32_t i=0;i(m_input1[i]) << fpToHex(m_input2[i]) << fpToHex(IfcnComparison::m_out1[i]) << fpToHex(IfcnComparison::m_out2[i]) << IfcnComparison::m_diff_bitv[i] << " " << std::fixed << m_input1[i] << " " << m_input2[i] < class fcnComparison_old{ using vectT=std::vector; public: /// Ctor from input, output1 and output2. fcnComparison_old(const std::string& name, const vectT& input, const vectT& out1, const vectT& out2): m_from_file(false), m_ifile_name("From scratch"), m_name(name), m_input(input), m_out1(out1), m_out2(out2){ // A basic consistency check assert(input.size()==out1.size()); assert(input.size()==out2.size()); // Calculate the differences in bit m_fillBitDiffs(); // Calculate the stats m_calcStats(); } /// Construct from a file fcnComparison_old(const std::string& input_filename): m_from_file(true), m_ifile_name(input_filename), m_name(std::string("From ")+input_filename){ std::ifstream ifile ( input_filename ); std::string line; //skip the 5 header lines but read the func name from resp for (uint16_t i=0;i<2;++i) std::getline(ifile,line); //read data from file //read stats: fpFromHex mean, rms; ifile >> m_max >> m_min >> mean >> rms; m_mean = mean.getValue(); m_RMS = rms.getValue(); //read rest of file fpFromHex in_val, out1_val, out2_val; uint16_t tmp_diff; T trash; //input value for Python, now useless while(ifile >> in_val >> out1_val >> out2_val >> tmp_diff >> trash) { 
m_input.push_back(in_val.getValue()); m_out1.push_back(out1_val.getValue()); m_out2.push_back(out2_val.getValue()); m_diff_bitv.push_back(tmp_diff); } // The same stuff as before assert(m_input.size()==m_out1.size()); assert(m_input.size()==m_out2.size()); //m_fillBitDiffs(); //m_calcStats(); } //------------------------------------------------------------------------------ /// Nothing to do here ~fcnComparison_old(){}; //------------------------------------------------------------------------------ /// Print to screen the information void print(){ // Loop over all numbers uint32_t counter=0; const uint32_t size=m_input.size(); std::cout << "Function Performance Comparison:\n"; for (uint32_t i=0;i::digits10 +2; // Patchwork, but it's ok to read! const uint32_t dec_repr_w=width+7; std::cout << std::setprecision(width); std::cout << counter++ << "/" << size << " " << m_name; std::cout.setf(std::ios_base::scientific); std::cout << "( " << std::setw(dec_repr_w) << m_input[i] << " ) = " << std::setw(dec_repr_w) << m_out1[i] << " " << std::setw(dec_repr_w) << m_out2[i]; //<< "\t" std::cout.unsetf(std::ios_base::scientific); std::cout.setf(std::ios_base::showbase); std::cout << std::setbase(16) << " "<< vdt::details::fp2uint(m_out1[i]) << " " << vdt::details::fp2uint(m_out2[i]) << " "<< std::setbase(10) << m_diff_bitv[i] << std::endl; std::cout.unsetf(std::ios_base::showbase); } // now the stats printStats(); } //------------------------------------------------------------------------------ void printStats(const bool tabular = false){ if(!tabular){ std::cout << "Stats for " << m_name << ":\n" << std::setprecision(2) << "Max diff bit: " << m_max << "\n" << "Min diff bit: " << m_min << "\n" << "Mean diff bit: " << m_mean << "\n" << "RMS diff bit: " << m_RMS << "\n"; }else{ std::cout << std::setw(30) << std::left << m_name << std::setw(5) << std::right << m_max << std::setw(5) << std::right << m_min << std::setiosflags(std::ios::fixed) << std::setw(7) << std::right << 
std::setprecision(2) << m_mean << std::setw(7) << std::right << std::setprecision(2) << m_RMS << "\n"; } }; /// Dump on ascii file void writeFile(const std::string& output_filename){ const std::string preamble("VDT function arithmetics performance comparison file (the first 5 lines are the header)\n"); std::ofstream ofile ( output_filename ); // Copy the input file if the object was created from file if (m_from_file){ std::string line; std::ifstream ifile ( m_ifile_name ); getline(ifile,line); ofile << "Dumped by an object initialised by " << m_ifile_name << " - " << preamble; ofile << ifile.rdbuf() ; } else{ // Write an header and the numbers in the other case ofile << preamble; if (sizeof(T)==8) // some kind of RTTC ofile << "Double Precision\n"; else ofile << "Single Precision\n"; ofile << "Comparison specs/function name = " << m_name << std::endl << "Format: input out1 out2 diffbit (decimal)input\nFirst line are stats: Max Min 0xMean 0xRMS\n"; // Do not write dec, but HEX! // First line are stats ofile << m_max << " " << m_min << " " << fpToHex(m_mean) << fpToHex(m_RMS) << std::endl; // Now the rest of file ofile.precision(std::numeric_limits::digits10); for (uint32_t i=0;i(m_input[i]) << fpToHex(m_out1[i]) << fpToHex(m_out2[i]) << m_diff_bitv[i] << " " << std::fixed << m_input[i] < 0);} private: const bool m_from_file; const std::string m_ifile_name; const std::string m_name; vectT m_input; vectT m_out1; vectT m_out2; std::vector m_diff_bitv; double m_mean = 0; double m_RMS = 0; uint16_t m_min=255; uint16_t m_max=0; /// Fill the vector of different bits void m_fillBitDiffs(){ const uint32_t size=m_input.size(); m_diff_bitv.reserve(size); for (uint32_t i=0;im_max)m_max=bitdiff; } // Now the mean! 
const uint32_t size=m_input.size(); m_mean=sum_x/size; if (size==1) m_RMS=-1; else m_RMS=(sum_x2 - size*m_mean*m_mean)/(size-1); } }; // For compatibility template using fcnComparison = fcnComparison1D; #endif vdt-0.4.4/include/diagnostic/vdtdiag_fcnPerformance.h000066400000000000000000000263641421413530600227000ustar00rootroot00000000000000#ifndef VDTDIAG_FCNPERFORMANCE_H_ #define VDTDIAG_FCNPERFORMANCE_H_ #include #include "vdtdiag_helper.h" /** * Class that represents the CPU performance of a mathematical function. * The quantities stored are the number mean time per execution and the * associated error. Two constructors are available: one for the scalar and one * for the vector signature. * TODO: * o Write the timings on disk on an ascii file * * Author Danilo Piparo * **/ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ //----------------------------------------------------------------------------- template class fcnPerformance{ public: //----------------------------------------------------------------------------- /** * Construct from name, input and scalar function and number of repetitions. * Scalar signature. 
**/ fcnPerformance(const std::string& fcnName, const std::vector& input, const std::function fcn, const uint32_t repetitions=10): m_fcn_name(fcnName), m_input(input), m_input2(input), m_fcn(fcn), m_repetitions(repetitions){ measureTime(); } //----------------------------------------------------------------------------- fcnPerformance(const std::string& fcnName, const std::vector& input1, const std::vector& input2, const std::function fcn, const uint32_t repetitions=10): m_fcn_name(fcnName), m_input(input1), m_input2(input2), m_fcn2D(fcn), m_repetitions(repetitions){ measureTime2D(); } //----------------------------------------------------------------------------- /** * Construct from name, input and scalar function and number of repetitions. * Array signature. **/ fcnPerformance(const std::string& fcnName, const std::vector& input, std::function fcnv, const uint32_t repetitions=10): m_fcn_name(fcnName), m_input(input), m_input2(input), m_fcnv(fcnv), m_repetitions(repetitions){ measureTimev(); } //------------------------------------------------------------------------------ fcnPerformance(const std::string& fcnName, const std::vector& input1, const std::vector& input2, std::function fcnv, const uint32_t repetitions=10): m_fcn_name(fcnName), m_input(input1), m_input2(input2), m_fcn2Dv(fcnv), m_repetitions(repetitions){ measureTime2Dv(); } //------------------------------------------------------------------------------ /// Nothing to do here ~fcnPerformance(){} //------------------------------------------------------------------------------ /// Print the timing on screen. If a scale is povided, express the time in terms of a speedup factor. void print(std::ostream& stream = std::cout, const double scale=1.){ stream << std::setprecision(2) << std::fixed << "Function " << std::setw(15) << std::left << m_fcn_name << " : " << m_avg_time << " +- " << m_avg_time_err << " ns"; if (scale!=1.) 
stream << " --> " << scale/m_avg_time << "X speedup!"; stream << std::endl; } //----------------------------------------------------------------------------- /// Get the mean elapsed time per call const T getAvg()const { return m_avg_time; } //----------------------------------------------------------------------------- /// Get the error on the mean elapsed time per call const T getAvgErr()const { return m_avg_time_err; } //----------------------------------------------------------------------------- private: /// The name of the benchmarked function const std::string m_fcn_name; /// Const reference to the input values const std::vector& m_input; /// Const reference to the input values const std::vector& m_input2; /// Scalar function const std::function m_fcn; /// Scalar function const std::function m_fcn2D; /// Array function (cannot coexist with scalar) const std::function m_fcnv; /// Array function (cannot coexist with scalar) const std::function m_fcn2Dv; /// Number of repetitions of the measurement for stability const uint32_t m_repetitions; /// Mean time double m_avg_time; /// Error on the mean time double m_avg_time_err; //----------------------------------------------------------------------------- /// Measure the timings of the function, scalar signature void measureTime(){ // momenta to calculate mean and rms, they will be filled later uint64_t t=0; uint64_t t2=0; // An useful quantity const uint32_t size=m_input.size(); // Allocate the array of results. 
Necessary to circumvent compiler optimisations double* results_arr = new double[size]; // Set up some warm-up iterations const uint32_t warm_up = m_repetitions; // Allocate once the delta outside the loop uint64_t deltat=0; // The timer which is used to mesure the time interval vdth::timer fcntimer; // Start the loop on the repetitions for (uint32_t irep=0;irep=warm_up){ deltat=fcntimer.get_elapsed_time(); t+=deltat; t2+=deltat*deltat; } // To avoid optimisations, call a dummy function std::vector results(results_arr,results_arr+size); fool_optimisation(results); } delete [] results_arr; // Calculate mean and error on the mean const uint64_t iterations = size * m_repetitions; calculate_mean_and_err(t,t2,iterations); } //----------------------------------------------------------------------------- /// Measure the timings of the function, scalar signature void measureTime2D(){ // momenta to calculate mean and rms, they will be filled later uint64_t t=0; uint64_t t2=0; // An useful quantity const uint32_t size=m_input.size(); // Allocate the array of results. 
Necessary to circumvent compiler optimisations double* results_arr = new double[size]; // Set up some warm-up iterations const uint32_t warm_up = m_repetitions; // Allocate once the delta outside the loop uint64_t deltat=0; // The timer which is used to mesure the time interval vdth::timer fcntimer; // Start the loop on the repetitions for (uint32_t irep=0;irep=warm_up){ deltat=fcntimer.get_elapsed_time(); t+=deltat; t2+=deltat*deltat; } // To avoid optimisations, call a dummy function std::vector results(results_arr,results_arr+size); fool_optimisation(results); } delete [] results_arr; // Calculate mean and error on the mean const uint64_t iterations = size * m_repetitions; calculate_mean_and_err(t,t2,iterations); } //----------------------------------------------------------------------------- /// Measure the timings of the function, array signature void measureTimev(){ // See explainations in the scalar method! uint64_t t=0.; uint64_t t2=0.; const uint32_t size=m_input.size(); const uint32_t warm_up = m_repetitions; uint64_t deltat=0; vdth::timer fcntimer; for (uint32_t irep=0;irep (&m_input[0]); T* results_arr=new T[size]; fcntimer.start(); m_fcnv(size,input_arr,results_arr); fcntimer.stop(); if (irep>=warm_up){ deltat = fcntimer.get_elapsed_time(); t+=deltat; t2+=deltat*deltat; } std::vector results(results_arr,results_arr+size); delete[] results_arr; fool_optimisation(results); } const uint64_t iterations = size * m_repetitions; calculate_mean_and_err(t,t2,iterations); } //----------------------------------------------------------------------------- /// Measure the timings of the function, array signature void measureTime2Dv(){ // See explainations in the scalar method! 
uint64_t t=0.; uint64_t t2=0.; const uint32_t size=m_input.size(); const uint32_t warm_up = m_repetitions; uint64_t deltat=0; vdth::timer fcntimer; for (uint32_t irep=0;irep (&m_input[0]); T* input_arr2= const_cast (&m_input2[0]); T* results_arr=new T[size]; fcntimer.start(); m_fcn2Dv(size,input_arr1,input_arr2,results_arr); fcntimer.stop(); if (irep>=warm_up){ deltat = fcntimer.get_elapsed_time(); t+=deltat; t2+=deltat*deltat; } std::vector results(results_arr,results_arr+size); delete[] results_arr; fool_optimisation(results); } const uint64_t iterations = size * m_repetitions; calculate_mean_and_err(t,t2,iterations); } //------------------------------------------------------------------------------ /// Loop on the values in order to force the compiler to actually calculate them for real void fool_optimisation(const std::vector& results){ for (const T& res:results ) if (res == -0.123) std::cout << "To fool the compiler's optimisations!\n"; } //------------------------------------------------------------------------------ /// Calculate Mean elapsed time and error on the mean void calculate_mean_and_err(const double t, const double t2,const uint64_t iterations){ // Mean is easy m_avg_time = t / iterations; // Calculate the error on the mean // RMS, one dof is gone for the mean, so iterations-1 const double rms2=(double(t2) - iterations*m_avg_time*m_avg_time)/(iterations-1); const double rms=sqrt(rms2); m_avg_time_err = rms / sqrt(iterations); //2 sqrts, but we go for clarity here. } //------------------------------------------------------------------------------ }; #endif vdt-0.4.4/include/diagnostic/vdtdiag_fcnResponse.h000066400000000000000000000333171421413530600222310ustar00rootroot00000000000000/** * This file contains the classes to store the * arithmetical performance of the mathematical functions. 
* * Author Danilo Piparo * **/ #include "assert.h" #include #include #include #include "vdtdiag_interfaces.h" #include "vdtdiag_filePersistence.h" /** * Class that represents the response of a mathematical function. * The quantities stored are the input numbers and the output numbers. * The Ascii file i/o is supported. A dump on an ascii file can be performed * as well as the object construction from ascii file. **/ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . 
*/ template class fcnResponse1D:IfcnResponse{ public: fcnResponse1D(const std::string& fcnName, const std::vector& input, const std::function& fcn): IfcnResponse(fcnName,input), m_fcn(fcn){ for (auto& inp : this->getInput() ) pushOutputVal(fcn(inp)); } //----------------------------------------------------------------------------- /// Construct from name, input and vector function fcnResponse1D(const std::string& fcnName, const std::vector& input, const std::function& fcnv): IfcnResponse(fcnName,input), m_fcnv(fcnv){ //m_output.reserve(size); const uint64_t size = this->getInput().size(); T* input_arr = const_cast (&this->getInput()[0]); T* output_arr = new T[size]; fcnv(size,input_arr,output_arr); for (uint32_t i=0;i(input_filename){ std::ifstream ifile ( input_filename ); std::string line; //skip the 5 header lines for (uint16_t i=0;i<5;++i) std::getline(ifile,line); //read data from file fpFromHex in_val, out_val; while(ifile >> in_val >> out_val) { pushInputVal(in_val.getValue()); pushOutputVal(out_val.getValue()); } } //----------------------------------------------------------------------------- ~fcnResponse1D(){}; //----------------------------------------------------------------------------- /// Return the input inline std::vector& getInput() {return IfcnResponse::getInput1();}; void pushInputVal(T val) {IfcnResponse::pushInput1Val(val);}; inline const T outputVal(uint64_t index) {return IfcnResponse::outputVal(index);}; //----------------------------------------------------------------------------- /// Return the input inline std::vector& getOutput() {return IfcnResponse::getOutput();}; void pushOutputVal(T val) {IfcnResponse::pushOutputVal(val);}; inline const T inputVal(uint64_t index) {return IfcnResponse::input1Val (index);}; //----------------------------------------------------------------------------- /// Dump on ascii file void writeFile(const std::string& output_filename) { const std::string preamble("VDT function arithmetics performance 1 input 
file (the first 5 lines are the header)\n"); std::ofstream ofile ( output_filename ); // Copy the input file if the object was created from file if (IfcnResponse::isFromFile()){ std::string line; std::ifstream ifile ( IfcnResponse::getIfileName() ); getline(ifile,line); ofile << "Dumped by an object initialised by " << IfcnResponse::getIfileName() << " - " << preamble; ofile << ifile.rdbuf() ; } else{ // Write an header and the numbers in the other case ofile << preamble; if (sizeof(T)==8) // some kind of RTTC ofile << "Double Precision\n"; else ofile << "Single Precision\n"; ofile << "Function Name = " << IfcnResponse::getFcnName() << std::endl << "--\n--\n"; // Do not write dec, but HEX! for (uint32_t i=0;igetInput().size();++i) ofile << fpToHex(inputVal(i)) << fpToHex(outputVal(i)) << std::endl; } } //----------------------------------------------------------------------------- /// Print to screen void print() { const uint64_t size=getInput().size(); std::cout << "Function Performance (1 single input):\n"; std::cout << std::setprecision(std::numeric_limits::digits10); for (uint64_t i=0;i::getFcnName() << "(" << inputVal(i) << ") = " << outputVal(i) << std::endl; } //----------------------------------------------------------------------------- private: const std::function m_fcn; const std::function m_fcnv; }; template class fcnResponse2D:IfcnResponse{ public: fcnResponse2D(const std::string& fcnName, const std::vector& input1, const std::vector& input2, const std::function& fcn): IfcnResponse(fcnName,input1,input2), m_fcn(fcn){ for (uint64_t i=0;i& input1, const std::vector& input2, const std::function& fcnv): IfcnResponse(fcnName,input1,input2), m_fcnv(fcnv){ //m_output.reserve(size); const uint64_t size = this->getInput1().size(); T* input1_arr = const_cast (&this->getInput1()[0]); T* input2_arr = const_cast (&this->getInput2()[0]); T* output_arr = new T[size]; fcnv(size,input1_arr,input2_arr,output_arr); for (uint32_t i=0;i(input_filename){ std::ifstream ifile 
( input_filename ); std::string line; //skip the 5 header lines for (uint16_t i=0;i<5;++i) std::getline(ifile,line); //read data from file fpFromHex in_val1, in_val2, out_val; while(ifile >> in_val1 >> in_val2 >> out_val) { pushInput1Val(in_val1.getValue()); pushInput2Val(in_val2.getValue()); pushOutputVal(out_val.getValue()); } } //----------------------------------------------------------------------------- ~fcnResponse2D(){}; //----------------------------------------------------------------------------- /// Return the input inline std::vector& getInput1() {return IfcnResponse::getInput1();}; void pushInput1Val(T val) {IfcnResponse::pushInput1Val(val);}; inline const T input1Val(uint64_t index) {return IfcnResponse::input1Val (index);}; inline std::vector& getInput2() {return IfcnResponse::getInput2();}; void pushInput2Val(T val) {IfcnResponse::pushInput2Val(val);}; inline const T input2Val(uint64_t index) {return IfcnResponse::input2Val (index);}; //----------------------------------------------------------------------------- /// Return the input inline std::vector& getOutput() {return IfcnResponse::getOutput();}; void pushOutputVal(T val) {IfcnResponse::pushOutputVal(val);}; inline const T outputVal(uint64_t index) {return IfcnResponse::outputVal(index);}; //----------------------------------------------------------------------------- /// Dump on ascii file void writeFile(const std::string& output_filename) { const std::string preamble("VDT function arithmetics performance 1 input file (the first 5 lines are the header)\n"); std::ofstream ofile ( output_filename ); // Copy the input file if the object was created from file if (IfcnResponse::isFromFile()){ std::string line; std::ifstream ifile ( IfcnResponse::getIfileName() ); getline(ifile,line); ofile << "Dumped by an object initialised by " << IfcnResponse::getIfileName() << " - " << preamble; ofile << ifile.rdbuf() ; } else{ // Write an header and the numbers in the other case ofile << preamble; if 
(sizeof(T)==8) // some kind of RTTC ofile << "Double Precision\n"; else ofile << "Single Precision\n"; ofile << "Function Name = " << IfcnResponse::getFcnName() << std::endl << "--\n--\n"; // Do not write dec, but HEX! for (uint32_t i=0;igetInput1().size();++i) ofile << fpToHex(input1Val(i)) << fpToHex(input2Val(i)) << fpToHex(outputVal(i)) << std::endl; } } //----------------------------------------------------------------------------- /// Print to screen void print() { const uint64_t size=getInput1().size(); std::cout << "Function Performance (2 inputs):\n"; std::cout << std::setprecision(std::numeric_limits::digits10); for (uint64_t i=0;i::getFcnName() << "(" << input1Val(i) << ", "<< input2Val(i) <<" ) = " << outputVal(i) << std::endl; } //----------------------------------------------------------------------------- private: const std::function m_fcn; const std::function m_fcnv; }; template class fcnResponse_old{ public: //----------------------------------------------------------------------------- /// Construct from name, input and scalar function fcnResponse_old(const std::string& fcnName, const std::vector& input, const std::function& fcn): m_from_file(false), m_fcn_name(fcnName), m_ifile_name("From Scratch"), m_fcn(fcn), m_input(input){ m_output.reserve(m_input.size()); for (auto& inp : m_input ) m_output.push_back(fcn(inp)); } //----------------------------------------------------------------------------- /// Construct from name, input and vector function fcnResponse_old(const std::string& fcnName, const std::vector& input, const std::function& fcnv): m_from_file(false), m_fcn_name(fcnName), m_ifile_name("From Scratch"), m_fcnv(fcnv), m_input(input){ const uint32_t size=m_input.size(); m_output.reserve(size); T* input_arr = const_cast (&m_input[0]); T* output_arr = new T[size]; fcnv(size,input_arr,output_arr); for (uint32_t i=0;i in_val, out_val; while(ifile >> in_val >> out_val) { m_input.push_back(in_val.getValue()); 
m_output.push_back(out_val.getValue()); } } //----------------------------------------------------------------------------- /// Nothing to do ~fcnResponse_old(){}; //----------------------------------------------------------------------------- /// Return the output const std::vector& getOutput() const {return m_output;}; /// Return the input const std::vector& getInput() const {return m_input;}; //----------------------------------------------------------------------------- /// Dump on ascii file void writeFile(const std::string& output_filename){ const std::string preamble("VDT function arithmetics performance file (the first 5 lines are the header)\n"); std::ofstream ofile ( output_filename ); // Copy the input file if the object was created from file if (m_from_file){ std::string line; std::ifstream ifile ( m_ifile_name ); getline(ifile,line); ofile << "Dumped by an object initialised by " << m_ifile_name << " - " << preamble; ofile << ifile.rdbuf() ; } else{ // Write an header and the numbers in the other case ofile << preamble; if (sizeof(T)==8) // some kind of RTTC ofile << "Double Precision\n"; else ofile << "Single Precision\n"; ofile << "Function Name = " << m_fcn_name << std::endl << "--\n--\n"; //ofile << std::setprecision(std::numeric_limits::digits10); // Do not write dec, but HEX! 
for (uint32_t i=0;i(m_input[i]) << fpToHex(m_output[i]) << std::endl; } } //----------------------------------------------------------------------------- /// Print to screen void print(){ uint32_t counter=0; const uint32_t size=m_input.size(); std::cout << "Function Performance:\n"; for (uint32_t i=0;i::digits10); std::cout << counter++ << "/" << size << " " << m_fcn_name << "(" << m_input[i] << ") = " << m_output[i] << std::endl; } } private: const bool m_from_file; const std::string m_fcn_name; const std::string m_ifile_name; const std::function m_fcn; const std::function m_fcnv; std::vector m_input; std::vector m_output; }; // For compatibility template using fcnResponse = fcnResponse1D; vdt-0.4.4/include/diagnostic/vdtdiag_fcnTuples.h000066400000000000000000000506451421413530600217120ustar00rootroot00000000000000/* vdtdiag_fcnTuples.h * * created: 12.7.2012 * * Contains two functions (scalar, vector form) * that fill a std::vector with std::tuple(s) binding * together a function, a function name and a random pool * of appropriate range. * * This function should serve as a central resource for tuples, * function names alone and so on. * * Author: Ladislav Horky, Danilo Piparo */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . 
*/ #ifndef _VDTDIAG_TUPLES_ #define _VDTDIAG_TUPLES_ #include "vdtdiag_helper.h" #include "vdtdiag_random.h" #include "vdtMath.h" #include #include //external libs #include "externalLibcfg.h" #ifdef _VC_AVAILABLE_ #include "vdtdiag_vcWrapper.h" #endif using namespace vdt; template using genfpfcn_tuple = std::tuple,const std::vector& >; template using genfpfcnv_tuple = std::tuple,const std::vector& >; template using genfpfcn2D_tuple = std::tuple, const std::vector&, const std::vector&>; template using genfpfcn2Dv_tuple = std::tuple, const std::vector&, const std::vector&>; /// Fills vector passed in first parameter with fcn tuples based on random pools passed in following parameters void getFunctionTuples( std::vector>* fcn_tuples, randomPool2D& moneone2Pool){ fcn_tuples->clear(); fcn_tuples->push_back(genfpfcn2D_tuple( "Identity2D", identity2D, moneone2Pool.getNumbersX(), moneone2Pool.getNumbersY() )); fcn_tuples->push_back(genfpfcn2D_tuple( "Atan2", refMath::atan2, moneone2Pool.getNumbersX(), moneone2Pool.getNumbersY() )); fcn_tuples->push_back(genfpfcn2D_tuple( "Fast_Atan2", fast_atan2, moneone2Pool.getNumbersX(), moneone2Pool.getNumbersY() )); } void getFunctionTuples( std::vector>* fcn_tuples, randomPool2D& moneone2Pool){ fcn_tuples->clear(); fcn_tuples->push_back(genfpfcn2D_tuple( "Identity2Df", identity2Df, moneone2Pool.getNumbersX(), moneone2Pool.getNumbersY() )); fcn_tuples->push_back(genfpfcn2D_tuple( "Atan2f", refMath::atan2f, moneone2Pool.getNumbersX(), moneone2Pool.getNumbersY() )); fcn_tuples->push_back(genfpfcn2D_tuple( "Fast_Atan2f", fast_atan2f, moneone2Pool.getNumbersX(), moneone2Pool.getNumbersY() )); } /// Fills vector passed in first parameter with fcn tuples based on random pools passed in following parameters void getFunctionTuplesvect( std::vector>* fcn_tuples, randomPool2D& moneone2Pool){ fcn_tuples->clear(); fcn_tuples->push_back(genfpfcn2Dv_tuple( "Identity2Dv", identity2Dv, moneone2Pool.getNumbersX(), moneone2Pool.getNumbersY() )); 
fcn_tuples->push_back(genfpfcn2Dv_tuple( "Atan2v", atan2v, moneone2Pool.getNumbersX(), moneone2Pool.getNumbersY() )); fcn_tuples->push_back(genfpfcn2Dv_tuple( "Fast_Atan2v", fast_atan2v, moneone2Pool.getNumbersX(), moneone2Pool.getNumbersY() )); } /// Fills vector passed in first parameter with fcn tuples based on random pools passed in following parameters void getFunctionTuplesvect( std::vector>* fcn_tuples, randomPool2D& moneone2Pool){ fcn_tuples->clear(); fcn_tuples->push_back(genfpfcn2Dv_tuple( "Identity2Dfv", identity2Dfv, moneone2Pool.getNumbersX(), moneone2Pool.getNumbersY() )); fcn_tuples->push_back(genfpfcn2Dv_tuple( "Atan2fv", atan2fv, moneone2Pool.getNumbersX(), moneone2Pool.getNumbersY() )); fcn_tuples->push_back(genfpfcn2Dv_tuple( "Fast_Atan2fv", fast_atan2fv, moneone2Pool.getNumbersX(), moneone2Pool.getNumbersY() )); } /// Fills vector passed in first parameter with fcn tuples based on random pools passed in following parameters void getFunctionTuples( std::vector>* fcn_tuples, randomPool& symmetricrPool, randomPool& positiverPool, randomPool& mone2onerPool, randomPool& expPool){ fcn_tuples->clear(); fcn_tuples->push_back(genfpfcn_tuple( "Identity", identity, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Exp", refMath::exp, expPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Log", refMath::log, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Sin", refMath::sin, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Cos", refMath::cos, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Tan", refMath::tan, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Tanh", refMath::tanh, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Asin", refMath::asin, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Acos", refMath::acos, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Atan", 
refMath::atan, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Isqrt", isqrt, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Inverse", inv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Exp", fast_exp, expPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Log", fast_log, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Sin", fast_sin, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Cos", fast_cos, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Tan", fast_tan, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Tanh", fast_tanh, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Asin", fast_asin, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Acos", fast_acos, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Atan", fast_atan, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Isqrt", fast_isqrt, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Inv", fast_inv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Apr_Isqrt", fast_approx_isqrt, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Apr_Inv", fast_approx_inv, symmetricrPool.getNumbers() )); #ifdef _VC_AVAILABLE_ fcn_tuples->push_back(genfpfcn_tuple( "VC_Identity", vc_identity, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Log", vc_log, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Sin", vc_sin, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Cos", vc_cos, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Asin", vc_asin, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Atan", vc_atan, symmetricrPool.getNumbers() 
)); fcn_tuples->push_back(genfpfcn_tuple( "VC_Isqrt", vc_isqrt, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Inverse", vc_inv, symmetricrPool.getNumbers() )); #endif } void getFunctionTuples( std::vector>* fcn_tuples, randomPool& symmetricrPool, randomPool& positiverPool, randomPool& mone2onerPool, randomPool& expPool){ fcn_tuples->clear(); fcn_tuples->push_back(genfpfcn_tuple( "Identityf", identityf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Expf", refMath::expf, expPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Logf", refMath::logf, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Sinf", refMath::sinf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Cosf", refMath::cosf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Tanf", refMath::tanf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Tanhf", refMath::tanhf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Asinf", refMath::asinf, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Acosf", refMath::acosf, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Atanf", refMath::atanf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Isqrtf", isqrtf, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Inversef", invf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Expf", fast_expf, expPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Logf", fast_logf, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Sinf", fast_sinf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Cosf", fast_cosf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Tanf", fast_tanf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Tanhf", 
fast_tanhf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Asinf", fast_asinf, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Acosf", fast_acosf, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Atanf", fast_atanf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Isqrtf", fast_isqrtf, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Invf", fast_invf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Apr_Isqrtf", fast_approx_isqrtf, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "Fast_Apr_Invf", fast_approx_invf, symmetricrPool.getNumbers() )); #ifdef _VC_AVAILABLE_ fcn_tuples->push_back(genfpfcn_tuple( "VC_Identityf", vc_identityf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Logf", vc_logf, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Sinf", vc_sinf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Cosf", vc_cosf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Asinf", vc_asinf, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Atanf", vc_atanf, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Isqrtf", vc_isqrtf, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcn_tuple( "VC_Inversef", vc_invf, symmetricrPool.getNumbers() )); #endif } /// Vector form void getFunctionTuplesvect(std::vector>* fcn_tuples, randomPool& symmetricrPool, randomPool& positiverPool, randomPool& mone2onerPool, randomPool& expPool){ fcn_tuples->clear(); fcn_tuples->push_back(genfpfcnv_tuple( "Identityv", identityv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Expv", vdt::expv, expPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Logv", vdt::logv, positiverPool.getNumbers() )); 
fcn_tuples->push_back(genfpfcnv_tuple( "Sinv", vdt::sinv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Cosv", vdt::cosv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Tanv", vdt::tanv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Tanhv", vdt::tanhv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Asinv", vdt::asinv, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Acosv", vdt::acosv, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Atanv", vdt::atanv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Isqrtv", vdt::isqrtv, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Inversev", vdt::invv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Expv", fast_expv, expPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Logv", fast_logv, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Sinv", fast_sinv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Cosv", fast_cosv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Tanv", fast_tanv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Tanhv", fast_tanhv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Asinv", fast_asinv, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Acosv", fast_acosv, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Atanv", fast_atanv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Isqrtv", fast_isqrtv, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Invv", fast_invv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Apr_Isqrtv", fast_approx_isqrtv, positiverPool.getNumbers() )); 
fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Apr_Invv", fast_approx_invv, symmetricrPool.getNumbers() )); #ifdef _VC_AVAILABLE_ fcn_tuples->push_back(genfpfcnv_tuple( "VC_Identityv", vc_identityv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Logv", vc_logv, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Sinv", vc_sinv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Cosv", vc_cosv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Asinv", vc_asinv, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Atanv", vc_atanv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Isqrtv", vc_isqrtv, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Inversev", vc_invv, symmetricrPool.getNumbers() )); #endif } void getFunctionTuplesvect(std::vector>* fcn_tuples, randomPool& symmetricrPool, randomPool& positiverPool, randomPool& mone2onerPool, randomPool& expPool){ fcn_tuples->clear(); fcn_tuples->push_back(genfpfcnv_tuple( "Identityfv", identityfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Expfv", vdt::expfv, expPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Logfv", vdt::logfv, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Sinfv", vdt::sinfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Cosfv", vdt::cosfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Tanfv", vdt::tanfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Tanhfv", vdt::tanhfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Asinfv", vdt::asinfv, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Acosfv", vdt::acosfv, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Atanfv", vdt::atanfv, symmetricrPool.getNumbers() )); 
fcn_tuples->push_back(genfpfcnv_tuple( "Isqrtfv", vdt::isqrtfv, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Inversefv", vdt::invfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Expfv", fast_expfv, expPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Logfv", fast_logfv, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Sinfv", fast_sinfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Cosfv", fast_cosfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Tanfv", fast_tanfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Tanhfv", fast_tanhfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Asinfv", fast_asinfv, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Acosfv", fast_acosfv, mone2onerPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Atanfv", fast_atanfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Isqrtfv", fast_isqrtfv, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Invfv", fast_invfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Apr_Isqrtfv", fast_approx_isqrtfv, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "Fast_Apr_Invfv", fast_approx_invfv, symmetricrPool.getNumbers() )); #ifdef _VC_AVAILABLE_ fcn_tuples->push_back(genfpfcnv_tuple( "VC_Identityfv", vc_identityfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Logfv", vc_logfv, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Sinfv", vc_sinfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Cosfv", vc_cosfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Asinfv", vc_asinfv, mone2onerPool.getNumbers() )); 
fcn_tuples->push_back(genfpfcnv_tuple( "VC_Atanfv", vc_atanfv, symmetricrPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Isqrtfv", vc_isqrtfv, positiverPool.getNumbers() )); fcn_tuples->push_back(genfpfcnv_tuple( "VC_Inversefv", vc_invfv, symmetricrPool.getNumbers() )); #endif } /// Helper function retrieves basic function names like Cos, Sin, Exp... (no Fast_, no extensions) void getFunctionBasicNames(std::vector* names){ // Prepare dummy tuple to retrieve function names std::vector> tmpTuples; randomPool dummy(0,0,1); getFunctionTuples(&tmpTuples, dummy,dummy,dummy,dummy); names->clear(); // A better algo to do that std::string name; for(unsigned int i=0;i(tmpTuples[i]); if (name.find("Fast")==std::string::npos && name.find("Identity")==std::string::npos){ names->push_back(name); } } return; } #endif vdt-0.4.4/include/diagnostic/vdtdiag_filePersistence.h000066400000000000000000000040041421413530600230570ustar00rootroot00000000000000/* vdtdiag_filePersistence.h * * created on 7.7.2012 * * Some useful manipulators for writting/reading * floating point values to/from stream in hex format * * Author: Ladislav Horky */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . 
*/ #ifndef _VDT_FILE_PERSISTENCE_ #define _VDT_FILE_PERSISTENCE_ #include #include #include #include #include"vdtcore_common.h" /// Useful manipulators /// Usage: stream << fpToHex(0.0) << ...; template class fpToHex{ const T fpvalue; public: /// Constructor just stores the value fpToHex(const T val):fpvalue(val){} ~fpToHex(){} /// Overloaded insertion operator, prints "0xFFF..FFF " including last 'space' friend std::ostream &operator<<(std::ostream &stream, const fpToHex &obj){ return stream << "0x"<< std::setbase(16) < foo; stream >> foo; val = foo.getValue(); template class fpFromHex{ uint64_t value; public: /// Nothing fpFromHex(){} ~fpFromHex(){} /// Overloaded insertion operator, just reads int value friend std::istream &operator>>(std::istream &stream, fpFromHex &obj){ stream >> std::setbase(16) >> obj.value >> std::setbase(10); return stream; } /// Performs conversion to fp T getValue(){ if(sizeof(T) == 8) return vdt::details::uint642dp(value); else return vdt::details::uint322sp((uint32_t)value); } }; #endif vdt-0.4.4/include/diagnostic/vdtdiag_helper.h000066400000000000000000000261031421413530600212160ustar00rootroot00000000000000/** * Helper functions used for the diagnostic of the vdt routines. * They are not optimised for speed. * Authors: Danilo Piparo CERN **/ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . 
*/ #ifndef VDTHELPER_H_ #define VDTHELPER_H_ #include #include #include #include #include #include "inttypes.h" // #include "x86intrin.h" #include //for log2 #include "time.h" #include "sys/time.h" #ifdef __APPLE__ #include #include #include #include #endif // Wrappers to support macos 10.14 https://github.com/dpiparo/vdt/issues/9 namespace refMath { double atan2(double x, double y) { return ::atan2(x, y); } float atan2f(float x, float y) { return ::atan2f(x, y); } double tan(double x) { return ::tan(x); } float tanf(float x) { return ::tanf(x); } double atan(double x) { return ::atan(x); } float atanf(float x) { return ::atanf(x); } double tanh(double x) { return ::tanh(x); } float tanhf(float x) { return ::tanhf(x); } double exp(double x) { return ::exp(x); } float expf(float x) { return ::expf(x); } double log(double x) { return ::log(x); } float logf(float x) { return ::logf(x); } double sin(double x) { return ::sin(x); } float sinf(float x) { return ::sinf(x); } double asin(double x) { return ::asin(x); } float asinf(float x) { return ::asinf(x); } double cos(double x) { return ::cos(x); } float cosf(float x) { return ::cosf(x); } double acos(double x) { return ::acos(x); } float acosf(float x) { return ::acosf(x); } } namespace{ // Establish the size of the double and single precision and the bitsets constexpr double _tmp=0; constexpr uint32_t dp_size_in_bits = sizeof(_tmp)*8; using dp_bitset = std::bitset; } namespace vdth{ //------------------------------------------------------------------------------ // Useful alias for some functions using dpdpfunction = std::function; using dpdpfunctionv = std::function; using spspfunction = std::function; using spspfunctionv = std::function; using dpdp2function = std::function; using dpdp2functionv = std::function; using spsp2function = std::function; using spsp2functionv = std::function; //maybe for convenience template using genfpfunction = std::function; template using genfpfunctionv = std::function; template using 
genfp2function = std::function; template using genfp2functionv = std::function; //------------------------------------------------------------------------------ /// Useful union union standard{ double dp; float sp[2]; uint64_t li; }; //------------------------------------------------------------------------------ template uint32_t inline getSizeInbits(const T x){ return sizeof(x) * 8; } //------------------------------------------------------------------------------ /// Convert a fp into a uint64_t not optimised for speed template inline uint64_t fp2uint64(const T x){ const uint32_t size = getSizeInbits(x); standard conv; conv.dp=0.; if (size==dp_size_in_bits) conv.dp=x; else conv.sp[0]=x; return conv.li; } //------------------------------------------------------------------------------ /// Convert a double into a bitset template inline const dp_bitset fp2bs( const T x ){ dp_bitset const bits (fp2uint64(x)); return bits; } //------------------------------------------------------------------------------ /// Print as a dp formatted bitset template const std::string getbsasstr(const T x){ const uint32_t size = getSizeInbits(x); uint32_t offset = 0; uint32_t exp_size = 11; uint32_t mant_size = 52; if (size!=dp_size_in_bits){ offset = 32; exp_size = 8; mant_size = 23; } // Convert the bitstream to string std::string bitset_as_string (fp2bs(x).to_string()); std::ostringstream os; // sign os << bitset_as_string[offset] << " "; // exponent for (unsigned int i=offset+1;i uint16_t diffbit(const T a,const T b ){ /// make a xor uint64_t ia = fp2uint64(a); uint64_t ib = fp2uint64(b); uint64_t c = ia>ib? ia-ib : ib -ia; //uint64_t c = ia^ib; /// return the log2+1 return log2(c)+1; } //------------------------------------------------------------------------------ ///Check and print which instructions sets are enabled. 
void print_instructions_info(){ std::ostringstream os; os << "List of enabled instructions' sets:\n"; os << " o SSE2 instructions set " #ifndef __SSE2__ << "not " #endif << "enabled.\n" << " o SSE3 instructions set " #ifndef __SSE3__ << "not " #endif << "enabled.\n" << " o SSE4.1 instructions set " #ifndef __SSE4_1__ << "not " #endif << "enabled.\n" << " o AVX instructions set " #ifndef __AVX__ << "not " #endif << "enabled.\n"; std::cout << os.str(); } //------------------------------------------------------------------------------ /// Print the different bit of two fp numbers template void print_different_bit(const T a, const T b, const bool show_identical=true){ std::cout.precision(10); std::cout << "Different bit between " << a << " and " << b << " is " << diffbit(a,b) << std::endl; if (show_identical) std::cout << getbsasstr(a) << std::endl << getbsasstr(b) << std::endl<< std::endl; } //------------------------------------------------------------------------------ /// Invoke two functions and print on screen their argument and different bits template void printFuncDiff(const std::string& func_name, std::function f1,std::function f2, const T x){ std::cout << "Function " << func_name << "(" << x << ")" << std::endl; print_different_bit(f1(x),f2(x),true); } /// Invoke two functions and print on screen their argument and different bits template void printFuncDiff(const std::string& func_name, std::function f1,std::function f2, const T x, const T y){ std::cout << "Function " << func_name << "(" << x << ", "<< y <<")" << std::endl; print_different_bit(f1(x,y),f2(x,y),true); } //------------------------------------------------------------------------------ /// Invoke two functions and print on screen their argument and different bits template void printFuncDiff(const std::string& func_name, genfpfunctionv f1, genfpfunctionv f2, T* x_arr, const uint32_t size){ std::cout << "Function " << func_name << std::endl; T* res_1 = new T[size]; f1(size,x_arr,res_1); T* res_2 = 
new T[size]; f2(size,x_arr,res_2); for (uint32_t i=0;i void printFuncDiff(const std::string& func_name, genfp2functionv f1, genfp2functionv f2, T* x_arr, T* y_arr, const uint32_t size){ std::cout << "Function " << func_name << std::endl; T* res_1 = new T[size]; f1(size,x_arr,y_arr,res_1); T* res_2 = new T[size]; f2(size,x_arr,y_arr,res_2); for (uint32_t i=0;i void printFuncDiff(const std::string& name, std::function fpfunction, std::function fpfunction_ref, T* fpvals, const uint32_t size){ for (uint32_t i=0;i) fpfunction, (std::function) fpfunction_ref, fpvals[i] ); } //------------------------------------------------------------------------------ // Function tests /// Test a fp function with a double (double) signatures template void printFuncDiff(const std::string& name, std::function fpfunction, std::function fpfunction_ref, T* fpvals1, T* fpvals2, const uint32_t size){ for (uint32_t i=0;i) fpfunction, (std::function) fpfunction_ref, fpvals1[i], fpvals2[i]); } //------------------------------------------------------------------------------ /// Get the clock cycles class timer{ public: timer(){} ~timer(){} void print(){ const uint64_t nsecs=get_elapsed_time(); std::cout << "Time elapsed: " << nsecs << " nanoseconds.\n";// (" //<< m_get_elapsed_clocks(nsecs) << " clock)\n"; } #if defined (__APPLE__) void inline start(){m_time1=mach_absolute_time();} void inline stop(){m_time2=mach_absolute_time();} uint64_t get_elapsed_time(){ static mach_timebase_info_data_t sTimebaseInfo; const uint64_t elapsed = m_time2 - m_time1; // Convert to nanoseconds. // Have to do some pointer fun because AbsoluteToNanoseconds // works in terms of UnsignedWide, which is a structure rather // than a proper 64-bit integer. if ( sTimebaseInfo.denom == 0 ) { (void) mach_timebase_info(&sTimebaseInfo); } // Do the maths. We hope that the multiplication doesn't // overflow; the price you pay for working in fixed point. 
uint64_t elapsedNano = elapsed * sTimebaseInfo.numer / sTimebaseInfo.denom; return elapsedNano; } private: uint64_t m_time1,m_time2; #else void inline start(){ clock_gettime(CLOCK_THREAD_CPUTIME_ID, &m_time1); } void inline stop(){ clock_gettime(CLOCK_THREAD_CPUTIME_ID, &m_time2); } /// Return time in nanoseconds uint64_t get_elapsed_time(){ timespec temp; temp.tv_sec = m_time2.tv_sec-m_time1.tv_sec; temp.tv_nsec = m_time2.tv_nsec-m_time1.tv_nsec; uint64_t elapsed_time = temp.tv_nsec; elapsed_time += 1e9*temp.tv_sec; return elapsed_time; } private: timespec m_time1,m_time2; #endif }; //------------------------------------------------------------------------------ // inline uint64_t getcpuclock() { // return __rdtsc(); // } }//end of namespace vdth #endif vdt-0.4.4/include/diagnostic/vdtdiag_interfaces.h000066400000000000000000000167621421413530600220740ustar00rootroot00000000000000/** * This file contains the abstract interfaces for the diagnostic classes * * Author Danilo Piparo * **/ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #include #include #include #include #include #include "assert.h" #include "vdtdiag_helper.h" #ifndef _VDT_INTERFACES_ #define _VDT_INTERFACES_ /** * Abstract interface for classes that can be printed on screen and written from a file. 
**/ /* Iprintable: abstract interface; implementers must provide writeFile(output_filename) and print(). */ class Iprintable{ public: Iprintable(){}; virtual ~Iprintable(){}; virtual void writeFile(const std::string& output_filename) =0; virtual void print() =0; }; //------------------------------------------------------------------------------ /* IfcnComparison: keeps two output vectors (asserted to have equal size by the two-vector ctor), fills a vector of per-element bit differences and derives the min/max/mean figures printed by printStats(). The from-file ctor only records names: it leaves both vectors empty and computes no statistics. The base Iprintable is inherited privately (no access specifier on a class base). NOTE(review): the template parameter list and the element type of std::vector are missing in this text, presumably dropped during extraction; restore them from the upstream header before compiling. */ template class IfcnComparison:Iprintable{ using vectT=std::vector; public: //------------------------------------------------------------------------------ // Ctor from 2 outputs IfcnComparison(const std::string& name, const vectT& out1, const vectT& out2): m_from_file(false), m_ifile_name("From scratch"), m_name(name), m_out1(out1), m_out2(out2){ // A basic consistency check assert(out2.size()==out1.size()); // Calculate the differences in bit m_fillBitDiffs(); // Calculate the stats m_calcStats(); }; //------------------------------------------------------------------------------ /// Ctor from file IfcnComparison(const std::string& input_filename): m_from_file(true), m_ifile_name(input_filename), m_name(std::string("From ")+input_filename){} //------------------------------------------------------------------------------ ~IfcnComparison(){}; //------------------------------------------------------------------------------ /* printStats: writes max, min, mean and m_RMS to std::cout, either as labelled lines (default) or as one fixed-width row when tabular is true. */ void printStats(const bool tabular = false){ if(!tabular){ std::cout << "Stats for " << m_name << ":\n" << std::setprecision(2) << "Max diff bit: " << m_max << "\n" << "Min diff bit: " << m_min << "\n" << "Mean diff bit: " << m_mean << "\n" << "RMS diff bit: " << m_RMS << "\n"; }else{ std::cout << std::setw(30) << std::left << m_name << std::setw(5) << std::right << m_max << std::setw(5) << std::right << m_min << std::setiosflags(std::ios::fixed) << std::setw(7) << std::right << std::setprecision(2) << m_mean << std::setw(7) << std::right << std::setprecision(2) << m_RMS << "\n"; } }; //------------------------------------------------------------------------------ // Handy functions: inline bool hasDifference(){return (m_max > 0);} protected: const bool m_from_file; const std::string m_ifile_name; const 
std::string m_name; vectT m_out1; vectT m_out2; std::vector m_diff_bitv; double m_mean = 0; double m_RMS = 0; uint16_t m_min=255; uint16_t m_max=0; private: /// Fill the vector of different bits void m_fillBitDiffs(){ const uint32_t size = m_out1.size(); m_diff_bitv.reserve(size); for (uint32_t i=0;im_max)m_max=bitdiff; } /* NOTE(review): the text between "i=0;i" and "m_max)" above is missing (the loop bound, the rest of m_fillBitDiffs and the start of the statistics loop); it has to be restored from the upstream header. The value stored in m_RMS below is (sum_x2 - size*mean*mean)/(size-1), i.e. a sample variance with no square root taken, and -1 is used as a sentinel when size==1. size==0 is not guarded: size is uint32_t, so size-1 wraps. */ // Now the mean! const uint32_t size=m_out1.size(); m_mean=sum_x/size; if (size==1) m_RMS=-1; else m_RMS=(sum_x2 - size*m_mean*m_mean)/(size-1); } }; /* IfcnResponse: stores a function name, one or two input vectors and an output vector whose capacity is reserved to the size of the first input. The from-file ctor only records the file name and a derived function name; no file is read in the code visible here. The getters hand out non-const references to the stored vectors; the xxxVal accessors index without bounds checking. NOTE(review): template parameter list and vector element types are missing here as well. */ template class IfcnResponse:Iprintable{ public: IfcnResponse(const std::string& fcnName, std::vector input): m_input1(input), m_from_file(false), m_fcn_name(fcnName), m_ifile_name("From scratch"){m_output.reserve(input.size());}; //----------------------------------------------------------------------------- IfcnResponse(const std::string& fcnName, std::vector input1, std::vector input2): m_input1(input1), m_input2(input2), m_from_file(false), m_fcn_name(fcnName), m_ifile_name("From scratch"){m_output.reserve(input1.size());}; //----------------------------------------------------------------------------- /// Construct from ascii file IfcnResponse(const std::string& input_filename): m_from_file(true), m_fcn_name(std::string("From ")+input_filename), m_ifile_name(input_filename){}; //----------------------------------------------------------------------------- ~IfcnResponse(){}; //----------------------------------------------------------------------------- const std::string& getFcnName() const {return m_fcn_name;} //----------------------------------------------------------------------------- const std::string& getIfileName() const {return m_ifile_name;} //----------------------------------------------------------------------------- bool isFromFile() const {return m_from_file;} //----------------------------------------------------------------------------- inline std::vector& getOutput() {return m_output;}; void pushOutputVal(T value) {m_output.push_back(value);}; inline const T outputVal(uint64_t 
index) const {return m_output[index];}; //----------------------------------------------------------------------------- inline std::vector& getInput1() {return m_input1;}; void pushInput1Val(T value) {m_input1.push_back(value);}; inline const T input1Val(uint64_t index) const {return m_input1[index];}; //----------------------------------------------------------------------------- inline std::vector& getInput2() {return m_input2;}; void pushInput2Val(T value) {m_input2.push_back(value);}; inline const T input2Val(uint64_t index) const {return m_input2[index];}; private: std::vector m_output; std::vector m_input1; std::vector m_input2; const bool m_from_file; const std::string m_fcn_name; const std::string m_ifile_name; }; /* IrandomPool: common base of the random pools. It owns the requested pool size, a std::mt19937_64 engine (seeded with the ctor argument, or with 0 by the from-file ctor, which also sets the size to 0) and the input file name. It does not implement writeFile() or print(), so it stays abstract. fillVector() reserves m_size slots, appends m_size draws from a std::uniform_real_distribution built from (min, max), each cast to T, and finally writes a newline to std::cout. NOTE(review): the template header of fillVector and the distribution's value type are missing in this text; the original comment says long double was intended. */ class IrandomPool:public Iprintable{ public: IrandomPool(const uint64_t size, const int32_t seed=1): m_size(size), m_mtwister_engine(seed), m_ifile_name(""){}; IrandomPool(const std::string& input_filename): m_size(0), m_mtwister_engine(0), m_ifile_name(input_filename){}; ~IrandomPool(){}; uint64_t getSize() const {return m_size;}; const std::string& getFileName() const {return m_ifile_name;}; protected: template void fillVector(std::vector& randomv,T min, T max){ // allocate the distribution // use C++11 long double to be able to generate whole double range // This generates pure uniform distribution, which may be not suitable for // longer ranges std::uniform_real_distribution uniform_dist(min, max); // Fill the numbers randomv.reserve(m_size); for (uint64_t i = 0; i < m_size; ++i){ T temp = (T)uniform_dist(m_mtwister_engine); //std::cout << "Generated num = " << temp << "\n"; randomv.push_back(temp); } std::cout << "\n"; } private: const uint64_t m_size; std::mt19937_64 m_mtwister_engine; const std::string m_ifile_name; }; #endif vdt-0.4.4/include/diagnostic/vdtdiag_random.h000066400000000000000000000260351421413530600212230ustar00rootroot00000000000000/** * This file contains the routines for the creation and * i/o of random numbers. 
* * Author Danilo Piparo * **/ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #ifndef _VDT_RANDOM_ #define _VDT_RANDOM_ /* NOTE(review): the two system header names below are missing in this text (angle-bracket spans were dropped); restore them from the upstream file. */ #include #include #include "vdtcore_common.h" #include "vdtdiag_filePersistence.h" #include "vdtdiag_interfaces.h" /** * Class that represents a floating point random numbers pool. * It has 2 constructors: * 1) specifying min and max number and the size of the collection. * 2) from ascii file * The objects are able to dump on disk in the form of ascii files with 5 lines header * the random numbers. The header is hardcoded!! No comments, nothing. Just 5 lines in * the header. In order not to lose precision, the numbers are written in HEX. * The range ctor fills the pool immediately through IrandomPool::fillVector. * NOTE(review): the template parameter list is missing in this text. **/ template class randomPool1D:public IrandomPool{ public: randomPool1D(const T min, const T max, const uint64_t size, const int32_t seed=1): IrandomPool(size, seed), m_min(min), m_max(max){ // Fill the numbers fillVector(m_numbers,m_min,m_max); }; //----------------------------------------------------------------------------- /// Initialise with an ascii file. The numbers are in hex format. 
/* From-file ctor: discards five lines, then appends every value that can be extracted into an fpFromHex until extraction fails. min and max are set to 0 and the base class size is 0. The stream is not checked for a failed open, in which case the pool is simply left empty. */ randomPool1D(const std::string& input_filename): IrandomPool(input_filename), m_min(0), m_max(0){ std::ifstream ifile ( getFileName() ); std::string line; // skip the 5 header lines for (uint16_t i=0;i<5;++i) std::getline(ifile,line); // read from file fpFromHex input_val; while(ifile >> input_val) { m_numbers.push_back(input_val.getValue()); } } //----------------------------------------------------------------------------- ~randomPool1D(){}; //----------------------------------------------------------------------------- /// Write numbers on disk in the form of an ascii file. The numbers are in HEX format. /* writeFile: getSize()==0 is used as the "created from file" test (a pool constructed with size 0 takes the same branch). In that branch the first line of the input file is replaced by a "Dumped by ..." line and the remainder is copied verbatim. Otherwise a five-line header is written: preamble, precision (decided by sizeof(T)==8), minimum, maximum, total. */ void writeFile(const std::string& output_filename) { const std::string preamble("VDT Random numbers file (the first 5 lines are the header)\n"); std::ofstream ofile ( output_filename ); // Copy the input file if the object was created from file if (getSize()==0 ){ std::string line; std::ifstream ifile ( getFileName() ); getline(ifile,line); ofile << "Dumped by an object initialised by " << getFileName() << " - " << preamble; ofile << ifile.rdbuf() ; } else{ // Write an header and the numbers in the other case ofile << preamble; if (sizeof(T)==8) // some kind of RTTC ofile << "Double Precision\n"; else ofile << "Single Precision\n"; ofile << "Minimum Number = " << m_min << std::endl << "Maximum Number = " << m_max << std::endl << "Total numbers = " << getSize() << std::endl; // Do not write dec, but HEX! 
/* NOTE(review): the loop below is damaged in this text: everything between "i=0;i" and "(m_numbers[i])" is missing (loop bound and the hex-writing call). The same applies further on to the print() loop and the private member declarations before "m_numbers; };". Restore from the upstream header. */ for (uint64_t i=0;i(m_numbers[i]); ofile << std::setbase(10); } } //----------------------------------------------------------------------------- /// Return a vector of the numbers const std::vector& getNumbers() const {return m_numbers;}; //----------------------------------------------------------------------------- // Prints the random numbers on screen void print() { std::cout << "Random Pool 1D:\n"; const uint64_t numbers_size = m_numbers.size(); std::cout << std::setprecision(std::numeric_limits::digits10); for(uint64_t i=0;i m_numbers; }; /* randomPool2D: same design as randomPool1D with two independent coordinate vectors. The range ctor fills X first and then Y from the same engine; the from-file ctor skips five lines and assigns the values alternately to X (even positions) and Y (odd positions). NOTE(review): template parameter list missing in this text. */ template class randomPool2D:public IrandomPool{ public: randomPool2D(const T minX, const T minY, const T maxX, const T maxY, const uint64_t size, const int32_t seed=1): IrandomPool(size, seed), m_minX(minX), m_minY(minY), m_maxX(maxX), m_maxY(maxY){ // Fill the numbers fillVector(m_numbersX,m_minX,m_maxX); fillVector(m_numbersY,m_minY,m_maxY); }; //----------------------------------------------------------------------------- /// Initialise with an ascii file. The numbers are in hex format. randomPool2D(const std::string& input_filename): IrandomPool(input_filename), m_minX(0), m_minY(0), m_maxX(0), m_maxY(0){ std::ifstream ifile ( getFileName() ); std::string line; // skip the 5 header lines for (uint16_t i=0;i<5;++i) std::getline(ifile,line); // read from file fpFromHex input_val; uint64_t counter=0; while(ifile >> input_val) { if(counter%2 == 0) m_numbersX.push_back(input_val.getValue()); else m_numbersY.push_back(input_val.getValue()); counter++; } } //----------------------------------------------------------------------------- ~randomPool2D(){}; //----------------------------------------------------------------------------- /// Write numbers on disk in the form of an ascii file. The numbers are in HEX format. 
/* writeFile (2D): same two branches as the 1D version; the header lines for minimum and maximum carry the X and Y values separated by a space. */ void writeFile(const std::string& output_filename) { const std::string preamble("VDT Random numbers file 2D (the first 5 lines are the header)\n"); std::ofstream ofile ( output_filename ); // Copy the input file if the object was created from file if (getSize()==0 ){ std::string line; std::ifstream ifile ( getFileName() ); getline(ifile,line); ofile << "Dumped by an object initialised by " << getFileName() << " - " << preamble; ofile << ifile.rdbuf() ; } else{ // Write an header and the numbers in the other case ofile << preamble; if (sizeof(T)==8) // some kind of RTTC ofile << "Double Precision\n"; else ofile << "Single Precision\n"; ofile << "Minimum Number = " << m_minX << " " << m_minY << std::endl << "Maximum Number = " << m_maxX << " " << m_maxY << std::endl << "Total numbers = " << getSize() << std::endl; // Do not write dec, but HEX! /* NOTE(review): loop head and X branch damaged in this text (span after "i=0;i" missing). */ for (uint64_t i=0;i(m_numbersX[j]); else ofile << fpToHex(m_numbersY[j]); } ofile << std::setbase(10); } } //----------------------------------------------------------------------------- /// Return a vector of the numbers const std::vector& getNumbersX() const {return m_numbersX;}; /// Return a vector of the numbers const std::vector& getNumbersY() const {return m_numbersY;}; //----------------------------------------------------------------------------- // Prints the random numbers on screen void print() { std::cout << "Random Pool 2D:\n"; const uint64_t numbers_size = m_numbersX.size(); std::cout << std::setprecision(std::numeric_limits::digits10); for(uint64_t i=0;i m_numbersX; std::vector m_numbersY; }; // Old implementation, deprecated /* randomPool_old: stand-alone pool that does not derive from IrandomPool; at the end of this header the alias randomPool is bound to randomPool1D, and the alias to this class is commented out. */ template class randomPool_old{ public: /// Initialise with min and max numbers as well as desired size of the pool. randomPool_old(const T min, const T max, const uint64_t size, const uint32_t seed=1): m_min(min), m_max(max), m_size(size){ // Allocate the engine with the given seed (default 1). 
std::mt19937_64 mtwister_engine(seed); // allocate the distribution // use C++11 long double to be able to generate whole double range // This generates pure uniform distribution, which may be not suitable for // longer ranges std::uniform_real_distribution uniform_dist(m_min, m_max); auto uniform_gen = std::bind(uniform_dist, mtwister_engine); // Fill the numbers m_numbers.reserve(size); for (uint64_t i = 0; i < m_size; ++i) m_numbers.push_back((T)uniform_gen()); } //----------------------------------------------------------------------------- /// Initialise with an ascii file. The numbers are in hex format. randomPool_old(const std::string& input_filename){ m_ifile_name=input_filename; std::ifstream ifile ( input_filename ); std::string line; // skip the 5 header lines for (uint16_t i=0;i<5;++i) std::getline(ifile,line); // read from file fpFromHex input_val; while(ifile >> input_val) { m_numbers.push_back(input_val.getValue()); } // particular of file init!!! m_min=m_max=m_size=0; } //----------------------------------------------------------------------------- /// Really nothing to do ~randomPool_old(){} //----------------------------------------------------------------------------- /// Write numbers on disk in the form of an ascii file. The numbers are in HEX format. 
/* writeFile (old pool): m_size==0 marks a pool read from file, set explicitly by the from-file ctor. Note the file name is taken by value here, unlike the newer classes. */ void writeFile(const std::string output_filename) const{ const std::string preamble("VDT Random numbers file (the first 5 lines are the header)\n"); std::ofstream ofile ( output_filename ); // Copy the input file if the object was created from file if (m_size==0 ){ std::string line; std::ifstream ifile ( m_ifile_name ); getline(ifile,line); ofile << "Dumped by an object initialised by " << m_ifile_name << " - " << preamble; ofile << ifile.rdbuf() ; } else{ // Write an header and the numbers in the other case ofile << preamble; if (sizeof(T)==8) // some kind of RTTC ofile << "Double Precision\n"; else ofile << "Single Precision\n"; ofile << "Minimum Number = " << m_min << std::endl << "Maximum Number = " << m_max << std::endl << "Total numbers = " << m_size << std::endl; // Do not write dec, but HEX! /* NOTE(review): loop head damaged in this text, as in the classes above. */ for (uint64_t i=0;i(m_numbers[i]); ofile << std::setbase(10); } } //----------------------------------------------------------------------------- /// Return a vector of the numbers const std::vector& getNumbers() const {return m_numbers;}; //----------------------------------------------------------------------------- // Prints the random numbers on screen void print() const{ uint64_t counter=1; std::cout << "Random Pool:\n"; const uint64_t numbers_size = m_numbers.size(); std::cout << std::setprecision(std::numeric_limits::digits10); for(uint64_t i=0;i m_numbers; }; // For compatibility //using randomPool = randomPool_old; template using randomPool = randomPool1D; #endif vdt-0.4.4/include/diagnostic/vdtdiag_simpleCmd.h000066400000000000000000000106411421413530600216540ustar00rootroot00000000000000/* * * Very simple object for storing and parsing * commandline parameters in unix fashion * * Author: Ladislav Horky */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ /* NOTE(review): this header shows no include guard and no pragma once, and the five system header names below are missing in this text (angle-bracket spans were dropped), as are the template arguments of std::tuple, std::pair, std::map and std::vector. Restore them from the upstream file. */ #include #include #include #include #include // tuple containing short identifier, long identifier and help string using opt_tuple = std::tuple; using strpair = std::pair; /* CmdOptions: registry of allowed options (short flag, long flag, help text) plus a map of the options actually received, keyed by short flag and holding the text after '='. The ctor registers -h/--help. NOTE(review) on the help literal: the adjacent string pieces join "command" and "specific" without a space, and it contains the typos "hepl" and "errorneous"; these are runtime strings and are left untouched here. */ class CmdOptions{ std::map receivedOpts; std::vector allowedOpts; public: CmdOptions(){ receivedOpts.clear(); allowedOpts.clear(); addOption("-h","--help","Prints help (no full functionality). All parametrized options must be in the form with " "'=' i.e. -o=parameter or --option=parameter, otherwise parameters will not be processed. Also, manual command" "specific help (like -o --hepl) does not work - works only in case of errorneous option."); } ~CmdOptions(){} /* addOption: appends the triple to allowedOpts; always returns 1 and does not check for duplicates. */ int addOption(const std::string shrt, const std::string lng, const std::string help){ allowedOpts.push_back(opt_tuple(shrt,lng,help)); //std::cout << std::get<0>(allowedOpts[allowedOpts.size()-1]) << " " << std::get<1>(allowedOpts[allowedOpts.size()-1]) << "\n"; return 1; } /* parseCmd: walks argv starting at index 1, splits each argument at '=' into flag and parameter, and looks the flag up among the allowed options by short or long name. Returns 0 after printing a message for an unknown or repeated option, 1 otherwise (help is printed first when -h was given). NOTE(review): the loop head and the computation of eqPos are missing in this text (span between "i=1; i" and " 0){"), as is the inner loop head before "(allowedOpts[j]);". */ int parseCmd(int argc, char** argv){ // omit the first argument for(int i=1; i 0){ optFlag = rawOpt.substr(0,eqPos); //std::cout<< optFlag; }else optFlag = rawOpt; //find (and parse) option bool found = false; for(unsigned int j=0;j(allowedOpts[j]); // was short or long name of option used? 
if(shortFlag == optFlag || std::get<1>(allowedOpts[j]) == optFlag){ found = true; // multiple specification fails /* NOTE(review): the message below lacks a space between the flag and "specified". */ if(receivedOpts.find(shortFlag) != receivedOpts.end()){ std::cout << "Option " << shortFlag << "specified multiple times, which is forbidden.\n"; return 0; } // if ok, add to received together with possible argument receivedOpts.insert(strpair(shortFlag,eqPos>0?rawOpt.substr(eqPos+1):"")); break; } } if(!found){ std::cout << "Unknown option " << optFlag << "\n"; return 0; } } //print just help if(isSet("-h")){ printHelp(); //std::cout << "\nHelp printed, ignore any subsequent error messages.\n"; return 1; } return 1; } /* isSet: true when the short flag was received by parseCmd. */ bool isSet(const std::string shortFlag){ if(receivedOpts.find(shortFlag) != receivedOpts.end()) return true; else return false; } /* getArgument: the text stored for the short flag, or an empty string when the flag was not received. */ std::string getArgument(const std::string shortFlag){ if(!isSet(shortFlag)) return ""; return std::get<1>(*receivedOpts.find(shortFlag)); } //print help, if no command specified, print all /* NOTE(review): both loop heads in printHelp are damaged in this text (spans after "j=0;j" missing). */ void printHelp(const std::string opt = ""){ // print option-specific help if(opt != ""){ //find option bool found = false; for(unsigned int j=0;j(allowedOpts[j]); // try both short and long option name if(shortFlag == opt || std::get<1>(allowedOpts[j]) == opt){ std::cout << "Option-specific help:\n" << shortFlag << " " << std::get<1>(allowedOpts[j]) << "\n " << std::get<2>(allowedOpts[j]) << "\n"; found = true; } } if(!found) std::cout << "Unknown option " << opt << "\n"; // print whole help }else{ std::cout << "Help:\n"; for(unsigned int j=0;j(allowedOpts[j]) << " " << std::get<1>(allowedOpts[j]) << "\n " << std::get<2>(allowedOpts[j]) << "\n"; } } }; vdt-0.4.4/include/exp.h000066400000000000000000000106561421413530600147130ustar00rootroot00000000000000/* * exp.h * The basic idea is to exploit Pade polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code. 
* The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #ifndef _VDT_EXP_ #define _VDT_EXP_ #include "vdtcore_common.h" #include namespace vdt{ namespace details{ const double EXP_LIMIT = 708; const double PX1exp = 1.26177193074810590878E-4; const double PX2exp = 3.02994407707441961300E-2; const double PX3exp = 9.99999999999999999910E-1; const double QX1exp = 3.00198505138664455042E-6; const double QX2exp = 2.52448340349684104192E-3; const double QX3exp = 2.27265548208155028766E-1; const double QX4exp = 2.00000000000000000009E0; const double LOG2E = 1.4426950408889634073599; // 1/log(2) const float MAXLOGF = 88.72283905206835f; const float MINLOGF = -88.f; const float C1F = 0.693359375f; const float C2F = -2.12194440e-4f; const float PX1expf = 1.9875691500E-4f; const float PX2expf =1.3981999507E-3f; const float PX3expf =8.3334519073E-3f; const float PX4expf =4.1665795894E-2f; const float PX5expf =1.6666665459E-1f; const float PX6expf =5.0000001201E-1f; const float LOG2EF = 1.44269504088896341f; } // Exp double precision -------------------------------------------------------- /// Exponential Function double precision inline double fast_exp(double initial_x){ double x = initial_x; double px=details::fpfloor(details::LOG2E * x +0.5); const int32_t n = int32_t(px); x -= px * 
6.93145751953125E-1; x -= px * 1.42860682030941723212E-6; const double xx = x * x; // px = x * P(x**2). px = details::PX1exp; px *= xx; px += details::PX2exp; px *= xx; px += details::PX3exp; px *= x; // Evaluate Q(x**2). double qx = details::QX1exp; qx *= xx; qx += details::QX2exp; qx *= xx; qx += details::QX3exp; qx *= xx; qx += details::QX4exp; // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) ) x = px / (qx - px); x = 1.0 + 2.0 * x; // Build 2^n in double. x *= details::uint642dp(( ((uint64_t)n) +1023)<<52); if (initial_x > details::EXP_LIMIT) x = std::numeric_limits::infinity(); if (initial_x < -details::EXP_LIMIT) x = 0.; return x; } // Exp single precision -------------------------------------------------------- /// Exponential Function single precision inline float fast_expf(float initial_x) { float x = initial_x; float z = details::fpfloor( details::LOG2EF * x +0.5f ); /* floor() truncates toward -infinity. */ x -= z * details::C1F; x -= z * details::C2F; const int32_t n = int32_t ( z ); const float x2 = x * x; z = x*details::PX1expf; z += details::PX2expf; z *= x; z += details::PX3expf; z *= x; z += details::PX4expf; z *= x; z += details::PX5expf; z *= x; z += details::PX6expf; z *= x2; z += x + 1.0f; /* multiply by power of 2 */ z *= details::uint322sp((n+0x7f)<<23); if (initial_x > details::MAXLOGF) z=std::numeric_limits::infinity(); if (initial_x < details::MINLOGF) z=0.f; return z; } //------------------------------------------------------------------------------ void expv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_expv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void expfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_expfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); } // end namespace vdt #endif 
vdt-0.4.4/include/externalLibcfg.h000066400000000000000000000004641421413530600170440ustar00rootroot00000000000000/* * Do not modify externalLibcfg.h, it's automatically generated by * Cmake. Modify externalLibcfg.h.cmake instead. */ #ifndef _VDT_EXTERNAL_LIB_CFG_ #define _VDT_EXTERNAL_LIB_CFG_ //Vc usage setup #define USE_VC 0 #if USE_VC //use this flag through the program #define _VC_AVAILABLE_ #endif #endif vdt-0.4.4/include/externalLibcfg.h.cmake000066400000000000000000000005071421413530600201210ustar00rootroot00000000000000/* * Do not modify externalLibcfg.h, it's automatically generated by * Cmake. Modify externalLibcfg.h.cmake instead. */ #ifndef _VDT_EXTERNAL_LIB_CFG_ #define _VDT_EXTERNAL_LIB_CFG_ //Vc usage setup #cmakedefine01 USE_VC #if USE_VC //use this flag through the program #define _VC_AVAILABLE_ #endif #endifvdt-0.4.4/include/identity.h000066400000000000000000000036021421413530600157410ustar00rootroot00000000000000/* * identity.h * For testing purposes * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . 
*/ #ifndef IDENTITY_H_ #define IDENTITY_H_ namespace vdt{ // Identity double precision --------------------------------------------------- inline double identity(double x){return x;} inline double fast_identity(double x){return x;} inline double identity2D (double x, double /*y*/){return x;} // Identity float precision --------------------------------------------------- inline float identityf(float x){return x;} inline float fast_identityf(float x){return x;} inline float identity2Df (float x, float /*y*/){return x;} //------------------------------------------------------------------------------ void identityv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void identityfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void identity2Dv(const uint32_t size, double const * __restrict__ iarray, double const * __restrict__ iarray2, double* __restrict__ oarray); void identity2Dfv(const uint32_t size, float const * __restrict__ iarray, float const * __restrict__ iarray2, float* __restrict__ oarray); } //vdt namespace #endif /* IDENTITY_H_ */ vdt-0.4.4/include/inv.h000066400000000000000000000072751421413530600147160ustar00rootroot00000000000000/* * inv.h * An experiment: implement division with the square fo the approximate * inverse square root. * In other words one transforms a shift, multiplications and sums into a * sqrt. * * Created on: Jun 24, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente * * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. 
* * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #ifndef INV_H_ #define INV_H_ #include "vdtcore_common.h" #include "sqrt.h" #include #include namespace vdt{ //------------------------------------------------------------------------------ /// General implementation of the inversion inline double fast_inv_general(double x, const uint32_t isqrt_iterations) { const uint64_t sign_mask = details::getSignMask(x); const double sqrt_one_over_x = fast_isqrt_general(std::fabs(x), isqrt_iterations); return sqrt_one_over_x*(details::dpORuint64(sqrt_one_over_x , sign_mask )); } //------------------------------------------------------------------------------ /// Four iterations inversion inline double fast_inv(double x) {return fast_inv_general(x,4);} //------------------------------------------------------------------------------ /// Three iterations inline double fast_approx_inv(double x) {return fast_inv_general(x,3);} //------------------------------------------------------------------------------ /// For comparisons inline double inv (double x) {return 1./x;} //------------------------------------------------------------------------------ // Single precision /// General implementation of the inversion inline float fast_invf_general(float x, const uint32_t isqrt_iterations) { const uint32_t sign_mask = details::getSignMask(x); const float sqrt_one_over_x = fast_isqrtf_general(std::fabs(x), isqrt_iterations); return sqrt_one_over_x*(details::spORuint32(sqrt_one_over_x , sign_mask )); } //------------------------------------------------------------------------------ /// Two iterations inline float fast_invf(float x) {return fast_invf_general(x,2);} //------------------------------------------------------------------------------ /// One iterations inline float fast_approx_invf(float x) {return fast_invf_general(x,1);} //------------------------------------------------------------------------------ /// For 
comparisons inline float invf (float x) {return 1.f/x;} //------------------------------------------------------------------------------ void invv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_invv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_approx_invv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void invfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_invfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_approx_invfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); } // end namespace vdt #endif /* INV_H_ */ vdt-0.4.4/include/log.h000066400000000000000000000117421421413530600146750ustar00rootroot00000000000000/* * log.h * The basic idea is to exploit Pade polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code. * The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . 
*/ #ifndef LOG_H_ #define LOG_H_ #include "vdtcore_common.h" #include namespace vdt{ // local namespace for the constants/functions which are necessary only here namespace details{ const double LOG_UPPER_LIMIT = 1e307; const double LOG_LOWER_LIMIT = 0; const double SQRTH = 0.70710678118654752440; inline double get_log_px(const double x){ const double PX1log = 1.01875663804580931796E-4; const double PX2log = 4.97494994976747001425E-1; const double PX3log = 4.70579119878881725854E0; const double PX4log = 1.44989225341610930846E1; const double PX5log = 1.79368678507819816313E1; const double PX6log = 7.70838733755885391666E0; double px = PX1log; px *= x; px += PX2log; px *= x; px += PX3log; px *= x; px += PX4log; px *= x; px += PX5log; px *= x; px += PX6log; return px; } inline double get_log_qx(const double x){ const double QX1log = 1.12873587189167450590E1; const double QX2log = 4.52279145837532221105E1; const double QX3log = 8.29875266912776603211E1; const double QX4log = 7.11544750618563894466E1; const double QX5log = 2.31251620126765340583E1; double qx = x; qx += QX1log; qx *=x; qx += QX2log; qx *=x; qx += QX3log; qx *=x; qx += QX4log; qx *=x; qx += QX5log; return qx; } } // Log double precision -------------------------------------------------------- inline double fast_log(double x){ const double original_x = x; /* separate mantissa from exponent */ double fe; x = details::getMantExponent(x,fe); // blending x > details::SQRTH? fe+=1. : x+=x ; x -= 1.0; /* rational form */ double px = details::get_log_px(x); //for the final formula const double x2 = x*x; px *= x; px *= x2; const double qx = details::get_log_qx(x); double res = px / qx ; res -= fe * 2.121944400546905827679e-4; res -= 0.5 * x2 ; res = x + res; res += fe * 0.693359375; if (original_x > details::LOG_UPPER_LIMIT) res = std::numeric_limits::infinity(); if (original_x < details::LOG_LOWER_LIMIT) // THIS IS NAN! 
res = - std::numeric_limits::quiet_NaN(); return res; } // Log single precision -------------------------------------------------------- namespace details{ const float LOGF_UPPER_LIMIT = MAXNUMF; const float LOGF_LOWER_LIMIT = 0; const float PX1logf = 7.0376836292E-2f; const float PX2logf = -1.1514610310E-1f; const float PX3logf = 1.1676998740E-1f; const float PX4logf = -1.2420140846E-1f; const float PX5logf = 1.4249322787E-1f; const float PX6logf = -1.6668057665E-1f; const float PX7logf = 2.0000714765E-1f; const float PX8logf = -2.4999993993E-1f; const float PX9logf = 3.3333331174E-1f; inline float get_log_poly(const float x){ float y = x*PX1logf; y += PX2logf; y *= x; y += PX3logf; y *= x; y += PX4logf; y *= x; y += PX5logf; y *= x; y += PX6logf; y *= x; y += PX7logf; y *= x; y += PX8logf; y *= x; y += PX9logf; return y; } const float SQRTHF = 0.707106781186547524f; } // Log single precision -------------------------------------------------------- inline float fast_logf( float x ) { const float original_x = x; float fe; x = details::getMantExponentf( x, fe); x > details::SQRTHF? 
fe+=1.f : x+=x ; x -= 1.0f; const float x2 = x*x; float res = details::get_log_poly(x); res *= x2*x; res += -2.12194440e-4f * fe; res += -0.5f * x2; res= x + res; res += 0.693359375f * fe; if (original_x > details::LOGF_UPPER_LIMIT) res = std::numeric_limits::infinity(); if (original_x < details::LOGF_LOWER_LIMIT) res = -std::numeric_limits::quiet_NaN(); return res; } //------------------------------------------------------------------------------ void logv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_logv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void logfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_logfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); } //vdt namespace #endif /* LOG_H_ */ vdt-0.4.4/include/sin.h000066400000000000000000000036501421413530600147040ustar00rootroot00000000000000/* * cos.h * The basic idea is to exploit Pade polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code. * The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . 
*/ #ifndef SIN_H_ #define SIN_H_ #include "sincos.h" namespace vdt{ // Sin double precision -------------------------------------------------------- /// Double precision sine: just call sincos. inline double fast_sin(double x){double s,c;fast_sincos(x,s,c);return s;} //------------------------------------------------------------------------------ inline float fast_sinf(float x){float s,c;fast_sincosf(x,s,c);return s;} //------------------------------------------------------------------------------ void sinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_sinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void sinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_sinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); } //vdt namespace #endif /* SIN_H_ */ vdt-0.4.4/include/sincos.h000066400000000000000000000133571421413530600154160ustar00rootroot00000000000000/* * sincos_common.h * The basic idea is to exploit Pade polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code. * The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . 
*/ #include "vdtcore_common.h" #include #include #ifndef SINCOS_COMMON_H_ #define SINCOS_COMMON_H_ namespace vdt{ namespace details{ // double precision constants const double DP1sc = 7.85398125648498535156E-1; const double DP2sc = 3.77489470793079817668E-8; const double DP3sc = 2.69515142907905952645E-15; const double C1sin = 1.58962301576546568060E-10; const double C2sin =-2.50507477628578072866E-8; const double C3sin = 2.75573136213857245213E-6; const double C4sin =-1.98412698295895385996E-4; const double C5sin = 8.33333333332211858878E-3; const double C6sin =-1.66666666666666307295E-1; const double C1cos =-1.13585365213876817300E-11; const double C2cos = 2.08757008419747316778E-9; const double C3cos =-2.75573141792967388112E-7; const double C4cos = 2.48015872888517045348E-5; const double C5cos =-1.38888888888730564116E-3; const double C6cos = 4.16666666666665929218E-2; const double DP1 = 7.853981554508209228515625E-1; const double DP2 = 7.94662735614792836714E-9; const double DP3 = 3.06161699786838294307E-17; // single precision constants const float DP1F = 0.78515625; const float DP2F = 2.4187564849853515625e-4; const float DP3F = 3.77489497744594108e-8; const float T24M1 = 16777215.; //------------------------------------------------------------------------------ inline double get_sin_px(const double x){ double px=C1sin; px *= x; px += C2sin; px *= x; px += C3sin; px *= x; px += C4sin; px *= x; px += C5sin; px *= x; px += C6sin; return px; } //------------------------------------------------------------------------------ inline double get_cos_px(const double x){ double px=C1cos; px *= x; px += C2cos; px *= x; px += C3cos; px *= x; px += C4cos; px *= x; px += C5cos; px *= x; px += C6cos; return px; } //------------------------------------------------------------------------------ /// Reduce to 0 to 45 inline double reduce2quadrant(double x, int32_t& quad) { x = fabs(x); quad = int (ONEOPIO4 * x); // always positive, so (int) == std::floor quad = (quad+1) & 
(~1); const double y = double (quad); // Extended precision modular arithmetic return ((x - y * DP1) - y * DP2) - y * DP3; } //------------------------------------------------------------------------------ /// Sincos only for -45deg < x < 45deg inline void fast_sincos_m45_45( const double z, double & s, double &c ) { double zz = z * z; s = z + z * zz * get_sin_px(zz); c = 1.0 - zz * .5 + zz * zz * get_cos_px(zz); } //------------------------------------------------------------------------------ } // End namespace details /// Double precision sincos inline void fast_sincos( const double xx, double & s, double &c ) { // I have to use doubles to make it vectorise... int j; double x = details::reduce2quadrant(xx,j); const double signS = (j&4); j-=2; const double signC = (j&4); const double poly = j&2; details::fast_sincos_m45_45(x,s,c); //swap if( poly==0 ) { const double tmp = c; c=s; s=tmp; } if(signC == 0.) c = -c; if(signS != 0.) s = -s; if (xx < 0.) s = -s; } // Single precision functions namespace details { //------------------------------------------------------------------------------ /// Reduce to 0 to 45 inline float reduce2quadrant(float x, int & quad) { /* make argument positive */ x = fabs(x); quad = int (ONEOPIO4F * x); /* integer part of x/PIO4 */ quad = (quad+1) & (~1); const float y = float(quad); // quad &=4; // Extended precision modular arithmetic return ((x - y * DP1F) - y * DP2F) - y * DP3F; } //------------------------------------------------------------------------------ /// Sincos only for -45deg < x < 45deg inline void fast_sincosf_m45_45( const float x, float & s, float &c ) { float z = x * x; s = (((-1.9515295891E-4f * z + 8.3321608736E-3f) * z - 1.6666654611E-1f) * z * x) + x; c = (( 2.443315711809948E-005f * z - 1.388731625493765E-003f) * z + 4.166664568298827E-002f) * z * z - 0.5f * z + 1.0f; } //------------------------------------------------------------------------------ } // end details namespace /// Single precision sincos inline 
void fast_sincosf( const float xx, float & s, float &c ) { int j; const float x = details::reduce2quadrant(xx,j); int signS = (j&4); j-=2; const int signC = (j&4); const int poly = j&2; float ls,lc; details::fast_sincosf_m45_45(x,ls,lc); //swap if( poly==0 ) { const float tmp = lc; lc=ls; ls=tmp; } if(signC == 0) lc = -lc; if(signS != 0) ls = -ls; if (xx<0) ls = -ls; c=lc; s=ls; } } // end namespace vdt #endif vdt-0.4.4/include/sqrt.h000066400000000000000000000067431421413530600151120ustar00rootroot00000000000000/* * sqrt.h * Implementations born on the Quake 3 fast inverse square root * function. * http://en.wikipedia.org/wiki/Fast_inverse_square_root * * Created on: Jun 24, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #ifndef SQRT_H_ #define SQRT_H_ #include "vdtcore_common.h" namespace vdt{ //------------------------------------------------------------------------------ /// Sqrt implmentation from Quake3 inline double fast_isqrt_general(double x, const uint32_t ISQRT_ITERATIONS) { const double threehalfs = 1.5; const double x2 = x * 0.5; double y = x; uint64_t i = details::dp2uint64(y); // Evil! 
i = 0x5fe6eb50c7aa19f9ULL - ( i >> 1 ); y = details::uint642dp(i); for (uint32_t j=0;j> 1 ); y = details::uint322sp(i); for (uint32_t j=0;j inline T fast_sqrt(T x) { return std::sqrt(x); } template inline T fast_div(T x,T y) { return x/y; } template inline T fast_fma(T x,T y, T z) { return x*y+z; } // correclty rounded template inline T fast_fmac(T x, T y, T z) { return std::fma(x,y,z); } } #endif vdt-0.4.4/include/tan.h000066400000000000000000000123551421413530600146770ustar00rootroot00000000000000/* * tan.h * The basic idea is to exploit Pade polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code. * The Cephes library can be found here: http://www.netlib.org/cephes/ * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . 
*/ #ifndef TAN_H_ #define TAN_H_ #include "vdtcore_common.h" #include "sincos.h" namespace vdt{ namespace details{ const double PX1tan=-1.30936939181383777646E4; const double PX2tan=1.15351664838587416140E6; const double PX3tan=-1.79565251976484877988E7; const double QX1tan = 1.36812963470692954678E4; const double QX2tan = -1.32089234440210967447E6; const double QX3tan = 2.50083801823357915839E7; const double QX4tan = -5.38695755929454629881E7; const double DP1tan = 7.853981554508209228515625E-1; const double DP2tan = 7.94662735614792836714E-9; const double DP3tan = 3.06161699786838294307E-17; const float DP1Ftan = 0.78515625; const float DP2Ftan = 2.4187564849853515625e-4; const float DP3Ftan = 3.77489497744594108e-8; //------------------------------------------------------------------------------ /// Reduce to -45 to 45 inline double reduce2quadranttan(double x, int32_t& quad) { x = fabs(x); quad = int( ONEOPIO4 * x ); // always positive, so (int) == std::floor quad = (quad+1) & (~1); const double y = quad; // Extended precision modular arithmetic return ((x - y * DP1tan) - y * DP2tan) - y * DP3tan; } //------------------------------------------------------------------------------ /// Reduce to -45 to 45 inline float reduce2quadranttan(float x, int32_t& quad) { x = fabs(x); quad = int( ONEOPIO4F * x ); // always positive, so (int) == std::floor quad = (quad+1) & (~1); const float y = quad; // Extended precision modular arithmetic return ((x - y * DP1Ftan) - y * DP2Ftan) - y * DP3Ftan; } } //------------------------------------------------------------------------------ /// Double precision tangent implementation inline double fast_tan(double x){ const uint64_t sign_mask = details::getSignMask(x); int32_t quad =0; const double z=details::reduce2quadranttan(x,quad); const double zz = z * z; double res=z; if( zz > 1.0e-14 ){ double px = details::PX1tan; px *= zz; px += details::PX2tan; px *= zz; px += details::PX3tan; double qx=zz; qx += details::QX1tan; qx *=zz; qx 
+= details::QX2tan; qx *=zz; qx += details::QX3tan; qx *=zz; qx += details::QX4tan; res = z + z * zz * px / qx; } // A no branching way to say: if j&2 res = -1/res. You can!!! quad &=2; quad >>=1; const int32_t alt = quad^1; // Avoid fpe generated by 1/0 if res is 0 const double zeroIfXNonZero = (x==0.); res += zeroIfXNonZero; res = quad * (-1./res) + alt * res; // one coeff is one and one is 0! // Again, return 0 if res==0, the correct result otherwhise return details::dpXORuint64(res,sign_mask) * (1.-zeroIfXNonZero); } // Single precision ------------------------------------------------------------ inline float fast_tanf(float x){ const uint32_t sign_mask = details::getSignMask(x); int32_t quad =0; const float z=details::reduce2quadranttan(x,quad); const float zz = z * z; float res=z; if( zz > 1.0e-14f ){ res = ((((( 9.38540185543E-3f * zz + 3.11992232697E-3f) * zz + 2.44301354525E-2f) * zz + 5.34112807005E-2f) * zz + 1.33387994085E-1f) * zz + 3.33331568548E-1f) * zz * z + z; } // A no branching way to say: if j&2 res = -1/res. You can!!! quad &=2; quad >>=1; const int32_t alt = quad^1; // Avoid fpe generated by 1/0 if res is 0 const float zeroIfXNonZero = (x==0.f); res += zeroIfXNonZero; res = quad * (-1.f/res) + alt * res; // one coeff is one and one is 0! 
return details::spXORuint32(res,sign_mask) * (1.f-zeroIfXNonZero); } //------------------------------------------------------------------------------ void tanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_tanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void tanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_tanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); //------------------------------------------------------------------------------ } //vdt namespace #endif /* TAN_H_ */ vdt-0.4.4/include/tanh.h000066400000000000000000000055021421413530600150430ustar00rootroot00000000000000/* * tanh.h * The basic idea is to exploit Pade polynomials. * Implemented by Manuel Schiller for LHCb. * * Created on: Sep 23, 2017 * Author: Paul Seyfert, Manuel Schiller */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . */ #ifndef TANH_H_ #define TANH_H_ #include "vdtcore_common.h" namespace vdt{ /// Fast tanh implementation double precision inline double fast_tanh(double x){ // for very large |x| > 20, tanh(x) is x/|x| anyway (at least to double // precision) // // NB: branch-free code takes longer to execute if (std::abs(x) > 20.) 
return std::copysign(1., x); // strategy for large arguments: tanh(2x) = 2 tanh(x)/(1 + tanh^2(x)) // idea is to use this "argument halving" a couple of times, and use a // very short Padé approximation for the rest of the way const auto xx = x * 0.125; const auto xx2 = xx * xx; const auto numer = 135135 + xx2 * (17325 + xx2 * ( 378 + xx2 * 1)); const auto denom = 135135 + xx2 * (62370 + xx2 * (3150 + xx2 * 28)); auto tanh = xx * numer / denom; tanh = 2 * tanh / (tanh * tanh + 1); tanh = 2 * tanh / (tanh * tanh + 1); return 2 * tanh / (tanh * tanh + 1); } //------------------------------------------------------------------------------ /// Fast tanh implementation single precision inline float fast_tanhf( float x ) { // same strategy as double version above, but even shorter Padé // approximation is sufficient for float // // NB: branch-free code takes longer to execute if (std::abs(x) > 9.1f) return std::copysign(1.f, x); const auto xx = x * 0.125f; const auto xx2 = xx * xx; auto tanh = xx * (xx2 + 15) / (6 * xx2 + 15); tanh = 2 * tanh / (tanh * tanh + 1); tanh = 2 * tanh / (tanh * tanh + 1); return 2 * tanh / (tanh * tanh + 1); } //------------------------------------------------------------------------------ // Vector signatures void tanhv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void fast_tanhv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); void tanhfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); void fast_tanhfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); }// end of vdt #endif // end of tanh vdt-0.4.4/include/vdtMath.h000066400000000000000000000004621421413530600155200ustar00rootroot00000000000000#ifndef _VDT_MATH_H_ #define _VDT_MATH_H_ // Include all the VDT fucntions #include "sin.h" #include "asin.h" #include "cos.h" #include "tan.h" #include "tanh.h" #include "atan.h" #include "atan2.h" 
#include "exp.h" #include "log.h" #include "sqrt.h" #include "inv.h" #include "identity.h" #endif vdt-0.4.4/include/vdtcore_common.h000066400000000000000000000163531421413530600171350ustar00rootroot00000000000000/* * vdtcore_common.h * Common functions for the vdt routines. * The basic idea is to exploit Pade polynomials. * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier * moshier@na-net.ornl.gov) as well as actual code for the exp, log, sin, cos, * tan, asin, acos and atan functions. The Cephes library can be found here: * http://www.netlib.org/cephes/ * * Created on: Jun 23, 2012 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente */ /* * VDT is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with this program. If not, see . 
*/ #ifndef VDTCOMMON_H_ #define VDTCOMMON_H_ #include "inttypes.h" #include namespace vdt{ namespace details{ // Constants const double TWOPI = 2.*M_PI; const double PI = M_PI; const double PIO2 = M_PI_2; const double PIO4 = M_PI_4; const double ONEOPIO4 = 4./M_PI; const float TWOPIF = 2.*M_PI; const float PIF = M_PI; const float PIO2F = M_PI_2; const float PIO4F = M_PI_4; const float ONEOPIO4F = 4./M_PI; const double MOREBITS = 6.123233995736765886130E-17; const float MAXNUMF = 3.4028234663852885981170418348451692544e38f; //------------------------------------------------------------------------------ /// Used to switch between different type of interpretations of the data (64 bits) union ieee754{ inline ieee754 () {}; inline ieee754 (double thed) {d=thed;}; inline ieee754 (uint64_t thell) {ll=thell;}; inline ieee754 (float thef) {f[0]=thef;}; inline ieee754 (uint32_t thei) {i[0]=thei;}; double d; float f[2]; uint32_t i[2]; uint64_t ll; uint16_t s[4]; }; //------------------------------------------------------------------------------ /// Converts an unsigned long long to a double inline double uint642dp(uint64_t ll) { ieee754 tmp; tmp.ll=ll; return tmp.d; } //------------------------------------------------------------------------------ /// Converts a double to an unsigned long long inline uint64_t dp2uint64(double x) { ieee754 tmp; tmp.d=x; return tmp.ll; } //------------------------------------------------------------------------------ /// Makes an AND of a double and a unsigned long long inline double dpANDuint64(const double x, const uint64_t i ){ return uint642dp(dp2uint64(x) & i); } //------------------------------------------------------------------------------ /// Makes an OR of a double and a unsigned long long inline double dpORuint64(const double x, const uint64_t i ){ return uint642dp(dp2uint64(x) | i); } /// Makes a XOR of a double and a unsigned long long inline double dpXORuint64(const double x, const uint64_t i ){ return uint642dp(dp2uint64(x) ^ 
i); } //------------------------------------------------------------------------------ inline uint64_t getSignMask(const double x){ const uint64_t mask=0x8000000000000000ULL; return dp2uint64(x) & mask; } //------------------------------------------------------------------------------ /// Converts an int to a float inline float uint322sp(int x) { ieee754 tmp; tmp.i[0]=x; return tmp.f[0]; } //------------------------------------------------------------------------------ /// Converts a float to an int inline uint32_t sp2uint32(float x) { ieee754 tmp; tmp.f[0]=x; return tmp.i[0]; } //------------------------------------------------------------------------------ /// Makes an AND of a float and a unsigned long inline float spANDuint32(const float x, const uint32_t i ){ return uint322sp(sp2uint32(x) & i); } //------------------------------------------------------------------------------ /// Makes an OR of a float and a unsigned long inline float spORuint32(const float x, const uint32_t i ){ return uint322sp(sp2uint32(x) | i); } //------------------------------------------------------------------------------ /// Makes an OR of a float and a unsigned long inline float spXORuint32(const float x, const uint32_t i ){ return uint322sp(sp2uint32(x) ^ i); } //------------------------------------------------------------------------------ /// Get the sign mask inline uint32_t getSignMask(const float x){ const uint32_t mask=0x80000000; return sp2uint32(x) & mask; } //------------------------------------------------------------------------------ /// Like frexp but vectorising and the exponent is a double. inline double getMantExponent(const double x, double & fe){ uint64_t n = dp2uint64(x); // Shift to the right up to the beginning of the exponent. 
// Then with a mask, cut off the sign bit uint64_t le = (n >> 52); // chop the head of the number: an int contains more than 11 bits (32) int32_t e = le; // This is important since sums on uint64_t do not vectorise fe = e-1023 ; // This puts to 11 zeroes the exponent n &=0x800FFFFFFFFFFFFFULL; // build a mask which is 0.5, i.e. an exponent equal to 1022 // which means *2, see the above +1. const uint64_t p05 = 0x3FE0000000000000ULL; //dp2uint64(0.5); n |= p05; return uint642dp(n); } //------------------------------------------------------------------------------ /// Like frexp but vectorising and the exponent is a float. inline float getMantExponentf(const float x, float & fe){ uint32_t n = sp2uint32(x); int32_t e = (n >> 23)-127; fe = e; // fractional part const uint32_t p05f = 0x3f000000; // //sp2uint32(0.5); n &= 0x807fffff;// ~0x7f800000; n |= p05f; return uint322sp(n); } //------------------------------------------------------------------------------ /// Converts a fp to an int inline uint32_t fp2uint(float x) { return sp2uint32(x); } /// Converts a fp to an int inline uint64_t fp2uint(double x) { return dp2uint64(x); } /// Converts an int to fp inline float int2fp(uint32_t i) { return uint322sp(i); } /// Converts an int to fp inline double int2fp(uint64_t i) { return uint642dp(i); } //------------------------------------------------------------------------------ /** * A vectorisable floor implementation, not only triggered by fast-math. * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 * compliant for argument -0.0 **/ inline double fpfloor(const double x){ // no problem since exp is defined between -708 and 708. Int is enough for it! int32_t ret = int32_t (x); ret-=(sp2uint32(x)>>31); return ret; } //------------------------------------------------------------------------------ /** * A vectorisable floor implementation, not only triggered by fast-math. 
* These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 * compliant for argument -0.0 **/ inline float fpfloor(const float x){ int32_t ret = int32_t (x); ret-=(sp2uint32(x)>>31); return ret; } //------------------------------------------------------------------------------ } } // end of namespace vdt #endif /* VDTCOMMON_H_ */ vdt-0.4.4/lib/000077500000000000000000000000001421413530600130615ustar00rootroot00000000000000vdt-0.4.4/lib/CMakeLists.txt000066400000000000000000000016101421413530600156170ustar00rootroot00000000000000# Rules for making the so library with the vector and libm-like signatures # Define this directory's flags: SET(CMAKE_CXX_FLAGS ${LIB_FLAGS}) include_directories ( ${INC_DIR} ) if(BUILD_SHARED_LIBS) message(STATUS "Libraries are configured as: SHARED") else() message(STATUS "Libraries are configured as: STATIC") endif() if(PRELOAD) message(STATUS "Symbols for the preload requested") endif() # The library ADD_LIBRARY(vdt ${SRC_DIR}/vdtMath_signatures.cc ${INC_DIR}/vdtMath.h ) # Installation of the lib INSTALL(TARGETS vdt DESTINATION lib) # Build Vc wrapper (without c++11) if(USE_VC) SET(CMAKE_CXX_FLAGS "${LIB_FLAGS} -fabi-version=6") include_directories( ${CMAKE_SOURCE_DIR}/Vc ${CMAKE_SOURCE_DIR}/Vc/include ) add_library(VcWrapper ${SRC_DIR}/vdtdiag_vcWrapper.cc ${SRC_DIR}/vdtdiag_vcWrapper.h) target_link_libraries(VcWrapper libVc.a) endif() vdt-0.4.4/progs/000077500000000000000000000000001421413530600134455ustar00rootroot00000000000000vdt-0.4.4/progs/CMakeLists.txt000066400000000000000000000006261421413530600162110ustar00rootroot00000000000000# Define this directory's flags: SET(CMAKE_CXX_FLAGS ${DIAG_FLAGS}) include_directories ( ${CMAKE_SOURCE_DIR}/include/diagnostic ) include_directories ( ${CMAKE_SOURCE_DIR}/include ) # Benchmarks add_exe_and_link ( vdtPerfBenchmark vdtPerfBenchmark.cpp ${LIBTIMING}) add_exe_and_link ( vdtArithmBenchmark vdtArithmBenchmark.cpp None) add_exe_and_link ( vdtArithmComparison 
vdtArithmComparison.cpp None) vdt-0.4.4/progs/numpyBootstrap.csh000077500000000000000000000011251421413530600172140ustar00rootroot00000000000000set CD=$PWD echo $CD pushd $TMPDIR # source /cvmfs/sft.cern.ch/lcg/views/LCG_88/x86_64-slc6-gcc62-opt/setup.csh # source /cvmfs/sft.cern.ch/lcg/views/LCG_88/x86_64-centos7-gcc62-opt/setup.csh git clone https://github.com/dpiparo/vdt.git cd vdt/src/ python numpy_wrapper_generator.py c++ -Ofast -shared -fPIC -Wall -o libvdtFatLibWrapper.so vdtFatLibWrapper.cc -I../include/ python setup.py build_ext --inplace python ../progs/testExp.py cp -r vdtnpfun_directory $CD/. cp libvdtFatLibWrapper.so $CD/. cp vdt.py $CD/. cp ../progs/testExp.py $CD/. cp ../progs/*.ipynb $CD/. popd python testExp.py vdt-0.4.4/progs/testExp.py000066400000000000000000000007451421413530600154610ustar00rootroot00000000000000import imp import numpy vdtnpfun= imp.load_dynamic('vdtnpfun','vdtnpfun_directory/vdtnpfun.so') import vdtnpfun print dir(vdtnpfun) vi=[1,2,3,4] requires = ['CONTIGUOUS', 'ALIGNED'] vi = numpy.asanyarray(vi) vi = numpy.require(vi, numpy.single, requires) vo = numpy.empty_like(vi) print vdtnpfun.vdt_exp(vi) vd=[1,2,3,4] requires = ['CONTIGUOUS', 'ALIGNED'] vd = numpy.asanyarray(vi) vd = numpy.require(vi, numpy.double, requires) vo = numpy.empty_like(vi) print vdtnpfun.vdt_exp(vd) vdt-0.4.4/progs/units/000077500000000000000000000000001421413530600146075ustar00rootroot00000000000000vdt-0.4.4/progs/units/CMakeLists.txt000066400000000000000000000022141421413530600173460ustar00rootroot00000000000000# Enable this directory's flags: SET(CMAKE_CXX_FLAGS ${DIAG_FLAGS}) include_directories ( ${INC_DIR} ) include_directories ( ${INC_DIR}/diagnostic ) # Utils add_executable ( fpDifferentBit fpDifferentBit.cpp ) add_executable ( printInstSet printInstSet.cpp ) add_executable ( test_randomPool test_randomPool.cpp ) add_exe_and_link ( test_fcnResponse test_fcnResponse.cpp None) add_exe_and_link ( test_fcnComparison test_fcnComparison.cpp None ) 
add_exe_and_link ( test_fcnPerformance test_fcnPerformance.cpp rt ) # Funtions add_exe_and_link ( exp_test exp_test.cpp None ) add_exe_and_link ( log_test log_test.cpp None ) add_exe_and_link ( sqrt_test sqrt_test.cpp None ) add_exe_and_link ( sin_test sin_test.cpp None ) add_exe_and_link ( cos_test cos_test.cpp None ) add_exe_and_link ( inv_test inv_test.cpp None ) add_exe_and_link ( tan_test tan_test.cpp None ) add_exe_and_link ( tanh_test tanh_test.cpp None ) add_exe_and_link ( asin_test asin_test.cpp None ) add_exe_and_link ( acos_test acos_test.cpp None ) add_exe_and_link ( atan_test atan_test.cpp None ) add_exe_and_link ( atan2_test atan2_test.cpp None ) # add_executable ( deleteme deleteme.cpp ) vdt-0.4.4/progs/units/acos_test.cpp000066400000000000000000000016411421413530600173010ustar00rootroot00000000000000/* * sin_test.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #ifndef COS_TEST_CPP_ #define COS_TEST_CPP_ #include "asin.h" #include "vdtdiag_helper.h" using namespace vdt; using namespace vdth; int main(){ constexpr uint32_t size=10; //dp double dpvals[size]={1,.9,.8,.6,1e-200,0,-0.00004,-.2,-.8,-0.9999999999}; printFuncDiff("acos", (dpdpfunction)refMath::acos,(dpdpfunction)fast_acos, dpvals, size); printFuncDiff ("acosv", (dpdpfunctionv) acosv, (dpdpfunctionv) fast_acosv, dpvals, size ); //sp float spvals[size]={1.f,.9f,.8f,.12f,1e-20f,0.f,-0.004f,-.2f,-.8f,-0.9999999999f}; printFuncDiff("acosf", (spspfunction)refMath::acosf,(spspfunction)fast_acosf,spvals,size); printFuncDiff ("acosfv", (spspfunctionv) acosfv, (spspfunctionv) fast_acosfv, spvals, size ); } #endif /* COS_TEST_CPP_ */ vdt-0.4.4/progs/units/asin_test.cpp000066400000000000000000000015061421413530600173060ustar00rootroot00000000000000/* * sin_test.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #ifndef SIN_TEST_CPP_ #define SIN_TEST_CPP_ #include "asin.h" #include "vdtdiag_helper.h" using namespace vdt; using namespace vdth; int main(){ constexpr uint32_t size=10; // 
dp double dpvals[size]={1,.9,.8,.6,1e-200,0,-0.00004,-.2,-.8,-0.9999999999}; printFuncDiff("asin", (dpdpfunction)refMath::asin,(dpdpfunction)fast_asin,dpvals,size); printFuncDiff ("asinv", (dpdpfunctionv) asinv, (dpdpfunctionv) fast_asinv, dpvals, size ); //sp float spvals[size]={1,.9,.8,.12,1e-200,0,-0.004,-.2,-.8,-0.9999999999}; printFuncDiff("asinf", (spspfunction)refMath::asinf,(spspfunction)fast_asinf,spvals,size); printFuncDiff ("asinfv", (spspfunctionv) asinfv, (spspfunctionv) fast_asinfv, spvals, size ); } #endif /* SIN_TEST_CPP_ */ vdt-0.4.4/progs/units/atan2_test.cpp000066400000000000000000000014111421413530600173540ustar00rootroot00000000000000#ifndef ATAN2_TEST_CPP_ #define ATAN2_TEST_CPP_ #include "atan2.h" #include #include "vdtdiag_helper.h" using namespace vdt; using namespace vdth; int main(){ constexpr uint32_t size=10; //sp float spvals1[size]={-1e30f,-1e19f,-300.f,-20.f,-0.23f,0.f,13.f,230.f,1e20f,1e30f}; float spvals2[size]={1e2f,1e1f,400.f,20.f,0.934f,0.f,-21.2f,-532.f,-1e17f,-1e29f}; printFuncDiff("atan2f", (spsp2function)refMath::atan2f,(spsp2function)fast_atan2f,spvals1,spvals2,size); //sp double dpvals1[size]={-1e30,-1e19,-300.,-20.,-0.23,0.f,13.,230.,1e20,1e90}; double dpvals2[size]={1e2,1e1,400.,20.,0.934,0.,-21.2,-532.,-1e17,-1e29}; printFuncDiff("atan2", (dpdp2function)refMath::atan2,(dpdp2function)fast_atan2,dpvals1,dpvals2,size); } #endif /* ATAN2_TEST_CPP_ */ vdt-0.4.4/progs/units/atan_test.cpp000066400000000000000000000015241421413530600172770ustar00rootroot00000000000000/* * atan_test.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #ifndef ATAN_TEST_CPP_ #define ATAN_TEST_CPP_ #include "atan.h" #include "vdtdiag_helper.h" using namespace vdt; using namespace vdth; int main(){ constexpr uint32_t size=10; // dp double dpvals[size]={-1e200,-1e50,-300.,-20.,0.,13.,230.,1e20,1e303}; printFuncDiff("atan", (dpdpfunction)refMath::atan,(dpdpfunction)fast_atan,dpvals,size); printFuncDiff ("atanv", (dpdpfunctionv) atanv, 
(dpdpfunctionv) fast_atanv, dpvals, size ); //sp float spvals[size]={-1e30f,-1e19f,-300.f,-20.f,0.f,13.f,230.f,1e20f,1e30f}; printFuncDiff("atanf", (spspfunction)refMath::atanf,(spspfunction)fast_atanf,spvals,size); printFuncDiff ("atanfv", (spspfunctionv) atanfv, (spspfunctionv) fast_atanfv, spvals, size ); } #endif /* ATAN_TEST_CPP_ */ vdt-0.4.4/progs/units/cos_test.cpp000066400000000000000000000016721421413530600171440ustar00rootroot00000000000000/* * cos_test.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #ifndef COS_TEST_CPP_ #define COS_TEST_CPP_ #include "cos.h" #include "vdtdiag_helper.h" using namespace vdt; using namespace vdth; int main(){ constexpr uint32_t size=10; constexpr double PI = M_PI; // dp double dpvals[size]={32.*PI,5.6*PI,PI/4.1,PI/6.2,PI/7,PI/7.5,PI/85,PI/19,PI/10,0.000001}; printFuncDiff("cos", (dpdpfunction)refMath::cos,(dpdpfunction)fast_cos,dpvals,size); printFuncDiff ("cosv", (dpdpfunctionv) cosv, (dpdpfunctionv) fast_cosv, dpvals, size ); // printFuncDiff("cos2", (dpdpfunction)cos,(dpdpfunction)fast_cos2,dpvals,size); //sp float spvals[size]={PI,PI/2.9,PI/4.1,PI/6.2,PI/7,PI/7,PI/85,PI/19,PI/10,PI/20.2}; printFuncDiff("cosf", (spspfunction)refMath::cosf,(spspfunction)fast_cosf,spvals,size); printFuncDiff ("cosfv", (spspfunctionv) cosfv, (spspfunctionv) fast_cosfv, spvals, size ); } #endif /* COS_TEST_CPP_ */ vdt-0.4.4/progs/units/exp_test.cpp000066400000000000000000000014531421413530600171510ustar00rootroot00000000000000/* * exp_test.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #ifndef EXP_TEST_CPP_ #define EXP_TEST_CPP_ #include "exp.h" #include "vdtdiag_helper.h" using namespace vdt; using namespace vdth; int main(){ constexpr uint32_t size=10; // dp double dpvals[size]={-705,-100,-2,-1e-16,0,1e-50,4,10,500,805}; printFuncDiff("exp", (dpdpfunction)refMath::exp,(dpdpfunction)fast_exp,dpvals,size); printFuncDiff ("expv", (dpdpfunctionv) expv, (dpdpfunctionv) fast_expv, dpvals, size ); //sp float 
spvals[size]={-87.f,-50.f,-2.f,-1e-10f,0.f,1e-10f,4.f,10.f,50.f,95.f}; printFuncDiff("expf", (spspfunction)refMath::expf,(spspfunction)fast_expf,spvals,size); printFuncDiff ("expvf", (spspfunctionv) expfv, (spspfunctionv) fast_expfv, spvals, size ); } #endif /* EXP_TEST_CPP_ */ vdt-0.4.4/progs/units/fpDifferentBit.cpp000066400000000000000000000004161421413530600202070ustar00rootroot00000000000000/** * Checks for the difference between pairs of floating points numbers. **/ #include "vdtdiag_helper.h" #include using namespace vdth; int main(){ print_different_bit(3.f,-3.f); print_different_bit(123.,123.00001); print_different_bit(4.f,4.f); } vdt-0.4.4/progs/units/inv_test.cpp000066400000000000000000000023541421413530600171520ustar00rootroot00000000000000/* * inv_test.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #ifndef INV_TEST_CPP_ #define INV_TEST_CPP_ #include "inv.h" #include "vdtdiag_helper.h" using namespace vdt; using namespace vdth; int main(){ constexpr uint32_t size=10; // dp double dpvals[size]={1e200, 1e-17, 1, 0.5, .3, -10, -1e300, -.2, -101., -88}; printFuncDiff("inv", (dpdpfunction) inv,(dpdpfunction)fast_inv,dpvals,size); printFuncDiff ("invv", (dpdpfunctionv) invv, (dpdpfunctionv) fast_invv, dpvals, size ); printFuncDiff("approx_inv", (dpdpfunction) inv,(dpdpfunction)fast_approx_inv,dpvals,size); printFuncDiff ("approx_invv", (dpdpfunctionv) invv, (dpdpfunctionv) fast_approx_invv, dpvals, size ); //sp float spvals[size]={1e20f, 1e-7f, 1.f, 0.5f, .3f, -10.f, -1e9f, -.2f, -101.f, -88.f}; printFuncDiff("invf", (spspfunction) invf,(spspfunction)fast_invf,spvals,size); printFuncDiff ("invfv", (spspfunctionv) invfv, (spspfunctionv) fast_invfv, spvals, size ); printFuncDiff("approx_invf", (spspfunction) invf,(spspfunction) fast_approx_invf,spvals,size); printFuncDiff ("approx_invfv", (spspfunctionv) invfv, (spspfunctionv) fast_approx_invfv, spvals, size ); } #endif /* SIN_TEST_CPP_ */ 
vdt-0.4.4/progs/units/log_test.cpp000066400000000000000000000014541421413530600171370ustar00rootroot00000000000000/* * log_test.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #ifndef LOG_TEST_CPP_ #define LOG_TEST_CPP_ #include "log.h" #include "vdtdiag_helper.h" using namespace vdt; using namespace vdth; int main(){ constexpr uint32_t size=10; // dp double dpvals[size]={1e200,1.34e101,2,1e-16,0,1e-50,4,10,500,.1}; printFuncDiff("log", (dpdpfunction)refMath::log,(dpdpfunction)fast_log,dpvals,size); printFuncDiff ("logv", (dpdpfunctionv) logv, (dpdpfunctionv) fast_logv, dpvals, size ); //sp float spvals[size]={-87.f,-50.f,-2.f,-1e-1f,0.f,1e-5f,4.f,10.f,50.f,95.f}; printFuncDiff("logf", (spspfunction)refMath::logf,(spspfunction)fast_logf,spvals,size); printFuncDiff ("logvf", (spspfunctionv) logfv, (spspfunctionv) fast_logfv, spvals, size ); } #endif /* LOG_TEST_CPP_ */ vdt-0.4.4/progs/units/printInstSet.cpp000066400000000000000000000002711421413530600177610ustar00rootroot00000000000000/* * printInstSet.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #include "vdtdiag_helper.h" using namespace vdth; int main(){ print_instructions_info(); } vdt-0.4.4/progs/units/sin_test.cpp000066400000000000000000000015441421413530600171470ustar00rootroot00000000000000/* * sin_test.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #ifndef SIN_TEST_CPP_ #define SIN_TEST_CPP_ #include "sin.h" #include "vdtdiag_helper.h" using namespace vdt; using namespace vdth; int main(){ constexpr uint32_t size=10; constexpr double PI = M_PI; // dp double dpvals[size]={PI,PI/2.9,PI/4.1,PI/6.2,PI/7,PI/7,PI/85,PI/19,PI/10,PI/20.2}; printFuncDiff("sin", (dpdpfunction)refMath::sin,(dpdpfunction)fast_sin,dpvals,size); printFuncDiff ("sinv", (dpdpfunctionv) sinv, (dpdpfunctionv) fast_sinv, dpvals, size ); //sp float spvals[size]={PI,PI/2.9,PI/4.1,PI/6.2,PI/7,PI/7,PI/85,PI/19,PI/10,PI/20.2}; printFuncDiff("sinf", 
(spspfunction)refMath::sinf,(spspfunction)fast_sinf,spvals,size); printFuncDiff ("sinfv", (spspfunctionv) sinfv, (spspfunctionv) fast_sinfv, spvals, size ); } #endif /* SIN_TEST_CPP_ */ vdt-0.4.4/progs/units/sqrt_test.cpp000066400000000000000000000024051421413530600173440ustar00rootroot00000000000000/* * sqrt_test.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #ifndef SQRT_TEST_CPP_ #define SQRT_TEST_CPP_ #include "sqrt.h" #include "vdtdiag_helper.h" using namespace vdt; using namespace vdth; int main(){ constexpr uint32_t size=10; // dp double dpvals[size]={1e200,1.34e101,2,1e-16,0,1e-50,4,10,500,.1}; // the vanilla one printFuncDiff("isqrt", (dpdpfunction) isqrt,(dpdpfunction)fast_isqrt,dpvals,size); printFuncDiff ("sqrtv", (dpdpfunctionv) isqrtv, (dpdpfunctionv) fast_isqrtv, dpvals, size ); // the approximated one! printFuncDiff("approx_isqrt", (dpdpfunction) isqrt,(dpdpfunction)fast_approx_isqrt,dpvals,size); printFuncDiff ("approx_isqrtv", (dpdpfunctionv) isqrtv, (dpdpfunctionv) fast_approx_isqrtv, dpvals, size ); //sp float spvals[size]={87.f,50.f,2.f,1e-5f,0.f,1e-8f,4.f,10.f,50.f,95.f}; printFuncDiff("sqrtf", (spspfunction) isqrtf,(spspfunction) fast_isqrtf,spvals,size); printFuncDiff ("sqrtvf", (spspfunctionv) isqrtfv, (spspfunctionv) fast_isqrtfv, spvals, size ); printFuncDiff("approx_sqrtf", (spspfunction) isqrtf,(spspfunction)fast_approx_isqrtf,spvals,size); printFuncDiff ("approx_sqrtvf", (spspfunctionv) isqrtfv, (spspfunctionv) fast_approx_isqrtfv, spvals, size ); } #endif /* SQRT_TEST_CPP_ */ vdt-0.4.4/progs/units/tan_test.cpp000066400000000000000000000016041421413530600171350ustar00rootroot00000000000000/* * tan_test.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #ifndef TAN_TEST_CPP_ #define TAN_TEST_CPP_ #include "tan.h" #include "vdtdiag_helper.h" using namespace vdt; using namespace vdth; int main(){ constexpr uint32_t size=10; constexpr double PI = M_PI; // dp double dpvals[size]={0.01, 0.1+PI/4, 0.01+PI/2, 
1176.2*PI,0,-PI/7.5,PI/85,-PI/19,PI/10,-0.000001}; printFuncDiff("tan", (dpdpfunction)refMath::tan,(dpdpfunction)fast_tan,dpvals,size); printFuncDiff ("tanv", (dpdpfunctionv) tanv, (dpdpfunctionv) fast_tanv, dpvals, size ); //sp float spvals[size]={0.01, 0.1+PI/4, 0.01+PI/2, 1176.2*PI,PI/7,-PI/7.5,PI/85,-PI/19,PI/10,-0.000001}; printFuncDiff("tanf", (spspfunction)refMath::tanf,(spspfunction)fast_tanf,spvals,size); printFuncDiff ("tanfv", (spspfunctionv) tanfv, (spspfunctionv) fast_tanfv, spvals, size ); } #endif /* TAN_TEST_CPP_ */ vdt-0.4.4/progs/units/tanh_test.cpp000066400000000000000000000015151421413530600173060ustar00rootroot00000000000000/* * tanh_test.cpp * * Created on: Sep 23, 2017 * Author: Paul Seyfert */ #ifndef TANH_TEST_CPP_ #define TANH_TEST_CPP_ #include "tanh.h" #include "vdtdiag_helper.h" using namespace vdt; using namespace vdth; int main(){ constexpr uint32_t size=10; // dp double dpvals[size]={-1e200,-1e50,-10.,-0.1,0.,0.3,2.3,1e20,1e303}; printFuncDiff("tanh", (dpdpfunction)refMath::tanh,(dpdpfunction)fast_tanh,dpvals,size); printFuncDiff ("tanhv", (dpdpfunctionv) tanhv, (dpdpfunctionv) fast_tanhv, dpvals, size ); //sp float spvals[size]={-1e30f,-1e1f,-3.f,-0.7f,0.f,.3f,2.3f,1e20f,1e30f}; printFuncDiff("tanhf", (spspfunction)refMath::tanhf,(spspfunction)fast_tanhf,spvals,size); printFuncDiff ("tanhfv", (spspfunctionv) tanhfv, (spspfunctionv) fast_tanhfv, spvals, size ); } #endif /* TANH_TEST_CPP_ */ vdt-0.4.4/progs/units/test_fcnComparison.cpp000066400000000000000000000057121421413530600211600ustar00rootroot00000000000000/* * test_fcnResponse.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #include "vdtdiag_random.h" #include "vdtdiag_fcnResponse.h" #include "vdtdiag_fcnComparison.h" #include "vdtdiag_helper.h" #include #include "log.h" #include "atan2.h" #include int main(){ const uint32_t size = 1000000; // Test the FcnResponse in double precision std::string dpofilename("test_dpfunctionComparison.txt"); randomPool 
dpRandomPool(100,1000,size); fcnResponse dpLogResp("Log",dpRandomPool.getNumbers(), (vdth::dpdpfunction) refMath::log); fcnResponse dpFastLogResp("Fast Log",dpRandomPool.getNumbers(), (vdth::dpdpfunction) vdt::fast_log); fcnComparison dpLogComp("Log - libmVSvdt", dpRandomPool.getNumbers(), dpLogResp.getOutput(), dpFastLogResp.getOutput()); dpLogComp.printStats(); dpLogComp.writeFile(dpofilename); std::cout <<"Read from file: -----------------\n"; fcnComparison dpLogFromFile(dpofilename); dpLogFromFile.printStats(); // Test the FcnResponse in single precision std::string spofilename("test_spfunctionComparison.txt"); randomPool spRandomPool(1,1000,size); fcnResponse spLogResp("Logf",spRandomPool.getNumbers(), (vdth::spspfunction) refMath::logf); fcnResponse spFastLogResp("Fast Logf",spRandomPool.getNumbers(), (vdth::spspfunction) vdt::fast_logf); fcnComparison spLogComp("Logf - libmVSvdt", spRandomPool.getNumbers(), spLogResp.getOutput(), spFastLogResp.getOutput()); spLogComp.printStats(); spLogComp.writeFile(spofilename); std::cout <<"Read from file: -----------------\n"; fcnComparison spLogFromFile(spofilename); spLogFromFile.printStats(); // Test the FcnResponse in single precision with 2 inputs std::string spofilename2D("test_spfunctionComparison2D.txt"); randomPool2D spRandomPool2D(-1,-1,1,1,size); fcnResponse2D spLogResp2D("Atan2f", spRandomPool2D.getNumbersX(), spRandomPool2D.getNumbersY(), (vdth::spsp2function) atan2f); fcnResponse2D spFastLogResp2D("Fast Atan2f", spRandomPool2D.getNumbersX(), spRandomPool2D.getNumbersY(), (vdth::spsp2function) vdt::fast_atan2f); fcnComparison2D spLogComp2D("Atan2f - libmVSvdt", spRandomPool2D.getNumbersX(), spRandomPool2D.getNumbersY(), spLogResp2D.getOutput(), spFastLogResp2D.getOutput()); spLogComp2D.printStats(); spLogComp2D.writeFile(spofilename2D); std::cout <<"Read from file: -----------------\n"; fcnComparison spLogFromFile2D(spofilename2D); spLogFromFile2D.printStats(); } 
vdt-0.4.4/progs/units/test_fcnPerformance.cpp000066400000000000000000000053111421413530600213020ustar00rootroot00000000000000/* * test_fcnPerformance.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #include "vdtdiag_random.h" #include "vdtdiag_fcnPerformance.h" #include "vdtMath.h" #include "vdtdiag_helper.h" #include using namespace vdt; int main(){ constexpr uint32_t size = 10000; constexpr uint32_t repetitions = 100; randomPool dpRandomPool(-500,500,size); // Test the FcnResponse in double precision fcnPerformance dpExpPerf("Exp", dpRandomPool.getNumbers(), (vdth::dpdpfunction) refMath::exp,repetitions); dpExpPerf.print(); fcnPerformance dpFastExpPerf("Fast Exp", dpRandomPool.getNumbers(), (vdth::dpdpfunction) fast_exp,repetitions); dpFastExpPerf.print(); //-------------------------------------------------- // Test the FcnResponse in double precision, array signature fcnPerformance dpExpvPerf("Expv", dpRandomPool.getNumbers(), (vdth::dpdpfunctionv) expv,repetitions); dpExpvPerf.print(); fcnPerformance dpFastExpvPerf("Fast Expv", dpRandomPool.getNumbers(), (vdth::dpdpfunctionv) fast_expv,repetitions); dpFastExpvPerf.print(); //-------------------------------------------------- // Test the FcnResponse in double precision randomPool spRandomPool(-500,500,size); fcnPerformance spExpPerf("Expf", spRandomPool.getNumbers(), (vdth::spspfunction) expf,repetitions); spExpPerf.print(); fcnPerformance spFastExpPerf("Fast Expf", spRandomPool.getNumbers(), (vdth::spspfunction) fast_expf,repetitions); spFastExpPerf.print(); //-------------------------------------------------- // Test the FcnResponse in double precision, array signature fcnPerformance spExpvPerf("Expfv", spRandomPool.getNumbers(), (vdth::spspfunctionv) expfv,repetitions); spExpvPerf.print(); fcnPerformance spFastExpvPerf("Fast Expfv", spRandomPool.getNumbers(), (vdth::spspfunctionv) fast_expfv,repetitions); spFastExpvPerf.print(); //-------------------------------------------------- } 
vdt-0.4.4/progs/units/test_fcnResponse.cpp000066400000000000000000000043161421413530600206430ustar00rootroot00000000000000/* * test_fcnResponse.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #include "vdtdiag_random.h" #include "vdtdiag_fcnResponse.h" #include "vdtdiag_helper.h" #include #include #include #include int main(){ const uint32_t size = 10; // Test the FcnResponse in double precision const std::string dpfilename("testDpFcnPerf.txt"); randomPool dpRandomPool(-500,500,size); fcnResponse dpExpResp("Exp",dpRandomPool.getNumbers(), (vdth::dpdpfunction) refMath::exp); dpExpResp.writeFile(dpfilename); fcnResponse dpExpRespFromFile(dpfilename); dpExpRespFromFile.print(); dpExpRespFromFile.writeFile("testDpFcnPerf_fromFile.txt"); // Test the FcnResponse in single precision const std::string spfilename("testSpFcnPerf.txt"); randomPool spRandomPool(-50,50,size); fcnResponse spExpResp("Exp",spRandomPool.getNumbers(), (vdth::spspfunction) refMath::expf); spExpResp.writeFile(spfilename); fcnResponse spExpRespFromFile(spfilename); spExpRespFromFile.print(); spExpRespFromFile.writeFile("testSpFcnPerf_fromFile.txt"); // 2 inputs // Test the FcnResponse in double precision const std::string dp2filename("testDp2FcnPerf.txt"); randomPool2D dp2RandomPool(-500,-500,500,500, size); fcnResponse2D dpAtan2Resp("Atan2", dp2RandomPool.getNumbersX(), dp2RandomPool.getNumbersY(), (vdth::dpdp2function) refMath::atan2); dpAtan2Resp.print(); dpAtan2Resp.writeFile(dp2filename); fcnResponse2D dpAtan2RespFromFile(dp2filename); dpAtan2RespFromFile.print(); dpAtan2RespFromFile.writeFile("testDp2FcnPerf_fromFile.txt"); // Test the FcnResponse in single precision const std::string sp2filename("testSp2FcnPerf.txt"); randomPool2D sp2RandomPool(-500,-500,500,500, size); fcnResponse2D spAtan2Resp("Atan2", sp2RandomPool.getNumbersX(), sp2RandomPool.getNumbersY(), (vdth::spsp2function) refMath::atan2); spAtan2Resp.print(); spAtan2Resp.writeFile(sp2filename); fcnResponse2D 
spAtan2RespFromFile(sp2filename); spAtan2RespFromFile.print(); spAtan2RespFromFile.writeFile("testSp2FcnPerf_fromFile.txt"); } vdt-0.4.4/progs/units/test_randomPool.cpp000066400000000000000000000033771421413530600204760ustar00rootroot00000000000000/* * test_randomPool.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #include "vdtdiag_random.h" #include int main(){ constexpr uint32_t size = 10; // 1D // Test the Random Pool in double precision std::string dpfilename("testDpRandomNumbers.txt"); randomPool dprp_fromScratch(1,2,size, 3); dprp_fromScratch.writeFile(dpfilename); dprp_fromScratch.print(); randomPool dprp_fromFile(dpfilename); dprp_fromFile.print(); dpfilename="testDpRandomNumbers_rewritten.txt"; dprp_fromFile.writeFile(dpfilename); // Test the Random Pool in single precision std::string spfilename("testSpRandomNumbers.txt"); randomPool sprp_fromScratch(-2e2,2e2,size); sprp_fromScratch.print(); sprp_fromScratch.writeFile(spfilename); randomPool sprp_fromFile(spfilename); sprp_fromFile.print(); spfilename="testSpRandomNumbers_rewritten.txt"; sprp_fromFile.writeFile(spfilename); // 2D // Test the Random Pool in double precision std::string dpfilename2D("testDpRandomNumbers2D.txt"); randomPool2D dprp_fromScratch2D(1,2,4,7,size, 3); dprp_fromScratch2D.print(); dprp_fromScratch2D.writeFile(dpfilename2D); randomPool2D dprp_fromFile2D(dpfilename2D); dprp_fromFile2D.print(); dpfilename2D="testDpRandomNumbers2D_rewritten.txt"; dprp_fromFile2D.writeFile(dpfilename2D); // Test the Random Pool in single precision std::string spfilename2D("testSpRandomNumbers2D.txt"); randomPool2D sprp_fromScratch2D(-2e2, -1e3 ,2e2, 1e4,size); sprp_fromScratch2D.print(); sprp_fromScratch2D.writeFile(spfilename2D); randomPool2D sprp_fromFile2D(spfilename2D); sprp_fromFile2D.print(); spfilename2D="testSpRandomNumbers2D_rewritten.txt"; sprp_fromFile2D.writeFile(spfilename2D); } 
vdt-0.4.4/progs/vdtArithmBenchmark.cpp000066400000000000000000000154561421413530600177410ustar00rootroot00000000000000/* vdtArithmBenchmark.cpp * * created: 13.7.2012 * * Dumps fcnResponce of every function to the file. * Created for bunch-testing as preceeding step to the * bunch-fcnComparison * * Author: Ladislav Horky */ #include "vdtMath.h" #include "vdtdiag_random.h" #include "vdtdiag_fcnResponse.h" #include "vdtdiag_fcnTuples.h" #include "vdtdiag_simpleCmd.h" #include #include const double spool_max = 5000; //1.0e8; const double spool_min = -spool_max; const double ppool_min = .0001; //1e-10; const float spool_maxf = 5000;//8192; const float spool_minf = -spool_maxf; const float ppool_minf = .0001f;//8192; template class Tuple> void saveResponses(const Tuple& fcn_tuple, const std::string nickname){ fcnResponse response(std::get<0>(fcn_tuple),std::get<2>(fcn_tuple),std::get<1>(fcn_tuple)); // create filename: __response.txt std::string fname = nickname; // two dashes to prevent i.e Fast_something mess fname += "__"; fname += std::get<0>(fcn_tuple); fname += "__response.txt"; // dump to file response.writeFile(fname); } template void saveResponses2D(const std::string& fcn_name, FUNC fcn, const std::vector& randomX, const std::vector& randomY, const std::string& nickname){ fcnResponse2D response(fcn_name, randomX,randomY,fcn); // create filename: __response.txt std::string fname = nickname; // two dashes to prevent i.e Fast_something mess fname += "__"; fname += fcn_name; fname += "__response.txt"; // dump to file response.writeFile(fname); } int main(int argc, char **argv){ // set and parse commandline options CmdOptions opt; opt.addOption("-n","--nick","Nickname to distinguish different runs/libraries used (required)"); opt.addOption("-s","--size","# of numbers to be tested (default 50000)"); std::string nick = " "; uint32_t SIZE = 50000; if(!opt.parseCmd(argc,argv)){ std::cout << "Something is wrong with cmd options, try --help\n" <<"usage: vdtArithmBenchmark 
-n="; return 0; } // if help was printed, exit if(opt.isSet("-h")) return 1; // process cmd options nick = opt.getArgument("-n"); if(nick == ""){ std::cout << "Error: Nickname was not specified!\n"; opt.printHelp("-n"); return 0; } //getArgument() contains isSet check if(opt.getArgument("-s") != "") SIZE = atoi(opt.getArgument("-s").c_str()); // print basic info std::cout << "Nick = " << nick << ", size = " << SIZE << "\n"; std::cout<<"Starting...\n"; randomPool *spool,*ppool,*onepool,*exppool; randomPool2D *spool2D; spool = new randomPool(spool_min,spool_max,SIZE); spool2D = new randomPool2D(-1.,-1.,1.,1.,SIZE); ppool = new randomPool(ppool_min,spool_max,SIZE); onepool = new randomPool(-1.0,1.0,SIZE); exppool = new randomPool(-705.,705.,SIZE); //======================DP========================= std::vector> dpTuples; //retrieve tuples getFunctionTuples(&dpTuples,*spool,*ppool,*onepool,*exppool); //loop over & save to file std::cout <<"Saving double precision\n"; for(const auto& i_tuple : dpTuples){ std::cout << " - Processing " << std::get<0>(i_tuple) << std::endl; saveResponses(i_tuple,nick); } //========================DPv====================== std::vector> dpvTuples; //retrieve tuples getFunctionTuplesvect(&dpvTuples,*spool,*ppool,*onepool,*exppool); //loop over & save to file std::cout <<"Saving double precision vector form\n"; for(const auto& i_tuple : dpvTuples){ std::cout << " - Processing " << std::get<0>(i_tuple) << std::endl; saveResponses(i_tuple,nick); } // Add atan2 std::cout << " - Processing atan2 (all flavours)\n"; saveResponses2D ("Atan2", refMath::atan2, spool2D->getNumbersX(),spool2D->getNumbersY(), nick); saveResponses2D ("Fast_Atan2", vdt::fast_atan2, spool2D->getNumbersX(),spool2D->getNumbersY(), nick); saveResponses2D ("Atan2v", vdt::atan2v, spool2D->getNumbersX(),spool2D->getNumbersY(), nick); saveResponses2D ("Fast_Atan2v", vdt::fast_atan2v, spool2D->getNumbersX(),spool2D->getNumbersY(), nick); //delete to spare memeory.. 
delete spool; delete ppool; delete onepool; delete exppool; delete spool2D; randomPool* fspool,*fppool,*fonepool,*fexppool; randomPool2D* fspool2D; fspool = new randomPool(spool_minf,spool_maxf,SIZE); fppool = new randomPool(ppool_minf,spool_maxf,SIZE); fonepool = new randomPool(-1.0,1.0,SIZE); fexppool = new randomPool(-85,85,SIZE); fspool2D = new randomPool2D(-1.f,-1.f,1.f,1.f,SIZE); //======================SP========================= std::vector> spTuples; //retrieve tuples getFunctionTuples(&spTuples,*fspool,*fppool,*fonepool,*fexppool); //loop over & save to file std::cout <<"Saving single precision\n"; for(const auto& i_tuple : spTuples){ std::cout << " - Processing " << std::get<0>(i_tuple) << std::endl; saveResponses(i_tuple,nick); } //======================SPv========================= std::vector> spvTuples; //retrieve tuples getFunctionTuplesvect(&spvTuples,*fspool,*fppool,*fonepool,*fexppool); //loop over & save to file std::cout <<"Saving single precision vector form\n"; for(const auto& i_tuple : spvTuples){ std::cout << " - Processing " << std::get<0>(i_tuple) << std::endl; saveResponses(i_tuple,nick); } // Add atan2 std::cout << " - Processing atan2 (all flavours)\n"; saveResponses2D ("Atan2f", refMath::atan2f, fspool2D->getNumbersX(),fspool2D->getNumbersY(), nick); saveResponses2D ("Fast_Atan2f", vdt::fast_atan2f, fspool2D->getNumbersX(),fspool2D->getNumbersY(), nick); saveResponses2D ("Atan2fv", vdt::atan2fv, fspool2D->getNumbersX(),fspool2D->getNumbersY(), nick); saveResponses2D ("Fast_Atan2fv", vdt::fast_atan2fv, fspool2D->getNumbersX(),fspool2D->getNumbersY(), nick); //delete to spare memeory delete fspool; delete fppool; delete fonepool; delete fexppool; delete fspool2D; return 0; } vdt-0.4.4/progs/vdtArithmComparison.cpp000066400000000000000000000141451421413530600201530ustar00rootroot00000000000000/* vdtArithmComparison.cpp * * created: 16.7.2012 * * Reads files dumped by vdtAtritmBenchmark.cpp * * Author: Ladislav Horky */ #include #include 
#include #include #include "vdtdiag_fcnResponse.h" #include "vdtdiag_fcnTuples.h" #include "vdtdiag_fcnComparison.h" #include "vdtdiag_simpleCmd.h" //converts (comma) separated list (in string) to vector of strings void list2vector(std::string csList, std::vector* vect, char separator = ','){ int lastCommaPos = -1, commaPos; vect->clear(); while(true){ //no other comma, break commaPos = csList.find(separator,lastCommaPos+1); if(commaPos < 0) break; //store string between commas vect->push_back(csList.substr(lastCommaPos+1,commaPos-lastCommaPos-1)); lastCommaPos = commaPos; } //process the last string after the last separator if there is any if(int(csList.size()) > lastCommaPos+1) vect->push_back(csList.substr(lastCommaPos+1)); } /// Performs comparison of arithmetic precision int main(int argc, char **argv){ // set and parse commandline options CmdOptions opt; opt.addOption("-n","--nick","Output (!) file nickname. Make sure it contains all information about compared function " "(libraries used, vect VS single signature comaprison...)."); opt.addOption("-R","--reference","List of comma-separated filenames that will be used as a reference values" "(i.e. will be in the second column of output comparison file). The propper function names for output files will " "be refined from the filenames, so do not rename the files from vdtArithmBenchmark. See -T for more."); opt.addOption("-T","--test","List of comma-separated filenames that will be compared to reference files" "(their values will appear in the third column of output file). 
It is the responsibility of the user" "to make sure the reference and the test files are compatibile (same functions, same order of files, same # of entries, " "same inputs values...)."); //opt.addOption("-s","--separator","Alternative seperator for the list of files (default: ',')"); // parse and process cmd options if(!opt.parseCmd(argc,argv)){ std::cout << "Something is wrong with cmd options, try --help\n" << "usage: vdtArithmComparison -n= -R= -T="; return 1; } // if help was printed, exit if(opt.isSet("-h")) return 0; std::string nick; std::vector ref, test; // process cmd options nick = opt.getArgument("-n"); if(nick == ""){ std::cout << "Error: Nickname was not specified!\n"; opt.printHelp("-n"); return 1; } std::string tmp; tmp = opt.getArgument("-R"); list2vector(tmp,&ref); if(!ref.size()){ std::cout << "Error: No reference files specified!\n"; opt.printHelp("-R"); return 1; } tmp = opt.getArgument("-T"); list2vector(tmp,&test); if(!test.size()){ std::cout << "Error: No test files specified!\n"; opt.printHelp("-T"); return 1; } if(test.size() != ref.size()){ std::cout << "Error: Number of test and ref files not equal!\n"; opt.printHelp(); return 1; } // pheeew.. 
all errors hopefully checked //print table column names:-------------------------------------------------------------------- std::cout << std::setw(30) << std::left << "function name" << std::setw(5) << std::right << "max" << std::setw(5) << std::right << "min" << std::setw(6) << std::right << "mean" << std::setw(6) << std::right << "RMS\n"; for(unsigned int i=0; i rrefDP(ref[i]),rtestDP(test[i]); fcnComparison2D cDP(fcname+" "+ref[i].substr(0,ref[i].find("__"))+" VS "+ test[i].substr(0,test[i].find("__")), rrefDP.getInput1(), rrefDP.getInput2(), rrefDP.getOutput(),rtestDP.getOutput()); cDP.printStats(true); cDP.writeFile("comparison__"+nick+"__"+fcname+".txt"); } else{ fcnResponse rrefDP(ref[i]),rtestDP(test[i]); fcnComparison cDP(fcname+" "+ref[i].substr(0,ref[i].find("__"))+" VS "+ test[i].substr(0,test[i].find("__")), rrefDP.getInput(),rrefDP.getOutput(),rtestDP.getOutput()); cDP.printStats(true); cDP.writeFile("comparison__"+nick+"__"+fcname+".txt"); } }else{ if (is_atan2){ fcnResponse2D rrefSP(ref[i]),rtestSP(test[i]); fcnComparison2D cSP(fcname+" "+ref[i].substr(0,ref[i].find("__"))+" VS "+ test[i].substr(0,test[i].find("__")), rrefSP.getInput1(), rrefSP.getInput2(), rrefSP.getOutput(),rtestSP.getOutput()); cSP.printStats(true); cSP.writeFile("comparison__"+nick+"__"+fcname+".txt"); } else{ fcnResponse rrefSP(ref[i]),rtestSP(test[i]); fcnComparison cSP(fcname+" "+ref[i].substr(0,ref[i].find("__"))+" VS "+ test[i].substr(0,test[i].find("__")), rrefSP.getInput(),rrefSP.getOutput(),rtestSP.getOutput()); cSP.printStats(true); cSP.writeFile("comparison__"+nick+"__"+fcname+".txt"); } } } return 0; } vdt-0.4.4/progs/vdtCtypesTest.py000066400000000000000000000041301421413530600166420ustar00rootroot00000000000000from vdt_ctypes import * import numpy as np import timeit print vdt_arch() print vdt_sin([1,2,3,4]) print vdt_sinf([1,2,3,4]) print vdt_atan2([1,2,-3,-4],[1,-2,3,-4]) print vdt_sincosf([1,2,3,4]) print vdt_expf([1,2,3,4]) xx = np.linspace(-np.pi, np.pi, 2001) 
xf = np.linspace(-np.pi, np.pi, 2001) x = np.linspace(-np.pi, np.pi, 2001) def nsc() : global xx s = np.sin(xx) c = np.cos(xx) return (s,c) def nscf() : global xf s = np.sin(xf) c = np.cos(xf) return (s,c) def vsc() : global x return vdt_sincos(x) def vscf() : global x return vdt_sincosf(x) def nex() : global xx return np.exp(xx) def nexf() : global xf return np.sin(xf) def vex() : global x return vdt_exp(x) def vexf() : global x return vdt_expf(x) (fscf, ftype) = VDTFunMap['vdt_sincosf'] (fexf, ftype) = VDTFunMap['vdt_expf'] requires = ['CONTIGUOUS', 'ALIGNED'] x = numpy.asanyarray(x) x = numpy.require(x, ftype, requires) vo1 = numpy.empty_like(x) vo2 = numpy.empty_like(x) xf = numpy.asanyarray(xf) xf = numpy.require(xf, ftype, requires) def vscff() : global x global fscf global vo1 global vo2 fscf(x,vo1,vo2,x.size) def vexff() : global x global fscf global vo1 fexf(x,vo1,x.size) print "timing exp" print(timeit.timeit("nex()", setup="from __main__ import nex",number=100000)) print(timeit.timeit("nexf()", setup="from __main__ import nexf",number=100000)) print(timeit.timeit("vex()", setup="from __main__ import vex",number=100000)) print(timeit.timeit("vexf()", setup="from __main__ import vexf",number=100000)) print(timeit.timeit("vexff()", setup="from __main__ import vexff",number=100000)) print "timing sincos" print(timeit.timeit("nsc()", setup="from __main__ import nsc",number=100000)) print(timeit.timeit("nscf()", setup="from __main__ import nscf",number=100000)) print(timeit.timeit("vsc()", setup="from __main__ import vsc",number=100000)) print(timeit.timeit("vscf()", setup="from __main__ import vscf",number=100000)) print(timeit.timeit("vscff()", setup="from __main__ import vscff",number=100000)) vdt-0.4.4/progs/vdtNumpyTime.py000066400000000000000000000037511421413530600164720ustar00rootroot00000000000000import imp vdtnpfun= imp.load_dynamic('vdtnpfun','vdtnpfun_directory/vdtnpfun.so') import vdtnpfun print dir(vdtnpfun) from vdt_ctypes import * import numpy 
as np import timeit #print vdt_arch() #print vdt_expf([1,2,3,4]) loadit('vdt_expf') N=2001 xx = np.linspace(-np.pi, np.pi, N) xf = np.linspace(-np.pi, np.pi, N) x = np.linspace(-np.pi, np.pi, N) def nsc() : global xx s = np.sin(xx) c = np.cos(xx) return (s,c) def nscf() : global xf s = np.sin(xf) c = np.cos(xf) return (s,c) def vsc() : global x return vdt_sincos(x) def vscf() : global x return vdt_sincosf(x) def nex() : global xx return np.exp(xx) def nexf() : global xf return np.exp(xf) def vex() : global x return vdt_exp(x) def vexf() : global x return vdt_expf(x) # (fscf, ftype) = VDTFunMap['vdt_sincosf'] (fexf, ftype) = VDTFunMap['vdt_expf'] requires = ['CONTIGUOUS', 'ALIGNED'] x = numpy.asanyarray(x) x = numpy.require(x, ftype, requires) vo1 = numpy.empty_like(x) vo2 = numpy.empty_like(x) xf = numpy.asanyarray(xf) xf = numpy.require(xf, ftype, requires) def vscff() : global x global fscf global vo1 global vo2 fscf(x,vo1,vo2,x.size) def vexff() : global x global fexf global vo1 fexf(x,vo1,x.size) def vexfm() : global x return vdtnpfun.vdt_exp(x) def vexm() : global xx return vdtnpfun.vdt_exp(xx) print "timing exp" print timeit.timeit("nex()", setup="from __main__ import nex",number=100000), 'np exp' print timeit.timeit("nexf()", setup="from __main__ import nexf",number=100000), 'np expf' #print timeit.timeit("vexf()", setup="from __main__ import vexf",number=100000), 'vdt expf ctypes wrapped' print timeit.timeit("vexff()", setup="from __main__ import vexff",number=100000), 'vdt expf ctypes' print timeit.timeit("vexfm()", setup="from __main__ import vexfm",number=100000), 'vdt expf module' print timeit.timeit("vexm()", setup="from __main__ import vexm",number=100000), 'vdt exp module' vdt-0.4.4/progs/vdtNumpytest.py000066400000000000000000000044231421413530600165500ustar00rootroot00000000000000import imp import timeit import numpy as np #vdtnpfun= imp.load_dynamic('vdtnpfun','vdtnpfun_directory/vdtnpfun.so') #from vdtnpfun import * #print dir(vdtnpfun) import 
vdt from vdt import * print dir(vdt) def norf(vi): requires = ['CONTIGUOUS', 'ALIGNED'] vi = np.asanyarray(vi) vi = np.require(vi, np.single, requires) return vi def nord(vi): requires = ['CONTIGUOUS', 'ALIGNED'] vi = np.asanyarray(vi) vi = np.require(vi, np.double, requires) return vi print vdt_arch() print vdt_sin(nord([1,2,3,4])) print vdt_sin(norf([1,2,3,4])) print vdt_atan2(norf([1,2,-3,-4]),norf([1,-2,3,-4])) print vdt_sincos(norf([1,2,3,4])) print vdt_exp(norf([1,2,3,4])) xx = nord(np.linspace(-np.pi, np.pi, 2001)) xf = norf(np.linspace(-np.pi, np.pi, 2001)) yf = norf(np.linspace(-np.pi, np.pi, 2001)) zf = norf(np.linspace(-np.pi, np.pi, 2001)) def nfma(): global xf,yf,zf return xf*yf+zf def nsc() : global xx s = np.sin(xx) c = np.cos(xx) return (s,c) def nscf() : global xf s = np.sin(xf) c = np.cos(xf) return (s,c) def vfma(): global xf,yf,zf return vdt_fma(xf,yf,zf) def vsc() : global xx return vdt_sincos(xx) def vscf() : global xf return vdt_sincos(xf) def nex() : global xx return np.exp(xx) def nexf() : global xf return np.exp(xf) def vex() : global xx return vdt_exp(xx) def vexf() : global xf return vdt_exp(xf) print "timing fma" print(timeit.timeit("nfma()", setup="from __main__ import nfma",number=100000)) print(timeit.timeit("vfma()", setup="from __main__ import vfma",number=100000)) print "timing exp" print(timeit.timeit("nex()", setup="from __main__ import nex",number=100000)) print(timeit.timeit("nexf()", setup="from __main__ import nexf",number=100000)) print(timeit.timeit("vex()", setup="from __main__ import vex",number=100000)) print(timeit.timeit("vexf()", setup="from __main__ import vexf",number=100000)) print "timing sincos" print(timeit.timeit("nsc()", setup="from __main__ import nsc",number=100000)) print(timeit.timeit("nscf()", setup="from __main__ import nscf",number=100000)) print(timeit.timeit("vsc()", setup="from __main__ import vsc",number=100000)) print(timeit.timeit("vscf()", setup="from __main__ import vscf",number=100000)) 
vdt-0.4.4/progs/vdtPerfBenchmark.cpp000066400000000000000000000173221421413530600174030ustar00rootroot00000000000000/* * vdtPerfBenchmark.cpp * * Created on: Jun 23, 2012 * Author: danilopiparo */ #include "vdtdiag_random.h" #include "vdtMath.h" #include "vdtdiag_helper.h" #include "vdtdiag_fcnPerformance.h" #include "vdtdiag_fcnTuples.h" #include "vdtdiag_simpleCmd.h" #include #include #include #include /** * Loop on the functions, and measure performance **/ /* - log - exp - sin - cos - tan - asin - acos - atan - inverse sqrt - inverse (faster than division, based on isqrt) */ template void print_avg(const TUPLE& dpfcntuple, std::ofstream& ofile,uint32_t repetitions){ fcnPerformance dpExpPerf(std::get<0>(dpfcntuple), std::get<2>(dpfcntuple), std::get<1>(dpfcntuple), repetitions); dpExpPerf.print(); dpExpPerf.print(ofile); } template void print_avg2D(const TUPLE& dpfcntuple, std::ofstream& ofile,uint32_t repetitions){ fcnPerformance dpExpPerf(std::get<0>(dpfcntuple), std::get<2>(dpfcntuple), std::get<3>(dpfcntuple), std::get<1>(dpfcntuple), repetitions); dpExpPerf.print(); dpExpPerf.print(ofile); } int main(int argc, char **argv){ //set cmd options CmdOptions opt; opt.addOption("-n","--nick","Nickname to distinguish different runs/libraries used (required)"); opt.addOption("-s","--size","# of numbers to be tested (default 50000)"); opt.addOption("-r","--repetitions","# of repetitions from which statistics are calculated (default 150)"); opt.addOption("-M","--pool_max","Upper limit of the pool interval"); opt.addOption("-m","--pool_min","Lower limit of the pool interval"); opt.addOption("-p","--pattern","Regular expression to be matched in function name"); double POOL_MAX=5000; double POOL_MIN=-POOL_MAX; uint32_t SIZE = 50000; uint32_t REPETITIONS = 150; std::string nick = ""; std::string pattern_s=".*"; if(!opt.parseCmd(argc,argv)){ std::cout << "Something is wrong with cmd options, try --help\n" <<"usage: vdtPerfBenchmark -n=\n"; return 0; } // if help was 
printed, exit if(opt.isSet("-h")) return 1; // process cmd options nick = opt.getArgument("-n"); if(nick == ""){ std::cout << "Error: Nickname was not specified!\n"; opt.printHelp("-n"); return 0; } //getArgument() contains isSet check if(opt.getArgument("-s") != "") SIZE = std::stoi(opt.getArgument("-s").c_str()); if(opt.getArgument("-r") != "") REPETITIONS = std::stoi(opt.getArgument("-r").c_str()); if(opt.getArgument("-m") != "") POOL_MIN = std::stod(opt.getArgument("-m").c_str()); if(opt.getArgument("-M") != "") POOL_MAX = std::stod(opt.getArgument("-M").c_str()); if (opt.getArgument("-p")!= "") pattern_s = opt.getArgument("-p"); std::regex pattern (pattern_s); // Control print std::cout << "Running with nick: " << nick << ", size: " << SIZE << ", repetitions: "<< REPETITIONS << ", the pool max:" << POOL_MAX << ", the pool min:" << POOL_MIN << " and the pattern " << pattern_s << "\n"; // setup filename std::string fname = nick + "__performance_benchmark.txt"; std::ofstream ofile(fname); std::cout << "Double Precision\n"; randomPool symmrpool (POOL_MIN,POOL_MAX,SIZE); randomPool asymmrpool (.00001,POOL_MAX,SIZE); randomPool mone2onerpool (-1,1,SIZE); randomPool expPool (-705,705,SIZE); randomPool2D mone2onerpool2D (-1,-1,1,1,SIZE); // simple std::vector> dp_fcns; getFunctionTuples(&dp_fcns,symmrpool,asymmrpool,mone2onerpool,expPool); std::string funcname; for (const auto& dpfcntuple : dp_fcns){ funcname = std::get<0>(dpfcntuple); if (std::regex_match(funcname.begin(), funcname.end(), pattern)) print_avg>(dpfcntuple,ofile,REPETITIONS); } // double precision vectorised ----------------------------------------------- // Simple std::vector> dp_fcnsv; getFunctionTuplesvect(&dp_fcnsv,symmrpool,asymmrpool,mone2onerpool,expPool); for (const auto& dpfcntuple : dp_fcnsv){ funcname = std::get<0>(dpfcntuple); if (std::regex_match(funcname.begin(), funcname.end(), pattern)) print_avg>(dpfcntuple,ofile,REPETITIONS); } 
//------------------------------------------------------------------------------ // NOW SINGLE PRECISION std::cout << "Single Precision\n"; randomPool symmrpoolf (POOL_MIN,POOL_MAX,SIZE); randomPool asymmrpoolf (.00001,POOL_MAX,SIZE); randomPool mone2onerpoolf (-1,1,SIZE); randomPool expPoolf (-80,80,SIZE); randomPool2D mone2onerpool2Df (-1.f,-1.f,1.f,1.f,SIZE); // simple std::vector> sp_fcns; getFunctionTuples(&sp_fcns,symmrpoolf,asymmrpoolf,mone2onerpoolf,expPoolf); for (const auto& spfcntuple : sp_fcns){ funcname = std::get<0>(spfcntuple); if (std::regex_match(funcname.begin(), funcname.end(), pattern)) print_avg>(spfcntuple,ofile,REPETITIONS); } // single precision vectorised // Simple std::vector> sp_fcnsv; getFunctionTuplesvect(&sp_fcnsv,symmrpoolf,asymmrpoolf,mone2onerpoolf,expPoolf); for (const auto& spfcntuple : sp_fcnsv){ funcname = std::get<0>(spfcntuple); if (std::regex_match(funcname.begin(), funcname.end(), pattern)) print_avg>(spfcntuple,ofile,REPETITIONS); } // 2D std::cout << "\n\n Functions with Two Arguments \n"; // Double Precision std::cout << "Double Precision\n"; std::vector> dp_fcns2D; getFunctionTuples(&dp_fcns2D,mone2onerpool2D); for (const auto& dpfcntuple : dp_fcns2D){ funcname = std::get<0>(dpfcntuple); if (std::regex_match(funcname.begin(), funcname.end(), pattern)) print_avg2D>(dpfcntuple,ofile,REPETITIONS); } // Double Precision Array std::vector> dp_fcns2Dv; getFunctionTuplesvect(&dp_fcns2Dv,mone2onerpool2D); for (const auto& dpfcntuple : dp_fcns2Dv){ funcname = std::get<0>(dpfcntuple); if (std::regex_match(funcname.begin(), funcname.end(), pattern)) print_avg2D>(dpfcntuple,ofile,REPETITIONS); } // Single Precision std::cout << "Single Precision\n"; std::vector> sp_fcns2D; getFunctionTuples(&sp_fcns2D,mone2onerpool2Df); for (const auto& spfcntuple : sp_fcns2D){ funcname = std::get<0>(spfcntuple); if (std::regex_match(funcname.begin(), funcname.end(), pattern)) print_avg2D>(spfcntuple,ofile,REPETITIONS); } // Single Precision Array 
std::vector> sp_fcns2Dv; getFunctionTuplesvect(&sp_fcns2Dv,mone2onerpool2Df); for (const auto& spfcntuple : sp_fcns2Dv){ funcname = std::get<0>(spfcntuple); if (std::regex_match(funcname.begin(), funcname.end(), pattern)) print_avg2D>(spfcntuple,ofile,REPETITIONS); } ofile.close(); } vdt-0.4.4/progs/vdtTest.ipynb000066400000000000000000000044551421413530600161550ustar00rootroot00000000000000{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['__doc__', '__file__', '__name__', '__package__', 'vdt_asin', 'vdt_atan', 'vdt_atan2', 'vdt_cos', 'vdt_div', 'vdt_exp', 'vdt_fma', 'vdt_fmac', 'vdt_identity', 'vdt_inv', 'vdt_isqrt', 'vdt_log', 'vdt_sin', 'vdt_sincos', 'vdt_sqrt']\n", "[ 2.71828175 7.38905621 20.08553696 54.59814835]\n", "[ 2.71828183 7.3890561 20.08553692 54.59815003]\n" ] } ], "source": [ "import imp\n", "import numpy\n", "vdtnpfun= imp.load_dynamic('vdtnpfun','vdtnpfun_directory/vdtnpfun.so')\n", "import vdtnpfun\n", "print dir(vdtnpfun)\n", "\n", "vi=[1,2,3,4]\n", "requires = ['CONTIGUOUS', 'ALIGNED']\n", "vi = numpy.asanyarray(vi)\n", "vi = numpy.require(vi, numpy.single, requires)\n", "vo = numpy.empty_like(vi)\n", "print vdtnpfun.vdt_exp(vi)\n", "\n", "requires = ['CONTIGUOUS', 'ALIGNED']\n", "vd = numpy.asanyarray(vi)\n", "vd = numpy.require(vi, numpy.double, requires)\n", "vo = numpy.empty_like(vd)\n", "print vdtnpfun.vdt_exp(vd)\n", "\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'target is \"avx2\",\"fma\"'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "from ctypes import c_char_p\n", "def vdt_arch() :\n", " _path = os.path.dirname('__file__')\n", " lib = numpy.ctypeslib.load_library('libvdtFatLibWrapper', _path)\n", " f = lib['vdt_arch']\n", " f.restype = c_char_p\n", " return f()\n", "\n", 
"vdt_arch()\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.13" } }, "nbformat": 4, "nbformat_minor": 1 } vdt-0.4.4/progs/vdtTiming.ipynb000066400000000000000000000154431421413530600164640ustar00rootroot00000000000000{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['__builtins__', '__doc__', '__file__', '__name__', '__package__', 'c_char_p', 'imp', 'numpy', 'os', 'vdt_arch', 'vdt_asin', 'vdt_atan', 'vdt_atan2', 'vdt_cos', 'vdt_div', 'vdt_exp', 'vdt_fma', 'vdt_fmac', 'vdt_identity', 'vdt_inv', 'vdt_isqrt', 'vdt_log', 'vdt_sin', 'vdt_sincos', 'vdt_sqrt', 'vdtnpfun']\n", "target is \"avx2\",\"fma\"\n", "[ 0.84147098 0.90929743 0.14112001 -0.7568025 ]\n", "[ 0.84147102 0.90929741 0.14112002 -0.7568025 ]\n", "[ 0.78539819 2.3561945 -0.78539819 -2.3561945 ]\n", "(array([ 0.84147102, 0.90929741, 0.14112002, -0.7568025 ], dtype=float32), array([ 0.54030228, -0.41614681, -0.9899925 , -0.65364361], dtype=float32))\n", "[ 2.71828175 7.38905621 20.08553696 54.59814835]\n", "timing identity\n", "0.173686027527\n", "0.0796730518341\n", "0.495600938797\n", "timing fma\n", "0.189234972\n", "0.171628952026\n", "timing sqrt\n", "0.234452009201\n", "1.20027399063\n", "0.164345979691\n", "0.527653217316\n", "timing exp\n", "3.98345899582\n", "2.35685706139\n", "0.791067123413\n", "0.259509086609\n", "timing sincos\n", "8.04441905022\n", "5.14373493195\n", "0.904365062714\n", "0.413846969604\n" ] } ], "source": [ "import imp\n", "import timeit\n", "import numpy as np\n", "\n", "import vdt\n", "from vdt import *\n", "print dir(vdt)\n", "\n", "def norf(vi):\n", " requires = ['CONTIGUOUS', 
'ALIGNED']\n", " vi = np.asanyarray(vi)\n", " vi = np.require(vi, np.single, requires)\n", " return vi\n", "\n", "def nord(vi):\n", " requires = ['CONTIGUOUS', 'ALIGNED']\n", " vi = np.asanyarray(vi)\n", " vi = np.require(vi, np.double, requires)\n", " return vi\n", "\n", "\n", "print vdt_arch()\n", "print vdt_sin(nord([1,2,3,4]))\n", "print vdt_sin(norf([1,2,3,4]))\n", "print vdt_atan2(norf([1,2,-3,-4]),norf([1,-2,3,-4]))\n", "print vdt_sincos(norf([1,2,3,4]))\n", "print vdt_exp(norf([1,2,3,4]))\n", "\n", "\n", "xx = nord(np.linspace(-np.pi, np.pi, 2001))\n", "xf = norf(np.linspace(-np.pi, np.pi, 2001))\n", "yf = norf(np.linspace(-np.pi, np.pi, 2001))\n", "zf = norf(np.linspace(-np.pi, np.pi, 2001))\n", "\n", "xl = norf(np.linspace(0., 2.*np.pi, 20001))\n", "\n", "xo = numpy.empty_like(xf)\n", "\n", "def nid():\n", " global xf,yf,zf\n", " return np.copy(xf)\n", "\n", "def nfma():\n", " global xf,yf,zf\n", " return xf*yf+zf\n", "\n", "def nsqrt():\n", " global xf,yf,zf\n", " return np.sqrt(np.abs(xf))\n", "\n", "def nsqrtl():\n", " global xl\n", " return np.sqrt(xl)\n", "\n", "\n", "\n", "def nsc() :\n", " global xx\n", " s = np.sin(xx)\n", " c = np.cos(xx)\n", " return (s,c)\n", "\n", "def nscf() :\n", " global xf\n", " s = np.sin(xf)\n", " c = np.cos(xf)\n", " return (s,c)\n", "\n", "\n", "\n", "def vid():\n", " global xf,yf,zf\n", " return vdt_identity(xf)\n", "\n", "def vidl():\n", " global xl\n", " return vdt_identity(xl)\n", "\n", "def vfma():\n", " global xf,yf,zf\n", " return vdt_fma(xf,yf,zf)\n", "\n", "def vsqrt():\n", " global xf,yf,zf\n", " return vdt_sqrt(np.abs(xf))\n", "\n", "def vsqrtl():\n", " global xl\n", " return vdt_sqrt(xl)\n", "\n", "\n", "\n", "def vsc() :\n", " global xx\n", " return vdt_sincos(xx)\n", "\n", "def vscf() :\n", " global xf\n", " return vdt_sincos(xf)\n", "\n", "\n", "def nex() :\n", " global xx\n", " return np.exp(xx)\n", "\n", "def nexf() :\n", " global xf\n", " return np.exp(xf)\n", "\n", "\n", "def vex() :\n", " global 
xx\n", " return vdt_exp(xx)\n", "\n", "def vexf() :\n", " global xf\n", " return vdt_exp(xf)\n", "\n", "\n", "#wake up avx\n", "bha = timeit.timeit(\"vfma()\", setup=\"from __main__ import vfma\",number=100000)\n", "bha = timeit.timeit(\"vsqrtl()\", setup=\"from __main__ import vsqrtl\",number=100000)\n", "\n", "\n", "print \"timing identity\"\n", "print(timeit.timeit(\"nid()\", setup=\"from __main__ import nid\",number=100000))\n", "print(timeit.timeit(\"vid()\", setup=\"from __main__ import vid\",number=100000))\n", "print(timeit.timeit(\"vidl()\", setup=\"from __main__ import vidl\",number=100000))\n", "\n", "print \"timing fma\"\n", "print(timeit.timeit(\"nfma()\", setup=\"from __main__ import nfma\",number=100000))\n", "print(timeit.timeit(\"vfma()\", setup=\"from __main__ import vfma\",number=100000))\n", "\n", "\n", "print \"timing sqrt\"\n", "print(timeit.timeit(\"nsqrt()\", setup=\"from __main__ import nsqrt\",number=100000))\n", "print(timeit.timeit(\"nsqrtl()\", setup=\"from __main__ import nsqrtl\",number=100000))\n", "print(timeit.timeit(\"vsqrt()\", setup=\"from __main__ import vsqrt\",number=100000))\n", "print(timeit.timeit(\"vsqrtl()\", setup=\"from __main__ import vsqrtl\",number=100000))\n", "\n", "\n", "print \"timing exp\"\n", "print(timeit.timeit(\"nex()\", setup=\"from __main__ import nex\",number=100000))\n", "print(timeit.timeit(\"nexf()\", setup=\"from __main__ import nexf\",number=100000))\n", "print(timeit.timeit(\"vex()\", setup=\"from __main__ import vex\",number=100000))\n", "print(timeit.timeit(\"vexf()\", setup=\"from __main__ import vexf\",number=100000))\n", "\n", "\n", "print \"timing sincos\"\n", "print(timeit.timeit(\"nsc()\", setup=\"from __main__ import nsc\",number=100000))\n", "print(timeit.timeit(\"nscf()\", setup=\"from __main__ import nscf\",number=100000))\n", "print(timeit.timeit(\"vsc()\", setup=\"from __main__ import vsc\",number=100000))\n", "print(timeit.timeit(\"vscf()\", setup=\"from __main__ import 
vscf\",number=100000))\n", "\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.13" } }, "nbformat": 4, "nbformat_minor": 1 } vdt-0.4.4/scripts/000077500000000000000000000000001421413530600140025ustar00rootroot00000000000000vdt-0.4.4/scripts/checkAccuracy.py000066400000000000000000000022141421413530600171030ustar00rootroot00000000000000# !/usr/bin/env python ''' Trivial script to build the commandline to check the accuracy of the VDT functions. ''' response_filename_template="%s__%s%s__response.txt" functions=[\ "Acos", "Acosv", "Asin", "Asinv", "Atan", "Atanv", "Atan2", "Atan2v", "Cos", "Cosv", "Exp", "Expv", "Isqrt", "Isqrtv", "Log", "Logv", "Sin", "Sinv", "Tan", "Tanv", "Tanh", "Tanhv", "Acosf", "Acosfv", "Asinf", "Asinfv", "Atanf", "Atanfv", "Atan2f", "Atan2fv", "Cosf", "Cosfv", "Expf", "Expfv", "Isqrtf", "Isqrtfv", "Logf", "Logfv", "Sinf", "Sinfv", "Tanf", "Tanfv", "Tanhf", "Tanhfv"] def get_refs(nick,fast=""): if fast!="": fast+="_" refstring="" for function in functions: refstring+="%s," %response_filename_template%(nick,fast,function) return refstring[:-1] def get_tests(nick): return get_refs(nick,"Fast") if __name__ == "__main__": import sys if len(sys.argv) != 2: print "Usage is checkAccuracy.py nick" sys.exit(1) nick=sys.argv[1] tests=get_tests(nick) refs=get_refs(nick) command='vdtArithmComparison -n=%s -T="%s" -R="%s"' %(nick,tests,refs) print command vdt-0.4.4/scripts/diffhisto.py000066400000000000000000000106651421413530600163430ustar00rootroot00000000000000# !/usr/bin/env python import optparse import re #------------------------------------------------------------------------------- def getTypeLength(cmpfile): # read header, determine type length in bits typelength = 0 
typelength_s ="" cmpfile.readline() tmp = cmpfile.readline() if tmp[0] == "D": typelength = 64 typelength_s = "double" else: typelength = 32 typelength_s = "float" return typelength,typelength_s #------------------------------------------------------------------------------- def getYaxisRange(name): lr = -5000 hr = 5000 if "Asin" in name or "Acos" in name: lr = -1 hr = 1 if "Log" in name or "Isqrt" in name: lr = 0 if "Exp" in name: lr = -705 hr = 705 if "Expf" in name: lr = -85 hr = 85 return (lr,hr) #------------------------------------------------------------------------------- def getFilenamesFromDir(nick,dirname): import os return filter (lambda filename: re.match("comparison__" + nick + "__(.*).txt",filename), os.listdir(dirname)) #------------------------------------------------------------------------------- def fill_histos(cmpfile,histo1D,histo2D): #read lines and process them while 1: line = cmpfile.readline() if line == "": break m=re.match("(.*) (.*) (.*) (.*) (.*)",line) inputval=float(m.group(5)) db=int(m.group(4)) # fill histograms histo1D.Fill(db) histo2D.Fill(inputval,db) #------------------------------------------------------------------------------- def compare(nick,dirname): import ROOT # Some globals for the style ROOT.gROOT.SetStyle("Plain") ROOT.gROOT.SetBatch() ROOT.gStyle.SetPalette(1) # Do it for all function variants ofile = ROOT.TFile("%s_histos.root" %nick,"RECREATE") ofile.cd() #python3 returns an iterator instead of a a list # convert to list to be able to run on p2 & p3 filenames = list( getFilenamesFromDir(nick,dirname) ) print ( str( len(filenames) ) + " files found." 
) for filename in filenames: print ( "Studying " + filename ) cmpfile = open(filename) typelength,typelength_s = getTypeLength(cmpfile) m=re.match("comparison__" + nick + "__(.*).txt" ,filename) fcn_name=m.group(1) # read rest of header for i in range(1,5): cmpfile.readline() # xaxis range xmin,xmax=(-0.5,typelength+0.5) xNbins=typelength+1 # set up Root 1D histo dbhisto = ROOT.TH1F("Diffbit_"+fcn_name, fcn_name+" diffbit for "+nick+";Diffbit;#", xNbins, xmin,xmax) dbhisto.SetLineColor(ROOT.kBlue) dbhisto.SetLineWidth(2) dbhisto.GetYaxis().SetTitleOffset(1) # setup diffbit VS input histogram ymin,ymax=getYaxisRange(fcn_name) dbVSinhisto = ROOT.TH2F("DiffVsInput_"+fcn_name, fcn_name+" diffbit vs input for "+nick+";Input;Diffbit", 100,ymin,ymax, xNbins,xmin,xmax) fill_histos(cmpfile,dbhisto,dbVSinhisto) # draw and save 1D histogram dbcanvas = ROOT.TCanvas("dbcanv_" + fcn_name, fcn_name + " diffbit for " + nick + " canvas" ,600,600) dbcanvas.cd() dbcanvas.SetLogy() dbhisto.Draw() dbcanvas.Print(nick + "_" + str(typelength_s) + "_" + fcn_name + "_dbhisto.png" ) dbhisto.Write() # draw and save 2D histogram dbVSincanvas = ROOT.TCanvas("dbVSincanv_"+ fcn_name, fcn_name + " diffbit for " + nick + " canvas",600,600) dbVSincanvas.cd() dbVSinhisto.Draw("COLZ") dbVSincanvas.Print(nick + "_" + str(typelength_s) + "_" + fcn_name + "_dbVSinhisto.png" ) dbVSinhisto.Write() ofile.Close() #------------------------------------------------------------------------------- def create_parser(): import sys # set up cmd options cmdParser = optparse.OptionParser(usage="%prog -n=") cmdParser.add_option("-n","--nick",dest="nick",help="Determines which comparison files should be used.",default="") cmdParser.add_option("-d","--dir",dest="dirname",help="Directory to be looked at.",default="./") #parse options and retrieve needed parameters (options,args) = cmdParser.parse_args() if(options.nick == ""): cmdParser.print_help() sys.exit(1) return options 
#------------------------------------------------------------------------------- if __name__ == "__main__": options = create_parser() compare(options.nick,options.dirname) vdt-0.4.4/scripts/invokeVDT.py000066400000000000000000000026201421413530600162250ustar00rootroot00000000000000import numpy import os from ctypes import c_char_p def invoke(vi,f,type) : requires = ['CONTIGUOUS', 'ALIGNED'] vi = numpy.asanyarray(vi) vi = numpy.require(vi, type, requires) vo = numpy.empty_like(vi) f(vi,vo,vi.size) return vo def invoke2to1(vi1,vi2,f,type) : requires = ['CONTIGUOUS', 'ALIGNED'] vi1 = numpy.asanyarray(vi1) vi1 = numpy.require(vi1, type, requires) vi2 = numpy.asanyarray(vi2) vi2 = numpy.require(vi2, type, requires) vo = numpy.empty_like(vi1) f(vi1,vi2,vo,vi1.size) return vo def invoke1to2(vi,f,type) : requires = ['CONTIGUOUS', 'ALIGNED'] vi = numpy.asanyarray(vi) vi = numpy.require(vi, type, requires) vo1 = numpy.empty_like(vi) vo2 = numpy.empty_like(vi) f(vi,vo1,vo2,vi.size) return (vo1,vo2) def load(fn,is_single,libname='libvdtFatLibWrapper',nin=1,nout=1) : if (is_single) : type = numpy.single else : type = numpy.float tin = numpy.ctypeslib.ndpointer(type,flags='aligned, contiguous') tout = numpy.ctypeslib.ndpointer(type,flags='aligned, contiguous, writeable') _path = os.path.dirname('__file__') lib = numpy.ctypeslib.load_library(libname, _path) f = lib[fn] f.restype = None f.argtypes = [tin]*nin + [tout]*nout + [numpy.ctypeslib.c_intp] return (f,type) def loadInvoke(vi,fn,is_single,libname='libvdtFatLibWrapper') : (f,type) = load(fn,is_single,libname) return invoke(vi,f,type) vdt-0.4.4/scripts/tabulateVDTResults.py000077500000000000000000000151741421413530600201300ustar00rootroot00000000000000#! 
/usr/bin/env python import re import math #------------------------------------------------------------------------------- def extractTimings(filename): """ Extract timings from vdtPerfBenchmark files into a list of tuples done like: name - time - error """ file = open(filename, "r") timings=[] for line in file: splittedLine=line.split() timings.append([splittedLine[1],float(splittedLine[3]),float(splittedLine[5])]) return timings #------------------------------------------------------------------------------- def normaliseTimings(timings): """ Normalise the timings according to the Identity funcitons """ newTimings=[] offset=-1 for name, time, err in timings: if "Identity" in name: offset=time offsetErr=err continue time -= offset err = math.sqrt( err*err + offsetErr*offsetErr ) newTimings.append( [name,time,err] ) return newTimings #------------------------------------------------------------------------------- def extractTitleTimings(filename,normalise): timings=extractTimings(filename) finalTimings=[] title=filename.split("__")[0] if (normalise): finalTimings=normaliseTimings(timings) else: #finalTimings = filter(lambda nameTimeErr: "Identity" not in nameTimeErr[0] ,timings) finalTimings=timings return [title,finalTimings] #------------------------------------------------------------------------------- def createSummaryTable(TitledTimingsList): """ Create a big summary table with name time error time error time error ... for the different files. timingsList is a list of tuples: name timings """ firstTimings =TitledTimingsList[0][1] nlines=len(firstTimings) print " Function Name", for title,timings in TitledTimingsList: print "%20s" %title, print for i in xrange(nlines): print "%16s" %firstTimings[i][0], first=True norm=1. speedupString="" for title,timings in TitledTimingsList: time = timings[i][1] err = timings[i][2] if first: norm=time else: dividend=time if dividend==0.: dividend =1. 
factor = norm/dividend speedupString = "%.1fX" %factor s="%.2f +- %.2f %s" %(time,err,speedupString) print "%20s" %s, first=False print #------------------------------------------------------------------------------- def timings2TimingsDict(timings): titledTimingsDict={} for name,time,err in timings: titledTimingsDict[name]=[time,err] return titledTimingsDict #------------------------------------------------------------------------------- def createVectTable(TitledTimingsList): """ Create a big summary table with name time error of the scalar symbols and in the following time error time error for the different files for the vectorised symbols. """ # Duplicate the first titledTimings: the 0th used for the scalar, the 1st for the vect TitledTimingsList = [TitledTimingsList[0]]+TitledTimingsList firstTimings =TitledTimingsList[0][1] nlines=len(firstTimings) print " Function Name", vectSuffix="" for title,timings in TitledTimingsList: print "%20s" %(title+vectSuffix), vectSuffix=" vect." print for i in xrange(nlines): funcName=firstTimings[i][0] if funcName.endswith('v'):continue print "%16s" %funcName, first=True norm=1. speedupString="" for title,timings in TitledTimingsList: if first: time = timings[i][1] err = timings[i][2] norm=time s="%.2f +- %.2f" %(time,err) print "%20s" %s, first=False else: timingsDict = timings2TimingsDict(timings) time,err = timingsDict[funcName+"v"] err = timingsDict[funcName][1] dividend=time if dividend==0.: dividend =1. factor = norm/dividend speedupString = "%.1f X" %factor s="%.2f +- %.2f %s" %(time,err,speedupString) print "%20s" %s, first=False print #------------------------------------------------------------------------------- def createLibmVdtTable(TitledTimings): """ Create a big summary table with name time error of the libm symbols and the corresponding vdt ones. 
""" nlines=len(TitledTimings) title,timings = TitledTimings print "--------- %s Scalar Calls ----------" %title print "%16s %16s %16s" %("Function Name","Libm","VDT") timingsDict=timings2TimingsDict(timings) for name,time,err in timings: if name.startswith("Fast_") or name.endswith("v"): continue libmString="%.2f +- %.2f" %(time,err) vdtName = "Fast_%s" %name if "Inverse" in vdtName: vdtName= vdtName.replace("Inverse","Inv") vdtTime = timingsDict[vdtName][0] vdtErr = timingsDict[vdtName][1] speedup = time / vdtTime speedupString = "%.1fX"%speedup vdtString="%.2f +- %.2f %s" %(vdtTime,vdtErr,speedupString) print "%16s %20s %20s" %(name,libmString,vdtString) #------------------------------------------------------------------------------- if __name__ == "__main__": from optparse import OptionParser parser = OptionParser(usage="usage: %prog perfFile1 [... perfFileN] options ") parser.add_option("-s", dest="summaryTable", help="Create summary table", default=False, action="store_true") parser.add_option("-l", dest="libmTable", help="Create libm VS vdt timings table", default=False, action="store_true") parser.add_option("-v", dest="vectTable", help="Create vector timings table", default=False, action="store_true") parser.add_option("--no-normalise", dest="noNormalise", help="Do not normalise to identity", default=False, action="store_true") (options, perffilenames) = parser.parse_args() # Create Titled timings titledTimingsList=[] for perffilename in perffilenames: titledTimingsList.append(extractTitleTimings(perffilename,not options.noNormalise)) if options.summaryTable: createSummaryTable(titledTimingsList) if options.vectTable: createVectTable(titledTimingsList) if options.libmTable: map (createLibmVdtTable, titledTimingsList) vdt-0.4.4/scripts/vdtBase.py000066400000000000000000000014731421413530600157510ustar00rootroot00000000000000import numpy import os from ctypes import c_char_p import invokeVDT def vdt_arch() : _path = os.path.dirname('__file__') lib = 
numpy.ctypeslib.load_library('libvdtFatLibWrapper', _path) f = lib['vdt_arch'] f.restype = c_char_p return f() VDTFunMap = {} def loadit(fn,nin=1,nout=1) : if not fn in VDTFunMap : is_single = fn.endswith('f') VDTFunMap[fn] = invokeVDT.load(fn+'v',is_single,'libvdtFatLibWrapper',nin,nout) def vdt_invoke(fn,vi) : loadit(fn) (f,type) = VDTFunMap[fn] return invokeVDT.invoke(vi,f,type) def vdt_invoke2to1(fn,vi1,vi2) : loadit(fn,2,1) (f,type) = VDTFunMap[fn] return invokeVDT.invoke2to1(vi1,vi2,f,type) def vdt_invoke1to2(fn,vi) : loadit(fn,1,2) (f,type) = VDTFunMap[fn] return invokeVDT.invoke1to2(vi,f,type) vdt-0.4.4/src/000077500000000000000000000000001421413530600131025ustar00rootroot00000000000000vdt-0.4.4/src/CMakeLists.txt000066400000000000000000000011641421413530600156440ustar00rootroot00000000000000# Auto generate the code for the vector signatures and if needed preload # We need Python for the following. find_package (Python COMPONENTS Interpreter REQUIRED) # Generate the code for the lib if (PRELOAD) set (SIGGENOPTS " -p") else() set (SIGGENOPTS " ") endif() EXEC_PROGRAM ("cd src;${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/src/signatures_generator.py ${SIGGENOPTS} -o ${CMAKE_SOURCE_DIR}/src;cd -") #generare Vc wrapper and config file if(USE_VC) EXEC_PROGRAM ("cd src;${Python_EXECUTABLE} vc_wrapper_generator.py;cd -") endif(USE_VC) configure_file( ${INC_DIR}/externalLibcfg.h.cmake ${INC_DIR}/externalLibcfg.h) vdt-0.4.4/src/expfWrapper.c000066400000000000000000000054641421413530600155620ustar00rootroot00000000000000#include "Python.h" #include "math.h" #include "numpy/ndarraytypes.h" #include "numpy/ufuncobject.h" #include "numpy/npy_3kcompat.h" #include "vdtFatLibWrapper.h" /* * * This is the C code for creating your own * NumPy ufunc for a vdt_expfv function. * * In this code we only define the ufunc for * a single dtype. * * Details explaining the Python-C API can be found under * 'Extending and Embedding' and 'Python/C API' at * docs.python.org . 
*/ static PyMethodDef vdt_Methods[] = { {NULL, NULL, 0, NULL} }; /* The loop definition must precede the PyMODINIT_FUNC. */ static void vdt_vf(char **args, npy_intp *dimensions, npy_intp* steps, void* data) { void (*functionPtr)(const float* __restrict__, float* __restrict__, long); functionPtr = data; npy_intp n = dimensions[0]; char *in = args[0], *out = args[1]; /* npy_intp in_step = steps[0], out_step = steps[1]; */ // vdt_expfv((const float *)in, (float *)out,n); (*functionPtr)((const float *)in, (float *)out,n); } static void vdt_vd(char **args, npy_intp *dimensions, npy_intp* steps, void* data) { void (*functionPtr)(const double* __restrict__, double* __restrict__, long); functionPtr = data; npy_intp n = dimensions[0]; char *in = args[0], *out = args[1]; /* npy_intp in_step = steps[0], out_step = steps[1]; */ (*functionPtr)((const double *)in, (double *)out,n); } /*This a pointer to the above function*/ PyUFuncGenericFunction funcs[2] = {&vdt_vf,&vdt_vd}; /* These are the input and return dtypes of vdt_expfv.*/ static char types[4] = {NPY_FLOAT, NPY_FLOAT, NPY_DOUBLE, NPY_DOUBLE}; static void *data[2] = {&vdt_expfv,&vdt_expv}; #if PY_VERSION_HEX >= 0x03000000 static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "npufunc", NULL, -1, vdt_Methods, NULL, NULL, NULL, NULL }; PyMODINIT_FUNC PyInit_npufunc(void) { PyObject *m, *vdt_v, *d; m = PyModule_Create(&moduledef); if (!m) { return NULL; } d = PyModule_GetDict(m); import_array(); import_umath(); vdt_v = PyUFunc_FromFuncAndData(funcs, data, types, 2, 1, 1, PyUFunc_None, "vdt_expv", "vdt_expv", 0); PyDict_SetItemString(d, "vdt_expv", vdt_v); Py_DECREF(vdt_v); return m; } #else PyMODINIT_FUNC initnpufunc(void) { PyObject *m, *vdt_v, *d; m = Py_InitModule("npufunc", vdt_Methods); if (m == NULL) { return; } import_array(); import_umath(); vdt_v = PyUFunc_FromFuncAndData(funcs, data, types, 2, 1, 1, PyUFunc_None, "vdt_expv", "vdt_expv", 0); d = PyModule_GetDict(m); PyDict_SetItemString(d, "vdt_expv", 
vdt_v); Py_DECREF(vdt_v); } #endif vdt-0.4.4/src/fatlib.h000066400000000000000000000027641421413530600145250ustar00rootroot00000000000000#ifndef __VDT_FATLIB__ #define __VDT_FATLIB__ #include #include #include namespace { static std::string fathi; } #ifdef __linux__ #define FATHALLO(...) char const * __attribute__ ((__target__ (__VA_ARGS__))) \ fathelloCPP() { fathi = std::string("target is ")+#__VA_ARGS__; return fathi.c_str();} namespace { FATHALLO("default") FATHALLO("sse3") FATHALLO("arch=corei7") FATHALLO("arch=bdver1") FATHALLO("avx2","fma") FATHALLO("avx") FATHALLO("avx512f") // FATHALLO() } extern "C" { char const * vdt_arch() { return fathelloCPP();} } #else extern "C" { char const * vdt_arch() { fathi = std::string("unknown target"); return fathi.c_str();;} } #endif #ifdef __linux__ #define FATLIB(RET,FUN) \ RET __attribute__ ((__target__ ("default"))) FUN \ RET __attribute__ ((__target__ ("sse3"))) FUN \ RET __attribute__ ((__target__ ("arch=corei7"))) FUN \ RET __attribute__ ((__target__ ("arch=bdver1"))) FUN \ RET __attribute__ ((__target__ ("avx2","fma"))) FUN \ RET __attribute__ ((__target__ ("avx"))) FUN \ RET __attribute__ ((__target__ ("avx512f"))) FUN #else #define FATLIB(RET,FUN) RET FUN #endif template inline T theFMA (T x, T y, T z) { return x*y+z;} #define FATFMA(T) myfmaCPP(T x, T y, T z) { return theFMA(x,y,z);} namespace { FATLIB(float,FATFMA(float)) FATLIB(double,FATFMA(double)) } extern "C" { float myfmaF(float x, float y, float z) { return myfmaCPP(x,y,z);} float myfmaD(double x, double y, double z) { return myfmaCPP(x,y,z);} } #endif vdt-0.4.4/src/numpy_wrapper_generator.py000066400000000000000000000265471421413530600204500ustar00rootroot00000000000000#!/usr/bin/env python """ Generates numpy wrapper - both header and .cc file compile with c++ -Ofast -shared -fPIC -Wall -o libvdtFatLibWrapper.so vdtFatLibWrapper.cc -I../include/ """ RESTRICT="__restrict__" VDT_PREF="vdt_" FUNCTIONS_LIST=["asin", "atan", "atan2", "cos", "exp", "inv", 
"log", "sin", "sincos", "isqrt", "identity"] TEMPLATE_LIST = ['sqrt','div','fma','fmac'] FUNCTIONS_LIST+=TEMPLATE_LIST VDT_WRAPPER_HEADER='vdtFatLibWrapper.h' VDT_WRAPPER_IMPL='vdtFatLibWrapper.cc' VDT_PYTHON_MODULE='vdt_ctypes.py' VDT_NUMPY_WRAPPER_HEADER='vdtNumpyWrapper.h' #------------------------------------------------------------------ def isTemplate(fcn_name) : return fcn_name in TEMPLATE_LIST #------------------------------------------------------------------ def get_type_dependent_parts(is_double, is_vector): suffix="" type="double" if(is_double): if(is_vector): suffix="v" else: type="float" suffix="f" if(is_vector): suffix="fv" data_type="%s" %(type) if(is_vector): data_type="%s* %s" %(type, RESTRICT) return (type, data_type, suffix) #------------------------------------------------------------------ def get_function_prototype(fcn_name,is_double,is_vector): (type,data_type,suffix)=get_type_dependent_parts(is_double,is_vector) prototype="%s%s%s(%s x)" %(VDT_PREF,fcn_name,suffix,data_type) ret = type if(is_vector): prototype="%s%s%s(const %s iarray, %s oarray, long size)" %(VDT_PREF,fcn_name,suffix,data_type,data_type) ret = 'void' return (ret,prototype) def get_function_prototype3to1(fcn_name,is_double,is_vector): (type,data_type,suffix)=get_type_dependent_parts(is_double,is_vector) prototype="%s%s%s(%s x, %s y, %s z)" %(VDT_PREF,fcn_name,suffix,data_type,data_type,data_type) ret = type if(is_vector): prototype="%s%s%s(const %s iarray1, const %s iarray2, const %s iarray3, %s oarray, long size)" %(VDT_PREF,fcn_name,suffix,data_type,data_type,data_type,data_type) ret = 'void' return (ret,prototype) def get_function_prototype2to1(fcn_name,is_double,is_vector): (type,data_type,suffix)=get_type_dependent_parts(is_double,is_vector) prototype="%s%s%s(%s x, %s y)" %(VDT_PREF,fcn_name,suffix,data_type,data_type) ret = type if(is_vector): prototype="%s%s%s(const %s iarray1, const %s iarray2, %s oarray, long size)" 
%(VDT_PREF,fcn_name,suffix,data_type,data_type,data_type) ret = 'void' return (ret,prototype) def get_function_prototype1to2(fcn_name,is_double,is_vector): (type,data_type,suffix)=get_type_dependent_parts(is_double,is_vector) prototype="%s%s%s(%s x, %s * o1, %s * o2)" %(VDT_PREF,fcn_name,suffix,data_type,data_type,data_type) ret = 'void' if(is_vector): prototype="%s%s%s(const %s iarray, %s oarray1, %s oarray2, long size)" %(VDT_PREF,fcn_name,suffix,data_type,data_type,data_type) ret = 'void' return (ret,prototype) #------------------------------------------------------------------- # translation of raw name withnout suffixes def get_fcnname_translation(fcn_name): if(fcn_name == "inv"): _name = "reciprocal" elif(fcn_name == "isqrt"): _name = "rsqrt" else: _name = fcn_name return _name #------------------------------------------------------------------- def get_function_code3to1(fcn_name,is_vector): if(is_vector): code = "{" +\ "for (long i=0;i= 0x03000000 static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "vdtnpfun", NULL, -1, vdt_Methods, NULL, NULL, NULL, NULL }; PyMODINIT_FUNC PyInit_vdtnpfun(void) { PyObject *m, *vdt_v, *d; m = PyModule_Create(&moduledef); if (!m) { return NULL; } d = PyModule_GetDict(m); import_array(); import_umath(); for (int i=0;i< NVDTFUN;++i) { vdt_v = PyUFunc_FromFuncAndData(funcs, data+2*i, types, 2, 1, 1, PyUFunc_None, fname[i], fdoc[i], 0); PyDict_SetItemString(d, fname+i, vdt_v); Py_DECREF(vdt_v); } // add sincos and atan2 by hand vdt_v = PyUFunc_FromFuncAndData(funcs12, dataSinCos, types12, 2, 1, 2, PyUFunc_None, "vdt_sincos", "vdt_sincos", 0); PyDict_SetItemString(d, "vdt_sincos", vdt_v); Py_DECREF(vdt_v); vdt_v = PyUFunc_FromFuncAndData(funcs21, dataAtan2, types12, 2, 2, 1, PyUFunc_None, "vdt_atan2", "vdt_atan2", 0); PyDict_SetItemString(d, "vdt_atan2", vdt_v); Py_DECREF(vdt_v); return m; } #else PyMODINIT_FUNC initvdtnpfun(void) { PyObject *m, *vdt_v, *d; m = Py_InitModule("vdtnpfun", vdt_Methods); if (m == NULL) { 
return; } d = PyModule_GetDict(m); import_array(); import_umath(); for (int i=0;i< NVDTFUN; ++i) { vdt_v = PyUFunc_FromFuncAndData(funcs, data+2*i, types, 2, 1, 1, PyUFunc_None, fname[i], fdoc[i], 0); PyDict_SetItemString(d, fname[i], vdt_v); Py_DECREF(vdt_v); } // add sincos, atan2 div,fma and fmac by hand vdt_v = PyUFunc_FromFuncAndData(funcs12, dataSinCos, types12, 2, 1, 2, PyUFunc_None, "vdt_sincos", "vdt_sincos", 0); PyDict_SetItemString(d, "vdt_sincos", vdt_v); Py_DECREF(vdt_v); vdt_v = PyUFunc_FromFuncAndData(funcs21, dataAtan2, types12, 2, 2, 1, PyUFunc_None, "vdt_atan2", "vdt_atan2", 0); PyDict_SetItemString(d, "vdt_atan2", vdt_v); Py_DECREF(vdt_v); vdt_v = PyUFunc_FromFuncAndData(funcs21, dataDiv, types12, 2, 2, 1, PyUFunc_None, "vdt_div", "vdt_div", 0); PyDict_SetItemString(d, "vdt_div", vdt_v); Py_DECREF(vdt_v); vdt_v = PyUFunc_FromFuncAndData(funcs31, dataFMA, types31, 2, 3, 1, PyUFunc_None, "vdt_fma", "vdt_fma", 0); PyDict_SetItemString(d, "vdt_fma", vdt_v); Py_DECREF(vdt_v); vdt_v = PyUFunc_FromFuncAndData(funcs31, dataFMAC, types31, 2, 3, 1, PyUFunc_None, "vdt_fmac", "correctly rounded fma", 0); PyDict_SetItemString(d, "vdt_fmac", vdt_v); Py_DECREF(vdt_v); } #endif