pax_global_header 0000666 0000000 0000000 00000000064 13337156437 0014526 g ustar 00root root 0000000 0000000 52 comment=ff0672dc79af994b1ba4943886326d3c567cfae4
sleef-3.3.1/ 0000775 0000000 0000000 00000000000 13337156437 0012630 5 ustar 00root root 0000000 0000000 sleef-3.3.1/.travis.yml 0000664 0000000 0000000 00000004176 13337156437 0014751 0 ustar 00root root 0000000 0000000 language: c
# Defaults applied to every job unless a matrix entry overrides them
os: linux
sudo: required
dist: trusty
# Explicit list of jobs. Each entry sets LABEL, which selects the matching
# travis/before_install.${LABEL}.sh, before_script.${LABEL}.sh and
# script.${LABEL}.sh scripts in the phases below.
matrix:
include:
# macOS, gcc, on the xcode8 image
- os: osx
osx_image: xcode8
compiler: gcc
env:
- LABEL="osx-gcc"
# macOS, clang
- os: osx
compiler: clang # use default apple clang
env:
- LABEL="osx-clang"
# Linux x86_64, g++-7 installed from ubuntu-toolchain-r-test
- os: linux
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-7
env:
- LABEL="x86_64-gcc"
# Linux x86_64, clang-5.0 installed from the LLVM trusty repository
- os: linux
addons:
apt:
sources:
- llvm-toolchain-trusty-5.0
packages:
- clang-5.0
env:
- LABEL="x86_64-clang"
# The remaining jobs set ENABLE_DOCKER="true", which makes the phases below
# run their scripts through `docker exec` instead of directly on the host.
- os: linux
services: docker
env:
- LABEL="aarch64-gcc"
- ENABLE_DOCKER="true"
- os: linux
services: docker
env:
- LABEL="armhf-gcc"
- ENABLE_DOCKER="true"
- os: linux
services: docker
env:
- LABEL="ppc64el-clang"
- ENABLE_DOCKER="true"
# Prepare the build environment. Jobs with ENABLE_DOCKER="true" run their
# per-label setup script inside the `xenial` container; all other jobs run
# it directly on the host.
before_install:
- export PATH=$PATH:/usr/bin:${TRAVIS_BUILD_DIR}/sde-external-8.12.0-2017-10-23-lin
- cd ${TRAVIS_BUILD_DIR}
- chmod +x ${TRAVIS_BUILD_DIR}/travis/*.sh
- if [[ "${ENABLE_DOCKER}" == "true" ]]; then ${TRAVIS_BUILD_DIR}/travis/setupdocker.sh; fi
- if [[ "${ENABLE_DOCKER}" == "true" ]]; then docker exec xenial /build/travis/before_install.${LABEL}.sh; fi
# Download from SDE_URL on the x86_64 jobs only when SDE_URL is actually set.
# The variable must be expanded with `$`: comparing the literal "{SDE_URL}"
# against "" is always true and would run wget without a URL.
- if [[ ( "${LABEL}" == "x86_64-gcc" ) && ( "${SDE_URL}" != "" ) ]]; then wget -q "${SDE_URL}"; fi
- if [[ ( "${LABEL}" == "x86_64-clang" ) && ( "${SDE_URL}" != "" ) ]]; then wget -q "${SDE_URL}"; fi
- if [[ "${ENABLE_DOCKER}" != "true" ]]; then ${TRAVIS_BUILD_DIR}/travis/before_install.${LABEL}.sh; fi
# Run the per-label before_script script: inside the `xenial` container for
# jobs with ENABLE_DOCKER="true", on the host otherwise.
before_script:
- if [[ "${ENABLE_DOCKER}" == "true" ]]; then docker exec xenial /build/travis/before_script.${LABEL}.sh; fi
- if [[ "${ENABLE_DOCKER}" != "true" ]]; then ${TRAVIS_BUILD_DIR}/travis/before_script.${LABEL}.sh; fi
# Run the per-label script.${LABEL}.sh with the same container/host split.
script:
- if [[ "${ENABLE_DOCKER}" == "true" ]]; then docker exec xenial /build/travis/script.${LABEL}.sh; fi
- if [[ "${ENABLE_DOCKER}" != "true" ]]; then ${TRAVIS_BUILD_DIR}/travis/script.${LABEL}.sh; fi
sleef-3.3.1/CHANGELOG.md 0000664 0000000 0000000 00000005013 13337156437 0014440 0 ustar 00root root 0000000 0000000 # Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## 3.3.1 - 2018-08-20
### Added
- FreeBSD support is added
### Changed
- i386 build problem is fixed
- Trigonometric functions now evaluate correctly over the full FP
domain.
https://github.com/shibatch/sleef/pull/210
## 3.3 - 2018-07-06
### Added
- SVE target support is added to libsleef.
https://github.com/shibatch/sleef/pull/180
- SVE target support is added to DFT. With this patch, DFT operations
can be carried out using 256, 512, 1024 and 2048-bit wide vectors
according to runtime availability of vector registers and operators.
https://github.com/shibatch/sleef/pull/182
- 3.5-ULP versions of sinh, cosh, tanh, sinhf, coshf, tanhf, and the
corresponding testing functionalities are added.
https://github.com/shibatch/sleef/pull/192
- Power VSX target support is added to libsleef.
https://github.com/shibatch/sleef/pull/195
- Payne-Hanek like argument reduction is added to libsleef.
https://github.com/shibatch/sleef/pull/197
## 3.2 - 2018-02-26
### Added
- The whole build system of the project migrated from makefiles to
cmake. In particular, this includes `libsleef`, `libsleefgnuabi`,
`libdft` and all the tests.
- Benchmarks that compare `libsleef` vs `SVML` on X86 Linux are
available in the project tree under src/libm-benchmarks directory.
- Extensive upstream testing via Travis CI and Appveyor, on the
following systems:
* OS: Windows / Linux / OSX.
* Compilers: gcc / clang / MSVC.
* Targets: X86 (SSE/AVX/AVX2/AVX512F), AArch64 (Advanced SIMD), ARM
(NEON). Emulators like QEMU or SDE can be used to run the tests.
- Added the following new vector functions (with relative testing):
* `log2`
- New compatibility tests have been added to check that
`libsleefgnuabi` exports the GNUABI symbols correctly.
- The library can be compiled to an LLVM bitcode object.
- Added masked interface to the library to support AVX512F masked
vectorization.
### Changed
- Use native instructions if available for `sqrt`.
- Fixed fmax and fmin behavior on AArch64:
https://github.com/shibatch/sleef/pull/140
- Speed improvements for `asin`, `acos`, `fmod` and `log`. Computation
speed of other functions is also improved by general optimization.
https://github.com/shibatch/sleef/pull/97
- Removed `libm` dependency.
### Removed
- Makefile build system
sleef-3.3.1/CMakeLists.txt 0000664 0000000 0000000 00000014314 13337156437 0015373 0 ustar 00root root 0000000 0000000 # Options
# Establish the minimum CMake version (and with it the policy defaults)
# before any other command is executed.
# See doc/build-with-cmake.md for instructions on how to build Sleef.
cmake_minimum_required(VERSION 3.4.3)

# User-configurable switches; override on the command line with
# -D<OPTION>=ON|OFF.
option(BUILD_SHARED_LIBS "Build shared libs" ON)
option(BUILD_DFT "libsleefdft will be built." ON)
option(BUILD_GNUABI_LIBS "libsleefgnuabi will be built." ON)
option(BUILD_TESTS "Tests will be built." ON)
option(SLEEF_TEST_ALL_IUT "Perform tests on implementations with all vector extensions" OFF)
option(SLEEF_SHOW_CONFIG "Show SLEEF configuration status messages." ON)
option(SLEEF_SHOW_ERROR_LOG "Show cmake error log." OFF)

enable_testing()

# Version of the library. The shared-object version follows the major number.
set(SLEEF_VERSION_MAJOR 3)
set(SLEEF_VERSION_MINOR 3)
set(SLEEF_VERSION_PATCHLEVEL 1)
set(SLEEF_VERSION ${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}.${SLEEF_VERSION_PATCHLEVEL})
set(SLEEF_SOVERSION ${SLEEF_VERSION_MAJOR})

project(SLEEF
  VERSION ${SLEEF_VERSION}
  LANGUAGES C)
# Sanity check: SLEEF must be configured in a build directory that is
# separate from the source directory. Abort the configuration otherwise.
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
  message(FATAL_ERROR "SLEEF does not allow in-source builds.
You can refer to doc/build-with-cmake.md for instructions on how to provide a \
separate build directory. Note: Please remove autogenerated file \
`CMakeCache.txt` and directory `CMakeFiles` in the current directory.")
endif()
# Place every library and archive in <build>/lib and every executable in
# <build>/bin.
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
# Apply the same locations to the per-configuration variables, whose names
# end in the upper-cased configuration name.
foreach(CONFIG ${CMAKE_CONFIGURATION_TYPES})
  string(TOUPPER "${CONFIG}" CONFIG)
  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${CONFIG} "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}")
  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${CONFIG} "${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}")
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${CONFIG} "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
endforeach()
# Locations of the project's own CMake modules and helper scripts
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")
set(SLEEF_SCRIPT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Scripts"
  CACHE PATH "Path for finding sleef specific cmake scripts")
# Run the configuration checks, then hand the resulting cmake settings to the
# source code through the generated sleef-config.h
include(Configure.cmake)
configure_file(
  "${PROJECT_SOURCE_DIR}/sleef-config.h.in"
  "${PROJECT_BINARY_DIR}/include/sleef-config.h"
  @ONLY)
# We like to have a documented index of all targets in the project. The
# variables listed below carry the names of the targets defined throughout
# the project.
# Generates object file (shared library) `libsleef`
# Defined in src/libm/CMakeLists.txt via command add_library
set(TARGET_LIBSLEEF "sleef")
# Name of the GNU ABI flavour of the library (`libsleefgnuabi`), whose build
# is controlled by the BUILD_GNUABI_LIBS option.
set(TARGET_LIBSLEEFGNUABI "sleefgnuabi")
# Generates the sleef.h headers and all the rename headers
# Defined in src/libm/CMakeLists.txt via custom commands and a custom target
set(TARGET_HEADERS "headers")
# Generates executable files for running the test suite
# Defined in src/libm-tester/CMakeLists.txt via command add_executable
set(TARGET_TESTER "tester")
# Implementation-under-test executables; the suffix names a vector extension.
# NOTE(review): presumably one executable per extension, all defined in
# src/libm-tester -- confirm there.
set(TARGET_IUT "iut")
set(TARGET_IUTSSE2 "iutsse2")
set(TARGET_IUTSSE4 "iutsse4")
set(TARGET_IUTAVX "iutavx")
set(TARGET_IUTFMA4 "iutfma4")
set(TARGET_IUTAVX2 "iutavx2")
set(TARGET_IUTAVX2128 "iutavx2128")
set(TARGET_IUTAVX512F "iutavx512f")
set(TARGET_IUTADVSIMD "iutadvsimd")
set(TARGET_IUTNEON32 "iutneon32")
set(TARGET_IUTSVE "iutsve")
set(TARGET_IUTVSX "iutvsx")
# The target to generate LLVM bitcode only, available when SLEEF_ENABLE_LLVM_BITCODE is passed to cmake
set(TARGET_LLVM_BITCODE "llvm-bitcode")
# Generates the helper executable file mkrename needed to write the sleef header
set(TARGET_MKRENAME "mkrename")
# NOTE(review): presumably the GNU ABI counterparts of the header helpers --
# confirm where these targets are defined.
set(TARGET_MKRENAME_GNUABI "mkrename_gnuabi")
set(TARGET_MKMASKED_GNUABI "mkmasked_gnuabi")
# Generates the helper executable file mkdisp needed to write the sleef header
set(TARGET_MKDISP "mkdisp")
# NOTE(review): mkalias is not described here; presumably another helper
# executable -- confirm where it is defined.
set(TARGET_MKALIAS "mkalias")
# Generates static library common
# Defined in src/common/CMakeLists.txt via command add_library
set(TARGET_LIBCOMMON_OBJ "common")
# NOTE(review): presumably defined next to `common` in src/common -- confirm.
set(TARGET_LIBARRAYMAP_OBJ "arraymap")
# Declare an executable that has to be executed on the host.
#   TARGETNAME - name of the executable target
#   ARGN       - remaining arguments, forwarded to add_executable for
#                native builds
# In a native build the target is built as a regular executable. When
# cross-compiling, the target is declared as IMPORTED and points at
# ${NATIVE_BUILD_DIR}/bin/<TARGETNAME>.
function(add_host_executable TARGETNAME)
  if(CMAKE_CROSSCOMPILING)
    add_executable(${TARGETNAME} IMPORTED)
    set_target_properties(${TARGETNAME} PROPERTIES
      IMPORTED_LOCATION "${NATIVE_BUILD_DIR}/bin/${TARGETNAME}")
  else()
    add_executable(${TARGETNAME} ${ARGN})
  endif()
endfunction()
# Name of the target producing the shared library `libsleefdft`; the target
# itself is created with add_library in src/dft/CMakeLists.txt
set(TARGET_LIBDFT sleefdft)
# Process the source tree
add_subdirectory(src)
# Configuration summary printed at configure time. It is enabled by default
# and can be switched off by invoking cmake with "-DSLEEF_SHOW_CONFIG=OFF".
if(SLEEF_SHOW_CONFIG)
  # Target and host platform
  message(STATUS "Configuring build for ${PROJECT_NAME}-v${SLEEF_VERSION}")
  message(" Target system: ${CMAKE_SYSTEM}")
  message(" Target processor: ${CMAKE_SYSTEM_PROCESSOR}")
  message(" Host system: ${CMAKE_HOST_SYSTEM}")
  message(" Host processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}")
  message(" Detected C compiler: ${CMAKE_C_COMPILER_ID} @ ${CMAKE_C_COMPILER}")
  if(CMAKE_CROSSCOMPILING)
    message(" Crosscompiling SLEEF.")
    message(" Native build dir: ${NATIVE_BUILD_DIR}")
  endif()

  # Compiler flags and the external tools/libraries that were looked up
  message(STATUS "Using option `${SLEEF_C_FLAGS}` to compile libsleef")
  message(STATUS "Building shared libs : " ${BUILD_SHARED_LIBS})
  message(STATUS "MPFR : " ${LIB_MPFR})
  if(MPFR_INCLUDE_DIR)
    message(STATUS "MPFR header file in " ${MPFR_INCLUDE_DIR})
  endif()
  message(STATUS "GMP : " ${LIBGMP})
  message(STATUS "RT : " ${LIBRT})
  message(STATUS "FFTW3 : " ${LIBFFTW3})
  message(STATUS "SDE : " ${SDE_COMMAND})
  message(STATUS "RUNNING_ON_TRAVIS : " ${RUNNING_ON_TRAVIS})
  message(STATUS "COMPILER_SUPPORTS_OPENMP : " ${COMPILER_SUPPORTS_OPENMP})

  # Optional features
  if(ENABLE_GNUABI)
    message(STATUS "A version of SLEEF compatible with libm and libmvec in GNU libc will be produced (${TARGET_LIBSLEEFGNUABI}.so)")
  endif()
  if(COMPILER_SUPPORTS_SVE)
    message(STATUS "Building SLEEF with VLA SVE support")
    if(ARMIE_COMMAND)
      message(STATUS "Arm Instruction Emulator found at ${ARMIE_COMMAND}")
      message(STATUS "SVE testing is done with ${SVE_VECTOR_BITS}-bits vectors.")
    endif()
  endif()
endif()

# This note is printed regardless of SLEEF_SHOW_CONFIG.
if(MSVC)
  message("")
  message("*** Note: Parallel build is not supported on Microsoft Visual Studio")
endif()
sleef-3.3.1/CONTRIBUTORS.md 0000664 0000000 0000000 00000001343 13337156437 0015110 0 ustar 00root root 0000000 0000000 # List of contributors
| Name | Affiliation | Github profile |
| -------------------- | ----------------------- | ---------------------------------- |
| Naoki Shibata | Nara Institute of Science and Technology | https://github.com/shibatch |
| Jilayne Lovejoy | Arm Inc. | https://github.com/jlovejoy |
| Francesco Petrogalli | Arm Ltd. | https://github.com/fpetrogalli-arm |
| Diana Bite | Arm Ltd. | https://github.com/diaena |
| Alexandre Mutel | Unity Technologies | https://github.com/xoofx |
| Martin Krastev | Chaos Group | https://github.com/blu |
sleef-3.3.1/Configure.cmake 0000664 0000000 0000000 00000041156 13337156437 0015562 0 ustar 00root root 0000000 0000000 include(CheckCCompilerFlag)
include(CheckCSourceCompiles)
include(CheckTypeSize)
# External libraries. Some toolchains require these to be linked explicitly.
find_library(LIB_MPFR mpfr)
find_library(LIBM m)
find_library(LIBGMP gmp)
find_library(LIBRT rt)
find_library(LIBFFTW3 fftw3)

# Header directories, searched only under CMAKE_FIND_ROOT_PATH.
find_path(MPFR_INCLUDE_DIR NAMES mpfr.h ONLY_CMAKE_FIND_ROOT_PATH)
find_path(FFTW3_INCLUDE_DIR NAMES fftw3.h ONLY_CMAKE_FIND_ROOT_PATH)

# When libm or librt is not found, replace the not-found result with an
# empty string.
if(NOT LIBM)
  set(LIBM "")
endif()
if(NOT LIBRT)
  set(LIBRT "")
endif()
# The library currently supports the following SIMD architectures.
# The list is stored as a cache entry; the trailing comment on each row names
# the processor family the extensions belong to.
set(SLEEF_SUPPORTED_EXTENSIONS
AVX512F AVX2 AVX2128 FMA4 AVX SSE4 SSE2 # x86
ADVSIMD SVE # Aarch64
NEON32 # Aarch32
VSX # PPC64
CACHE STRING "List of SIMD architectures supported by libsleef."
)
# Extensions supported for the GNU ABI library; every entry also appears in
# SLEEF_SUPPORTED_EXTENSIONS above.
set(SLEEF_SUPPORTED_GNUABI_EXTENSIONS
SSE2 AVX AVX2 AVX512F ADVSIMD SVE
CACHE STRING "List of SIMD architectures supported by libsleef for GNU ABI."
)
# Fall back to a Release build when no build type was specified, and offer
# the usual choices for the cache entry.
# Note: some sleef code requires the optimisation flags turned on
if(NOT CMAKE_BUILD_TYPE)
  message(STATUS "Setting build type to 'Release' (required for full support).")
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." FORCE)
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY
    STRINGS Debug Release RelWithDebInfo MinSizeRel)
endif()
# Build a list of safely quoted command arguments for add_custom_command.
#   PROPNAME - name of the variable, set in the caller's scope, that
#              receives the resulting list
#   ARGN     - the arguments to quote
# Every argument is wrapped in literal double quotes. When no arguments are
# given, the final set() receives no value for PROPNAME.
function(command_arguments PROPNAME)
  set(wrapped "")
  foreach(token ${ARGN})
    set(wrapped ${wrapped} "\"${token}\"")
  endforeach()
  set(${PROPNAME} ${wrapped} PARENT_SCOPE)
endfunction()
# PLATFORM DETECTION
if((CMAKE_SYSTEM_PROCESSOR MATCHES "x86") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$"))
set(SLEEF_ARCH_X86 ON CACHE INTERNAL "True for x86 architecture.")
set(SLEEF_HEADER_LIST
SSE_
SSE2
SSE4
AVX_
AVX
FMA4
AVX2
AVX2128
AVX512F_
AVX512F
)
command_arguments(HEADER_PARAMS_SSE_ 2 4 __m128d __m128 __m128i __m128i __SSE2__)
command_arguments(HEADER_PARAMS_SSE2 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse2)
command_arguments(HEADER_PARAMS_SSE4 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse4)
command_arguments(HEADER_PARAMS_AVX_ 4 8 __m256d __m256 __m128i "struct { __m128i x, y$ Sleef_float32x4_t_2 Description
Sleef_float32x4_t_2 is a data type for storing two float32x4_t values,
which is defined in sleef.h as follows:
Vectorized single precision sine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision sine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision cosine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cosf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision cosine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cosf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision combined sine and cosine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sincosf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision combined sine and cosine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sincosf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision sine function with 0.506 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinpif_u05. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision cosine function with 0.506 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cospif_u05. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision combined sine and cosine function with 0.506 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sincospif_u05. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision combined sine and cosine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sincospif_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision tangent function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tanf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision tangent function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tanf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision power function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_powf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision natural logarithmic function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_logf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision natural logarithmic function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_logf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision base-10 logarithmic function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_log10f_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision base-2 logarithmic function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_log2f_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_log1pf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision base-e exponential function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_expf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision base-2 exponential function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_exp2f_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision base-10 exponential function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_exp10f_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision base-e exponential function minus 1 with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_expm1f_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision square root function with 0.5001 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sqrtf_u05. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision square root function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sqrtf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision cubic root function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cbrtf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision cubic root function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cbrtf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision 2D Euclidean distance function with 0.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_hypotf_u05. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision 2D Euclidean distance function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_hypotf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision arc sine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_asinf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision arc sine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_asinf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision arc cosine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_acosf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision arc cosine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_acosf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision arc tangent function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atanf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision arc tangent function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atanf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision arc tangent function of two variables with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atan2f_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision arc tangent function of two variables with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atan2f_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision hyperbolic sine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinhf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision hyperbolic sine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinhf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision hyperbolic cosine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_coshf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision hyperbolic cosine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_coshf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision hyperbolic tangent function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tanhf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision hyperbolic tangent function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tanhf_u35. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision inverse hyperbolic sine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_asinhf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision inverse hyperbolic cosine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_acoshf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision inverse hyperbolic tangent function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atanhf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision error function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_erff_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision complementary error function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_erfcf_u15. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision gamma function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tgammaf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision log gamma function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_lgammaf_u10. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function for rounding to integer towards zero Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_truncf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function for rounding to integer towards negative infinity Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_floorf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function for rounding to integer towards positive infinity Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_ceilf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function for rounding to nearest integer Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_roundf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function for rounding to nearest integer Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_rintf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function for fused multiply-accumulation Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fmaf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision FP remainder Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fmodf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function for obtaining fractional component of an FP number Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_frfrexpf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision signed integral and fractional values Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_modff. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function for calculating the absolute value Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fabsf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function for copying signs Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_copysignf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function for determining maximum of two values Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fmaxf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function for determining minimum of two values Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fminf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function to calculate positive difference of two values Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fdimf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Vectorized single precision function for obtaining the next representable FP value Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_nextafterf. This function may less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.
Sleef_float32x4_t_2 Description
Sleef_float32x4_t_2 is a data type for storing two float32x4_t values,
which is defined in sleef.h as follows:
Sleef_float64x2_t_2 Description
Sleef_float64x2_t_2 is a data type for storing two float64x2_t values,
which is defined in sleef.h as follows:
Sleef_svfloat32_t_2 Description
Sleef_svfloat32_t_2 is a data type for storing two svfloat32_t values,
which is defined in sleef.h as follows:
Sleef_svfloat64_t_2 Description
Sleef_svfloat64_t_2 is a data type for storing two svfloat64_t values,
which is defined in sleef.h as follows:
Vectorized double precision sine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sin_u10 with the same accuracy specification.
Vectorized single precision sine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinf_u10 with the same accuracy specification.
Vectorized double precision sine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sin_u35 with the same accuracy specification.
Vectorized single precision sine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinf_u35 with the same accuracy specification.
Vectorized double precision cosine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cos_u10 with the same accuracy specification.
Vectorized single precision cosine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cosf_u10 with the same accuracy specification.
Vectorized double precision cosine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cos_u35 with the same accuracy specification.
Vectorized single precision cosine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cosf_u35 with the same accuracy specification.
Vectorized double precision combined sine and cosine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sincos_u10 with the same accuracy specification.
Vectorized single precision combined sine and cosine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sincosf_u10 with the same accuracy specification.
Vectorized double precision combined sine and cosine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sincos_u35 with the same accuracy specification.
Vectorized single precision combined sine and cosine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sincosf_u35 with the same accuracy specification.
Vectorized double precision sine function with 0.506 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinpi_u05 with the same accuracy specification.
Vectorized single precision sine function with 0.506 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinpif_u05 with the same accuracy specification.
Vectorized double precision cosine function with 0.506 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cospi_u05 with the same accuracy specification.
Vectorized single precision cosine function with 0.506 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cospif_u05 with the same accuracy specification.
Vectorized double precision combined sine and cosine function with 0.506 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sincospi_u05 with the same accuracy specification.
Vectorized single precision combined sine and cosine function with 0.506 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sincospif_u05 with the same accuracy specification.
Vectorized double precision combined sine and cosine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sincospi_u35 with the same accuracy specification.
Vectorized single precision combined sine and cosine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sincospif_u35 with the same accuracy specification.
Vectorized double precision tangent function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tan_u10 with the same accuracy specification.
Vectorized single precision tangent function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tanf_u10 with the same accuracy specification.
Vectorized double precision tangent function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tan_u35 with the same accuracy specification.
Vectorized single precision tangent function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tanf_u35 with the same accuracy specification.
Vectorized double precision power function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_pow_u10 with the same accuracy specification.
Vectorized single precision power function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_powf_u10 with the same accuracy specification.
Vectorized double precision natural logarithmic function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_log_u10 with the same accuracy specification.
Vectorized single precision natural logarithmic function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_logf_u10 with the same accuracy specification.
Vectorized double precision natural logarithmic function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_log_u35 with the same accuracy specification.
Vectorized single precision natural logarithmic function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_logf_u35 with the same accuracy specification.
Vectorized double precision base-10 logarithmic function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_log10_u10 with the same accuracy specification.
Vectorized single precision base-10 logarithmic function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_log10f_u10 with the same accuracy specification.
Vectorized double precision base-2 logarithmic function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_log2_u10 with the same accuracy specification.
Vectorized single precision base-2 logarithmic function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_log2f_u10 with the same accuracy specification.
Vectorized double precision logarithm of one plus argument with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_log1p_u10 with the same accuracy specification.
Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_log1pf_u10 with the same accuracy specification.
Vectorized double precision base-e exponential function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_exp_u10 with the same accuracy specification.
Vectorized single precision base-e exponential function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_expf_u10 with the same accuracy specification.
Vectorized double precision base-2 exponential function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_exp2_u10 with the same accuracy specification.
Vectorized single precision base-2 exponential function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_exp2f_u10 with the same accuracy specification.
Vectorized double precision base-10 exponential function with 1.09 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_exp10_u10 with the same accuracy specification.
Vectorized single precision base-10 exponential function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_exp10f_u10 with the same accuracy specification.
Vectorized double precision base-e exponential function minus 1 with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_expm1_u10 with the same accuracy specification.
Vectorized single precision base-e exponential function minus 1 with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_expm1f_u10 with the same accuracy specification.
Vectorized double precision square root function with 0.5001 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sqrt_u05 with the same accuracy specification.
Vectorized single precision square root function with 0.5001 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sqrtf_u05 with the same accuracy specification.
Vectorized double precision square root function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sqrt_u35 with the same accuracy specification.
Vectorized single precision square root function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sqrtf_u35 with the same accuracy specification.
Vectorized double precision cubic root function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cbrt_u10 with the same accuracy specification.
Vectorized single precision cubic root function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cbrtf_u10 with the same accuracy specification.
Vectorized double precision cubic root function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cbrt_u35 with the same accuracy specification.
Vectorized single precision cubic root function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cbrtf_u35 with the same accuracy specification.
Vectorized double precision 2D Euclidean distance function with 0.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_hypot_u05 with the same accuracy specification.
Vectorized single precision 2D Euclidean distance function with 0.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_hypotf_u05 with the same accuracy specification.
Vectorized double precision 2D Euclidean distance function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_hypot_u35 with the same accuracy specification.
Vectorized single precision 2D Euclidean distance function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_hypotf_u35 with the same accuracy specification.
Vectorized double precision arc sine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_asin_u10 with the same accuracy specification.
Vectorized single precision arc sine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_asinf_u10 with the same accuracy specification.
Vectorized double precision arc sine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_asin_u35 with the same accuracy specification.
Vectorized single precision arc sine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_asinf_u35 with the same accuracy specification.
Vectorized double precision arc cosine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_acos_u10 with the same accuracy specification.
Vectorized single precision arc cosine function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_acosf_u10 with the same accuracy specification.
Vectorized double precision arc cosine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_acos_u35 with the same accuracy specification.
Vectorized single precision arc cosine function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_acosf_u35 with the same accuracy specification.
Vectorized double precision arc tangent function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atan_u10 with the same accuracy specification.
Vectorized single precision arc tangent function with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atanf_u10 with the same accuracy specification.
Vectorized double precision arc tangent function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atan_u35 with the same accuracy specification.
Vectorized single precision arc tangent function with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atanf_u35 with the same accuracy specification.
Vectorized double precision arc tangent function of two variables with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atan2_u10 with the same accuracy specification.
Vectorized single precision arc tangent function of two variables with 1.0 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atan2f_u10 with the same accuracy specification.
Vectorized double precision arc tangent function of two variables with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atan2_u35 with the same accuracy specification.
Vectorized single precision arc tangent function of two variables with 3.5 ULP error bound Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atan2f_u35 with the same accuracy specification.
Vectorized double precision hyperbolic sine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinh_u10 with the same accuracy specification.
Vectorized single precision hyperbolic sine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinhf_u10 with the same accuracy specification.
Vectorized double precision hyperbolic sine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinh_u35 with the same accuracy specification.
Vectorized single precision hyperbolic sine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_sinhf_u35 with the same accuracy specification.
Vectorized double precision hyperbolic cosine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cosh_u10 with the same accuracy specification.
Vectorized single precision hyperbolic cosine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_coshf_u10 with the same accuracy specification.
Vectorized double precision hyperbolic cosine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_cosh_u35 with the same accuracy specification.
Vectorized single precision hyperbolic cosine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_coshf_u35 with the same accuracy specification.
Vectorized double precision hyperbolic tangent function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tanh_u10 with the same accuracy specification.
Vectorized single precision hyperbolic tangent function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tanhf_u10 with the same accuracy specification.
Vectorized double precision hyperbolic tangent function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tanh_u35 with the same accuracy specification.
Vectorized single precision hyperbolic tangent function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tanhf_u35 with the same accuracy specification.
Vectorized double precision inverse hyperbolic sine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_asinh_u10 with the same accuracy specification.
Vectorized single precision inverse hyperbolic sine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_asinhf_u10 with the same accuracy specification.
Vectorized double precision inverse hyperbolic cosine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_acosh_u10 with the same accuracy specification.
Vectorized single precision inverse hyperbolic cosine function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_acoshf_u10 with the same accuracy specification.
Vectorized double precision inverse hyperbolic tangent function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atanh_u10 with the same accuracy specification.
Vectorized single precision inverse hyperbolic tangent function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_atanhf_u10 with the same accuracy specification.
Vectorized double precision error function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_erf_u10 with the same accuracy specification.
Vectorized single precision error function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_erff_u10 with the same accuracy specification.
Vectorized double precision complementary error function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_erfc_u15 with the same accuracy specification.
Vectorized single precision complementary error function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_erfcf_u15 with the same accuracy specification.
Vectorized double precision gamma function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tgamma_u10 with the same accuracy specification.
Vectorized single precision gamma function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_tgammaf_u10 with the same accuracy specification.
Vectorized double precision log gamma function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_lgamma_u10 with the same accuracy specification.
Vectorized single precision log gamma function Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_lgammaf_u10 with the same accuracy specification.
Vectorized double precision function for rounding to integer towards zero Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_trunc with the same accuracy specification.
Vectorized single precision function for rounding to integer towards zero Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_truncf with the same accuracy specification.
Vectorized double precision function for rounding to integer towards negative infinity Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_floor with the same accuracy specification.
Vectorized single precision function for rounding to integer towards negative infinity Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_floorf with the same accuracy specification.
Vectorized double precision function for rounding to integer towards positive infinity Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_ceil with the same accuracy specification.
Vectorized single precision function for rounding to integer towards positive infinity Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_ceilf with the same accuracy specification.
Vectorized double precision function for rounding to nearest integer Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_round with the same accuracy specification.
Vectorized single precision function for rounding to nearest integer Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_roundf with the same accuracy specification.
Vectorized double precision function for rounding to nearest integer Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_rint with the same accuracy specification.
Vectorized single precision function for rounding to nearest integer Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_rintf with the same accuracy specification.
Vectorized double precision function for fused multiply-accumulation Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fma with the same accuracy specification.
Vectorized single precision function for fused multiply-accumulation Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fmaf with the same accuracy specification.
Vectorized double precision FP remainder Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fmod with the same accuracy specification.
Vectorized single precision FP remainder Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fmodf with the same accuracy specification.
Vectorized double precision function for multiplying by integral power of 2 Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_ldexp with the same accuracy specification.
Vectorized double precision function for obtaining fractional component of an FP number Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_frfrexp with the same accuracy specification.
Vectorized single precision function for obtaining fractional component of an FP number Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_frfrexpf with the same accuracy specification.
Vectorized double precision function for obtaining integral component of an FP number Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_expfrexp with the same accuracy specification.
Vectorized double precision function for getting integer exponent Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_ilogb with the same accuracy specification.
Vectorized double precision signed integral and fractional values Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_modf with the same accuracy specification.
Vectorized single precision signed integral and fractional values Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_modff with the same accuracy specification.
Vectorized double precision function for calculating the absolute value Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fabs with the same accuracy specification.
Vectorized single precision function for calculating the absolute value Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fabsf with the same accuracy specification.
Vectorized double precision function for copying signs Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_copysign with the same accuracy specification.
Vectorized single precision function for copying signs Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_copysignf with the same accuracy specification.
Vectorized double precision function for determining maximum of two values Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fmax with the same accuracy specification.
Vectorized single precision function for determining maximum of two values Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fmaxf with the same accuracy specification.
Vectorized double precision function for determining minimum of two values Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fmin with the same accuracy specification.
Vectorized single precision function for determining minimum of two values Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fminf with the same accuracy specification.
Vectorized double precision function to calculate positive difference of two values Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fdim with the same accuracy specification.
Vectorized single precision function to calculate positive difference of two values Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_fdimf with the same accuracy specification.
Vectorized double precision function for obtaining the next representable FP value Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_nextafter with the same accuracy specification.
Vectorized single precision function for obtaining the next representable FP value Synopsis
#include <sleef.h> Description
This is the vectorized function of Sleef_nextafterf with the same accuracy specification.
The GNUABI version of the library (libsleefgnuabi.so) is built for
x86 and aarch64 architectures. This library provides an API
compatible with libmvec in
glibc, and the API conforms to the vector
ABI. This library is built and installed by default, and some
compilers may call the functions in this library.
Fig. 7.1 shows simplified code of our dispatcher. There is only
one exported function mainFunc. When
mainFunc is called for the first
time, dispatcherMain is called internally,
since funcPtr is initialized to the pointer to
dispatcherMain (line 14). It then detects if the
CPU supports SSE 4.1 (line 7), and
rewrites funcPtr to a pointer to the function
that utilizes SSE 4.1 or SSE 2, depending on the result of CPU
feature detection (line 10). When
mainFunc is called for the second time, it does
not execute
dispatcherMain. It just executes the function
pointed to by the pointer stored in funcPtr during
the execution of
dispatcherMain.
There are a few advantages in our dispatcher. The first advantage is
that it does not require any compiler-specific extension. The second
advantage is simplicity. There are only 18 lines of simple
code. Since the dispatchers are completely separated for each
function, there is not much room for bugs to get in.
The third advantage is low overhead. You might think that the
overhead is one function call including execution of prologue and
epilogue. However, since modern compilers eliminate redundant
execution of the prologue, epilogue and return instruction, the
actual overhead is just one jmp instruction. This is very fast since
it is not conditional.
The fourth advantage is thread safety. There is only one variable
shared among threads, which is funcPtr. There are
only two possible values for this pointer variable. The first value
is the pointer to the dispatcherMain, and the
second value is the pointer to either funcSSE2
or funcSSE4, depending on the availability of
extensions. Once funcPtr is substituted with the
pointer to funcSSE2
or funcSSE4, it will not be changed in the
future. It is obvious that the code works in all the cases.
Fig. 7.1: Simplified code of our dispatcher
ULP stands for "unit in the last place", which is sometimes used for
measuring accuracy of calculations. 1 ULP is basically the distance
between the two closest floating point numbers, which depends on the
exponent of the FP number. The accuracy of calculations by reputable
math libraries is usually between 0.5 and 1 ULP. Here, the accuracy
means the largest error of calculation, which only happens in the
worst case. SLEEF math library provides multiple accuracy choices
for some math functions. Many functions have 3.5-ULP and 1-ULP
versions, and 3.5-ULP versions are significantly faster than 1-ULP
versions. If you care more about execution speed than accuracy, it
is advised to use the 3.5-ULP versions along with -ffast-math or
"unsafe math optimization" options for the compiler.
In the IEEE 754 standard, underflow does not happen abruptly when the
exponent becomes zero. Instead, denormal numbers are produced, which
have less precision, and this is sometimes called gradual
underflow. On some implementations which are not IEEE-754 conformant,
flush-to-zero mode is used since it is easier to implement. In
flush-to-zero mode, numbers smaller than the smallest normalized
number cannot be represented, and they are replaced with zero. Because
of this, the accuracy of calculation may be influenced in some
cases. The smallest normalized number can be referred to with
DBL_MIN for double precision, and FLT_MIN for single precision. The
naming of these macros is a little bit confusing because DBL_MIN is
not the smallest double precision number.
You can see known maximum errors in math functions in glibc
on
this page.
The sincospi series of functions evaluates sin(
πa ) and cos(
πa ) simultaneously. These functions are
added to SLEEF as of version 3.0. There are a few reasons that I
added these functions.
C standards include specifications for functions that evaluate
trigonometric functions. In order to do calculations for evaluating
these functions, reduction of an argument is required. This involves
a multiple precision multiplication with π,
which requires many operations of addition and multiplication. This
is slow especially if accurate evaluation is required. By designing
the function in a way that the argument is pre-multiplied
by π, this reduction can be eliminated. This
leads to faster and more accurate evaluation.
The second reason is that sincospi functions are handy for
implementing an FFT library. FFT libraries need to evaluate
trigonometric functions for generating twiddle factors that are used in
the butterfly operations. Since the butterfly operations are
repeatedly applied, the error in twiddle factors accumulates. Thus, we
want to make the error in twiddle factors as small as possible. In an
FFT of power-of-two size, twiddle factors are
sin( πm /
2^n ) where m
and n are integers. If we just use the usual
trigonometric functions defined in the C standards with the
same precision as that used for butterfly operations, we already
have error when calculating arguments, since
πm / 2^n cannot
be represented as a floating point value without error. On the
other hand, if we use a sincospi function, the argument can be
accurately represented by a radix 2 FP number. Thus, we can
calculate twiddle factors with better accuracy.
The third reason is that sinpi is needed internally to implement
gamma functions.
It is a soup ladle. Sleef means a soup ladle in Dutch.
These graphs show comparison of the execution time between
SLEEF-3.2 compiled
with GCC-7.2 and Intel SVML included in Intel C Compiler 18.0.1.
The execution time of each function is measured by executing each
function 10^8 times and taking the average time. Each time a
function is executed, a uniformly distributed random number is set
to each element of the argument vector (each element is set to a
different value). The ranges of the random number for each
function are shown below. Argument vectors are generated before
the measurement, and the time to generate random argument vectors
is not included in the execution time.
The accuracy of SVML functions can be chosen by compiler options,
not the function names. "-fimf-max-error=1.0" option is specified
to icc to obtain the 1-ulp-accuracy results, and
"-fimf-max-error=5.0" option is used for the 5-ulp-accuracy
results.
Those results are measured on a PC with Intel Core i7-6700 CPU @
3.40GHz with Turbo Boost turned off. The CPU should always be
running at 3.4GHz during the measurement.
Click graphs to magnify.
CMake is an open-source and
cross-platform building tool for software packages that provides easy
managing of multiple build systems at a time. It works by allowing the
developer to specify build parameters and rules in a simple text file
that cmake then processes to generate project files for the actual
native build tools (e.g. UNIX Makefiles, Microsoft Visual Studio,
Apple XCode, etc). That means you can easily maintain multiple
separate builds for one project and manage cross-platform hardware and
software complexity.
If you are not already familiar with cmake, please refer to the
official documentation
or
the basic
introductions in the wiki (recommended).
Before using CMake you will need to install/build the binaries on your
system. Most systems have cmake already installed or provided by the
standard package manager. If that is not the case for you, please
download and install now.
For building SLEEF, version 3.4.3 is the minimum required.
1. Make sure cmake is available on the command-line. The command below
should display a version number greater than or equal to 3.4.3.
2. Download the tar from
the software
repository or check out the source code from
the GitHub repository.
3. Make a separate directory to create an out-of-source build. SLEEF does not
allow for in-tree builds.
4. Run cmake to configure your project and generate the system to build it:
This flag configures an optimised libsleef shared library build with
basic debug info. By default, cmake will autodetect your system
platform and configure the build using the default parameters. You can
control and modify these parameters by setting variables when running
cmake. See the list of options and variables for customizing
your build.
NOTE: On Windows, you need to use a specific generator like this:
`cmake -G"Visual Studio 15 2017 Win64" ..` specifying the Visual
Studio version and targeting specifically `Win64` (to support
compilation of AVX/AVX2). Check `cmake -G` to get a full list of
supported Visual Studio project generators. This generator will
create a proper solution `SLEEF.sln` under the build directory. You
can still use `cmake --build .` to build without opening Visual
Studio.
5. Now that you have the build files created by cmake, proceed from the top
of the build directory:
6. Install the library under ../my-sleef/install by running:
7. You can execute the tests by running:
Variables dictate how the build is generated; options are defined and undefined,
respectively, on the cmake command line like this:
Build configurations allow a project to be built in different ways for debug,
optimized, or any other special set of flags.
You need to install libmpfr and OpenMP (libmpfr is only required to
build the tester, and it is not linked to the library). In order to
build the library, please change the directory to sleef-3.X and run
the following commands.
SLEEF Documentation - Math library reference
Table of contents
Data types for AArch32 architecture
typedef struct {
float32x4_t x, y;
} Sleef_float32x4_t_2;
Trigonometric Functions
float32x4_t Sleef_sinf4_u10(float32x4_t a);
float32x4_t Sleef_sinf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_sinf4_u35(float32x4_t a);
float32x4_t Sleef_sinf4_u35neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_cosf4_u10(float32x4_t a);
float32x4_t Sleef_cosf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_cosf4_u35(float32x4_t a);
float32x4_t Sleef_cosf4_u35neon(float32x4_t a);
Link with -lsleef.
Sleef_float32x4_t_2 Sleef_sincosf4_u10(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u10neon(float32x4_t a);
Link with -lsleef.
Sleef_float32x4_t_2 Sleef_sincosf4_u35(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u35neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_sinpif4_u05(float32x4_t a);
float32x4_t Sleef_sinpif4_u05neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_cospif4_u05(float32x4_t a);
float32x4_t Sleef_cospif4_u05neon(float32x4_t a);
Link with -lsleef.
Sleef_float32x4_t_2 Sleef_sincospif4_u05(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u05neon(float32x4_t a);
Link with -lsleef.
Sleef_float32x4_t_2 Sleef_sincospif4_u35(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u35neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_tanf4_u10(float32x4_t a);
float32x4_t Sleef_tanf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_tanf4_u35(float32x4_t a);
float32x4_t Sleef_tanf4_u35neon(float32x4_t a);
Link with -lsleef.
Power, exponential, and logarithmic function
float32x4_t Sleef_powf4_u10(float32x4_t a, float32x4_t b);
float32x4_t Sleef_powf4_u10neon(float32x4_t a, float32x4_t b);
Link with -lsleef.
float32x4_t Sleef_logf4_u10(float32x4_t a);
float32x4_t Sleef_logf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_logf4_u35(float32x4_t a);
float32x4_t Sleef_logf4_u35neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_log10f4_u10(float32x4_t a);
float32x4_t Sleef_log10f4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_log2f4_u10(float32x4_t a);
float32x4_t Sleef_log2f4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_log1pf4_u10(float32x4_t a);
float32x4_t Sleef_log1pf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_expf4_u10(float32x4_t a);
float32x4_t Sleef_expf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_exp2f4_u10(float32x4_t a);
float32x4_t Sleef_exp2f4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_exp10f4_u10(float32x4_t a);
float32x4_t Sleef_exp10f4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_expm1f4_u10(float32x4_t a);
float32x4_t Sleef_expm1f4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_sqrtf4(float32x4_t a);
float32x4_t Sleef_sqrtf4_neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_sqrtf4_u35(float32x4_t a);
float32x4_t Sleef_sqrtf4_u35neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_cbrtf4_u10(float32x4_t a);
float32x4_t Sleef_cbrtf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_cbrtf4_u35(float32x4_t a);
float32x4_t Sleef_cbrtf4_u35neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_hypotf4_u05(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u05neon(float32x4_t a, float32x4_t b);
Link with -lsleef.
float32x4_t Sleef_hypotf4_u35(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u35neon(float32x4_t a, float32x4_t b);
Link with -lsleef.
Inverse Trigonometric Functions
float32x4_t Sleef_asinf4_u10(float32x4_t a);
float32x4_t Sleef_asinf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_asinf4_u35(float32x4_t a);
float32x4_t Sleef_asinf4_u35neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_acosf4_u10(float32x4_t a);
float32x4_t Sleef_acosf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_acosf4_u35(float32x4_t a);
float32x4_t Sleef_acosf4_u35neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_atanf4_u10(float32x4_t a);
float32x4_t Sleef_atanf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_atanf4_u35(float32x4_t a);
float32x4_t Sleef_atanf4_u35neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_atan2f4_u10(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u10neon(float32x4_t a, float32x4_t b);
Link with -lsleef.
float32x4_t Sleef_atan2f4_u35(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u35neon(float32x4_t a, float32x4_t b);
Link with -lsleef.
Hyperbolic function and inverse hyperbolic function
float32x4_t Sleef_sinhf4_u10(float32x4_t a);
float32x4_t Sleef_sinhf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_sinhf4_u35(float32x4_t a);
float32x4_t Sleef_sinhf4_u35neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_coshf4_u10(float32x4_t a);
float32x4_t Sleef_coshf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_coshf4_u35(float32x4_t a);
float32x4_t Sleef_coshf4_u35neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_tanhf4_u10(float32x4_t a);
float32x4_t Sleef_tanhf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_tanhf4_u35(float32x4_t a);
float32x4_t Sleef_tanhf4_u35neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_asinhf4_u10(float32x4_t a);
float32x4_t Sleef_asinhf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_acoshf4_u10(float32x4_t a);
float32x4_t Sleef_acoshf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_atanhf4_u10(float32x4_t a);
float32x4_t Sleef_atanhf4_u10neon(float32x4_t a);
Link with -lsleef.
Error and gamma function
float32x4_t Sleef_erff4_u10(float32x4_t a);
float32x4_t Sleef_erff4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_erfcf4_u15(float32x4_t a);
float32x4_t Sleef_erfcf4_u15neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_tgammaf4_u10(float32x4_t a);
float32x4_t Sleef_tgammaf4_u10neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_lgammaf4_u10(float32x4_t a);
float32x4_t Sleef_lgammaf4_u10neon(float32x4_t a);
Link with -lsleef.
Nearest integer function
float32x4_t Sleef_truncf4(float32x4_t a);
float32x4_t Sleef_truncf4_neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_floorf4(float32x4_t a);
float32x4_t Sleef_floorf4_neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_ceilf4(float32x4_t a);
float32x4_t Sleef_ceilf4_neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_roundf4(float32x4_t a);
float32x4_t Sleef_roundf4_neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_rintf4(float32x4_t a);
float32x4_t Sleef_rintf4_neon(float32x4_t a);
Link with -lsleef.
Other function
float32x4_t Sleef_fmaf4(float32x4_t a, float32x4_t b, float32x4_t c);
float32x4_t Sleef_fmaf4_neon(float32x4_t a, float32x4_t b, float32x4_t c);
Link with -lsleef.
float32x4_t Sleef_fmodf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fmodf4_neon(float32x4_t a, float32x4_t b);
Link with -lsleef.
float32x4_t Sleef_frfrexpf4(float32x4_t a);
float32x4_t Sleef_frfrexpf4_neon(float32x4_t a);
Link with -lsleef.
Sleef_float32x4_t_2 Sleef_modff4(float32x4_t a);
Sleef_float32x4_t_2 Sleef_modff4_neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_fabsf4(float32x4_t a);
float32x4_t Sleef_fabsf4_neon(float32x4_t a);
Link with -lsleef.
float32x4_t Sleef_copysignf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_copysignf4_neon(float32x4_t a, float32x4_t b);
Link with -lsleef.
float32x4_t Sleef_fmaxf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fmaxf4_neon(float32x4_t a, float32x4_t b);
Link with -lsleef.
float32x4_t Sleef_fminf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fminf4_neon(float32x4_t a, float32x4_t b);
Link with -lsleef.
float32x4_t Sleef_fdimf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fdimf4_neon(float32x4_t a, float32x4_t b);
Link with -lsleef.
float32x4_t Sleef_nextafterf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_nextafterf4_neon(float32x4_t a, float32x4_t b);
Link with -lsleef.
SLEEF Documentation - Math library reference
Table of contents
Data types for AArch64 architecture
typedef struct {
float32x4_t x, y;
} Sleef_float32x4_t_2;
typedef struct {
float64x2_t x, y;
} Sleef_float64x2_t_2;
typedef struct {
svfloat32_t x, y;
} Sleef_svfloat32_t_2;
typedef struct {
svfloat64_t x, y;
} Sleef_svfloat64_t_2;
Trigonometric Functions
float64x2_t Sleef_sind2_u10(float64x2_t a);
float64x2_t Sleef_sind2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_sindx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_sinf4_u10(float32x4_t a);
float32x4_t Sleef_sinf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_sinfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_sind2_u35(float64x2_t a);
float64x2_t Sleef_sind2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_sindx_u35sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_sinf4_u35(float32x4_t a);
float32x4_t Sleef_sinf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_sinfx_u35sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_cosd2_u10(float64x2_t a);
float64x2_t Sleef_cosd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_cosdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_cosf4_u10(float32x4_t a);
float32x4_t Sleef_cosf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_cosfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_cosd2_u35(float64x2_t a);
float64x2_t Sleef_cosd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_cosdx_u35sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_cosf4_u35(float32x4_t a);
float32x4_t Sleef_cosf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_cosfx_u35sve(svfloat32_t a);
Link with -lsleef.
Sleef_float64x2_t_2 Sleef_sincosd2_u10(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincosd2_u10advsimd(float64x2_t a);
Sleef_svfloat64_t_2 Sleef_sincosdx_u10sve(svfloat64_t a);
Link with -lsleef.
Sleef_float32x4_t_2 Sleef_sincosf4_u10(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u10advsimd(float32x4_t a);
Sleef_svfloat32_t_2 Sleef_sincosfx_u10sve(svfloat32_t a);
Link with -lsleef.
Sleef_float64x2_t_2 Sleef_sincosd2_u35(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincosd2_u35advsimd(float64x2_t a);
Sleef_svfloat64_t_2 Sleef_sincosdx_u35sve(svfloat64_t a);
Link with -lsleef.
Sleef_float32x4_t_2 Sleef_sincosf4_u35(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u35advsimd(float32x4_t a);
Sleef_svfloat32_t_2 Sleef_sincosfx_u35sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_sinpid2_u05(float64x2_t a);
float64x2_t Sleef_sinpid2_u05advsimd(float64x2_t a);
svfloat64_t Sleef_sinpidx_u05sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_sinpif4_u05(float32x4_t a);
float32x4_t Sleef_sinpif4_u05advsimd(float32x4_t a);
svfloat32_t Sleef_sinpifx_u05sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_cospid2_u05(float64x2_t a);
float64x2_t Sleef_cospid2_u05advsimd(float64x2_t a);
svfloat64_t Sleef_cospidx_u05sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_cospif4_u05(float32x4_t a);
float32x4_t Sleef_cospif4_u05advsimd(float32x4_t a);
svfloat32_t Sleef_cospifx_u05sve(svfloat32_t a);
Link with -lsleef.
Sleef_float64x2_t_2 Sleef_sincospid2_u05(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincospid2_u05advsimd(float64x2_t a);
Sleef_svfloat64_t_2 Sleef_sincospidx_u05sve(svfloat64_t a);
Link with -lsleef.
Sleef_float32x4_t_2 Sleef_sincospif4_u05(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u05advsimd(float32x4_t a);
Sleef_svfloat32_t_2 Sleef_sincospifx_u05sve(svfloat32_t a);
Link with -lsleef.
Sleef_float64x2_t_2 Sleef_sincospid2_u35(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincospid2_u35advsimd(float64x2_t a);
Sleef_svfloat64_t_2 Sleef_sincospidx_u35sve(svfloat64_t a);
Link with -lsleef.
Sleef_float32x4_t_2 Sleef_sincospif4_u35(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u35advsimd(float32x4_t a);
Sleef_svfloat32_t_2 Sleef_sincospifx_u35sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_tand2_u10(float64x2_t a);
float64x2_t Sleef_tand2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_tandx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_tanf4_u10(float32x4_t a);
float32x4_t Sleef_tanf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_tanfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_tand2_u35(float64x2_t a);
float64x2_t Sleef_tand2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_tandx_u35sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_tanf4_u35(float32x4_t a);
float32x4_t Sleef_tanf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_tanfx_u35sve(svfloat32_t a);
Link with -lsleef.
Power, exponential, and logarithmic function
float64x2_t Sleef_powd2_u10(float64x2_t a, float64x2_t b);
float64x2_t Sleef_powd2_u10advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_powdx_u10sve(svfloat64_t a, svfloat64_t b);
Link with -lsleef.
float32x4_t Sleef_powf4_u10(float32x4_t a, float32x4_t b);
float32x4_t Sleef_powf4_u10advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_powfx_u10sve(svfloat32_t a, svfloat32_t b);
Link with -lsleef.
float64x2_t Sleef_logd2_u10(float64x2_t a);
float64x2_t Sleef_logd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_logdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_logf4_u10(float32x4_t a);
float32x4_t Sleef_logf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_logfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_logd2_u35(float64x2_t a);
float64x2_t Sleef_logd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_logdx_u35sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_logf4_u35(float32x4_t a);
float32x4_t Sleef_logf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_logfx_u35sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_log10d2_u10(float64x2_t a);
float64x2_t Sleef_log10d2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_log10dx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_log10f4_u10(float32x4_t a);
float32x4_t Sleef_log10f4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_log10fx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_log2d2_u10(float64x2_t a);
float64x2_t Sleef_log2d2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_log2dx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_log2f4_u10(float32x4_t a);
float32x4_t Sleef_log2f4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_log2fx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_log1pd2_u10(float64x2_t a);
float64x2_t Sleef_log1pd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_log1pdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_log1pf4_u10(float32x4_t a);
float32x4_t Sleef_log1pf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_log1pfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_expd2_u10(float64x2_t a);
float64x2_t Sleef_expd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_expdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_expf4_u10(float32x4_t a);
float32x4_t Sleef_expf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_expfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_exp2d2_u10(float64x2_t a);
float64x2_t Sleef_exp2d2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_exp2dx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_exp2f4_u10(float32x4_t a);
float32x4_t Sleef_exp2f4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_exp2fx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_exp10d2_u10(float64x2_t a);
float64x2_t Sleef_exp10d2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_exp10dx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_exp10f4_u10(float32x4_t a);
float32x4_t Sleef_exp10f4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_exp10fx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_expm1d2_u10(float64x2_t a);
float64x2_t Sleef_expm1d2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_expm1dx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_expm1f4_u10(float32x4_t a);
float32x4_t Sleef_expm1f4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_expm1fx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_sqrtd2(float64x2_t a);
float64x2_t Sleef_sqrtd2_advsimd(float64x2_t a);
svfloat64_t Sleef_sqrtdx_sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_sqrtf4(float32x4_t a);
float32x4_t Sleef_sqrtf4_advsimd(float32x4_t a);
svfloat32_t Sleef_sqrtfx_sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_sqrtd2_u35(float64x2_t a);
float64x2_t Sleef_sqrtd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_sqrtdx_u35sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_sqrtf4_u35(float32x4_t a);
float32x4_t Sleef_sqrtf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_sqrtfx_u35sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_cbrtd2_u10(float64x2_t a);
float64x2_t Sleef_cbrtd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_cbrtdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_cbrtf4_u10(float32x4_t a);
float32x4_t Sleef_cbrtf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_cbrtfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_cbrtd2_u35(float64x2_t a);
float64x2_t Sleef_cbrtd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_cbrtdx_u35sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_cbrtf4_u35(float32x4_t a);
float32x4_t Sleef_cbrtf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_cbrtfx_u35sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_hypotd2_u05(float64x2_t a, float64x2_t b);
float64x2_t Sleef_hypotd2_u05advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_hypotdx_u05sve(svfloat64_t a, svfloat64_t b);
Link with -lsleef.
float32x4_t Sleef_hypotf4_u05(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u05advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_hypotfx_u05sve(svfloat32_t a, svfloat32_t b);
Link with -lsleef.
float64x2_t Sleef_hypotd2_u35(float64x2_t a, float64x2_t b);
float64x2_t Sleef_hypotd2_u35advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_hypotdx_u35sve(svfloat64_t a, svfloat64_t b);
Link with -lsleef.
float32x4_t Sleef_hypotf4_u35(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u35advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_hypotfx_u35sve(svfloat32_t a, svfloat32_t b);
Link with -lsleef.
Inverse Trigonometric Functions
float64x2_t Sleef_asind2_u10(float64x2_t a);
float64x2_t Sleef_asind2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_asindx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_asinf4_u10(float32x4_t a);
float32x4_t Sleef_asinf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_asinfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_asind2_u35(float64x2_t a);
float64x2_t Sleef_asind2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_asindx_u35sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_asinf4_u35(float32x4_t a);
float32x4_t Sleef_asinf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_asinfx_u35sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_acosd2_u10(float64x2_t a);
float64x2_t Sleef_acosd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_acosdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_acosf4_u10(float32x4_t a);
float32x4_t Sleef_acosf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_acosfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_acosd2_u35(float64x2_t a);
float64x2_t Sleef_acosd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_acosdx_u35sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_acosf4_u35(float32x4_t a);
float32x4_t Sleef_acosf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_acosfx_u35sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_atand2_u10(float64x2_t a);
float64x2_t Sleef_atand2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_atandx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_atanf4_u10(float32x4_t a);
float32x4_t Sleef_atanf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_atanfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_atand2_u35(float64x2_t a);
float64x2_t Sleef_atand2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_atandx_u35sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_atanf4_u35(float32x4_t a);
float32x4_t Sleef_atanf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_atanfx_u35sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_atan2d2_u10(float64x2_t a, float64x2_t b);
float64x2_t Sleef_atan2d2_u10advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_atan2dx_u10sve(svfloat64_t a, svfloat64_t b);
Link with -lsleef.
float32x4_t Sleef_atan2f4_u10(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u10advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_atan2fx_u10sve(svfloat32_t a, svfloat32_t b);
Link with -lsleef.
float64x2_t Sleef_atan2d2_u35(float64x2_t a, float64x2_t b);
float64x2_t Sleef_atan2d2_u35advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_atan2dx_u35sve(svfloat64_t a, svfloat64_t b);
Link with -lsleef.
float32x4_t Sleef_atan2f4_u35(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u35advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_atan2fx_u35sve(svfloat32_t a, svfloat32_t b);
Link with -lsleef.
Hyperbolic function and inverse hyperbolic function
float64x2_t Sleef_sinhd2_u10(float64x2_t a);
float64x2_t Sleef_sinhd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_sinhdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_sinhf4_u10(float32x4_t a);
float32x4_t Sleef_sinhf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_sinhfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_sinhd2_u35(float64x2_t a);
float64x2_t Sleef_sinhd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_sinhdx_u35sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_sinhf4_u35(float32x4_t a);
float32x4_t Sleef_sinhf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_sinhfx_u35sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_coshd2_u10(float64x2_t a);
float64x2_t Sleef_coshd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_coshdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_coshf4_u10(float32x4_t a);
float32x4_t Sleef_coshf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_coshfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_coshd2_u35(float64x2_t a);
float64x2_t Sleef_coshd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_coshdx_u35sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_coshf4_u35(float32x4_t a);
float32x4_t Sleef_coshf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_coshfx_u35sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_tanhd2_u10(float64x2_t a);
float64x2_t Sleef_tanhd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_tanhdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_tanhf4_u10(float32x4_t a);
float32x4_t Sleef_tanhf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_tanhfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_tanhd2_u35(float64x2_t a);
float64x2_t Sleef_tanhd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_tanhdx_u35sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_tanhf4_u35(float32x4_t a);
float32x4_t Sleef_tanhf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_tanhfx_u35sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_asinhd2_u10(float64x2_t a);
float64x2_t Sleef_asinhd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_asinhdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_asinhf4_u10(float32x4_t a);
float32x4_t Sleef_asinhf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_asinhfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_acoshd2_u10(float64x2_t a);
float64x2_t Sleef_acoshd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_acoshdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_acoshf4_u10(float32x4_t a);
float32x4_t Sleef_acoshf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_acoshfx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_atanhd2_u10(float64x2_t a);
float64x2_t Sleef_atanhd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_atanhdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_atanhf4_u10(float32x4_t a);
float32x4_t Sleef_atanhf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_atanhfx_u10sve(svfloat32_t a);
Link with -lsleef.
Error and gamma function
float64x2_t Sleef_erfd2_u10(float64x2_t a);
float64x2_t Sleef_erfd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_erfdx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_erff4_u10(float32x4_t a);
float32x4_t Sleef_erff4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_erffx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_erfcd2_u15(float64x2_t a);
float64x2_t Sleef_erfcd2_u15advsimd(float64x2_t a);
svfloat64_t Sleef_erfcdx_u15sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_erfcf4_u15(float32x4_t a);
float32x4_t Sleef_erfcf4_u15advsimd(float32x4_t a);
svfloat32_t Sleef_erfcfx_u15sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_tgammad2_u10(float64x2_t a);
float64x2_t Sleef_tgammad2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_tgammadx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_tgammaf4_u10(float32x4_t a);
float32x4_t Sleef_tgammaf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_tgammafx_u10sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_lgammad2_u10(float64x2_t a);
float64x2_t Sleef_lgammad2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_lgammadx_u10sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_lgammaf4_u10(float32x4_t a);
float32x4_t Sleef_lgammaf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_lgammafx_u10sve(svfloat32_t a);
Link with -lsleef.
Nearest integer function
float64x2_t Sleef_truncd2(float64x2_t a);
float64x2_t Sleef_truncd2_advsimd(float64x2_t a);
svfloat64_t Sleef_truncdx_sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_truncf4(float32x4_t a);
float32x4_t Sleef_truncf4_advsimd(float32x4_t a);
svfloat32_t Sleef_truncfx_sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_floord2(float64x2_t a);
float64x2_t Sleef_floord2_advsimd(float64x2_t a);
svfloat64_t Sleef_floordx_sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_floorf4(float32x4_t a);
float32x4_t Sleef_floorf4_advsimd(float32x4_t a);
svfloat32_t Sleef_floorfx_sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_ceild2(float64x2_t a);
float64x2_t Sleef_ceild2_advsimd(float64x2_t a);
svfloat64_t Sleef_ceildx_sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_ceilf4(float32x4_t a);
float32x4_t Sleef_ceilf4_advsimd(float32x4_t a);
svfloat32_t Sleef_ceilfx_sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_roundd2(float64x2_t a);
float64x2_t Sleef_roundd2_advsimd(float64x2_t a);
svfloat64_t Sleef_rounddx_sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_roundf4(float32x4_t a);
float32x4_t Sleef_roundf4_advsimd(float32x4_t a);
svfloat32_t Sleef_roundfx_sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_rintd2(float64x2_t a);
float64x2_t Sleef_rintd2_advsimd(float64x2_t a);
svfloat64_t Sleef_rintdx_sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_rintf4(float32x4_t a);
float32x4_t Sleef_rintf4_advsimd(float32x4_t a);
svfloat32_t Sleef_rintfx_sve(svfloat32_t a);
Link with -lsleef.
Other function
float64x2_t Sleef_fmad2(float64x2_t a, float64x2_t b, float64x2_t c);
float64x2_t Sleef_fmad2_advsimd(float64x2_t a, float64x2_t b, float64x2_t c);
svfloat64_t Sleef_fmadx_sve(svfloat64_t a, svfloat64_t b, svfloat64_t c);
Link with -lsleef.
float32x4_t Sleef_fmaf4(float32x4_t a, float32x4_t b, float32x4_t c);
float32x4_t Sleef_fmaf4_advsimd(float32x4_t a, float32x4_t b, float32x4_t c);
svfloat32_t Sleef_fmafx_sve(svfloat32_t a, svfloat32_t b, svfloat32_t c);
Link with -lsleef.
float64x2_t Sleef_fmodd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_fmodd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_fmoddx_sve(svfloat64_t a, svfloat64_t b);
Link with -lsleef.
float32x4_t Sleef_fmodf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fmodf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_fmodfx_sve(svfloat32_t a, svfloat32_t b);
Link with -lsleef.
float64x2_t Sleef_ldexpd2(float64x2_t a, int32x2_t b);
float64x2_t Sleef_ldexpd2_advsimd(float64x2_t a, int32x2_t b);
svfloat64_t Sleef_ldexpdx_sve(svfloat64_t a, svint32_t b);
Link with -lsleef.
float64x2_t Sleef_frfrexpd2(float64x2_t a);
float64x2_t Sleef_frfrexpd2_advsimd(float64x2_t a);
svfloat64_t Sleef_frfrexpdx_sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_frfrexpf4(float32x4_t a);
float32x4_t Sleef_frfrexpf4_advsimd(float32x4_t a);
svfloat32_t Sleef_frfrexpfx_sve(svfloat32_t a);
Link with -lsleef.
int32x2_t Sleef_expfrexpd2(float64x2_t a);
int32x2_t Sleef_expfrexpd2_advsimd(float64x2_t a);
svint32_t Sleef_expfrexpdx_sve(svfloat64_t a);
Link with -lsleef.
int32x2_t Sleef_ilogbd2(float64x2_t a);
int32x2_t Sleef_ilogbd2_advsimd(float64x2_t a);
svint32_t Sleef_ilogbdx_sve(svfloat64_t a);
Link with -lsleef.
Sleef_float64x2_t_2 Sleef_modfd2(float64x2_t a);
Sleef_float64x2_t_2 Sleef_modfd2_advsimd(float64x2_t a);
Sleef_svfloat64_t_2 Sleef_modfdx_sve(svfloat64_t a);
Link with -lsleef.
Sleef_float32x4_t_2 Sleef_modff4(float32x4_t a);
Sleef_float32x4_t_2 Sleef_modff4_advsimd(float32x4_t a);
Sleef_svfloat32_t_2 Sleef_modffx_sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_fabsd2(float64x2_t a);
float64x2_t Sleef_fabsd2_advsimd(float64x2_t a);
svfloat64_t Sleef_fabsdx_sve(svfloat64_t a);
Link with -lsleef.
float32x4_t Sleef_fabsf4(float32x4_t a);
float32x4_t Sleef_fabsf4_advsimd(float32x4_t a);
svfloat32_t Sleef_fabsfx_sve(svfloat32_t a);
Link with -lsleef.
float64x2_t Sleef_copysignd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_copysignd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_copysigndx_sve(svfloat64_t a, svfloat64_t b);
Link with -lsleef.
float32x4_t Sleef_copysignf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_copysignf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_copysignfx_sve(svfloat32_t a, svfloat32_t b);
Link with -lsleef.
float64x2_t Sleef_fmaxd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_fmaxd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_fmaxdx_sve(svfloat64_t a, svfloat64_t b);
Link with -lsleef.
float32x4_t Sleef_fmaxf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fmaxf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_fmaxfx_sve(svfloat32_t a, svfloat32_t b);
Link with -lsleef.
float64x2_t Sleef_fmind2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_fmind2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_fmindx_sve(svfloat64_t a, svfloat64_t b);
Link with -lsleef.
float32x4_t Sleef_fminf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fminf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_fminfx_sve(svfloat32_t a, svfloat32_t b);
Link with -lsleef.
float64x2_t Sleef_fdimd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_fdimd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_fdimdx_sve(svfloat64_t a, svfloat64_t b);
Link with -lsleef.
float32x4_t Sleef_fdimf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fdimf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_fdimfx_sve(svfloat32_t a, svfloat32_t b);
Link with -lsleef.
float64x2_t Sleef_nextafterd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_nextafterd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_nextafterdx_sve(svfloat64_t a, svfloat64_t b);
Link with -lsleef.
float32x4_t Sleef_nextafterf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_nextafterf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_nextafterfx_sve(svfloat32_t a, svfloat32_t b);
Link with -lsleef.
SLEEF Documentation - Additional Notes
Table of contents
About the GNUABI version of the library
How the dispatcher works
/* Pointer through which every call is routed; declared ahead of
   dispatcherMain so that the dispatcher can overwrite it. */
static double (*funcPtr)(double arg);
/* Target of funcPtr on the first call: picks an implementation, stores it
   in funcPtr, and forwards the call to it. */
static double dispatcherMain(double arg) {
/* funcSSE2 is the default choice. */
double (*p)(double arg) = funcSSE2;
#if the compiler supports SSE4.1
/* Switch to funcSSE4 only when SSE4.1 is available on the running CPU. */
if (SSE4.1 is available on the CPU) p = funcSSE4;
#endif
/* Remember the choice so that later calls through funcPtr no longer
   go through dispatcherMain. */
funcPtr = p;
return (*funcPtr)(arg);
}
/* funcPtr initially points at the dispatcher itself. */
static double (*funcPtr)(double arg) = dispatcherMain;
/* Entry point: calls whatever funcPtr currently points to. */
double mainFunc(double arg) {
return (*funcPtr)(arg);
}
ULP, gradual underflow and flush-to-zero mode
About sincospi
About the logo
sleef-3.3.1/doc/html/apple-touch-icon.png 0000664 0000000 0000000 00000004332 13337156437 0020220 0 ustar 00root root 0000000 0000000 ‰PNG
IHDR hÿ‹ pHYs 5Ô 5Ô^eå PLTEþþþ•••¼¼¼oooÞÞÞIII¨¨¨5r! hIDATx^í™»w²JÆ7È@+j¤Œ¡UñÒb$¦ˆ˜_Ô´B.þûßÃ0ÄKâûIqÖYç)Èà·ž}™
&pQÿ Ùf5 EbàòÇðÿäÈvb¤ÂÞÌuß´-(;¢-¡†6_z°âËuáÀNt«°Ô7>ÜxRìºã§DW»9šÐΖ;×`I~±£¿" ä“r=€ÀÈ®»¦wr@“€`倣ìzC’)èÐ &•$ ˆ]ÈUË@i⨸ à Ûvtý c=Î(hv‚X€:$å ”ƒòhf à,¨#@ª Á°ˆÌÒõ¤ e'ÕÁ9¢—3`êòÈìäÈ íH©äŽÐ€ƒî‹š¥ÄsäéÊ<Õ¼¤h&jf뺡EÕéô.@j äB™CA¢HËíÏ€d· ¡™ -ó‹OQr\¢ú1ˆˆälÉA´ÇydºŽ«I=á#Ðй£±??"G'j€Ð!mOBC±ÈQG„¦¸¥ÈˆL(é…$üRN’½phÍô¢f"O—Ê/Ÿ–_Ô,Ñï šä—àt¾!2< q>7ÌT3ˆÞ”ƒ€õ¹‹cC€ÜŒ”5( À+0$@±14É® ÅeG²0$@l܈ª)¤Ž)h˜{â©¶ò’ P[<â‡÷Ôã³Jc“µVÉvšÚCI®Ûǰ¤ˆW×ý†÷®K,ãn~¸c@×m„¡»&m·?<ÕVÞÔBãw¼£º\r±\8 ÇAžj>Ñ„ºð2ßÜe§¹Äª‹b
2=šeC·Š”Œ•þñ¡OK_‘!£
C +ýZØù7
1CfE†>+2”âjÝJI
•:g3Î7Ýr{¡“T·ÏrÖso6\º&Ô†ÚΣóvht¾ÃjœÍ´ºW<5‚™ÌaåÔê !Œ.ºéZyJÓø„ú
›(…0솆.•^b § jQì2¼ƒa;Zm&_+¯³ŽÑ†¿W;K?c
ÚIËg ²A_Š>[ZO‰îæà±Ú;˽ˆšu–
s$@;)ôÕS<º{fèà4.m²zT{c –#“ƒÚ¯¤N13”&6\P€åÌ‘ÒË@R”¶sÀù0Ñ(HîsC©yôöe¾÷¥q×ûêËñÔ€æº=ŠÍñ½·šÀ®Õ_
¶ÌÐá§M&›†(„ìÐEˆCÜíÒ&qvÎc»ãÖ]/L6ªoÝõRŸê
ÆP5† ®3„+2ô1¸Ö
? õÌ+'cãçµèÊQmül¾†æÇ3´îë¿dÈJq.m¥HwZÑ/†~-½ÕöZ/À?gˆ–þw)~!VúëÝj¤H!9
ð†`¹†ÖæO·úÕP’+2²ÿQ†PUZü[
AU†d:«‰,5+Ju5†^飣
ÙV5©F©Ñý‡E–¤•päT¯*2Wr*Š¥ú¾šU™4ª‰ÌÖ?+ŠL¯¦Ö?,E²ÜÁ‰|Õkub™H‘äë>0uêÀ%õ.@œD':ty7:zAÝA¯Uò…‰³$dµ¦:A–néÀ5™ß_Ï£ª]¨¨åû¾Î•bpt=i”@ž²ÖÞîÛ¦êÍÂîÛx¶yˆü¯ÚBŽî·H¨”ëY;Bð|ú"˜X+^Í”{J¤zs¹œª×T÷§þ1©.-"˜k8\P€ðCÄ@Š×.ƒdÝ8Ú˜8XÂCB,Ôp¾>
Ъ%.‹€Š\™%òÔ7˜HÌ(^‚£ét*Q}‡+OëÁp9R=õiÖí?lkÚˆä(
Æ„¡ àÒ2{&ÜßÖš¾ìï?òó‹ñÒ°î>3LH/ƒÄÚ€k4€iG F’ÿ¼
4·§EdSrS ”:ôغ„ÄoËÿ²í4ýÛ!"·üi94ö“t÷— MoùßAŽõ
„‚¶÷;Hâ»L8úúê·Jôû_|ߟ~9$Øã)(À§ ©ÿìœÕdáE÷€€x6Ð{
‰)wmÛ¸ µží¤¢ŸühE@êç۪ߜ6Í•=
ÖÍn³¡~´a<Ã4%¹þrhÏAØ€¾ ÑTŸÒiD¿»ªx
ƒEmŸýØ"7[å>:õšù¾QÐÎf Hú0 g<àŽèÍ¥$¥' ”hÀ@k
öÄ Á¾H6Û…9È •u°Æ@sxU3Ð;PÄÅž¥#ͰZ›´;æ}4ôw«4|\MVƒÎVÝ<=üyßÖ6Ázn1|ˆ-’Å–ˆæ&qR|Wx´À€}B.”°ØŽ˜#dÀUz¬—ÅbKI@B¾åžÝˆåkO©r¬ÿV^+a‰Uÿo%'|‹ Kd à¿!«å³>ä=pšk™?æ~|§y%µÎ7«ˆì(×ÍîʃB
–ëgcósK–ˆÌ„oê`$n½øNÃb3RÛ9‰L€>•í}´šî°ì?‘wùòº½Eþô86çðÈÈØ¢‘Ö“FªàI`J½šàyØWêK(„Xlº‘çÈ?YÇf ˆÎ§ÀÔzMP=6ÊŸ¤[”ìô™ÖÁ]ÚPP›`6²%ö$)™§ (1Ð;¨uÒ
eKänáèÌww‚ž,GÔÚÖS÷<‡j>@IÈÑK–Nÿ[E*ðõ8]>ß?Êó½¶Y>#ÿIömßiÝì®Û¾‚ çºp“Ã6oÂBûp°»þúB©2*wÒÃÇ IEND®B`‚ sleef-3.3.1/doc/html/benchmark.xhtml 0000664 0000000 0000000 00000012140 13337156437 0017347 0 ustar 00root root 0000000 0000000
SLEEF Documentation - Benchmark Results
Table of contents
Benchmark results
Fig. 6.1: Execution time of double precision trigonometric functions
Fig. 6.2: Execution time of single precision trigonometric functions
Fig. 6.3: Execution time of double precision log, exp, pow and inverse trigonometric functions
Fig. 6.4: Execution time of single precision log, exp, pow and inverse trigonometric functions
SLEEF Documentation - Compiling and installing the library
Table of contents
About CMake
Quick start
$ cmake --version
$ git clone https://github.com/shibatch/sleef
$ cd sleef
$ mkdir build && cd build
$ cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_INSTALL_PREFIX=../my-sleef-install \
..
$ make
$ make install
$ make test
Build customization
CMake Variables
Compiling and installing library on Linux
$ mkdir build
$ cd build
$ cmake -DCMAKE_INSTALL_PREFIX=/usr ..
$ make
$ make test
$ sudo make install
In order to uninstall the libraries and headers, run the following command.
$ sudo xargs rm -v < install_manifest.txt
You need Visual Studio 2017. Open the Developer Command Prompt for VS2017, change directory to sleef-3.X, and then run the following commands.
D:\sleef-3.X> mkdir build
D:\sleef-3.X> cd build
D:\sleef-3.X\build> cmake -G"Visual Studio 15 2017 Win64" ..
D:\sleef-3.X\build> cmake --build . --config Release -- /maxcpucount:1
Note that parallel build is not supported on MSVC.
Now, let's try compiling the source code shown in Fig. 2.1.
#include <stdio.h>
#include <x86intrin.h>
#include <sleef.h>
int main(int argc, char **argv) {
double a[] = {2, 10};
double b[] = {3, 20};
__m128d va, vb, vc;
va = _mm_loadu_pd(a);
vb = _mm_loadu_pd(b);
vc = Sleef_powd2_u10(va, vb);
double c[2];
_mm_storeu_pd(c, vc);
printf("pow(%g, %g) = %g\n", a[0], b[0], c[0]);
printf("pow(%g, %g) = %g\n", a[1], b[1], c[1]);
}
Fig. 2.1: Source code for testing
Fig. 2.2 shows typical commands for compiling and executing the hello code on Linux computers.
$ gcc hellox86.c -o hellox86 -lsleef
$ ./hellox86
pow(2, 3) = 8
pow(10, 20) = 1e+20
$ █
Fig. 2.2: Commands for compiling and executing hellox86.c
You may need to set the LD_LIBRARY_PATH environment variable appropriately. If you are trying to execute the program on Mac OS X or Windows, try copying the DLLs to the current directory.
Below is an example CMakeLists.txt for compiling the above hellox86.c. CMake will automatically download SLEEF from its GitHub repository, so there is no need to include SLEEF in your software package. If you prefer importing SLEEF as a git submodule, you can use the SOURCE_DIR option instead of the GIT_REPOSITORY option of ExternalProject_Add.
# Example project: builds hellox86 against a copy of SLEEF that is downloaded,
# built and installed into the build tree by ExternalProject.
#
# The <min>...<max> form keeps 3.4.3 as the minimum required version while
# opting in to newer policies; CMake 4.0 and later reject projects that only
# declare compatibility with versions older than 3.5.
cmake_minimum_required(VERSION 3.4.3...3.28)
project(hellox86 C)

include(ExternalProject)
find_package(Git REQUIRED)

# Fetch SLEEF from GitHub and install it into <build dir>/contrib.
ExternalProject_Add(libsleef
  GIT_REPOSITORY https://github.com/shibatch/sleef
  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/contrib
)

# The SLEEF library file does not exist until libsleef has been built, and its
# file name is platform dependent, so it is found through a link directory.
# link_directories() only affects targets created after it is called, so it
# must stay ahead of add_executable().
link_directories(${CMAKE_BINARY_DIR}/contrib/lib)

add_executable(hellox86 hellox86.c)

# Ensure SLEEF is built and installed before hellox86 is compiled.
add_dependencies(hellox86 libsleef)

# Scope the SLEEF headers and library to this target only.
target_include_directories(hellox86 PRIVATE ${CMAKE_BINARY_DIR}/contrib/include)
target_link_libraries(hellox86 PRIVATE sleef)
Fig. 2.3: Example CMakeLists.txt
sleef-3.3.1/doc/html/convention.png 0000664 0000000 0000000 00000064204 13337156437 0017237 0 ustar 00root root 0000000 0000000 ‰PNG IHDR D ð õÂÛH 0PLTE±QtÔŸÏÖØÆíØxxxŸŸŸþþþOOO’ÿzzÿ»µµÿLLqÁr°Ü>J`1 hIDATx^ìœ1oÛFÇHÓzÈrŒÌ”hM,D ‘Ü–zóZ CÖ˜wöµ@`eëèà~œlý àí4¸€6²û/RÔU'>-KÿÉ2!êîô~÷Þ½÷(¦·[²Ÿj{º¾ÆÇ!ç¡=ÃÞEý¥·]ýáL¯©Ÿu•¶Ñ‰1F;(äü®+5⹌bØ»¨+v£·[‰1wz=ýÁnž!¢×Žˆr [Çä˵ñ]Eô+{)ôVˬ¨