pax_global_header00006660000000000000000000000064137300314410014506gustar00rootroot0000000000000052 comment=cc4b0213f2f57a2f7e8f6355758dc40973ae9998 sleef-3.5.1/000077500000000000000000000000001373003144100126125ustar00rootroot00000000000000sleef-3.5.1/.travis.yml000066400000000000000000000051471373003144100147320ustar00rootroot00000000000000language: c # Default linux jobs os: linux sudo: required dist: bionic # Include osx jobs matrix: include: - os: osx arch: amd64 compiler: clang # use default apple clang env: - LABEL="osx-clang" - os: linux services: docker env: - LABEL="armhf-gcc" - ENABLE_DOCKER="true" - os: linux arch: amd64 addons: apt: packages: - gcc-7 env: - LABEL="x86_64-gcc" - os: linux arch: amd64 addons: apt: packages: - clang-7 env: - LABEL="x86_64-clang" - os: linux arch: arm64 addons: apt: sources: - ubuntu-toolchain-r-test packages: - gcc-10 env: - LABEL="arm64-gcc-sve" - os: linux arch: arm64 addons: apt: packages: - clang-8 env: - LABEL="arm64-clang" - arch: ppc64le dist: bionic env: - LABEL="ppc64le-gcc" - OMP_WAIT_POLICY=passive CTEST_OUTPUT_ON_FAILURE=TRUE - arch: ppc64le compiler: clang dist: bionic env: - LABEL="ppc64le-clang" - OMP_WAIT_POLICY=passive CTEST_OUTPUT_ON_FAILURE=TRUE - arch: s390x dist: bionic env: - LABEL="s390x-gcc" - OMP_WAIT_POLICY=passive CTEST_OUTPUT_ON_FAILURE=TRUE - os: linux arch: s390x compiler: clang-8 addons: apt: packages: - clang-8 env: - LABEL="s390x-clang" - OMP_WAIT_POLICY=passive CTEST_OUTPUT_ON_FAILURE=TRUE before_install: - export PATH=$PATH:/usr/bin - if [[ -x /bin/true ]]; then LD_SHOW_AUXV=1 /bin/true; fi - sed '/^ *$/q' /proc/cpuinfo || true - cd ${TRAVIS_BUILD_DIR} - chmod +x ${TRAVIS_BUILD_DIR}/travis/*.sh - if [[ "${ENABLE_DOCKER}" == "true" ]]; then ${TRAVIS_BUILD_DIR}/travis/setupdocker.sh; fi - if [[ "${ENABLE_DOCKER}" == "true" ]]; then docker exec bionic /build/travis/before_install.${LABEL}.sh; fi - if [[ "${ENABLE_DOCKER}" != "true" ]]; then 
${TRAVIS_BUILD_DIR}/travis/before_install.${LABEL}.sh; fi before_script: - if [[ "${ENABLE_DOCKER}" == "true" ]]; then docker exec bionic /build/travis/before_script.${LABEL}.sh; fi - if [[ "${ENABLE_DOCKER}" != "true" ]]; then ${TRAVIS_BUILD_DIR}/travis/before_script.${LABEL}.sh; fi script: - if [[ "${ENABLE_DOCKER}" == "true" ]]; then docker exec bionic /build/travis/script.${LABEL}.sh; fi - if [[ "${ENABLE_DOCKER}" != "true" ]]; then ${TRAVIS_BUILD_DIR}/travis/script.${LABEL}.sh; fi sleef-3.5.1/CHANGELOG.md000066400000000000000000000163131373003144100144270ustar00rootroot00000000000000## 3.5 - 2020-09-01 - IBM System/390 support is added. - The library can be built with Clang on Windows. - Static libraries with LTO can be generated. - Alternative division and sqrt methods can be chosen with AArch64. - Header files for inlining the whole SLEEF functions can be generated. - IEEE remainder function is added. - GCC-10 can now build SLEEF with SVE support. ## 3.4.1 - 2019-10-01 ### Changed - Fixed accuracy problem with tan_u35, atan_u10, log2f_u35 and exp10f_u10. https://github.com/shibatch/sleef/pull/260 https://github.com/shibatch/sleef/pull/265 https://github.com/shibatch/sleef/pull/267 - SVE intrinsics that are not supported in newer ACLE are replaced. https://github.com/shibatch/sleef/pull/268 - FMA4 detection problem is fixed. https://github.com/shibatch/sleef/pull/262 - Compilation problem under Windows with MinGW is fixed. https://github.com/shibatch/sleef/pull/266 ## 3.4 - 2019-04-28 ### Added - Faster and low precision functions are added. 
https://github.com/shibatch/sleef/pull/229 - Functions that return consistent results across platforms are added https://github.com/shibatch/sleef/pull/216 https://github.com/shibatch/sleef/pull/224 - Quad precision math library (libsleefquad) is added https://github.com/shibatch/sleef/pull/235 https://github.com/shibatch/sleef/pull/237 https://github.com/shibatch/sleef/pull/240 - AArch64 Vector Procedure Call Standard (AAVPCS) support. ### Changed - Many functions are now faster - Testers are now faster ## 3.3.1 - 2018-08-20 ### Added - FreeBSD support is added ### Changed - i386 build problem is fixed - Trigonometric functions now evaluate correctly with full FP domain. https://github.com/shibatch/sleef/pull/210 ## 3.3 - 2018-07-06 ### Added - SVE target support is added to libsleef. https://github.com/shibatch/sleef/pull/180 - SVE target support is added to DFT. With this patch, DFT operations can be carried out using 256, 512, 1024 and 2048-bit wide vectors according to runtime availability of vector registers and operators. https://github.com/shibatch/sleef/pull/182 - 3.5-ULP versions of sinh, cosh, tanh, sinhf, coshf, tanhf, and the corresponding testing functionalities are added. https://github.com/shibatch/sleef/pull/192 - Power VSX target support is added to libsleef. https://github.com/shibatch/sleef/pull/195 - Payne-Hanek like argument reduction is added to libsleef. https://github.com/shibatch/sleef/pull/197 ## 3.2 - 2018-02-26 ### Added - The whole build system of the project migrated from makefiles to cmake. In particular, this includes `libsleef`, `libsleefgnuabi`, `libdft` and all the tests. - Benchmarks that compare `libsleef` vs `SVML` on X86 Linux are available in the project tree under src/libm-benchmarks directory. - Extensive upstream testing via Travis CI and Appveyor, on the following systems: * OS: Windows / Linux / OSX. * Compilers: gcc / clang / MSVC. * Targets: X86 (SSE/AVX/AVX2/AVX512F), AArch64 (Advanced SIMD), ARM (NEON). 
Emulators like QEMU or SDE can be used to run the tests. - Added the following new vector functions (with relative testing): * `log2` - New compatibility tests have been added to check that `libsleefgnuabi` exports the GNUABI symbols correctly. - The library can be compiled to an LLVM bitcode object. - Added masked interface to the library to support AVX512F masked vectorization. ### Changed - Use native instructions if available for `sqrt`. - Fixed fmax and fmin behavior on AArch64: https://github.com/shibatch/sleef/pull/140 - Speed improvements for `asin`, `acos`, `fmod` and `log`. Computation speed of other functions is also improved by general optimization. https://github.com/shibatch/sleef/pull/97 - Removed `libm` dependency. ### Removed - Makefile build system ## 3.1 - 2017-07-19 - Added AArch64 support - Implemented the remaining C99 math functions : lgamma, tgamma, erf, erfc, fabs, copysign, fmax, fmin, fdim, trunc, floor, ceil, round, rint, modf, ldexp, nextafter, frexp, hypot, and fmod. - Added dispatcher for x86 functions - Improved reduction of trigonometric functions - Added support for 32-bit x86, Cygwin, etc. 
- Improved tester ## 3.0 - 2017-02-07 - New API is defined - Functions for DFT are added - sincospi functions are added - gencoef now supports single, extended and quad precision in addition to double precision - Linux, Windows and Mac OS X are supported - GCC, Clang, Intel Compiler, Microsoft Visual C++ are supported - The library can be compiled as DLLs - Files needed for creating a debian package are now included ## 2.120 - 2017-01-30 - Relicensed to Boost Software License Version 1.0 ## 2.110 - 2016-12-11 - The valid range of argument is extended for trig functions - Specification of each function regarding the domain and accuracy is added - A coefficient generation tool is added - New testing tools are introduced - Following functions returned incorrect values when the argument is very large or small : exp, pow, asinh, acosh - SIMD xsin and xcos returned values more than 1 when FMA is enabled - Pure C cbrt returned incorrect values when the argument is negative - tan_u1 returned values with more than 1 ulp of error on rare occasions - Removed support for Java language (because no one seems to be using this) ## 2.100 - 2016-12-04 - Added support for AVX-512F and Clang Extended Vectors. ## 2.90 - 2016-11-27 - Added ilogbf. All the reported bugs (listed below) are fixed. - Log function returned incorrect values when the argument is very small. - Signs of returned values were incorrect when the argument is signed zero. - Tester incorrectly counted ULP in some cases. - ilogb function returned incorrect values in some cases. ## 2.80 - 2013-05-18 - Added support for ARM NEON. Added higher accuracy single precision functions : sinf_u1, cosf_u1, sincosf_u1, tanf_u1, asinf_u1, acosf_u1, atanf_u1, atan2f_u1, logf_u1, and cbrtf_u1. ## 2.70 - 2013-04-30 - Added higher accuracy functions : sin_u1, cos_u1, sincos_u1, tan_u1, asin_u1, acos_u1, atan_u1, atan2_u1, log_u1, and cbrt_u1. These functions evaluate the corresponding function with at most 1 ulp of error. 
## 2.60 - 2013-03-26 - Added the remaining single precision functions : powf, sinhf, coshf, tanhf, exp2f, exp10f, log10f, log1pf. Added support for FMA4 (for AMD Bulldozer). Added more test cases. Fixed minor bugs (which degraded accuracy in some rare cases). ## 2.50 - 2013-03-12 - Added support for AVX2. SLEEF now compiles with ICC. ## 2.40 - 2013-03-07 - Fixed incorrect denormal/nonnumber handling in ldexp, ldexpf, sinf and cosf. Removed support for Go language. ## 2.31 - 2012-07-05 - Added sincosf. ## 2.30 - 2012-01-20 - Added single precision functions : sinf, cosf, tanf, asinf, acosf, atanf, logf, expf, atan2f and cbrtf. ## 2.20 - 2012-01-09 - Added exp2, exp10, expm1, log10, log1p, and cbrt. ## 2.10 - 2012-01-05 - asin() and acos() are back. - Added ilogb() and ldexp(). - Added hyperbolic functions. - Eliminated dependency on frexp, ldexp, fabs, isnan and isinf. ## 2.00 - 2011-12-30 - All of the algorithm has been updated. - Both accuracy and speed are improved since version 1.10. - Denormal number handling is also improved. ## 1.10 - 2010-06-22 - AVX support is added. Accuracy tester is added. ## 1.00 - 2010-05-15 - Initial release sleef-3.5.1/CMakeLists.txt000066400000000000000000000201401373003144100153470ustar00rootroot00000000000000# Options option(BUILD_SHARED_LIBS "Build shared libs" ON) option(BUILD_STATIC_TEST_BINS "Build statically linked test executables" OFF) option(ENABLE_LTO "Enable LTO on GCC or ThinLTO on clang" OFF) option(BUILD_LIBM "libsleef will be built." ON) option(BUILD_DFT "libsleefdft will be built." ON) option(BUILD_QUAD "libsleefquad will be built." OFF) option(BUILD_GNUABI_LIBS "libsleefgnuabi will be built." ON) option(BUILD_TESTS "Tests will be built." ON) option(BUILD_INLINE_HEADERS "Build header for inlining whole SLEEF functions" OFF) option(SLEEF_TEST_ALL_IUT "Perform tests on implementations with all vector extensions" OFF) option(SLEEF_SHOW_CONFIG "Show SLEEF configuration status messages." 
ON)
option(SLEEF_SHOW_ERROR_LOG "Show cmake error log." OFF)
option(ENFORCE_TESTER "Build fails if tester is not available" OFF)
option(ENFORCE_TESTER3 "Build fails if tester3 is not built" OFF)
option(ENABLE_ALTDIV "Enable alternative division method (aarch64 only)" OFF)
option(ENABLE_ALTSQRT "Enable alternative sqrt method (aarch64 only)" OFF)
option(DISABLE_FFTW "Disable testing the DFT library with FFTW" OFF)

cmake_minimum_required(VERSION 3.4.3)

# Policies are probed by name rather than by comparing CMAKE_VERSION, so the
# guard stays correct regardless of how the running CMake is versioned.
# Set CMP0066 to NEW when updating cmake_minimum_required to VERSION >= 3.7.2
if(POLICY CMP0066)
  cmake_policy(SET CMP0066 OLD)
endif()
# CMP0091 has to be set before project() to take effect for this project.
if(POLICY CMP0091)
  cmake_policy(SET CMP0091 NEW)
endif()

enable_testing()

# Version numbers; SLEEF_SOVERSION follows the major version.
set(SLEEF_VERSION_MAJOR 3)
set(SLEEF_VERSION_MINOR 5)
set(SLEEF_VERSION_PATCHLEVEL 1)
set(SLEEF_VERSION ${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}.${SLEEF_VERSION_PATCHLEVEL})
set(SLEEF_SOVERSION ${SLEEF_VERSION_MAJOR})

project(SLEEF VERSION ${SLEEF_VERSION} LANGUAGES C)

# For specifying installation directories
include(GNUInstallDirs)

# A parent project may predefine sleef_SOURCE_DIR / sleef_BINARY_DIR before
# adding this directory; otherwise fall back to the top-level directories.
if(NOT DEFINED sleef_SOURCE_DIR)
  set(sleef_SOURCE_DIR ${CMAKE_SOURCE_DIR})
endif()

if(NOT DEFINED sleef_BINARY_DIR)
  set(sleef_BINARY_DIR ${CMAKE_BINARY_DIR})
endif()

# Sanity check for in-source builds which we do not want to happen.
# Both sides are quoted so that if() compares the two paths as strings and
# never re-interprets a path as the name of another variable.
if("${sleef_SOURCE_DIR}" STREQUAL "${sleef_BINARY_DIR}")
  message(FATAL_ERROR
    "SLEEF does not allow in-source builds.\n"
    "You can refer to doc/build-with-cmake.md for instructions on how to "
    "provide a separate build directory. Note: Please remove autogenerated "
    "file `CMakeCache.txt` and directory `CMakeFiles` in the current directory.")
endif()

# LTO is only offered for static libraries.
if(ENABLE_LTO AND BUILD_SHARED_LIBS)
  message(FATAL_ERROR "ENABLE_LTO and BUILD_SHARED_LIBS cannot be specified at the same time")
endif()

if(ENABLE_LTO)
  cmake_policy(SET CMP0069 NEW)
  include(CheckIPOSupported)
  # NOTE(review): `supported` and `error` are not consulted anywhere in this
  # part of the file -- confirm whether a later consumer reads them.
  check_ipo_supported(RESULT supported OUTPUT error)
endif()

# Set output directories for the library files
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)

# Multi-config generators use per-configuration variables; point every
# configuration at the same lib/ and bin/ directories.
foreach(CONFIG ${CMAKE_CONFIGURATION_TYPES})
  string(TOUPPER "${CONFIG}" CONFIG)
  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${CONFIG} ${PROJECT_BINARY_DIR}/lib)
  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${CONFIG} ${PROJECT_BINARY_DIR}/lib)
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${CONFIG} ${PROJECT_BINARY_DIR}/bin)
endforeach()

# Path for finding cmake modules
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules)
set(SLEEF_SCRIPT_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Scripts CACHE PATH
  "Path for finding sleef specific cmake scripts")

# Clang that simulates MSVC is treated as "Clang on Windows".
if (CMAKE_C_COMPILER_ID MATCHES "Clang" AND "x${CMAKE_C_SIMULATE_ID}" STREQUAL "xMSVC")
  message(STATUS "Building with Clang on Windows")
  set(SLEEF_CLANG_ON_WINDOWS TRUE)
endif()

# sleef-config.h.in passes cmake settings to the source code
include(Configure.cmake)
configure_file(
  ${PROJECT_SOURCE_DIR}/sleef-config.h.in
  ${PROJECT_BINARY_DIR}/include/sleef-config.h @ONLY)

# We like to have a documented index of all targets in the project. The
# variables listed below carry the names of the targets defined throughout
# the project.
# Generates object file (shared library) `libsleef`
# Defined in src/libm/CMakeLists.txt via command add_library
set(TARGET_LIBSLEEF "sleef")
set(TARGET_LIBSLEEFGNUABI "sleefgnuabi")
# Generates the sleef.h headers and all the rename headers
# Defined in src/libm/CMakeLists.txt via custom commands and a custom target
set(TARGET_HEADERS "headers")
set(TARGET_INLINE_HEADERS "inline_headers")
set(TARGET_LIBINLINE "sleefinline")
# Generates executable files for running the test suite
# Defined in src/libm-tester/CMakeLists.txt via command add_executable
set(TARGET_TESTER "tester")
set(TARGET_IUT "iut")
# The target to generate LLVM bitcode only, available when SLEEF_ENABLE_LLVM_BITCODE is passed to cmake
set(TARGET_LLVM_BITCODE "llvm-bitcode")
# Generates the helper executable file mkrename needed to write the sleef header
set(TARGET_MKRENAME "mkrename")
set(TARGET_MKRENAME_GNUABI "mkrename_gnuabi")
set(TARGET_MKMASKED_GNUABI "mkmasked_gnuabi")
# Generates the helper executable file mkdisp needed to write the sleef header
set(TARGET_MKDISP "mkdisp")
set(TARGET_MKALIAS "mkalias")
# Generates static library common
# Defined in src/common/CMakeLists.txt via command add_library
set(TARGET_LIBCOMMON_OBJ "common")
set(TARGET_LIBARRAYMAP_OBJ "arraymap")

# add_host_executable(<target> [<source>...])
#
# Declares an executable that has to run on the build host.
#  - Native build: a regular add_executable() built from the given sources.
#  - Cross build:  an IMPORTED executable located at
#                  ${NATIVE_BUILD_DIR}/bin/<target>; the sources are ignored.
# Fails at configure time when cross-compiling without NATIVE_BUILD_DIR,
# because the imported location would otherwise resolve to /bin/<target>
# and only surface as an obscure error while building.
function(add_host_executable TARGETNAME)
  if (NOT CMAKE_CROSSCOMPILING)
    add_executable(${TARGETNAME} ${ARGN})
  else()
    if (NOT NATIVE_BUILD_DIR)
      message(FATAL_ERROR
        "Cross-compiling requires NATIVE_BUILD_DIR to point at a native build "
        "tree that provides bin/${TARGETNAME}")
    endif()
    add_executable(${TARGETNAME} IMPORTED)
    set_property(TARGET ${TARGETNAME} PROPERTY
      IMPORTED_LOCATION ${NATIVE_BUILD_DIR}/bin/${TARGETNAME})
  endif()
endfunction()

# host_target_AAVPCS_definitions(<target>)
#
# Adds the private compile definition ENABLE_AAVPCS=1 to <target> on native
# builds. Does nothing when cross-compiling, where host targets are imported.
function(host_target_AAVPCS_definitions TARGETNAME)
  if (NOT CMAKE_CROSSCOMPILING)
    target_compile_definitions(${TARGETNAME} PRIVATE ENABLE_AAVPCS=1)
  endif()
endfunction()

# Generates object file (shared library) `libsleefdft`
# Defined in src/dft/CMakeLists.txt via command add_library
set(TARGET_LIBDFT "sleefdft")

# Check subdirectories
add_subdirectory("src")

# Extra messages at configuration time. By default is active, it can be
# turned off by invoking cmake with "-DSLEEF_SHOW_CONFIG=OFF".
if(SLEEF_SHOW_CONFIG)
  message(STATUS "Configuring build for ${PROJECT_NAME}-v${SLEEF_VERSION}")
  message(" Target system: ${CMAKE_SYSTEM}")
  message(" Target processor: ${CMAKE_SYSTEM_PROCESSOR}")
  message(" Host system: ${CMAKE_HOST_SYSTEM}")
  message(" Host processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}")
  message(" Detected C compiler: ${CMAKE_C_COMPILER_ID} @ ${CMAKE_C_COMPILER}")
  message(" CMake: ${CMAKE_VERSION}")
  message(" Make program: ${CMAKE_MAKE_PROGRAM}")
  if(CMAKE_CROSSCOMPILING)
    message(" Crosscompiling SLEEF.")
    message(" Native build dir: ${NATIVE_BUILD_DIR}")
  endif()
  message(STATUS "Using option `${SLEEF_C_FLAGS}` to compile libsleef")
  message(STATUS "Building shared libs : " ${BUILD_SHARED_LIBS})
  message(STATUS "Building static test bins: " ${BUILD_STATIC_TEST_BINS})
  message(STATUS "MPFR : " ${LIB_MPFR})
  if (MPFR_INCLUDE_DIR)
    message(STATUS "MPFR header file in " ${MPFR_INCLUDE_DIR})
  endif()
  message(STATUS "GMP : " ${LIBGMP})
  message(STATUS "RT : " ${LIBRT})
  message(STATUS "FFTW3 : " ${LIBFFTW3})
  # Report SLEEF_OPENSSL_VERSION: Configure.cmake fills it on both the
  # find_package and the find_library path, whereas OPENSSL_VERSION is only
  # defined when find_package(OpenSSL) ran.
  message(STATUS "OPENSSL : " ${SLEEF_OPENSSL_VERSION})
  message(STATUS "SDE : " ${SDE_COMMAND})
  if (BUILD_INLINE_HEADERS)
    message(STATUS "SED : " ${SED_COMMAND})
  endif()
  message(STATUS "RUNNING_ON_TRAVIS : " ${RUNNING_ON_TRAVIS})
  message(STATUS "COMPILER_SUPPORTS_OPENMP : " ${COMPILER_SUPPORTS_OPENMP})
  if(ENABLE_GNUABI)
    message(STATUS "A version of SLEEF compatible with libm and libmvec in GNU libc will be produced (${TARGET_LIBSLEEFGNUABI}.so)")
  endif()
  if (COMPILER_SUPPORTS_SVE)
    message(STATUS "Building SLEEF with VLA SVE support")
    if (ARMIE_COMMAND)
      message(STATUS "Arm Instruction Emulator found at ${ARMIE_COMMAND}")
      message(STATUS "SVE testing is done with ${SVE_VECTOR_BITS}-bits vectors.")
    endif()
  endif()
  if(FORCE_AAVPCS)
    message(STATUS "Building SLEEF with AArch64 Vector PCS support")
  endif()
endif()

if (NOT CMAKE_MAKE_PROGRAM
MATCHES "ninja") message("") message("*** Note: Parallel build is only supported with Ninja ***") message("") endif() sleef-3.5.1/CMakeLists.txt.nested000066400000000000000000000012561373003144100166370ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.4.3) set(sleef_SOURCE_DIR ${CMAKE_SOURCE_DIR}/sleef) set(sleef_BINARY_DIR ${CMAKE_BINARY_DIR}/sleef) add_subdirectory("sleef") include_directories(${sleef_BINARY_DIR}/include) include_directories(${sleef_SOURCE_DIR}/include) link_directories(${sleef_BINARY_DIR}/lib) add_executable(hellox86 hellox86.c) set_target_properties(hellox86 PROPERTIES C_STANDARD 99) add_dependencies(hellox86 sleef) target_link_libraries(hellox86 sleef) # add_executable(dfttutorial tutorial.c) set_target_properties(dfttutorial PROPERTIES C_STANDARD 99) add_dependencies(dfttutorial sleef) find_library(LIBM m) target_link_libraries(dfttutorial sleef sleefdft ${LIBM}) sleef-3.5.1/CONTRIBUTORS.md000066400000000000000000000013431373003144100150720ustar00rootroot00000000000000# List of contributors | Name | Affiliation | Github profile | | -------------------- | ----------------------- | ---------------------------------- | | Naoki Shibata | Nara Institute of Science and Technology | https://github.com/shibatch | | Jilayne Lovejoy | Arm Inc. | https://github.com/jlovejoy | | Francesco Petrogalli | Arm Ltd. | https://github.com/fpetrogalli-arm | | Diana Bite | Arm Ltd. 
| https://github.com/diaena | | Alexandre Mutel | Unity Technologies | https://github.com/xoofx | | Martin Krastev | Chaos Group | https://github.com/blu | sleef-3.5.1/Configure.cmake000066400000000000000000000775351373003144100155560ustar00rootroot00000000000000include(CheckCCompilerFlag) include(CheckCSourceCompiles) include(CheckTypeSize) if (BUILD_STATIC_TEST_BINS) set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") set(BUILD_SHARED_LIBS OFF) set(CMAKE_EXE_LINKER_FLAGS "-static") endif() if (NOT CMAKE_CROSSCOMPILING AND NOT SLEEF_FORCE_FIND_PACKAGE_SSL) find_package(OpenSSL) if (OPENSSL_FOUND) set(SLEEF_OPENSSL_FOUND TRUE) set(SLEEF_OPENSSL_LIBRARIES ${OPENSSL_LIBRARIES}) set(SLEEF_OPENSSL_VERSION ${OPENSSL_VERSION}) set(SLEEF_OPENSSL_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR}) endif() else() # find_package cannot find OpenSSL when cross-compiling find_library(LIBSSL ssl) find_library(LIBCRYPTO crypto) if (LIBSSL AND LIBCRYPTO) set(SLEEF_OPENSSL_FOUND TRUE) set(SLEEF_OPENSSL_LIBRARIES ${LIBSSL} ${LIBCRYPTO}) set(SLEEF_OPENSSL_VERSION ${LIBSSL}) endif() endif() if (ENFORCE_TESTER3 AND NOT SLEEF_OPENSSL_FOUND) message(FATAL_ERROR "ENFORCE_TESTER3 is specified and OpenSSL not found") endif() if (NOT (RUNNING_ON_APPVEYOR AND SLEEF_CLANG_ON_WINDOWS)) # We rely on Cygwin tools in order to test the builds on # appveyor. However, if we try to link these libraries, cmake finds # the Cygwin version of libraries, which causes errors. # Some toolchains require explicit linking of the libraries following. 
# Optional math/test support libraries, looked up by name. Nothing here is
# REQUIRED; absence is handled by the consumers of these variables.
  find_library(LIB_MPFR mpfr)
  find_library(LIBM m)
  find_library(LIBGMP gmp)
  find_library(LIBRT rt)
  find_library(LIBFFTW3 fftw3)

  # Header directories are only searched for libraries that were found.
  if (LIB_MPFR)
    find_path(MPFR_INCLUDE_DIR
      NAMES mpfr.h
      ONLY_CMAKE_FIND_ROOT_PATH)
  endif()

  if (LIBFFTW3)
    find_path(FFTW3_INCLUDE_DIR
      NAMES fftw3.h
      ONLY_CMAKE_FIND_ROOT_PATH)
  endif()

  # Replace the *-NOTFOUND results of libm/librt with an empty string.
  if (NOT LIBM)
    set(LIBM "")
  endif()

  if (NOT LIBRT)
    set(LIBRT "")
  endif()
endif()

# The library currently supports the following SIMD architectures
set(SLEEF_SUPPORTED_EXTENSIONS
  AVX512F AVX512FNOFMA AVX2 AVX2128 FMA4 AVX SSE4 SSE2  # x86
  ADVSIMD ADVSIMDNOFMA SVE SVENOFMA                     # Aarch64
  NEON32 NEON32VFPV4                                    # Aarch32
  VSX VSXNOFMA                                          # PPC64
  ZVECTOR2 ZVECTOR2NOFMA                                # IBM Z
  PUREC_SCALAR PURECFMA_SCALAR                          # Generic type
  CACHE STRING "List of SIMD architectures supported by libsleef."
  )
set(SLEEF_SUPPORTED_GNUABI_EXTENSIONS
  SSE2 AVX AVX2 AVX512F ADVSIMD SVE
  CACHE STRING "List of SIMD architectures supported by libsleef for GNU ABI."
)

set(SLEEFQUAD_SUPPORTED_EXT
  PUREC_SCALAR PURECFMA_SCALAR SSE2 AVX FMA4 AVX2 AVX512F ADVSIMD SVE)

# Force set default build type if none was specified
# Note: some sleef code requires the optimisation flags turned on
if(NOT CMAKE_BUILD_TYPE)
  message(STATUS "Setting build type to 'Release' (required for full support).")
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
    "Debug" "Release" "RelWithDebInfo" "MinSizeRel")
endif()

# command_arguments(<out-var> [<arg>...])
#
# Function used to generate safe command arguments for add_custom_command:
# every <arg> is wrapped in literal double quotes and the resulting list is
# stored in <out-var> in the caller's scope. With no <arg>, <out-var> is set
# from an empty list.
function(command_arguments PROPNAME)
  set(quoted_args "")
  foreach(arg ${ARGN})
    list(APPEND quoted_args "\"${arg}\"" )
  endforeach()
  set(${PROPNAME} ${quoted_args} PARENT_SCOPE)
endfunction()

# PLATFORM DETECTION
#
# Exactly one branch runs, selected by CMAKE_SYSTEM_PROCESSOR. Each branch
# sets the SLEEF_ARCH_* cache flag, the list of headers to generate and the
# per-extension parameter lists consumed by the generator tools.
if((CMAKE_SYSTEM_PROCESSOR MATCHES "x86") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$"))
  set(SLEEF_ARCH_X86 ON CACHE INTERNAL "True for x86 architecture.")

  set(SLEEF_HEADER_LIST
    SSE_
    SSE2
    SSE4
    AVX_
    AVX
    FMA4
    AVX2
    AVX2128
    AVX512F_
    AVX512F
    AVX512FNOFMA
    PUREC_SCALAR
    PURECFMA_SCALAR
    )
  command_arguments(HEADER_PARAMS_SSE_ cinz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__)
  command_arguments(HEADER_PARAMS_SSE2 cinz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse2)
  command_arguments(HEADER_PARAMS_SSE4 cinz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse4)
  command_arguments(HEADER_PARAMS_AVX_ cinz_ 4 8 __m256d __m256 __m128i "struct { __m128i x, y$ }" __AVX__)
  command_arguments(HEADER_PARAMS_AVX cinz_ 4 8 __m256d __m256 __m128i "struct { __m128i x, y$ }" __AVX__ avx)
  command_arguments(HEADER_PARAMS_FMA4 finz_ 4 8 __m256d __m256 __m128i "struct { __m128i x, y$ }" __AVX__ fma4)
  command_arguments(HEADER_PARAMS_AVX2 finz_ 4 8 __m256d __m256 __m128i __m256i __AVX__ avx2)
  command_arguments(HEADER_PARAMS_AVX2128 finz_ 2 4 __m128d __m128 __m128i __m128i __SSE2__ avx2128)
  command_arguments(HEADER_PARAMS_AVX512F_ finz_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__)
  command_arguments(HEADER_PARAMS_AVX512F finz_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__ avx512f)
  command_arguments(HEADER_PARAMS_AVX512FNOFMA cinz_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__ avx512fnofma)

  command_arguments(ALIAS_PARAMS_AVX512F_DP 8 __m512d __m256i e avx512f)
  command_arguments(ALIAS_PARAMS_AVX512F_SP -16 __m512 __m512i e avx512f)

  set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mavx2;-mfma")

  set(TESTER3_DEFINITIONS_SSE2 ATR=cinz_ DPTYPE=__m128d SPTYPE=__m128 DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=sse2)
  set(TESTER3_DEFINITIONS_SSE4 ATR=cinz_ DPTYPE=__m128d SPTYPE=__m128 DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=sse4)
  set(TESTER3_DEFINITIONS_AVX2128 ATR=finz_ DPTYPE=__m128d SPTYPE=__m128 DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=avx2128)
  set(TESTER3_DEFINITIONS_AVX ATR=cinz_ DPTYPE=__m256d SPTYPE=__m256 DPTYPESPEC=d4 SPTYPESPEC=f8 EXTSPEC=avx)
  set(TESTER3_DEFINITIONS_FMA4 ATR=finz_ DPTYPE=__m256d SPTYPE=__m256 DPTYPESPEC=d4 SPTYPESPEC=f8 EXTSPEC=fma4)
  set(TESTER3_DEFINITIONS_AVX2 ATR=finz_ DPTYPE=__m256d SPTYPE=__m256 DPTYPESPEC=d4 SPTYPESPEC=f8 EXTSPEC=avx2)
  set(TESTER3_DEFINITIONS_AVX512F ATR=finz_ DPTYPE=__m512d SPTYPE=__m512 DPTYPESPEC=d8 SPTYPESPEC=f16 EXTSPEC=avx512f)
  set(TESTER3_DEFINITIONS_AVX512FNOFMA ATR=cinz_ DPTYPE=__m512d SPTYPE=__m512 DPTYPESPEC=d8 SPTYPESPEC=f16 EXTSPEC=avx512fnofma)

# "arm64" is matched here as well: a processor reported under that name would
# otherwise fall through to the 32-bit `MATCHES "arm"` branch below and be
# configured as Aarch32.
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
  set(SLEEF_ARCH_AARCH64 ON CACHE INTERNAL "True for Aarch64 architecture.")
  # Aarch64 requires support for advsimdfma4
  set(COMPILER_SUPPORTS_ADVSIMD 1)
  set(COMPILER_SUPPORTS_ADVSIMDNOFMA 1)

  set(SLEEF_HEADER_LIST
    ADVSIMD_
    ADVSIMD
    ADVSIMDNOFMA
    SVE
    SVENOFMA
    PUREC_SCALAR
    PURECFMA_SCALAR
    )
  command_arguments(HEADER_PARAMS_ADVSIMD_ finz_ 2 4 float64x2_t float32x4_t int32x2_t int32x4_t __ARM_NEON)
  command_arguments(HEADER_PARAMS_ADVSIMD finz_ 2 4 float64x2_t float32x4_t int32x2_t int32x4_t __ARM_NEON advsimd)
  command_arguments(HEADER_PARAMS_ADVSIMDNOFMA cinz_ 2 4 float64x2_t float32x4_t int32x2_t int32x4_t __ARM_NEON advsimdnofma)
  command_arguments(HEADER_PARAMS_SVE finz_ x x svfloat64_t svfloat32_t svint32_t svint32_t __ARM_FEATURE_SVE sve)
  command_arguments(HEADER_PARAMS_SVENOFMA cinz_ x x svfloat64_t svfloat32_t svint32_t svint32_t __ARM_FEATURE_SVE svenofma)

  command_arguments(ALIAS_PARAMS_ADVSIMD_DP 2 float64x2_t int32x2_t n advsimd)
  command_arguments(ALIAS_PARAMS_ADVSIMD_SP -4 float32x4_t int32x4_t n advsimd)

  set(TESTER3_DEFINITIONS_ADVSIMD ATR=finz_ DPTYPE=float64x2_t SPTYPE=float32x4_t DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=advsimd)
  set(TESTER3_DEFINITIONS_ADVSIMDNOFMA ATR=cinz_ DPTYPE=float64x2_t SPTYPE=float32x4_t DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=advsimdnofma)
  set(TESTER3_DEFINITIONS_SVE ATR=finz_ DPTYPE=svfloat64_t SPTYPE=svfloat32_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=sve)
  set(TESTER3_DEFINITIONS_SVENOFMA ATR=cinz_ DPTYPE=svfloat64_t SPTYPE=svfloat32_t DPTYPESPEC=dx SPTYPESPEC=fx EXTSPEC=svenofma)

elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
  set(SLEEF_ARCH_AARCH32 ON CACHE INTERNAL "True for Aarch32 architecture.")
  set(COMPILER_SUPPORTS_NEON32 1)
  set(COMPILER_SUPPORTS_NEON32VFPV4 1)

  set(SLEEF_HEADER_LIST
    NEON32_
    NEON32
    NEON32VFPV4
    PUREC_SCALAR
    PURECFMA_SCALAR
    )
  # "-" stands in for the double-precision vector type in these lists.
  command_arguments(HEADER_PARAMS_NEON32_ cinz_ 2 4 - float32x4_t int32x2_t int32x4_t __ARM_NEON__)
  command_arguments(HEADER_PARAMS_NEON32 cinz_ 2 4 - float32x4_t int32x2_t int32x4_t __ARM_NEON__ neon)
  command_arguments(HEADER_PARAMS_NEON32VFPV4 finz_ 2 4 - float32x4_t int32x2_t int32x4_t __ARM_NEON__ neonvfpv4)

  command_arguments(ALIAS_PARAMS_NEON32_SP -4 float32x4_t int32x4_t - neon)
  command_arguments(ALIAS_PARAMS_NEON32_DP 0)

  set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mfpu=vfpv4")

elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
  set(SLEEF_ARCH_PPC64 ON CACHE INTERNAL "True for PPC64 architecture.")

  set(SLEEF_HEADER_LIST
    VSX_
    VSX
    VSXNOFMA
    PUREC_SCALAR
    PURECFMA_SCALAR
    )

  # These lists are built with set() rather than command_arguments(), so the
  # elements are stored without the added literal quotes.
  # NOTE(review): HEADER_PARAMS_VSX_ carries a trailing `vsx` element, unlike
  # the other "<EXT>_" variants in this file -- confirm that is intended.
  set(HEADER_PARAMS_VSX finz_ 2 4 "__vector double" "__vector float" "__vector int" "__vector int" __VSX__ vsx)
  set(HEADER_PARAMS_VSX_ finz_ 2 4 "__vector double" "__vector float" "__vector int" "__vector int" __VSX__ vsx)
  set(HEADER_PARAMS_VSXNOFMA cinz_ 2 4 "__vector double" "__vector float" "__vector int" "__vector int" __VSX__ vsxnofma)
  set(ALIAS_PARAMS_VSX_DP 2 "__vector double" "__vector int" - vsx)
  set(ALIAS_PARAMS_VSX_SP -4 "__vector float" "__vector int" - vsx)

  set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-mvsx")

  set(TESTER3_DEFINITIONS_VSX ATR=finz_ DPTYPE=__vector_double SPTYPE=__vector_float DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=vsx)
  set(TESTER3_DEFINITIONS_VSXNOFMA ATR=cinz_ DPTYPE=__vector_double SPTYPE=__vector_float DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=vsxnofma)

elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
  set(SLEEF_ARCH_S390X ON CACHE INTERNAL "True for IBM Z architecture.")

  set(SLEEF_HEADER_LIST
    ZVECTOR_
    ZVECTOR2
    ZVECTOR2NOFMA
    PUREC_SCALAR
    PURECFMA_SCALAR
    )

  set(HEADER_PARAMS_ZVECTOR_ finz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__)
  set(HEADER_PARAMS_ZVECTOR2 finz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ zvector2)
  set(HEADER_PARAMS_ZVECTOR2NOFMA cinz_ 2 4 "SLEEF_VECTOR_DOUBLE" "SLEEF_VECTOR_FLOAT" "SLEEF_VECTOR_INT" "SLEEF_VECTOR_INT" __VEC__ zvector2nofma)
  set(ALIAS_PARAMS_ZVECTOR2_DP 2 "__vector double" "__vector int" - zvector2)
  set(ALIAS_PARAMS_ZVECTOR2_SP -4 "__vector float" "__vector int" - zvector2)

  set(CLANG_FLAGS_ENABLE_PURECFMA_SCALAR "-march=z14;-mzvector")

  set(TESTER3_DEFINITIONS_ZVECTOR2 ATR=finz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=zvector2)
  set(TESTER3_DEFINITIONS_ZVECTOR2NOFMA ATR=cinz_ DPTYPE=SLEEF_VECTOR_DOUBLE SPTYPE=SLEEF_VECTOR_FLOAT DPTYPESPEC=d2 SPTYPESPEC=f4 EXTSPEC=zvector2nofma)
endif()

# Scalar (pure C) parameter lists are defined on every platform.
command_arguments(HEADER_PARAMS_PUREC_SCALAR cinz_ 1 1 double float int32_t int32_t __STDC__ purec)
command_arguments(HEADER_PARAMS_PURECFMA_SCALAR finz_ 1 1 double float int32_t int32_t FP_FAST_FMA purecfma)
command_arguments(ALIAS_PARAMS_PUREC_SCALAR_DP 1 double int32_t purec cinz_)
command_arguments(ALIAS_PARAMS_PUREC_SCALAR_SP -1 float int32_t purec cinz_)
command_arguments(ALIAS_PARAMS_PURECFMA_SCALAR_DP 1 double int32_t purecfma finz_)
command_arguments(ALIAS_PARAMS_PURECFMA_SCALAR_SP -1 float int32_t purecfma finz_)
# tester3 compile definitions for the scalar implementations.
set(TESTER3_DEFINITIONS_PUREC_SCALAR
  ATR=cinz_ DPTYPE=double SPTYPE=float DPTYPESPEC=d1 SPTYPESPEC=f1 EXTSPEC=purec)
set(TESTER3_DEFINITIONS_PURECFMA_SCALAR
  ATR=finz_ DPTYPE=double SPTYPE=float DPTYPESPEC=d1 SPTYPESPEC=f1 EXTSPEC=purecfma)

# The scalar implementations are marked as supported without a compiler probe.
set(COMPILER_SUPPORTS_PUREC_SCALAR 1)
set(COMPILER_SUPPORTS_PURECFMA_SCALAR 1)

# MKRename arguments per type
# x86
command_arguments(RENAME_PARAMS_SSE2            cinz_ 2 4 sse2)
command_arguments(RENAME_PARAMS_SSE4            cinz_ 2 4 sse4)
command_arguments(RENAME_PARAMS_AVX             cinz_ 4 8 avx)
command_arguments(RENAME_PARAMS_FMA4            finz_ 4 8 fma4)
command_arguments(RENAME_PARAMS_AVX2            finz_ 4 8 avx2)
command_arguments(RENAME_PARAMS_AVX2128         finz_ 2 4 avx2128)
command_arguments(RENAME_PARAMS_AVX512F         finz_ 8 16 avx512f)
command_arguments(RENAME_PARAMS_AVX512FNOFMA    cinz_ 8 16 avx512fnofma)
# Arm
command_arguments(RENAME_PARAMS_ADVSIMD         finz_ 2 4 advsimd)
command_arguments(RENAME_PARAMS_ADVSIMDNOFMA    cinz_ 2 4 advsimdnofma)
command_arguments(RENAME_PARAMS_NEON32          cinz_ 2 4 neon)
command_arguments(RENAME_PARAMS_NEON32VFPV4     finz_ 2 4 neonvfpv4)
# PPC64
command_arguments(RENAME_PARAMS_VSX             finz_ 2 4 vsx)
command_arguments(RENAME_PARAMS_VSXNOFMA        cinz_ 2 4 vsxnofma)
# IBM Z
command_arguments(RENAME_PARAMS_ZVECTOR2        finz_ 2 4 zvector2)
command_arguments(RENAME_PARAMS_ZVECTOR2NOFMA   cinz_ 2 4 zvector2nofma)
# Scalar
command_arguments(RENAME_PARAMS_PUREC_SCALAR    cinz_ 1 1 purec)
command_arguments(RENAME_PARAMS_PURECFMA_SCALAR finz_ 1 1 purecfma)
# The vector length parameters in SVE, for SP and DP, are chosen for
# the smallest SVE vector size (128-bit). The name is generated using
# the "x" token of VLA SVE vector functions.
command_arguments(RENAME_PARAMS_SVE             finz_ x x sve)
command_arguments(RENAME_PARAMS_SVENOFMA        cinz_ x x svenofma)

# GNU ABI rename arguments.
# NOTE(review): the SSE2 entry spells the vector types `_mm128d`/`_mm128`/
# `_mm128i`, whereas the header parameter lists elsewhere in this file use
# `__m128d`/`__m128`/`__m128i` -- confirm whether the consumer reads them.
command_arguments(RENAME_PARAMS_GNUABI_SSE2 sse2 b 2 4 _mm128d _mm128 _mm128i _mm128i __SSE2__)
command_arguments(RENAME_PARAMS_GNUABI_AVX avx c 4 8 __m256d __m256 __m128i "struct { __m128i x, y$ }" __AVX__)
command_arguments(RENAME_PARAMS_GNUABI_AVX2 avx2 d 4 8 __m256d __m256 __m128i __m256i __AVX2__)
command_arguments(RENAME_PARAMS_GNUABI_AVX512F avx512f e 8 16 __m512d __m512 __m256i __m512i __AVX512F__)
command_arguments(RENAME_PARAMS_GNUABI_ADVSIMD advsimd n 2 4 float64x2_t float32x4_t int32x2_t int32x4_t __ARM_NEON)
# The vector length parameters in SVE, for SP and DP, are chosen for
# the smallest SVE vector size (128-bit). The name is generated using
# the "x" token of VLA SVE vector functions.
# NOTE(review): this entry uses `__ARM_SVE`; the SVE header parameter lists
# elsewhere in this file use `__ARM_FEATURE_SVE` -- confirm.
command_arguments(RENAME_PARAMS_GNUABI_SVE sve s x x svfloat64_t svfloat32_t svint32_t svint32_t __ARM_SVE)

# Arguments for the masked GNU ABI generator; dp and sp variants.
command_arguments(MKMASKED_PARAMS_GNUABI_AVX512F_dp avx512f e 8)
command_arguments(MKMASKED_PARAMS_GNUABI_AVX512F_sp avx512f e -16)

command_arguments(MKMASKED_PARAMS_GNUABI_SVE_dp sve s 2)
command_arguments(MKMASKED_PARAMS_GNUABI_SVE_sp sve s -4)

# COMPILER DETECTION

# Detect CLANG executable path (on both Windows and Linux/OSX).
# A CLANG_EXE_PATH that is already set is left untouched.
if(NOT CLANG_EXE_PATH)
  if(CMAKE_C_COMPILER MATCHES "clang")
    # The C compiler in use is already clang: reuse it directly.
    set(CLANG_EXE_PATH ${CMAKE_C_COMPILER})
  else()
    # Otherwise look for a clang executable on the search path.
    find_program(CLANG_EXE_PATH
      NAMES clang "clang-5.0" "clang-4.0" "clang-3.9")
  endif()
endif()

# Allow to define the Gcc/Clang here
# As we might compile the lib with MSVC, but generates bitcode with CLANG
# Intel vector extensions.
set(CLANG_FLAGS_ENABLE_SSE2 "-msse2")
set(CLANG_FLAGS_ENABLE_SSE4 "-msse4.1")
set(CLANG_FLAGS_ENABLE_AVX "-mavx")
set(CLANG_FLAGS_ENABLE_FMA4 "-mfma4")
set(CLANG_FLAGS_ENABLE_AVX2 "-mavx2;-mfma")
set(CLANG_FLAGS_ENABLE_AVX2128 "-mavx2;-mfma")
set(CLANG_FLAGS_ENABLE_AVX512F "-mavx512f")
set(CLANG_FLAGS_ENABLE_AVX512FNOFMA "-mavx512f")
set(CLANG_FLAGS_ENABLE_NEON32 "--target=arm-linux-gnueabihf;-mcpu=cortex-a8")
set(CLANG_FLAGS_ENABLE_NEON32VFPV4 "-march=armv7-a;-mfpu=neon-vfpv4")
# Arm AArch64 vector extensions.
set(CLANG_FLAGS_ENABLE_SVE "-march=armv8-a+sve")
set(CLANG_FLAGS_ENABLE_SVENOFMA "-march=armv8-a+sve")
# PPC64
set(CLANG_FLAGS_ENABLE_VSX "-mcpu=power8")
set(CLANG_FLAGS_ENABLE_VSXNOFMA "-mcpu=power8")
# IBM z
set(CLANG_FLAGS_ENABLE_ZVECTOR2 "-march=z14;-mzvector")
set(CLANG_FLAGS_ENABLE_ZVECTOR2NOFMA "-march=z14;-mzvector")

set(FLAGS_OTHERS "")

# All variables storing compiler flags should be prefixed with FLAGS_
#
# Per compiler family this chain defines:
#   FLAGS_ENABLE_<EXT>  flags that switch a vector extension on
#   FLAGS_STRICTMATH    strict floating point flags (used for SLEEF_C_FLAGS)
#   FLAGS_FASTMATH      fast-math flags (used for DFT_C_FLAGS)
#   FLAGS_WALL          warning flags
#   FLAG_*              single switches used when generating inline headers
if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
  # Always compile sleef with -ffp-contract.
  set(FLAGS_STRICTMATH "-ffp-contract=off")
  set(FLAGS_FASTMATH "-ffast-math")

  # Without the options below, gcc generates calls to libm
  set(FLAGS_OTHERS "-fno-math-errno -fno-trapping-math")

  # Vector extensions: GCC and Clang share the flags defined above.
  foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS})
    set(FLAGS_ENABLE_${SIMD} ${CLANG_FLAGS_ENABLE_${SIMD}})
  endforeach()

  # Warning flags.
  set(FLAGS_WALL "-Wall -Wno-unused -Wno-attributes -Wno-unused-result")
  if(CMAKE_C_COMPILER_ID MATCHES "GNU")
    # The following compiler option is needed to suppress the warning
    # "AVX vector return without AVX enabled changes the ABI" at
    # src/arch/helpervecext.h:88
    set(FLAGS_WALL "${FLAGS_WALL} -Wno-psabi")
    # GCC overrides the Clang NEON32 flags set by the loop above.
    set(FLAGS_ENABLE_NEON32 "-mfpu=neon")
  endif()

  if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND ENABLE_LTO)
    if(NOT LLVM_AR_COMMAND)
      find_program(LLVM_AR_COMMAND "llvm-ar")
    endif()
    if(LLVM_AR_COMMAND)
      # Archive with llvm-ar. The rule must keep CMake's <...> placeholders,
      # otherwise the archiver is invoked without a tool name, an output
      # file or any objects.
      set(CMAKE_AR ${LLVM_AR_COMMAND})
      set(CMAKE_C_ARCHIVE_CREATE "<CMAKE_AR> rcs <TARGET> <LINK_FLAGS> <OBJECTS>")
      set(CMAKE_C_ARCHIVE_FINISH "true")
    endif()
    # Append rather than overwrite, so that the -fno-math-errno and
    # -fno-trapping-math flags set above are still used for LTO builds.
    set(FLAGS_OTHERS "${FLAGS_OTHERS} -flto=thin")
  endif()

  # Flags for generating inline headers
  set(FLAG_PREPROCESS "-E")
  set(FLAG_PRESERVE_COMMENTS "-C")
  set(FLAG_INCLUDE "-I")
  set(FLAG_DEFINE "-D")

  if(SLEEF_CLANG_ON_WINDOWS)
    # The following line is required to prevent clang from displaying
    # many warnings. Clang on Windows references MSVC header files,
    # which have deprecation and security attributes for many
    # functions.
    set(FLAGS_WALL "${FLAGS_WALL} -D_CRT_SECURE_NO_WARNINGS -Wno-deprecated-declarations")
  endif()
elseif(MSVC)
  # Intel vector extensions.
  # The 64-bit branch passes no /arch:SSE2; the 32-bit branch requests it
  # explicitly.
  if(CMAKE_CL_64)
    set(FLAGS_ENABLE_SSE2 /D__SSE2__)
    set(FLAGS_ENABLE_SSE4 /D__SSE2__ /D__SSE3__ /D__SSE4_1__)
  else()
    set(FLAGS_ENABLE_SSE2 /D__SSE2__ /arch:SSE2)
    set(FLAGS_ENABLE_SSE4 /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /arch:SSE2)
  endif()
  set(FLAGS_ENABLE_AVX /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /arch:AVX)
  set(FLAGS_ENABLE_FMA4 /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /D__FMA4__ /arch:AVX2)
  set(FLAGS_ENABLE_AVX2 /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /arch:AVX2)
  set(FLAGS_ENABLE_AVX2128 /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /arch:AVX2)
  set(FLAGS_ENABLE_AVX512F /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /D__AVX512F__ /arch:AVX2)
  set(FLAGS_ENABLE_AVX512FNOFMA /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /D__AVX512F__ /arch:AVX2)
  set(FLAGS_ENABLE_PURECFMA_SCALAR /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /arch:AVX2)
  set(FLAGS_WALL "/D_CRT_SECURE_NO_WARNINGS")

  set(FLAGS_NO_ERRNO "")

  set(FLAG_PREPROCESS "/E")
  set(FLAG_PRESERVE_COMMENTS "/C")
  set(FLAG_INCLUDE "/I")
  set(FLAG_DEFINE "/D")
elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")
  set(FLAGS_ENABLE_SSE2 "-msse2")
  set(FLAGS_ENABLE_SSE4 "-msse4.1")
  set(FLAGS_ENABLE_AVX "-mavx")
  set(FLAGS_ENABLE_AVX2 "-march=core-avx2")
  set(FLAGS_ENABLE_AVX2128 "-march=core-avx2")
  set(FLAGS_ENABLE_AVX512F "-xCOMMON-AVX512")
  set(FLAGS_ENABLE_AVX512FNOFMA "-xCOMMON-AVX512")
  set(FLAGS_ENABLE_PURECFMA_SCALAR "-march=core-avx2")
  set(FLAGS_ENABLE_FMA4 "-msse2")  # This is a dummy flag
  set(FLAGS_STRICTMATH "-fp-model strict -Qoption,cpp,--extended_float_type")
  set(FLAGS_FASTMATH "-fp-model fast=2 -Qoption,cpp,--extended_float_type")
  set(FLAGS_WALL "-fmax-errors=3 -Wall -Wno-unused -Wno-attributes")

  set(FLAGS_NO_ERRNO "")

  set(FLAG_PREPROCESS "-E")
  set(FLAG_PRESERVE_COMMENTS "-C")
  set(FLAG_INCLUDE "-I")
  set(FLAG_DEFINE "-D")
endif()

# Flags for the math library proper: warnings + strict FP + the rest.
set(SLEEF_C_FLAGS "${FLAGS_WALL} ${FLAGS_STRICTMATH} ${FLAGS_OTHERS}")

# Flags for the DFT library: fast-math is used, except with GCC versions
# greater than 6.99, where it is left out.
if(CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 6.99)
  set(DFT_C_FLAGS "${FLAGS_WALL} ${FLAGS_OTHERS}")
else()
  set(DFT_C_FLAGS "${FLAGS_WALL} ${FLAGS_FASTMATH} ${FLAGS_OTHERS}")
endif()

# 32-bit x86: use SSE2 for floating point math.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" AND CMAKE_C_COMPILER_ID MATCHES "GNU")
  set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -msse2 -mfpmath=sse")
  set(DFT_C_FLAGS "${DFT_C_FLAGS} -msse2 -mfpmath=sse -m128bit-long-double")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" AND CMAKE_C_COMPILER_ID MATCHES "Clang")
  set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -msse2 -mfpmath=sse")
  set(DFT_C_FLAGS "${DFT_C_FLAGS} -msse2 -mfpmath=sse")
endif()

if(CYGWIN OR MINGW)
  set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -fno-asynchronous-unwind-tables")
  set(DFT_C_FLAGS "${DFT_C_FLAGS} -fno-asynchronous-unwind-tables")
endif()

# Extra flags for GCC on AArch64, limited to versions greater than 9.3 and
# less than 10.2.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_C_COMPILER_ID MATCHES "GNU" AND
    CMAKE_C_COMPILER_VERSION VERSION_GREATER 9.3 AND CMAKE_C_COMPILER_VERSION VERSION_LESS 10.2)
  set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -fno-shrink-wrap -fno-tree-vrp")
  set(DFT_C_FLAGS "${DFT_C_FLAGS} -fno-shrink-wrap -fno-tree-vrp")
endif()

# FEATURE DETECTION

# Long double

option(DISABLE_LONG_DOUBLE "Disable long double" OFF)
option(ENFORCE_LONG_DOUBLE "Build fails if long double is not supported by the compiler" OFF)

if(NOT DISABLE_LONG_DOUBLE)
  CHECK_TYPE_SIZE("long double" LD_SIZE)
  if(LD_SIZE GREATER "9")
    # This is needed to check since internal compiler error occurs with gcc 4.x
    CHECK_C_SOURCE_COMPILES("
  typedef long double vlongdouble __attribute__((vector_size(sizeof(long double)*2)));
  vlongdouble vcast_vl_l(long double d) { return (vlongdouble) { d, d }; }
  int main() { vlongdouble vld = vcast_vl_l(0); }" COMPILER_SUPPORTS_LONG_DOUBLE)
  endif()
else()
  message(STATUS "Support for long double disabled by CMake option")
endif()

if(ENFORCE_LONG_DOUBLE AND NOT COMPILER_SUPPORTS_LONG_DOUBLE)
  message(FATAL_ERROR "ENFORCE_LONG_DOUBLE is specified and that feature is disabled or not supported by the compiler")
endif()

# float128
# Every feature below follows the same pattern:
#   DISABLE_<F>  option that skips the probe, so the feature is not used
#   ENFORCE_<F>  option that aborts configuration when <F> is unavailable
#   COMPILER_SUPPORTS_<F>  result of compiling a small test program with
#                          the flags in FLAGS_ENABLE_<F>
# FLAGS_ENABLE_<F> is a ";" list; it is joined with spaces because
# CMAKE_REQUIRED_FLAGS expects a single string.

option(DISABLE_FLOAT128 "Disable float128" OFF)
option(ENFORCE_FLOAT128 "Build fails if float128 is not supported by the compiler" OFF)

if(NOT DISABLE_FLOAT128)
  CHECK_C_SOURCE_COMPILES("
  int main() { __float128 r = 1;
  }" COMPILER_SUPPORTS_FLOAT128)
else()
  message(STATUS "Support for float128 disabled by CMake option")
endif()

if(ENFORCE_FLOAT128 AND NOT COMPILER_SUPPORTS_FLOAT128)
  message(FATAL_ERROR "ENFORCE_FLOAT128 is specified and that feature is disabled or not supported by the compiler")
endif()

# SSE2

option(DISABLE_SSE2 "Disable SSE2" OFF)
option(ENFORCE_SSE2 "Build fails if SSE2 is not supported by the compiler" OFF)

if(SLEEF_ARCH_X86 AND NOT DISABLE_SSE2)
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_SSE2}")
  CHECK_C_SOURCE_COMPILES("
  #if defined(_MSC_VER)
  #include <intrin.h>
  #else
  #include <x86intrin.h>
  #endif
  int main() {
    __m128d r = _mm_mul_pd(_mm_set1_pd(1), _mm_set1_pd(2)); }"
    COMPILER_SUPPORTS_SSE2)
endif()

if(ENFORCE_SSE2 AND NOT COMPILER_SUPPORTS_SSE2)
  message(FATAL_ERROR "ENFORCE_SSE2 is specified and that feature is disabled or not supported by the compiler")
endif()

# SSE 4.1

option(DISABLE_SSE4 "Disable SSE4" OFF)
option(ENFORCE_SSE4 "Build fails if SSE4 is not supported by the compiler" OFF)

if(SLEEF_ARCH_X86 AND NOT DISABLE_SSE4)
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_SSE4}")
  CHECK_C_SOURCE_COMPILES("
  #if defined(_MSC_VER)
  #include <intrin.h>
  #else
  #include <x86intrin.h>
  #endif
  int main() {
    __m128d r = _mm_floor_sd(_mm_set1_pd(1), _mm_set1_pd(2)); }"
    COMPILER_SUPPORTS_SSE4)
endif()

if(ENFORCE_SSE4 AND NOT COMPILER_SUPPORTS_SSE4)
  message(FATAL_ERROR "ENFORCE_SSE4 is specified and that feature is disabled or not supported by the compiler")
endif()

# AVX

# The first option used to be declared as ENFORCE_AVX "Disable AVX", which
# left DISABLE_AVX undeclared and gave ENFORCE_AVX the wrong help text.
option(DISABLE_AVX "Disable AVX" OFF)
option(ENFORCE_AVX "Build fails if AVX is not supported by the compiler" OFF)

if(SLEEF_ARCH_X86 AND NOT DISABLE_AVX)
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_AVX}")
  CHECK_C_SOURCE_COMPILES("
  #if defined(_MSC_VER)
  #include <intrin.h>
  #else
  #include <x86intrin.h>
  #endif
  int main() {
    __m256d r = _mm256_add_pd(_mm256_set1_pd(1), _mm256_set1_pd(2));
  }" COMPILER_SUPPORTS_AVX)
endif()

if(ENFORCE_AVX AND NOT COMPILER_SUPPORTS_AVX)
  message(FATAL_ERROR "ENFORCE_AVX is specified and that feature is disabled or not supported by the compiler")
endif()

# FMA4

option(DISABLE_FMA4 "Disable FMA4" OFF)
option(ENFORCE_FMA4 "Build fails if FMA4 is not supported by the compiler" OFF)

if(SLEEF_ARCH_X86 AND NOT DISABLE_FMA4)
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_FMA4}")
  CHECK_C_SOURCE_COMPILES("
  #if defined(_MSC_VER)
  #include <intrin.h>
  #else
  #include <x86intrin.h>
  #endif
  int main() {
    __m256d r = _mm256_macc_pd(_mm256_set1_pd(1), _mm256_set1_pd(2), _mm256_set1_pd(3)); }"
    COMPILER_SUPPORTS_FMA4)
endif()

if(ENFORCE_FMA4 AND NOT COMPILER_SUPPORTS_FMA4)
  message(FATAL_ERROR "ENFORCE_FMA4 is specified and that feature is disabled or not supported by the compiler")
endif()

# AVX2

option(DISABLE_AVX2 "Disable AVX2" OFF)
option(ENFORCE_AVX2 "Build fails if AVX2 is not supported by the compiler" OFF)

if(SLEEF_ARCH_X86 AND NOT DISABLE_AVX2)
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_AVX2}")
  CHECK_C_SOURCE_COMPILES("
  #if defined(_MSC_VER)
  #include <intrin.h>
  #else
  #include <x86intrin.h>
  #endif
  int main() {
    __m256i r = _mm256_abs_epi32(_mm256_set1_epi32(1)); }"
    COMPILER_SUPPORTS_AVX2)

  # AVX2 implies AVX2128
  if(COMPILER_SUPPORTS_AVX2)
    set(COMPILER_SUPPORTS_AVX2128 1)
  endif()
endif()

if(ENFORCE_AVX2 AND NOT COMPILER_SUPPORTS_AVX2)
  message(FATAL_ERROR "ENFORCE_AVX2 is specified and that feature is disabled or not supported by the compiler")
endif()

# AVX512F

option(DISABLE_AVX512F "Disable AVX512F" OFF)
option(ENFORCE_AVX512F "Build fails if AVX512F is not supported by the compiler" OFF)

if(SLEEF_ARCH_X86 AND NOT DISABLE_AVX512F)
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_AVX512F}")
  CHECK_C_SOURCE_COMPILES("
  #if defined(_MSC_VER)
  #include <intrin.h>
  #else
  #include <x86intrin.h>
  #endif
  __m512 addConstant(__m512 arg) {
    return _mm512_add_ps(arg, _mm512_set1_ps(1.f));
  }
  int main() {
    __m512i a = _mm512_set1_epi32(1);
    __m256i ymm = _mm512_extracti64x4_epi64(a, 0);
    __mmask16 m = _mm512_cmp_epi32_mask(a, a, _MM_CMPINT_EQ);
    __m512i r = _mm512_andnot_si512(a, a); }"
    COMPILER_SUPPORTS_AVX512F)

  # The NOFMA variant is enabled together with AVX512F.
  if(COMPILER_SUPPORTS_AVX512F)
    set(COMPILER_SUPPORTS_AVX512FNOFMA 1)
  endif()
endif()

if(ENFORCE_AVX512F AND NOT COMPILER_SUPPORTS_AVX512F)
  message(FATAL_ERROR "ENFORCE_AVX512F is specified and that feature is disabled or not supported by the compiler")
endif()

# SVE

option(DISABLE_SVE "Disable SVE" OFF)
option(ENFORCE_SVE "Build fails if SVE is not supported by the compiler" OFF)

if(SLEEF_ARCH_AARCH64 AND NOT DISABLE_SVE)
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_SVE}")
  CHECK_C_SOURCE_COMPILES("
  #include <arm_sve.h>
  int main() {
    svint32_t r = svdup_n_s32(1); }"
    COMPILER_SUPPORTS_SVE)

  if(COMPILER_SUPPORTS_SVE)
    set(COMPILER_SUPPORTS_SVENOFMA 1)
  endif()
endif()

if(ENFORCE_SVE AND NOT COMPILER_SUPPORTS_SVE)
  message(FATAL_ERROR "ENFORCE_SVE is specified and that feature is disabled or not supported by the compiler")
endif()

# VSX

option(DISABLE_VSX "Disable VSX" OFF)
option(ENFORCE_VSX "Build fails if VSX is not supported by the compiler" OFF)

if(SLEEF_ARCH_PPC64 AND NOT DISABLE_VSX)
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_VSX}")
  CHECK_C_SOURCE_COMPILES("
  #include <altivec.h>
  #ifndef __LITTLE_ENDIAN__
    #error \"Only VSX(ISA2.07) little-endian mode is supported \"
  #endif
  int main() {
    vector double d;
    vector unsigned char p = {
      4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11
    };
    d = vec_perm(d, d, p);
  }"
    COMPILER_SUPPORTS_VSX)

  if(COMPILER_SUPPORTS_VSX)
    set(COMPILER_SUPPORTS_VSXNOFMA 1)
  endif()
endif()

if(ENFORCE_VSX AND NOT COMPILER_SUPPORTS_VSX)
  message(FATAL_ERROR "ENFORCE_VSX is specified and that feature is disabled or not supported by the compiler")
endif()

# IBM Z

option(DISABLE_ZVECTOR2 "Disable ZVECTOR2" OFF)
option(ENFORCE_ZVECTOR2 "Build fails if ZVECTOR2 is not supported by the compiler" OFF)

if(SLEEF_ARCH_S390X AND NOT DISABLE_ZVECTOR2)
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${FLAGS_ENABLE_ZVECTOR2}")
  CHECK_C_SOURCE_COMPILES("
  #include <vecintrin.h>
  int main() {
    __vector float d;
    d = vec_sqrt(d);
  }"
    COMPILER_SUPPORTS_ZVECTOR2)

  if(COMPILER_SUPPORTS_ZVECTOR2)
    set(COMPILER_SUPPORTS_ZVECTOR2NOFMA 1)
  endif()
endif()

if(ENFORCE_ZVECTOR2 AND NOT COMPILER_SUPPORTS_ZVECTOR2)
  message(FATAL_ERROR "ENFORCE_ZVECTOR2 is specified and that feature is disabled or not supported by the compiler")
endif()

# OpenMP

option(DISABLE_OPENMP "Disable OPENMP" OFF)
option(ENFORCE_OPENMP "Build fails if OPENMP is not supported by the compiler" OFF)

if(NOT DISABLE_OPENMP)
  find_package(OpenMP)
  # Check if compilation with OpenMP really succeeds
  # It does not succeed on Travis even though find_package(OpenMP) succeeds.
  if(OPENMP_FOUND)
    set(CMAKE_REQUIRED_FLAGS "${OpenMP_C_FLAGS}")
    CHECK_C_SOURCE_COMPILES("
  #include <stdio.h>
  int main() {
  int i;
  #pragma omp parallel for
    for(i=0;i < 10;i++) { putchar(0); }
  }"
      COMPILER_SUPPORTS_OPENMP)
  endif()
else()
  message(STATUS "Support for OpenMP disabled by CMake option")
endif()

if(ENFORCE_OPENMP AND NOT COMPILER_SUPPORTS_OPENMP)
  message(FATAL_ERROR "ENFORCE_OPENMP is specified and that feature is disabled or not supported by the compiler")
endif()

# Weak aliases
# NOTE(review): CMAKE_REQUIRED_FLAGS is not reset before the remaining
# probes in this block, so they run with whatever flags the last ISA/OpenMP
# probe left behind -- confirm that this is intended.

CHECK_C_SOURCE_COMPILES("
#if defined(__CYGWIN__)
#define EXPORT __stdcall __declspec(dllexport)
#else
#define EXPORT
#endif
  EXPORT int f(int a) {
   return a + 2;
  }
  EXPORT int g(int a) __attribute__((weak, alias(\"f\")));
  int main(void) {
    return g(2);
  }"
  COMPILER_SUPPORTS_WEAK_ALIASES)

# The GNU ABI libraries need weak aliases and are only built on request
# (BUILD_GNUABI_LIBS); Arm and PPC64 processors, Clang on Windows and MinGW
# are excluded.
if(COMPILER_SUPPORTS_WEAK_ALIASES AND
    NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm" AND
    NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64" AND
    NOT SLEEF_CLANG_ON_WINDOWS AND
    NOT MINGW AND
    BUILD_GNUABI_LIBS)
  set(ENABLE_GNUABI ${COMPILER_SUPPORTS_WEAK_ALIASES})
endif()

# Built-in math functions

CHECK_C_SOURCE_COMPILES("
  int main(void) {
    double a = __builtin_sqrt (2);
    float  b = __builtin_sqrtf(2);
  }"
  COMPILER_SUPPORTS_BUILTIN_MATH)

#
SYS_getrandom CHECK_C_SOURCE_COMPILES(" #define _GNU_SOURCE #include #include #include int main(void) { int i; syscall(SYS_getrandom, &i, sizeof(i), 0); }" COMPILER_SUPPORTS_SYS_GETRANDOM) # # Reset used flags set(CMAKE_REQUIRED_FLAGS) set(CMAKE_REQUIRED_LIBRARIES) # Save the default C flags set(ORG_CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) ## # Check if sde64 command is available find_program(SDE_COMMAND sde64) if (NOT SDE_COMMAND) find_program(SDE_COMMAND sde) endif() # Check if armie command is available find_program(ARMIE_COMMAND armie) if (NOT SVE_VECTOR_BITS) set(SVE_VECTOR_BITS 128) endif() # find_program(SED_COMMAND sed) ## if(SLEEF_SHOW_ERROR_LOG) if (EXISTS ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeError.log) file(READ ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeError.log FILE_CONTENT) message("") message("===== Content of CMakeError.log =====") message("") message("${FILE_CONTENT}") message("") message("=======================================") message("") endif() endif(SLEEF_SHOW_ERROR_LOG) if (RUNNING_ON_TRAVIS AND CMAKE_C_COMPILER_ID MATCHES "Clang") message(STATUS "Travis bug workaround turned on") set(COMPILER_SUPPORTS_OPENMP FALSE) # Workaround for https://github.com/travis-ci/travis-ci/issues/8613 set(COMPILER_SUPPORTS_FLOAT128 FALSE) # Compilation on unroll_0_vecextqp.c does not finish on Travis endif() if (MSVC OR SLEEF_CLANG_ON_WINDOWS) set(COMPILER_SUPPORTS_OPENMP FALSE) # At this time, OpenMP is not supported on MSVC endif() ## # Set common definitions if (NOT BUILD_SHARED_LIBS) set(COMMON_TARGET_DEFINITIONS SLEEF_STATIC_LIBS=1) set(SLEEF_STATIC_LIBS 1) endif() if (COMPILER_SUPPORTS_WEAK_ALIASES) set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLE_ALIAS=1) endif() if (COMPILER_SUPPORTS_SYS_GETRANDOM) set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLE_SYS_getrandom=1) endif() sleef-3.5.1/Jenkinsfile000066400000000000000000000146231373003144100150040ustar00rootroot00000000000000pipeline { agent any stages { stage('Preamble') { parallel 
{ stage('Armclang') { agent { label 'armclang' } steps { sh ''' echo "armclang+SVE on" `hostname` export CC=armclang rm -rf build mkdir build cd build cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SVE=TRUE .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install ''' } } stage('Armclang AAVPCS') { agent { label 'armclang' } steps { sh ''' echo "armclang+SVE+AAVPCS on" `hostname` export CC=armclang rm -rf build mkdir build cd build cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DFORCE_AAVPCS=On -DENABLE_GNUABI=On -DBUILD_QUAD=TRUE -DENFORCE_SVE=TRUE .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install ''' } } stage('Intel Compiler') { agent { label 'icc' } steps { sh ''' echo "Intel Compiler on" `hostname` export CC=icc rm -rf build mkdir build cd build cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install ''' } } stage('Static libs on mac') { agent { label 'mac' } steps { sh ''' echo "macOS on" `hostname` export PATH=$PATH:/opt/local/bin:/opt/local/bin:/usr/local/bin:/usr/bin:/bin export CC=gcc-9 rm -rf build mkdir build cd build cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. 
ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `sysctl -n hw.logicalcpu` ninja install ''' } } stage('FreeBSD') { agent { label 'freebsd' } steps { sh ''' echo "FreeBSD on" `hostname` rm -rf build mkdir build cd build cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 2 ninja install ''' } } stage('LTO with gcc') { agent { label 'gcc-10' } steps { sh ''' echo "LTO with gcc on" `hostname` export PATH=$PATH:/opt/sde-external-8.56.0-2020-07-05-lin export CC=gcc-10 rm -rf build mkdir build cd build cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DDISABLE_FMA4=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install ''' } } stage('LTO with clang') { agent { label 'clang-10' } steps { sh ''' echo "LTO with clang on" `hostname` export PATH=$PATH:/opt/sde-external-8.56.0-2020-07-05-lin export CC=clang-10 rm -rf build mkdir build cd build cmake -GNinja -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DLLVM_AR_COMMAND=llvm-ar-10 -DDISABLE_FMA4=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. 
ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install ''' } } stage('gcc-4.8 and cmake-3.5.1') { agent { label 'gcc-4' } steps { sh ''' echo "gcc-4.8 and cmake-3.5.1 on" `hostname` export CC=gcc-4.8.5 BUILD_DIR=`pwd` cd .. mv $BUILD_DIR $BUILD_DIR.tmp mkdir $BUILD_DIR mv $BUILD_DIR.tmp $BUILD_DIR/sleef cd $BUILD_DIR cp sleef/CMakeLists.txt.nested ./CMakeLists.txt cp sleef/doc/html/hellox86.c sleef/doc/html/tutorial.c . rm -rf build mkdir build cd build /usr/local/bin/cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE .. ninja ''' } } stage('clang-6.0') { agent { label 'clang-6' } steps { sh ''' echo "clang-6.0 on" `hostname` export CC=clang-6.0 rm -rf build mkdir build cd build cmake -GNinja -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. 
ninja ctest -j `nproc` ''' } } } } } } sleef-3.5.1/LICENSE.txt000066400000000000000000000024721373003144100144420ustar00rootroot00000000000000Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. sleef-3.5.1/README.md000066400000000000000000000011361373003144100140720ustar00rootroot00000000000000[![Build Status](https://travis-ci.org/shibatch/sleef.svg?branch=master)](https://travis-ci.org/shibatch/sleef) In this library, functions for evaluating some elementary functions are implemented. The library also includes DFT subroutines. The software is distributed under the Boost Software License, Version 1.0. See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt. 
Contributions to this project are accepted under the same license. Copyright Naoki Shibata and contributors 2010 - 2020. Main Page : https://sleef.org/ GitHub Repo : https://github.com/shibatch/sleef sleef-3.5.1/appveyor.yml000066400000000000000000000057741373003144100152170ustar00rootroot00000000000000version: 1.0.{build} build_cloud: lithium max_jobs: 2 image: Visual Studio 2019 configuration: Release environment: matrix: - ENV_BUILD_STATIC: -DBUILD_SHARED_LIBS=TRUE -DENFORCE_TESTER=TRUE COMPILER: MSVC DO_TEST: TRUE - ENV_BUILD_STATIC: -DBUILD_SHARED_LIBS=FALSE COMPILER: MSVC DO_TEST: FALSE install: - hostname - call "D:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat" build_script: - set BUILDFOLDER=%CD% - set ORGPATH=%PATH% - if "%DO_TEST%" == "TRUE" echo PATH c:\Cygwin64\bin;c:\Cygwin64\usr\bin;%CD%\build-cygwin\bin;%PATH% > q.bat - if "%DO_TEST%" == "TRUE" powershell -Command "(gc q.bat) -replace ' ;', ';' | Out-File -encoding ASCII p.bat" - if "%DO_TEST%" == "TRUE" call p.bat - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -G Ninja .. 
-DRUNNING_ON_APPVEYOR=TRUE -DBUILD_SHARED_LIBS=FALSE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE;ninja' - if "%DO_TEST%" == "TRUE" cd "%BUILDFOLDER%" - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -G Ninja -DRUNNING_ON_APPVEYOR=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE ..;ninja' - if "%DO_TEST%" == "TRUE" cd "%BUILDFOLDER%" - if "%DO_TEST%" == "TRUE" del /Q /F build-cygwin\bin\iut* - if "%DO_TEST%" == "TRUE" echo PATH %ORGPATH%;c:\Cygwin64\bin;c:\Cygwin64\usr\bin;%CD%\build-cygwin\bin;%CD%\build\bin > q.bat - if "%DO_TEST%" == "TRUE" powershell -Command "(gc q.bat) -replace ' ;', ';' | Out-File -encoding ASCII p.bat" - if "%DO_TEST%" == "TRUE" call p.bat - mkdir build - cd build - cmake -G"Visual Studio 16 2019" .. -DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE %ENV_BUILD_STATIC% - cmake --build . --target install --config Release - if "%DO_TEST%" == "TRUE" (ctest --output-on-failure -j 4 -C Release) - cd "%BUILDFOLDER%" - echo PATH %ORGPATH%;c:\Cygwin64\bin;c:\Cygwin64\usr\bin;%CD%\build-cygwin\bin;%CD%\build-clang\bin > q.bat - powershell -Command "(gc q.bat) -replace ' ;', ';' | Out-File -encoding ASCII p.bat" - call p.bat - mkdir build-clang - cd build-clang - cmake -G Ninja .. 
-DRUNNING_ON_APPVEYOR=TRUE -DCMAKE_C_COMPILER:PATH="C:\Program Files\LLVM\bin\clang.exe" -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE %ENV_BUILD_STATIC% - ninja test_script: - if "%DO_TEST%" == "TRUE" (ctest --output-on-failure -j 4 -C Release) artifacts: - path: build\install\**\* name: SLEEFWindowsx64 sleef-3.5.1/doc/000077500000000000000000000000001373003144100133575ustar00rootroot00000000000000sleef-3.5.1/doc/build-with-cmake.md000066400000000000000000000074541373003144100170410ustar00rootroot00000000000000# Introduction [Cmake](http://www.cmake.org/) is an open-source and cross-platform building tool for software packages that provides easy managing of multiple build systems at a time. It works by allowing the developer to specify build parameters and rules in a simple text file that cmake then processes to generate project files for the actual native build tools (e.g. UNIX Makefiles, Microsoft Visual Studio, Apple XCode, etc). That means you can easily maintain multiple separate builds for one project and manage cross-platform hardware and software complexity. If you are not already familiar with cmake, please refer to the [official documentation](https://cmake.org/documentation/) or the [Basic Introductions](https://cmake.org/Wiki/CMake#Basic_Introductions) in the wiki (recommended). Before using CMake you will need to install/build the binaries on your system. Most systems have cmake already installed or provided by the standard package manager. If that is not the case for you, please [download](https://cmake.org/download/) and install now. For building SLEEF, version 3.4.3 is the minimum required. # Quick start 1. Make sure cmake is available on the command-line. 
``` $ cmake --version (should display a version number greater than or equal to 3.4.3) ``` 2. Download the tar from the [software repository](http://shibatch.sourceforge.net/) or checkout out the source code from the [github repository](https://github.com/shibatch/sleef): ``` $ git clone https://github.com/shibatch/sleef ``` 3. Make a separate directory to create an out-of-source build. SLEEF does not allow for in-tree builds. ``` $ cd sleef-project $ mkdir my-sleef-build && cd my-sleef-build ``` 4. Run cmake to configure your project and generate the system to build it: ``` $ cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_INSTALL_PREFIX=../my-sleef-install \ .. ``` This flag configures an optimised `libsleef` shared library build with basic debug info. By default, cmake will autodetect your system platform and configure the build using the default parameters. You can control and modify these parameters by setting variables when running cmake. See the list of [options and variables](#build-customization) for customizing your build. > NOTE: On **Windows**, you need to use a specific generator like this: > `cmake -G"Visual Studio 14 2015 Win64" ..` specifying the Visual Studio version > and targeting specifically `Win64` (to support compilation of AVX/AVX2) > Check `cmake -G` to get a full list of supported Visual Studio project generators. > This generator will create a proper solution `SLEEF.sln` under the build > directory. > You can still use `cmake --build .` to build without opening Visual Studio. 5. Now that you have the build files created by cmake, proceed from the top of the build directory: ``` $ make sleef ``` 6. Install the library under ../my-sleef/install by running: ``` $ make install ``` 7. 
You can execute the tests by running: ``` $ make test ``` # Build customization Variables dictate how the build is generated; options are defined and undefined, respectively, on the cmake command line like this: ``` cmake -DVARIABLE= cmake -UVARIABLE ``` Build configurations allow a project to be built in different ways for debug, optimized, or any other special set of flags. ## CMake Variables - `CMAKE_BUILD_TYPE`: By default, CMake supports the following configuration: * `Debug`: basic debug flags turned on * `Release`: basic optimizations turned on * `MinSizeRel`: builds the smallest (but not fastest) object code * `RelWithDebInfo`: builds optimized code with debug information as well - `CMAKE_INSTALL_PREFIX`: The prefix the use when running `make install`. Defaults to /usr/local on GNU/Linux and MacOS. Defaults to C:/Program Files on Windows. ## SLEEF Variables sleef-3.5.1/doc/html/000077500000000000000000000000001373003144100143235ustar00rootroot00000000000000sleef-3.5.1/doc/html/CMakeLists.txt000066400000000000000000000013051373003144100170620ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.4.3) include(ExternalProject) find_package(Git REQUIRED) ExternalProject_Add(libsleef GIT_REPOSITORY https://github.com/shibatch/sleef CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/contrib ) include_directories(${CMAKE_BINARY_DIR}/contrib/include) link_directories(${CMAKE_BINARY_DIR}/contrib/lib) add_executable(hellox86 hellox86.c) add_dependencies(hellox86 libsleef) target_link_libraries(hellox86 sleef) # option(BUILD_DFT_TUTORIAL "Build DFT tutorial" OFF) if (BUILD_DFT_TUTORIAL) add_executable(dfttutorial tutorial.c) add_dependencies(dfttutorial libsleef) find_library(LIBM m) target_link_libraries(dfttutorial sleef sleefdft ${LIBM}) endif() 
sleef-3.5.1/doc/html/CNAME000066400000000000000000000000111373003144100150610ustar00rootroot00000000000000sleef.orgsleef-3.5.1/doc/html/aarch32.xhtml000066400000000000000000001570651373003144100166420ustar00rootroot00000000000000 SLEEF - Math library reference (AArch32)

SLEEF Documentation - Math library reference (AArch32)

Table of contents

Data types for AArch32 architecture

Sleef_float32x4_t_2

Description

Sleef_float32x4_t_2 is a data type for storing two float32x4_t values, which is defined in sleef.h as follows:

typedef struct {
  float32x4_t x, y;
} Sleef_float32x4_t_2;

Trigonometric Functions

Vectorized single precision sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinf4_u10(float32x4_t a);
float32x4_t Sleef_sinf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinf4_u35(float32x4_t a);
float32x4_t Sleef_sinf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cosf4_u10(float32x4_t a);
float32x4_t Sleef_cosf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cosf4_u35(float32x4_t a);
float32x4_t Sleef_cosf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision combined sine and cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_sincosf4_u10(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_sincosf4_u35(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision sine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinpif4_u05(float32x4_t a);
float32x4_t Sleef_sinpif4_u05neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinpif_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cospif4_u05(float32x4_t a);
float32x4_t Sleef_cospif4_u05neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cospif_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision combined sine and cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_sincospif4_u05(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u05neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_sincospif4_u35(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_tanf4_u10(float32x4_t a);
float32x4_t Sleef_tanf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_tanf4_u35(float32x4_t a);
float32x4_t Sleef_tanf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

Power, exponential, and logarithmic function

Vectorized single precision power function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_powf4_u10(float32x4_t a, float32x4_t b);
float32x4_t Sleef_powf4_u10neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_powf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision natural logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_logf4_u10(float32x4_t a);
float32x4_t Sleef_logf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision natural logarithmic function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_logf4_u35(float32x4_t a);
float32x4_t Sleef_logf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision base-10 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_log10f4_u10(float32x4_t a);
float32x4_t Sleef_log10f4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log10f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision base-2 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_log2f4_u10(float32x4_t a);
float32x4_t Sleef_log2f4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log2f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_log1pf4_u10(float32x4_t a);
float32x4_t Sleef_log1pf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log1pf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision base-e exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_expf4_u10(float32x4_t a);
float32x4_t Sleef_expf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision base-2 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_exp2f4_u10(float32x4_t a);
float32x4_t Sleef_exp2f4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp2f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision base-10 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_exp10f4_u10(float32x4_t a);
float32x4_t Sleef_exp10f4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp10f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision base-e exponential function minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_expm1f4_u10(float32x4_t a);
float32x4_t Sleef_expm1f4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expm1f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sqrtf4(float32x4_t a);
float32x4_t Sleef_sqrtf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sqrtf4_u35(float32x4_t a);
float32x4_t Sleef_sqrtf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision cubic root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cbrtf4_u10(float32x4_t a);
float32x4_t Sleef_cbrtf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision cubic root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cbrtf4_u35(float32x4_t a);
float32x4_t Sleef_cbrtf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_hypotf4_u05(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u05neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_hypotf4_u35(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u35neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

Inverse Trigonometric Functions

Vectorized single precision arc sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_asinf4_u10(float32x4_t a);
float32x4_t Sleef_asinf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_asinf4_u35(float32x4_t a);
float32x4_t Sleef_asinf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_acosf4_u10(float32x4_t a);
float32x4_t Sleef_acosf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_acosf4_u35(float32x4_t a);
float32x4_t Sleef_acosf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_atanf4_u10(float32x4_t a);
float32x4_t Sleef_atanf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_atanf4_u35(float32x4_t a);
float32x4_t Sleef_atanf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc tangent function of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_atan2f4_u10(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u10neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc tangent function of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_atan2f4_u35(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u35neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

Hyperbolic function and inverse hyperbolic function

Vectorized single precision hyperbolic sine function

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinhf4_u10(float32x4_t a);
float32x4_t Sleef_sinhf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision hyperbolic sine function

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinhf4_u35(float32x4_t a);
float32x4_t Sleef_sinhf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision hyperbolic cosine function

Synopsis

#include <sleef.h>

float32x4_t Sleef_coshf4_u10(float32x4_t a);
float32x4_t Sleef_coshf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision hyperbolic cosine function

Synopsis

#include <sleef.h>

float32x4_t Sleef_coshf4_u35(float32x4_t a);
float32x4_t Sleef_coshf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision hyperbolic tangent function

Synopsis

#include <sleef.h>

float32x4_t Sleef_tanhf4_u10(float32x4_t a);
float32x4_t Sleef_tanhf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision hyperbolic tangent function

Synopsis

#include <sleef.h>

float32x4_t Sleef_tanhf4_u35(float32x4_t a);
float32x4_t Sleef_tanhf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision inverse hyperbolic sine function

Synopsis

#include <sleef.h>

float32x4_t Sleef_asinhf4_u10(float32x4_t a);
float32x4_t Sleef_asinhf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinhf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision inverse hyperbolic cosine function

Synopsis

#include <sleef.h>

float32x4_t Sleef_acoshf4_u10(float32x4_t a);
float32x4_t Sleef_acoshf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acoshf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision inverse hyperbolic tangent function

Synopsis

#include <sleef.h>

float32x4_t Sleef_atanhf4_u10(float32x4_t a);
float32x4_t Sleef_atanhf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanhf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

Error and gamma function

Vectorized single precision error function

Synopsis

#include <sleef.h>

float32x4_t Sleef_erff4_u10(float32x4_t a);
float32x4_t Sleef_erff4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erff_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision complementary error function

Synopsis

#include <sleef.h>

float32x4_t Sleef_erfcf4_u15(float32x4_t a);
float32x4_t Sleef_erfcf4_u15neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erfcf_u15. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision gamma function

Synopsis

#include <sleef.h>

float32x4_t Sleef_tgammaf4_u10(float32x4_t a);
float32x4_t Sleef_tgammaf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tgammaf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision log gamma function

Synopsis

#include <sleef.h>

float32x4_t Sleef_lgammaf4_u10(float32x4_t a);
float32x4_t Sleef_lgammaf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_lgammaf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

Nearest integer function

Vectorized single precision function for rounding to integer towards zero

Synopsis

#include <sleef.h>

float32x4_t Sleef_truncf4(float32x4_t a);
float32x4_t Sleef_truncf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_truncf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

float32x4_t Sleef_floorf4(float32x4_t a);
float32x4_t Sleef_floorf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_floorf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

float32x4_t Sleef_ceilf4(float32x4_t a);
float32x4_t Sleef_ceilf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ceilf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

float32x4_t Sleef_roundf4(float32x4_t a);
float32x4_t Sleef_roundf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_roundf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

float32x4_t Sleef_rintf4(float32x4_t a);
float32x4_t Sleef_rintf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_rintf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

Other function

Vectorized single precision function for fused multiply-accumulation

Synopsis

#include <sleef.h>

float32x4_t Sleef_fmaf4(float32x4_t a, float32x4_t b, float32x4_t c);
float32x4_t Sleef_fmaf4_neon(float32x4_t a, float32x4_t b, float32x4_t c);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision FP remainder

Synopsis

#include <sleef.h>

float32x4_t Sleef_fmodf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fmodf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmodf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision FP remainder

Synopsis

#include <sleef.h>

float32x4_t Sleef_remainderf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_remainderf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_remainderf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

float32x4_t Sleef_frfrexpf4(float32x4_t a);
float32x4_t Sleef_frfrexpf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_frfrexpf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_modff4(float32x4_t a);
Sleef_float32x4_t_2 Sleef_modff4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_modff. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for calculating the absolute value

Synopsis

#include <sleef.h>

float32x4_t Sleef_fabsf4(float32x4_t a);
float32x4_t Sleef_fabsf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fabsf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for copying signs

Synopsis

#include <sleef.h>

float32x4_t Sleef_copysignf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_copysignf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_copysignf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for determining maximum of two values

Synopsis

#include <sleef.h>

float32x4_t Sleef_fmaxf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fmaxf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaxf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for determining minimum of two values

Synopsis

#include <sleef.h>

float32x4_t Sleef_fminf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fminf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fminf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function to calculate positive difference of two values

Synopsis

#include <sleef.h>

float32x4_t Sleef_fdimf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fdimf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fdimf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for obtaining the next representable FP value

Synopsis

#include <sleef.h>

float32x4_t Sleef_nextafterf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_nextafterf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_nextafterf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

sleef-3.5.1/doc/html/aarch64.xhtml000066400000000000000000010036101373003144100166320ustar00rootroot00000000000000 SLEEF - Math library reference (AArch64)

SLEEF Documentation - Math library reference (AArch64)

Table of contents

Data types for AArch64 architecture

Sleef_float32x4_t_2

Description

Sleef_float32x4_t_2 is a data type for storing two float32x4_t values, which is defined in sleef.h as follows:

typedef struct {
  float32x4_t x, y;
} Sleef_float32x4_t_2;

Sleef_float64x2_t_2

Description

Sleef_float64x2_t_2 is a data type for storing two float64x2_t values, which is defined in sleef.h as follows:

typedef struct {
  float64x2_t x, y;
} Sleef_float64x2_t_2;

Sleef_svfloat32_t_2

Description

Sleef_svfloat32_t_2 is a data type for storing two svfloat32_t values, which is defined in sleef.h as follows:

typedef struct {
  svfloat32_t x, y;
} Sleef_svfloat32_t_2;

Sleef_svfloat64_t_2

Description

Sleef_svfloat64_t_2 is a data type for storing two svfloat64_t values, which is defined in sleef.h as follows:

typedef struct {
  svfloat64_t x, y;
} Sleef_svfloat64_t_2;

Trigonometric Functions

Vectorized double precision sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sind1_u10purec(double a);
double Sleef_sind1_u10purecfma(double a);
double Sleef_cinz_sind1_u10purec(double a);
double Sleef_finz_sind1_u10purecfma(double a);

float64x2_t Sleef_sind2_u10(float64x2_t a);
float64x2_t Sleef_sind2_u10advsimd(float64x2_t a);
float64x2_t Sleef_sind2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_sind2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_sind2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_sindx_u10sve(svfloat64_t a);
svfloat64_t Sleef_sindx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_sindx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_sindx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sin_u10 with the same accuracy specification.


Vectorized single precision sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sinf1_u10purec(float a);
float Sleef_sinf1_u10purecfma(float a);
float Sleef_cinz_sinf1_u10purec(float a);
float Sleef_finz_sinf1_u10purecfma(float a);

float32x4_t Sleef_sinf4_u10(float32x4_t a);
float32x4_t Sleef_sinf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_sinf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_sinf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_sinf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_sinfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_sinfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_sinfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_sinfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u10 with the same accuracy specification.


Vectorized double precision sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sind1_u35purec(double a);
double Sleef_sind1_u35purecfma(double a);
double Sleef_cinz_sind1_u35purec(double a);
double Sleef_finz_sind1_u35purecfma(double a);

float64x2_t Sleef_sind2_u35(float64x2_t a);
float64x2_t Sleef_sind2_u35advsimd(float64x2_t a);
float64x2_t Sleef_sind2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_sind2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_sind2_u35advsimd(float64x2_t a);

svfloat64_t Sleef_sindx_u35sve(svfloat64_t a);
svfloat64_t Sleef_sindx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_sindx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_sindx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sin_u35 with the same accuracy specification.


Vectorized single precision sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sinf1_u35purec(float a);
float Sleef_sinf1_u35purecfma(float a);
float Sleef_cinz_sinf1_u35purec(float a);
float Sleef_finz_sinf1_u35purecfma(float a);

float32x4_t Sleef_sinf4_u35(float32x4_t a);
float32x4_t Sleef_sinf4_u35advsimd(float32x4_t a);
float32x4_t Sleef_sinf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_sinf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_sinf4_u35advsimd(float32x4_t a);

svfloat32_t Sleef_sinfx_u35sve(svfloat32_t a);
svfloat32_t Sleef_sinfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_sinfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_sinfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u35 with the same accuracy specification.


Vectorized double precision cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cosd1_u10purec(double a);
double Sleef_cosd1_u10purecfma(double a);
double Sleef_cinz_cosd1_u10purec(double a);
double Sleef_finz_cosd1_u10purecfma(double a);

float64x2_t Sleef_cosd2_u10(float64x2_t a);
float64x2_t Sleef_cosd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_cosd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_cosd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_cosd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_cosdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_cosdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_cosdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_cosdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cos_u10 with the same accuracy specification.


Vectorized single precision cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_cosf1_u10purec(float a);
float Sleef_cosf1_u10purecfma(float a);
float Sleef_cinz_cosf1_u10purec(float a);
float Sleef_finz_cosf1_u10purecfma(float a);

float32x4_t Sleef_cosf4_u10(float32x4_t a);
float32x4_t Sleef_cosf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_cosf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_cosf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_cosf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_cosfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_cosfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_cosfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_cosfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u10 with the same accuracy specification.


Vectorized double precision cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_cosd1_u35purec(double a);
double Sleef_cosd1_u35purecfma(double a);
double Sleef_cinz_cosd1_u35purec(double a);
double Sleef_finz_cosd1_u35purecfma(double a);

float64x2_t Sleef_cosd2_u35(float64x2_t a);
float64x2_t Sleef_cosd2_u35advsimd(float64x2_t a);
float64x2_t Sleef_cosd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_cosd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_cosd2_u35advsimd(float64x2_t a);

svfloat64_t Sleef_cosdx_u35sve(svfloat64_t a);
svfloat64_t Sleef_cosdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_cosdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_cosdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cos_u35 with the same accuracy specification.


Vectorized single precision cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_cosf1_u35purec(float a);
float Sleef_cosf1_u35purecfma(float a);
float Sleef_cinz_cosf1_u35purec(float a);
float Sleef_finz_cosf1_u35purecfma(float a);

float32x4_t Sleef_cosf4_u35(float32x4_t a);
float32x4_t Sleef_cosf4_u35advsimd(float32x4_t a);
float32x4_t Sleef_cosf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_cosf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_cosf4_u35advsimd(float32x4_t a);

svfloat32_t Sleef_cosfx_u35sve(svfloat32_t a);
svfloat32_t Sleef_cosfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_cosfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_cosfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u35 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincosd1_u10purec(double a);
Sleef_double2 Sleef_sincosd1_u10purecfma(double a);
Sleef_double2 Sleef_cinz_sincosd1_u10purec(double a);
Sleef_double2 Sleef_finz_sincosd1_u10purecfma(double a);

Sleef_float64x2_t_2 Sleef_sincosd2_u10(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincosd2_u10advsimd(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincosd2_u10advsimdnofma(float64x2_t a);
Sleef_float64x2_t_2 Sleef_cinz_sincosd2_u10advsimdnofma(float64x2_t a);
Sleef_float64x2_t_2 Sleef_finz_sincosd2_u10advsimd(float64x2_t a);

Sleef_svfloat64_t_2 Sleef_sincosdx_u10sve(svfloat64_t a);
Sleef_svfloat64_t_2 Sleef_sincosdx_u10svenofma(svfloat64_t a);
Sleef_svfloat64_t_2 Sleef_cinz_sincosdx_u10svenofma(svfloat64_t a);
Sleef_svfloat64_t_2 Sleef_finz_sincosdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincos_u10 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float2 Sleef_sincosf1_u10purec(float a);
Sleef_float2 Sleef_sincosf1_u10purecfma(float a);
Sleef_float2 Sleef_cinz_sincosf1_u10purec(float a);
Sleef_float2 Sleef_finz_sincosf1_u10purecfma(float a);

Sleef_float32x4_t_2 Sleef_sincosf4_u10(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u10advsimd(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u10advsimdnofma(float32x4_t a);
Sleef_float32x4_t_2 Sleef_cinz_sincosf4_u10advsimdnofma(float32x4_t a);
Sleef_float32x4_t_2 Sleef_finz_sincosf4_u10advsimd(float32x4_t a);

Sleef_svfloat32_t_2 Sleef_sincosfx_u10sve(svfloat32_t a);
Sleef_svfloat32_t_2 Sleef_sincosfx_u10svenofma(svfloat32_t a);
Sleef_svfloat32_t_2 Sleef_cinz_sincosfx_u10svenofma(svfloat32_t a);
Sleef_svfloat32_t_2 Sleef_finz_sincosfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u10 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_double2 Sleef_sincosd1_u35purec(double a);
Sleef_double2 Sleef_sincosd1_u35purecfma(double a);
Sleef_double2 Sleef_cinz_sincosd1_u35purec(double a);
Sleef_double2 Sleef_finz_sincosd1_u35purecfma(double a);

Sleef_float64x2_t_2 Sleef_sincosd2_u35(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincosd2_u35advsimd(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincosd2_u35advsimdnofma(float64x2_t a);
Sleef_float64x2_t_2 Sleef_cinz_sincosd2_u35advsimdnofma(float64x2_t a);
Sleef_float64x2_t_2 Sleef_finz_sincosd2_u35advsimd(float64x2_t a);

Sleef_svfloat64_t_2 Sleef_sincosdx_u35sve(svfloat64_t a);
Sleef_svfloat64_t_2 Sleef_sincosdx_u35svenofma(svfloat64_t a);
Sleef_svfloat64_t_2 Sleef_cinz_sincosdx_u35svenofma(svfloat64_t a);
Sleef_svfloat64_t_2 Sleef_finz_sincosdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincos_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_float2 Sleef_sincosf1_u35purec(float a);
Sleef_float2 Sleef_sincosf1_u35purecfma(float a);
Sleef_float2 Sleef_cinz_sincosf1_u35purec(float a);
Sleef_float2 Sleef_finz_sincosf1_u35purecfma(float a);

Sleef_float32x4_t_2 Sleef_sincosf4_u35(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u35advsimd(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u35advsimdnofma(float32x4_t a);
Sleef_float32x4_t_2 Sleef_cinz_sincosf4_u35advsimdnofma(float32x4_t a);
Sleef_float32x4_t_2 Sleef_finz_sincosf4_u35advsimd(float32x4_t a);

Sleef_svfloat32_t_2 Sleef_sincosfx_u35sve(svfloat32_t a);
Sleef_svfloat32_t_2 Sleef_sincosfx_u35svenofma(svfloat32_t a);
Sleef_svfloat32_t_2 Sleef_cinz_sincosfx_u35svenofma(svfloat32_t a);
Sleef_svfloat32_t_2 Sleef_finz_sincosfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u35 with the same accuracy specification.


Vectorized double precision sine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sinpid1_u05purec(double a);
double Sleef_sinpid1_u05purecfma(double a);
double Sleef_cinz_sinpid1_u05purec(double a);
double Sleef_finz_sinpid1_u05purecfma(double a);

float64x2_t Sleef_sinpid2_u05(float64x2_t a);
float64x2_t Sleef_sinpid2_u05advsimd(float64x2_t a);
float64x2_t Sleef_sinpid2_u05advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_sinpid2_u05advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_sinpid2_u05advsimd(float64x2_t a);

svfloat64_t Sleef_sinpidx_u05sve(svfloat64_t a);
svfloat64_t Sleef_sinpidx_u05svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_sinpidx_u05svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_sinpidx_u05sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinpi_u05 with the same accuracy specification.


Vectorized single precision sine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sinpif1_u05purec(float a);
float Sleef_sinpif1_u05purecfma(float a);
float Sleef_cinz_sinpif1_u05purec(float a);
float Sleef_finz_sinpif1_u05purecfma(float a);

float32x4_t Sleef_sinpif4_u05(float32x4_t a);
float32x4_t Sleef_sinpif4_u05advsimd(float32x4_t a);
float32x4_t Sleef_sinpif4_u05advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_sinpif4_u05advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_sinpif4_u05advsimd(float32x4_t a);

svfloat32_t Sleef_sinpifx_u05sve(svfloat32_t a);
svfloat32_t Sleef_sinpifx_u05svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_sinpifx_u05svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_sinpifx_u05sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinpif_u05 with the same accuracy specification.


Vectorized double precision cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cospid1_u05purec(double a);
double Sleef_cospid1_u05purecfma(double a);
double Sleef_cinz_cospid1_u05purec(double a);
double Sleef_finz_cospid1_u05purecfma(double a);

float64x2_t Sleef_cospid2_u05(float64x2_t a);
float64x2_t Sleef_cospid2_u05advsimd(float64x2_t a);
float64x2_t Sleef_cospid2_u05advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_cospid2_u05advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_cospid2_u05advsimd(float64x2_t a);

svfloat64_t Sleef_cospidx_u05sve(svfloat64_t a);
svfloat64_t Sleef_cospidx_u05svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_cospidx_u05svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_cospidx_u05sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cospi_u05 with the same accuracy specification.


Vectorized single precision cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_cospif1_u05purec(float a);
float Sleef_cospif1_u05purecfma(float a);
float Sleef_cinz_cospif1_u05purec(float a);
float Sleef_finz_cospif1_u05purecfma(float a);

float32x4_t Sleef_cospif4_u05(float32x4_t a);
float32x4_t Sleef_cospif4_u05advsimd(float32x4_t a);
float32x4_t Sleef_cospif4_u05advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_cospif4_u05advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_cospif4_u05advsimd(float32x4_t a);

svfloat32_t Sleef_cospifx_u05sve(svfloat32_t a);
svfloat32_t Sleef_cospifx_u05svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_cospifx_u05svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_cospifx_u05sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cospif_u05 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincospid1_u05purec(double a);
Sleef_double2 Sleef_sincospid1_u05purecfma(double a);
Sleef_double2 Sleef_cinz_sincospid1_u05purec(double a);
Sleef_double2 Sleef_finz_sincospid1_u05purecfma(double a);

Sleef_float64x2_t_2 Sleef_sincospid2_u05(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincospid2_u05advsimd(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincospid2_u05advsimdnofma(float64x2_t a);
Sleef_float64x2_t_2 Sleef_cinz_sincospid2_u05advsimdnofma(float64x2_t a);
Sleef_float64x2_t_2 Sleef_finz_sincospid2_u05advsimd(float64x2_t a);

Sleef_svfloat64_t_2 Sleef_sincospidx_u05sve(svfloat64_t a);
Sleef_svfloat64_t_2 Sleef_sincospidx_u05svenofma(svfloat64_t a);
Sleef_svfloat64_t_2 Sleef_cinz_sincospidx_u05svenofma(svfloat64_t a);
Sleef_svfloat64_t_2 Sleef_finz_sincospidx_u05sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospi_u05 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float2 Sleef_sincospif1_u05purec(float a);
Sleef_float2 Sleef_sincospif1_u05purecfma(float a);
Sleef_float2 Sleef_cinz_sincospif1_u05purec(float a);
Sleef_float2 Sleef_finz_sincospif1_u05purecfma(float a);

Sleef_float32x4_t_2 Sleef_sincospif4_u05(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u05advsimd(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u05advsimdnofma(float32x4_t a);
Sleef_float32x4_t_2 Sleef_cinz_sincospif4_u05advsimdnofma(float32x4_t a);
Sleef_float32x4_t_2 Sleef_finz_sincospif4_u05advsimd(float32x4_t a);

Sleef_svfloat32_t_2 Sleef_sincospifx_u05sve(svfloat32_t a);
Sleef_svfloat32_t_2 Sleef_sincospifx_u05svenofma(svfloat32_t a);
Sleef_svfloat32_t_2 Sleef_cinz_sincospifx_u05svenofma(svfloat32_t a);
Sleef_svfloat32_t_2 Sleef_finz_sincospifx_u05sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u05 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_double2 Sleef_sincospid1_u35purec(double a);
Sleef_double2 Sleef_sincospid1_u35purecfma(double a);
Sleef_double2 Sleef_cinz_sincospid1_u35purec(double a);
Sleef_double2 Sleef_finz_sincospid1_u35purecfma(double a);

Sleef_float64x2_t_2 Sleef_sincospid2_u35(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincospid2_u35advsimd(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincospid2_u35advsimdnofma(float64x2_t a);
Sleef_float64x2_t_2 Sleef_cinz_sincospid2_u35advsimdnofma(float64x2_t a);
Sleef_float64x2_t_2 Sleef_finz_sincospid2_u35advsimd(float64x2_t a);

Sleef_svfloat64_t_2 Sleef_sincospidx_u35sve(svfloat64_t a);
Sleef_svfloat64_t_2 Sleef_sincospidx_u35svenofma(svfloat64_t a);
Sleef_svfloat64_t_2 Sleef_cinz_sincospidx_u35svenofma(svfloat64_t a);
Sleef_svfloat64_t_2 Sleef_finz_sincospidx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospi_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_float2 Sleef_sincospif1_u35purec(float a);
Sleef_float2 Sleef_sincospif1_u35purecfma(float a);
Sleef_float2 Sleef_cinz_sincospif1_u35purec(float a);
Sleef_float2 Sleef_finz_sincospif1_u35purecfma(float a);

Sleef_float32x4_t_2 Sleef_sincospif4_u35(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u35advsimd(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u35advsimdnofma(float32x4_t a);
Sleef_float32x4_t_2 Sleef_cinz_sincospif4_u35advsimdnofma(float32x4_t a);
Sleef_float32x4_t_2 Sleef_finz_sincospif4_u35advsimd(float32x4_t a);

Sleef_svfloat32_t_2 Sleef_sincospifx_u35sve(svfloat32_t a);
Sleef_svfloat32_t_2 Sleef_sincospifx_u35svenofma(svfloat32_t a);
Sleef_svfloat32_t_2 Sleef_cinz_sincospifx_u35svenofma(svfloat32_t a);
Sleef_svfloat32_t_2 Sleef_finz_sincospifx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u35 with the same accuracy specification.


Vectorized double precision tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_tand1_u10purec(double a);
double Sleef_tand1_u10purecfma(double a);
double Sleef_cinz_tand1_u10purec(double a);
double Sleef_finz_tand1_u10purecfma(double a);

float64x2_t Sleef_tand2_u10(float64x2_t a);
float64x2_t Sleef_tand2_u10advsimd(float64x2_t a);
float64x2_t Sleef_tand2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_tand2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_tand2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_tandx_u10sve(svfloat64_t a);
svfloat64_t Sleef_tandx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_tandx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_tandx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tan_u10 with the same accuracy specification.


Vectorized single precision tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_tanf1_u10purec(float a);
float Sleef_tanf1_u10purecfma(float a);
float Sleef_cinz_tanf1_u10purec(float a);
float Sleef_finz_tanf1_u10purecfma(float a);

float32x4_t Sleef_tanf4_u10(float32x4_t a);
float32x4_t Sleef_tanf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_tanf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_tanf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_tanf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_tanfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_tanfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_tanfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_tanfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u10 with the same accuracy specification.


Vectorized double precision tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_tand1_u35purec(double a);
double Sleef_tand1_u35purecfma(double a);
double Sleef_cinz_tand1_u35purec(double a);
double Sleef_finz_tand1_u35purecfma(double a);

float64x2_t Sleef_tand2_u35(float64x2_t a);
float64x2_t Sleef_tand2_u35advsimd(float64x2_t a);
float64x2_t Sleef_tand2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_tand2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_tand2_u35advsimd(float64x2_t a);

svfloat64_t Sleef_tandx_u35sve(svfloat64_t a);
svfloat64_t Sleef_tandx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_tandx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_tandx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tan_u35 with the same accuracy specification.


Vectorized single precision tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_tanf1_u35purec(float a);
float Sleef_tanf1_u35purecfma(float a);
float Sleef_cinz_tanf1_u35purec(float a);
float Sleef_finz_tanf1_u35purecfma(float a);

float32x4_t Sleef_tanf4_u35(float32x4_t a);
float32x4_t Sleef_tanf4_u35advsimd(float32x4_t a);
float32x4_t Sleef_tanf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_tanf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_tanf4_u35advsimd(float32x4_t a);

svfloat32_t Sleef_tanfx_u35sve(svfloat32_t a);
svfloat32_t Sleef_tanfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_tanfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_tanfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u35 with the same accuracy specification.

Power, exponential, and logarithmic functions

Vectorized double precision power function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_powd1_u10purec(double a, double b);
double Sleef_powd1_u10purecfma(double a, double b);
double Sleef_cinz_powd1_u10purec(double a, double b);
double Sleef_finz_powd1_u10purecfma(double a, double b);

float64x2_t Sleef_powd2_u10(float64x2_t a, float64x2_t b);
float64x2_t Sleef_powd2_u10advsimd(float64x2_t a, float64x2_t b);
float64x2_t Sleef_powd2_u10advsimdnofma(float64x2_t a, float64x2_t b);
float64x2_t Sleef_cinz_powd2_u10advsimdnofma(float64x2_t a, float64x2_t b);
float64x2_t Sleef_finz_powd2_u10advsimd(float64x2_t a, float64x2_t b);

svfloat64_t Sleef_powdx_u10sve(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_powdx_u10svenofma(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_cinz_powdx_u10svenofma(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_finz_powdx_u10sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_pow_u10 with the same accuracy specification.


Vectorized single precision power function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_powf1_u10purec(float a, float b);
float Sleef_powf1_u10purecfma(float a, float b);
float Sleef_cinz_powf1_u10purec(float a, float b);
float Sleef_finz_powf1_u10purecfma(float a, float b);

float32x4_t Sleef_powf4_u10(float32x4_t a, float32x4_t b);
float32x4_t Sleef_powf4_u10advsimd(float32x4_t a, float32x4_t b);
float32x4_t Sleef_powf4_u10advsimdnofma(float32x4_t a, float32x4_t b);
float32x4_t Sleef_cinz_powf4_u10advsimdnofma(float32x4_t a, float32x4_t b);
float32x4_t Sleef_finz_powf4_u10advsimd(float32x4_t a, float32x4_t b);

svfloat32_t Sleef_powfx_u10sve(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_powfx_u10svenofma(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_cinz_powfx_u10svenofma(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_finz_powfx_u10sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_powf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_logd1_u10purec(double a);
double Sleef_logd1_u10purecfma(double a);
double Sleef_cinz_logd1_u10purec(double a);
double Sleef_finz_logd1_u10purecfma(double a);

float64x2_t Sleef_logd2_u10(float64x2_t a);
float64x2_t Sleef_logd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_logd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_logd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_logd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_logdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_logdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_logdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_logdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log_u10 with the same accuracy specification.


Vectorized single precision natural logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_logf1_u10purec(float a);
float Sleef_logf1_u10purecfma(float a);
float Sleef_cinz_logf1_u10purec(float a);
float Sleef_finz_logf1_u10purecfma(float a);

float32x4_t Sleef_logf4_u10(float32x4_t a);
float32x4_t Sleef_logf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_logf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_logf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_logf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_logfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_logfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_logfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_logfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_logd1_u35purec(double a);
double Sleef_logd1_u35purecfma(double a);
double Sleef_cinz_logd1_u35purec(double a);
double Sleef_finz_logd1_u35purecfma(double a);

float64x2_t Sleef_logd2_u35(float64x2_t a);
float64x2_t Sleef_logd2_u35advsimd(float64x2_t a);
float64x2_t Sleef_logd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_logd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_logd2_u35advsimd(float64x2_t a);

svfloat64_t Sleef_logdx_u35sve(svfloat64_t a);
svfloat64_t Sleef_logdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_logdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_logdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log_u35 with the same accuracy specification.


Vectorized single precision natural logarithmic function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_logf1_u35purec(float a);
float Sleef_logf1_u35purecfma(float a);
float Sleef_cinz_logf1_u35purec(float a);
float Sleef_finz_logf1_u35purecfma(float a);

float32x4_t Sleef_logf4_u35(float32x4_t a);
float32x4_t Sleef_logf4_u35advsimd(float32x4_t a);
float32x4_t Sleef_logf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_logf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_logf4_u35advsimd(float32x4_t a);

svfloat32_t Sleef_logfx_u35sve(svfloat32_t a);
svfloat32_t Sleef_logfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_logfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_logfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u35 with the same accuracy specification.


Vectorized double precision base-10 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log10d1_u10purec(double a);
double Sleef_log10d1_u10purecfma(double a);
double Sleef_cinz_log10d1_u10purec(double a);
double Sleef_finz_log10d1_u10purecfma(double a);

float64x2_t Sleef_log10d2_u10(float64x2_t a);
float64x2_t Sleef_log10d2_u10advsimd(float64x2_t a);
float64x2_t Sleef_log10d2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_log10d2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_log10d2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_log10dx_u10sve(svfloat64_t a);
svfloat64_t Sleef_log10dx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_log10dx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_log10dx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log10_u10 with the same accuracy specification.


Vectorized single precision base-10 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_log10f1_u10purec(float a);
float Sleef_log10f1_u10purecfma(float a);
float Sleef_cinz_log10f1_u10purec(float a);
float Sleef_finz_log10f1_u10purecfma(float a);

float32x4_t Sleef_log10f4_u10(float32x4_t a);
float32x4_t Sleef_log10f4_u10advsimd(float32x4_t a);
float32x4_t Sleef_log10f4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_log10f4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_log10f4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_log10fx_u10sve(svfloat32_t a);
svfloat32_t Sleef_log10fx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_log10fx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_log10fx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log10f_u10 with the same accuracy specification.


Vectorized double precision base-2 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log2d1_u10purec(double a);
double Sleef_log2d1_u10purecfma(double a);
double Sleef_cinz_log2d1_u10purec(double a);
double Sleef_finz_log2d1_u10purecfma(double a);

float64x2_t Sleef_log2d2_u10(float64x2_t a);
float64x2_t Sleef_log2d2_u10advsimd(float64x2_t a);
float64x2_t Sleef_log2d2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_log2d2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_log2d2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_log2dx_u10sve(svfloat64_t a);
svfloat64_t Sleef_log2dx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_log2dx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_log2dx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log2_u10 with the same accuracy specification.


Vectorized single precision base-2 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_log2f1_u10purec(float a);
float Sleef_log2f1_u10purecfma(float a);
float Sleef_cinz_log2f1_u10purec(float a);
float Sleef_finz_log2f1_u10purecfma(float a);

float32x4_t Sleef_log2f4_u10(float32x4_t a);
float32x4_t Sleef_log2f4_u10advsimd(float32x4_t a);
float32x4_t Sleef_log2f4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_log2f4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_log2f4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_log2fx_u10sve(svfloat32_t a);
svfloat32_t Sleef_log2fx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_log2fx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_log2fx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log2f_u10 with the same accuracy specification.


Vectorized double precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log1pd1_u10purec(double a);
double Sleef_log1pd1_u10purecfma(double a);
double Sleef_cinz_log1pd1_u10purec(double a);
double Sleef_finz_log1pd1_u10purecfma(double a);

float64x2_t Sleef_log1pd2_u10(float64x2_t a);
float64x2_t Sleef_log1pd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_log1pd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_log1pd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_log1pd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_log1pdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_log1pdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_log1pdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_log1pdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log1p_u10 with the same accuracy specification.


Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_log1pf1_u10purec(float a);
float Sleef_log1pf1_u10purecfma(float a);
float Sleef_cinz_log1pf1_u10purec(float a);
float Sleef_finz_log1pf1_u10purecfma(float a);

float32x4_t Sleef_log1pf4_u10(float32x4_t a);
float32x4_t Sleef_log1pf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_log1pf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_log1pf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_log1pf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_log1pfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_log1pfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_log1pfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_log1pfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log1pf_u10 with the same accuracy specification.


Vectorized double precision base-e exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_expd1_u10purec(double a);
double Sleef_expd1_u10purecfma(double a);
double Sleef_cinz_expd1_u10purec(double a);
double Sleef_finz_expd1_u10purecfma(double a);

float64x2_t Sleef_expd2_u10(float64x2_t a);
float64x2_t Sleef_expd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_expd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_expd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_expd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_expdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_expdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_expdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_expdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp_u10 with the same accuracy specification.


Vectorized single precision base-e exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_expf1_u10purec(float a);
float Sleef_expf1_u10purecfma(float a);
float Sleef_cinz_expf1_u10purec(float a);
float Sleef_finz_expf1_u10purecfma(float a);

float32x4_t Sleef_expf4_u10(float32x4_t a);
float32x4_t Sleef_expf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_expf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_expf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_expf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_expfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_expfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_expfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_expfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expf_u10 with the same accuracy specification.


Vectorized double precision base-2 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_exp2d1_u10purec(double a);
double Sleef_exp2d1_u10purecfma(double a);
double Sleef_cinz_exp2d1_u10purec(double a);
double Sleef_finz_exp2d1_u10purecfma(double a);

float64x2_t Sleef_exp2d2_u10(float64x2_t a);
float64x2_t Sleef_exp2d2_u10advsimd(float64x2_t a);
float64x2_t Sleef_exp2d2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_exp2d2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_exp2d2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_exp2dx_u10sve(svfloat64_t a);
svfloat64_t Sleef_exp2dx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_exp2dx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_exp2dx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp2_u10 with the same accuracy specification.


Vectorized single precision base-2 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_exp2f1_u10purec(float a);
float Sleef_exp2f1_u10purecfma(float a);
float Sleef_cinz_exp2f1_u10purec(float a);
float Sleef_finz_exp2f1_u10purecfma(float a);

float32x4_t Sleef_exp2f4_u10(float32x4_t a);
float32x4_t Sleef_exp2f4_u10advsimd(float32x4_t a);
float32x4_t Sleef_exp2f4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_exp2f4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_exp2f4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_exp2fx_u10sve(svfloat32_t a);
svfloat32_t Sleef_exp2fx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_exp2fx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_exp2fx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp2f_u10 with the same accuracy specification.


Vectorized double precision base-10 exponential function with 1.09 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_exp10d1_u10purec(double a);
double Sleef_exp10d1_u10purecfma(double a);
double Sleef_cinz_exp10d1_u10purec(double a);
double Sleef_finz_exp10d1_u10purecfma(double a);

float64x2_t Sleef_exp10d2_u10(float64x2_t a);
float64x2_t Sleef_exp10d2_u10advsimd(float64x2_t a);
float64x2_t Sleef_exp10d2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_exp10d2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_exp10d2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_exp10dx_u10sve(svfloat64_t a);
svfloat64_t Sleef_exp10dx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_exp10dx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_exp10dx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp10_u10 with the same accuracy specification.


Vectorized single precision base-10 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_exp10f1_u10purec(float a);
float Sleef_exp10f1_u10purecfma(float a);
float Sleef_cinz_exp10f1_u10purec(float a);
float Sleef_finz_exp10f1_u10purecfma(float a);

float32x4_t Sleef_exp10f4_u10(float32x4_t a);
float32x4_t Sleef_exp10f4_u10advsimd(float32x4_t a);
float32x4_t Sleef_exp10f4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_exp10f4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_exp10f4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_exp10fx_u10sve(svfloat32_t a);
svfloat32_t Sleef_exp10fx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_exp10fx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_exp10fx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp10f_u10 with the same accuracy specification.


Vectorized double precision base-e exponential function minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_expm1d1_u10purec(double a);
double Sleef_expm1d1_u10purecfma(double a);
double Sleef_cinz_expm1d1_u10purec(double a);
double Sleef_finz_expm1d1_u10purecfma(double a);

float64x2_t Sleef_expm1d2_u10(float64x2_t a);
float64x2_t Sleef_expm1d2_u10advsimd(float64x2_t a);
float64x2_t Sleef_expm1d2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_expm1d2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_expm1d2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_expm1dx_u10sve(svfloat64_t a);
svfloat64_t Sleef_expm1dx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_expm1dx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_expm1dx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expm1_u10 with the same accuracy specification.


Vectorized single precision base-e exponential function minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_expm1f1_u10purec(float a);
float Sleef_expm1f1_u10purecfma(float a);
float Sleef_cinz_expm1f1_u10purec(float a);
float Sleef_finz_expm1f1_u10purecfma(float a);

float32x4_t Sleef_expm1f4_u10(float32x4_t a);
float32x4_t Sleef_expm1f4_u10advsimd(float32x4_t a);
float32x4_t Sleef_expm1f4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_expm1f4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_expm1f4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_expm1fx_u10sve(svfloat32_t a);
svfloat32_t Sleef_expm1fx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_expm1fx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_expm1fx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expm1f_u10 with the same accuracy specification.


Vectorized double precision square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sqrtd1_u05purec(double a);
double Sleef_sqrtd1_u05purecfma(double a);
double Sleef_cinz_sqrtd1_u05purec(double a);
double Sleef_finz_sqrtd1_u05purecfma(double a);

float64x2_t Sleef_sqrtd2_u05(float64x2_t a);
float64x2_t Sleef_sqrtd2_u05advsimd(float64x2_t a);
float64x2_t Sleef_sqrtd2_u05advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_sqrtd2_u05advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_sqrtd2_u05advsimd(float64x2_t a);

svfloat64_t Sleef_sqrtdx_u05sve(svfloat64_t a);
svfloat64_t Sleef_sqrtdx_u05svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_sqrtdx_u05svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_sqrtdx_u05sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrt_u05 with the same accuracy specification.


Vectorized single precision square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sqrtf1_u05purec(float a);
float Sleef_sqrtf1_u05purecfma(float a);
float Sleef_cinz_sqrtf1_u05purec(float a);
float Sleef_finz_sqrtf1_u05purecfma(float a);

float32x4_t Sleef_sqrtf4_u05(float32x4_t a);
float32x4_t Sleef_sqrtf4_u05advsimd(float32x4_t a);
float32x4_t Sleef_sqrtf4_u05advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_sqrtf4_u05advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_sqrtf4_u05advsimd(float32x4_t a);

svfloat32_t Sleef_sqrtfx_u05sve(svfloat32_t a);
svfloat32_t Sleef_sqrtfx_u05svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_sqrtfx_u05svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_sqrtfx_u05sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u05 with the same accuracy specification.


Vectorized double precision square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sqrtd1_u35purec(double a);
double Sleef_sqrtd1_u35purecfma(double a);
double Sleef_cinz_sqrtd1_u35purec(double a);
double Sleef_finz_sqrtd1_u35purecfma(double a);

float64x2_t Sleef_sqrtd2_u35(float64x2_t a);
float64x2_t Sleef_sqrtd2_u35advsimd(float64x2_t a);
float64x2_t Sleef_sqrtd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_sqrtd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_sqrtd2_u35advsimd(float64x2_t a);

svfloat64_t Sleef_sqrtdx_u35sve(svfloat64_t a);
svfloat64_t Sleef_sqrtdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_sqrtdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_sqrtdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrt_u35 with the same accuracy specification.


Vectorized single precision square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sqrtf1_u35purec(float a);
float Sleef_sqrtf1_u35purecfma(float a);
float Sleef_cinz_sqrtf1_u35purec(float a);
float Sleef_finz_sqrtf1_u35purecfma(float a);

float32x4_t Sleef_sqrtf4_u35(float32x4_t a);
float32x4_t Sleef_sqrtf4_u35advsimd(float32x4_t a);
float32x4_t Sleef_sqrtf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_sqrtf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_sqrtf4_u35advsimd(float32x4_t a);

svfloat32_t Sleef_sqrtfx_u35sve(svfloat32_t a);
svfloat32_t Sleef_sqrtfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_sqrtfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_sqrtfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u35 with the same accuracy specification.


Vectorized double precision cubic root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cbrtd1_u10purec(double a);
double Sleef_cbrtd1_u10purecfma(double a);
double Sleef_cinz_cbrtd1_u10purec(double a);
double Sleef_finz_cbrtd1_u10purecfma(double a);

float64x2_t Sleef_cbrtd2_u10(float64x2_t a);
float64x2_t Sleef_cbrtd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_cbrtd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_cbrtd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_cbrtd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_cbrtdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_cbrtdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_cbrtdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_cbrtdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrt_u10 with the same accuracy specification.


Vectorized single precision cubic root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_cbrtf1_u10purec(float a);
float Sleef_cbrtf1_u10purecfma(float a);
float Sleef_cinz_cbrtf1_u10purec(float a);
float Sleef_finz_cbrtf1_u10purecfma(float a);

float32x4_t Sleef_cbrtf4_u10(float32x4_t a);
float32x4_t Sleef_cbrtf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_cbrtf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_cbrtf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_cbrtf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_cbrtfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_cbrtfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_cbrtfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_cbrtfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u10 with the same accuracy specification.


Vectorized double precision cubic root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_cbrtd1_u35purec(double a);
double Sleef_cbrtd1_u35purecfma(double a);
double Sleef_cinz_cbrtd1_u35purec(double a);
double Sleef_finz_cbrtd1_u35purecfma(double a);

float64x2_t Sleef_cbrtd2_u35(float64x2_t a);
float64x2_t Sleef_cbrtd2_u35advsimd(float64x2_t a);
float64x2_t Sleef_cbrtd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_cbrtd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_cbrtd2_u35advsimd(float64x2_t a);

svfloat64_t Sleef_cbrtdx_u35sve(svfloat64_t a);
svfloat64_t Sleef_cbrtdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_cbrtdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_cbrtdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrt_u35 with the same accuracy specification.


Vectorized single precision cubic root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_cbrtf1_u35purec(float a);
float Sleef_cbrtf1_u35purecfma(float a);
float Sleef_cinz_cbrtf1_u35purec(float a);
float Sleef_finz_cbrtf1_u35purecfma(float a);

float32x4_t Sleef_cbrtf4_u35(float32x4_t a);
float32x4_t Sleef_cbrtf4_u35advsimd(float32x4_t a);
float32x4_t Sleef_cbrtf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_cbrtf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_cbrtf4_u35advsimd(float32x4_t a);

svfloat32_t Sleef_cbrtfx_u35sve(svfloat32_t a);
svfloat32_t Sleef_cbrtfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_cbrtfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_cbrtfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u35 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_hypotd1_u05purec(double a, double b);
double Sleef_hypotd1_u05purecfma(double a, double b);
double Sleef_cinz_hypotd1_u05purec(double a, double b);
double Sleef_finz_hypotd1_u05purecfma(double a, double b);

float64x2_t Sleef_hypotd2_u05(float64x2_t a, float64x2_t b);
float64x2_t Sleef_hypotd2_u05advsimd(float64x2_t a, float64x2_t b);
float64x2_t Sleef_hypotd2_u05advsimdnofma(float64x2_t a, float64x2_t b);
float64x2_t Sleef_cinz_hypotd2_u05advsimdnofma(float64x2_t a, float64x2_t b);
float64x2_t Sleef_finz_hypotd2_u05advsimd(float64x2_t a, float64x2_t b);

svfloat64_t Sleef_hypotdx_u05sve(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_hypotdx_u05svenofma(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_cinz_hypotdx_u05svenofma(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_finz_hypotdx_u05sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypot_u05 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_hypotf1_u05purec(float a, float b);
float Sleef_hypotf1_u05purecfma(float a, float b);
float Sleef_cinz_hypotf1_u05purec(float a, float b);
float Sleef_finz_hypotf1_u05purecfma(float a, float b);

float32x4_t Sleef_hypotf4_u05(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u05advsimd(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u05advsimdnofma(float32x4_t a, float32x4_t b);
float32x4_t Sleef_cinz_hypotf4_u05advsimdnofma(float32x4_t a, float32x4_t b);
float32x4_t Sleef_finz_hypotf4_u05advsimd(float32x4_t a, float32x4_t b);

svfloat32_t Sleef_hypotfx_u05sve(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_hypotfx_u05svenofma(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_cinz_hypotfx_u05svenofma(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_finz_hypotfx_u05sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u05 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_hypotd1_u35purec(double a, double b);
double Sleef_hypotd1_u35purecfma(double a, double b);
double Sleef_cinz_hypotd1_u35purec(double a, double b);
double Sleef_finz_hypotd1_u35purecfma(double a, double b);

float64x2_t Sleef_hypotd2_u35(float64x2_t a, float64x2_t b);
float64x2_t Sleef_hypotd2_u35advsimd(float64x2_t a, float64x2_t b);
float64x2_t Sleef_hypotd2_u35advsimdnofma(float64x2_t a, float64x2_t b);
float64x2_t Sleef_cinz_hypotd2_u35advsimdnofma(float64x2_t a, float64x2_t b);
float64x2_t Sleef_finz_hypotd2_u35advsimd(float64x2_t a, float64x2_t b);

svfloat64_t Sleef_hypotdx_u35sve(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_hypotdx_u35svenofma(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_cinz_hypotdx_u35svenofma(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_finz_hypotdx_u35sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypot_u35 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_hypotf1_u35purec(float a, float b);
float Sleef_hypotf1_u35purecfma(float a, float b);
float Sleef_cinz_hypotf1_u35purec(float a, float b);
float Sleef_finz_hypotf1_u35purecfma(float a, float b);

float32x4_t Sleef_hypotf4_u35(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u35advsimd(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u35advsimdnofma(float32x4_t a, float32x4_t b);
float32x4_t Sleef_cinz_hypotf4_u35advsimdnofma(float32x4_t a, float32x4_t b);
float32x4_t Sleef_finz_hypotf4_u35advsimd(float32x4_t a, float32x4_t b);

svfloat32_t Sleef_hypotfx_u35sve(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_hypotfx_u35svenofma(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_cinz_hypotfx_u35svenofma(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_finz_hypotfx_u35sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u35 with the same accuracy specification.

Inverse Trigonometric Functions

Vectorized double precision arc sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_asind1_u10purec(double a);
double Sleef_asind1_u10purecfma(double a);
double Sleef_cinz_asind1_u10purec(double a);
double Sleef_finz_asind1_u10purecfma(double a);

float64x2_t Sleef_asind2_u10(float64x2_t a);
float64x2_t Sleef_asind2_u10advsimd(float64x2_t a);
float64x2_t Sleef_asind2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_asind2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_asind2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_asindx_u10sve(svfloat64_t a);
svfloat64_t Sleef_asindx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_asindx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_asindx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asin_u10 with the same accuracy specification.


Vectorized single precision arc sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_asinf1_u10purec(float a);
float Sleef_asinf1_u10purecfma(float a);
float Sleef_cinz_asinf1_u10purec(float a);
float Sleef_finz_asinf1_u10purecfma(float a);

float32x4_t Sleef_asinf4_u10(float32x4_t a);
float32x4_t Sleef_asinf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_asinf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_asinf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_asinf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_asinfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_asinfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_asinfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_asinfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u10 with the same accuracy specification.


Vectorized double precision arc sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_asind1_u35purec(double a);
double Sleef_asind1_u35purecfma(double a);
double Sleef_cinz_asind1_u35purec(double a);
double Sleef_finz_asind1_u35purecfma(double a);

float64x2_t Sleef_asind2_u35(float64x2_t a);
float64x2_t Sleef_asind2_u35advsimd(float64x2_t a);
float64x2_t Sleef_asind2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_asind2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_asind2_u35advsimd(float64x2_t a);

svfloat64_t Sleef_asindx_u35sve(svfloat64_t a);
svfloat64_t Sleef_asindx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_asindx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_asindx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asin_u35 with the same accuracy specification.


Vectorized single precision arc sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_asinf1_u35purec(float a);
float Sleef_asinf1_u35purecfma(float a);
float Sleef_cinz_asinf1_u35purec(float a);
float Sleef_finz_asinf1_u35purecfma(float a);

float32x4_t Sleef_asinf4_u35(float32x4_t a);
float32x4_t Sleef_asinf4_u35advsimd(float32x4_t a);
float32x4_t Sleef_asinf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_asinf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_asinf4_u35advsimd(float32x4_t a);

svfloat32_t Sleef_asinfx_u35sve(svfloat32_t a);
svfloat32_t Sleef_asinfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_asinfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_asinfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u35 with the same accuracy specification.


Vectorized double precision arc cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_acosd1_u10purec(double a);
double Sleef_acosd1_u10purecfma(double a);
double Sleef_cinz_acosd1_u10purec(double a);
double Sleef_finz_acosd1_u10purecfma(double a);

float64x2_t Sleef_acosd2_u10(float64x2_t a);
float64x2_t Sleef_acosd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_acosd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_acosd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_acosd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_acosdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_acosdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_acosdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_acosdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acos_u10 with the same accuracy specification.


Vectorized single precision arc cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_acosf1_u10purec(float a);
float Sleef_acosf1_u10purecfma(float a);
float Sleef_cinz_acosf1_u10purec(float a);
float Sleef_finz_acosf1_u10purecfma(float a);

float32x4_t Sleef_acosf4_u10(float32x4_t a);
float32x4_t Sleef_acosf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_acosf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_acosf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_acosf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_acosfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_acosfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_acosfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_acosfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u10 with the same accuracy specification.


Vectorized double precision arc cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_acosd1_u35purec(double a);
double Sleef_acosd1_u35purecfma(double a);
double Sleef_cinz_acosd1_u35purec(double a);
double Sleef_finz_acosd1_u35purecfma(double a);

float64x2_t Sleef_acosd2_u35(float64x2_t a);
float64x2_t Sleef_acosd2_u35advsimd(float64x2_t a);
float64x2_t Sleef_acosd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_acosd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_acosd2_u35advsimd(float64x2_t a);

svfloat64_t Sleef_acosdx_u35sve(svfloat64_t a);
svfloat64_t Sleef_acosdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_acosdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_acosdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acos_u35 with the same accuracy specification.


Vectorized single precision arc cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_acosf1_u35purec(float a);
float Sleef_acosf1_u35purecfma(float a);
float Sleef_cinz_acosf1_u35purec(float a);
float Sleef_finz_acosf1_u35purecfma(float a);

float32x4_t Sleef_acosf4_u35(float32x4_t a);
float32x4_t Sleef_acosf4_u35advsimd(float32x4_t a);
float32x4_t Sleef_acosf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_acosf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_acosf4_u35advsimd(float32x4_t a);

svfloat32_t Sleef_acosfx_u35sve(svfloat32_t a);
svfloat32_t Sleef_acosfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_acosfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_acosfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u35 with the same accuracy specification.


Vectorized double precision arc tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atand1_u10purec(double a);
double Sleef_atand1_u10purecfma(double a);
double Sleef_cinz_atand1_u10purec(double a);
double Sleef_finz_atand1_u10purecfma(double a);

float64x2_t Sleef_atand2_u10(float64x2_t a);
float64x2_t Sleef_atand2_u10advsimd(float64x2_t a);
float64x2_t Sleef_atand2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_atand2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_atand2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_atandx_u10sve(svfloat64_t a);
svfloat64_t Sleef_atandx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_atandx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_atandx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan_u10 with the same accuracy specification.


Vectorized single precision arc tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_atanf1_u10purec(float a);
float Sleef_atanf1_u10purecfma(float a);
float Sleef_cinz_atanf1_u10purec(float a);
float Sleef_finz_atanf1_u10purecfma(float a);

float32x4_t Sleef_atanf4_u10(float32x4_t a);
float32x4_t Sleef_atanf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_atanf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_atanf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_atanf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_atanfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_atanfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_atanfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_atanfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u10 with the same accuracy specification.


Vectorized double precision arc tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_atand1_u35purec(double a);
double Sleef_atand1_u35purecfma(double a);
double Sleef_cinz_atand1_u35purec(double a);
double Sleef_finz_atand1_u35purecfma(double a);

float64x2_t Sleef_atand2_u35(float64x2_t a);
float64x2_t Sleef_atand2_u35advsimd(float64x2_t a);
float64x2_t Sleef_atand2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_atand2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_atand2_u35advsimd(float64x2_t a);

svfloat64_t Sleef_atandx_u35sve(svfloat64_t a);
svfloat64_t Sleef_atandx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_atandx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_atandx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan_u35 with the same accuracy specification.


Vectorized single precision arc tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_atanf1_u35purec(float a);
float Sleef_atanf1_u35purecfma(float a);
float Sleef_cinz_atanf1_u35purec(float a);
float Sleef_finz_atanf1_u35purecfma(float a);

float32x4_t Sleef_atanf4_u35(float32x4_t a);
float32x4_t Sleef_atanf4_u35advsimd(float32x4_t a);
float32x4_t Sleef_atanf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_atanf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_atanf4_u35advsimd(float32x4_t a);

svfloat32_t Sleef_atanfx_u35sve(svfloat32_t a);
svfloat32_t Sleef_atanfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_atanfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_atanfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u35 with the same accuracy specification.


Vectorized double precision arc tangent function of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atan2d1_u10purec(double a, double b);
double Sleef_atan2d1_u10purecfma(double a, double b);
double Sleef_cinz_atan2d1_u10purec(double a, double b);
double Sleef_finz_atan2d1_u10purecfma(double a, double b);

float64x2_t Sleef_atan2d2_u10(float64x2_t a, float64x2_t b);
float64x2_t Sleef_atan2d2_u10advsimd(float64x2_t a, float64x2_t b);
float64x2_t Sleef_atan2d2_u10advsimdnofma(float64x2_t a, float64x2_t b);
float64x2_t Sleef_cinz_atan2d2_u10advsimdnofma(float64x2_t a, float64x2_t b);
float64x2_t Sleef_finz_atan2d2_u10advsimd(float64x2_t a, float64x2_t b);

svfloat64_t Sleef_atan2dx_u10sve(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_atan2dx_u10svenofma(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_cinz_atan2dx_u10svenofma(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_finz_atan2dx_u10sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2_u10 with the same accuracy specification.


Vectorized single precision arc tangent function of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_atan2f1_u10purec(float a, float b);
float Sleef_atan2f1_u10purecfma(float a, float b);
float Sleef_cinz_atan2f1_u10purec(float a, float b);
float Sleef_finz_atan2f1_u10purecfma(float a, float b);

float32x4_t Sleef_atan2f4_u10(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u10advsimd(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u10advsimdnofma(float32x4_t a, float32x4_t b);
float32x4_t Sleef_cinz_atan2f4_u10advsimdnofma(float32x4_t a, float32x4_t b);
float32x4_t Sleef_finz_atan2f4_u10advsimd(float32x4_t a, float32x4_t b);

svfloat32_t Sleef_atan2fx_u10sve(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_atan2fx_u10svenofma(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_cinz_atan2fx_u10svenofma(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_finz_atan2fx_u10sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u10 with the same accuracy specification.


Vectorized double precision arc tangent function of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_atan2d1_u35purec(double a, double b);
double Sleef_atan2d1_u35purecfma(double a, double b);
double Sleef_cinz_atan2d1_u35purec(double a, double b);
double Sleef_finz_atan2d1_u35purecfma(double a, double b);

float64x2_t Sleef_atan2d2_u35(float64x2_t a, float64x2_t b);
float64x2_t Sleef_atan2d2_u35advsimd(float64x2_t a, float64x2_t b);
float64x2_t Sleef_atan2d2_u35advsimdnofma(float64x2_t a, float64x2_t b);
float64x2_t Sleef_cinz_atan2d2_u35advsimdnofma(float64x2_t a, float64x2_t b);
float64x2_t Sleef_finz_atan2d2_u35advsimd(float64x2_t a, float64x2_t b);

svfloat64_t Sleef_atan2dx_u35sve(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_atan2dx_u35svenofma(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_cinz_atan2dx_u35svenofma(svfloat64_t a, svfloat64_t b);
svfloat64_t Sleef_finz_atan2dx_u35sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2_u35 with the same accuracy specification.


Vectorized single precision arc tangent function of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_atan2f1_u35purec(float a, float b);
float Sleef_atan2f1_u35purecfma(float a, float b);
float Sleef_cinz_atan2f1_u35purec(float a, float b);
float Sleef_finz_atan2f1_u35purecfma(float a, float b);

float32x4_t Sleef_atan2f4_u35(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u35advsimd(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u35advsimdnofma(float32x4_t a, float32x4_t b);
float32x4_t Sleef_cinz_atan2f4_u35advsimdnofma(float32x4_t a, float32x4_t b);
float32x4_t Sleef_finz_atan2f4_u35advsimd(float32x4_t a, float32x4_t b);

svfloat32_t Sleef_atan2fx_u35sve(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_atan2fx_u35svenofma(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_cinz_atan2fx_u35svenofma(svfloat32_t a, svfloat32_t b);
svfloat32_t Sleef_finz_atan2fx_u35sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u35 with the same accuracy specification.

Hyperbolic functions and inverse hyperbolic functions

Vectorized double precision hyperbolic sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sinhd1_u10purec(double a);
double Sleef_sinhd1_u10purecfma(double a);
double Sleef_cinz_sinhd1_u10purec(double a);
double Sleef_finz_sinhd1_u10purecfma(double a);

float64x2_t Sleef_sinhd2_u10(float64x2_t a);
float64x2_t Sleef_sinhd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_sinhd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_sinhd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_sinhd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_sinhdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_sinhdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_sinhdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_sinhdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sinhf1_u10purec(float a);
float Sleef_sinhf1_u10purecfma(float a);
float Sleef_cinz_sinhf1_u10purec(float a);
float Sleef_finz_sinhf1_u10purecfma(float a);

float32x4_t Sleef_sinhf4_u10(float32x4_t a);
float32x4_t Sleef_sinhf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_sinhf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_sinhf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_sinhf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_sinhfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_sinhfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_sinhfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_sinhfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sinhd1_u35purec(double a);
double Sleef_sinhd1_u35purecfma(double a);
double Sleef_cinz_sinhd1_u35purec(double a);
double Sleef_finz_sinhd1_u35purecfma(double a);

float64x2_t Sleef_sinhd2_u35(float64x2_t a);
float64x2_t Sleef_sinhd2_u35advsimd(float64x2_t a);
float64x2_t Sleef_sinhd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_sinhd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_sinhd2_u35advsimd(float64x2_t a);

svfloat64_t Sleef_sinhdx_u35sve(svfloat64_t a);
svfloat64_t Sleef_sinhdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_sinhdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_sinhdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic sine function

Synopsis

#include <sleef.h>


float Sleef_sinhf1_u35purec(float a);
float Sleef_sinhf1_u35purecfma(float a);
float Sleef_cinz_sinhf1_u35purec(float a);
float Sleef_finz_sinhf1_u35purecfma(float a);

float32x4_t Sleef_sinhf4_u35(float32x4_t a);
float32x4_t Sleef_sinhf4_u35advsimd(float32x4_t a);
float32x4_t Sleef_sinhf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_sinhf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_sinhf4_u35advsimd(float32x4_t a);

svfloat32_t Sleef_sinhfx_u35sve(svfloat32_t a);
svfloat32_t Sleef_sinhfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_sinhfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_sinhfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic cosine function

Synopsis

#include <sleef.h>

double Sleef_coshd1_u10purec(double a);
double Sleef_coshd1_u10purecfma(double a);
double Sleef_cinz_coshd1_u10purec(double a);
double Sleef_finz_coshd1_u10purecfma(double a);

float64x2_t Sleef_coshd2_u10(float64x2_t a);
float64x2_t Sleef_coshd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_coshd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_coshd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_coshd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_coshdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_coshdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_coshdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_coshdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic cosine function

Synopsis

#include <sleef.h>

float Sleef_coshf1_u10purec(float a);
float Sleef_coshf1_u10purecfma(float a);
float Sleef_cinz_coshf1_u10purec(float a);
float Sleef_finz_coshf1_u10purecfma(float a);

float32x4_t Sleef_coshf4_u10(float32x4_t a);
float32x4_t Sleef_coshf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_coshf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_coshf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_coshf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_coshfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_coshfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_coshfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_coshfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic cosine function

Synopsis

#include <sleef.h>


double Sleef_coshd1_u35purec(double a);
double Sleef_coshd1_u35purecfma(double a);
double Sleef_cinz_coshd1_u35purec(double a);
double Sleef_finz_coshd1_u35purecfma(double a);

float64x2_t Sleef_coshd2_u35(float64x2_t a);
float64x2_t Sleef_coshd2_u35advsimd(float64x2_t a);
float64x2_t Sleef_coshd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_coshd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_coshd2_u35advsimd(float64x2_t a);

svfloat64_t Sleef_coshdx_u35sve(svfloat64_t a);
svfloat64_t Sleef_coshdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_coshdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_coshdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic cosine function

Synopsis

#include <sleef.h>


float Sleef_coshf1_u35purec(float a);
float Sleef_coshf1_u35purecfma(float a);
float Sleef_cinz_coshf1_u35purec(float a);
float Sleef_finz_coshf1_u35purecfma(float a);

float32x4_t Sleef_coshf4_u35(float32x4_t a);
float32x4_t Sleef_coshf4_u35advsimd(float32x4_t a);
float32x4_t Sleef_coshf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_coshf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_coshf4_u35advsimd(float32x4_t a);

svfloat32_t Sleef_coshfx_u35sve(svfloat32_t a);
svfloat32_t Sleef_coshfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_coshfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_coshfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic tangent function

Synopsis

#include <sleef.h>

double Sleef_tanhd1_u10purec(double a);
double Sleef_tanhd1_u10purecfma(double a);
double Sleef_cinz_tanhd1_u10purec(double a);
double Sleef_finz_tanhd1_u10purecfma(double a);

float64x2_t Sleef_tanhd2_u10(float64x2_t a);
float64x2_t Sleef_tanhd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_tanhd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_tanhd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_tanhd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_tanhdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_tanhdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_tanhdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_tanhdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic tangent function

Synopsis

#include <sleef.h>

float Sleef_tanhf1_u10purec(float a);
float Sleef_tanhf1_u10purecfma(float a);
float Sleef_cinz_tanhf1_u10purec(float a);
float Sleef_finz_tanhf1_u10purecfma(float a);

float32x4_t Sleef_tanhf4_u10(float32x4_t a);
float32x4_t Sleef_tanhf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_tanhf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_tanhf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_tanhf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_tanhfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_tanhfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_tanhfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_tanhfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic tangent function

Synopsis

#include <sleef.h>


double Sleef_tanhd1_u35purec(double a);
double Sleef_tanhd1_u35purecfma(double a);
double Sleef_cinz_tanhd1_u35purec(double a);
double Sleef_finz_tanhd1_u35purecfma(double a);

float64x2_t Sleef_tanhd2_u35(float64x2_t a);
float64x2_t Sleef_tanhd2_u35advsimd(float64x2_t a);
float64x2_t Sleef_tanhd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_tanhd2_u35advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_tanhd2_u35advsimd(float64x2_t a);

svfloat64_t Sleef_tanhdx_u35sve(svfloat64_t a);
svfloat64_t Sleef_tanhdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_tanhdx_u35svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_tanhdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic tangent function

Synopsis

#include <sleef.h>


float Sleef_tanhf1_u35purec(float a);
float Sleef_tanhf1_u35purecfma(float a);
float Sleef_cinz_tanhf1_u35purec(float a);
float Sleef_finz_tanhf1_u35purecfma(float a);

float32x4_t Sleef_tanhf4_u35(float32x4_t a);
float32x4_t Sleef_tanhf4_u35advsimd(float32x4_t a);
float32x4_t Sleef_tanhf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_tanhf4_u35advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_tanhf4_u35advsimd(float32x4_t a);

svfloat32_t Sleef_tanhfx_u35sve(svfloat32_t a);
svfloat32_t Sleef_tanhfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_tanhfx_u35svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_tanhfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u35 with the same accuracy specification.


Vectorized double precision inverse hyperbolic sine function

Synopsis

#include <sleef.h>

double Sleef_asinhd1_u10purec(double a);
double Sleef_asinhd1_u10purecfma(double a);
double Sleef_cinz_asinhd1_u10purec(double a);
double Sleef_finz_asinhd1_u10purecfma(double a);

float64x2_t Sleef_asinhd2_u10(float64x2_t a);
float64x2_t Sleef_asinhd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_asinhd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_asinhd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_asinhd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_asinhdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_asinhdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_asinhdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_asinhdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic sine function

Synopsis

#include <sleef.h>

float Sleef_asinhf1_u10purec(float a);
float Sleef_asinhf1_u10purecfma(float a);
float Sleef_cinz_asinhf1_u10purec(float a);
float Sleef_finz_asinhf1_u10purecfma(float a);

float32x4_t Sleef_asinhf4_u10(float32x4_t a);
float32x4_t Sleef_asinhf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_asinhf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_asinhf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_asinhf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_asinhfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_asinhfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_asinhfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_asinhfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinhf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic cosine function

Synopsis

#include <sleef.h>

double Sleef_acoshd1_u10purec(double a);
double Sleef_acoshd1_u10purecfma(double a);
double Sleef_cinz_acoshd1_u10purec(double a);
double Sleef_finz_acoshd1_u10purecfma(double a);

float64x2_t Sleef_acoshd2_u10(float64x2_t a);
float64x2_t Sleef_acoshd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_acoshd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_acoshd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_acoshd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_acoshdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_acoshdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_acoshdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_acoshdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic cosine function

Synopsis

#include <sleef.h>

float Sleef_acoshf1_u10purec(float a);
float Sleef_acoshf1_u10purecfma(float a);
float Sleef_cinz_acoshf1_u10purec(float a);
float Sleef_finz_acoshf1_u10purecfma(float a);

float32x4_t Sleef_acoshf4_u10(float32x4_t a);
float32x4_t Sleef_acoshf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_acoshf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_acoshf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_acoshf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_acoshfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_acoshfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_acoshfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_acoshfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acoshf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic tangent function

Synopsis

#include <sleef.h>

double Sleef_atanhd1_u10purec(double a);
double Sleef_atanhd1_u10purecfma(double a);
double Sleef_cinz_atanhd1_u10purec(double a);
double Sleef_finz_atanhd1_u10purecfma(double a);

float64x2_t Sleef_atanhd2_u10(float64x2_t a);
float64x2_t Sleef_atanhd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_atanhd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_atanhd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_atanhd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_atanhdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_atanhdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_atanhdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_atanhdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic tangent function

Synopsis

#include <sleef.h>

float Sleef_atanhf1_u10purec(float a);
float Sleef_atanhf1_u10purecfma(float a);
float Sleef_cinz_atanhf1_u10purec(float a);
float Sleef_finz_atanhf1_u10purecfma(float a);

float32x4_t Sleef_atanhf4_u10(float32x4_t a);
float32x4_t Sleef_atanhf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_atanhf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_atanhf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_atanhf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_atanhfx_u10sve(svfloat32_t a);
svfloat32_t Sleef_atanhfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_atanhfx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_atanhfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanhf_u10 with the same accuracy specification.

Error and gamma function

Vectorized double precision error function

Synopsis

#include <sleef.h>

double Sleef_erfd1_u10purec(double a);
double Sleef_erfd1_u10purecfma(double a);
double Sleef_cinz_erfd1_u10purec(double a);
double Sleef_finz_erfd1_u10purecfma(double a);

float64x2_t Sleef_erfd2_u10(float64x2_t a);
float64x2_t Sleef_erfd2_u10advsimd(float64x2_t a);
float64x2_t Sleef_erfd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_erfd2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_erfd2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_erfdx_u10sve(svfloat64_t a);
svfloat64_t Sleef_erfdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_erfdx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_erfdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erf_u10 with the same accuracy specification.


Vectorized single precision error function

Synopsis

#include <sleef.h>

float Sleef_erff1_u10purec(float a);
float Sleef_erff1_u10purecfma(float a);
float Sleef_cinz_erff1_u10purec(float a);
float Sleef_finz_erff1_u10purecfma(float a);

float32x4_t Sleef_erff4_u10(float32x4_t a);
float32x4_t Sleef_erff4_u10advsimd(float32x4_t a);
float32x4_t Sleef_erff4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_erff4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_erff4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_erffx_u10sve(svfloat32_t a);
svfloat32_t Sleef_erffx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_erffx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_erffx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erff_u10 with the same accuracy specification.


Vectorized double precision complementary error function

Synopsis

#include <sleef.h>

double Sleef_erfcd1_u15purec(double a);
double Sleef_erfcd1_u15purecfma(double a);
double Sleef_cinz_erfcd1_u15purec(double a);
double Sleef_finz_erfcd1_u15purecfma(double a);

float64x2_t Sleef_erfcd2_u15(float64x2_t a);
float64x2_t Sleef_erfcd2_u15advsimd(float64x2_t a);
float64x2_t Sleef_erfcd2_u15advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_erfcd2_u15advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_erfcd2_u15advsimd(float64x2_t a);

svfloat64_t Sleef_erfcdx_u15sve(svfloat64_t a);
svfloat64_t Sleef_erfcdx_u15svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_erfcdx_u15svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_erfcdx_u15sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erfc_u15 with the same accuracy specification.


Vectorized single precision complementary error function

Synopsis

#include <sleef.h>

float Sleef_erfcf1_u15purec(float a);
float Sleef_erfcf1_u15purecfma(float a);
float Sleef_cinz_erfcf1_u15purec(float a);
float Sleef_finz_erfcf1_u15purecfma(float a);

float32x4_t Sleef_erfcf4_u15(float32x4_t a);
float32x4_t Sleef_erfcf4_u15advsimd(float32x4_t a);
float32x4_t Sleef_erfcf4_u15advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_erfcf4_u15advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_erfcf4_u15advsimd(float32x4_t a);

svfloat32_t Sleef_erfcfx_u15sve(svfloat32_t a);
svfloat32_t Sleef_erfcfx_u15svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_erfcfx_u15svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_erfcfx_u15sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erfcf_u15 with the same accuracy specification.


Vectorized double precision gamma function

Synopsis

#include <sleef.h>

double Sleef_tgammad1_u10purec(double a);
double Sleef_tgammad1_u10purecfma(double a);
double Sleef_cinz_tgammad1_u10purec(double a);
double Sleef_finz_tgammad1_u10purecfma(double a);

float64x2_t Sleef_tgammad2_u10(float64x2_t a);
float64x2_t Sleef_tgammad2_u10advsimd(float64x2_t a);
float64x2_t Sleef_tgammad2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_tgammad2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_tgammad2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_tgammadx_u10sve(svfloat64_t a);
svfloat64_t Sleef_tgammadx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_tgammadx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_tgammadx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tgamma_u10 with the same accuracy specification.


Vectorized single precision gamma function

Synopsis

#include <sleef.h>

float Sleef_tgammaf1_u10purec(float a);
float Sleef_tgammaf1_u10purecfma(float a);
float Sleef_cinz_tgammaf1_u10purec(float a);
float Sleef_finz_tgammaf1_u10purecfma(float a);

float32x4_t Sleef_tgammaf4_u10(float32x4_t a);
float32x4_t Sleef_tgammaf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_tgammaf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_tgammaf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_tgammaf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_tgammafx_u10sve(svfloat32_t a);
svfloat32_t Sleef_tgammafx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_tgammafx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_tgammafx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tgammaf_u10 with the same accuracy specification.


Vectorized double precision log gamma function

Synopsis

#include <sleef.h>

double Sleef_lgammad1_u10purec(double a);
double Sleef_lgammad1_u10purecfma(double a);
double Sleef_cinz_lgammad1_u10purec(double a);
double Sleef_finz_lgammad1_u10purecfma(double a);

float64x2_t Sleef_lgammad2_u10(float64x2_t a);
float64x2_t Sleef_lgammad2_u10advsimd(float64x2_t a);
float64x2_t Sleef_lgammad2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_cinz_lgammad2_u10advsimdnofma(float64x2_t a);
float64x2_t Sleef_finz_lgammad2_u10advsimd(float64x2_t a);

svfloat64_t Sleef_lgammadx_u10sve(svfloat64_t a);
svfloat64_t Sleef_lgammadx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_cinz_lgammadx_u10svenofma(svfloat64_t a);
svfloat64_t Sleef_finz_lgammadx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_lgamma_u10 with the same accuracy specification.


Vectorized single precision log gamma function

Synopsis

#include <sleef.h>

float Sleef_lgammaf1_u10purec(float a);
float Sleef_lgammaf1_u10purecfma(float a);
float Sleef_cinz_lgammaf1_u10purec(float a);
float Sleef_finz_lgammaf1_u10purecfma(float a);

float32x4_t Sleef_lgammaf4_u10(float32x4_t a);
float32x4_t Sleef_lgammaf4_u10advsimd(float32x4_t a);
float32x4_t Sleef_lgammaf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_cinz_lgammaf4_u10advsimdnofma(float32x4_t a);
float32x4_t Sleef_finz_lgammaf4_u10advsimd(float32x4_t a);

svfloat32_t Sleef_lgammafx_u10sve(svfloat32_t a);
svfloat32_t Sleef_lgammafx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_cinz_lgammafx_u10svenofma(svfloat32_t a);
svfloat32_t Sleef_finz_lgammafx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_lgammaf_u10 with the same accuracy specification.

Nearest integer function

Vectorized double precision function for rounding to integer towards zero

Synopsis

#include <sleef.h>

float64x2_t Sleef_truncd2(float64x2_t a);
float64x2_t Sleef_truncd2_advsimd(float64x2_t a);
svfloat64_t Sleef_truncdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_trunc with the same accuracy specification.


Vectorized single precision function for rounding to integer towards zero

Synopsis

#include <sleef.h>

float32x4_t Sleef_truncf4(float32x4_t a);
float32x4_t Sleef_truncf4_advsimd(float32x4_t a);
svfloat32_t Sleef_truncfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_truncf with the same accuracy specification.


Vectorized double precision function for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

float64x2_t Sleef_floord2(float64x2_t a);
float64x2_t Sleef_floord2_advsimd(float64x2_t a);
svfloat64_t Sleef_floordx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_floor with the same accuracy specification.


Vectorized single precision function for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

float32x4_t Sleef_floorf4(float32x4_t a);
float32x4_t Sleef_floorf4_advsimd(float32x4_t a);
svfloat32_t Sleef_floorfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_floorf with the same accuracy specification.


Vectorized double precision function for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

float64x2_t Sleef_ceild2(float64x2_t a);
float64x2_t Sleef_ceild2_advsimd(float64x2_t a);
svfloat64_t Sleef_ceildx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ceil with the same accuracy specification.


Vectorized single precision function for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

float32x4_t Sleef_ceilf4(float32x4_t a);
float32x4_t Sleef_ceilf4_advsimd(float32x4_t a);
svfloat32_t Sleef_ceilfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ceilf with the same accuracy specification.


Vectorized double precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

float64x2_t Sleef_roundd2(float64x2_t a);
float64x2_t Sleef_roundd2_advsimd(float64x2_t a);
svfloat64_t Sleef_rounddx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_round with the same accuracy specification.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

float32x4_t Sleef_roundf4(float32x4_t a);
float32x4_t Sleef_roundf4_advsimd(float32x4_t a);
svfloat32_t Sleef_roundfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_roundf with the same accuracy specification.


Vectorized double precision function for rounding to nearest integer, with ties rounding to even

Synopsis

#include <sleef.h>

float64x2_t Sleef_rintd2(float64x2_t a);
float64x2_t Sleef_rintd2_advsimd(float64x2_t a);
svfloat64_t Sleef_rintdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_rint with the same accuracy specification.


Vectorized single precision function for rounding to nearest integer, with ties rounding to even

Synopsis

#include <sleef.h>

float32x4_t Sleef_rintf4(float32x4_t a);
float32x4_t Sleef_rintf4_advsimd(float32x4_t a);
svfloat32_t Sleef_rintfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_rintf with the same accuracy specification.

Other function

Vectorized double precision function for fused multiply-accumulation

Synopsis

#include <sleef.h>

float64x2_t Sleef_fmad2(float64x2_t a, float64x2_t b, float64x2_t c);
float64x2_t Sleef_fmad2_advsimd(float64x2_t a, float64x2_t b, float64x2_t c);
svfloat64_t Sleef_fmadx_sve(svfloat64_t a, svfloat64_t b, svfloat64_t c);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fma with the same accuracy specification.


Vectorized single precision function for fused multiply-accumulation

Synopsis

#include <sleef.h>

float32x4_t Sleef_fmaf4(float32x4_t a, float32x4_t b, float32x4_t c);
float32x4_t Sleef_fmaf4_advsimd(float32x4_t a, float32x4_t b, float32x4_t c);
svfloat32_t Sleef_fmafx_sve(svfloat32_t a, svfloat32_t b, svfloat32_t c);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaf with the same accuracy specification.


Vectorized double precision FP remainder

Synopsis

#include <sleef.h>

float64x2_t Sleef_fmodd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_fmodd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_fmoddx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmod with the same accuracy specification.


Vectorized single precision FP remainder

Synopsis

#include <sleef.h>

float32x4_t Sleef_fmodf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fmodf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_fmodfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmodf with the same accuracy specification.


Vectorized double precision IEEE FP remainder

Synopsis

#include <sleef.h>

float64x2_t Sleef_remainderd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_remainderd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_remainderdx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_remainder with the same accuracy specification.


Vectorized single precision IEEE FP remainder

Synopsis

#include <sleef.h>

float32x4_t Sleef_remainderf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_remainderf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_remainderfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_remainderf with the same accuracy specification.


Vectorized double precision function for multiplying by integral power of 2

Synopsis

#include <sleef.h>

float64x2_t Sleef_ldexpd2(float64x2_t a, int32x2_t b);
float64x2_t Sleef_ldexpd2_advsimd(float64x2_t a, int32x2_t b);
svfloat64_t Sleef_ldexpdx_sve(svfloat64_t a, svint32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ldexp with the same accuracy specification.


Vectorized double precision function for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

float64x2_t Sleef_frfrexpd2(float64x2_t a);
float64x2_t Sleef_frfrexpd2_advsimd(float64x2_t a);
svfloat64_t Sleef_frfrexpdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_frfrexp with the same accuracy specification.


Vectorized single precision function for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

float32x4_t Sleef_frfrexpf4(float32x4_t a);
float32x4_t Sleef_frfrexpf4_advsimd(float32x4_t a);
svfloat32_t Sleef_frfrexpfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_frfrexpf with the same accuracy specification.


Vectorized double precision function for obtaining integral component of an FP number

Synopsis

#include <sleef.h>

int32x2_t Sleef_expfrexpd2(float64x2_t a);
int32x2_t Sleef_expfrexpd2_advsimd(float64x2_t a);
svint32_t Sleef_expfrexpdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expfrexp with the same accuracy specification.


Vectorized double precision function for getting integer exponent

Synopsis

#include <sleef.h>

int32x2_t Sleef_ilogbd2(float64x2_t a);
int32x2_t Sleef_ilogbd2_advsimd(float64x2_t a);
svint32_t Sleef_ilogbdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ilogb with the same accuracy specification.


Vectorized double precision signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef_float64x2_t_2 Sleef_modfd2(float64x2_t a);
Sleef_float64x2_t_2 Sleef_modfd2_advsimd(float64x2_t a);
Sleef_svfloat64_t_2 Sleef_modfdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_modf with the same accuracy specification.


Vectorized single precision signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_modff4(float32x4_t a);
Sleef_float32x4_t_2 Sleef_modff4_advsimd(float32x4_t a);
Sleef_svfloat32_t_2 Sleef_modffx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_modff with the same accuracy specification.


Vectorized double precision function for calculating the absolute value

Synopsis

#include <sleef.h>

float64x2_t Sleef_fabsd2(float64x2_t a);
float64x2_t Sleef_fabsd2_advsimd(float64x2_t a);
svfloat64_t Sleef_fabsdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fabs with the same accuracy specification.


Vectorized single precision function for calculating the absolute value

Synopsis

#include <sleef.h>

float32x4_t Sleef_fabsf4(float32x4_t a);
float32x4_t Sleef_fabsf4_advsimd(float32x4_t a);
svfloat32_t Sleef_fabsfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fabsf with the same accuracy specification.


Vectorized double precision function for copying signs

Synopsis

#include <sleef.h>

float64x2_t Sleef_copysignd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_copysignd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_copysigndx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_copysign with the same accuracy specification.


Vectorized single precision function for copying signs

Synopsis

#include <sleef.h>

float32x4_t Sleef_copysignf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_copysignf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_copysignfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_copysignf with the same accuracy specification.


Vectorized double precision function for determining maximum of two values

Synopsis

#include <sleef.h>

float64x2_t Sleef_fmaxd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_fmaxd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_fmaxdx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmax with the same accuracy specification.


Vectorized single precision function for determining maximum of two values

Synopsis

#include <sleef.h>

float32x4_t Sleef_fmaxf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fmaxf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_fmaxfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaxf with the same accuracy specification.


Vectorized double precision function for determining minimum of two values

Synopsis

#include <sleef.h>

float64x2_t Sleef_fmind2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_fmind2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_fmindx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmin with the same accuracy specification.


Vectorized single precision function for determining minimum of two values

Synopsis

#include <sleef.h>

float32x4_t Sleef_fminf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fminf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_fminfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fminf with the same accuracy specification.


Vectorized double precision function to calculate positive difference of two values

Synopsis

#include <sleef.h>

float64x2_t Sleef_fdimd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_fdimd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_fdimdx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fdim with the same accuracy specification.


Vectorized single precision function to calculate positive difference of two values

Synopsis

#include <sleef.h>

float32x4_t Sleef_fdimf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fdimf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_fdimfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fdimf with the same accuracy specification.


Vectorized double precision function for obtaining the next representable FP value

Synopsis

#include <sleef.h>

float64x2_t Sleef_nextafterd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_nextafterd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_nextafterdx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_nextafter with the same accuracy specification.


Vectorized single precision function for obtaining the next representable FP value

Synopsis

#include <sleef.h>

float32x4_t Sleef_nextafterf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_nextafterf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_nextafterfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_nextafterf with the same accuracy specification.

sleef-3.5.1/doc/html/additional.xhtml000066400000000000000000000334431373003144100175200ustar00rootroot00000000000000 SLEEF - Additional Notes

SLEEF Documentation - Additional Notes

Table of contents

Frequently asked questions

Q1: Are the scalar functions in SLEEF faster than the corresponding functions in the standard C library?


A1: No. Today's standard C libraries are very well optimized, and there is little room for further optimization. The reason why SLEEF is fast is that it carries out computation directly on SIMD registers and ALUs. This is not as simple as it sounds, because conditional branches have to be eliminated in order to take full advantage of SIMD computation. If the algorithm requires conditional branches according to the argument, it must prepare for the case where the elements in the input vector contain both values that would make a branch happen and not happen. This would spoil the advantage of SIMD computation, because each element in a vector would require a different code path.



Q2: Do the trigonometric functions (e.g. sin) in SLEEF return correct values for the whole range of inputs?


A2: Yes. SLEEF does implement a vectorized version of Payne Hanek range reduction, and all the trigonometric functions return a correct value with the specified accuracy.

About the GNUABI version of the library

The GNUABI version of the library (libsleefgnuabi.so) is built for x86 and aarch64 architectures. This library provides an API compatible with libmvec in glibc, and the API conforms to the x86 vector ABI, AArch64 vector ABI and Power Vector ABI. This library is built and installed by default, and certain compilers call the functions in this library.

How the dispatchers work

The dispatchers in SLEEF are designed to have very low overhead. This overhead is so small that it cannot be observed by microbenchmarking.

Fig. 7.1 shows a simplified code of our dispatcher. There is only one exported function mainFunc. When mainFunc is called for the first time, dispatcherMain is called internally, since funcPtr is initialized to the pointer to dispatcherMain (line 14). It then detects if the CPU supports SSE 4.1 (line 7), and rewrites funcPtr to a pointer to the function that utilizes SSE 4.1 or SSE 2, depending on the result of CPU feature detection (line 10). When mainFunc is called for the second time, it does not execute the dispatcherMain. It just executes the function pointed by the pointer stored in funcPtr during the execution of dispatcherMain.

There are advantages in our dispatcher. The first advantage is that it does not require any compiler-specific extension. The second advantage is simplicity. There are only 18 lines of simple code. Since the dispatchers are completely separated for each function, there is not much room for bugs to get in.

The third advantage is low overhead. You might think that the overhead is one function call including execution of the prologue and the epilogue. However, modern compilers are smart enough to eliminate redundant execution of the prologue, epilogue and return instruction. The actual overhead is just one jmp instruction, which has very small overhead since it is not conditional. This overhead is likely hidden by out-of-order execution.

The fourth advantage is thread safety. There is only one variable shared among threads, which is funcPtr. There are only two possible values for this pointer variable. The first value is the pointer to the dispatcherMain, and the second value is the pointer to either funcSSE2 or funcSSE4, depending on the availability of extensions. Once funcPtr is substituted with the pointer to funcSSE2 or funcSSE4, it will not be changed in the future. It should be easy to confirm that the code works in all the cases.

static double (*funcPtr)(double arg);   // forward declaration; the initializer follows dispatcherMain

static double dispatcherMain(double arg) {
    double (*p)(double arg) = funcSSE2;   // default to the SSE2 implementation

#if the compiler supports SSE4.1
    if (SSE4.1 is available on the CPU) p = funcSSE4;   // pseudo-code for the run-time CPU feature check
#endif

    funcPtr = p;              // later calls through funcPtr go straight to the chosen implementation
    return (*funcPtr)(arg);   // serve this first call with the chosen implementation
}

static double (*funcPtr)(double arg) = dispatcherMain;   // so the first call to mainFunc lands in dispatcherMain

double mainFunc(double arg) {
    return (*funcPtr)(arg);   // the only non-static function: one indirect call
}

Fig. 7.1: Simplified code of our dispatcher

ULP, gradual underflow and flush-to-zero mode

ULP stands for "unit in the last place", which is sometimes used for representing accuracy of calculation. 1 ULP is the distance between the two closest floating point numbers, which depends on the exponent of the FP number. The accuracy of calculation by reputable math libraries is usually between 0.5 and 1 ULP. Here, the accuracy means the largest error of calculation. SLEEF math library provides multiple accuracy choices for most of the math functions. Many functions have 3.5-ULP and 1-ULP versions, and 3.5-ULP versions are faster than 1-ULP versions. If you care more about execution speed than accuracy, it is advised to use the 3.5-ULP versions along with -ffast-math or "unsafe math optimization" options for the compiler.

Note that 3.5 ULPs of error is small enough in many applications. If you do not manage the error of computation by carefully ordering floating point operations in your code, you would easily have that amount of error in the computation results.

In the IEEE 754 standard, underflow does not happen abruptly when the exponent becomes zero. Instead, when a number to be represented is smaller than a certain value, a denormal number is produced which has less precision. This is sometimes called gradual underflow. On some processor implementations, a flush-to-zero mode is used since it is easier to implement in hardware. In flush-to-zero mode, numbers smaller than the smallest normalized number are replaced with zero. FP operations are not IEEE-754 conformant if a flush-to-zero mode is used. A flush-to-zero mode influences the accuracy of calculation in some cases. The smallest normalized number can be referred to as DBL_MIN for double precision, and FLT_MIN for single precision. The naming of these macros is a little bit confusing because DBL_MIN is not the smallest double precision number.

You can see known maximum errors in math functions in glibc at this page.

Explanatory source code for our modified Payne Hanek reduction method

In order to evaluate a trigonometric function with a large argument, an argument reduction method is used to find an FP remainder of dividing the argument x by π. We devised a variation of the Payne-Hanek argument reduction method which is suitable for vector computation. Fig. 7.2 shows an explanatory source code for this method. See our paper for the details.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <mpfr.h>

// A double-double value: x is the leading part and y is the trailing part.
typedef struct {
  double x, y;
} double2;

// Promotes a plain double to a double-double whose trailing part is zero.
double2 dd(double d) {
  double2 value;
  value.x = d;
  value.y = 0;
  return value;
}

// Returns the bit pattern of d as a 64-bit integer.
int64_t d2i(double d) {
  union { double f; int64_t i; } pun;
  pun.f = d;
  return pun.i;
}

// Returns the double whose bit pattern is i.
double i2d(int64_t i) {
  union { double f; int64_t i; } pun;
  pun.i = i;
  return pun.f;
}

// Clears the 27 least significant bits of the bit pattern of d.
double upper(double d) {
  int64_t bits = d2i(d);
  return i2d(bits & 0xfffffffff8000000LL);
}

// Clears the least significant bit of the bit pattern of d.
double clearlsb(double d) {
  int64_t bits = d2i(d);
  return i2d(bits & 0xfffffffffffffffeLL);
}

// Renormalizes t: the leading part becomes the rounded sum t.x + t.y and the
// trailing part becomes t.x - s.x + t.y, i.e. what the rounded sum left out.
// NOTE(review): this looks like the Fast2Sum step, which recovers the rounding
// error exactly only when |t.x| >= |t.y| -- confirm that callers guarantee it.
double2 ddrenormalize(double2 t) {
  double2 s = dd(t.x + t.y);
  s.y = t.x - s.x + t.y;
  return s;
}

// Adds two double-double values.
// The leading parts are summed into r.x. r.y collects the terms that compensate
// for the rounding of that sum, plus the sum of the two trailing parts.
// The result is not renormalized here.
double2 ddadd(double2 x, double2 y) {
  double2 r = dd(x.x + y.x);
  double v = r.x - x.x;  // the portion of r.x attributed to y.x
  r.y = (x.x - (r.x - v)) + (y.x - v) + (x.y + y.y);
  return r;
}

// Multiplies two doubles and returns the product as a double-double.
double2 ddmul(double x, double y) {
  double2 r = dd(x * y);
  r.y = fma(x, y, -r.x);  // x * y - r.x with a single rounding: the part of the product that r.x lost
  return r;
}

// Multiplies two double-double values.
// The leading parts are multiplied with ddmul and the two cross terms are added
// to the trailing part; the x.y * y.y term is not included.
double2 ddmul2(double2 x, double2 y) {
  double2 r = ddmul(x.x, y.x);
  r.y += x.x * y.y + x.y * y.x;
  return r;
}

// This function computes remainder(a, PI/2)
// The result is returned as a double-double value. The table of 2/PI pieces is
// rebuilt with MPFR on every call.
double2 modifiedPayneHanek(double a) {
  double table[4];
  // Arguments with magnitude above 1e+200 are scaled by 2^-128; scale is also
  // applied to the exponent of the table entries below
  int scale = fabs(a) > 1e+200 ? -128 : 0;
  a = ldexp(a, scale);

  // Table generation

  mpfr_set_default_prec(2048);
  mpfr_t pi, m;
  mpfr_inits(pi, m, NULL);
  mpfr_const_pi(pi, GMP_RNDN);

  // m = 2/PI
  mpfr_d_div(m, 2, pi, GMP_RNDN);
  // Shift the exponent of m up, keep only the fractional part, and shift it
  // back down. The two shifts differ by scale.
  mpfr_set_exp(m, mpfr_get_exp(m) + (ilogb(a) - 53 - scale));
  mpfr_frac(m, m, GMP_RNDN);
  mpfr_set_exp(m, mpfr_get_exp(m) - (ilogb(a) - 53));

  // Split m into four doubles, each with its least significant bit cleared,
  // subtracting every piece from m before taking the next one
  for(int i=0;i<4;i++) {
    table[i] = clearlsb(mpfr_get_d(m, GMP_RNDN));
    mpfr_sub_d(m, m, table[i], GMP_RNDN);
  }

  mpfr_clears(pi, m, NULL);

  // Main computation

  // Accumulate a * table[i] in double-double, removing the nearest integer
  // from the leading part after each step
  double2 x = dd(0);
  for(int i=0;i<4;i++) {
    x = ddadd(x, ddmul(a, table[i]));
    x.x = x.x - round(x.x);
    x = ddrenormalize(x);
  }

  // PI/2 split into a leading and a trailing double
  double2 pio2 = { 3.141592653589793*0.5, 1.2246467991473532e-16*0.5 };
  x = ddmul2(x, pio2);
  // Arguments smaller in magnitude than 0.785398163397448279 (about PI/4) are
  // returned as they are
  return fabs(a) < 0.785398163397448279 ? dd(a) : x;
}

Fig. 7.2: Explanatory source code for our modified Payne Hanek reduction method

It is a soup ladle. A sleef means a soup ladle in Dutch.


logo
Fig. 7.3: SLEEF logo

sleef-3.5.1/doc/html/apple-touch-icon.png000066400000000000000000000043321373003144100202020ustar00rootroot00000000000000‰PNG  IHDRhÿ‹ pHYs5Ô5Ô^eåPLTEþþþ•••¼¼¼oooÞÞÞIII¨¨¨5r!hIDATx^í™»w²JÆ7È@+j¤Œ¡UñÒb$¦ˆ˜_Ô´B.þûß Ã0ÄKâûIqÖYç)Èà·ž}™ &pQÿ Ùf5 EbàòÇðÿäÈvb¤ÂÞÌuß´-(;¢-¡†6_z°âËuáÀNt«°Ô7>ÜxRìºã§ DW»9šÐΖ;×`I~±£¿" ä“r=€ÀÈ®»¦wr@“€`倣ìzC’)èÐ&•$ ˆ]ÈUË@i⨸ à Ûvtý c=Î(hv‚X€:$å ”ƒòhf à,¨#@ªÁ°ˆÌÒõ¤e'ÕÁ9¢—3`êòÈìäÈí H©äŽÐ€ƒî‹š¥ ÄsäéÊ<Õ¼¤h&jf뺡EÕéô.@j äB™CA¢HËíÏ€d·¡™-ó‹OQr­\¢ú1ˆˆälÉA´ÇydºŽ«I=á#Ðй£±??"G'j€Ð!mOBC±ÈQG„¦¸¥ÈˆL(é…$üRN’½phÍô¢f"O—Ê/Ÿ–_Ô,Ñï šä—àt¾!2<  q>7ÌT3ˆÞ”ƒ€õ¹‹cC€ÜŒ”5( À+0$@±14É®ÅeG²0$@l܈ª)¤Ž)h˜{â©¶ò’ P[<â‡÷ Ôã³Jc“µVÉvšÚCI®Ûǰ¤ˆW×ý†÷®K,ãn~¸c@×m„¡»&m·?<ÕVÞÔBãw¼£­º\r±\8 ÇAžj>Ñ„ºð2ßÜe§¹Äª ‹b 2=šeC·Š”Œ•þñ¡OK_‘!£ C +ýZØù7 1CfE†>+2”âj ÝJ I •:g3Î7Ýr{¡“T·ÏrÖso6\º&Ô†ÚΣóvht¾ÃjœÍ´ºW<5‚™ÌaåÔê!Œ.ºéZyJÓø„ú ›(…0솆.•^b  § jQì2¼ƒa;Zm&_+¯³ŽÑ†¿W;K?c ÚIË g ²A_Š>[ZO‰îæà±Ú;˽ˆšu– s$@;)ôÕS<º{fèà4.m²zT{c –#“ƒÚ¯¤N13”&6\P€åÌ‘ÒË@R”¶sÀù0Ñ(HîsC©yôöe¾÷¥q×ûêËñÔ€æº=ŠÍñ½·šÀ®Õ_ ¶ÌÐá§M&›†(„ìÐEˆCÜíÒ&qvÎc»ãÖ]/L6ªoÝõRŸê ÆP5†®3„+2ô1¸Ö ? 
õÌ+'cãç µèÊQmül¾†æÇ3´îë¿dÈJq.m¥H­wZÑ/†~-½ÕöZ/À?gˆ–þw)~!VúëÝj¤H!9 ð­†`¹†ÖæO·úÕP’+2²ÿQ†PUZü[ AU†d:«‰,5+Ju5†^飣 ÙV5©F©Ñý‡E–¤•päT¯*2Wr*Š ¥ú¾šU™4ª‰ÌÖ?+ŠL¯¦Ö?,E²ÜÁ‰|Õkub™H‘äë>0uêÀ%õ.@œD':ty7:zAÝA¯Uò…‰³$dµ¦:A–néÀ5™ß_Ï£ª]¨¨åû¾Î•bpt=i”@ž²ÖÞîÛ¦êÍÂîÛx¶yˆü¯Ú BŽî·H¨”ëY;Bð|ú"˜X+^Í”{J¤zs¹œª×T÷§þ1©.-"˜k8\P€ðCÄ@Š×.ƒdÝ8Ú ˜8XÂCB,Ôp¾> Ъ%.‹€Š\ ™%òÔ7˜HÌ(^‚£ét*Q}‡+OëÁp9R=õiÖí?lkÚˆä( Æ„¡ àÒ2{&ÜßÖš¾ìï?òó‹ñÒ°î>3 LH/ƒÄÚ€k4€iG F’ÿ¼ 4·§EdSrS ”:ôغ„ÄoËÿ²í 4ýÛ!"·üi94ö“ t÷— MoùßAŽõ „‚¶÷;Hâ»L8úúê·Jôû_|ߟ~9 $Øã)(À§ ©ÿìœÕ dáE÷€€x6Ð{ ‰)wmÛ¸µží¤ ¢ŸühE@êç۪ߜ6Í•= ÖÍn³¡~´a<Ã4%¹þrhÏAØ€¾ ­ÑTŸÒiD¿»ªx ƒEmŸýØ"7[å>:õšù¾QÐÎf Hú0 g<àŽèÍ¥$¥' ”hÀ@k öÄ Á¾H6Û…9È •u°Æ@sxU3Ð;PÄÅž¥#ͰZ›´;æ}4ôw«4|\MVƒÎVÝ<=üyßÖ6Ázn1|ˆ-’Å–ˆæ&qR|Wx´À€}B.”°ØŽ˜#dÀUz¬—ÅbKI@B¾åžÝˆåkO©r¬ÿV ^+a‰Uÿo%'|‹ Kd à¿!«å³>ä=pšk™?æ~|§y%µÎ7«ˆì(×ÍîʃB –ëgcósK–ˆÌ„oê`$n½øNÃb3RÛ9‰L€>•í}´šî°ì?‘wùòº½Eþô86çðÈÈØ¢‘Ö“FªàI`J½šàyØWêK(„Xlº‘çÈ?­YÇf ˆÎ§ÀÔzMP=6ÊŸ¤[”ìô™ÖÁ]ÚPP›`6²%ö$) ™§ (1Ð;¨uÒ  eKänáèÌww‚ž,GÔÚÖS÷<‡j>@IÈÑK–Nÿ[E*ðõ8]>ß?Êó½¶Y>#ÿIömßiÝì®Û¾‚ çºp“Ã6oÂB ûp°»þúB©2*wÒÃÇIEND®B`‚sleef-3.5.1/doc/html/benchmark.xhtml000066400000000000000000000121311373003144100173310ustar00rootroot00000000000000 SLEEF - Benchmark Results

SLEEF Documentation - Benchmark Results

Table of contents

Benchmark results

These graphs show comparison of the execution time between SLEEF-3.2 compiled with GCC-7.2 and Intel SVML included in Intel C Compiler 18.0.1.

The execution time of each function is measured by executing each function 10^8 times and taking the average time. Each time a function is executed, a uniformly distributed random number is set to each element of the argument vector (each element is set to a different value). The ranges of the random number for each function are shown below. Argument vectors are generated before the measurement, and the time to generate random argument vectors is not included in the execution time.


  • Trigonometric functions : [0, 6.28] and [0, 10^6] for double-precision functions. [0, 6.28] and [0, 30000] for single-precision functions.
  • Log : [0, 10^300] and [0, 10^38] for double-precision functions and single-precision functions, respectively.
  • Exp : [-700, 700] and [-100, 100] for double-precision functions and single-precision functions, respectively.
  • Pow : [-30, 30] for both the first and the second arguments.
  • Asin : [-1, 1]
  • Atan : [-10, 10]
  • Atan2 : [-10, 10] for both the first and the second arguments.

The accuracy of SVML functions can be chosen by compiler options, not the function names. "-fimf-max-error=1.0" option is specified to icc to obtain the 1-ulp-accuracy results, and "-fimf-max-error=5.0" option is used for the 5-ulp-accuracy results.

Those results are measured on a PC with Intel Core i7-6700 CPU @ 3.40GHz with Turbo Boost turned off. The CPU should be always running at 3.4GHz during the measurement.

Click graphs to magnify.

 

Performance graph for DP trigonometric functions
Fig. 6.1: Execution time of double precision trigonometric functions

Performance graph for SP trigonometric functions
Fig. 6.2: Execution time of single precision trigonometric functions

Performance graph for other DP functions
Fig. 6.3: Execution time of double precision log, exp, pow and inverse trigonometric functions

Performance graph for other SP functions
Fig. 6.4: Execution time of single precision log, exp, pow and inverse trigonometric functions

sleef-3.5.1/doc/html/compile.xhtml000066400000000000000000000402071373003144100170340ustar00rootroot00000000000000 SLEEF - Compiling and installing the library

SLEEF Documentation - Compiling and installing the library

Table of contents

Preliminaries

In order to build SLEEF, you need CMake, which is an open-source and cross-platform building tool. In order to test the library, it is better to have the GNU MPFR Library, Libssl and FFTW.

CMake works by allowing the developer to specify build parameters and rules in a simple text file that cmake then processes to generate project files for the actual native build tools (e.g. UNIX Makefiles, Microsoft Visual Studio, Apple XCode, etc). If you are not already familiar with cmake, please refer to the official documentation or the basic introductions in the wiki.

Quick start

1. Make sure cmake is available on the command-line. The command below should display a version number greater than or equal to 3.5.1.

$ cmake --version

2. Check out the source code from our GitHub repository.

$ git clone https://github.com/shibatch/sleef

3. Make a separate directory to create an out-of-source build. SLEEF does not allow for in-tree builds.

$ cd sleef
$ mkdir build && cd build

4. Run cmake to configure your project and generate the system to build it:

$ cmake ..

See the list of options and variables for customizing your build.

5. Now that you have the build files created by cmake, proceed from the top of the build directory:

$ make

6. You can execute the tests by running:

$ make test

7. Install the library under the directory given by CMAKE_INSTALL_PREFIX (/usr/local by default on GNU/Linux) by running:

$ make install

Common CMake variables

Below is the list of common cmake variables that are used to configure a build for SLEEF.

  • CMAKE_BUILD_TYPE: By default, CMake supports the following configuration:
    • `Release`: Basic optimizations are turned on. This is the default setting.
    • `Debug`: Basic debug flags are turned on. Optimization is disabled.
    • `MinSizeRel`: Builds the smallest (but not fastest) object code
    • `RelWithDebInfo`: Builds optimized code with debug information as well
  • BUILD_SHARED_LIBS : Static libs are built if set to FALSE
  • CMAKE_C_FLAGS_RELEASE : The optimization options used by the compiler.
  • CMAKE_INSTALL_PREFIX : The prefix it uses when running `make install`. Defaults to /usr/local on GNU/Linux and MacOS. Defaults to C:/Program Files on Windows.

SLEEF-specific CMake variables

Below is the list of SLEEF-specific cmake variables.

  • SLEEF_SHOW_CONFIG : Show relevant cmake variables upon configuring a build
  • SLEEF_SHOW_ERROR_LOG : Show the content of CMakeError.log
  • BUILD_TESTS : Avoid building testing tools if set to FALSE
  • ENABLE_ALTDIV : Enable alternative division method (aarch64 only)
  • ENABLE_ALTSQRT : Enable alternative sqrt method (aarch64 only)
  • DISABLE_LONG_DOUBLE : Disable support for long double data type
  • ENFORCE_LONG_DOUBLE : Build fails if long double data type is not supported by the compiler
  • DISABLE_FLOAT128 : Disable support for float128 data type
  • ENFORCE_FLOAT128 : Build fails if float128 data type is not supported by the compiler
  • DISABLE_OPENMP : Disable support for OpenMP
  • ENFORCE_OPENMP : Build fails if OpenMP is not supported by the compiler
  • ENABLE_LTO : Enable support for LTO with gcc, or thinLTO with llvm
  • LLVM_AR_COMMAND : Specify LLVM AR command when you build the library with thinLTO support with clang.
  • SLEEF_ENABLE_LLVM_BITCODE : Generate LLVM bitcode
  • BUILD_INLINE_HEADERS : Generate header files for inlining whole SLEEF functions
  • DISABLE_SSE2 : Disable support for x86 SSE2
  • ENFORCE_SSE2 : Build fails if SSE2 is not supported by the compiler
  • DISABLE_SSE4 : Disable support for x86 SSE4
  • ENFORCE_SSE4 : Build fails if SSE4 is not supported by the compiler
  • DISABLE_AVX : Disable support for x86 AVX
  • ENFORCE_AVX : Build fails if AVX is not supported by the compiler
  • DISABLE_FMA4 : Disable support for x86 FMA4
  • ENFORCE_FMA4 : Build fails if FMA4 is not supported by the compiler
  • DISABLE_AVX2 : Disable support for x86 AVX2
  • ENFORCE_AVX2 : Build fails if AVX2 is not supported by the compiler
  • DISABLE_AVX512F : Disable support for x86 AVX512F
  • ENFORCE_AVX512F : Build fails if AVX512F is not supported by the compiler
  • DISABLE_SVE : Disable support for AArch64 SVE
  • ENFORCE_SVE : Build fails if SVE is not supported by the compiler
  • DISABLE_VSX : Disable support for PowerPC VSX
  • ENFORCE_VSX : Build fails if VSX is not supported by the compiler
  • DISABLE_ZVECTOR2 : Disable support for ZVECTOR2
  • ENFORCE_ZVECTOR2 : Build fails if ZVECTOR2 is not supported by the compiler
  • BUILD_GNUABI_LIBS : Avoid building libraries with GNU ABI if set to FALSE
  • ENFORCE_TESTER3 : Build fails if tester3 cannot be built
  • BUILD_DFT : Avoid building DFT libraries if set to FALSE
  • SLEEFDFT_MAXBUTWIDTH : This variable specifies the maximum length of combined butterfly block used in the DFT. Setting this value to 7 makes DFT faster but compilation takes more time and the library size will be larger.
  • DISABLE_FFTW : Disable FFTW-based testing of the DFT library.
  • BUILD_QUAD : An experimental quad-precision library will be built if set to TRUE

Compiling and installing the library on Linux

In order to build the library, you need to install OpenMP. In order to test the library, you need to install libmpfr, libssl and libfftw3. Availability of these libraries is checked upon execution of cmake. Please change the directory to sleef-3.X and run the following commands.

$ sudo apt-get install libmpfr-dev libssl-dev libfftw3-dev
$ mkdir build
$ cd build
$ cmake -DCMAKE_INSTALL_PREFIX=/usr ..
$ make
$ make test
$ sudo make install

Parallel build is only supported with Ninja.


In order to uninstall the libraries and headers, run the following command.

$ sudo xargs rm -v < install_manifest.txt

Building the library with LTO support

You can build the library with link time optimization (LTO) support with the following commands. Note that you can only build static libraries with LTO support. You also have to use the same compiler with the same version to build the library and other source code.

$ CC=gcc cmake -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE ..

In order to build the library with thinLTO support with clang, you need to specify LLVM AR command that exactly corresponds to the clang compiler.

$ CC=clang-9 cmake -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DLLVM_AR_COMMAND=llvm-ar-9 ..

Building the header files for inlining the whole SLEEF functions

Header files for inlining the whole SLEEF functions can be built with the following commands. With these header files, it may be easier to inline the whole SLEEF functions than using LTO. You have to specify "-ffp-contract=off" compiler option when compiling a source code that includes one of these header files.

$ cmake -DBUILD_INLINE_HEADERS=TRUE ..

Installing the library on Debian and Ubuntu

If you are using Debian 10(Buster), Ubuntu 18.04(Bionic) or later, then you can install the library through apt-get. The DFT library will not be installed.

$ sudo apt-get update
$ sudo apt-get install libsleef-dev

Compiling the library with Microsoft Visual C++

You need Visual Studio 2019. Open developer command prompt for VS2019 and change directory to sleef-3.X. When configuring a build with cmake, you need to use a specific generator: `cmake -G"Visual Studio 16 2019" ..` This generator will create a proper solution `SLEEF.sln` under the build directory. You can still use `cmake --build .` to build the library without opening Visual Studio.

Below is an example of commands for building SLEEF with Visual Studio.

D:\sleef-3.X> mkdir build
D:\sleef-3.X> cd build
D:\sleef-3.X\build> cmake -G"Visual Studio 15 2017 Win64" ..    &:: If you are using VS2017
D:\sleef-3.X\build> cmake -G"Visual Studio 16 2019" ..          &:: If you are using VS2019
D:\sleef-3.X\build> cmake --build . --config Release -- /maxcpucount:1

Compiling the library with Clang on Windows

You need Visual Studio 2019. Install ninja via VS2019 installer. Download and install clang on Windows from llvm.org. Below is an example of commands for building SLEEF with Clang on Windows.

D:\sleef-3.X> "c:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
D:\sleef-3.X> mkdir build
D:\sleef-3.X> cd build
D:\sleef-3.X\build> cmake -GNinja -DCMAKE_C_COMPILER:PATH="C:\Program Files\LLVM\bin\clang.exe" ..
D:\sleef-3.X\build> ninja

Compiling and running "Hello SLEEF"

Now, let's try compiling the source code shown in Fig. 2.1.

#include <stdio.h>
#include <x86intrin.h>
#include <sleef.h>

// Computes pow(2, 3) and pow(10, 20) with a single call to the two-lane double
// precision SLEEF function and prints both results.
int main(int argc, char **argv) {
  double base[] = {2, 10};
  double exponent[] = {3, 20};
  double result[2];

  // Load both lanes, raise them to their powers, and store the results back
  __m128d vbase = _mm_loadu_pd(base);
  __m128d vexponent = _mm_loadu_pd(exponent);
  __m128d vresult = Sleef_powd2_u10(vbase, vexponent);
  _mm_storeu_pd(result, vresult);

  for (int i = 0; i < 2; i++) {
    printf("pow(%g, %g) = %g\n", base[i], exponent[i], result[i]);
  }

  return 0;
}

Fig. 2.1: Source code for testing

Fig.2.2 shows typical commands for compiling and executing the hello code on Linux computers.

$ gcc hellox86.c -o hellox86 -lsleef
$ ./hellox86
pow(2, 3) = 8
pow(10, 20) = 1e+20
$ █

Fig. 2.2: Commands for compiling and executing hellox86.c

You may need to set LD_LIBRARY_PATH environment variable appropriately. If you are trying to execute the program on Mac OSX or Windows, try copying the DLLs to the current directory.

Importing SLEEF into your project

Below is an example CMakeLists.txt for compiling the above hellox86.c. CMake will automatically download SLEEF from GitHub repository, and thus there is no need to include SLEEF in your software package. If you prefer importing SLEEF as a submodule in git, you can use SOURCE_DIR option instead of GIT_REPOSITORY option for ExternalProject_Add.

# Example CMakeLists.txt: builds hellox86.c against a copy of SLEEF that is
# downloaded, built and installed into the build tree with ExternalProject.
cmake_minimum_required(VERSION 3.5.1)
project(hellox86 LANGUAGES C)

include(ExternalProject)
find_package(Git REQUIRED)

# Clone, build and install SLEEF under <build dir>/contrib.
ExternalProject_Add(libsleef
  GIT_REPOSITORY https://github.com/shibatch/sleef
  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PROJECT_BINARY_DIR}/contrib
)

# target_link_directories() needs CMake 3.13, which is newer than the minimum
# version required above, so the library search path stays directory-scoped.
# It has to be set before the target is created.
link_directories("${PROJECT_BINARY_DIR}/contrib/lib")

add_executable(hellox86 hellox86.c)
target_include_directories(hellox86 PRIVATE "${PROJECT_BINARY_DIR}/contrib/include")

# SLEEF has to be installed before hellox86 is compiled and linked.
add_dependencies(hellox86 libsleef)
target_link_libraries(hellox86 PRIVATE sleef)

Fig. 2.3: Example CMakeLists.txt

sleef-3.5.1/doc/html/convention.png000066400000000000000000000642041373003144100172210ustar00rootroot00000000000000‰PNG  IHDRDðõÂÛH0PLTE±QtÔŸÏÖØÆíØxxxŸŸŸþþþOOO’ÿzzÿ»µµÿLLqÁr°Ü>J`1hIDATx^ìœ1oÛFÇHÓzÈrŒÌ”hM,D ‘Ü–zóZ CÖ˜wöµ@`eëèà~œlý àí4¸€6²û/RÔU'>-KÿÉ2!êîô~÷Þ½÷(¦·[²Ÿj{º¾ÆÇ!ç¡=ÃÞEý¥·]ýáL¯©Ÿu•¶щ1F;(äü®+5⹌bØ»¨+v£·[‰1wz=ýÁnž!¢×Žˆr [Çä˵ñ]Eô+{)ôVˬ¨yÈç&¦ ™¢214ˆ"§ûƒgDSTä€(uG4æM!ªp·Ö•œQ:­]ÐT˜ ‰%Ÿy s¡[DA(¢²*n`Í/@ª´Ö}3×7¢–ås@T¥8Æ8¯‘ Òzºè=¢*£¥jG ÆW£fœhGe©ƒŠÝËCßB’§r…Ö2?“ D\ª‹(ºü"*‘&Ó2)»CiGTšM @æ¨G†h\ËV/àŒê#ª8 ¢ ½<ÚÇUTÔ‘.&ÐÅQZ›+ä7Ó•=\r‡;¢CŽè[Æ>ztg§­;zlQi_9ÕEaQÀJ"*j”ƒ¢1¢ŠW!‚ ¢ t»…ÙˆÒQôÆC>nŃ’uÙƒäé}$j×EQ þÉ/¢§<(mR²øºº&u/6h]ˆKAÓH“"ª D€hH†¨ «U‹ÿ@¤ Ýâ#¢B7,µè5d9Îàu…¾zBÓº€Ã¨ïŒnáEi´¶z’ÊO ‘Cw‘åÛ¦Cô¦Ú2¢1'@>rN©¥/KJ¨8–6Œ/>Zè“»RàV°.[Iþ—Ü…uá0zàQ]ð’¥8BÙ¼èýH›!ªBôÉxÑwˆvLl‰JD,Õ³äÊ?ã(Ú &ekŠ4ôÁæE$Z}‹Ê(õ§¤´)Ù…uAÖûú¹#ª8% ôÃ*òÀR&WXDôÒÿS.“¢×D¯áCIEeôÜ3¢–Ð^ZÝ ²kNˆâË~Έ,²‡ëÄòÄŠ^-lÑ÷øø¯*gG–@ "D;´ˆèõzˆ"¹VÑ7ÏÑ0 ê=ÊÒ]ž˜Â½q÷(²EÚ·ÖCõ zD5òE-!j,ˆ–.±1¢üù#*3'JÓŠ‘¡‰U——$y¾È¾´ô Ž<Ù ‡Ñ3Æž¢–=jÅìE¯A¤ç8gïÿUž•Œ½zˆÞYÂ<Ñ¢_‹ˆí§óQâÇT?™Wq…¢«g™Ø£™=&RQ<õT55Ê~-w#Dqƒàh¬Wi|Ú ùÊ_ä=¬+0ìrÿÕ“;]L¬¸ ¬^:5î„ùèèE¿Žä0Íû —sh2«qBôÛjrk¥#‡ÁÑO‹ýEòïƒß6DT¦ÿ1wƼq#W‹“®µ€ê•l›‹ °€U$ â’.mŠàšÓ’¹ME›.íuùWˆÊ .€å®s¡ÒA\8¸Ë:ÀÝ*Zzµßò=>ÎLÈ%=—šáìr<ß›÷Þ¼a÷J\ yëFÔ·z)þ öˆ×Gô§æžå´ÏnS½ÇÙmLeÏêätFÅšï>¿L}WšË>*d0׬ D#ËÇ£EƒÕ[e=ô‰™Ä—ãDôózÛ¹ˆŸ½)†¼O݈ºsàžWÎôlÆñPD—¿š0™mÄáˆ.#Ž…ì'H?Û\äFtR9|û4®aÑ…•€Áh¢t@%³ð ÕeŒfÔ=Ô$æznÒ ¢Q¢a%pzá¤áŒÀÄzñ\‘ÀÜAwé¸p^N@öÚm.²¥fê³õDƒLº$3Úø Q6ðÈfíp@7¢|Aâ³h:´üfëÇæ†Èt•ŸPDaÌF”ZÞ…weW}W?ŸöM z2'=ÑQá‡Ó@÷  XÓ¢«×dáE턚*d2oD±•”ó £þˆê5øsD?¬(#pÀgÛŸ‹ Ö¦ªõŠ¢˜ÑE‰9[Íô) è BTÓß³…C-&­ï"R}DG ?­â.€(g÷ U|\®ç"…IŠ2n ÑëjªBbð1° Äv úe Etøå±°šºhÒ%ºycö_5ÓÛ¶GÖ{&wý÷dè¼I _4[½éhPŽè¶•—äpUr(Õ9ëF41ÿpÿÛ²ü—Ê~é‘'è¹öJ·èÍ„ Lp<8¢¿Ó’Ý’Žþ rƒ{° ÑÇ—wåéxÿr^îÿø³|';ÝÔ@”G–¹†j´…kSßp„FÒ¥»hËž­X.z^…˜‚W!ÃÓÙˆB4ÏÕFPÚbÏ]3¢£2D¶ÂÂbR[hÑtSþ_ 3<:Þ@”üD\Réðõe¦¥¡#º|R…èÏ//ÿ=~ü||5'ô™È{ˆ_´¢ü¦G`;ºM)êy-R°Ù§:8f+ ‚ {3,Qf¸Q¾ËTÈš#*©nQŸÄj LŠKlº=w[D.4ƒèÅ«:0HM&qí]^¬;°M__~›Œ³ËøbŽè󲨅ô·?þ͸¢\¹ã.†Tð‡éí^R6TÿYQÅ:2ÐC'>„Qs¶2)¸”fG.ׇ(fua·ÆÝKQ~«d¡J罫&êÈ¿ÖG”+е´4ÿÝíÍ£Ri“ÜÇ¡îUGšˆÎ…ç?ïþùìQ®çþ_¬Âk!ŠÈRèí‘Á}…pø†p1h^  2 ±Æl5"ܘrº!J;7¢ˆtÓ¯K[ï¶¥(^c[© š¯AI+ì¹Íí… 
¬@–MÄ.¸O¦{¥>—Û27ëÐ;‡Þé—ó–û_刾ÂPÖ4¢¸H¸|cÜtmx8¿×šþ …`µàF×ÿÀ˜­føŒíá[¢p§–eöºu°ND¹vÜFh.F`=hØtOJU,J”ïB —ÆœAEQ‚\¥±_Œbpñ@ôb±þÜÿ&GôÅ ¢5…¨­G\{m^™i ‘·ÊÏÊ7{!šõ5£úÍ>(Ÿ”‡6"Í#ª%gF•j‹žÛ=¢äO)õl.5ÖÖs©Á;XQ{)ûl^™XIç"bjÂ`#zz/9ÿx™—p£ˆ¢—ÚÎ%wª …B+Útô!¸{Dñgò&£!%OŒ.x¦µ šêùõÖ!¢jŠk!‚!Éѳ;*¿ƒè3~^ûˆ‚]U”ÖÔ@”Jž×(sGǾ=œ[†WáKWQ(¢tâèÀA='#ª2Lâ‚VÛ¦ž›â”ѤUD‡š¥s-'4¢T2݈æá _¯ ú|­RT¯Æ5¢³òÝ;Yn·æ!yy-öÇ'Å€-Ú9%ò™fHÆ“%â”r†(ñIþˆf†‘¶"„þ=C”R¤¢éz.±¢Çë#ª šщ‘-+Ù[n¥þ—gUȈy%êÆhöΈ ¢Þ(¢ü,jM‡®ë˜_²\¿ñ¼=EŽîú#ºm"ºé(ßy#Ê ÂþeÄDm¿wˆj ½¯-ÖD¡¬ QÞìÔš11Žc~Aô¶p%4J¢§q÷ñ¹¬ ÑOô¥ÏP”¬ ’bÐ>5‰¢LgŠOPeK“(!ˆÒIä/E¹îW"Êt~ÏEà3åP[D$楨¾ !zwȬè(„_Œ%D¯Þ-EAôŵ/)™=D¿±ŒâÄeM¤(Gp¶(_HQ7¢ÄwèñNž|-Ï¢Ú·¬Õ ªÍ#ê¾D®€ ŒÈò+Nèpæ'E³×,?¯ð¹t‡(BÔ+µ70|1ÖsoD1òF´g#š"š‘Ë!Qj¨±t¦Ëá1=Dµ¼H"Šõ9@×8 ==[Y~^àséQ÷ÐÂê_Ó¹¨ƒÑò#€7M)zÒ€Åë#ºi#J,k·€©~Üý) ‚ôJ´+DÝ2ƒ¯Õîzj¬¶˜ˆž¾ûŒÏ¥CD‡ŽÔˆ„6êX;„#Êèù"ÚÑšR9½Û(¢ü”.¤(Ÿ¢p©m^ZCW»_»D4ud—%:œ¤ž„èºmŠÏ¥CDg®@× à‡#ÊÔè@ŠÂèvƒˆf#ŠŽRrS¿ÑÄ [èQÂ\k1Ý÷Ljf+ºí>—î=÷€´ Ña-D3æí"J pÜ¢ìýê.ë‚^¬—´;°Vß‹Ÿ·SD}ŽáV š"z¶"8Oñ¹t„¨[ˆ")†G”Õ›F”Æl5ë‚mÑ“neD%¢€iÞ‹!JtQ ˆº…(’Rw@0D¢šSù5ËR|.Dµ‡¨{”Ü ¥&ÑL„;O85„(Œöj[t©++-f$-Ûñx™}M3ÓšÛÑtU}‡ˆºe’’4¢Ý}öpãsé QrŸûœjDé1QJßB´g#š4(Œ& øE¡#Ñ^sˆ’– ‘*s­Sœø!zºF4nQÒm9 ¢èÑÑ ºÚç’¶Žhê#D‘”ÄéÃCØÑØ ¢0:è Q· ÎF²Íß¹ˆ§j´ÀLx†ïÚCtè¢HJCD¡ —Ò ûLœÑ¢8”‚úLM)š†"ʫݣKÉšB”EXä‡(¿·S)j'¥`÷¹º‘¡ãn¡Ï²ìÔiÕ4¢/Z@Ÿ|HŸCKŠºctÁkQÑçrÖ&¢œ•Œh,ë¼7£ÙXiD#ªA#ˆ¢v‰h¿9DÉÛb#:]Žr!ŒH˜ÛuùIˆrÜt0¢×ªÎk3X~»j-úZÖ¢¤›ðî•bíˆÚ°Ñª>¢H?DIÀÐ)¢vö5>é1uiE$G1cƒž}~[@”Œ=!}²* G4ÇÝöJù\þÐ"¢Øö ¢lÇ5Ó$¢±‰èVcˆ²[Û(íãÆ¶pDÝN!ndù€Q\¢ØWÂU¢Ü|]û’Û€>±mŠMâXK\ˆî¯è¶—yy*ÎnQ÷è§Æ,F ÛE'³çtƒˆfˆºÓ‹© /)„(F3D¹ŠøGø¦ó¶'Á’¾DÙWåoÑų ëRŸôbWè¶§äÙè[C4õ€O DQýIuÚ¢=‡fZQ¾Õ Q>F #š#Š® ±UˆÒ a*S’_̶s¦K ˆ2¹\%3e–“-Úèg ºOn1q¦K[ˆ"ý£‹Pl²¬lk[tyƒÛši‡ˆbC©(Ç_„#J38·EcÑ ‰¦‹ÌµÏ“oQÈó.B±¥N­lmD/ˆ\¸È??“†²6ù ®&zÐOÉKª¾_Ô>/g]gˆÂT}DY†"Н†ÑW™‹¨ Ú˦Kcó<ù6xɼõô f!zžQˆ8û†ó\„G¸5D}õÜT É%^ÖZÑE=ÇAÁ$Oïb-Š+µD‘aÁˆ²„Eç¨tº`/"͵P äO¡…SºÛ@”Cn}ÃèÙ *.ñ²z#ºÿŸ‹}l Ñ4äs"•½Îk£Ëéäã0·0vbÑÅ[¸Û¢˜ãPD1ys(UuèºëˆáC+Ñ/¿Œfªürc£ D‡jÞu×DÙDÉkä‹èë=>ónúá¸ED'žï(TxÞJâ zéÒ(„ ’BÍxLI ÑW1E4K„ ñC  ¢¼qBE?¶ŽvÜÕ7rÖ"ÕÓ‰Ðch&ʯÑ÷Ö‰èÌOÍà ‚b'® —.ˆž>;½÷¹<^Ñ𴆨}„¨#8Q“ìw§ŠK Ñ,2w;.f™K 6ë‹S³wÇ´ Et´ùˆ›ý£‹/BÚÔF”§É|۬w’Œ=°hõ% :“¥ûfFxÑúå³· ykž… 
Èe9¢¯ï•Û«WË­h+Úɰîn¬Ñ¡èA,¬ÑbªÇÉ‘†X#Ê$€ÕÉÊtÉ/石»vÛ2}ßñ¼erܧD7wæwgÇýÐ]öœ=ÌÙñQ¿¢<èNBo^ˆ®4<’IÚ£˜eOæ­äÊŸ—¢ÂMD­Ø…õ#šÚˆ¦72G¥Aì¤,öML&÷R‹î7ãñ—ñþÂ-zúª4ráó6U…'¹)¤)•ùÂï«Dšð™Ù#xÌá*Úc"æ…)B¡ôR_DiG Üé¢óÔ@â)nDá2YfØÝ„¹–ÆÌgY„ctýˆŠBÕ$?I¨@\T§œ´€¡X¡ —/Ò‹ç‹wÔÆ+‚óÓ6Ùˆæ²0™?âqôÔu<?9@±µµT¯[Ȱ¬!zï^«…ìˆ2û‚ö‹BA£ˆŽ‚eÎÂþNHìîDGü * ãÙSŠÂ·6D5O"?Ï»º÷fÆé}{ãqö®26e2óòt±[4~ü|UÁßD3ã9ôÈéˆ]7¢¢DV^Ý‘lg22¨h@ÖðhѬЍ~ˆ^…T`‹ÖúÀ(–ÍýDV˜ÇúɈ]ݣ΋²ˆXVaÉn Ñÿêb<‡>{Ô® FtW¼ã{†°ŒÄ¬®háî( Ft‹å\»f‰tD]ôž‘D±Êˆo»î{²>ôHa½gä#Q\eÀ¤»æüi|©-]ˆ1Å]ôž‘Dq—ŸmºëÌ_ý@Ê­¶t!Ʀ“|Ú{£DZ,ß÷ÙÛàc¨ÎRÔ%úô8ûRYQŽ‚|0LÕ¸ê€A¶–<èÀ†ÈH;! ]5Îýv†ÛÐô<Ÿ§ †êle qžæ¡àœîïrYÒ’"$Ûé½äöú=L»§ÜÊ(È_üµö^{m5¤"Š ‚ Š ‚(*‚(*¢¨ ¢¨ ¢¨ ˆ¢‚ ˆ¢‚ Š ‚ Š ‚ Š ‚(*‚(*ü’þø{:%DQá:˜~L§õ‡tBˆ¢ÂK‡õè®\j4s‡NQTt0^|ˆ-²ÎtLöÀ)Ũ(*L1 Cé£ÚzŒFºGöÐ'£¢¨àÝÒ¡\!¦ýllÇèÜî”/}R1*Š ž:8õF8$Í&p­ÏFZUô”bT<,è0¦À!1ÂjŒ¾€YcÊÆŸ¢¨.À@ïE-ƨ¯mÏ„Ÿ'£¢¨Ðpû¶ÄûµX:Î5p ôl_p³¦Ó@|PƒÃo†éò½…½2T'ü<lµêÒé ŠJˆÞãöÀN¹ÞþYSêÐuic7ô4° QL§‚(*!ª¨¯%ZäïÑ–õ5pŸ8e{µh@S:DQ Ñù¸=Hå½€»7D1$s |OD¾ÝÕ¢ùÉ-‰¢¢tHŒ†Ù5Gºµ7D1&cž*NV`­¯]ÐI!ŠJˆÇèžJtÛb´'DG6ý6ß ²¿Z´ “B•åÝS‰¦ÿ£ÅÎmmÌöEÿ ùð¯é?YY_-ré„E%DÙ»ý•h‚çì Qwd&÷ýô=€ïi£2§ÈS@Ñ©!ŠJˆm0Ø_‰îo¦Ð"Ï̩ؖ¡QúÇnoQ›N QTB”h†Û½•hþÕή5U”tÖK±QƒÄØÙ#<ÉÕ"QTB”j°¯M‰àì QÒP¦Ó?ha}µ¨’=ô¢¨„(‘‡×û*Ñ `P¢æŠ’%$Õ¨—k ]è2Žš.«(*!ŠÞ!gA¹å3Ñõ!Jyñ¸ÇÆhž×¯-¯QñÅ!r¨Ùˆ¢¢š#e…x_%ÊŠvjC´¤h‹ŽCÇyŒF€c··¨K9ºá1*ŠJ%êstzí=•(㡵#D§¼fº:6F/ò]Ú]-zQZ-Ú4=FEQ©Dûyvb½·eEÛ;BÔGî}x¬]os‰`yµ¨ô¸£¢¨,çncÔåé²õ•(£Ñ­QŠøÛ3¸fBÄdˆOiŠ‘(*˹}¬iÞ&šTçɘ+QFéQ ø]rnþ’ Û«E°;·Ì>¢¨0u {¢>Ô5†D>Uáßq%ÊN}ˆÒŒç¢L`£–W‹À)ÚÀE… W¬ñF€³k¸õ{ÑZm8DË£‹4вttÌ~oÑÀùÑ÷bj¢¨0zõ‹B`¸o¤XÙ³ðI›«?æ—ÚÂ袜íÚXÞ±¿Z4]mG:øÔ8DQ!Pôh_ͪÜÇÉÖ£~—ȃûXQ1f1êý•V‹" |A EF¢4KB´ïîˆÑG–yO‡p^Ý­Ðcƒ‡é“[D46o²¿Zh¨15QTb4ïÈ D§NmŒ>Ѩb«Ò0~¬¨§\‹Í@æpýŒß ‰(*N]o¶>éA}Œ>iÐû´I±ÓsˆZ[-ò¿¹~iaµ¨•.Q“E… 7jš¦”Þ£Ã'Ç9ŸŠÌå鈭„è%‘—ö¯óD½éBQ³E…‘“¹µÎÄ«ÑÞã+QÚy×eEáZ Ѱû!ìÚÆ«Ecß^RÃE…Oª´‹úGoóÝ ¸õ›-âãhfÌø`ËvG0¦@•Jtÿm-ôL˯¹}Õ¦*>`ÊÈx'vE–˜jà%ùÚ¡f#Š ©«ß#C(¢iuBδŠ6f·!.,ÎH¹Mïi4¢¨ yXÈÁCk—_Ѩ2!CQ~ž3V”>µ¢ÙK{ÃcTfœt—zU©¨ÕeEíàwj¼9¸Ô`DQ'nùúã\+—ZB›!ʧpl£¡È»M»#Œ(*xÀ€_Q?®ˆ]ÐS&C4W½KE~´4­s¦\Þ6QTÐhg&¢Ï€»mzŽ*¿>y¢÷o@9ß0~dXS£!zÔ`DQA£ó1•h¤ŠL«î? 
ÑQLfl€a1õÍå^ÿ$F'hSãEEQ®Dûã}ñt´N‹ö9F™N1Di¸Öï-î/ÌRtA)¿HSº©!*Š Aïà-ßiÿ~@ÜXâ—Å¥X[¿·xâ_`"48DEQa4(åÓç××_ïR”˜]¿8—æÀÚvˆRˆ…ñÝ¥L“CTüR>} TÏ묮+ûœªLDÔ·8®³jO´o{Q±Gƒ¿¡ˆ¢‚ïr>m€]…™Ÿêöð@[ùáeá`LµÌ·ÂýZ«ïÔ^‹ÝæC4F-[+°s$Œùa›5QTbÔMÏMc°³¿Áר3ÏÏ R^±¹¢k+°—Ôºð°hM Eò›;¯rã"´vÖ„1ûWó‡hù^ðùýñŠ.J!úKÇãÿôp:{¢¢¨¨ô=w˜¥³³«=ÿЭ3Ày$q?U•¦ºmtµ8‡h_ói퉊¢Â(.£$jïýÁí:Ósˆ–¥]:¡]ê1šj¬í4,5QT:ų£>êS”‡‰lv}ËÏC”§ òöæàÈ­’˜ï-æÔ7€ÖpDQa(ÐÊífªÅUýs1‡h-ƒbˆÒŠS@|lSÐx0åÜÓ¦ëÄå+Þ¦ÔhDQaRl ¸ i’ƒzX¥JC´§¢S¶ÉÚÇïß.už{swÿòTnúF/ïÔ`DQa©¸ÁBuw£¡ªU¹ß¢ÔW`'@Ç\,ÿ‡®9÷’Œ6‚‡HL½tN}sE… »ãOD@…/hÀÙ»ƒ1»0¶"րŒîçŸA±¬…Z¢M§€(*вˆŠªùéfOˆò¼2~¡ä¤ãáÝJ]³^Ý‹ü_Õ!Š0¤@•]Ž©EmšU†èûa9F¢¯­„hÀ€G%\›,êò €4ù¢4QTày!•S¥#z„~äÆì•ë?xpËp9턨Úž ß`Q—w‚&Ù+Á15QTàa\K^äŸ*(*2ÃÓq >â\¬[+!J3 Uº- è=ø ŸÂk&¢¨ ¢À ‹›Çñò$½<ˆCtËk2#äæßT|Ünä ‹çÝš(*pˆ®¾§=p½9«ŠQ\‰Fïan.ºöÃ÷çgq_ñÇÓ@•áþÐ%ò^W=ª\‰^š ‹?œ_l”Ñã˯è”E%D‰<¼=0D‰‚«ªuÊ˹@†èÂâq”ˆ¯‡&/ºNéÎòŸš¤¢¨Pî‘_­C”(¬ªFM»2^Ö-ß‹vÓ&¢~—Ž&(žjmmNèØ¨(*!Jz†(QT¹¨Ë!ÊWš¡TffPCúhf5çEf€S ÈøÐ¥U1:,†(EÀ>–|Ž¡ð•z‰WÀš‹(*\_$½ Qš£*F×…åócFh*33Ñ~£òË{|â­±ˆ¢Â ”Ù¡!šlS¢–¹ÉS—¸Û>ubòÌnv‰Ä‚¾4QTu>&D=n˜Ô]Í»bw#Šæ±¼Ižo”{º•þqx>psE…+ô QH—óWUùÇãÅx¹Èœ ŽKÐ2zÐ}`=©±ˆ¢Âøˆ€ë@Š6TA!å&Ö]Žë÷ Ÿ¹‚€{[†5QT˜j8×î!J›ÂwjMécÁ:`MVHã;ÐÊ´«Ð×ùX—®½¨+Š s '®ÑÚù Qí_ì°x˜»³–|m^=þ°¼‹ó›jä7:FEQas%ºz,"\ªd–+zi¬? 
Ë×›ñM5@—Ž(*pˆFß=ì¨E»üuÎçË9ÚZ):O3›gÃ˹ºéŠŠ¢‡èôf»nZMÀ‹µ£ú€9yˆÂµÖ§8ük…(­pA Føn@ßThÕ¬2•I§YŽëÚüì¡Ï¯¿26XðÉ9»!JÁ2¦#Š £ß~ÈÌpÞ׎6˜~»üÍöC–fQ›ž€‹Tç$sßÀÚØ§ö_'D›(*@šw3¤“Æ@{˜çÖéq…OkòGZéÞã†k\ŒÞ*ÑK ˜L1úÅ)5‰¢Â( QUúâÐ #>Ú5[€r³± ¾n“€·ße!ºAÚ}p,Ë88©E%F»YœÞòÙì]ô¡òêôiŒF‰£*N¬O\ž!&3>»Òà ¨ë¬MÆ–PˆŠ¢Â‹ò»!Äìèò[UÏÔõß-—oÜÂEdSÝ"S¦iˆN´D#EG£O)DEQ!Øþuå ~3 i'@'}£ý ý@õêÒ±tƒJ¨loÔ%"C³g ¢¨0ÊOÆÜÄW_ùh¨1…ã&´¶óÀ6ˆÉMZ‰òTÑ–IŒ¶èdE… ·õ©K‡):Æô«mšxY¯>Sè$~ -¬j JÑ&0:ˆ¢Â»ò”èí;•6¸ÌŒÞÔ¯­Š(‰Ñ‘¹}€ˆ —›]Á5[vZˆ¢Âˆûõö Ù@}‹ÌÌ ê¾=OŠÛ$F5:æ3œ/)âã3+“Žˆ°E'„(*”糯ª¥‹–|]>÷lU¶ Jb4‚¹¢ôl¹¼¥è âãÑtZˆ¢‚f/k†kn²oPëÒ^H%ýtÓ4túÖ:÷øI¡Ù3g.¢¨Àï¢AMæy@ö«\ŽJ¨¯Î¶·!=`]õ|E!'Õ¦µ“Ã~ t´;DYiž¢= h€7¬iNç‚(*xˆYƒ˜fŽ[qí‚è¸/Õ›¸ÞÖ(êr81™Ã™=ˆúÖ‚Ô¿Áò5QT!æÞD´RãŠQ™×Ù!Qµ¬¹ Æã^ˆí%GkŠòäß[¢P‘"nÍo&¢¨p…qnV{+ä÷U§sѸõ«¯æñÿW¯‰æ,·¡¢ÌÌR=êkàˇwmÜEnÌ «¯aŠ4ðÖ­hmŸ­]«¯4Oƒjl]QekÃ-qk~Et²Åa£¼x9wª«¾Ë{˜¼hdo÷–gš¤ QƒcT6N&R+ ÈÎ!çòr«ê;5¨3h p+~G2trTV÷¨©ˆ¢B€Q„tÓóòàM#w]y–Ç Ú¹n‘éÃFŒF@w–ü6Y‹(*lpý^XdüÜC”&™Œ íôÝlêýõ+K—s¨^;v¼÷nùÐ\óE:tËw‘A÷‡h½¢>õÝ5Ɖ[[zæ½ ¯‰(Ú¾A=šÙé‡Hbtƒ…(Ú\ßÔKÊ*чoåî Ñ×¼"º¦§\Ù—<@ý–kV£{ ïn0àVz;WÆ$1ꉢ &#øDÄ•èhí ÑTÀqÍä>õ›B%:1wjžÌ;ãòÖÌúmc舢'‚¼òvxÔ_mˆ^P €ý·ÆØÙ$™¾ÓpÆ|µŒ9W©ñÞmIQPóxüVPS<Ý/?ûrЭ1ÝÉ{8d ß›:SÞI(*Š ¼'êÃúïÀå¥ÛöaW¯éŠâÖ|J˜oº×šÿ”Ól!Š6+Q^©«D1ÈUÁâ%òc²G^:¾X›‰Žâ‚©EŽÀ7ƒÎ€aýž(\^eu÷†¨uøµÙïÏ?u‰È[ˆ¢ÍE˜ÇÅ¥Ú>׆(éq®t½#ÁšCÔ2|W8†Æ)Êg6Jm.BÐ}à–ŸU|e‘Þ#¸Yd¦lùôË?Ò|õ'Í!j•+ëà˜mãÄYŒ5¼AF@îeXøz”mØÎvhTvðÛÊ6@vÀ*£ô§ 40´1²Œ¦[E•(Ú\„ÀëÝË@!¶ß™bMD«mˆN=@i´*ÛÛÄX_Ðêºkô¾Üz´Â»E›Ì¼f© äW ýöO^jà¸%zuUJ É6|Wøe2îÌ¥£ñ ˆñ%Š6!@—Cô¶.wn³o¤ºÎÒ¨ôº °6™k¢o7d†Wþ÷Ô€7E›‹0jïÝKbžÊ帅è™À­ˆQ² ß>]Y˜ˆôÀ?L騀–/)Ú`„ÀÙ¢åK‚ý¾ú.M´žÞÞØ©ˆQ² ›¯| õ=£,µðûs“E…Q¼·!ñÓiÛ‰™hWÈ´ ‹¸´ÅƒTlg‰ŒÏÝEI鈢ÍE¢5„Ç妓ÞEOð{d0[|g@6ˆn–¿¥”90æß*MDö†(ùËŠ#—¡Jd­ >ƧՃÅ'¯ƒßüÕz–fJm0Â=‡h >ÙcJbtÐ" ŽAá8H‹ß’eøê ,DÑæ"Œ^}à=¼=T4ê,Ñ:^Ñ!ÑtÅ»­¶Uô§ÀE›´áâã‡Äë…è0¼'ºÄøèÖŠº~¡(¸ÁrltÂ[@“cTF¢>ÐÎ XE;™!ýòêÈ ‘฻R_ÐQ|þÃÔÝo.Q´ÑHŒö¨H4Ø3CÏ¥(“Fÿ7TÅûôGl>zºCä<¼¿IìFo›Ï¿EŒ0Â‚Š„*>Dèpßû±Ÿuó®%ß/k^d§XÐ%—­cú ®4:ÔxDQ!ÀšJ¬0> EIc}è^Ël‘š¦##Vêˆè¯b"zT„úÞXm"ÂhH%BàåŽhtò´¥¬ 6;d›:?„³¦D;&ªM–¿} %)ÚH„ õt(ç÷ÛÓžu°PdÒìã5}$a7}#ˆKWwEÑf"|û$þêSOºþëdÈ6DQ Ñû¯ê/M·Rürù#å<¸–B4“jtlƒáœÒ°`Qô$úHPëÿüþá[õµbEçI»J4ÀbûqHG1B»˜ÉŽ(z 
>€áô¡þ¾Ñ©æƒwKÔ~ºQPºM&pêý'ŸÐqxèñãpµ(zHˆ:[—.vtÆÞÅä\+¯J61M= Mv–s“~:ÅÇmQôDÝsß(=wi?a.Àjë‚îQ`Àý@†!J4›dïðã†ɾèé !J47½†¿ÜŽ'âÔŠ ÆvöD}2@‹Òã6cQôd¥†Mq‘oâ¨Ì ¨¬aˆšÃÒã¦t2ˆ¢¢˜)Ê£°£ü…2ÌÌš g¢æh OvOT•Æ¢ðcýÓ«+DRnÖKg×›©Ô ÇTB45JýÓQQTœΓèð¿·3$ÜV¼5é×¼{´U=!1<µË袗»üõ;á›åòkQ´¹ëôõÙ Àk¾Â©H(ô=q%ª“5ÙÔ°`C•ɲ֔ˆt¾°ëñË@#E…‡¨C1Ï^_ƒŠ}TÿzùÖ¥B%:Clat¾â_oÈ×Êðyü8ç_< EÑÆ"h}ôšiHÝD‡êà÷I}A†èâN›‡—I?;vúbû´•E›‹0j}ôš)y½êÒRéñ‹X2WÓm. ŸÂñÓq¢hc‚î£}þã~a§~c_ˆÚA#¯œ_ó¨Çâ¥KWWYªÛ¢hsFq)DC@Ŷ#ÑîžµC¨ò7lJešÔß½llÚ,éNÜ@m.B0,†ètÿ½f+Voƒ±Í­G»Û0ÅE¦hÇ@x8ùŒÞ-Ïül,¢¨À!:p½{BüÊ8´¢õ¼X‘ÏoØzF=·Ñˆ)‰Ñ>¢hƒZ…îùvŸ„vîB@µ¢SQ[,Dóâ(‡®Ù’®ŠÓ‰¢Þ-Ïül&¢¨0zåå>M“^‡ª)gðk ×Ö‡w….Ѥ8`Ð!>ý­Ks E”ĨE àS''Vv~{˜‹ÂEܧ7Ë—”â«ïXz{ô¹«ÈèàLy¢¨wAm6Âè±OaÝ2Py {Ÿ?>€{–Éjˆ>=‰³1Wô*ýý2S}Q´ùHŒvÊîðkU^8òÇI™ˆûŸð¿C ì+Úå?¹M†l²µ'墆#Œ¶©ùÓW1%LpAuDPœ§]µY[D}5àm‘m¼âu2ÙͱcŠEÑ#ˆ³“iî6azT çPoûVüÀ€h†!Çèšl³JÃ.]8š~mªhœ©¯ÈÇ¢hÑûFgHøšˆvf v²(Ë\ ¼.ïVö1 Ûðö¥†ä㥡¢kJ˜¡Et‰{Q´¹Ao»ÆsýNã%M€Î.S¨ÜŽ@ù˜|¸d¾&bÀ% ñÆDxzoLmQ´¹ÿH`Mä¯ð& ©ZB¸Yˆ¶Ê-øÀ,È2¼ˆµÚvOD&wÅE'p•¡(*DYï·}W㢓z’ÆÐevDl>¾íÛϤiêTfS`4Ç/ºåÿ Wm0Â,K‘é¾á[W*N¼D/¯ »O§|öÇT$º¹{I†xí|—d`<ÇÏ/;©5QTàIí!€íÏ:´úá¦<æcîjHñcÚ­ ]ãÑM¾^z"»c"Ö¦ÚP_Ýç”vˆ‚ì—à “å£éQœ½ |ó[Wm.B 9D“bó%Õ‡eLY%Ьcð³šänmx11ƒZæ3@5ÐÖP±ÑdÁ뛤|±å¥(Ú\N©½Ú5¾2ŠË·ymÚA?}¦î‘_¤³Á€%à_Ô½EÏWÀPm,‡(_o¶‡«´ruv4ºvžiÜ}M©Uotƒdž/q*Ñ©†#Š6Á×pbуN|òÝ ñnI>…Ùg]2äEÖ¾`¾›É˹3 E‹i@å µaú©®D÷( ÒÔ^Ïq­ºå­"3x9Xˆ¢ FˆÞ¸ÈwG/¼÷zaò$ ‹óL&P±…¥X[ Q®ÇŒ(*ÌŠ!Šåõ—ƒý•èaÓ’A±¶°¢Džc-DEÑæ#l€V¢Îµ†U¢tÀÈ@ iÅÚ*ãe,…hQ´Ù+ÑU}=:]oM!f÷ÈÀÐ!/óÒ‡‘ ~¢æD‹bˆ†R‹6a•‡h+ùÏZ“üåÀ?hÑ7ÈB ¯ò]ÌÐ Dg¯m†¨¾wçz²¢Ûp„ 0æÝ}»v_á EßÙã{͈¦F!ª—/ ÁºÏEMF¦Ðæ%šƒÚ·ÍƒDã·Gî{¿”±U–Ð¥½EŽððÜ-„(ÍwtïôÁþ’¢4†ìª©Uöbt\æÐxDQC4QtMõ1Ú£2_V;ÐÉŸË•¨™Uk²†V¢=j:¢¨àó.NwÏûbF_žqæMZ\‰š Aö‡h,Š6¡?þ¥f©V€[o³~¬è¤]ÙDߥ”˜+QBP%Ÿ-Ž¿‘Côóë߈¢ÍE”ævÛÔïí¿i1\‰ c*½"ŠÞa|”ð¯K!h‰¢ÍEhš€®¯¿£:œ'жwÝÌ•¨!açiTãè1›·¥ÕpÞa(Š6!à…MÀÀ}mŒ®‰©+]C€UæJÔˆ7­‹&GG9DC`@—Ž(Ú\„z¢ÑÔÞ¦0m=¥*Éü´Óà¼'jŸ³ªßBt“çI™™Vh‹˜ß QZf?Ûg@wP’©á¸d‚pT‹¢ FÈ/2üãAÎpäFÛÿSA°.†èÇ+0pw¼)˯–2¡ð¿×¢hó‘ )¯¾ñŠº°ţ¸s+†¾G§Qßû ðòÉ´o:©šÿ=|ˆ¢ÍGb´J÷»ÖU)ã*÷?úà%£F]ßœ¸â :ÚdB–‹Ž0×PoK´wÕ˜_°®P¯®+*Y¿¢W[ïÉѬ˜4€kê 0EO aVß´7AòÏ"—ÈÛ~œa\£1‡èýà в<{bvZ bÊOîEÑSBT¢X]Ÿü;Ä4C?£VÅ·‡¨ÚFÕØbˆf<¥D׃WŸ¨ˆ|݈¢'ƒXìXyYž… 'ô3E³'yèY Qî-N}üÎë å'^A¹¢è) 
ð©ÏžiÜÅDQžgóŠZÎW¢iè)›!Z^åÁÇ>œsgÖé ¼(z"þ!ý;ðÍú ˇ¥¥90°¢<&éhEù÷ŠNÿ·Qôt.9DéœTùìwJ!J[Ñê…X}Äá4½=†Ã·‹c(Š6³êðî{¯~‡’kÚ¹ E5ÐæÏ¼u…ÅqgÒ"ŠNë=W® ÑÐÍd­Vô³mM[¼ˆß'¿tÜå˜GkõÝOÀ0;=ú ŠžR‰þq_)z‘çc¯ú¾ÑÏ Ji€Ì(#a{s$ÜnÿmÞDÑÓA*Q}»3jsû‚\ÖéjLŒ°F3À±¢zL Ù–ÏØ¬© ê5¥!zÿþ芢§ƒT¢3¼!¦¾hª±¦ðÕÃÃ{­ž^-ÁF-,ü`™¢Áö Õ7º3Æø-Q¡ý EÑÓA*Q gß<„„o²Žú²…>×´3®yˆ*w´ *À=3åþж(z:Èž(€Î~E9&¡ž$m‹ÿi ð_Ó4ùÁ&­¶¼ÙkãRÝU$¥_Ñgê%‡N3=ÒT\¸oÛ„°÷ŒCÔ”(Úh„Q)D)ú¶F¾µÁçåœW•n!D/¯”)Õœ¦ÿtý‘Yˆ2¥&Åž(Úd>â§Ù2vìΨëknq­œ2u8Dg€ú1[îéa•_{}¿¾–Ö…Æ#ŒT!D·ò-vö¸ÞÖD½aÕø…¬JnoMÂöf-bLC”ÑPl«¢f#Š ˆJÇ'WhQ ŸÞ@}_\j­aœí¸]D¶Õ¨!!z\OwDѦ#ŒÆyˆ&lÐ>lc±Ž~V‰J'"+Bd3DÝᦠj:¢¨\p%š(»[QÑ:ü4Cà6L°Õ«í¤^ØÊ+ÑŽ(Ú|„¸¢¤÷Åß®£7¿S-‰Q ²‡è;­ÞºªŸT%*Š ÷ÅÆ¢`´øz÷A3?)ÚDIŒj¸d W¢¾æ^†£‡§T‰Š¢ÂèZêð;“®øÛôðÇÊñ Ü0¨4µÐe,J7GÀ¹A— 8¡JTpm9=`E†Cô¨ºöÓïòðPÑÌÆ`z}¥Þ ¯ˆæGƧC.l›(*ŒŠý6{Ó‰k¸oë.Js ·$éEßÖ¸N¸Åó­ý®ÉQ·L<„ ¢Ãý‹E‹ÜõêO5=t™¡`q|K˜•¡b3áoEÑSA!Þ·áÉxànÄ|QZÝ­ØÚŽ¢!—_³Ñ1t*ˆ¢B÷€ ÏGƒ®ûYâ®zÕ»£NîV—fªQ•δ=#á/DÑÓA­Q¥‚ÈÊÖÕí NžXšã%ªt9¹E >á1EO!è¢4Ù¦h˜ °®{Ñå›6æ“ûtº 5$ W¯…ˆ¢'„ðÍÁ!Js`èRëzp5{o·F·ÉŒp‘˜;ãîu,ŠžÂá!šÈ —øË~eQè11}dÈhÎá (#á;¢è)!¼ˆ¹khᅧ)bå|8µ{3å=sf<'Át î[Qô”ç=çâÌX¹ªú NÅmÛ†„Å+).è|E…E3aWÂê8+M-ÒS‚jþØbQôŒVj?<œ¾~iµ¨ï)Òy#Òòî{›)*€Î ‰Q*2w\ªgÃ^®ªStŠ„û4D%€[‹)*€Î !@›J¬T¼K襽ºrxS¨D=`l¾\Ô!Æ_â•(z.£G¦…»Œ¡ÃÖ\P%Ñ_ºÅå\_¤Jð´DÑ3AböÝÚÏÝ òšÄå5lªÚ cQôLFqÙÂ]ŠNŠç×ÖD5Þ{ŽŒææ;/«âi¹ЦEÏ!(Y6Ý9µ6Ä‚EÞÚË}!JiäñqQ—géc(ŠžÂþ©µ>:|É7ÏÊÞ¢6F¤L5 ¾{Xö¶™zËãÎ QTлTê^.òºþJÑi1DéÊB‹QðdKˆ(zŽHˆ&ÜÕ9&¶ùYOÒ¨ú$Ëd˜Ä,[OÆüJ?ú©fY|GŽ+Šž BR‰~ª¡¨÷ÛÕ›D¨:Dɇæå¶]3ž—M óÅ告ÏKQA*Ѩ~ßñ×ÒðÜÝÏ®_UmÎðV…Ö hLfÌåoÒ×ç^á(«(zFHˆ&¥õsÆ8ƒ$D·ô*bˆ¹Št‰ü?Ñ3¬Í@à-Sâ?áœd9×G­LÓU& mõ*U Ìíç¶ Ï ´ùï ŽKs^Öšœ›¢‚„(Ñ®¾õçnÞå3¬9†â£Åçù7$3tâû{¨x,ŠÛ;票 {¢‡Ü«°îqD¿U ÑÄdãõ•xŸøîz$ÎKQABtºOQQŠ*×w8Dùëæ‹E|KhûœBT‚˜ãˆ“qˆÖÞRØrˆò×míæÝž³ QQTøçÀypËú­ Q"Løî“í‘!³ìÇÚœYˆŠ¢Â£8Ž|`o{Ýho*ºBT!6o\ŸgˆŠ¢¢èÃspõê%Õà)Ø[ZNš—+Qm£Àâ,CTþù÷?J!úêoÇÆ‹zðö—–}…r%j>½oƒÞy†¨(*ø¿ àpˆªmçž[gß!›œ>/çêŽù›îíY†¨(*øþŨÍj-¶;µJ4~·´Ë[¨±…­[õÕ9†¨(*<ü!pr³œôƒ³ûÒîý1:,„¨žßhœeˆŠ¢ÂmbÑt£Ö>}HzõárˆÚÂgßÏ)DEQáýþï;\‰¦Šº;íÛo“Ë!j‹ àœaˆŠ¢Bôw¢û¥ƒâ«?,‡¨ŸÌØ5ct¦!*Š ÿÓó¬M¸õö-h?~»¢PdˆœgˆŠ¢¢è_~­#QÒ;}êÒ|²5=_ˆRׯý }¨3 QQTø…œ wwÕþâ#®fŸ™jêÕY†¨(*ü+m !êñGBÀÉ?Qÿ‚Ìøl¹žeˆŠ¢ÂïpˆÎ¸—þ0¢‡¸fvX¼’t¦È˜Ú®¢èß8ràå‚C´EÁ3ÞTŸ+Í×b^ 
6¢.DùÇÿEE#ÞqÁG¹äи¯RÔ)_ ¬;¶C””(ú7Ž,ê.råzÄÖûëŠ]§œ§Úx•Ç_pˆŠ¢ç…ðsÐÎC4æ“}DØŠ¸êVý£8רM“!ú¨]£+ŠþM#üÇ—C4•Ï¥}ŒÒø Óc<¨›1=# ‘„,ÚdJˆê¥+,DÑ¿qdQ7*‡¨à¢]½H>t(aÊUi¤q7ðUòÐQÀ!j8P· —þ¶E…ÿ›È-‡(–Xì¯D×a£èfµ幺ÙU1a›Ì Ïõ&}QTø»ßÒ£0Ñäb†h7€"Jb”·UKïÇ~by’Ç+niP™±(zÑ¿GK:4\"ïbßðÌ ¢$Fóv‡PìhqÖôûdƒTÂÇKQô<‚q^Î¥gkJí«ç XS£AqZ6†,Üí¢ÙB»TâîEѳ@ˆþ÷¿¦ão™Æœ ½\œ[t8ý'Ï ô‚ŽáÅ¢bÛç E…é¶ÈÌñšjáSkz¡Aa|IÆ\ƒÛun9:‚[*Š¢gðÚ ùðùC~ûX‹v€t7T ƒ>™I¸†i€lpÀEÏáßþ`ëpçò¾uø@;÷M}2>ÖT}©gIQ tèläÀËz« ÆD¤Ñ£z¢\Ñhqˆñ5)º²w7ê9„¦¨•h†»à{6oŸ¢¤1äJ´HØÝŠÓ#¢åÐÚݨg£¨ ^þç˜H ¢ÈÃížù >/¦†ª:DiŠØðŠ„(+*?ó­õô-ä:¾¢GÀ^r%Z¦¯F•al³œ¢‚(Òééqiú^·ª‰‰á-0Õx”t_¬‡¨Ÿ‡¢‚ð—¿$çRJGG54 Ü}7¼p%Z&Ðþ¿+nâµU‰^âûóQT)–æ/\5oª! jC”ù‘JøPcÛ•(p{Š ¢èÃ/aùè(²U1•’ö#Ÿ¸=±å=Q‹óPTk»§qQ Qšæ½.±~ÚÚóêá=/ç:»c7Dƒ3RTk»'Pƒbˆ†y%:n+Ö‚°æJô&\k•(_zŠ ‚ÿ¿ÿàÁqË!ŠAÍûd µ>0DmÞšð0@~Å>Eááÿ4°,†(t§ö}ò!¦Ó„Í2Øáuª(>Eá;¢OoPªD'ìJ³5è#A}Š Âýý€¢G•h£ÓÚ äJÔ~ˆîW½¨çßh`}.Š r'ðäÑrîDÑä }x>8À¬ç6b4.¾bûÎy(*ˆ¢nvk=/çw†(W¢¿ ÖûCtÕ#S´*/çÎñY(*ˆ¢ÿßÏ ¼'z`ˆÎ( Þ¢Á¸ 0||Š\£sŠ ÂÏ .†èÎJ4Î+Ñ(ÉH_·÷W¢Tle ¿bÐ:EQ”V€:4D_n¿MÙûñ ë]!ÊW?˜ñbQ QZ¡wŠ Â¿ýÿ‡âC”úHÿÙ<ûzûB4²Ñm0,7é3RT/ V¢à}—|Ø€ž2(>ï Pd̰¢άDÑÙ!!ÊçF# Kz‘ø¼¦'té€[xœ1úÁTá§8E9ðò3­ Qò5ÜíwÛuÖI•,Wùó&®¿úÑJ‹Ñ‚rëݳQT~G4åþù=©×Gz|m@IŒNЩÅMèÈ Í¿9.qN­ ‚ðçiV Q<<Ô‡‰wÝd»²nûcR QEÓ@Ï]ND“Û3QTþf€Ë©—õšjhs¢…*¬Y[½„˳Ó"Õ? 
QoÎCQAø_¿ÿäñúëP5¢IºZ×(êcP.lûèZºoô@úHgp&Š rm·FkPJ½=kF|~ Cªb2,=Ž®à!a§\‰N€îY(*Áx,[œz{º`7™¢¤—î*ÂÌïqˆÚº{éŽ+Ñ´}Â9E¹¶ûw–wn1õ6è¢h‡hTÕ&?(7iK:ùyˆF†g¢¨ ü®DP´Ïrè×¼~[æm±eÏ·2#åWDa¢À¡³RTE'ÅÔkS-®Tg.¡jRIÿNóã&VºjÃÞç(…èçƒ3QTk»é¾˜zp±C•r'B„*ýG-{1™£‘÷oh@Å碨 ×vßܨîó9|wM ¼äF„ù/ƒÀݵœ3QTE£CÕyヿ|[-r‡×x^“ t¶_»É¢ÏDQA®í•JÇîž4S"rÓˆ„;¨ŒÑ1¬¢W:'íóPT~²¨ÛÆÓà< J9¯Z`ºäJÔḢ49Eáï>ïÕ•ŽSàž*àˆ¬(Dù%–C”¦»ß8³•®Ùýh!î®à–CôœäNàhwˆ2]Qžb¿“OÕ:†÷£y€6qˆž‹¢‚(úÜ¥9œ!Êü‚C”Ý×5¼Í÷ÜS1Dý³QTk»û@Ì!Šë멎i)D£CÇ ƒ,T ÑÉâ,ÿwä Ñ¥ÞqËödQ Ñ«gÃæ×_Æ™V1‡SªD}uŠ ‚ÿç_¬€E±ýp[÷m}Ã!ÊC4«˜À¾"ø(Â[bÊ•¨·8 EáÇß÷¸T‰¾¨†>8DCþXpb³¡`K*1ãåÜ•:Eáß#ÝåÝ· TÑÅÎÉžoÞA§ K¬x9Wcq.Š rm÷ãå\úU¢u﹞%Âm¿¬³jEçˆËŠö,Ý«Ï˹ïÏ&E¹¶{À¡ƒÉîU Õ†-óœJôa£S ¬9D™sPTk»+Ctÿ™P^d݃ãõ=,8DÏIQAøm>*˜4P˜‘2ú_X¨F•Ë!zVŠ rÃËߣEG¤(i8©õ…¤ó¬E¦\Ž9DÏKQAî]“¢¢CÔÇËCÎdÇd ‡è™)*ÈÍúQ—gÅo­Ú%š÷´“+«cŒnÏOQA¼üÏ‘KÓçÅ%ŸçPïtÚ±wtt~Š Âï‚Pt\”íÄ^W}x}†Š ÂÏôD\îSÄ vЀ{†Š ¿ý™öQ'å ÜÃÆƒÝ“)쯊¢‚0ÙZù÷n:g¬÷ÍwƒCV‹6±¹¢Qt'‚j‰ÞÁ%¢Klù~ÿøk #ЫUt‹¢‚P\XÆùKïƒêÉW—BÄÆùíÔþH_‹¢‚ðÙRÝÅùÕ-<2¥~µˆ¿dBt拓Š QTx`n ö¬EÊ8F¥Ë}?]Çò½(ÊBéê–iýY™þö+¿ÞP0¶æà¸¼=+Š BÍöKýÞ§‡.=ƒýÑgHhW\Ù&Š W¢¬híjѨ5Y&€7ß`‘®`¹¢h ‚„è¼VÑXLWPöýÑéÅOœ'³{EQA˜‹å_¿vþBÜhåÒe¶=²2êr‰À}ù5a.Š ¢èaרAÅD:›€;QdÊ37q?¯‰•(úA˜ñ:-—_ºÅ…£c2gÆkÅ‘(Z‰ xp~Œß ¹°Ùgò{îL­DCu›ê)úÁÙ™è Š ‚Ï!ú.‚mDˆBˆzÀB5A&@;÷ €rÉ Í!:Û>ñ­(z4‚àëü*CçáOÆm€a)DÛïo êQQTtšyóôªÑ9bã'º¥ë»/¡(z$‚ Ó‰š«¬&õÌc”C4yÖŽ(z$‚ ·-z>²Å×+E†h—C4ýàŠ¢Ç!W[3'2³ÆdFØáM†¢èq øâG"û47ß'¹ãM€É#EQAúÿùȧÓæÝ¢¢¨1‚ðì¿»… iŽ™¸Å‹¢†B¾5 ™òâ¾¢} EM„>°ÎÛ2FkÑ)²„(*Èpú>+©Œ^±ÅÐQT<öR[JcÑ…(jˆ Š^ЖÈÎù±‡¨"CDQA픺àdŠnsˆvDQ3aƒa.+†©˜ »ÖBT„~¦hÀ%Ú˜_õ B4EEBÙ—„ÞøÂô‘‹rˆzPcQô8!JõÓЛ[˜9—B´®(z‚ [Yˆ&…6|r‹!ª¥»¢èq•¤§SÚ©P-2eéú\‰Š|½EB|Üù@l+D)T…QcD“®(z‚p‰ëÒnÓí½¿yC†h¯i‹OD>bQô8ÁÜ'ñ‡-†b@EÐEC¦ïuŸ/í|MÀ|¦n«¢çè(DQAð8ü¶«±C;Û­þCœU¼,ÅòV= AÐ@‹gV¡iÇ’ã¦óØ^ DÑE8Dinž¢¶ò*÷%õYѾÑÓEQAB”H+óœ[œàž|@‰¢v$D‰ü˜Œ »Dô-Ú—@ÿ¸¢¨‚„¨Å&®?Cqþ‚¯ïéãEa>䵆†“¿Û*:QT|õ“õ% —æ&ÐEM>2fÿîµ0 ÑÏ¢¨‚\«ß#»xè•æ/ô•+Љ Øß ¡«m-z™U¢Sµ(z4‚Ä(Y&b¢)W¢+`(Ї ôA¶Ù@ý‘^ðr®º¢èq‚ÙÆ–<!€¤èñBHÖ‰~Ѐ"óíQQTüÙ‡4pË‹Æ QôhÁ%û¢¬¨ !pÁ!:lœ¢‚ Š.J!ê7HQA"àu©½j7HQA6€r9DÉ×ÊmŽ¢‚ ütý¥Jô jÜ0EAàåÜ à4HQAÊ{¢è5HQAþK1Dgâ)*Â[èÎõ8D¡¨ ¡B)D_=¤¨ š+Q ŽÛEAË•hôp𣍠¥Jô6FQAB Ë˹ºÍQT=,/ç†PÍQT„°Å!Ê76EQA–¢©¢ns!*…(õj¢‚ \C”t³jQABgÄ!:CÃVtAÐàݯ¥¨ 
!€q>-n³A£Ë!Ú£f)*Bˆ‡è¸iŠ ‚ ;yˆ¶©iŠ ‚v8D›§¨ šC´yŠ ‚Æ¢STÑ&**B´àmž¢‚ è?iÑ&**Òb„qSA5XQA6WQA4¨¹Š ‚6ZQA´Û`EAMVT„[2ÅœÿÈ){…&±Ö¤IEND®B`‚sleef-3.5.1/doc/html/convention2.png000066400000000000000000000540621373003144100173040ustar00rootroot00000000000000‰PNG  IHDR2ÜfM80PLTEÿÿÿ°PrÁÅÎÐèé鮩©///iiiƒÙªLLLŽŽŽ9Âwf©Úÿ{{{,bp IDATxÚí½MlUº7^†ªr§;Á)v[í…GwäR¸ËHÕ`n2ÃtCHÂÌt韔HLZJ`”,pIYÜÈH,Ü„‘³·”/YwH,Â,#eÁÌ2Š¥—Ë_Ìâ}ÏgÕ©ªSÕuN—±??i&¦Ýõáªçwžó|XÖÖÂü½g‡}ÅþtMÿ¼ÿõí¾}sßxÖVÆ}úµMÑÙjOÖ¾w ÿ îÅ-þHïîÊ„ÝûöiŸöÓ}s[úo_Ü÷ôveBof‹ÝÐãû÷çþÞYoìH&¼±˜°‘˜@¨»½˜PChe2ÁYãLx˜°!h¢­¶ÂÕ =´w2a÷>-&,î»Vâ_ä¬Íµq&`±º¸Í˜€É[ߨkÿîîþýûïyÀ„]Ÿ®Éî®>1 ðá»>ýU‘oï*WwàÛýzó™ðëµrö§äU\KGåyÌ6‘£ßm46Î<úÍ~†ïâ·#p íÜ+ïUÁ„_‹ýS&`"ýþÐ*(ºeú‹…‰•fÂsûÌÔ6Ókþ³F–××ø«ø{b}-Ïaþ˜HŸí‰ähZŽîîÿ΀ VMnžoµU„ ”øÅ˜ ¼]bÜ´Œ˜`ë-òí3Nmœ}†LH>¢7Jò“^ _ŵ¸P/›±%5› νäê[˜ m4½DîõÙLØ“"M&ì}¾6bBqûd‹é„7L™°DöZìš.Ñ›Ÿ[žqd+%&“ lA5c6ZåÁ‘nU’ï,&ˆ¿Ì+À„çd&\3e‚Ž&°øψ i°UbG»÷™1Œšû³µË”XÒA#¯-Æ -¿4‹›Šü=Ï:z7f\d0Á¹;üˆ£â[=à‰›; ÝγÙÄ?ð½WÀO Òð1ksfós[|G-‹ ‹ûæŒD—ªÑ„_ã yjDãèc¡æäÅu¡Dãè¡ß)‹W¶oª–£L߈M'\ß÷Èš ¿ÛŸnM2¿Õÿ ßè7fLØ.qúÄ}bþÆ„ té–ÑË‚£z@‹"fD8uMrJ3¸ïŠ˜‘#› j&PMqÀË´R†]ËÝGáî?¤ß‹HûL†Oñ}ö£x6i*G«³£˜€UÙ.&"|{D‘ØÆž¦ÿñ´D®Ðù©–¸¶>¯HŽÂžý2L‹9öúЋ•§Ëd~Ê·ð].O‹‡1áéa!ÂG” xÝý»ˆ70÷kù9‘&xn´èÑ.O¢Û¯ÄÏÍím[ÖÏD"s@­X£0a°A; Ñ æ3áîþ¼„Š$öíûÊügzúLø/ºï{D»"ï`—âqš™ÃÒ‰‚òvâ¦Ãæ2{ Z#1¡¦6òuÚ—ULØ£Že0ÁV,b;„ ä¿ô™°FC¥±G$…qíÒÄ’®FèáF< iÁT2A½ Ã„êÆ¥%™pWÁ„W$®g®Ëºè3a‘ƉbHNÙX++Œ1Á-3éH- ªÈËž<Û¨L]f›nlï¿Çâ?ÙiRC˜pW­èNĽ–>H1Ú·Ç¿A2ö¤t³"L`5m'?þ„œjß·Ö{T¯}«>Èþä[zºU×nÅïó "³úLxŽæÅÑ¢ôÑÏö§ßî‹=%SÖV4WV’(©PøÞ€ Ï«UžZ„ N˜„çö…ÄïîÊ{dªt»bL°ÕTæiK<]눣ýü.&vAÕÙ2É¥4Lò[ãÞ óéZ"ùG(‡dñ„uA?ä—½ðÈX›¨§óbLX#Æ}äÇ}o7}±G$ë—ÅwÇI|Å!@ò.TîEt¯chV=åûª"Ëè{}&ÜUŠz˜¿W„ Q¾”Š;ÏûåÄ"eº]$êßEwû]žC"¦‚¦Çük…Œ/ƱcÙ‚ »%©ÿ{q&ìŠ_.”ÒŒ\…ðO˜“ãdì(&|c¥™ð´|P‚ ì)ÙR,ˆ<ô¯cz9|] yÒÔò;Ÿ—Ó”év &¼’6¨meÜ%ÚŒ¾[„ a𨇊ÓYN,úMôãæÆŽá+*ãHÞM׊¢F)aQFÃeBâ¸(aÂbü%漉ñeú}&(Ô¦¼±zÏÑßöTêK6=ÛËÑ9/a-dŽ^,RbÂþïs’öc¶…"ò¢\Q9m©B#T|OdTRíå0á®ê¾›Í„Åôr$½æÅDšª%UjF‚?” kq±P2á?Š2AúЕɕé;ã½`Lˆ-÷¡´+±†$ô‘'_Κdµê¥d?Ÿ!Gžf òqÕŠúÊ~=&„†&îÍc9ãúàÀ÷-–&¥Zô¥ƒ¼!±¡H%|ïYG陵ðr܈¿f¢æþl9ŸH«,ß¹™QþT1&ÐC>ia‡víW &|ûÉǤf.<&¼ÌQ|²¹¾ùÄ㹄Oɼ!YmŸÆÎÈ*ƒ 19wÄvíû{ô»Å²(©)~th)Œ#é‚H+¥3áIú _ý°skßU%Ñí‰YÙϤϣ¬ÒÞËb‚ûàÁÒ†™GzÖÑ3’ز- 
¢ŒžÉ[ôLP%>® ¿K1aWZ~£×ü²ä•J àiaÏ‹-†>…}-Áɾ– Ò|u6æT8Ÿ$™ ™kÒ®/ÿõk2c_( Ž`‚ØÌ]ä¤|Y:‚_ü q Â8Â_o%0™p \óì¡çæÑ+’z7_ÌäõÑÉ•DxðÀû=þ¿ò dâ1?™2booOÚm±eßAQk§2ï¤ô‹ß¥ÎH’®eˆ@”PÊ¥¹ kiT‡3’bžf˜¯“Íe™Zsêå3º 'ÊÿY‹I¯ILXdÖXéL°’Ù¨b¯m·´î?Ç>¿RE²#rØz ^¢µü•È&ReQWD^T.êó‘ðØ*&8˜­·î¿…ÿ¹O?‹ut¢¨¤Ä"'ýÈiGŒöß§tÞoâµÿ±ç²žÈr<§~ÍÒŽ©-¥[þ=éaÂî¤t(˜°;Ãz#YË›íR0!"î.™PkòŸs­,&Äë”R6ÝbZûpú·ØÍ¶ÒÆ‘ø#kzkä53鉭¨w¹=žÇ䦖ìn¼¢`Â[„Oü@CWl­éï§DLˆ‡I÷ç„yyE޼‡‰2WìDŸ=“ãGPwöcÕkÞ{{sBÆZ‘%ðE˜*›W0ÁIJÐZZ<ää&„æz¬ -úÂî}sÖÆ0!eéð2&yãf-Š/}­6ޤG6!, ]&ÄÓ)’vtlã,½ë€Eë÷µÿv¼4f¿Ê6j1 é'ôÙÊLˆ‡·ö§L5E©Ñ÷ò F‘³Vx/‡ »’[gák~.¾e”zÓB²†0!• `BòK»³k^Š0Á‡?';£²#ñô/ÅaE«¼nRú+Œ#GzUc&HöO*&·6Ò±Ö=ЏSì$ &¼EÝe‡2Á*… ±Í¿ý)• *?ý‡Š BýÅœEl鵨.¯ôšã[F­¤ ¦ a¾tùpš ‹q&,f‹hë(ÈØi£/HÆÓF3AýbWÈNj)Œ#Ù4ÔLc“˜ð|âÙié{Œ Š%Í—»Œ’3™à%T‚º%§ '·ø©ó™À©ðtê5¯Åå©°ãc…˜^óTLˆ×Bçåt𘣟c‚~a—´‘÷K1aQŠCÏÉžÖZJû9rÁf ¤$þ’°&)N|‡!]åcÿ4f ¤™ð5ŽþeV˜D2—½L•@0ÏãÆÏÊL ¿‰6#bUe7HZ‹.ÑûÒvHÄ/%C™pm8v'ë!¿Î‰É~]˜ ±$‡ð /K”ø¥˜ð˜›~ZòŽ·ð†lŽ•Á„dpý•dqþ7÷böÑã"#ÕþÇl‹¡ „¡£ dÂU&ëIã…L8ÀL%GTìØÃ™ÀöüOü5ÇÞÿdb~&ì.Ê„k9®Eü|±XWæÒú(ü‚Þü’LØ-íÈ<%±z1õ§ÆŠ˜7„ Ž2Inòõxä}†µÿÉV[ñ£ŸxÀvÕœ_„ wÕµ¿‰ö¥œlòÛˆ‚O£-b‰ û’mÁv'>ó 1¡5œ 1YÛWúe'¾J&<–Ç'¼ÖF2¦eÏ}óI°ý::ø©´JžçF0áuÂòž˜O 0½ÔCŠ /qØQud&HDvŒ {Ômíä=Äø~ =¸XîòÓC™ðXâ³kÙî†4” ‹¹…ñ4¹ôÏE™p-Í„]!77 ¯%s „¡æÈßW´HŠ«„˜°'‹ NV9Z$`Ç:¨°oÇΑb‚ ¢¶6‚ ß U ìOk¥˜Àÿ‚‚L°R¿Jcœ&†Ô@²ÀoÌ™ð²ÆÙ µ]©lÀµY”w.W%ŒÆOÉ„W2ËÑB?ôñø¶Õe‚+Ü„'¤ÐÑèLPûü™*AÚ'¹ ?“²°ræ'¬%ú¢n^Ò!ÂŽ9øúL݄˶3vù¡"æ¼[~»RÆ‘W À;»B9lxtNœ\&<ñK2ánv÷–=*&XšLØ¥Èf‚·LXÛ7¤¿cŒ úLˆDwÃ2ðv§3Ä…•ø²ü´Ò½ÂžK(Äò™ð|v…²#¼â˜ ß-À„·¸ÃLý…6– y%Öa|(Á„g5¬£°´2v”)Öå3a×𞊬€ôïfL"Á–•Mg«ü™TV¯Iœ\ƒþÊÊöì} i¼l&ØyÍO„üÄýùL#Frèhƒ˜p7¯ë†àòóé]´âLàŽj>öé3AÓc.4ÞãÓ°F› ÉøWvUE!&¨*uœ(&½KÊ­ø::bj7öð^N.æ{ÌLx%¯Vöc‚Å„‡å3aOn•P)Lx,¾g°ILpŠ5ýµ¨ÑŽ¢>V.b‰Øü‚»n /“³å5“>Þ@28²FÉ;R3ÁÉí‡%ö‘U™yQÔ'Dè(D5ïHÍ„çsû %nß=kÛY‹EA$&|ñ&ÛY“»Gdåc§¨ðô0&Ø£1Á+ÄEEÿn•+Â.s˜ñ¡ññ»Sa³‰’™ßÇHÈÞòö˜Ÿ±Ó˜Ã¼!Lp²G²€?žfBœËyLHì#;±Tc&ìÒdÂbÑ.»Ü­ÉcB<]O› × 2a.©VS¹"¬‰Àc ÞÄU`:óи>!ƒ w µ¶‹¯ gÂK‚¿—™0b}‚š {е¶{\æ 7™†å¢f1ÁR„õUz¢” ¼]ñTå"à忹LPeàd®¢LPtþ’,˜²óä.Ï YÌßI±Ëe‚ßÚNXG± %?g^Þ[rÖÑ¿$·JgÂó¹ÝNC?aO¼ã¾ ˜1A‘ýc«â:ùLpR‡ä2¡ø°¨Ý‰ôXã"ûôP}¥fp.Ý ¥xYt¨ÍÛ}-=Ìÿˆ‡Ê¼isÏU ÑáñꮩCL YGÿ4ôqŠ1aþDqq¸/î|†+·ïLü¥¢Ê+£RGîPQpÙU€ 
/Ç“8t˜`K–U.$ñ¥wJ@]fÙa枈ô|OW··E>†tìº1áÙä‡1³DÁ„„›pß0\ˆ {ò©5ÃW[|Çÿ}Ö$vDþûkE¤õš& To†£ÑmºbµozL°ÒMaÔLPt,¢³t(¹þît]µÄV•ÆÕíw”Ï„¡+ê³ JDýåœêMÁ„—bLÐkdYŒ çúË’â’þÑ/7+¶ŸÐ/qNÁ–§4™°;]£œÍ‰¶E¬#GÞ¨ÒdÂZöÜî!“DvÇö ¯EŽ¼Ã¼+ ?•s;º=ðr™PxE•¬q)!:8ƒ ÿŠŠ˜-ÝF–Řð|®ŸCî1*ZþÎJ(ByJa’ ^̘‹¿Æ}逦¢?Ò0&ì*ÐåEÚfµÄÍeBlZ³&£¿>— ’Ò[‹:T>•>”jmdKm"ÔLx‰Q$ »š†³3„ ªÖvw¥a!Ã9£vÔ@Î <0?#5zÏÆ_éœhð"oÚò¾ÿ»Ä°æ×> …ˆöGâMUŒ ´Ëû7×0Öæ†0¡€q´Fz^ ë)ò™@“BçhЧûô˜`‡é'k¹LPL¡]c1»ªYŠ9_'rÊ©7?aT­íý€ì¸ñmJË(ï¯5o¿*vdÿë ú¯û ö*• é k|_÷^àw%;´ë­cnECF‡`[ÿöü½O¾VGÉ0‹éîÓ¶”oüT1&È)ÊC˜ð\,П”w-ÎgB8lAÙi5’Èbæ¦Ãîa“É_ÎìV,5¤ I±Å|%ðuFo{j(d=A+ßí{±öò¯°–YöÝÌ:ÿ$xoÝÿ=Õ oÝwÞ*› ¶Š ŠFñ» e>Öí;Æ„¸ä=•|a»}ØßH ÖЙ:oÄÓ¸s˜°8” ò-µ 0!V) Ç„ÝE™ðFz½Pº[®Õ\Œu¹&4IŠ zilL¸›n‡¹²¡fÝYÃvý÷-^³%n,쌦àwîÉbÂbºÚ€½°_E&SR,µ™è¦]†ge'ÐdBÖl’"¶}­ l«ZNøurëBÍ­Ù›C˜°¾³R}[RåËRÿàÁ}ξµÍÞüå˜ðx$,-òâC×e['j ¹½1±i.*vÂÏ®e‚£œ.eÄ„ðê©´ç &ð6ß^ÓcB$ás¶r˜ʽô¢QWßzÒE˜•üÄæšš Zó˜Gc‚,2Žj ‰ A‹õã}‚÷û¢û ¢Œ9 Õ46Ê ¬£è®âc´~§£"æz™‚5÷±ôáZ¸^í"ÃÐæâ“óþ‹Nüû¤`5^@ŽžÙRz¬.JSÀ`B9bÕØfLÀärDQ?˜L(Ã<úÿ¶â™Fg<`0¡ôf¶›NXOí€ ££ãm7?Á½‘2ð1–vÐùÍP“)ùeößêOM®“yßFs °•0ž,í 1ò›aG‹/%¿Ìþ[ý©Éu2ïÛèo.á±€ À0˜&À`˜L€ `0L&€ À0˜&À`H0˜L&€ À`0˜L&€ À`0˜L&€ À`0˜L&€ À`0˜L&€ À`0˜&À`˜L€ `0ýJÍ„BÈúrƧÆ×QÝ7*À`0˜L&€ À`0˜Lx¤Ñ7•ƒóª³U‹]Ïør=ãSÓë(ï»_΃ôä÷nw Þô›ðÞe£¶0ä Wr~wz´k;¦VàÅJ'Ž? 
ù};G)TGRv`z¸; ïÀ‰wG<Áúõµ:•é.|¬‡Lí}RÄqíñ ½!ëkå¬Ü=s¥ð"BÆ+û¥ˆ­É·G.S¬ üõÕ顜…?0U ΄ÌI<Ž@)ds¹?z™b0di&%dÝœ£Í”‚ ·s7(€ä"ôê«[Ç*a*XÈgJÎÂß7#"vÞ³!‹èMÍœ¥]„F·:êÚˆ½[™ÎeÊ^;Sd]3i~ÕO‘cB¢ O¸‹p¹…ÝÙÑf¢0ò˜âY,¥à“߸Ó1»fMïº6ª&¶Ñgg»º$öLÿìu泸}ôÈ€ïÌÖo,÷tËÛËäŸcù^\Ù»újÀXÒd7±ò¾Öå'FÿÄ(|¢W¿"ˆÙÚNÏ Òç56´uE§«P {eÓZiÅ@%„>NÕo¾ÙÐ[Ý×Ñ…|e@„¦˜ NT‚Iß¡?ÊçR8°bá'*!Y¸_ér&xF]¸8‰ñe1!šµŽ%j|e€%›,“B%x¥àȧRI£P ”N9ý‘K¥g ˆR˜.ë6ô_T çC/Á!ÝMíÏTWY(f·Fj¢÷µÌ3ú3”Âì²Zî~|e@J), •`YÇÞ]½Ó»hz""éŠNÃë“–}›Œ¼oe1©„Ž®pº³Ôâ§JÍÑâRûm  ¹”Ÿ·¢ÀQ-¯ÅT‚¢ï5ñl)ZÖ‰Õ®ŠBL%Ô´9x¦&‹Tµ²§x î UÝüI%(÷Ž3"\ _J‡;mÔà^‚6 Ý«ÂÝtß8Ð^]…þ_bÁK{ ÍUÖ*Ý *Í·qTÿh:­3ê̲qLX(’E¦L°Kh xT T‚mœ Žzªø“}îKzþ1ü¥T…XM\´j’F$Rº›†ÕkN€9z¢n€ ©„`Áè m®*¹½“Üå–åΤ™À®˜l퉔n÷†é"@hؼB  ºP +Ó†\ºXTš“T53£¢äTõ¦Jè!bÏU V@Wå©ÈÖ<#.…*a¨4'ÃÕQTBh»îTV{>ÀÎÓ {#[ß6ãÒÞ¡Ò|¼÷—Sô*‰²þqY%Ôt%²•2Yþ‹vˆ8Ú~ßøÌKLIcáŸÚ-#&<J³­f—:I&ŒÉ*Á½¨Íb±¹Lv±uÚ!n V Ð#@ð:ºôy¸#|f„ÉPškÓ^†O}óHÿò«½¤udǼ„/t•€\ÓiÇû²§ªQÅï¶ fXÜŒvÚiûµQ¸47•ÉmMƸ]Á%ÛKüšÙU}]¥0л«_xdV”¥gŠh¦^’™q‘/²a}Ý ¥Y™m18´Dô4æ^Ï‹›ûL%Øú[{‡éÆ5?AM¯‹Ùóo|è› ­—VŒz<ê¡w˜°©Ç„q®Š\p±5›Êâ)ZOiéÎ5`.+!Ä¿?@3­4Q.„†S'ÙFƒÇžŒUBØ!Us´óDô'žX^†rN±ÔéjÌd‘v›À?ëØ øÈóŒø¨C ‰>ƒþt;T ‰ßóýªìËjALCÀLH¥†Uœ94_ö}^³/)ÌÞÐ먖H§L`6’ÓSVGÿˆB•p0n¹ÔZâ*¡9Ý2`ÂyaƒÙtøÏu ö\f½c=ä`ýö/§„,V&Öú±K­0’ê«‚1’—àÇ•BŸ0ƒ{ U¤yÞQÒ²o…=˜Š:7×ço¯àû%×ìì1Ä=Ö¿±jZ‹¤Ý Ýf_U`ß–¼„„ú:^’…—`â67¥M⦎<“€îgD%û±ë ’ýy¬ÇÙ>±ýŽfN[¸U¬Ò m$×LÇq¶/jþÜæ¶”œhµ¹8Gj7ÛúQƒJ+°‹¼ju­¡Ù›ŠÀQär«y­Ë·tÐgn† éG&ƒáj\ã‘ ÖžB€Â­beØéÈòÍP%8ó-¥e† q©O«žµXÑËׯ@Gw€ .­Pœ¨Xºz–J”=”-RB%¬£†—¸ødHˆ3èsÍ{_'[ÇZ–0ùµPåÛ `€-˳‘…ad©C·Š…JÀJ'>óÓ øzå@×wÅçk|1Ó*AϾã*L0áˆÂNW ³)KEÏy­ìµ†V ;+MY~lñvnï®ÜÖö]÷ÐP%84™0ÉýnϽ2$€`ꎟÑlî¼^v˜Èy#“ÙìD õY/¼rÕÔNá^BÓÓc‹<9Ôazà‘Q +#©Öž¦NäiÊôž® ј!„—0¾×„ Ü•°FT Ô@g‡e$òŒÉ)rý¸¸Û]©\hÁèÖEਭµ®KÂß•°Fôè¢Ì¥9+‘ÇE{Ëþùßô½D‡–úQ¹‘8 •€mœó‡I)©øÈÀÈ*Á²>èó$¾ŒDž3¨þB¡úQoÙåBhyù¦þÅÅ^±æÂ.MgÇGšŒ&n}yšÕy$gØz,TÂåŸLz>PiÎMä (œ¾"-â#É<ë°0Ó¦‹ f¿©„“:7}¢‡Þm‰#O€è‚y ‡Lz•~8,‘‡­Ü®*%ƒŽãä /d³zŇöë‘J8dPû#€¬ºáºìô ÚŸ`iÎOä ³ÍUy7rP\ÆoLË"¸—à íÚ7³éì€GV)Hë²oÒ:·5$‘§Iä5+¹Ï cVUÆ„q ­B®ËvQX *`ª>ÖeÆ¡ù‰<äÜ~FŒöêÁhwm‰i í6j¬ò…½­-Ö¡J8ø%€1/g=ËÚÖ0"€/»œ(†;r€Ð»_ÕA+¨‰ÎåÔp¸ëÐDžÔ¶Ó”‚ÇU‚e0sªª„铇ôîôè‘Zxm.¡%OÐGIgkýãÓ‰X1àåq‰Ë(^˜]ƒÝ5E"OUK¼Z{²©“÷÷GP ¤ömAØHŽžuÔAŸÍ‡±‚æ$H 
â“\D'ÅV›®RI%ò¸1±<Ûûkôý”¼«‚nè6ëõ³tn×ßÃ÷ÂGC€¸¼3Ülñ¢ÖêZP$òÌg‰e%™Š÷e¤êÝÐŽâx’—>¯yth×ùP¦ áÑ~{-ÓAÉ¡—0\ƒdœµòN¨þÝ×ï’zö`¸—p!½°U~˜ "4;ÿÛ"QõÊCÈ„•r#O˜ñN&øD^‚3Ð.“ˆFöྮ·30Á9ä@é€IÂ:©C¾U’JèäeV“ $ºúV¨ôóaåhV÷²¾P d€áëZ;Už¬T&8þ¸Lët˜8Ú#©¼Jgïö²qæk`%â ~>¬ö Cmû:‡Wxï&¾s¤_±-“=ÙK¨ætíe¨ÄM Ê‚<¸MÛgöeÜ_ÒòV8‡Ï[áõb+6– # i ¬” »k/¿€HCº™eýÌpWÎÇÖš§ÀCÆt€¨Ó»ˆ­ØØD1Q =¾šWrºöbr")òÔƒ RáܹA˰}-?£ÿ]“„ÖÑ€¼bkN ™Ðì0•€­’zª‹üTœOú~3"Ýç½¢toùušraC;<@d߈&ƒ—xÇkw¸J •i‰`©h,õ²×>núo½™VG/ ä’ñŸ, JØ¡ D0é}DyrU‚+L 6zûvüWãHü;mtï4ÝÕ¹=ÝÓK>ÂÆÔj‘QBdzô¹/!%@m—Ä@W?‘'W%„ ªä9I’x\k˜åµÙq¾îÂk‡ŒdûÀõ|¤nZÎoß1X"óÆ=‰ªû^Úúê­pGbÅèÞ›”EœÎ¹U9EæÍÂ1L%µÒÀF!Fõ².€¹–6æÙÈf€µWWµ»ñµ)ƒ:ƹ"ÌK@¤@äó‘—PA-ƒ»|/Á%…ýé\:Âòᄚ Ç"<²ÉˆÚÓ…ÝÌEbʤ:.Õ£t‘?{‡{²üžu&m¨fãnÇ©npâç_¿>–LcÖûuë„. iW ÇVµoóÀÑáÏ@ ÔÆOê"oÎĶÑ4“« dMÛG¬˜¦Él˜Û± ØTjrЦ. _Gïžd*á0>ό޽C;<@\)¨ÑU‚É ŸÔンºÒŒdëqi·ï,ßðˆP˜'í`ê±;=ªþ€0'Öu¶‹·`/\³DƒEŽDö]"ã5~Žñ,£íV:"ãh†”R»‰–…²îZ½àÝK°½ HBö¥@Ç)ðV§,ºŠœAïþVqÒÒïâÐŽ¦ÚT%ðå½­c]ù iiš A›ºë*……()µš“¬ 'IqÚuµç‹•V úMÈjÔÁ賋Wu’.\ý€”Ë»bç[œ"n[ÏÛLh£»O6vÝëâìw š;¨þ‚è¯jkÅCý'áÕâ8A{™ Ý$šKf#§{uºwÖLÍÿ`­5Ü™ž¿þ‡þ%Oä- ´öÈ\hwHÙGŒ¤¨B™lò7¬Ö¿¯tOÍÿcÆoºYì3ëÎÑÜ-> /22"•`Ï¿`ÄQÎ\O™EKLJE¸?­{xßmÒ˜òu.̪Â\Ö*Lv!  
t›©J8ÎÓ4µ­£'¹ÜÏZnLš›”#gB_Y¡{¸N°ü)Ë6kÐgºÄÁFÒx^%`$Œ³•zÕo4©ãì±5R*`¿s+¦¤¡Ì{3êGçU~‹A­ð°Ï㡇ß7êðJ-%„Þ!Xb§CC@¯ˆÓ€ê„ ípÑ–¥Û<—zÑJOFà&#âè§Œ™`™ux¥*«±@E3€H2¡ª_ä"5ѦÃ0ªYü˜³p!iÈ,V óûqɉÒP«&^s?:Æl—`‚äÐÚ0®`ñi™xE>E*uúKú~/ V”ÉËÆó&%øƒ°v!@SGµ=vi,YcK–V]äT„.:TB;XR9þë«OÈKmã£D3 ,¿×ço¯Ð0>×¼´TÑ?†êSût»(ÕiY—-Rì`¥`ÒEš&X“J2¼iùªš "p4H4Ì#íZ>£‹ó¬gëõlag½ÅÛYúhå¸ÁŸ~Þž&[ÝVXt2-íÀÕ¹hÖEÚí£?1_ücF—T±—0HæUœ[:´ê—Sb"]½s™Ù9úÒ솕Bżc8àM™pÞéŒÒEšÀW¦<Û‰‘äi{}ŠšNÚ× ¢9ŽI¿šö„•BÛ°±à‘ƒÍVãÎH™ûÜKè7rT‚¥Î0âî®oþáã®É³ºÍ…˜LhðUýüˆ*}KÅ41ÄCiˆ#'J³ž5Žž•Ó¤ûãvPØ)„:/ëæ³ùŠøQS‡VõÒÇFíÃѦž¸ó¶&Îó»Ó=ðˆÂáê¼kY¿]6Hr¢2yÅpL4Öê Ó$íˆTnôG¯áüó»L ]>ü.ˆ€ˆ/„^Ÿ&“i?5‘5E({ 하㜰Fݰ>{voJ ,{°‰½ø1(dX$¤#¶Yƒz«f4®\¤•&]×£Tþ«KÜñÈÖ³—ð—Gž{æÐ* ·ú:œãºÈnEí»;Û¡å£ÞGÿ7‡9äùøäöûå^’ôÆ:‚IJaäl‡aò‰^ý”–Ï\‘Ò¤šPÝ `fËTëØ{¬þÞ_2P S\%L§OML¢õ.m–çY‡ÐôÉã½#+#3a[ü/¢¼) îÑ‚¤Ô@)褺RW LÌ#w/W /ËýDýÿï‘Ä"—þW¢éÝÕQo¿‰V΢ËÚªÌ'éRÇXc2ô0€ñæò.5ör[CgÕ*!¬ï<Ä2Œ|DÊjq·zÂ<ï‡m¬­£ÁALÜúÓ¼Üd -›>aÒ‹”nª‹4Y„æàê ´ÜùØa‡WÎó”¡žYå©õ‡âÓ¢IÈë4H€á¿Ùxq£œ 'k°ˆ-® Ýg78rïÜÐ0SšT}a~Ýí/§¹‡m°¡çJ |£Û}¿–ÑgøPdüw 넬î[¤±€FP“Hð‰^cTCL8gá#ÓV@`8WÀG ¡Ž´ô’h¿[Éb&Sýæo‹w"Gèä›hVñ}{^çÞkhŠwߨÍÝ4O‹y ¯fÂ3åg1!  
¡ø<™U6§8ŒR}Ær4‘Ÿðiþ²º ÙG‹çîŒ!óà¾/ÕóR†rªl;k¥ 2Ò@“]¼xÍþy95SÇõˆ‘õÞ™BÆÎë«_}PVÎeð°Ãàøfu„wFvAUá}~v7ËÙK£u›hïeÃ5 µoÂ÷ò×›Þ˜ä›hævA¥ðp4&ÄŽ"•rá:!¦=dˆ„ÖþÂhîï!ʨA!õæOZ¼ÃƉ–u,XQØá¨NŠ:.S¥°"Û9 *ë‡õÎhWêŠþhÝ›ÌÈ̺éIªŒ˜ë „ Ó˜G—Fò˜c(*ì#¬vf«"¥Æ^5´^l¬Âi髨¬2™8xÄÐ|ûHI£—ðê>³šîäåäïwÝ ¢³¤FEX}=•`1Ér¦ñù¨ñ·¾"ÂÏM~nóõ¸2ˆ‰Cø¬1z\”H¸H¼à„½úe)M°èÖ‚ÓOë— IÃ;ò0Ó¾ïò¹‰v5ôÓ(H!i“v­(¼gl ‚úê%‚% U>Çåly‰Ym´ÌQš¿úoýNöÄKÐKñ{ýp8½oÒ:ÈV Ä+H¯¯Îå«™jg¬.–ã¾ç’‚†ŽîÍz 4»z÷ûdÈà(@¹*ÁÒn.YAÂÉ ¶Í^zW×…g#Gó†ÏGFÝg𥪒֦¹œÓ¨¾Ë6åHÙçùGÏ… ÛÍèÝñ’Ä`  ¼øjÀª=,‡É9r3n) ˆ@<’Ëš¶f©cö—¬¬B#ÀX#0r†Yvû(ÑZ»‚éC(LNZ¢ŠEc‰o™©„p£T |/!j“2xáÆíxFÄ ñ…ˆ-ñJZLø·uH´›ùÝÐ Ÿ‹T‚3?/PŠRX*d°ûtˆŽÓ¤ ü»-®Þ£®¯ŽŸP¹ÄJ”¾Í¶Ll÷“ƒC¥°RÄ`wÙ&V-_óщ\Òý±£:—æ>Q7·ÿ¢Î¡¼û‘ÓG×ç0ŽP†RXŠT‚Kgæ¨Ð䉧êa8ÔT±uÇMY®jRuc×u–vVOǪ;0ŽPþª’Û™‘s':d÷f›*ÚÓ8­à<[Þ©Tû5ެШ-Œã”'T „–Õ+³-(Ò?Ÿ¥LP9O…šÝÖ‘ç ×b•@ÆqŽÍÀ[”€ãÂK %p‹j;†gŠöžÌö^ p•Š3;ZKžÞt«eõÐ ÇY…–x€R<ñ—=ËQZûc<²¤Ü9a«Ò%†#ߘ–Ÿq¡‡.IôÀJa¬¬&}€_)‡´ytt"Å·;R½ŒÇ¦ËÒ»Ðó¸m¯b¥àå(y/Aé ˆZ„6g‚s58c>º3ZÚC8â<ÐŽ=±Êˆæ{˜(I)œ']/‘:G™~ÔæÑÓ>Ñ /-Rÿ[Ýã.Ähi"óÏAÚL`ó¢Iò¼B@9Já<ÝÞBo·xx2í' k9õ–þ‡l'¬å£•PÖ¸š™@³úLX±¸Ëý¼D@x“ô,ý?Gú³­@Ù.£Zçæù׿HG2Oð€k”í ¹ÉÄ(×ÓdÂ;ä5x^" 8lø s¦®nxgSiç3ËIþj—,æ|.‰ ÎLg¬˜ÞÑ*„æ%;¤äÍ6™M¤Ñg Èš¶¬XÑx3ö¥š°Š*+£]™ž¦CÏÝÖš1"J*JÒóð¥0¡ÅÜ_¼B_I/ò¤^ø¼•D…m8WÈꕑ®Ls%œË›@u#«‘“Ý&ò€r°Ž˜÷K䱓Îvú7þ† ldyÄ„ZôµÛéV×H(ÖÑëàdGñÛ>ô{”:ì›÷GµQzìÛ“™ñ—NgÙUdë ÐôjÿsÍKŸ“ãYK¿˜ÓÇ·äœe÷tùKÞ"`t¸dQå5Í¥óBÃ>c$pD‹ ÒÅb¯“ª·ÞÒ½4b…¡ØDjôut ý~Fïiö«º¯0:£†Ø-Îͨ«‹ÜUfËñ§]ßõD½ë1•pe¾§”=¶LG“Ð{²{u(^”ûß-ÎϨ£{ –Òé )2ªsŒ™ì_éZ+,7|€ôCQìž:ÐìP’$TZ^þß<•>zèN…Ûp®Þõ™—àë3ß“‹ bPÆ#•àœEò–_µR!í1°Rçö ÷°õTB˜â¤­≄ÀL˜Œ¼„:Š“§ªD ±Ràßjš›]|Ü``X¼6¦·§Ð<% Ìh”G¨g€_4Z™ó ’ú™øu0IM§IâÁNt”¯†ã¦Ðå·uŽ› ™`#(?p•pˆ4µ;¤”ªqì0¿¨êÕ^!Žtíw1ºÚ×n¢pDIë˜}4•öåx 7hé>^[—ˆä)öÖ¤Œ ]åö:V ãm¼‚=ºS«š+ØÅØki7²ÁF%4Ñ‘Æõ%¥ÃÜÏègTiag×ïš4´ãMbÄ~…}c ]; ¥)RuCÄ6°D«JššæâvDôŽÀ¾¸Ó&í&ÆùwU»iËaÖpÀ´_  R O’0Ð,w¬Ì[¨…e’XþR™zl>,Ñ,ú³Ó¯/·(›ÈúîêPé(mß *P¦R˜äÓ›°RPMJžYÇQ£ë .*™@ØCÚ½ÜíUшüã¼­Í$P €2ñ&I6Å«²=mç5Âr‡;±™Üäóvü‹Øë6Üðí³¿@×¾*á((@°I(”S³Ÿ7)¹ƒ¦‰m,Ä>ãË2+«j PdigW=®ë7s•`ÿ„`' $ 1{6G{h…ÌLðã½#¥Y€¦¹jT _DŸt”Âø^‚ÓGèR6š%2AýòÚ`éÐqŸYêQjÚ‘kœ\õ öËÃÝKl/Á'S§œH€r˜à1±Î(êTc¥°žp%$œDdظIc­8ëÄÀ©h9 >½i—…³:ÐFPú|_ì ôÙ•„Ä/Íd'éTO ôîßÞiQ•°Œt§ë¬££}º‡í¼£}b]Y›|Êô 
”Az9w[=*qþíTfÄ!6´–ª·×аpœ[ü̽ú)¾ÌëDŸü%z0¯êóPÚéÌŸZbñg”«pªÙ.ó*:ÛÅmzJ;ÚÃNE2)®À÷ÃT—bš.  ,Ÿ[Ö«y:M ÁËödÖˆZqtB¡¬KwÔê7hoo›ñÕQ ôÝÏ«wâB|®«rªÝŒü"8ê7t.:e9g¢Í_3'Õá~¼Å+:€‘A¢òù]Ø{œoæ©L &túw€Þ‹´jôêz)Í0‰˜(‡ óÒ$ÌÓŠ/¬‡²fßY¾'ÍÁP%Ôt–u'ÀÎö-IE4VïhæÔ‰Á¡m``pH¡ÚÂo°û©”ÔʲP M­œ8çÜ—žä˜õÝ×0;à=ÉúåÀP EÓ)*ÔD”f€ê7“)©¼o/1’Ë7<£‹2/¡¦ç+4âæ<ëŒS” W5j<@“G.“²ç•TJj…«l$Í…-³k.P"ÎÆ/‹Õžå×ÅŒèLåBY'Oº¨6<Ñiß#¿ùu£¬ 8B¡å_d‡ "u¸pÉíå!£uŠKj7Å`©fÜœ¯,q•Ð"ÙÒ{ýÁQø0ÌŽcSÈMUl¯JE^Qd›oeœW¬(”:@æ*ÁŽ˜Ph·šôÞ«±ïÁD@©þr^Qä€ÅvìdA³kE»k¾>BÏdBø îr!SÇ­Ÿ<ÈÆòèîP…TBF$†Gª©Ò~;Ú]ëë3ÁÜÍ…´@·Ø)PŠ×e«u$Ææ5=‚ 'º¡§pÓw×´S¤‰7nqMZZ;Ö‘å¿v¹£aŸ.©€²½u$Ææãu\,°›Ÿ¿#S¡|ýŽØKXŒEØëµ/uMA$P®— ŽÄ„Ý/Ž´H¬UŽ—V¸JpuC8lð¡MNÒ®k7Œá^B|@)h‘Â-ñÓV˜¥Û_ öJ‹³®[dðáøS ©³ ½´ó«Ö Å^€a8{ï%dDbÂ-`+]¸\™Ì90Ï{‡bÏÐŒXÊ-ýx¨¸j˜(ÄuõØ«ŽÄô¢T·ÔNôU#•@ì1rˆãHÑòÍŠ.™º–§Û„œT@9ð#• ŒÄDɦX%ô“×Zbqvn“Þ+Å}t²—`÷?h K¦³wÄ„õžA£b@©ºÑÊ®ˆÄLà½sŠr¢~4½ÍIJ¼Ú/ÜÏéQ=cßùë-3û 1Í9U‡W(I)xVn$f@fÎÞâ^B"û¨Öe¾Nô¸Åû€uίž&*úÊR KVn$Æù5nY<µÂNØ"P“ßa­W…Û\ÄÚd›dM¸LyAy@‰8Y(£îxÇ|Ýš"bµz¥IDATTÇŠ·¹86šJÀµ,è"(…"1MPòi=$T«F6»¨Ø9·ª³¼»  e£H$&« je†fd´HÕq£ÌÆBZ,­³Ñ| T tO¡@$F¨{õTü›jŠŽãlê¯ÐöûÇYvÕÿ÷·uã=P €²1<#T‹x厅ªtùBÿTÏ@0}ÆÂMþîè,ñg‚J”®†EbªL%¼ŽP=¹oÐÂö ñ.Ü:B&—¦* µ£3!ÜÊS Ã"1mªH ž—𼆙ð-ï ëýK7(јP÷ušù ¥+…kX5ó u¨Õ\J2a-Ò{­Á{Ú—ž‘„º§3§Ù…æ_€Ò1,3N–}ž9gU§’L`ÔðW°ÖÐ.§ú6¯­^+ °¼„~º|­¾z›Åv°‰ôÂ|Kå%°a€6&ËY]ÕàÜ®Ó!m®n‚iÞ`“U‚¥¬Å$S<.ÓÉj¨ŽÐl‚@+\%`eÒ¾´¬¿­pˆ4¤@|E®dw|á£Z²)‹1 ‰ô·×ø>DÀýP €Íí ŸµI¦¨¡ŒRÌÌê¶Ó’J¸ Ýxb‚ÖéˆY  ›É„W[A#ô›íÛ(ÝÞõ² ž+—B•Po5u•‚7*°‰pܯìüf§Ÿ¶œwogW·’—`k‡@}ì €Jl Ü·þ[¨K4wIÙF™«}…™úÌFÒ^Òí>ºÑ•Øøá…J× C©Eïl'¯écðÐ2 1€Jl |ËjÉi¥K ‡9§9|e!T –sN»Œ3œ¥*°éN³åt¥"œwr¿©„u6XS =P €­Â„ÓÞ‡wòw×:ÙÖÙSà*¡‚.Ó®¹áM@%6ÎüýŠØ]˘§à<Ì=A˜oѲ:KÚÖÑ T`kPá>6ŽV¸J0è¾Rãëy$gëFR¯ntA%¶üP©s•0|ŸØþyùF\jEñ&íWaT’œT .Ll^=h} ©{>}t©?“Ð ¬¹)µ¬ú2¸¤Jƒ¡Ë€MÁA–`ÍT‚Ý£}'20@ŸYÎúBÜO &Q‹õëk›Wv7å%ŒA‹<À¦à÷Z+¡Jh#Ô»õÍ %ËD|žò!tiS‰LÚq æ?}ÐKz ƒ@ p=¼¦s/¡‰.:ý,yЇ{-!ï¶ØH;r“˜Wm,¿5ƒÅ¼’ÍÃ+l®ŠÀQ/ÐÕ ;½†½bR³Ù[¢fÑzØLiÇÇw<ƒ«8lØþY‘ç)¸ õ÷ÚØ•%¹«=Æ”ñ°ž!˜$å³dŸÍ@)H¸sÞ`3ñOløD–aîÓ}‡ÀsøWÛ¢­¹±QÝðâJ?7¡é`S™ð*mÚrb™öpw‘rWÙ¡’_ÙËÇ„Mð,g@©‰–ÚÇ /ï46ô¦ €ÍÃÁ·ˆ@FN‘I mU"^Øì=è³x¿4c­º7Í„ xÉ“ 0ØVl&ž { M„®þ„nIMÝó4[¶i”3R øçé4ú&ã“å“›Á„ÓÞûXx/b ©÷°ÿ$fùjßµc7}OB±n°·`8Ë( öéûÜ 
6Ò^j±¸A+Á^ýßH­Þ•™ØCúí›·€Jl6î[6³~Ø[où‹dÒƒ-ìŸ[éÕ{€ÊXÍiíœï°™ø×I‡¶Åæ‹}‡m0زZ@‘éŸ4è;Ë?FêÀ3¼ª\¤6/¼ÄÇóÅÞa--j³-yáß[È wfê—S7¡{<`3ñ{,Þ+áb/Š|IªY SÏãñõg4K^Â8j€Vl"¼Ú¯ÿï!®z|Ñ÷¥± NÕW?X²†ÆxDc;#•%l€M†ÝuÎ}B•phŠyÉ’ß\Ãüøl¸J0©Kvºâ¤PÕ Øt&„=ÜIfªÓë¦*:íyo¨—`¸¨x›Ÿ´‰ÐIp›çÁ?±F^Bg!£ÈˆJ0ë‰':là£/õÐÛð:›ˆ“Ö€¶âbp²Šü‡¨„¦™•€>`s™0†.´"ä«„#«_f«„×»ÚJ¡íôð6›‡·¼úsý³ˆ¹*á BJ#†Ï ¬kR?JÛ$k ( O´d(UÂj¸ =í9ý…L/!Ðî’JêÄÑc&™¬@iLxµe Q –adÓÿ¯Mgy &ùÕÝÈÇð €ÍII%‘Ô/ï7Qƒü²wœp7À$¿:ê}”;´øpŸy¨n*ä‘Öy:˜-¤ÍEoMT¹9à‹ºQ~uØû¨‚Àcl.þE"©]¾.7úèÝt@”0 Šfh›‹ÁÞ¤Â`‹:±¡Ð_t×õ°÷Qm𛌃oaà…^Âa…H’Þ‘MRÁ‰)Ñ›IÊr¤ŽègÒB%a„]x€Ítšñz¿y ë(-ÍWH•r—´¹°ÑLR–'CQÆ\ÒmëÈ{±I ïÜ‚·ØD&œ~¡__»CªÂ™Ž(ô ü”7Py2ò}_áÈG–¯?­ ( Ϋ]{Õ GÊ!„¢‹lªqäÕH”YéžRèFÞv %„¿<º¡—`eõ>²yl'@KÉŽ­(pdòB¹|B-°ILxðƒóÐ{ }å¶€(î¯`º$8³^ç*¡–ò" ñóÈ7*÷ÊÃiëLŸ«„Bªø#>¾B‚žR¹Ú ‡Ú Ç“@±Jhôêà46• ®(Uó3œ^©MÞxr79à`ÅñPÿâ¢NÁGÕ;¶š›ˆ—´|&‚.ÊÈÿ‘ú|¥v“+¡J0êžÍ&2/Á‡‘k€Í„ëñ 1[ý+ɪ†«u%`ÄR0lÃzdV§À®\_°¹L°ü–• ßLH _œš>ùÇ ÌZ^Žw~¬œç*Á¬EÅÑ•DÛaà…ýྻ`¥Â7¨Q»ÒÔS&*Á¾>e•ÀöÄ•ÁQl.îÓÁñˆ~ ÉÙÇVo¶„—à$f¥ÕFQ ¤N!¼20°¹øák%©T²Í½„ñ„cm dÿÄ&6ß²Î~W JÙæ£ ”¬S©‘+´Ïöœ.¼Àæá "ŽEUþ÷É$CFS \´ñ-8Ðû°™L8íù±”£´Jˆ#°Ä>™¤Èh*‡ŒèH·6(ÀæÁ>}ß%Z(T‡gÅ^B?•,ŒPt†½„ðÑZÖÌÙl&î³=…/A$F®®žŠÿNRnâw¹hÑÀ‘Ï;\öù0{ °™ø×Iw!ßKÀÆÑCþïìW=ôcìwW#+ª‡4z¶\·Ø9_b*á"åàyx€ÍëǤAÉê (KŒàãÑzqªL´$Õ±\Ükè¼CBVö ×Ðic(ÇðÿÖ§rToàÈýæqõ66©èÔÆAÐ €M„Û­ñÀ>Çù¯TM[~ U‚åäö(êözu*ì;½™e×ám6“ ¼67‚W™)­pwÍÍgBq¯·#9êaïì.¼À¦Á~ðϳŸEâèôeÉÿv¢Ý5•ut¢wéUÅKnäMeá%T`O°™8ÇuE oM=Ý™ˆ¹ÿv {ÍÅgÇR*»ã ›É„Weq Te3B%t”µÿãýŸ#ýÙV`¶'àDR¡Œ°‰xéå„*ÁQÖ%WØîZF‹yZï윩#³5½Š¤F©Àæá ©|Gݾˆ´ÎÎD%Ú%õ‘™ß U”pЀMaÂ|+ˆ,åWe![%` ´Ø’þÐèòF/ªlüÛ`#6îéû•ÈBñ”ß¹ªçÎò_ã«ö:b2lQ1@‘Jo,äÀæá¾H°Îœ’ã„œÔøb6F…6í.oÜát TJ—%òÁ|À¦á_§*“CœÖƒ\%ød*í fB¹„>Æ*i%â`Šš›ˆ£/±ëœÁi¤ê^Ñ0ϧêF;æM¯]4nbg¹"À/ä4[•Ö8fÀUÂeB–^¢ÍÅ94B#»C¨±ÊO„`£°™Lxà9CfiÒN¨6B-³e}2¹®K*Áѽúüê R ½.¼À¦Á9ÚX9ž¿µ,‘íä)ËŠJÆe•ÐÖ·ôƒP%xP €Í¤éa‘o™N¨´V É<½q9Q¢¢mêWXé'õœ?ÁÛlì? 
Ív¸BF*௸S©A:.jD „kÚáÞ{žE§`‹°©8mCœ^[´µöÓʼn¾ žÐŽÿ8AÝ‚À`«0a¸Uc³µûÇ“9nñ˜þž€sæóÌ´&àÅKZW 2» îåL+ÆG Ú—T`‹ÀmUZC™Àr„ÈÎZs:kï#ï ÁõA%¶^î« a=IÂE õ÷ÃM¶>¨À–€ýà~i]Šl Œf]f c[ÙРƒFnf³®1³"ÎäY¶ ~xa¸¸’-°W)'ÐI/Ãaîâÿ¯ê.ð6G€­‚ƒÃÛ]¯£ú¿b-}/õÐÛj Š ¶¶\÷îØK€­êLô•Ú ãŒÊ@²Ñ,l])~]ú<ùÙàGx䀭 gÞ³Dw$§§è„Za)H‡õ#7S¹ZD¶2DÿFE¸§MÃKgÑÕÛæ DÎÄ(àš¶ ~‰ŽÆÓCˆÃì ê·ôË""ã‚Åœ€í¡°Ô§™ÐC–4º£œÞ'Í%ݾ €-­ºb/ÁQtd±Ṉ̃ÞìH™Õøô$é»: €-zaw¤†Âan ‡+óIÍÎ ¨À–F%j˜ÇÕ¹-;Ìè=®VuÆqJ^‚hyô¨ÀÖF µôu¦W¿êI+7ñ•úHgg„ªÿ¨ÀW 3\%L1[F^¹æ?;ºn0™¨Ù䥡•Øâø0T 4IµrwhnvÓ`¤æ€gð¹Ð°å•B׊ufQû=à/U Júܦ‚½ÀÖ‡gIýëÊ9;3¤/’‹ZLð8—@%¶:h{$¡U&Îé‹døÌ,—öÛÃS¸õotÓJN¶ÅJa"ªq+ 6é T`{x HòüÔÂ_£þ²ßm0Á§¨Àö@õo¤3¡’ná‰;Ó3è%?NÎ *°]”B=Ϙcr워ӤeœüĶÙ65ðËáƒeÙgµöÈÌÅè‘s4§ €mÍš’j¹7Hô£>W 64J<**áM„¦=ýc/r# -/ß„‡ ØÆð™J8„Pý íL:Þ@«„ GØSlgô©J˜À¡¥ßæ¢2ùÍN².Û¬r- †¾­½?öW „Bô'`ûê’_-ft®ëΛ¢ã y(µ&¥yÛ M´â„£4›Ú²Üv×\``Ã±qÄë’õ÷ؼpw­ Llg8/D=ƒýíïŒv×–ài¶5\ÞðÅ ô™PA\%¸°»Øî¨‰Ì#¤ÏlY1•à£Yx’€í aá»&LàM‚]©G€íÏ&Äm£é²A‹©„ix€mo1‡¹uÃë]m¥°©„Œãlk¹Ëa×oéþ~¨&Bày¶-z+ÌÀ!Q uô¾ —¨JC3Gûsûº$ìCÚá!íš› Ù]sÎQ9"éâÎ O°M£ýTkï²¶C0™°­m£Q6ÅÚÄO`*ÁrL¦lgy”hÝkœrÃ6è< SLEEF - DFT library reference

SLEEF Documentation - DFT library reference

Table of contents

Tutorial

I now explain how to use this DFT library by referring to an example source code shown below. This source code is included in the distribution package under src/dft-tester directory.

// gcc tutorial.c -lsleef -lsleefdft -lm
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <complex.h>

#include "sleef.h"
#include "sleefdft.h"

// Largest absolute difference tolerated when comparing the two results.
#define THRES 1e-4

// Double-precision complex number.
typedef double complex cmpl;

// Returns exp(-2*pi*i * kn / n).
cmpl omega(double n, double kn) {
  const cmpl unit_step = -2 * M_PI * _Complex_I / n;
  return cexp(unit_step * kn);
}

// Naive O(len^2) reference transform: for every k,
// fs[k] = sum over n of ts[n] * omega(len, n*k).
// ts is the input array and fs the output array, both of length len.
void forward(cmpl *ts, cmpl *fs, int len) {
  for(int k=0;k<len;k++) {
    fs[k] = 0;
    // Multiply in double: as an int product, n*k would overflow a 32-bit int
    // for len above 46341. The value is unchanged whenever it fits in an int.
    for(int n=0;n<len;n++) fs[k] += ts[n] * omega(len, (double)n * k);
  }
}

// Compares the library's forward complex transform with the naive forward()
// on pseudo-random input. An optional argument gives log2 of the transform
// size (the default size is 256). Prints "OK" if the results agree within
// THRES and "NG" otherwise; the exit status is EXIT_SUCCESS only for "OK".
int main(int argc, char **argv) {
  int n = 256;
  if (argc == 2) {
    int log2n = atoi(argv[1]);
    // 1 << log2n is undefined for a negative shift count or one that does not
    // fit in int (assumes int is at least 32 bits wide).
    if (log2n < 0 || log2n > 30) {
      printf("The argument must be between 0 and 30\n");
      exit(-1);
    }
    n = 1 << log2n;
  }

  SleefDFT_setPlanFilePath("plan.txt", NULL, SLEEF_PLAN_AUTOMATIC);

  // Interleaved (real, imaginary) input and output buffers. The element count
  // is computed in size_t so that n*2 cannot overflow int.
  double *sx = (double *)Sleef_malloc((size_t)n*2 * sizeof(double));
  double *sy = (double *)Sleef_malloc((size_t)n*2 * sizeof(double));

  // Do not hand the buffers to the library if an allocation returned NULL.
  if (sx == NULL || sy == NULL) {
    printf("Memory allocation failed\n");
    exit(-1);
  }

  struct SleefDFT *p = SleefDFT_double_init1d(n, sx, sy, SLEEF_MODE_FORWARD);

  if (p == NULL) {
    printf("SleefDFT initialization failed\n");
    exit(-1);
  }

  // Input (ts) and output (fs) of the naive reference transform.
  cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n);
  cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n);

  if (ts == NULL || fs == NULL) {
    printf("Memory allocation failed\n");
    exit(-1);
  }

  // Fill both inputs with the same pseudo-random values; real and imaginary
  // parts each lie in [-1, 1].
  for(int i=0;i<n;i++) {
    ts[i] =
      (2.0 * (rand() / (double)RAND_MAX) - 1) * 1.0 +
      (2.0 * (rand() / (double)RAND_MAX) - 1) * _Complex_I;

    sx[(i*2+0)] = creal(ts[i]);
    sx[(i*2+1)] = cimag(ts[i]);
  }

  forward(ts, fs, n);

  // NULL is passed for in and out; presumably the arrays given to
  // SleefDFT_double_init1d (sx and sy) are used -- confirm against the
  // library reference.
  SleefDFT_double_execute(p, NULL, NULL);

  int success = 1;

  // Compare real and imaginary parts of every element against the reference.
  for(int i=0;i<n;i++) {
    if ((fabs(sy[(i*2+0)] - creal(fs[i])) > THRES) ||
        (fabs(sy[(i*2+1)] - cimag(fs[i])) > THRES)) {
      success = 0;
    }
  }

  printf("%s\n", success ? "OK" : "NG");

  free(fs); free(ts);
  Sleef_free(sy); Sleef_free(sx);

  SleefDFT_dispose(p);

  // Report success with a zero status, following the usual convention.
  exit(success ? EXIT_SUCCESS : EXIT_FAILURE);
}

Fig. 4.1: Test code for DFT subroutines

As shown in the first line, you can compile the source code with the following command, after you install the library.

$ gcc tutorial.c -lsleef -lsleefdft -lm

This program takes one integer argument n. It executes a forward complex transform of size 2^n using both a naive transform and the library. If the two results match, it prints OK.

For the first execution, this program takes a few seconds to finish. This is because the library measures computation speed with many different configurations to find the best execution plan. The best plan is saved to "plan.txt", as specified in the call to SleefDFT_setPlanFilePath. Later executions will finish instantly as the library reads the plan from this file. Instead of specifying the file name in the program, the file can be specified by the SLEEFDFTPLAN environment variable. Instead of constructing or loading a plan, the library can estimate a reasonably good configuration if the SLEEF_MODE_ESTIMATE flag is passed to SleefDFT_double_init1d.

This library executes transforms using the most suitable SIMD instructions available on the computer, in addition to multi-threading. In order to make the computation efficient, the library requires the input and output arrays to be aligned to some boundaries so that the data can be accessed with SIMD instructions. By using Sleef_malloc, as done for the input and output arrays in the example, this alignment is ensured. Memory allocated with Sleef_malloc has to be freed with Sleef_free, as done at the end of the example. When a transform is executed, you need to pass the pointer returned by Sleef_malloc. Alternatively, you can allocate an aligned memory region yourself and pass the pointer to the library.

The real and imaginary parts of the k-th number are stored in the (2k)-th and (2k+1)-th elements of the input and output arrays, respectively. The transform is executed by the call to SleefDFT_double_execute. You can specify the same array as the input and output.

Under src/dft-tester directory, there are other examples showing how to execute transforms in a way that you get equivalent results to other libraries.

Function reference

Sleef_malloc - allocate aligned memory

Synopsis

#include <stdlib.h>
#include <sleef.h>

void * Sleef_malloc(size_t z);

Link with -lsleef.

Description

Sleef_malloc allocates an aligned memory region of z bytes and returns a pointer to that region. The returned pointer points to an address that can be accessed by all SIMD load and store instructions available on that computer. Memory regions allocated by Sleef_malloc have to be freed with Sleef_free.


Sleef_free - free memory allocated by Sleef_malloc

Synopsis

#include <stdlib.h>
#include <sleef.h>

void Sleef_free(void *ptr);

Link with -lsleef.

Description

A memory region pointed to by ptr that was allocated by Sleef_malloc can be freed with Sleef_free.


SleefDFT_setPlanFilePath - set the file path for storing execution plans

Synopsis

#include <stdint.h>
#include <sleefdft.h>

void SleefDFT_setPlanFilePath(const char *path, const char *arch, uint64_t mode);

Link with -lsleefdft -lsleef.

Description

The file name for storing execution plans can be specified with this function. If NULL is specified as path, the file name is read from the SLEEFDFTPLAN environment variable. A string identifying the system micro architecture can also be given. The library will automatically detect the micro architecture if NULL is given as arch. Management options for the plan file can be specified by the mode parameter, as shown below.

Table 4.2: Mode flags for SleefDFT_setPlanFilePath
Flag Meaning
SLEEF_PLAN_AUTOMATIC Execution plans are automatically loaded and saved. Plans are generated if they do not exist.
SLEEF_PLAN_READONLY Execution plans are automatically loaded, but not saved.
SLEEF_PLAN_RESET Existing execution plans are reset and constructed from the beginning.

SleefDFT_double_init1d, SleefDFT_float_init1d, SleefDFT_longdouble_init1d - initialize the tables for 1D transform

Synopsis

#include <stdint.h>
#include <sleefdft.h>

struct SleefDFT * SleefDFT_double_init1d(uint32_t n, const double *in, double *out, uint64_t mode);
struct SleefDFT * SleefDFT_float_init1d(uint32_t n, const float *in, float *out, uint64_t mode);
struct SleefDFT * SleefDFT_longdouble_init1d(uint32_t n, const long double *in, long double *out, uint64_t mode);

Link with -lsleefdft -lsleef.

Description

These functions generate and initialize the tables that are used for 1D transforms, and return a pointer. The size of the transform is specified by n. Currently, only power-of-two sizes can be specified. The list of the flags that can be passed to mode is shown below.

Table 4.3: Mode flags for SleefDFT_double_init
Flag Meaning
SLEEF_MODE_FORWARD Tables are initialized for forward transforms.
SLEEF_MODE_BACKWARD Tables are initialized for backward transforms.
SLEEF_MODE_COMPLEX Tables are initialized for complex transforms.
SLEEF_MODE_REAL Tables are initialized for real transforms.
SLEEF_MODE_ALT Tables are initialized for alternative real transforms.
SLEEF_MODE_ESTIMATE Execution plans are estimated.
SLEEF_MODE_MEASURE Execution plans are measured when they are needed.
SLEEF_MODE_VERBOSE Messages are displayed.
SLEEF_MODE_NO_MT Multithreading will be disabled in the computation for transforms.

Return value

These functions return a pointer to the data that is used for 1D DFT computation, or NULL if an error occurred.


SleefDFT_double_init2d, SleefDFT_float_init2d, SleefDFT_longdouble_init2d - initialize the tables for 2D transform

Synopsis

#include <stdint.h>
#include <sleefdft.h>

struct SleefDFT * SleefDFT_double_init2d(uint32_t n, uint32_t m, const double *in, double *out, uint64_t mode);
struct SleefDFT * SleefDFT_float_init2d(uint32_t n, uint32_t m, const float *in, float *out, uint64_t mode);
struct SleefDFT * SleefDFT_longdouble_init2d(uint32_t n, uint32_t m, const long double *in, long double *out, uint64_t mode);

Link with -lsleefdft -lsleef.

Description

These functions generate and initialize the tables that are used for 2D transforms, and return a pointer to them. The size of the transform is specified by n and m. Currently, only power-of-two sizes can be specified. The flags that can be passed to mode are listed in Table 4.3 above.

Return value

These functions return a pointer to the data that is used for 2D DFT computation, or NULL if an error occurred.


SleefDFT_double_execute, SleefDFT_float_execute, SleefDFT_longdouble_execute - execute a transform

Synopsis

#include <stdint.h>
#include <sleefdft.h>

void SleefDFT_double_execute(struct SleefDFT *ptr, const double *in, double *out);
void SleefDFT_float_execute(struct SleefDFT *ptr, const float *in, float *out);
void SleefDFT_longdouble_execute(struct SleefDFT *ptr, const long double *in, long double *out);

Link with -lsleefdft -lsleef.

Description

ptr is a pointer to the plan. in and out must be pointers returned from the Sleef_malloc function. You can specify the same pointer for both in and out.


SleefDFT_dispose - dispose the tables for transforms

Synopsis

#include <stdint.h>
#include <sleefdft.h>

void SleefDFT_dispose(struct SleefDFT *ptr);

Link with -lsleefdft -lsleef.

Description

This function frees a plan returned by SleefDFT_double_init1d, SleefDFT_float_init1d, SleefDFT_longdouble_init1d, SleefDFT_double_init2d, SleefDFT_float_init2d, or SleefDFT_longdouble_init2d functions.

sleef-3.5.1/doc/html/favicon.png000066400000000000000000000007571373003144100164670ustar00rootroot00000000000000‰PNG  IHDR@@XGlíPLTEÿÿÿy=¦¶]ÿ xįÆÛÿÆÿ¯ŸÀîûª pHYs5Ó5Ó !Cw}IDATx^ÕÒ±Žƒ0 `s­am†î'žà$†®Â*²wÊ­HHÍëŸm‚Š9¤[NýS)Ÿ0q ï’ÞZ‡ªH€S†éÜ[Û_¡ ”oMÊ uŒ®Uw˜26®Ž–!u­€€¾ŠFV@¯‚CGT» èXŸ¡ªË<&~n‚£%¡‚ÃÀRÚ5ˆ‘E¼ ƒ,ܺŘ`O¯Ð¿!…ÄÖ-ÐIÕLà È0±}öÈ ¼'ÀÿÚx 3¸W2hÒcqÍ2ƒJ€u²˜î  ap@û–ø#„#ˆ<çdžA›&]†”¼ö8ÞÃÿMŠƒ[²Þ˜³÷ž’ ŸTa™Âœ¤òЦ#tô¡‚.TA‘¨”î €§ n( Ö@·Ðdp[Jœ58ûnÞ×{߀õŸnQìÍÝà§ÀÑA· üpt:ãdwe'ýöÚ›ií?_…ŸÈÕÁ[å'‡À—IEND®B`‚sleef-3.5.1/doc/html/hellox86.c000066400000000000000000000007021373003144100161370ustar00rootroot00000000000000#include #if defined(_MSC_VER) #include #else #include #endif #include int main(int argc, char **argv) { double a[] = {2, 10}; double b[] = {3, 20}; __m128d va, vb, vc; va = _mm_loadu_pd(a); vb = _mm_loadu_pd(b); vc = Sleef_powd2_u10(va, vb); double c[2]; _mm_storeu_pd(c, vc); printf("pow(%g, %g) = %g\n", a[0], b[0], c[0]); printf("pow(%g, %g) = %g\n", a[1], b[1], c[1]); } sleef-3.5.1/doc/html/index.xhtml000066400000000000000000000564761373003144100165320ustar00rootroot00000000000000 SLEEF Vectorized Math Library

SLEEF Vectorized Math Librarylogo

Table of contents

Overview

SLEEF stands for SIMD Library for Evaluating Elementary Functions. It implements vectorized versions of all C99 real floating point math functions. It can utilize SIMD instructions that are available on modern processors. SLEEF is designed to efficiently perform computation with SIMD instructions by reducing the use of conditional branches and scatter/gather memory access. Our benchmarks show that the performance of SLEEF is comparable to that of the best commercial library. Unlike vendor-tuned assembly-optimized libraries, SLEEF can be easily ported to other architectures by writing a helper file, which is a thin abstraction layer of SIMD intrinsics. SLEEF is also designed to work with various operating systems and compilers. Link time optimization can be used to reduce the overhead of calling functions.

The library contains implementations of all C99 real FP math functions in double precision and single precision. Different accuracy of the results can be chosen for a subset of the elementary functions; for this subset there are versions with up to 1 ULP error (which is the maximum error, not the average) and even faster versions with a few ULPs of error. For non-finite inputs and outputs, the functions return correct results as specified in the C99 standard. All the functions in the library are thoroughly tested, and the evaluation error is confirmed to be within the designed limit by comparing the returned values against high-precision evaluation using the GNU MPFR Library. In particular, we carefully checked the error of the trigonometric functions with arguments close to an integral multiple of π/2.

SLEEF also includes subroutines for the discrete Fourier transform (DFT). These subroutines are fully vectorized, heavily unrolled, and parallelized in such a way that modern SIMD instructions and multiple cores can be utilized for efficient computation. It has an API similar to that of FFTW for easy migration, and is distributed under the BSL, which is a permissive open source license. The subroutines can utilize long vectors up to 2048 bits. The helper files for abstracting SIMD intrinsics are shared with SLEEF libm, and thus it is easy to port DFT subroutines to other architectures. Preliminary benchmark results are now available.

Supported environments

This library currently supports several SIMD architectures :



In addition to the SIMD implementation, a pure C (scalar) version is provided. The library provides dispatchers that automatically choose the best subroutines for the computer the library is executed on. The supported combinations of the architecture, operating system and compiler are shown in Table 1.1.

Table 1.1: Environment support matrix
GCC Clang Intel Compiler MSVC
x86_64, Linux Supported Supported Supported N/A
AArch64, Linux Supported Supported N/A N/A
AArch32, Linux Supported(*1) Supported(*1) N/A N/A
PowerPC, Linux Supported Supported N/A N/A
System/390, Linux Supported Supported N/A N/A
x86_64, FreeBSD Supported N/A N/A
x86_64, OS X Supported(*2) Supported(*2) N/A
x86_64, Windows Supported(Cygwin)(*3) Supported(*3) Supported

The supported compiler versions are as follows.


  • GCC : version 5 and later
  • Clang : version 6 and later
  • Intel Compiler : ICC version 17
  • MSVC : Visual Studio 2019

*1 NEON has only single precision support. The computation results are not in full accuracy because NEON is not IEEE 754-compliant.

*2 LTO is not supported.

*3 AVX functions are not supported for Cygwin, because AVX is not supported by Cygwin ABI.

All functions in the library are thread safe unless otherwise noted.

Credit

Partner institutes and corporations

 
NAIST logo

Division of Information Science of Nara Institute of Science and Technology participates through Naoki Shibata.

 
 
 
ARM logo

As the leading IP company in semiconductor design, ARM participates through Francesco Petrogalli.

 
 
 
Unity Technologies logo

As the leading company in developing a video game engine, Unity Technologies participates through Alexandre Mutel.

 

License

SLEEF is distributed under Boost Software License Version 1.0.

open source logo   Boost Software License is OSI-certified. See this page for more information about Boost Software License.

History

3.5.1 (Released on Sep 15, 2020)

  • Fixed a bug in handling compiler options

3.5 (Released on Sep 1, 2020)

  • IBM System/390 support is added (PR #291)
  • The library can be built with Clang on Windows (PR #300)
  • Static libraries with LTO can be generated (PR #290)
  • Alternative division and sqrt methods can be chosen with AArch64 (PR #289)
  • Header files for inlining the whole SLEEF functions can be generated (PR #283)
  • IEEE remainder function is added (PR #271)
  • GCC-10 can now build SLEEF with SVE support (PR #310)

3.4.1 (Released on Oct 1, 2019)

  • Fixed accuracy problem with tan_u35, atan_u10, log2f_u35 and exp10f_u10 (PR #260, #265, #267)
  • SVE intrinsics that are not supported in newer ACLE are replaced (PR #268)
  • FMA4 detection problem is fixed (PR #262)
  • Compilation problem under Windows with MinGW is fixed (PR #266)

3.4 (Released on Apr 28, 2019)

  • Faster and low precision functions are added (PR #229)
  • Functions that return consistent results across platforms are added (PR #216, #224)
  • Many functions are now faster (PR #239)
  • Quad precision math library(libsleefquad) is added (PR #235, #237, #240)
  • Testers are now faster (PR #223)

3.3.1 (Released on Aug 21, 2018)

  • i386 build problem is fixed
  • FreeBSD support is added
  • Trigonometric functions now evaluate correctly with full FP domain. (PR #210)

3.3 (Released on July 6, 2018)

  • AArch64 SVE target support is added (PR #180, #182)
  • DFT is now faster (PR #186)
  • 3.5-ULP hyperbolic functions are added (PR #192)
  • PowerPC VSX target support is added (PR #195)
  • Modified Payne-Hanek argument reduction is added to the trigonometric functions in libsleef (PR #197)

3.2 (Released on Feb 26, 2018)

  • The whole build system of the project migrated from makefiles to cmake. The makefile build system is now removed.
  • GNUABI version of the library with compatibility tests is added.
  • Benchmarks that compare `libsleef` vs `SVML` on X86 Linux are available in the project tree under src/libm-benchmarks directory.
  • Extensive upstream testing via Travis CI and Appveyor
  • log2 is added.
  • The library can be compiled to an LLVM bitcode object
  • Added masked interface to the library to support AVX512F masked vectorization.
  • Use native instructions if available for `sqrt`.
  • Removed `libm` dependency.
  • fmod(FP remainder), asin, acos, log, pow, log10, exp2, exp10 and log1p functions are now faster.
  • Fixed a bug that was making the error of sinpi, cospi, sincospi, and tgamma functions larger than the specifications on very rare occasions.
  • Fixed a bug that was preventing the dispatcher from choosing the FMA4 implementation.

See Changelog for older changes.

Publication

  • Naoki Shibata and Francesco Petrogalli : SLEEF: A Portable Vectorized Library of C Standard Mathematical Functions, in IEEE Transactions on Parallel and Distributed Systems, DOI:10.1109/TPDS.2019.2960333 (Dec. 2019). [PDF]
  • Francesco Petrogalli and Paul Walker : LLVM and the automatic vectorization of loops invoking math routines: -fsimdmath, 2018 IEEE/ACM 5th Workshop on the LLVM Compiler Infrastructure in HPC (LLVM-HPC), pp. 30-38., DOI:10.1109/LLVM-HPC.2018.8639354 (Nov. 2018). [PDF]
sleef-3.5.1/doc/html/misc.xhtml000066400000000000000000000210421373003144100163330ustar00rootroot00000000000000 SLEEF - Other tools included in the package

SLEEF Documentation - Other tools included in the package

Table of contents

Libm tester

SLEEF libm has three kinds of testers, and each kind of tester has its own role.

The first kind of tester consists of a tester and an IUT (which stands for Implementation Under Test). The role of this tester is to perform a perfunctory set of tests to check if the build is correct. It also performs regression tests. The tester and IUT are built as separate executables, and communicate with each other using a pipe. Since these two are separate, the IUT can be implemented in an exotic language or on an operating system that does not support the libraries required for testing. It is also possible to perform a test over the network.

The second kind of tester is designed to run continuously. It repeatedly generates random arguments for each function, and compares the results of each function to the results calculated with the corresponding function in the MPFR library. This tester is expected to find bugs if it is run for a sufficiently long time.

The third kind of tester checks whether bit-identical results are returned from the functions that are supposed to return such results. The MD5 hash value of all returned values from each function is calculated and checked against the precomputed value.

DFT tester

SLEEF DFT has three kinds of testers. The first ones, named naivetest, compare the results computed by SLEEF DFT with those by a naive DFT implementation. These testers cannot be built with MSVC since complex data types are not supported. The second testers, named fftwtest, compare the results of computation between SLEEF DFT and FFTW. This test requires the FFTW library. The third testers, named roundtriptest, execute a forward transform followed by a backward transform. Then, they compare the results with the original data. While this test does not require an external library and runs on all environments, there could be cases where this test does not find some flaws. The roundtrip testers are used only if FFTW is not available.

Gencoef

Gencoef is a small tool for generating the coefficients for polynomial approximation used in the kernels.

In order to change the configurations, please edit gencoefdp.c. In the beginning of the file, specifications of the parameters for generating coefficients are listed. Please enable one of them by changing #if. Then, run make to compile the source code. Run gencoef, and it will show the generated coefficients in a few minutes. It may take longer depending on the settings.

There are two phases of the program. The first phase is the regression for minimizing the maximum relative error. This problem can be reduced to a linear programming problem, and the Simplex method is used in this implementation. This requires multi-precision calculation, and the implementation uses the MPFR library. In this phase, it uses only a small number of values (specified by the macro S, usually less than 100) within the input domain of the kernel function to approximate the function. The function to approximate is given by FRFUNC function. Specifying higher values for S does not always give better results.

The second phase is to optimize the coefficients so that they give good accuracy with double precision calculation. In this phase, it checks 10000 points (specified by the macro Q) within the specified argument range to see if the polynomial gives good error bounds. In some cases, the last few terms have to be calculated in higher precision in order to achieve 1 ULP or better overall accuracy, and this implementation can take care of that. The L parameter specifies the number of high precision coefficients.

In some cases, it is desirable to fix the last few coefficients to values like 1 or 0.5. This can be specified if you define FIXCOEF0 macro.

Finding a set of good parameters is not a straightforward process.

Benchmarking tool

SLEEF has a tool for measuring and plotting execution time of each function in the library. It consists of an executable for measurements, a makefile for driving measurement and plotting, and a couple of scripts.

In order to start a measurement, you need to first build the executable for measurement. CMake builds the executable along with the library. Please refer to compiling and installing the library for this.

Then, change directory to sleef-3.X/src/libm-benchmarks/. You also need to set the BUILDDIR environment variable to the build directory. A Java runtime environment is also required.

$ export BUILDDIR=`pwd`/../../build

Type "make measure". After compiling the tools, it will prompt for a label for the measurement. After you input a label, measurement begins. After a measurement finishes, you can repeat measurements under different configurations. If you want to measure on a different computer, please copy the entire directory to that computer and continue measurements. If you have Intel Compiler installed on your computer, you can type "make measureSVML" to measure the computation time of SVML functions.

$ make measure
./measure.sh benchsleef
     ...
Enter label of measurement(e.g. My desktop PC) : Skylake
Measurement in progress. This may take several minutes.
Sleef_sind2_u10
Sleef_cosd2_u10
Sleef_tand2_u10
Sleef_sincosd2_u10
     ...
Sleef_atanf8_u10
Sleef_atan2f8_u10
Sleef_atanf8_u35
Sleef_atan2f8_u35

Now, you can plot the results of measurement by 'make plot'.
You can do another measurement by 'make measure'.
You can start over by 'make restart'.

$ make plot
javac ProcessData.java
java ProcessData *dptrig*.out
gnuplot script.out
mv output.png trigdp.png
java ProcessData *dpnontrig*.out
gnuplot script.out
mv output.png nontrigdp.png
java ProcessData *sptrig*.out
gnuplot script.out
mv output.png trigsp.png
java ProcessData *spnontrig*.out
gnuplot script.out
mv output.png nontrigsp.png
$ █

Then type "make plot" to generate graphs. You need to have JDK and gnuplot installed on your computer. Four graphs are generated : trigdp.png, nontrigdp.png, trigsp.png and nontrigsp.png. Please see our benchmark results for an example of graphs generated by this tool.

sleef-3.5.1/doc/html/naistlogo.png000066400000000000000000000275711373003144100170440ustar00rootroot00000000000000‰PNG  IHDRVÉÉ0PLTEÿÿÿú¯Ss¾èéé ÆÑЬ¯­‰‰‰CCCgddiªÙWÊ‹ôJJþ——Zó pHYs  šœ.ïIDATx^ì\=S#É}3€?ª7ÊTåÔTWŽ6ãÊÉãªCB¢|u XH†²x7ÕÕj›HÁ ØMP –…°,$î\Õý—ÁÕý3ÓÏtïHàkš/KO"ÕhêgÞÏçí-L1ÅSL1ÅSL1…~ts¶®M0æ(¥Ym‚1C)ÍM: Úà”²‰‚Ö”Jµ ©nÜi“Œ¥%S“˜b²Ù(½­VÏ&²»ç %véØ{ x5T4,J/xBèSŽK0c±ó—Úÿ–ŽþÇ:Ú”ÒcïÝs ,˜œJiÆýú¡­¥»Wô§fÐÙ÷ŽšXURÚCEà£'ŠÄy²ïzÅ–Z”±¬÷šy}B)Ýäž¿å/xŽ äD›ð{n¬¥¥¯©‹àHË´}Ö|_`Ÿv½=nf)¥Ï,œk¦÷úRõpZZE‹Ÿ¤w]“i4,`¶h€‡* Ç< Õ(þÒ‡à.DØ8Æz3Þõ†ï–÷ßf×ÍM«hB¸°ª÷’Uðc…¶‚jnÈÓ\t)e®Ë×á&¤OqÄ)-iiÁ uña§ÔñWœ>ÛÙíz ¢ –»ª¹p×kÂTLÿ+ `ù¦ºña[K`ì׈L™÷s¦ÇÎÂ! Š‚ïl1¨UÙG-èº×ÚôB=»6ýƒ¨uukó¥PE±Ü3]}‘  £*»LÜçð·¢”Ó…?ï™â¬ qÒ СsÒªx88¨Š’î¢"2|ÒŒt¥Å9Ä÷‡!R&¶"ŒÄ ¦U椔€ÙàAXºÉ]€¹˜LÉ#iãö„"c$È^¹_…¹Øœ€Ó¦›Ÿ™ |Ç@¡÷q @ߘÚÒÒNóèÓá6,àÚ(œ°š WK`f*²e··7'–gºo|VrCÉÒáõÁñ€ÁY¾À»:_7††™Gì Îû<ÛÕü²‡4·ÝEÊÚ_Ñ0v%õ¤ö‡º\Z9X/êúR³yäd3D7Î]±PDº-þŽH±hKú IRkðÏîêýÍ Ð‚_H-èÎf˜>3¡•IÙ0ðÕšï/ú;‹n¼Jü‘ VBÈà2^;¾€óÃaÖ±Ü,ʼn}þnNÏòÞM×%€\%©06¬A÷nÓ ˜ÉyM|Ú•öO>m¾¥8¡/üÍA%>L”ƈ. @šŽ„1äâ#”;R­P G.[žXÄ7ï1±œ™˜îß C"`Õ*EùVóÏš_5\q¡Ÿa™ˆ pœ8?@¤¼?êTt¿»7ÕêÆíá«}´0sÞ+¢|Nª¢ FŒ†ç®ÅL05`` 2¨ºõ+¡¾¯ã‚VðŠˆæ²(Z@Ù0ßG@ƒWZ7ÉP¡ê>Œ¢P>$.ŸÁØ´5m¸"½ÑF: 0^ Æ}³¸*°`ò/VOaE{7gç‡&_öI°‚© †Àu‰¸{#Tr Í»!=Xî)í¾[—­@ ‘ä…鷓СLÑ/‰!iYµý+˜¾AÑéÖº¯3P»@ƒl‘Πèþ*·DÐ(þ梅¦ÀB¾«¸ÓfhH}nc&Ñ}mðRèW-|ÞÁNw3;WþÉ¥½›c1>=ЖݕŸúIðÖècàÓ üö+,Z+¦c‘ݣÑö¿SøÛÀP e ;5µÙDPeœ„чwbÿKklñ %§f1ÌÈÜ&Ðð«ÃRÍÐ£Ð Ôæu…å1PG H@)ˆ:ÀUÚlî —Æ´h¹+û§ØÑnKt Œi£KÙ=Åg€Ù é³æÃŠ@v š“%úÿ-îø``!r9÷ǵ™ô'•"òé'ø¿±À°)"ûW¶)¡p.ú’™9zòb ¼6¤‰öœ¨+ô+P/ÚôÞH¿û?#íÂðõ~’@…>ëß—ë_W)­~°C˜»p‡#wÔ’w õœ-T @¶C19.­¸¥&Ñ€®÷­FY{³ƒÝî}B\,ã"ع/ðqhB…˜9ê‘îݨ?Pa,[ÑKÈ.œšw/zè ‡(/ykQZ½6™£,Ê^ÈÞðsT"œHëäGÚ—ìß[c+ÊqÑs \‚ü(«2òºï~’š0ŒÐÜüÈDY}4›öðA‹@5ˆ¨–×OÄþ¥ý—¥L 2Àº‹ðdܬqM*®V-1é3h̽)ÙÙì2‹ ô{fC¨É¸Gbô´ÆìRr?dÆw´çBô­Å:¬&9_îG‘‹h~0§jº³6¿i#;äGV_=ŸŠl̪ ÝÜÆÖ¢°CÜ Õ×<â"sbßœºéÌаd÷[÷/üä¿ø<€KçWTå:د[ïã“åøþ£+¹97: 1hÆ—‹<“·b‚:’d+îñ †%ëȆQ”¤:'œ£;ØsˆFYþ¶²jÐuD@EPù¬$o÷B­„{’€\ö(dðËD6^ ±"nË(Hð¿cêäìO†–—·d±1ë‹mqùh†W”g‘kÐ]&bFÍ>í¼Š¸h…zc˜ý!þ ¥Ç£!³Cý¹m4’°Kw(-C¡êÃE.†Ý^òø±/Ë`)Z¾ 8“€R…®' €²ê¹¾*œÕ’²Ón_£>6ÍJ+Q [óŒ4ÐpçD œj n4”Àά°2¼ bÈ¡0ör©Ô 1F—=° ((# 
5Äв¦:Rzw–´ç`6èâLñÄ@ïâf\Q×'+-Þž`µªMLs“ÂIÂJ@­¡R›¾4ÁìxB~uãÚNÉSÛQþsjoœ ®N>êÐDd½º:>½!—0,™øŒq=—´’¦g™Õ `ÁSÝú2ñ˜0€µ±èL<ÄM$„ñÜW…ú‘ $OXa;ûpÔ =.ø EÃÛE´” <¶°½×oxnßLYI8–ÁàÑÑfh+M=Áú˜fÁßò¿˜ÓH›öRd—v›Û棶Dó˜ÒÓVÂØKBn Ë'”²ëGµßz¹€™¤¯ž ÉOš"þ¦Za[Ýlåœô…}üÌ{õ:¬†ßýISÄw?hjøõWê7ØbÍò.^æ<²¥­‘6Ä|óGE·#…?kjøúéÕ;r—n=ª‘51{ÏBW'àç¼¢ü¦ðD‘ªïU}@ª¡êƒÙ¨„×Ú(†”]€äóДð÷BAÍôÅÅà¸9ÁÐÇñcn‹ å øÛ|^- …/ÕBÀââ_F˜\âê¾:JZ#—Âp!Ô×óªÏ+Y¶^(žh*øÇââ¢r:76±k Ïå­l';zW%„b)+Lÿ¹#àßJI°p÷$ß/”°Íí{Yô6¥ŒûS».§x”“0#ÏQK‚ù¼¢üèð¥šÀ”A0%C^pxLè)àÝ›=ç}w–k!¯J¯ÝuÈ«$B½à≢,>5G~Î9²¢K€a©V2ºwgjƒ¾—©”UQ]ªy€‹ŸT’ [Å\Ø#ïeê 2ZƬDeér ¤FÑ<@)~Ç øAÁ<|5jË™B!tP6Õ ›åû}«„oòy5 |ý Åàל€§#•0Á!ÉkK]Ý7ü›PZAYT1‰þ—¹³ù«ºßø öæŽ ß,rc»ö‡Ô*R"‘Pª¶„vA….¡~!$ˆ‘Ê&›A0¶6c!ã¸ÙØ‹ÄÞd¤Æ$ñÆCÉ‹ ‹.uWUBQW•möAuï™™gΜ9÷9ÇçÜQ¿+‘Í=>Ï÷s¾oç$.à 08 P ¨wøf‹  ˆ40&Œ;b‚ï; ðˆ;ÖäÇ@a!·J ÐÛ›7l`¼7™ ‡jèBA€¸sªã,âúbó3ÃäˆÚbÁXp‚!ƒ 91Ì€˜ÎêoÜü÷óâ¹Lc ûÎàZk ð6®6˵ B¢k.ô|@ضî›|«Bò‚Ð @È h ÂSè–ŸӺø <Žmì` a1… @hÇÅ_‚6 $ ›@\˜cš ="$aì²Y—EWgb—m£`ÜØTÄî1s!‡ ×Ài|¾/¤w9ïZ^°·Yéu†ì@Í'@ 'À=šˆm‡0Ê3]4@T3„‚„¾Þ„ Ÿ½ä8›”XS«!2T4@ {È!h^rèn W„ˆ0Že A®@vŒCpáôŸœ&'äÕ·m\! HAÂbË2)¢¤gbau;„ ‚!ƒ @hnŸ¹Þn\laͰ À (CÐ>"lïÍÌ® H}a©XL rØk`joçùvtW¾À0jÚ@ƒä <³aI0Ësð¸·lr @„€ ƒà¬ÎÁx6»œß¼ÛC $ù1ЄSH‚Y GÜ—³w! ,¦.À>5Z¼±÷«kÏÙ¬ È@‚. Œ÷ö€º`ÔcAî¸B@ÐRü ­¡¯¢¼è¢€C ä à ä6sg%óÖàÞ ‰`\   rÚ¿›Ý1ôn$ÀÀ‚Ѓ @H ˆˆÐ½ü‰#É BD€ W'VÈá^ð¬•Í1‚ P-&. t5™L*$¦BAB+Ð7ùˆ<© ÂŽ2úÒÀ>ôÍñâHT·ƒ añy' €ÅÎ5MËC–äxóy­Õ)ÝAˆKÍò)  0„‚ˆÝo†U-”vJ´iìOêàƒ 4@ '›œíP‰–î4]@ˆH.À7ñf—‘}ào_I€:9ƒ @H ˆÃ û›]Z÷p£oþÏ&ä@$ÈAú×@QëÝ÷K¹Urd& €Å ‚p\î ¯h'§p Wê,Ç š‚î œÙD­CÓñìbGãî. Aÿ .UpÆHaµE¡‡q7""$„yÃ#IØ{™ÕÀh-Û$læ. ‰Õ‰ páÌ ‘\È­Mvä 9B@Ð7¹¢dªÉ…À®9C  Q÷ B$ûæi^r²ïfö˜BÓP€_Ncû‹^p– +9™=‰ÕAO D*´g7/dÏàqëq. 
ä<.:§Aw»U°ùïÆâJrº+.„}/@È!øÒ·„‚'î„Ƈ?dûu‰Ñ°µØ$Cˆ޼#œ‡à?>êh ¸O<àihÀvûÐÐ&F÷[ëuÈ£µÄYGBÁøéCÆÀäüj/w!G÷âÞAs°Vö!BÁàüîÖ¢c$ˆÚP¢+„‚  XEÂj—‡'F‹é×®*ŽÁ ã‚Р> d °›¥Åÿ\ò™x@¼ÙZš4ƒ ÀÂ79Ϥ9ž²r«ƒUxA,Qm½¢„H>\À#±FG9;œx‘@0ýòÀ„B܃u¶d/Ø&³‘|,Tr$MNÁ)?68Zp¿Úñ± >WÏÇK˜»~ò1…`÷¬Ø .@øIs Õ{ §Ê‹j_ š£‚[;†ø°ãëÝ£Â?y héÂJBÉÆÖäõèqr]ŒKb7¸B°»MŽrÐq/˜DÁ}'°ÎßNéÙÖ"ÀS.Dü+²^D¸`~Lœ€u¦¶Ó(Œa!ˆ}òo¢tP2 ¸æ jëW°Ž„§8l” ,R <@ÎèÕ‚?;§$(Ic¸ ‘4zg; ôûl™le'€|0ê/‚H"kfßÍ99׈µ/÷EÒ •'B*?%uQ$Ž_ ܳ8ŸŒÑm]ÖÕŒ'Îo²9!{ÂY6*œÎ^|3"BÛ¢@ù²æ½¢ƒ¨› ,p‚ *"_ ð’Ðýç³5óÛ•x¬$4Æ_KK6ÍA ‚è{9„uÔ_Áè]€½0§# Åÿ…ÐÁY?DM0„Çû¿ Ð)€€Œ’Ò?ïä¥jµV÷AÔ 4|Œ ­ÞGR+ƒ¡ùMÏ×=A®ýÂ è ˆ‚t $i!…qûKµusžZBp9 „1¶µX€²!'ðÀö´ݪö4€Ðmå~W_DM0„Ǥo‚ô.À„³=¼Ï´åšÇlʼ϶–8R쀾Ѕà‰Î` -ò‚åµVRò“S éz¬–¢Ú:ýaöDƒ¸!ƒ €ÔÃlAØJ7z;U¿Üú°š*B\Ǽƒmà‚‰ 2B¼"bM ßÍ_(•E’t| }¾ë#£-0Á1'˜‚ðƒ`ož'´a(êÁ³i”»¯‹\:Vú² _éPyDâ—p áÕ âì@Šœ÷TêÿŸ”`£RšHò¦°ÚNß“»CÂlžy_L”ÇâôÓ›€Ë_$ŽQ² {8¬ ºCáHg@hºDIl;õ–ûÉ8C Ì>ê5€$¨áãt®wßMmfc³Ôgu?Ç@4Æ„‚XASÂÕ5Óu8˜l`6½Œ%ˆbg~'CPÂã±Ah€·ÙÔÅ%©A«t(XUàÐÐAÙ/…i"Âe„‚– \­íÒ³Y³óUÜâ‚z'Д]4À šÆ7& ´.L”0Î`A=+‰¬²"o‹(xÖ nr~÷}ßÔƒð%‚p‚V(Š=5œrÁÀÉ'öAh€A °a;]€'‡7 ‡ÕÚNìµ1FQ@H ˆ ´á…tƆ_/ZdyBw„úHP§¸†"èz1’T ææî1ʈ@È Z‚ðB²Aep$~R$̆ä¿}æB`ˆ™ €xüi B̃D… U^È×v–§ÒÆnÈ‚!ƒ @H`Y.Õ …‹“CªÂÕî=ÌýCY‚8ѨN€@к<0uƒj²º¯“Ôý`!S4ÀÀAÈãÄžÕ¶v–âYì'"T@È ˆà3B„ü€ ZœBAh€»B%¨æAÍœ @H 2Ú×ç7¾ÞYRkF²•ó @È ðŒO˜~ДNŠ=ë¡ ªNà0ƒ @Hš_ª<¦¾±Y)Éö¾×\ˆ„/ê ' X>¤}VŽ+$%jAíu!ƒ 4fƒ°n×#5Qxoyw“$ÄÉ1P: 2`ABž‡«e“`z~y·‚`Ñ?BAh_çB´BªI±°ÒÛÁÜź_êAøPA€G'€ÚÌ-]Ï©*®! 
4º‚0â¯míÒ§ÓÜ!ˆ+ä5 d ù·q"ê9·$II+¬ÿHP¡ ê<¼£Nážå°EFÃsƒ @È  àÓÜ@8ÛwÑø«~ù‡  BAh›Û „xz1­a3!Ê­a™àˆ' D;xÿù¿9,gT­­åzc @È òÑ “9By48¬ .À'ÍîÓ4—Ý H\@X—cô Ä!Û‡ú`EúþÒv·¸\÷ئ×ÀÏ™„!drávaÒµÞðÜõf orZÔÉgS ‰u¹“ú ú©›î$Ãò/C,dXþDÍ DS4bù8ŽÔÐuWòë"à.Q§à ¤ýâå´6Ä#̘œ@rð}¢'„1 0  Dî/ê½'=.©#É=‘,ÃÜ*\…cA€j‘ Ñ@ÓR‰’—Û¤’>ðÝìÚè·c ìX2ON (ìo¡ƒP8Ømå¸.:ê·ÓV¼r!°‡?½ð‹ ßúÕÀ{"žH•¬ˆ}ÑL è® à7ÆÔŸéä΂ûlV¼s»·S \é.Àòq/h'±P !i©ôQ ³â:Ì䦈ەƒ ßuáõo TEõtƒ”±ù…¨ÅíJôΤ¸ƒߪˆ/»[P`L° Ç1Uñ“bÁ .Lù‡Aç¬Èüð÷UÚÉìl„pžÐ[,äôˆÅ0À×à ïL}ÀÂC€9B°ãPRPDq!3P ÓCv¡\r!(¤ ¤LÌ„ÔpH ‡+ J®<Ã Ø +Q&ö¦;ù\qy7(M/Öw-‰}J Øó(?3¹2È/6².Ü-´c­Î‚] PŠŸç„¥jukçâÜ䤶Sl\ü×vÎlÂ¥|Å]@h–‰³Jc¢6¶³¾ÔPÚhÅŸ¾¨Ÿ— ¼cHêÿ¡òêxµ¶uuin2–o“çቼ hŽÄÅ*‚o ,–`Ø ý˜­·óÍ…àoùÑälBÜ0Æ¥(9%¶´5{ƒ`Ü¿§¾b]ËcY |¾)n\mG+þúBȬx1)‘bìä¡Ñ¨í ¦®ÜÝc$£5LcjkXR XËcÕ’AG`L"A‡\ˆ:&÷h€ÌŠä¥<Ì/}«Z)‘Ã|G‚ú1¹×B½[}~£EÀwA@?&’YñТ<À*ÂÕRiýj²†û's† õ%4 U¡ýô@±’¤Æ&çæ—×T™ll–ýAÌŠ¿¦_8sBû¦É…á´ŸœOû†£\#AüŸŠ8Ì è5"D-¬®ŽÏ÷Þà‰m!xÄ‚åª8@HF¥«¬ˆÉ«“×»c6‡z~¬Ë­a¡þ¸X€/À{ÊK°f¾‘ ªâ!Q€%,ÀxÆ•ª+yG‚¨ˆ„dV<ôS'DZxB¹k¶m ±‚ à„Sb4RuÛ¶/Ä.DU\hà?‚„¦ÓW— H0Ø0Õ?;¸ÞÈ+Ö©!¾À>"Ôh ÕK‡M_</t3$;KbÜ!ÈÂTbB@á1v)U«ÊÈȪt—ÜÈ zaI2ƒmñ‘ÓŒŒ=B¢7")¬ŸiKcTùB­a! [Dˆ¤°þ¹=ÔÜ EK„ ‚Ö5Â1ɉÊßÛ–^—ôA(@z ”!èB²·Ûhߥ­aþ!U€;ë»}é°ä2‹%BA/ [¼:„ð­ô#… @Hà5"œV¯É)fôˆÞ ¨>¬ÂÇØ8ØìËŠ­Øx:T?+r„ÜT’𑲳½œ`š1Ì?”AÈ è?"TÞO\Dýâî‚jDˆ·$=ƒðbŽÖW«h˜•Ôh–ûzÚ„­ÚZÖžÛšáú“¹T PØ7Œ)¡À9ýíéŸlʳQA‡Á¯]@ˆq‰sº€wAW3=3BA7¢8¾µdr™P‘ W"ÔxïÀ6"TÓ¾ÑÍaoMí“r‚[ëyUÅ¡„È…'[ïŽb õFUdÅ øÈ‚!@4`;B¤Š¼z¯1¼}¤†¬Ø¨ ˆˆ@Ð1"<ß’½\¬TJªµ{IÛþè‚;B@Ð+ñ|RFkL¸µÞˆó¨Šs"" ë{‹§7ÀúqÖä‚zà0ݺŸ²šÓ8c{Æ‚¼*®! 脘™ p-ûó?êùU:€ü‰^—$Riú…`!‚!à„°`¡Ýɘ—S™l‚! 
kç4:¤VfböúìõÑA ôÔ,´3\`ïÕÑ÷GèภàN€Ï®ê†‚‚¾Ç‡'œcx$¨F„Üø˜#üPsÖÌnE:)yêå.N@Açfù5Á'PTÎÒ;3?ŒÈ@߈íuŽðT&ärJ—œœX³I‡þˆ7Æð;“^‡¨|ž†…¦JÑÖ"ÃÁ–åP)Ú‰GA8¸U˜{jôUÃ7ûeaxk)»š`hת×~&̽FˆÛ4«é—q˜Ò"†¤ÙH!ˆˆð±™¬AØêíí¥ùd1šÚÂ9’磂 4€ê„!¶xTA{ˆzÁ~ùÖc$ˆáæLöò Bü‡>4Õ’Åo«€"S‡ @ ¸Ñ,3VÕ÷Ç`nœ°‹ϸE‚°¿#ÅBÌz|b§œ!éYˆ„éPÕD.ãtÐk*Ãe¥jöŽ×c ‡`¬ïs¡°ùÝžâœx$¥ÉýBQ|;+Ôh0/<Ñöà>YMc ½.â¡óa°œÄ»7È.Ë ‚Oñ;“¾Ï „°`fy‘|n¤¦ˆßçB+Ë-ÊïLzÆ„©Qb£‡ j‚A^áÌÞÖaæ :AP(?Ûgj4­”Éä Á…`b ½4ËüVà?ºì €ü!ØûÅÏy¡ô´ ±<cøIù€ð×¥®sP@ÞÄoö~¬L p5AªgüG„jCFA®+¡0Öη@þ4¹8Ð?Ñ!¹Ó‰†Ç}(à9S·ƒ ob*6:Q/Ì´:nS€)_2† Äì„èûsrÐ+Ðô§ûÆ®Â#>A˜ÌDŠßðœÀÿa'}“ìxÙ6Ò..à»aéÐ 3;ú!|à7ɯøo{g¯Õ8’öq'z-£ Þ$ÝÎ^@m`º1É*lã¤&HD€›f9€nšdèžÄ†¦Iw³½M÷v»T%ý% `Yušçô™ÁöñýôÔó]%5/üë&à?±ü÷)t£1Qnÿ& ûõÏ•…ðëÙ4O{ÿ™mWá~Á0Ot@¬Ÿ€ƒøsðS›šI޵PKZþÛg`f†Z{f²èÆtÒ¤žîÂθÁŒ;jÿ¨è+8"©ƒÍ‘þLj57šº‰XP>‹ýp1J ¥`?ýöã§ÏØ)2Óʇú[Aµꎙl qG´­ùÓïÓ4zKs–š äSãšKu×SëH¡õ'°§ö.!è'i.Æùl5a5M jú/µâfVÃi_Üimvb§[bÃðÔð@]¿'´G_G`v¡¼oNðŒ™É‚ÙYz¬ÃP[ã×aóÖà3û„´5ПÎíå'¨ÆŠ£©éƒ›'r… (u`úºÛ›Ouˆ³!Ü&ŠŠZ®!Ÿï›gâ¿uµ‚Ê—·ç@YhÿGâ…®Þºˆÿg¶Tr°½ WSÈg¨?‡õ ,¹™Ê¯è)§Q@7_3•®y-¤Ëfü^X:‚ 8úæ}Ät³H_}å‘–‚Cò:læPpgÅ”kyóÿþýá…ë™ûiôá^¦…t¤e&8 roȦ"p6c7E=~ðÔ¥÷LÇûHoW°O”zæ=Á¢‚˜Ãgn&x:÷ÄQЫæ0„}ôDt̓pLmCXs3m$®› ¦{8Gû`| jC‡ºLäoóÅÄÆþÕö}Ê1P®ÕH@÷ýÞg¤ ag¡¼"ÐiØ<ºlÊÑ[[(·vtª…„‡huæO«tÊŠØMB¢‡‡.ÔhÕ)¾ÚŠgŋؚ9—ú˜j^k` ;VƒÃfÔû}×G$ÕeÃ…²V]l©^͸>Gšt€|J©@z0ìíìK‰zÊ€ªÞ2×ÀZ²ÂͨéB•c­–Ù ì?~cñ°’n¾@@I?}`ÔK99‹Ãb  l6ÿ 9¤#‘”ÒÂN‘ >ÔØ^$¤FN%ß(« làˆ,¹v-\ƒõIÕ(б[¥n´Y R/®ìüºÀl‘‹ ×SI‚ÍHOÑ·˜º‡‹ ¼Wßí»$ÆiºÀYÂüõkSŭʃy‹Íš#î•¶LÖk„K_öÞÝ.ÀÖ«C ÿüÈJüû²Ënjז‹ùðEŠkÔKUš»^ ÔH0ö˜\ê-l]¯ã½ÖÒ•¯åÁà[Û÷,q³ß)º.(@)Ób•ð¨.Ø “öÓ‹ˆéÒ³DIÀ~¸+[·;‚ÍîÏ!Q²p-û¨è5ÑcYh J·7t$Wn:úa ŠÉ®µ¥\«ÃÊæý¤‘²XØÖ%„vù‹6)M¯l6ÐGîeBï..^Ìe+ºíø©\ò¦¬´µ-Cg©Žà¢².§ðØtß¿Œ¡K‡+ h•¦ âÉ»C™ÈÚÆéR øU”EðÒŒB¶U¸:2\Ùë¿ð\R‚m7Œ2ªƒòØ ³^òBZX¼%à¡vý‰ ¨å(ƒjÞâÃa/o­¤–' vÔÿmB9bÖöËk@+ùh^Q"•£v¯¢4Dèý—u„("Ú¶Jq¸D-NÌúƒbØËH7P{…ž±S‚ƒlŽ?‹rTÄè’ÎÊAk›«O‹‡;’šé¡9úÒÛœzÑå‡.ézQÛÈ[ù,ÜÐzñ8ØR¦n ±öX\‡m­|OŸ, ¯%>X†Áíž|ç ²úp»™ÔÒ´9ÚÚK*<éºkP HWƒØ8Kj=Xx¹zGj ñŸÃ"í\[J÷f¯NJ௑B p±–Õ~  ¸Â8ލM¿u_>¤¡ƒ·r¿ÄZ³lËg)¹·¨2Þ »R¸äØDÈrò”ý¯~bø]«Å–®³¥ô >'  ±Â¦8¼¿ÑG;°n4LÍ#_ùöp,Ì-²}\DB1z`’6[éÆ˜û¤£5X!kÏÂs×atŠNQ ÁhL1ux2ÀãiTo5ŒŒ‚_ÈÒ£<P[BZÔhÌ6Èx[C½BN)u*qÕ¡ùâAqõÃÅÅÉÎt@ž@¾á°BV@ HÓƒUh4*ƸŠ^ãæ™·’ï‡W„ 
Ç4E+ÀæIÔ› FÚU—P–ÀûÛN£±wéz–ÇÝç‰Ñ­j1ÝÊCvxAj|H0@“\‹¯xyΉ},LI˜~ÚLØ’Bˆø'ÅG o޾)0[“+ÁÆ H°&Ø%#¡³£¦KïýïÛ\¦Ÿ"Æ3ö¤ÜÒ÷ú^þÌ ?Xwô%.XpTìa³4›çèSo—|'­¤Í]åUÒæä–²Í2?„Úvô%.X¨£:d˜ ÀجO>+@Ÿú"ÒÞæÊÞz tjÙ!=mµZêKÕÆü4€;É0]ëƒ7®Í²”èûÓ^s5ÀŒ<ðaaœ=yÁ €Š YÀ%Þɨ 'àTn)ÏP>b€Åüîä8G”:ñ¹±+pTê!€MäðÞ¥^€uˆ‹="þ“êâcàƒ@2=K¦2T»ˆ„ K¾j_!÷I@\y“•<  )>P4 •ri=‹8íK^PyѯI¬B÷{įl’v€úþ.Lèóð{lŒO…Uáœ8\C"˜`º6€@¸!Ö¿IœÞ.ñ§×€ué‹“ž\ò @ ‰` €qK†Y*Y1ÄUpù&kþŠ‚Çr¨ÿêJP)Ø`9 IZ¦k…Wãå,u2œRàŒ ”škAù(V@@=¶ä€@¸ªÒèi#8ª“ߦ׳ø|MµävQ̨¨7ðHæDOºA¡!›N°M¸H1¨¼(  ¢!à,ž#âɈç)&± ¬Ž‰€i¾Ò’ôðIp„)¦tÒdHBžÈ‡¦Ðþ*Pö$©Y@M„SºªŽØ¦4µÁ?žD0Ž0 )ay¥ÞS6‰Ãˆs<­¬# ^ ÿi¡#¨Í0²~ÒfTäºzK:|J hr¶ÊëìJ¤4 Fè†êV·2dEÄöj.<£5Ás…Z9à“ª¢oì·òŠ¢Qú§ҳÃVzr¹—@,‘RÕ—>ÒÐ侂tù4`ðÊ*8•ÊœèmÔD@Õ6è™ñ™/,GʳcùdF(àÓ ¯šñÇà™8ö™qš!Ð/Uˆõò(G¨†HAMÄLÏ·û2k@5lKå€# õ¨…ÕHë) ±J©8¸&ÈG( -H9î{€ÊÆ€twªwÄaeÔGx>8BðÀa‰Á@€¿uÉu‰5 b¼7?&Þü,†è¦G¥þþÀ~ü«ü b¼ûpùpñžU~V)àa·¯ò*¯ò*¯ò*¯ò*¯ò*ÿ Pãö&±IEND®B`‚sleef-3.5.1/doc/html/naistlogo.svg000066400000000000000000002463511373003144100170560ustar00rootroot00000000000000 image/svg+xml sleef-3.5.1/doc/html/nontrigdp.png000066400000000000000000001352671373003144100170530ustar00rootroot00000000000000‰PNG  IHDR óúX¬bKGDÿÿÿ ½§“ IDATxœìÝy\•eþÿñëÊr‘EvD\ÄÐ)Ò`-)- É%kÔ\ (+EÉšfÔ4sF¾YSiDF™!2ŠÙ¯E[P[\ ÃÈ¢‚(0¶óûãþv¾gXÎ}Ï9çõ|Ì÷}Ý×}]Ÿëæ;ßǼ½—£Ðh4€¾î†ž.s ,`À‹@X0À"€ °`€E ,`À‹@X0À"€ °`€E°Z½zuO×€˜0a‚âøƒ™Ïí)&ª™aЃ;À³:sæÌŒ3ÜÜÜìì솾dÉ©=)))<<¼gk3©›o¾9==]»k¬õšhØz×°tǺ§ XF3mÚ´I“&åç绸¸8p@:4kÖ¬ž­ÍÌL´Þ>/^,--]ºt©···­­mppð“O>)š0aBZZš´=vìØgžy&***$$d̘1_}õ•Ô^YYy÷Ýw;99nÛ¶M¡P\¾|¹Ã?ÿüóO<áïïïââ2}úôóçÏw.£Ë>UUUîîîï½÷žÔgáÂ…“'OnkkÓ3f}}ýO<1tèЄ„„;vLáããóùçŸK*** …Z­~òÉ' ’““‡>{öìë½xñâC=äáááéé¹páÂÚÚZý×AKÿ°cÇŽ]¿~ý¤I“ìììBCCøá‡wÞyÇßßàÀ‰‰‰Æð+fÈ_G—SëÛåÅ;vlrrò¤I“‚‚‚öîÝÛÝ…ÒõÊ+¯ :ÔÉÉÉÝÝ}ùòåúר夀>Œ 0ŸÁƒ=zéÒ¥»ví:wž{÷î}ÿý÷ ’’’æÍ›'5Ι3ÇÃ㪪ê›o¾Ñ&Õ~øáüñرcUUUcÆŒ‰‰‰ioo7¤§§çŽ; ÓÓÓ?ýôÓŒŒ +++=cΟ?¿  ààÁƒõõõ{öìqssën9›7o Ù¸qã¹sç²²²:3gNSSÓ™3g .\¸0þ|ý×ÁÀa…Û¶m{饗jkkCCCï»ï¾C‡åçççææfffîÛ·Ïð+Öþª ŸZWws÷îÝ………³gÏÖs¡$EEEÉÉÉŸ|ò‰Z­...~àô¯Ñð¿  Ð`F555Ï=÷ÜÍ7ßlccãããóꫯJíãÇóÍ7¥íÐÐЗ^zIÚ–îò]¸páìÙ³Bˆšš©ýàÁƒBˆK—.éž[^^.„(++“ú´¶¶:88œ:uJ·ý}þö·¿1bÀ€_|ñ…þþÒ]ÄsçÎuX ··÷gŸ}¦{î•+W4ÍM7ÝôöÛok»ikþñÇ…çÏŸ—Ú 
„?ýôSwסÃtÝ +¾qãFi[º)-U¢ÑhæÎû—¿üÅÀ+ÖyXÙª ™ZwØî.fhhè† ¤m=J«¸¸ØÆÆfÇŽjµZÛx­A@Æ;À³rvv^»víÚµk¯^½ºcÇŽ¸¸¸€€€iÓ¦uèæêê*mØÙÙ !*++œ¥ö!C†t¼¤¤D¡PDEEi[ìíí«ªªn¼ñFû$$$lذa„ ·ß~»þþjµÚÚÚzذa×q1„¢¼¼ÜÚÚÚÏÏOÚ•,//÷ôôìò:¸»»>¸———´¡T*•J¥“““v·¡¡AÏêt¯Xg†T%;µ®ÒÒÒî.¦···´¡ÿBI†ºsçÎW_}5..n̘1ùË_î¹çSÿ½Ð3lmm—,Y²~ýú“'OvÀy{{ÿüóϵµµRîòUU)õÕWzžeÕÓ§½½}Á‚Ó§O?räȶmÛ/^¬§YYYkkkqqq@@€nû€®^½*më¾¢|à ]¿väëëÛÚÚZQQáãã#„(..–»«¿ƒî†5!WÌ üýý»¼˜B…B!mx¡f̘1cÆŒ_ý5==ýþûï¿|ùòµþ}ïÌçÒ¥KË—/ÏËËkhh¨««Ûºuëùóç ü!œáÇßzë­IIIMMM—/_^³fMç>~~~ÑÑѱ±±ÚgwíÚÕÚÚj`ŸuëÖUTT¼ûî»Ò÷¥ôô÷óó›>}z|||yy¹F£)**’"ÙM7Ýôᇠ!Z[[SRR´ózzzªTªÎ5Lœ8qÙ²euuuµµµIIIÓ¦MÓ½«©_wÃÈ+fÝ]L]†\¨¢¢¢Ï>ûìêÕ«666nnn …ÂÊÊêZÿ‚YYY©©©f[;ÀœÀó±³³ûå—_þô§?¹»»ûùùmÛ¶mûöí“&M2ðôÌÌ̪ª*OOψˆéûF666ú¼ûî»#GŽœ4iÒ€Æ÷ÑGio!êïóå—_nÚ´)++K©TFFF®\¹röìÙ¿üò‹ž1ß{q#GFDD8::Μ9Sºßû /=ú¶Ûn»ù曵“®\¹rïÞ½ƒ Ò}W».++«‘#G»ºº¾û_R=ÃÈ+f]^Ìd/ÔÕ«WŸ{î9ggçuëÖíÞ½ÛÞÞ^\ã_pÿþý»ví2ýŠ=@¡ùí§èE¾üòËY³fuùC8]â0 ×ÈËË+**BTVV>ûì³>ø`OWz>‚è5.\¸0kÖ¬+W® 0àÞ{ï}ñÅ{º"Лð4À"ð4À"€ °`€E ,`À‹@X0À"ôXnkk[±b…›››R©Œ‰‰©®®6¼ÏO?ý4{ölWWWGGÇÈÈÈÜÜ\óÖè}z,oذ!333''§´´´¹¹yîܹ†÷IHH¨®®þî»ïªªªÆwÏ=÷´µµ™·|@/£Ðh4=2±Orròã?.„(,, >sæL`` !}‚ƒƒ…gΜ *//÷ññé‘…z…ž¹\SSSYY&íÙÛÛŸ:uÊÀ>+V¬Ø½{wUUÕ/¿ü²uëÖððpooo3/лX÷ȬõõõBˆj[œœœ¤FCúÜvÛmï¼óŽ———bÈ!ü±B¡èn.›ææfc¯`>Fyx¹g°£££¢®®NÛ¢V«¥FÙ>íííS¦LùãÿxéÒ%¥Rùæ›oFDDœ>}ZÊÃ577›í1o…Â|”›m®>¹¨¾:WŸ\sõ¢‰˜«wÍÕ'ÕWçê“‹b®^4sõ®¹ú䢤¹Œ2NÏ<íâââíí——'íªTªÆÆÆÐÐPCúÔÖÖž?þ±Çsuuµ³³[ºtissó‘#G̽@¯Òc_NHHHIIQ©Tµµµ+W®ŒŒŒ”¾€••••ššª§««k``à–-[Ôjussó믿ÞÒÒÒ!<ÐAàU«VÝÿý¾¾¾ …"33Sjß¿ÿ®]»ô÷ùàƒjkk‡ æââòÖ[oeee >¼g–è%zìg̦?Ï[ÌÕS1Wïš«O.йzÑDÌÕ‹&b®Þ5WŸ\sõ¢‰zé\V«W¯¾þQ~ÏÖ¬YcÎ5Nš4©ïÍÕ'ÕWçê“‹b®^4sõ®¹úä¢úê\}rQÌÕ‹&b®Þ5WŸ\”±bw€¿kÆŠu=ö0æD“˜0aBZZZOWÿcÝÓ0«3gÎ<ýôÓß|óÍÏ?ÿìííùÖ[o !¦N:jԨ͛7ëvž:uêþýûu[>ûì³)S¦Ø~×]w}úé§ xþùç·oß^^^®T*o»í¶”””€€€}ž~úé>úèÇtvv¾ï¾û^|ñEGGÇë_»®ãǯ_¿þ›o¾ùå—_üýýg̘±|ùrWWW#N‘””,m;99ÕÕÕé-//÷ññé|ÖáÇŸy晼¼<;;»èèè·ß~[ÏémmmO?ýtzzzccãwÞ¹uëVwww!Dwí]{äqáÂu/÷Zxy‰×^3ëŒBîE£ÑL›6mРAùùùW®\Ù·oߘ1côŸòØc5é¸ýöÛ oÿðÃ;õá‡þôÓO¹¹¹NNN³fÍêÜçâÅ‹›7o...ÎÎÎþòË/üñë[wGŸ~úéĉ‡zøðáË—/gggk4šxzKK‹!ÝfÍš¥ ÀÕÕÕÚËòÈ#Lœ8±Ëôûí·ßN›6mæÌ™*•*??ÿÁÔú† 233srrJKK›››çÎ+õ﮽kfN¿BˆŸ~2÷ŒB0`Q.^¼XZZºtéRooo[[Ûààà'Ÿ|Rÿ)VVV¶:n¸áÃÛûõë×yÀ 
&4È×××××W¥Ruî³mÛ¶;î¸ÃÃÃ#,,,11ñ믿îr-=ô‡‡‡§§çÂ… kkk¥ö±cÇ>óÌ3QQQ!!!cÆŒùꫯ:œ¨Ñhyä‘… nÞ¼988X©TŽ5êÅ_”Ò¦ža“““'Mš´wïÞŸþù‰'žð÷÷wqq™>}úùóç»\©öhéš´··gddÄÇÇwyµŸ}öÙE‹=õÔS~~~~~~wÞy§þÓ·nݺbÅŠ77·M›6érÌûî»ïäÉ“999qqq]†[­×_ýË/¿Ü´iS‡öâââC‡½üòËNNN...)))üqUU•t4..ÎÍÍMš¨¬¬¬ººZ÷ÜK—. !¼½½;O§ØØØXOOO!DEEÅž={¶lÙâîîÞ¿ÿuëÖ={¶  @ßuüÍÖ­[-ZÔ¿ÿ·êêêZ[[wîÜùÎ;ï\¸paúôéwß}weeew§××× !¨=êääT__ß]»!åõm`À²8;;¯]»ößÿþ·Z­^½zõÒ¥K»‹©’?ýéOù:œœœ oŸûÌ××÷êÕ«¿þúk‡q&Nœ¸lÙ²ºººÚÚÚ¤¤¤iÓ¦IÏ'ËR([¶lÙ¶mÛòåË ¥Ÿ†ÊÈÈ0pX??¿èèèØØXéæð•+WvíÚÕÚÚªÞúúúŒŒŒ„„=}}ôÑ´´´ï¿ÿ¾¹¹yãÆ---S¦LÑszBBBJJŠJ¥ª­­]¹redd¤¡»kïÚ!ú+7¾¡CÍ=£DÓ×YÂ544ÄÇÇÚÙÙ9::†……íØ±C:Ôùæ÷ßß¹ñí·ßî²³¶ý‰'žÐS@[[ÛŒ3<<<úõëçååõÐC;wN:´dÉ’;î¸C*²Ãà666‡ªªªš;wîàÁƒÝÝÝçÏŸéÒ%©=44tûöíÒvSS“âìÙ³O?vìØ½÷Þëììlkk˜œœ,`Ȱ¦¾¾>))) ÀÁÁaÈ! ,hmmí0Åøñãß|óMíî?ÿùOOOÏ––=×§½½ýùçŸ÷ôôtppÿöÛoõŸÞÚÚº|ùr{{ûèèè .èo樓ëÒX}˜BÑ÷×}˜±b@,`À‹@X0ðßÛ¿ÿm·Ý&mO˜0!--­gëéì÷Y•áf̘±cÇŽž®â¿×Û¯C@_pæÌ™3f¸¹¹ÙÙÙ >|É’%Bˆèèè tè9yòäÇ{L1uêT…B±mÛ6í¡Ó§O+ iwêÔ©O>ù¤žI5ÍSO=µfÍi7))Iûƒº¿ƪjÇŽÖÖÖºí?ýôÓìÙ³]]]###sss;ŸÛÖÖ¶bÅ 777¥RS]]møøkÖ¬INNnnn¾þ%tpüøñèèhWWW;;»   U«V]¾|Ùè³è^'''Ū¨¨è|ÊáÇÿøÇ?ÚÙÙ9;;/Z´Hÿ¹Ý][C®¹VÀ³BoÖÿŒxΘÙp`ôzfÚ´iƒ ÊÏÏ¿råʾ}ûÆŒ#„ˆÿ׿þ¥V«µ=Ïž={èСøøxi7((èÍ7ßÔ}óÍ7ƒƒƒ Ÿ÷ÀÍÍÍ“'O–vgÍšuM§WKKK—íÆªÊÅÅeéÒ¥7nìОP]]ýÝwßUUU7îž{îikkëÐgÆ ™™™999¥¥¥ÍÍÍsçÎ5|üÐÐPww÷ýë_׿]Ÿ~úéĉ‡zøðáË—/gggk4šxzwW»3Ýë_]]Ýô›Gydâĉ>>>úûí·Ó¦M›9s¦J¥ÊÏÏðÁõŸÛݵ5äšk•?øË8wÑÜ3JÀèõ.^¼XZZºtéRooo[[Ûàà`éÎíÔ©SÝÜܶo߮홚š:~üx) !î½÷Þ’’’‚‚!į¿þúÞ{ïÅÆÆ>ïÞ½{§L™¢P(¤]݇]ëëëŸx≡C‡0 $$䨱cÝ5j½öÚkcÇŽÕî–””XYY•–– !~þùç'žxÂßßßÅÅeúôéçÏŸ—úŒ;699yÒ¤IAAA{÷î}å•W†êäääîî¾|ùòÎU]¼xñ¡‡òðððôô\¸pamm­vœgžy&***$$d̘1_}õUçÅN:õ2dH‡ösçÎ=ðÀ^^^J¥2!!áâÅ‹UUUúlݺuÅŠ!!!nnn›6m:xð J¥2p|!DTTÔÞ½{;·ÿ×ËÑh4<òÈÂ… 7oÞ¬T*Gõâ‹/JiSϰºW»»?Jº×ßÆÆÆÖÖÖÖÖ¶½½=##Cû1ºž}öÙE‹=õÔS~~~~~~wÞy§þs»»¶†\s D@¯7xðàÑ£G/]ºt×®]çÎÓ¶ßpà ±±±Ú{¼ÍÍÍï¼óŽnêèׯßÂ… ¥»wï3f̈# Ÿ÷ĉÝÝ\?~AAÁÁƒëëë÷ìÙãææÖ]£ÖC=tæÌ™üü|i7==}Ò¤IþþþBˆ‡~øÇ|xòäÉÿøÇ?BBB|}}_{í5éÐ’%K ¥{­ÙÙÙ---’áŸÿüç;v\½z5555..îšæ½r劣£cçö²²²}ûö¥¦¦2D¡PŒ92  ËFݳ ýöÛo !4Í;ï¼³xñb!DEEÅž={¶lÙâîîÞ¿ÿuëÖ={Vºk-„ˆõôôBôë×O£Ñœ8q¢®®N©TÞrË-ª*..>tèÐË/¿ìäääââ’’’òñÇkïÖÆÅÅIü¾ûî+++Óÿʨ®Ûn»­½½ÝËËËÁÁaïÞ½o¾ù¦ö–¸¤¾¾^1pà@m‹“““Ôh 
GGGímX£,çÒ¥KBˆAÝaµW[ÿEÖÖ­[-ZÔ¿ÿíuuu­­­;wî|çw.\¸0}úô»ï¾»²²²»s»»¶×Íû*0úggçµk×þûßÿV«Õ«W¯^ºté'Ÿ|"„ðòòš>}zjjª"55uÞ¼yvvvº'6lìØ±/¼ðÂéÓ§ï¿ÿþkštРA]†ŠÒÒRkkëaÆÉ6v°hÑ¢÷ß¿¥¥åË/¿T«ÕR=%%% …"**jÔ¨Q£F=z´½½½6’iSÜСCwîܹmÛ6ooï[o½õ£>ê0xyy¹µµµŸŸŸváR£´ëêê*mH×§¡¡Á+ÐÞÞ>eÊŸK—.566.[¶,""â§Ÿ~Òí#ýA]]¶E­VwùÝ©¯¯wvv6âr¤lÜ!X2¬öjw÷Gyï½÷´©Ò}ù\׉'rss»|þY©T*ŠØØØqãÆÙÙÙ­ZµÊÆÆæ‹/¾èîÜî®íõ_ó¾Š €>ÅÖÖvÉ’%C† 9yò¤Ô—™™yòäÉœœœ.SG\\ܺuë,X`ccsMs7î‡~èÜîïïßÚÚZ\\,ÛØATT”͇~˜žž>wî\)¼I¯Å~õÕWg~S]]}×]wI§èÞn1cÆçŸ^SS³páÂûï¿¿CêóõõmmmÕ~vXªÄ××÷š–ÜAmmíùóç{ì1é[ÊK—.mnn>räˆnooï¼¼KAAÁ¸qã:4^Ïr‚‚‚üýýß}÷ÝíFÿ°Ú«ÝÝeÞ¼yšß899u9û–-[î¸ãŽ.ÿ)ÄÎÎ.00P»+M§û'îpnw×öú¯y_E@¯wéÒ¥åË—çåå544ÔÕÕmݺõüùóÚßž¹ë®»\]]gΜyë­·†„„t>=&&æÀ«V­ê|¨­­íªF£{tÆŒºwç´üüü¦OŸ_^^®ÑhŠŠŠŠ‹‹»lìp¢••Õ‚ ^yå•={öHÏ?K£EGGÇÆÆJ÷!¯\¹²k×®ÖÖÖç}öÙgW¯^µ±±qssS(VVVº&Nœ¸lÙ²ºººÚÚÚ¤¤¤iÓ¦IôBºÒ×¥«!„puu ܲe‹Z­nnn~ýõ×[ZZ¤ •••%ÝxB$$$¤¤¤¨TªÚÚÚ•+WFFFJ1O·O—ãK>ûì³3ft¨çz–£P(¶lÙ²mÛ¶åË—666ž9sæé§ŸÎÈÈ0pXÿ(Õ××gdd$$$t×áÑGMKKûþûï›››7nÜØÒÒ2eÊ=çvwm»kïRhÇoQ›Ü¸ëú‡—ë éë,a®¡¡!>>>00ÐÎÎÎÑÑ1,,lÇŽºÖ­['„HOO×m¼ë®»ž}öÙC}øá‡J¥RÛ¡Ãÿxþþûïu;···5*''GÚ?~ü›o¾)m«ÕêG}Ô×××ÁÁAúàsw !F­ÛX__Ÿ””ààà0dÈ ´¶¶j4šÐÐÐíÛ·K}N:5~üø4hܸqÿïÿý¿ÎUUUUÍ;wðàÁîîîóçÏ¿té’Ô®;NSS“âìÙ³ Óý½(‰tú™3g¦OŸîìììààð‡?üaïÞ½Rÿ%K–ÜqÇÒvkkëòåË]\\ìíí£££/\¸Ð¹OwãŸ:uÊËËë×_í|­®g9æØ±c÷Þ{¯³³³­­m```rr²4‚!Ãêù£t {ý5Í?ÿùOOOÏ–––Î=%íííÏ?ÿ¼§§§ƒƒCxxø·ß~«ÿÜî®mwí½”±bBóŸÿˆÕ÷(}`™]<Æø4o˜¥–íÓO?}á…>ÜÓ…ôY117<ð: IDAT13gμ¦S£ï1V¬ëûá } a¬XÇ;À‹@X0À"€ °`€E ,`À‹@X„ Àmmm+V¬pssS*•111ÕÕÕ†÷qrrRü§ŠŠ ó–èez,oذ!333''§´´´¹¹yîܹ†÷©®®núÍ#<2qâDó–èe¦G&öññINN~üñÇ…………ÁÁÁgΜ ¼¦>ÞÞÞ¯½öÚC=ÔÝD E­`RŠx™š7ÌR01cźž¹\SSSYY&íÙÛÛŸ:uêZûìܹÓÚÚzÖ¬Yú§ëð¼ôêÕ«¥ö¼¼Bˆwß}×ÑÑ1::ºJôB}ÿQ| ú*>‚€…èÝ?ƒ€™€ °`€E ,`À‹@X0À"€ °`€E ,`À‹@X0À"€ °`€E ,`À‹@X0À"€ °`€E ,`À‹@X0À"€ °`€E ,`À‹Ðc¸­­mÅŠnnnJ¥2&&¦ººúšú>|øü£³³ó¢E‹ÌX8À,bbdþpz,oذ!333''§´´´¹¹yîܹ†÷ùöÛo§M›6sæL•J•ŸŸÿàƒš·v@ï£Ðh4=2±Orròã?.„(,, >sæL`` !}&Ož}úÝwß]YY©g:ÅZ½zµÔž——§íSTTÔÐРÝÕ=¤»ÝÐÐPTTÔå!F`F`F0Å×ï÷° F`F`F`F0|„øøø!NIÏ<\SSãêêzäÈ‘ &H-J¥òí·ßž3gŽlŸ{ï½W©Tþõ¯•r¬F£š––öý÷ß777oܸ±¥¥eÊ”)=µ@¯`ÝS¯ZµJ­VGDD455EEEeffJíû÷ï/--‹‹ÓÓç±Ç«««»ë®»n¼ñÆO>ùÄÓÓ³§èúþû±¼ ½ï€ßôîw€030À"\[NMMåsS€ÞèÚð AƒüýýMS &tm_ž={öìÙ³MT 
à÷NöÃTBˆìlÓ×ðß¹|ìØ±ââbínqqññãÇM\Æ'€—,YÒÚÚªÝmmm5qIŸL.--1b„vwĈ%%%&. ã“ À®®®åååÚݲ²2ggg—€ñÉàèèèE‹ýøã¦¸¸xñâÅÑÑÑæ© #’ ÀÏ?ÿ¼ƒƒÃðáÃûõë7lذ¬_¿Þ<•`D2?ƒäèèøÁ”•••––úûûûùù™§,ŒKæ°¢½½]­V755ùùùµ¶¶¶µµ™¡,ŒK&Ÿ?þ¦›nŠˆˆX°`b÷îÝK–,1Ka“LNLLœ:uªZ­4h"**êàÁƒæ¨ £’yøÈ‘#{öì±²²’vkjjL_F&sX©TªÕjínYY™»»»‰KÀøä866¶¢¢BqñâÅÄÄÄY³f™¥0ŒI&¯_¿ÞÊÊÊ××·¨¨ÈÃãÿþÏ=÷œy*Àˆä8;;»¢¢¢¤¤dÈ!ü0 —’ ÀŸ†††ÆÆF{{{S×€ÑÉ<½|ùòÜÜ\!ÄG}äêêêììüÉ'Ÿ˜¥0ŒI&¿÷Þ{£GB<ÿüó¯¼òÊûï¿ÿ—¿üÅ,…`L2@ÿúë¯vvvuuu .´±±‰5Oe‘Lvrr:sæÌ‰'&L˜`ccÓÔÔdž²0.™¼téÒqãÆi4š;w !¾þúë   ³€1ÉàeË–MŸ>½_¿~C‡B 2dË–-f) c’ÿ¤‘#Gv¹ @/"óhú0À"€Á \UUuüøñªª*SW€‰ÈàË—/ß}÷Ý^^^ãÇ÷òòºçž{jkkÍSF$€—-[ÖÞÞ^PPÐÔÔtúôé¶¶¶eË–™§2ŒHæg8púôi!DppðŽ;BBBÌRÆ$sX£ÑX[ÿ_H¶¶¶Öh4&. ã“ À·ß~ûâÅ‹ËËË…ååå‹/¾ýöÛÍRÆ$€_zé¥Ë—/ûùùõë×ÏÏϯ¦¦fóæÍæ© #Ò÷pssóW_}uèСâââòòr__߀€³U€é»ܯ_¿¿ýíoBˆ€€€ÈÈHÒ/ ÷Ò€ …»»û… L1q[[ÛŠ+ÜÜÜ”JeLLLuuµá}:x* Kæà[n¹å¶ÛnÛ¸qãöíÛßûQ&Þ°aCfffNNNiiissóܹs¯©OBBBÓoüq£”èÃd~xÿþýJ¥2##C·qÞ¼y×?ñÖ­[“““¥_Þ´iSpp°J¥ 4°•••­­íõ—°28??ß³ÖÔÔTVV†……I»AAAööö§NÒ ÀúûdeeíÝ»wðàÁ÷Þ{orr²½½½)êô2@;v¬¸¸X»[\\|üøñ럵¾¾^1pà@m‹“““ÔhHŸÙ³gïÙ³çСCk׮ݹsç¢E‹ôO§øO«W¯–Úóòò´}ŠŠŠ´»º‡t·ŠŠŠº<ÄŒÀŒÐçG0z ×ïwx%AÏñññBœ0…F£Ñs8$$dÏž=#GŽÔV?kÖ¬ï¾ûî:g­©©quu=räÈ„ ¤¥RùöÛoÏ™3çšú!¾øâ‹;ï¼³¾¾^©Tv9—B!³F€¡bbäûdg›i®ìlE¼LÍÆ©ô,cÅ:™;À¥¥¥#FŒÐîŽ1¢¤¤äúguqqñööÖ&~•JÕØØz­}„ýû÷×h4mmm×_ “ À®®®åååÚݲ²2ggg£Lœ’’¢R©jkkW®\©}¹755UŸÔÔÔ’’’ººº£G.]ºtêÔ©ŽŽŽF© ÐWÉ|+::zÑ¢E©©©%%%þóŸ£££2ñªU«ÔjuDDDSSSTTTff¦Ô¾ÿþÒÒÒ¸¸8=}þõ¯=ûì³õõõžžžÑÑÑÚwz=Nö±dÁ“É ‡È‚èÕd°J¥:zô¨yªÀDdž0a‚J¥2O)˜ŽÌਨ¨™3gÆÇÇ{yyiçÍ›gâª02™œ••5`À€÷ß_·‘ èudp~~¾yêÀ¤dÞ o ,`À‹ ó,IuuuCCƒvwøðá&«“ À‡Z°`AYY™n£F£1eIŸÌ#Љ‰‰ëÖ­«­­mÒažÊ0"ùG çÏŸo†:0)™;Àžžž—.]2O)˜ŽÌà°°°ÈÈȸ¸8WWWmã¼yóL\F&€?þøãþýû§§§ë6€½ŽLÎÏÏ7O˜”Ì;Àô ]ßNKK NKKë|466ÖÄU`dÝ`'''0 Ïè:=z´Ã½ï,`À‹@XùÜÞÞþÝwßíß¿_ÑÚÚÚÖÖfúª02™|þüù›nº)""bÁ‚BˆÝ»w/Y²Ä,…`L2811qêÔ©jµzРABˆ¨¨¨ƒeâ¶¶¶+V¸¹¹)•ʘ˜˜êêêkí£Ñh&Ož¬P(òóóR “ ÀGŽY³f•••´ëìì\SSc”‰7lØ™™™““SZZÚÜÜfÉ’%/¼ð‚‹‹‹!Ó)þÓêÕ«¥ö¼¼Iæ°F£Ù·o_nn®îsÛFùôTBBBJJÊ”)SÜÜÜV®\(„ÈÊʺråJ\\\w}|}}'Mš$ réÒ¥?üá™™™ãÇ¿þ’}˜LNLLܵkWTTÔõ¿÷ÛÁªU«ÔjuDDDSSSTTTff¦Ô¾ÿþÒÒR)wÙÇÞÞÞÞÞþ«·¶B 
ø`xxøo¼aô``N2X¥R=zÔÎÎÎ<Õ`"2@O˜0A¥R™§LGæpTTÔÌ™3ããã½¼¼´óæÍ3qU™LÎÊÊ0`Àû￯ÛHô:28??ߣë|ôèÑôj2ïÏž=»CKTT”ÉŠÀTdðÉ“';´äææš¬L¥ÛŸAJOOB444H•Jåááaúª0²n𫯾*„¨««“6„7Üpƒ§§ç[o½e¦Ò0žn°ô¨ó“O>¹yóf3Ö€IȼLúô 2€¾ °`€Eèö#Xºª««´»Ã‡7Y=˜„L>tèЂ ÊÊÊt5)KÀødNLL\·n]mmm“óT€É?=þ|3Ô€IÉÜöôô¼té’yJÀtd…EFFÆÅŹººjçÍ›gâª02™üñÇ÷ïß?==]·‘ èudp~~¾yêÀ¤dÞ–TUU?~¼ªªÊÔÕ`"2øòåËwß}·——×øñã½¼¼î¹çžÚÚZóT€ÉàeË–µ··455>}º­­mÙ²eæ© #’yøÀ§OŸvqqBïØ±#$$Ä,…`L2X£ÑX[ÿ_kkkFcâ’Ðcñò}4o˜¾0™G o¿ýöÅ‹——— !ÊËË/^|ûí·›¥0ŒI&¿ôÒK—/_öóóëׯŸŸŸ_MMÍæÍ›ÍSF$󴇇ǡC‡Š‹‹ËËË}}}ÌSÆ%€%D_@¯Öõ#Ðiii?üðƒ´Ñ™Q&nkk[±b…›››R©Œ‰‰©®®6¼ÏóÏ??räH;;;WWט˜˜ââb£”èú¾œ––æääÜeܽþ‰7lØ™™™““ãîî¾pá¹sçæääØ'**jΜ9ƒ®««[³fͬY³Nœ8qý%ú°®ðÑ£G;lÝÖ­[“““¥_Þ´iSpp°J¥ 4¤Ï„ ¤ŽŽŽ¾¾¾»ví2Q‘€>Cæ+гgÏîÐuý³ÖÔÔTVV†……I»AAAööö§N2¼Off¦‡‡‡ƒƒÃ /¼°fÍšë/ зÉà“'OvhÉÍͽþYëëë…Ô¶899Iö¹ï¾ûNž<™““7nÜ8ýÓ)þÓêÕ«¥ö¼¼ÙÙ¦¯ú>™G ÏŸ?ÓM7EDD,X°@±{÷î%K–˜¥0ŒI&'&&N:U­V4HuðàAsÔ€QÉ<}äÈ‘={öhŸyvvv®©©1}U™Ì`¥R©V«µ»eeeîîî&. ã“ÿàØØØŠŠ !ÄÅ‹gÍše–Â0&™¼~ýz+++__ߢ¢"þýû?÷Üsæ© #’ÿàìììŠŠŠ’’’!C†ð;À`,Šxù>š7L_€Å ÀmmmVVV>>>>>>RËÏ?ÿìàà`úÂ0&™mžÊ0"™G “’’öìÙ3eÊ!ħŸ~ºnݺ[o½µ¾¾Þ,µ`428??ßÓÓSÚ¾á†þú׿þñ4}UY·@þùçíííRú­­­mooB´µµ•••™¯:Œ¤ÛÕÜÜ,m9ò§Ÿ~B´´´,Z´ÈL¥`<ò_  ,`ô}zÅŠVVVBˆ_~ùeõêÕmmmæ* cê6ßu×]gÏž•¶###+**´íæ¨ £ê6úé§æ¬“â`€EÐ÷0º£ˆ—ï£yÃôu F½GLŒL‡ìl³Ôè•zìè¶¶¶+V¸¹¹)•ʘ˜˜êêjÃû<ýôÓcÆŒ±··÷ññyôÑGëëëÍ[; ÷é±¼aÆÌÌÌœœœÒÒÒæææ¹sçÞçâÅ‹›7o...ÎÎÎþòË/üqóÖè}zìè­[·&''‡„„!6mÚ¬R© é³mÛ6©ƒ‡‡GbbâæÍ›Í_? 
wé™;À555•••aaaÒnPP½½ý©S§®µâ›o¾7nœþéÿiõêÕR{^^ž¶OQQQCCƒvW÷îvCCCQQQ—‡þ‹ tý5Hÿ*®u4ý \…q'úÿß#ÈŽ` #üw3&Fþ?ã¿Y]ÖÀŒÀŒÐÝñññBœ0…F£1ÖX†+)) øá‡‚‚‚¤ooï5kÖÄÆÆ^SŸ×_}õêÕÇ÷÷÷ïn.…¢gÖú™óû±F›Ë€ÿů,ÿ "ãÌ•-».>ÀÛ™í#X¿«ÿk @–@ IDATï¥s€ñ+ÖõÌ#ÐŽŽŽBˆºº:m‹Z­– ïóòË/ÿýïÏÉÉÑ“~ôÌ#Ð...ÞÞÞÚ[Þ*•ª±±144Ôð>k×®MII9xðàèÑ£ÍY9 —걯@'$$¤¤¤¨TªÚÚÚ•+WFFFJ_ÀÊÊÊJMMÕß'))é­·Þúì³Ï|}}¯^½ú믿öÔ*½E}zÕªUjµ:""¢©©)***33Sjß¿iii\\\w}~þùç””!ĨQ£¤Slll®^½ÚCëô=€­¬¬6mÚ´iÓ¦íiiiúû888ðQ+Àµê±G 0'0À"€ °`€E豯@˜‚"^¦ƒæ ³Ôøýá0À"€G ë#ß';ÛôuÁ`€Eà0¸>< è%ÀôE„R:áh€Eà0ÌÈœ·#dçâ¾`a¸ °ÜLN/ßGó†éë,øw‰/—€±ñ4àÿ³wïaU•iÿÀŸ-  ( Ѝx>1#Z™‰ "¢™šZšç”ÔÄêU'Q™“©“Zy@…$µ+zƒdp¦ÌC8cf:h–'1tsØX×ïõk¿{Íq¯uïýÜûû¹ú–]|y€}ïçYë98<vzxÚ ÎO€À)` NS ï¦ È `xÒ÷‰)Ðàð€éžÊRÂ`p €SÀh*8P$r?o[Í%{Ûœ–FÀÿ‡A)H„ã ”ÀàX~s\ŠA)<¬§€08Lp \w(À pËŠk @'˜ NO€~×I7\Ûp7x N`p €S°ÛàÚÚÚ¸¸¸­[·VWW<8!!! à>ÿŸO?ýtÆ 'Nœ¨©©1›Íöøö„BŒýÛÿÏöíúðÛì6^¾|yjjê¾}û¦M›6~üø}ûöÝçÿãëë;gΜ¢¢¢¹sçÚã{ø/`ßAa·pBB¢E‹BBB„«V­êÞ½{NNN—.]îçÿyòÉ'…;vì°ËwàXîç®ssÜuÖ~«î£ bìÁ žÊB}ì3...¾zõê£>ª~Ú­[7'NX€ïçÿf~óa)îVÀ³Ï&XBoooËõâõÿÜ'ÃZ¼x±zý‡~°ü?çλyó¦åSë²þø7‚fýö7oÞÿ&ïÿ/ _Á1¿Â}r¶W–¶YÚþ.î3è?IÛiòžuÿYõ~q‡}Ïú¯à•¥U#TZÛ9B+ðþ+¶¿²4o…íø½g‰ÿì3ßçOrÖ¬YuqZ}3EQ´úZ÷¯¸¸ØÏÏïßÿþwß¾}Õ+žžž[¶lyî¹çîÿÿÙ±cǨQ£~s,ƒA£6ÞÇ$ Íf× ËÆ ûÈr¨FÝoð¢ÙZ Ù^Åšemß®Í#‡j”giUÞñ‡á@Y”œu 4å²R‡ÊÒê¯]³)Ð\_ÅŽT1´ÖÙç °¯¯o«V­,#þœœœêêêž={þ·ÿÀ}²Û9À111ï½÷^NNNIIÉÂ… ÃÃÃÕŽ۶mûðÃïýÿÔÖÖÖÔÔüòË/Bˆššššš{µda·] ÿô§?•••………™L¦¨¨¨ÔÔTõúž={.]ºôâ‹/ÞãÿÙ²eËÌ™3Õ7n,„0~~~öhÀÿÁi7ŽÌn`—U«V­ZµªÎõ¤¤¤ßüf̘1cÆ Ý¿E€†£‰îcÁ6É÷ðì6€Àà0§€08»m‚Nî77LÆnÉ +lÙí„0€úÜÇ&ƆûØÄïà80Çýœ¬ƒ#%lƒ;ÁúÂùX`W87_N›`€SÀœÀà0§€M°À™üæ&Øá€/ €ÀÞpÀ€c ÀŽ&ÐÀ ߨcT0(6Á§€08 €À)` N`p €SÀœÀà0§€08 €À)` N`p €SÀœÀà0§€08 €À)` N`p €£[¼x±½¿pDøÃ€záC"øeA½ð‡ºÂÝÛo¿mïoþ0 ^øÃ~YP/üa€®0§ 帶¶vÁ‚þþþžžž£G¾~ýº½¿#ptR€—/_žššºoß¾K—.ýüóÏãÇ·÷wŽ®¡½¿‘°hÑ¢!ĪU«ºwïž““Ó¥K{_àÀÙ !þýï[®xxx¤¦¦Þíÿ·÷l¥ÉpR¾'ÀBoooËõb½0!ã`///!Dyy¹åJYY™zànäûúú¶jÕê‡~P?ÍÉÉ©®®îÙ³§}¿+ppg/[¶lóæÍ»víò÷÷ŽŽ.//ÿæ›oìýM€C“o °âOúSYYYXX˜ÉdŠŠŠJMMµ÷wŽNÊ'Àÿ-ùÖ< €À)¸,^¼ØÞߨ*//ïÔ©S...G‚±ÌbÙ(â,~rsskkk=<<åøY,Å8‹%®¿,¼Š‘e÷,ÍI¹ –ãèÛ·ï=þ533YޓŲQBˆÊÊÊiÓ¦¥§§ !E;vì–-[<==5ŒàŲQ4Y¡¡¡÷ø×ãÇKdmåÊ•qqq...ÿøÇ?†úùçŸΙ3Y˜Å²QŒ³X¾ïsýeáUŒ,»gé›`Ù$))éÿ:cÆ d9NËF 
!.\˜ýÑGýáøúë¯gÍšÕ¥K—Õ«WkÁ;‹e£h²’““Õ.^¼¸aÆèèèŽ;lÞ¼yôèÑï½÷žtAÖ:wî¼oß¾ãǯZµjß¾}.\>|xvv6²0‹e£g±|ßçúË«YvÏÒ…2kß¾ý•+WEéÒ¥‹¢(mÛ¶E–1Î ;zô¨åÓ7nDFFJ¤(J¿~ýE©­­mÓ¦¢(·oß~衇å˜Y,Å8‹%®¿,¼Š‘e÷,=`, äææ~ûí·¹¹¹Èrü,~ºqãFëÖ­-Ÿ6kÖìæÍ›ÈrÀ ÆY'Ož´ž¥ìïï_QQ!uÂËË«¸¸¸Aƒÿÿ.™““ãããƒ,ÇÌbÙ(ÆY*fïû\Yx#ËîYzÀØ&7nÜ8p`ûöí###Û·o?hÐ £Ñˆ,ÇÌbÙ(!D³fÍJKKÕkkk—,Yòøã#˃guèÐaãÆ–O322}× !ºuë¾fÍšªªª>ø`ĈãÆC–cf±lã,–ïû\Yx#ËîYzÀ`›Lœ8ñÖ­[|ðA›6m®\¹2gÎOOÏO?ýY˜Å²QBˆçŸ~ĈcÇŽ 0™LíÚµûÇ?þŒ,G bœuàÀaƵjÕJ]š{þüù¯¾ú*,,LÞ aµ}ŽÁ`ðóó8pàË/¿ìêêŠ,ÌbÙ(ÆY,ß÷¹þ²ð*F–ݳô€°MÎ;çíí­~ZVVÖµk×ÂÂBd9`ËF !jkkEiذáž={~÷»ß¹¸¸èÄ5‹e£ˆ³„F£1===???((èÙgŸõóó“=tÂò}$‚)Ð61 µµµ–Okkk ²3‹e£„ß~ûmÆ …C† uqq)((@–1ÎBøûû<8**jðàÁºJÉ‚Nžü+777õd9`Ë÷}T Y‚¾¯,®YzÀØ&F£ñÉ'Ÿ´ÜMxê©§Š‹‹‘å˜Y,¥(JVVV·n݆ RXX¸xñâ&Mš|òÉ'ÈrÀ ÆY&Lxæ™g._¾¬(ÊåË—GŽ9qâD©ƒEñöö6™LŠ¢DFF&''Ÿ9s&88˜AÖË/¿|íÚ5d9~ËFUVVŽ=ÚÕÕµS§N§OŸF–ÃqÍjÞ¼yYY™åÓÒÒÒ€€©ƒEiÚ´©¢(555žžžj¨··7ƒ¬Ö­[«D«.]º¤_×™,ËÇǧΕ:è„, 1{ßGÅ%HáûÊ⚥¬Ö@uuµÉdª®®F–ãg±lÔÑ£G333{ì±ÂÂÂï¿ÿYÄ5‹åоàààíÛ·üñÇ!!!ÞÞÞUUUnnn ²JKKƒ‚‚,Ÿ¶iÓ¦¨¨Hö¬Gy¤Î•^½zé„, 1{ßGÅ%Hð}eqÍÒ…½GàRÚ±cÇÑ£GE¹råŠz´¦§§§",,,??Y•ŲQÖ–.]Ú¤I“¤¤$EQþùÏè·Æ‰eËFg±\Ñ÷å—_º¹¹¹ºº~ñÅŠ¢>|øÕW_e’““cù4''§G ².^¼h4uúâÈÒË÷} T Y‚Tœ^YÎ¥9 €Dpp°ú*}òÉ'§NªþúFãÔ©S‡Š,‡ÊbÙ(k]»v=uê”åÓk×®…‡‡#˃g±\ѧ(JUU•~»°Ø+ëðáÃ.\°|záÂ…ÌÌLÙ³V¬XÑ AƒFíܹSQ”ÔÔÔ5kÖè„,[°|ß·†Š!EÂî•Å>K?ËZ ooïêêjËõêêjÍ—a K– â,‹ÊÊÊ:WÌf3²0ˆq–ŠÙŠ>Þ.^¼¸ÿþ‹/òÈêÔ©S~~þŽ;  (Êùóç»víŠ,GËbù¾ï$È*M³Wû,=`ü ‚‚‚®^½ª(J§NŠŠŠ,×Fc§NåPY,àÈJKKÿö·¿ÉôX}EÙ¶m›ú¤Y”GI‘eõë×OQ”ÚÚÚ6mÚ(Šrûöí‡zH dÙ‚åû¾*†,A »Wû,=4Ôs}1[#FŒxî¹ç–-[öꫯŽ?þÍ7ßlÛ¶íåË——,Y²hÑ"d9TËF !BCCïñ¯ÇG–ƒ1βvëÖ­ƒîÝ»wïÞ½?þøcûöíŸy橃f̘QïõîÝ»ßퟤȚ7o^Ó¦M/_¾Ü¦M›+W®Ì™3gÞ¼yŸ~ú©¶)ÄY^^^ÅÅž¾¾ê§999>>>š§ ËF,ß÷-P1d ì^Yì³ta︔***fÏž}·ß4²*‹e£EùäWo¿ýv@@@\\\RRÒÒ¥KÛµk§ùf,³X6Š8Ë"**ªqãÆ&Løè£Ôsz¥bŒò()²¬?þñ=zôX½zµ¯¯ïûï¿ß±cÇùóçë„,[°|ßgå)wÌ^Yì³ô`P¥Þ÷ãæÍ›·nݪsÑÏÏY˜Å²QBˆþýû¯Y³æá‡V?5&LØ»w/²-ˆqV@@@MMÍÔ©S£¢¢ ФI=R(ƒkѢř3gš5k¦~Z\\RPP uVß¾}Õ ƒŸŸßÀ_~ùeWWW̓¥ –ïû\‘½Š)K×W×,=` 7//¯ÒÒRË•>}ú9rYŽÄ8KQ”“'OîÝ»7###33³Gƒ zûí·å w™L®Órʬ矾¨¨è½÷Þk×®ÝÅ‹_}õÕ-ZlݺUö,ûBÅ%@ÑÀÞß8¢¤¤¤ÚÚÚ:wïÞ­Çb ²,–BtèÐaãÆ–O322ô»«Å2‹e£ˆ³ CÏž=çÏŸ¿uëÖ>øÀh4.Y²Dê !D¬•©S§þüóÏÆ cõþûï+ŠÒ½{÷Æ÷èÑÃÅÅåý÷ßg`_¨²<†z 
“Éäîîn}155uíÚµ”4‹e£„6lX«V­:vìXPPpþüù¯¾ú*,,LÛÆY,EœµsçNuWª«W¯FDDDEEEEEµoß^Þ ;Ɖ'fddðÈ*((ÈËËkݺuË–-õK!Ëâúðk–3@Åpä ®¯,®YzÀ.ÐP¿]»v5jÔÈúJQQщ'¤ÎbÙ¨'žxâÂ… éééùùùÇöÙgõ[àÄ2‹e£ˆ³† æååµpáÂyóæyzzê”Bt'V£gú裸¸¸C‡9sæÐ¡C .LLLüÓŸþ$]–å°øgžy¦°°pèСÿó?ÿ3fÌ˜ŠŠŠÈÈHM"ì’e±gÏž.]ºtýO]ºtÙ³gÔYnnnêz’#GŽ˜ÍæÕ«W?òÈ#š¯Ë% pd]LÊ,~š4ib}Öˆê…^ˆ‹‹“:‹¬?CÙqbŒìUŒra#®CöÚ~š‡ÌÌL—™3gîÛ·ïܹsûöí›>}º‹‹ËáÇ¥Îòõõ={öl‹§Nòóó“7+,,ìèÑ£–Ooܸ©m„]²„¿üòK‹¿üò‹¯nÊ,–ŒF#Ë,Ѓ¢M›6Ë–-³¾˜‘‘¡Ç™7”YdýÊŽ€Ýq­zÀ`›<öØc)))sçÎÝ´i“z%000%%¥OŸ>Rg™L&ÿ:ª««åÍ:yò¤õeýŽi¡Ìâº%–«ëÓCM¸f\(·è#Ë"ëÏPvœP1À°¬zÀØVcÆŒyæ™grrrJJJš5kÖ¥K— ôšXN–½bÅŠ.]º¨W²³³ccc,oV‡6nÜhy•fdd(º-€§ÌjÙ²ennn5N¹¹¹-[¶”:‹%N«ë­±<Ô„kÈ…r‹>Ê,²þ Y*8®CsØ êqýúõqãÆíß¿ßÓÓÓÛÛ»¼¼¼ªª*<<<55Uó}ÞȲ80lذV­ZuìØ±  àüùó_}õUXX˜†vÉš8qbPPЊ+¬/Ο?ÿúõëÉÉÉòf±´`Á‚´´´ØØØÞ½{ûøø”••=ztåÊ•ãǯóS•+Ë`0Ü95Àl67jÔHÛ·² ÆY,q}øf—-ú²XBÅ*†\YzÀØ&\_Bª³gÏž8qB<Ù³gÏ®]»ê‘B™e4ÓÓÓóó󃂂ž}öY]+"Ë:|øpXXØ /¼0qâÄV­Z]½z599yëÖ­‡Ò|–eK~~~´LvPeeeEDDFy³ÜÝÝKKKëœ X]]íëëk2™d bœÅòmË`0Üí˜c²,ww÷òòrõ4ÔK—.ÍŸ?ÿرcýû÷×|¯ Ê,–‹&P1d ¨²eé`›Ì›7Oý ººúÓO?9s¦ÐíÏš2 ä’––6wîÜk×®©Ÿ®Y³f̘1²gñãééyç!£Ñ\UU%oËCM¸f±ìùáá›\Èú3”'T Y‚*`¬•ÂÂÂÐÐÐÂÂB¡ÿŸ5eÈâöíÛ4 ÑɲXÞu5j”¢(w®xwuuݾ}»¼Ydsã¹Nø§ÌbÙóãúð=²þ A*†,A° –V<<<,»WUUyxxðÈY4hР[·nœ²Xî«”˜˜8nܸ®]»ÖYñ¾yóf©³æÎVVVvçÜxIƒg±Ü7{JЬ?C„Š!K@ÅÓ;µœœ¬î¼Ê& Ü»“:KP9Ld‘’’òᇦ¤¤dggë”BœµmÛ¶ÀÀ@Ë;K``à¶mÛ¤âš|úôé:³²²Úµk'oÖ„ ,XPç⫯¾:iÒ$mƒˆ³œY† CŠ S 5³~ýúW^yÅÅÅÅ`0¤¤¤<ûì³<²ÀÁq]ˆÎr_%ÞÈæáó›ðO™År¢&ö”Y†&Ãñƒ*` °¶Nœ8‘Ý«W¯:;¯J—År›P–²Æl!:Ë}•ìŽkÏ{Ê‹¬ïDÙIcƒìUL}º‹Ʀe IDAT‹ËáÇåÍbÙ(ÆÈÎfy¸1q–µøøx!„‹‹KÆ ÓÒÒqÍ*((P?Öûƒ,«¼¼¼I“&êÇeee:Ñdq=­”¬ïDÙq²†Š!E‚Š!I–0¶‰Âd2Õ¹˜’’Ò¯_?=²Ú´i³lÙ2ë‹!!!šg)LÏsgÙ(®æþjæÌ™êÇsæÌÑü½,HQ77·êêê:«ªªÜÝݥΪãøñãŸ}öÙÙ³gÙ±ÌbÖó³HNN®­­]¾|yhh¨~AY\ïÛ’õˆ;NÖP1?H…ŠáøYzÀØ&Bˆôôô/ÿS||¼§§§YÙÙÙ­[·^·nåbAAAãÆ5ÏRÕÖÖž9sæ»ï¾;sæLmm­N)ÄY,ŧ»ÎÁÁÁ§OŸ®s1++«]»vRg¤8õü,˜=|cyß–¬ïDßqâìULYšP1¤ÈÒÀ6QÿŽë¥G–ÉdÊÉÉiѢņ Ô‹GŽiÕª•æYõºyó&}(§»Î˜ž…YÏÏ‚ÙÃ7~÷mÉúNè8i‹å C’,má$› “ÉD³ã™%+++kàÀaaa  ذaƒ¶Y,· eÙ(!DRRRtt´‹‹K”âââI“&i›e0æÏŸ¿jÕ*ë‹/¼ðBóæÍÿú׿j›eì|c6‡g¼Nœ8A³oþ¼:éÔd2ü™mO¤w–ÑhÔãË‚®˜MO5!k”BÛ.–5Õ`‹)))ýúõÓ6HÍjӦͲeˬ/fdd„„„hžeÓ™Ud}'ÊŽ*†ÈÊ…‚Š!s–V0¶ e jÛ¶í¿ÿýoõã/¿ü²W¯^ ÕËÅT”xnL“UogB– S§Nùûû?÷ÜskÖ¬éÖ­ÛìÙ³õË"`÷vÉÞóB¤§§ùŸâãã===µ 
Rho|×Áæ¾-Y߉²ã„Ša{Ù@Cê,M`ìdÊ4gΜ^½z}ûí·{÷îíܹó›o¾©` ´7M£¢¢Õ÷îÝ;ËŠæY,ï:[ HNN®­­]¾|yhh¨ÔYd=?77·ššõãÜÜÜgžy¦]»vS¦L)//×<‹²;KÜ.~=?®߸"ë;QvœP1l"(¢bÆN6¡¬A%%%C‡5 ƒa̘1•••Š¢˜Íæ>ø@ó,åMS??¿‹/ª÷ÝwÍ›7ùå—ÕÝD4Ïby×Ù§éI,çá³l”´çÇõáWd}'ÊŽ*†íA”+\P1œÎ–LEEEƒ pþXPžÍëæævãÆ ooo!Ä… ÂÃÃóóókjjÔ´Úf‘oì ‡ëÅòPJ–B “ÉäîîN™•••5pàÀˆˆˆ°°°„„„h»5.e£ÜÝÝËËËÝÜÜ„—.]š?þ±cÇú÷ïïååEð À‹¦ã„Š¡UÞåB b€k ¸¸¸iÓ¦ m©]SSSUUåííݰaCšD]±|Ï l”Zǯ\¹þç?ÿyöìÙBˆï¿ÿ~ôèÑùùùÚf§¤¤ôíÛW±cÇŽ?ÿùÏ?þø£~`—zÿIÛž(€\ †ùóç¯ZµÊúâ /¼Ð¼ysmw«¬¬´œº©G–¢(ÕÕÕ^^^Š¢\¾|ÙÝÝ]ó,Át›P~²(**ª­­Õé‹ßÉd2ýòË/úE°¼ëL‰òH6Ü.“ðù-.à×óãúðë}[²¾qÇ ÃÄET Û³¤~Òƒ°Mz÷î½qãFEQÖ­[×¶mÛ¹sçž={vîܹøÃ4Ïêׯ߻ᆱ(ÊíÛ·ßyçÇ\Q”Ë—/7mÚTó,²Ù§ á{ËF)|¢³¼ëÌuzËCM/.à×ócüðå}[²¾eÇ ÃFÄET Û³¤>ß`›¤§§7lØÐÓÓÓÇÇ'++KÝ×ßßÿðáÚg]aÙ(…õBt–wYNOby¨ ×Å\{~İ &ÈúNA¨6b\.T ÇcPE€Í ºŸ$AËóÜY6JñðÃÏœ93&&&>>~ÕªU£Fš={öÆ;¶ÿ~m³ÂÂÂF޹páBEQ–/_¾cÇŽC‡]¹r%44´¤¤DÛ,ƒÁ`2™ÜÝݳ²²–0`À€ 6h4wîܬY³æçŸ~饗Æ·dÉ’ššõI©†AªQÚ~Ù{géýBŒ1ÂÇÇ祗^Z·nݵkׄcÆŒIII üüóÏ5 zçw6nÜ8cÆ !DRRÒ¸qãnݺµuëÖÝ»w÷ïß_à AØ(AÛ.z 4x衇ô*..nÚ´iƒ ôRÕÔÔTUUy{{7lØP§ˆÊÊJ‚Ÿ›‘õ‚P14AV.*†“³ãà›ÊŸ$A–`ºM(¿F)|¢³¼ë,˜NOby¨ ×Å*f5¹>|Ϭ²FÖw"BÅÐ Á¼nT ÛÙ¥bh`mð³Ü&”_£T,¢Ãô$[°<Ô„ëâ–=?ìh;Êû¶œÀ¨¶#(¢bØÎ.CCr|—ޝ´´”S–à»M(³FYc¶]Åì®3×…è,5ázÊËžׇoŒïÛªÈúNA¨6¢(¢bØÎ.CCr|—@LpÜ&”e£¬1›†Àò®3×éI,5Ẹ€eÏëÃ7ö÷m9AŰå@Ãv²W 9¾K‡ÕóžäÍb¹M(ËFYc6fy×™ñô$–‡š°\\À²çÇõá×û¶dýÊŽ“‚Š¡Q¢ÿ@ÃvvyÒ£!9¾K‡µxñâ–-[¾þúë›6mzýõ×[´h±xñâO~%o=š˜ÄˆÅlÌò®3ûéIœ5ášÅ²çÇõá×û¶dý»tœP1 å@Ãv²[…°MÂÂÂŽ;fùô‡~ c¥b¶M¨Še£TÌ¢³¼ëÌ~z§-m¸fqíù±|øFæ¾-Y†¾ã¤ b<(Êr¡ bhDÞÇWÛ¤I“&f³Ùò©ÙlÖc}ËmBY6Š1–wÙOOÂXŠ,Æ=?fßTüîÛ’õg(;N¨Œ~ ˆŠa;Ê'=ÂØ&¡¡¡ –Oׯ_¯>f‘=‹å6¡,¥ð]ˆÎò®3ûéIK”ŲçÇì—Åõ¾-Y†²ãdÁìЂå f¿,®CEQ<¨ 6¬mÛ¶ÁÁÁ¹¹¹yyy;wî “=Ë××÷üùóM›65Í›7¿zõj``àµk×~ÿûßi›ÕªU«Ý»wÿîw¿B;vlêÔ©'OžüàƒÒÓÓ8 aËF !Þ~ûíÄÄÄ^x!88øÒ¥K}ôQLLL‡Ô;;[Q”F=ñÄùË_{ì1̓BCCïñ¯Ç—.ˆq–³G*fßš4iR\\ìêêZSSÓ¸qãÊÊJOOϪªª–-[j^x-YB“ÉÔ¢E‹òòò+W®téÒE½k ÷†Š!]@ÅÐ"KÈY1´ŸLèTÔu‰udff¦¥¥­Zµ*33SÒ¬7Þxã¹çž‹mÔ¨Ñwß}7cÆŒ5kÖøûûïØ±CÃÕÊ•+‡ºråJ!Ä­[·vîÜ)„8{öìÈ‘#µ bÙ(!D‡’’’fÍš¥~š˜˜Ø±cGÍSè³XŠ}ôÑG·lÙ²nݺéÓ§ !fÏž’’2gΚéI‘‘‘zLO=zô=æÆËÄ8Ë¢´´Tׯo—,²¾,MVçÎ7oÞ“””Ô¶mÛ×_]½oÛ«W/ͳ~ÿû߯^½Z½oûÁôèÑC½Þ¸qcͳÈú3”'T é‚*† (+†ðØ&IIIw^œ1cÆ™3g:¤v=eÌ„Ë/á L–âºÝ‚Ó]g® Ñû÷ïoy(ñã?Ι3ç»ï¾Ó6…2ˆqK\¾mß¾ý¹çžsss³Ü·ÍÌÌTïÛöéÓGà 
!Ä¡C‡†ª>ÏQïÛöïßÿ믿þì³Ï¶lÙ¢mY†²ã„Š!T ÛQV =` ÷’œœ¬ùÎFvÏâ×(£Ñ˜žžžŸŸôì³ÏúùùñÈRqÚW‰ëô$²¹ñ\'üSf±ìùa/@M0Þ*†,AC#rW Ú%ÇüÍ;—Så_YËFÈ7VhÏ7fy¨ ׬ŋ·lÙòõ×_ß´iÓ믿ޢE‹Å‹ò+I³° ¤Yd}'ý‚P1d RP1¤ÍÒžÛäСCï¾ûnAAÁíÛ·Õ+YYY!!!Bˆ£GÊ›eÁr—fâºå]g®Ó“XjÂ5‹åDM®ß,˜½m‘õg(;N¨² T i³4$å7í8ºuëöôÓOwíÚU=Z±hÑ¢wß}W1mÚ4y³,X¾„˜5ŠëBt²YC˜ž¤ ²¹ñ\'ü“e±ìùõêÕ+&&Ʋ?߆ ’’’~üñG̓ˆ³,˜½m‘õgˆ;N¨R T i³´DüÄ™Ÿ:W:tèÀ Ë¢´´Tïú,–â‡lÖ¦'Sa9QóÛo¿mÒ¤IHHÈðáÃ{ôèáååõÝwßéDœeAÙ[#È"ëÏØ¥ãÄ Ë.¨’fiHÎQ»ÃˆŠŠÊÈȰ¾2vìØmÛ¶ÉžR›7oÞêÕ«eÏby×Ù‚ÙÝY–‡špÍâ:Q“åÃ7 f§•’õg(;N¨²©P1dÌÒÀP–Û„²l”à»lÖ¦'ÙŽå¡&\³÷ž€] bH€0Ôƒåñ,%ø.Dçz×YÅl `_\¾q½o `_¨reéA—W@v;wî´üq?óÌ3sæÌyë­·¤ÎbÙ(!Daa¡:µX¶l™Nû¢Qf=ñÄ.\Po>\¿›ÁdAÖJKKõްK–Ù<|þ)³XöüîöÈ«{÷îš? £Ì=zô=î¥Ê›å$P13H bÈ–¥<€z°Ü&”e£¢ƒ`y¨ ×,Æ5ùayfW¨²1†Šqÿ0†z°Ü že£cy×™ëô$–‡špÍr<¾q½oË*†ÔP19KC=XnʲQŒ±¼ëÌu!zÓ¦MëL´îرãùóç5Œ bœu'=?®ßpßV"¨¡bÈ•¥ €¡~,· eÙ(×éI,5ášÅ²çÇõáîÛJC– Š![–090¸ëŒéI`w,{~Œ¾á¾-ØË.¨Òei`¨ËmBY6Š1–w1= ìŽeÏëÃ7ƪ««ÓÓÓÃÂÂÚµkÇ#ˆ+–+\P1Ç Ù$)))::ÚòŒEµ{÷îâââI“&É›Årƒx–blúôéO?ýô¨Q£¬o¿òÊ+ò !Ö®];lذøøxë)CzgDyä‘:Wzõê%{Vþ¥B¿þ%eãû¶%%%Ó§O÷óó{íµ×^zé%KùÕYWd¯bÊÒ„Š!W–ðØ&ƒÁd2¹»»[_LMM]»víÁƒåͰ;–w¦'À}c|fU~~~hhè‰'fÏž}óæÍ>ú¨}ûöÚFØ㊡9 €mb0ÒÓÓ5jd}ñòåË‹-ª¬¬”7ëN ¶ µcq,÷Uñ븴¨¨HñÙgŸ½öÚk±±±z<¡% ‰`lƒÁPgN²…Ùl–7‹å6¡,rÁô$°û¶………ƒ :}ú´z¥¨¨hÞ¼yµµµûöí“1À1ñ¨zÀ`[UVVÖ™–Ì ‹rU$Ë¥ž”Y\¢³„…èpÿê½—ªfFsßV§¬–-[Öù@'dAŽ€kÅОۤÞu¹ ²XnʲQ јbyf•âøñã7nÜ7nÜa-G—Ëà¸V =à °MŒF#Ùã_Ê,–Û„²l”j×®]u‡8qBö,g€éIp7………j‡ÒbÙ²e:u.)³BCCËËËçÌ™£÷(”,HEyäŽw‚;q­zÀ`¹q]ˆÎ×…èdsã¹NøÇâ¨N+•K~~~‡hŽ\¢ÌY bÜ? €m2cÆŒ!C†Œ;–YH„ë<|–¸NO"›ÏuÂ?€S!ëÏwœ(\ÂñN6Âãz'‡)Ð6¹téR¥ž<²@Œ§'‘Íç:á‹ lÇõr!ëÏØ¥ãÔªU«/¾øâ³Ï>‹ŒŒÔûÈ%š,²ñå@±¤¤dúôé4Ð)³à~) 3£ÑÈ2‹¥AƒÕ¹2fÌYB—»4ˆqÖ¦M›Ìfs‹»víJNN–7Ka2™ê\LIIéׯŸ¶AÄY ‘‚‚‚ü±Y³f¿:uêTdd䀤ÎR%//ÏÕÕ5000>>þöíÛzD©Y¾¾¾ùùù#FŒ0`À… xdÁ}Âhm”””TTTxyy5kÖŒSÀo"›Ïu¿ݳdŸÚm0ÒÓÓëûìµ×^Óï:eÜ'L¶ImmíòåËóóóÕ+AAA111qqqwÛ+HŠ,×…è,÷Up,§vSnÍ8lDÖŸ¡ì8;vìnG.iŽ2Ë‚lj7APaa¡:£­°°Pñå—_Λ7/--Mó)eÜ? 
€m—––×»woŸ²²²£G®\¹²¼¼|ÅŠòfD¸.DŸ9sæäÉ“ëôQÊËË7lØ í¸”,ˆ1²CÚ¸žÝZê â,®Èú3è8ÙŽlº5å¼îãÇßíºæ÷(³àþá °ML&“¿¿‹ÕÕÕRgŒø-D'{v„ 6by¨ ×,–?®g‰¬?CÙqâZ1Ȧ[SÎëæú¸î_{r‹ŠŠŠŽŽÎÉɱ\ÉÎΞ6mÚàÁƒ¥ÎJJJª­­­sq÷îÝŸ~ú©¼Y,%„¨­­]¶lYëÖ­}}}ÛµkçëëÛºuëwÞyçÎo@®,!Dee¥¹>òqÅòP®Y,ÇŠ~~~)ôY\‘õg(;N\+Fhhèc=¦Žßê4HåííýöÛoëñ•í›U]]œœœ››Ë,K{öÚ~š‡ÂÂÂððp!„§§g`` §§§"<<¼°°Pê,ÁñH –R%666888>>þСCgΜ9tèÐÚµk[·n½`Á©³êýê,t2}úôÏ?ÿœ_Ë3«Âþ eÇ $µb(äGI‘eiS mðÍ7ßœ={öĉêŒÐž={víÚUö,Át›P–Ú²eK5N?þxDDDDD„æ›|Pf±ÜW‰=N‡špÍb9Q“ëÃ7Ê-ú(³Èú3Ä'*†ƒ ¾CÕ¤I“#GŽÌž=;--M×£¤ˆ³4fï8Ÿ|ò §,!„Ë]țŲQŠ¢xxx×¹xãÆ ©³@"f³yéÒ¥AAA–w–   eË–Ýù@I– ÆY‘‘‘‰‰‰šY»g±$„HOOÿò?ÅÇÇ{zzJeAÖw"BÅ%ˆ·¼¼<___õãÿýßÿUgíéôx–2KskƒòVA– œJ–ŲQŠ¢Œ9òé§Ÿ>{ö¬åÊ™3gžzê©Q£FIE6kmG67žë„Ê,ÞŠ‹‹sssï¼O'i×û¶Ö¡ú}qâ T 1«?þøc³fÍ ~uêÔ©ÈÈÈH¥ €µ°ãg±l”Âw!:Ë»Îõþa0Xˆîëëk}[DuêÔ)???IƒgYpêùq}øÆõmË:”M*†DAŒ+Ù„_Ê,=` 0Ôƒå6¡,%ø.Dß»w¯_ÖŽA*– ÑYjÂ5«¶¶vùò剉‰ùùùê•   ˜˜˜¸¸8—ÿ\ÿ)QV\\\ZZZ\\\ïÞ½}||ÊÊÊŽ=ºråÊòòrÍ·' Ì‰ bÈ$øV Ê£¤(³taï8¥¥¥,³@"Ì¢[pºë,˜Nh$›ÏuÂ?eˉš\¾FÍ¿¦#dYõg‚P1d RøV EQÊÊÊþüç?ëñ•훥9 €¡,7ˆgÙ(k”÷³²Xî«$˜Nhdy¨ ×,–=?ìv‡Š!K‚Š˜m£üüü×^{íûï¿¿yó¦õEÙ³XnϲQŒ‘Í„FÛ±<Ô„kˉšQQQÑÑÑ+V¬°Ò–;xð`mƒˆ³XžY%û3”'T Y‚*†lYº°÷\n3fÌøâ‹/2¬0ÈQ¾œ ²XÞuf?¡‘Ó¡&\³XNÔäúðë™Udý»tœP1nܸýû÷{zzz{{———WUU…‡‡§¦¦È›%„ y FŸÅYÆ.'T ²@ÅpfÛäñÇ?pà@Æ3É)³XnʲQŒ©7}ïœ5äêêº}ûvƒ„LOÂX–,®=¿äääÉ“'ë÷õí•Åì¾-Y†²ãdŠ!E C®,-Ñ?tædõêÕ“'O>~üxžY,· eÙ(EQòòò¦L™Òµk×VV4O¡Ïb¹¯ûéIdï)”o^\³˜MÔT1ûeq=­”¬?CÙq²`öGhÁr… ³_׊¡<¶‰Á`¸ó¢N?RÊ,??¿ƒZˆ©²²²"""ŒF£¤Y,%„8p`‡ž~úéÆ[.4HÛú,×»Î\‘ÍÃg6áß.Y,ŸS1kÔ‚ ÒÒÒbccëlÑ7~üxÍ·è£Ì"ëÏPvœ,P1d â—ŵbè`›”••ÝyQ§bD™E¹*’åROÊ,ö ÑÉf az8f=?â š,®÷mÉú3”'öð*vü,®C ìý Èͧ> ²ÔMÛsrr,W²³³§M›¦ßñY,%„ðòò2›ÍšY»gYL™2…MPmmí²eËZ·níëëÛ®];__ßÖ­[¿óÎ;µµµRgåçç?ÿüóݺu ²¢y eã,Þ(ÏŸ#Èbyf• ìÏPvœP1d„Š!E–ðØ&}ûö½Ç¿fffJšÅr›P–B¬Y³æèÑ£±±±¾¾¾–‹:½RfYpºëÌuzÙÜx®þé¾59¡Ü¢2‹¬?CÙqBÅ.ˆ®CÛ$))éÿ:cÆ I³T,· å×(® Ñ­CÙ €¹NOby¨ ×,–òóó_{íµï¿ÿþæÍ›ÖeÏâzß–¬?CÙqBÅ*†\YzÀî…åñÌÅu!ºu(›»Î\¢³<Ô„kËžï‡oüîÛ²„Š!K@Å3K[ý0Û€8ˆ8 ×éIdsã¹Nø§ÌbÙócÿðÙ}[–P1d ¨Òfi½óz=~ü¸¤Ywb9VdÖ(® ÑYÞuæ:=‰å¡&\³Xöü¸>|³àñ¶EÖŸ±KÇ C– Š!m–†HlÄÆÆª\¼xqÆ ÑÑÑ;v,((ؼyóèÑ£åÍé±üÛ²žþù:¬X±Âúf°ÔABˆ€€€o¾ù†fÊeÙFšÌvì´K–º—;MoŒ,küøñÑÑÑ4Ä(³˜!ëÏØ¥ã„Š!K@Å<¶Qÿþý׬YóðëŸÆ &ìÝ»Wö, –Û„²l?,ï:[`zØ 
Ë‰š\¾Y‡rzžCÖŸ±Klj–+\P1$ÍҔߴãðòò*--uqq±\éÓ§Ï‘#Gdϰ;–û*Y0{sby¨ ×,–=?ì(WY†²ã„Š!K@Å6KC˜m“:lܸñ•W^Q?ÍÈÈЯ£I™År›Pfb¿lÖ¦'ÙŽln<× ÿ”Y,'jRv¿ìÒÕcÖ@²þ eÇ C– ÁîeßPG¿O€mtàÀaƵjÕJ]^rþüù¯¾ú*,,Lö,–Û„2kTrr²úA½kœÞ{ï=I³,XÞu¶åôÀ¾¸>|cvßÖ‚¬?CÙq‰ bÈ•¥tŒle4ÓÓÓóó󃂂ž}öY???Y,· eÙ(Áw!:Ù¬!LO§Â²ç—””tÕöqe³û¶ÖÈú3”4–X®pAÅ+K˜m““'Oº»»Ïš5K‘››[ZZª_m¥Ìb¹M(ËF !Nž>>õ.)”+‹åñ,¥b¹%® ÑYjÂ,‹eÏ{JšEÖŸ¡ì8¡b8x@Å6K²>¹vÁÁÁÛ·o7!!!ÞÞÞUUUnnn ²x,P´Wq×…è,qžÄòPfY,'jNž*uc}ûöýúë¯EÙ¾}»«««»»ûºuë¤bœõí·ß6iÒ¤k׮Çøá‡½½½¿ûî;Y7nÜHHHxã7ŒF£N)ôY T Y‚T¨NO€áÿ°Ü&”e£¬q]ˆÎò®3×éI,5áš%8NÔäúð $‚Š!Q*` ÿ‡åñ,UË…è,÷Uâ:=‰å¡&\³Xöü° Y¼¡bÈ$P1$ÉÒ—}@ƒc ;zô¨åÓ7nDFFʞŲQ¼‘ÍÂô$‘Íç:áŸ2‹åDMooo“ɤ(Jdddrrò™3g‚ƒƒõ¢ÉúäWo¿ýv@@@\\\RRÒÒ¥KÛµk÷ꫯʛÅ*†,A *†$YºÂøÁõêÕ«í=Iš¥Ð®Šd¹ÔËJmwâĉœœõã‹/ž;wNö ƾüòK777WW×/¾øBQ”ÇëôFHÄ8‹YÏOÕ´iSEQjjj<==ËÊÊÔh=‚ˆ³XÞ·%ëÏwœP1d RP1dËÒCC{?–XyyyBBÂÝþuêÔ©’f vç¹gq5kÖ¬%K–tîÜYïJAì§' >¼¤¤Ä27¾OŸ>}úô‘:ˆqVƒ ÜÝÝoݺ•™™ù·¿ýÍÛÛ»´´Tö¬àààíÛ·Æoo着*777=‚ˆ³Nž>~óæÍê&z € ‚bccÕê]®aq–5ý¾¸]‚¸f±ìù-Y²Ä²?ŸâôéÓ“'OÖ#ˆ8‹å}[²þ qÇI bH$P1dËÒ…}<ƒÃc¹A<ËFqE6kÓ“À©p¨YUUU^^®Ó·W×3«@",W¸(¨Reé»@k£¤¤¤¢¢ÂËË«Y³f ²XnʲQ½{÷.))¹ÇÿpéÒ%³Ô¸7ß|Óh4nÞ¼933³ªªª}ûöׯ_×0‚8HáååUZZêââb¹Ò§OŸ#GŽÈžra¹okÖ,!Dnnnii©~`²,–‡šp͹°<³JögÈ‚P1Àp­zÀØ&QQQÑÑÑ+V¬°Ü_ÌÎÎŽõÔS;wîÔ$ˆ8KÅø¾-Y†¸ã„ŠáÈACÎ,Í¡ ÀDYY™,–wí™Ã”YdwF¸ÞîÑ;‹wÏÀ b8r€ `›Ì˜1cÈ!cÇŽe–`_,ï: '˜ž„°DY,qzøÆYÆ.'T ‰ b8-l‚e“K—.‘mzF™²»uëV@@ÍÁKzd±ÜWI1|øð’’Ë”¡>}úôéÓGÛ»dŒXöü° Èú3è8iˆå T §…[Gr[¿~ý”)Sê¬Á¨©©iܸ±æ¯nÊ,Ù<|Nþí•Åò9ËF\P1d bœ¿ ¿ m”””¨û+4kÖLö,–Û„²l”ªeË–7oÞœ8qbLLLïÞ½Õ‹: J)³,XÞufæöíÛBˆ ° r,{~,ÅY߉²“Æ^ÅeÁoÂ/Ã&µµµË—/OLLÌÏÏW¯ÅÄÄÄÅŹ¸¸HšÅr›P–R™Íæ¿ÿýï ÿú׿}ôј˜˜ñãÇ =¥”Y,ßt™=zôÃ?üÆoX_ÌÌÌÌÈÈxóÍ7e B$%%EGGש®»wï...ž4i’¼Y,{~œ¾1¾oKÖŸ¡ì¤¡bH$P1dÈÒ :|6Y°`AZZZlllïÞ½Õ#FŽ=ºråÊñãǯX±BÒ,–Û„²lTçÏŸOLLܺu«Ùlž8q↠ô{uSf±|Óe¦eË–ÿú׿ºuë&„0™L_|ñŸqãrrrFŒqîÜ9ƒ„ƒÁd2¹»»[_LMM]»víÁƒåͲàÔóc‰ñ}[²þ e' Cº fW = Ãg??¿ƒvéÒÅúbVVVDD„Ñh”7 äuëÖ­´´´ÄÄÄèýê¦ÉÂØñ¹ººFooo!DAAA¯^½ KKKM&“ŒABƒÁžžÞ¨Q#ë‹—/_^´hQee¥¼Y,±|øÆø¾-Y†²ã„Š!T ‰²t‚ŸM<==óòòê,)1ÁÁÁUUUòfYpZÛLDœUG~~~PPƒ,6wOOjÕªÕÎ;{öì)„ÈÌÌŒˆˆ¨¬¬ÌËË{ä‘GŠŠŠd B †»M’4›Íòf±ìù±øÆ Y†²ã„Š!K@Å €m4jÔ(EQV¬Xa¹¿˜ëêêz[#ŽŸÅrm3ËFYs’½¼OOš4iÒõë×7nÜ(„X³fMBBÂk¯½–ŸŸ_RRò÷¿ÿ]Æ 
q—^‹Nìž%{ÏýÃ7fgV‘õg(;NvSféô*¦,M¨2fi`›\¿~}ܸqû÷ï÷ôôôöö.//¯ªª OMM 7‹åÚf–N0°·Fv¾±AŒ§'?>**êÒ¥K_|ñűcÇÞzë­Î;oÛ¶­cÇŽ2 èbê—ůçÇõá›u(§ËÈú3”'T Y‚*†´Y’ò›v4gÏž=qâ„ú@¬gÏž]»v•=‹åÚf–|ödgãpcM˜ÍæìììÀÀ@___AEEE~~~ºFØ%‹eÏëØÃ:”_w–¬ïD„Š!K@Å6KCR~ÓˆÙìS–k›Y6JðØ“9Ìûpcâ,–+ù™aÙócÙ¨:¡,»³¨Ž¯b‰²¬CYV 5°÷7 ·ÚÚÚeË–µnÝÚ××·]»v¾¾¾­[·~çwjkk¥ÎŠŠŠŠŽŽÎÉɱ\ÉÎΞ6mÚàÁƒåÍbÙ(!„Édò÷÷¯s1  ººZꬼ¼¼­[·^¼xñ‘Gyì±Ç¶lÙ¢ù6¿ÄAÖ¦L™¢weYu¢,ƒ3fÌØ¶m›æ_ÖîY,F²þ%e–Eii)§,T ‰²XBÅ4KK Ø 666888>>þСCgΜ9tèÐÚµk[·n½`Á©³ ÃÃÃ…žžžžžžBˆðððÂÂBy³X6JQ”‘#G>ýôÓgÏžµ\9sæÌSO=¥nþ!o–ÅO?ýëçççããóÒK/éWµÈ‚E¡¬½YdÕ‰² FFF&&&jþeíže4i‚ˆ³TÅÅŹ¹¹ÅÅÅ̲˜AÅ(‹ìUL_.T '†°M|}}­ªS§NùùùI¥ÊÎÎNIIùðÃSRR²³³uJ!Îâ×(®û:jjj’““Ÿxâ ½‡s4AÌÀdÕ‰¾ ‚ÌfóÒ¥K­c Z¶l™Ùl–:kÓ¦Mw~Ù]»v%''K…Šö…Š!W–0¶‰‡‡Ç÷rnܸááá!uH‡ßÀþnòòò•––ê÷Åé³Èª“]Ê ³GÓ§OÿüóÏõûúvÉâ:Ka2™ê\LIIéׯŸÔY¨e‘½Š)K*†\Yzrá²ãàzðŒ3† 2vìXm¿¬}³X6ʳØè³°KÊcyª'ׯ ôÜsϽøâ‹Ú~YûfqÝ å™UCª,²W1eiBÅ+Kv|3Àuö)Ë¥,,¥ðÉC–EÙ¨zÕÔÔx{{KÅr%?×G,q%„p¹ ©³P1$Êb C®,=à °ø áz0YËɳTÌNõäúˆÀ‚Ó<®³±xÖ‚Š!E–§ÙX¨reéÂŽƒop|Ì–²Ñdq݉å.)-Z´ðôôœ9sæ?ü`¹¨žº$uK\°œÇÁu6–¨o•N(³XBÅ%HAÅ-Kè =¸îÝ»ÇÄÄ,_¾œY–´‡Ä²Q ß7]–»¤üòË/Û¶m‹ŒŒ4 }úôÙ¼ysuuµNƒRÊ,–»¤p=`ŒñDM~{²<³Š¬?CÜqBÅ%ÈC–,=`üà.]º4mÚ´:0ËR˜öX6Jáû¦K–Åûpcš,–+ù¹>"`9ƒ=N—Èú3Ä'T Y‚œ§Š¡ €¡,{H,¥ð}Óe¹KJd‡gqÂïËy,ÏvRøN\b ÃñƒT Ù²ôÐPÜÁd2ùûû×¹P]]-oËF©_ö›o¾¡Ùäƒee£êpss›4iÒ¤I“,GYÈžÅi—U×®]›7oÎ)+***::úÎY,oÖ¥K—JKKµýšŽ—––Wg‹¾òòrÍ·è£Ìb Ãñƒ*†lYº°÷fºJ”ÅvGsü,¬ä—(‹å<®¸N\b C– ÆP1îÀP–=$–ÂNl1ÎÂJ~‰²Tü&jª˜ÝZb9_+T ¹‚T¨Rdé`¸+–=$fÂNl1ÎÂJ~‰²,8õü¸ÞZÂÄ%‰ bH„Š!W–0†{áÔC¢"Îbc*‰²Xî’Â5‹eÏë­%–—¸BÅ%HAÅ-KÛdÓ¦Mw¾2wíÚ•œœ,uËËF1†1•DYXÉ/QËž×[K*f—Âþ eÇ C– CÎ,mal!„Édªs1%%¥_¿~Rg±ì!±lcSI”…•üe±ìùq½µÄY†²ã„Š!K‚ŠŠbPEÀƒ2 ééé5²¾xùòåE‹UVVÊ›åççwðàAËNôª¬¬¬ˆˆ£Ñ(iËF1výúõqãÆíß¿ßÓÓÓÛÛ»¼¼¼ªª*<<<555 @Æ ÆY*²£¤(Ϭâ—åéé™——WçУÑ\UU%i–z[çÎT\]]·oß®aqÖŒ3† 2vìXm¿¬Ý³Èú3”'*†ã T Ù²ô€°M ƒ‹‹K½ÿd6›åÍbÙCbÙ(ö0¦’( ¤À²çÇõÖÒ Aƒž{î¹_|QÛ/k÷,²þ elj+²W1eiBÅ+KÛÄ`0˜L&wwwfY,{H,’*))Q¥uî’ȘգG?üámÛ¶‹‹ÓãëÓ1Î|{~·–äAÖŸ! 
BÅ(ÈÙal®`–=$–º›[·n”••IšÅrL%„¨­­]¾|ybbb~~¾z%(((&&&..în)?ëòåË‹/>pàÀùóç5ü²v bœežŸ\8Ý2À¨ÒñƬbè`›ùùùñËR±ì!ñkÔúõë§L™âååe}±¦¦¦qãÆš¿ºÉ²XŽ©„ ,HKK‹íÝ»·OYYÙÑ£GW®\9~üø+VÈ›`GŒ¾±¼e&û3ô'p|¨Òeé‚xÓ-ÐV‹-<==gΜùÃ?X.šL&=^Ý”Y,±Ü€$Ò½{÷˜˜˜åË—sʺtéÒ´iÓ:tè wq–‚à ÀÞÈ^Å”¥ Cº,= Ûj“éÓ§þùç̲XöX6JõË/¿lÛ¶-22Ò`0ôéÓgóæÍÕÕÕ: J)³XÂÑ `_Œ{~,q½eFÖŸ¡ì¤±Dö*F¹Ð׊¡‡ö}þ,»K—.•––2ËÚ¹sgMMMRR§,–R5lØp̘1{÷î=wîÜþð‡… Ο?_ö,–¢¢¢¢££srr,W²³³§M›6xð`©³@m۶ݲe Í„Ê,®L&“¿¿‹ÕÕÕRg‘õg(;i,‘½ŠQ.4Áµbèk€X¹uëVZZZbbâô~uSfñàT;±€pxÜ?TŒû‡°6°ß8šüüü   ~Y ðÛ‰íöíÛBˆ tŸRDàØß2#ëÏ ã΀}ÅÐÀ6Á~kà8pìeôèÑ?üðo¼a}1333##ãÍ7ß”1H‘””]§ºîÞ½»¸¸xÒ¤IòftøÝ2#ëÏPvœP1ÀA𫺰Ûêc°ßØÙl^ºt©õØ   eË–™Íf©³êUSSãíí-iãØZ´hqæÌõãêêê””EQΞ=Û©S'IƒEB˜L¦:SRRúõë'u€Ý‘õg(;N¨Á`›øùùzïÅÿøÇúf‰$òæ›àÓJ•õ3*'*†F¨eY%ÇpÍq¨ rNê[ï•9ÌáÆ™+--½téRòç>úhÞ¼y±Xìúõë.—KÓ D"aÆœihõ»ßýîÓ[6Nž<ÙÞÞ®o–!ñæ›à3«”õ3*'*†.A *†VYápF¤¾oMäÚ˜ÈA†át:oܸ1åYÜP(TYY‰DôÍŠÅbÇŽkkk;{öì—¾ô¥æææ—_~ÙápdýƬ² Á6nÜ822²ÿ~Ã0Þyç¶¶¶×_}pp0;vLÇ Ã0G47o^v­=³>¼wïÞ®®.M³¸ù¦eýŒÊÆ)çßb•Y}‹U–&*Æ,Ç8#Rß·&µC’7(Cî*Lʵk×ü~ÿþð‡X,öÍo~óÝwßµ¨j) ’çÚµkõõõÁ`pþüùÇ¿xñâŽ;–,YÒÑѱxñbƒ ´˜ÖeÉëüÇtï4ŠÅbY Rœ%•²~&ç“EDV Å¥‰Š1Ë1Îyï[“Ú!É”!wf egs¸ñÉÅb@ ´´ÔårÉ-..¶4"'Y";?©sÙ”õNj‚¨ºT 0Îa'ÐHíä *EÞ*Ìt”9ÌáÆS=íOdç'rP³Ãþøk”…ô=’ë¿€Þâñø®]»***\.×¢E‹\.WEEÅîÝ»ãñ¸ÖY†aLLLÄîGë,‘ƒJòx<õÿaõŒTe–aáp8 †ÃáäZ7)U$²ê¤² nݺµ££#ë¿6çY"…B!eý¥Ê,©¨e‰DÅÀÜ\ÿôæõz;;;½^ï”SaLÓÌú©0*³ ‘x<¾gÏ¿ß?88˜¼R^^ÞÜÜìõz§» MVN‚¦sûöm·Û=>>®o–²ßöZ0IDATꤲ ƒÁ±±±ìþN;d‰ìüRÛAÜèS™%C£,eßb•¥‰ŠÎùȈÔhB¡PÖgγD*‘H´´´TVVîÛ·ïÂ… —/_¾páÂÞ½{+**¶oßN–­‚‰Ä¾}ûLÓœrÑ¢#—Tf)«N*Ë 4‹ÅvîÜyï®òòò]»v}úT½²DžY• b רzeY pF89'uFd‡¤òÌa•Y"OõL¹yóæõë×?«iÖ·¿ýí?ÿùÏÖýþœd‰\›K=­4AÅÐ*KÙ·Xei¢bè•e&ÀùÆ7¾ñõ¯ýÞîùòåË/½ôRò´}³DvH"•» #²CºsçNGGÇ /¼àp8¾üå/_êÿa hiiÉËË;zô¨¾Y²k¡bh‘•"éÌ**†^Y–Èáä[ŒO>ùäÃ?–•÷(‹â •Y@àСC¿ýío:ȲmÐ}ݸqCd–Ö¤Þ"¹QSên,ã~OÙYDeV’²~FMC— C·,+0ÎÈ¿þõ¯ 68‡ÃaFCCÃÄÄ„€,‘’ÈAͬŒØ?Kä“ü"7ü'DoÔ”·6'õðeýŒÊÆ‰Š¡KP C—,+0ÎÈöíÛ¿öµ¯ŒŒTUU ¼øâ‹?üád‰ìDª©©i†O7nܨiV‚•­²D>É/õÔ·¿È&lÉLY?£²q¢bè4«V`œ‘Ï}îsŸ|òI"‘¨ªªJ$CCC .%²C9¨§žzê£é=ñÄšf%XÑ*K0y·DnÔ”ú’©KfÊú•SÃþA *†nYV`œ‘üüüäÉÚzûöí¢¢"Y";$‘ƒZ¸p¡sFšf%XÑ*+…ýêöϹQSêKþ¥.™)ëgT6N)T ›%¨ºeY 
pF>ûÙφÃáD"QUU‹Å~úÓŸ~õ«_%²C9(ÁXÑ(‹ýêeIݨ)’Ô%3eýŒÊÆ‰Š¡K`R+†˜gä•W^InlxüñÇ?ó™Ï<ûì³×¯_%²C9(ÁXÑ(‹ýêe%ÉÛ¨™$ìæ›Ô%3eýŒÊÆ‰Š¡WPC‹,+0ÎH,»sçN"‘8uêÔÅ‹-Ýø®2+Id‡$rP"±2¢QûÕ5ÊJ‘ÔùI½ù&uÉLY?£²q¢bhDÅÐ+Ë L€1I’ú ÅY"±2¢EûÕ5ÊÙùI½ù&uÉL$*†.A *†nYV`œápø?ø˜,‘’ÈAASÂVaد®Q–ÈÎOöÍ7yKf)Êz'AT ]‚T =³²‹ pvܸqÃår‰ÉÙ!‰”TÊΖz¸±â,ö«k”%²ó“zóMŸ/‹Aг’úûû{{{oݺµ`Á‚êêjÇcEŠÊ ‘YN§óÆEEE÷^ …B•••‘HDÓ¬ä/ŸÏ—ªº@ ¥¥%//ïèÑ£Y RœµuëÖ5kÖ444d÷׿<+EYï¤,ˆŠaÿ ƒŠ¡[–%r:ýÖÞÐüýï/**Jþ<::ª{–ȵ1‘ƒ’JÙ™ÃR7VœˆÜ¨)õæ›ÔÓJ•õ3*'©D>áBÅÐ+Ë L€32ç†a$¨®®Ö=Kd‡$rPЋÈU‘ûÕ¥f%äv~ ž|Ó‡²~FYC£ *ÆlÆèì¶zdd¤±±ñܹsN§³  À4ÍH$R[[{øða·Û­i–ÈAA/"·]‰Ü¯.5+EÞFMÙÂápòà” ¢ºg‰ÙMÅÐ.H6©#‹˜g‡° p’ÈIä   ‘«0•••3—£‰‰ ½‚g‰´eË–ƒN÷é¦M›f˜-Ø9Ë0Œx<¾gÏ¿ß?88˜¼R^^ÞÜÜìõz“·45ÍJ3¦bh„Š¡]–ææú/ûr:eee«V­*++“%rPÐ…ÛíþàƒÔ¬Œ(Ë ƒYÿ¹ œ%²ó;þ|ww÷tŸž9s&+)ê³ Ãðz½^¯wÊ«ìLÓÌú«ìTfÉCÅÐ%È bh˜eîgÇ­[·~ó›ßüä'?‘‘511±yóæ#Gކ‘H$Þ{ï½äóúf‰¤OäFMÁ7ßÄ^ ¬wRÙ¤I"ò *†vYV`ŒûøÑ~~ÿûß?÷ÜsûÛß¾ûÝïVUUýêW¿Ò:Kä  ‘7ß ÁŸH"Ϭ‚FD>á"#}L€3r÷î]Ã0yäaYO=õÔ|PQQáñxúûû‡‡‡¿ò•¯X´ëFY–ÈAA#"o¾°ŽÈ׿ û•sR+†%rðæiAÖ­[·sçÎ)?ú裷ß~[ë¬üüüäUUU‰DâöíÛEEEYOQœ%rPЈÊ3‡Ÿo À RϬRÖϨlœ€œ“Z1¬ÀàŒ<ùä“gÏž}úé§ ÃˆF£Çoll¼råÊÚµk?þøc}³.\xéÒ¥Ç{ÌãñüóŸÿܱcÇ¥K—þú׿f7Eq–ÈAd“wx²~FeãØ„¼Ša&ÀÉËË …B†a ­X±bxxxll¬´´4ê›õ­o}kíÚµ n·;.Z´è/ùKeeevSg‰s555ápx†?­ÍÿÊ‚XJY?£²q ŽAÊHIII0¬®®6 c``À4Íx}õÕWµ ì@ðkó”õ3*' ·W +0ÎÈóÏ?¿mÛ¶ýû÷†ÑÞÞ~çηÞzkpppÕªUZg¥fkkÖ¬Éú/ÏU–ÈA9÷ì³Ï¾øâ‹Ó}ºråJí‚;|Z©²~Feãä–àŠa¶@gäÚµkõõõÁ`pþüùÇ¿xñâŽ;–,YÒÑѱxñb}³RÆÆÆ~ö³Ÿýú׿¶è÷ç$Kä ’>³JY?““Æ È Áà L€3‹Å@ii©Ëå’”•488¸|ùò™ÿ•k—%rP€´··oÚ´IR+(ëgÔ7Nì³Ñ25wîÜeË–9Ž`08ó ZôÊ€õÊ+¯ `eý €Ocœ‘x<¾k×®ŠŠ —˵hÑ"—ËUQQ±{÷îx<®uÖð„B¡D"‘üùæÍ›YR™%rPèEY?£²q ^‚•¯×ÛÙÙéõzkjj ÇÇÇ{zzZ[[MÓôù|úf•——§~ŽÇãÉÿü¾péÒ¥ì©Ì9(ô¢¬ŸQÙ8ÐÏg¤¸¸¸«««ªªêÞ‹}}}uuu¡PH߬‘ËŠ`ããã………’‚d²~&'ûc tF¢ÑhIIÉ”‹n·{rrRë,HßÝ»wïÞ½k†Õ“ReA¬£¬Ÿ¡qp_lÎH}}}SS“ÏçK­/–––Õ«WkéÛ°aÃÊ•+ßxã{/vww¿ÿþûo¾ù¦ŽA@ÎÕÔÔÌüÒ¦`0¨c–¡°Ÿ¡qÂì!¸bX pFü~cc£Çãq:¦iF"‘ÚÚÚƒj•²`Á‚mÛ¶Y÷ûs’%rP@uwwÿâ¿HþF?ÞØØøØcýéOÊî¼TYs¦i¶µµM÷髯¾ªi–¡°ŸÉIãä„àŠaž΂þþþÞÞÞ[·n-X° ººÚãñÈÈ€täåå…B¡‚‚Ã0†††V¬X1<<<66VZZFu rnýúõGîÓ—^zéĉ:f¥(ëghœ0ˆ¯ÙÅ8;Âáp²¶ ÈJ>e÷È#*W–%rP€”••8q¢ººÚ0Œîîº‰‰‰7n|ñ‹_Ìîà”P@Y爐I`4è‘zð† Rû Sº»»wîÜ©o–ÈAvðüóÏoÛ¶íêÕ«W¯^moo¿sçÎ[o½õöÛo¯ZµJÓ ÀVÚÛÛ%eq0`)aà d ¥¥¥²²rß¾}.\¸|ùò… 
öîÝ[QQ±}ûv­³žxâ‰Ë—/'žœœ=È“Š˜mb±X (--u¹\2‚ûp8Ôuk ²8°”°Ša¶@gDê9À%%%©7˜ ˜¦ÇMÓt:úf‰`sçÎ]¶l™Ãáƒ3ŸŽ K‹p0€Übœ‘äsW®\I] ›7o¶î`5Y*´ùô O*bVá‰>ÀRccc’²”õ3*'À>„U Kät¶ö†‡‡kkk Ãp:¥¥¥Éû{µµµÃÃÃZg©|ÐNäÓƒ<©ˆY…'ú¤OY?£²q -÷mÛÈs€U>h'òéAžTÄìÁ}€8ÐÔÔ4gΜ{/ž:uêæÍ›7nÔ7+‰s€ì’]1²‹ 0f"ì|cÅAг€\q:7nܘò< UVVF"ƒ;p8ÑhtÞ¼y÷^<|øðÞ½{»ººôÍ`*FúxøámÙ²e†O7mÚ¤i–!ô|c‘ƒì€'ú‹œŸOÓ,‘ƒìÀï÷766z<§ÓYPP`šf$©­­=xð ¦A€M444ËRÖÏ(nœ;W1,Âè‡WYY9:::ؘ˜Ð1Ëz¾±ÈAöÁ}@vÝw“¡îYÊúÅs"+†E¸üðR½ Ë2„žo,rP€}x<5sQeA²NY?£¸q žÆ}ˆ<ßXä ;غukGG‡¤ ÀB¡²,*³XŠ‘>&À¸¿ßoš¦ÇãÉÏÏ/++ËÏÏ_ºti$ikkÓ7Kä ;ƒccc’‚;(..Nþ‡ƒÁ`8–‘À TŒôñ 0¦%ò|c‘ƒÈÇ÷ìÙã÷û“WÊËË›››½^ï”ã7õÊ`*FúxÓòx*Fúx÷!òÈ\‘ƒì@ä·ȹuëÖ% ŸÏ—ú7ZZZòòòŽ=ªo+P1ÒÇ[ q"×D °‘ßb ç8¼@ú¨éã0îCä’ÈAv ò[ ؇H#L€q###çÎs:¦iF"‘ÚÚÚÇ»ÝnM³D °‘ßb `LKä’ÈAv ò[ äÐÖ­[׬YÓÐÐ , €¨éã-И–ÇãQÖY*Ë9(ÀD~‹ ƒcccò²XŠ‘>^‚…©¶lÙ2ç›6mÒ1Kä ;xæ™g¾÷½ïýò—¿ØÄéÓ§_{í5yY¬@ÅH[ 1ÕâÅ‹ÛÛÛ§ûtýúõCCCÚe‰`?ÿùÏ?üðÃk×®Éì&'÷üIÊ`*ÆÿÄSUVVŽŽŽÎð&&&´Ë9(€`ñx|Ïž=~¿pp0y¥¼¼¼¹¹ÙëõΙ3Gß,V b¤g€1U0”—%rPÁ¼^ogg§×ë­©©),,ïééimm5MÓçóé›À TŒôqÀvŠ‹‹»ººR§^'õõõÕÕÕ…B!}³XŠ‘>^‚`;Ñh´¤¤dÊE·Û=99©u+P1ÒÇÀvêë뛚š®\¹’º6oÞ¼zõj­³XŠ‘>&À€ŒÜ½{÷îÝ»’‚;ðûý¦iz<žüüü²²²üüü¥K—F"‘¶¶6­³XŠ‘>žddýúõ+W®|ã7î½ØÝÝýþûï¿ùæ›:öÑßßßÛÛ›ŸOÇ [ )..îêꪪªº÷b___]]](Ò1°ƒ455Í™3çÞ‹§NºyóæÆõÍ`*FúØ ÈH4-))™rÑívONNjØÁw¾ó;wîL¹hšæ»ï¾«u+P1ÒÇh@Fêë뛚š|>_êÞl hiiY½zµ¦A€Mœ|ØívëØÃᘲÃ0%‹é›À TŒô1dAoooòt¢êêjÇ£{[‡#Λ7OX+P1ÒÇÀvhg¤Š‘>^‚ÈÈÖ­[;::$v …”õ—*³XŠ‘>^‚ÈH0“ØAqqqò‡p8œÜó_TT$ €¨éã0 #§OŸ~íµ×$vÇwíÚUQQár¹-Zär¹***vïÞǵÎ`*Fú¸ Èe+Áš.9Äëõvvvz½ÞšššÂÂÂñññžžžÖÖVÓ4}>Ÿ¾Y¬@ÅH/Ád$ïÙ³Çï÷&¯”——777{½ÞéŽI°y`ÅÅÅ]]]©S¯“úúúêêêB¡¾Y¬@ÅHw€Q¶¬û’3ð@¢ÑhIIÉ”‹n·{rrRë,V b¤;À€Œ([ Ö}Éx ëÖ­K$>Ÿ/õo>´´´äåå=zTß,V b¤—`2¢l%X÷%gàøý~Ó4=O~~~YYY~~þÒ¥K#‘H[[›ÖY¬@ÅHw€Q¶¬û’3ðúûû{{{“o}«®®öx<2²XŠ‘&À€ŒŒŒŒ466ž;wÎét˜¦‰Djkk>ìv»u R1d²•`}—œ²uëÖ5kÖ444Ë`*Fúx 4 <𹍲 ·‚ÁàØØ˜¼,V b¤;À€‡·eË–ƒN÷é¦M›ÚÛÛõ ‚qððΟ?ßÝÝ=ݧgΜÑ.°›p8œÜó_TT$) €¨ÿw€¯²²rttt†?011¡W`ñx|Ïž=~¿pp0y¥¼¼¼¹¹ÙëõΙ3Gß,V b¤;À€‡ …6áõz;;;½^oMMMaaáøøxOOOkk«iš>ŸOß,V b¤;À¶S\\ÜÕÕ•:õ:©¯¯¯®®. 
é›À TŒô=’뿦ŠF£%%%S.ºÝîÉÉI­³XŠ‘>&À¶S__ßÔÔtåʕԕ@ °yóæÕ«WkÀ TŒô1°¿ßoš¦ÇãÉÏÏ/++ËÏÏ_ºti$ikkÓ: €¨éã`›êïïïííM4R]]íñxdd°#L€³[ ìeË–-3|ºiÓ&M³XŠñ@8À^Ο?ßÝÝ=ݧgΜÑ4 €¨„-ÐöRYY9:::ؘ˜Ð1 €¨„ 0`Và`À¬À0+0Ì L€³`À¬À0+0Ì L€³`À¬À0+0Ì L€³`À¬À0+0Ì ÿ´„Ø6ìX…sIEND®B`‚sleef-3.5.1/doc/html/nontrigsp.png000066400000000000000000001300411373003144100170530ustar00rootroot00000000000000‰PNG  IHDR óúX¬bKGDÿÿÿ ½§“ IDATxœìÝ}\Tuþÿÿ÷ ˆÈ…ˆ\ˆ¨!ˆ![¬)Z’Zn€e¥æ -•¢fmßUÓÌ]Ùh×’Œ”43dMW+SË«2ÓÕÄ Ó! QÁ1 ®æ÷Çù5ŸY.fƘ„ó¸ßösÞç}Þï×9ãîmŸœ+…V«tuwttX `€,€²@È `€,€²@È `€,€²@È `€,€²@Ȃ͒%K:ºĈ#„¿ûÝלּoG±PÍ €\XÕ¹sçbcc=<<”JåÀçÎ+µ§¦¦FDDtlmuÏ=÷deeéVÍu¼¶™Î5,m±íè2¢Õj'L˜0zôèÜÜ\77·‚‚‚½{÷J›¦L™Ò±µY™…Ž· [__ß­[7K”®¬çêÕ«EEE)))ÞÞÞöööÁÁÁÏ?ÿ¼´iĈ™™™Òò°aÃ^zé¥èèè¡C‡~õÕWRûåË—zè!—ÀÀÀõë×+Šëׯ7›âæÍ›Ï=÷œ¿¿¿››Ûĉ/^¼Ø²ŒVû”––zzz~ðÁRŸY³f3¦±±ÑÀ˜UUUÏ=÷\ÿþý{ôèrìØ1!„Ï_|!u¸té’B¡P«ÕÏ?ÿ|^^Þ¢E‹8uêÔfÇ{õêÕ'žx¢OŸ>^^^³fͪ¨¨0|t ;lذ+VŒ=Z©T†††þðÃï¿ÿ¾¿¿Ïž=“““µZ­égÌ”_GŸ‰SëÛêÉ6lØ¢E‹F´cÇŽ¶N”¾üãýû÷wqqñôôœ?¾ácluR@FXOïÞ½‡ ’’’²uëÖóçÏè¹cÇŽ?ü0///55uúôéRã£>Ú§OŸÒÒÒ#GŽè’j3O=õÔ… Ž;VZZ:tèи¸¸¦¦&SúxyymÞ¼999ùìÙ³YYYŸþù–-[lll Œ9cÆŒ¼¼¼ƒVUU}üñÇmNzzzHHȪU«ÎŸ?Ÿ““Ólë£>ZSSsîܹ¼¼¼+W®Ì˜1Ãðy0qX!Äúõëßx㊊ŠÐÐÐGyäСC¹¹¹'NœÈÎÎÞ¹s§ég¬ÃU™>µ¾¶Næ¶mÛ¶lÙröìÙ©S§8Q’üüüE‹íÞ½[­V<öØc†Ñô_ÐEh°¢òòòW^yåž{î±³³óññY³fÔ>|øðwß}WZ }ã7¤eé*ß•+W~üñG!Dyy¹Ô~ðàA!ĵk×ô÷-))BK}œœœNŸ>­_€á>ùË_ Ô£G/¿üÒpé*âùóç› ··÷¾}ûô÷½qã†V«½ûî»7lؠ릫ùÂ… Bˆ‹/JíyyyBˆŸ~ú©­óÐlº¶†•v_µj•´,]”–*ÑjµÓ¦MûóŸÿlâk9¬ÑªL™ZضNfhhèÊ•+¥e'J§  ÀÎÎnóæÍjµZ×x«¿   ã`€U¹ºº.[¶lÙ²eµµµ›7oNHH˜0aB³nîîîÒ‚R©Bh4šË—/;99¹ººJíýúõk9xaa¡B¡ˆŽŽÖµ888”––Þu×]&öIJJZ¹råˆ#î¿ÿ~ÃýÕjµ­­í€Úq2„¢¤¤ÄÖÖÖÏÏOZ•,))ñòòjõxß¾}¥GGGGGGݪF£1ptúg¬%Sª2:µ¾¢¢¢¶N¦···´`øDIú÷ïÿÑG­Y³&!!aèСþóŸ~øaKÿ‚€N„ èööösçÎ]±bÅ©S§Zà–¼½½oÞ¼YQQ!eàVU•RñW_}eà^V}šššfΜ9qâÄ£G®_¿~Μ9ú744è·÷èÑ£¶¶VZÖDùŽ;ZìÈ××·¡¡áÒ¥K>>>Bˆ‚‚©±­ú›ikX™rƬÀß߿Փ)„P(Ò‚‰'*66666ö—_~ÉÊÊš4iÒõë×oõta< °žk׮͟?ÿäÉ“¦²²2##ãâÅ‹&~gàÀ÷Þ{ojjjMMÍõë×—.]Ú²ŸŸ_LLL||¼îÞã­[·644˜Øgùòå—.]Ú¸qã–-[¤÷Kèïçç7qâÄÄÄÄ’’­V›ŸŸ/E²»ï¾{×®]Bˆ†††´´4ݼ^^^*•ªeÍ£Fš7o^eeeEEEjjê„ ô¯jÖÖ°&2åŒYA['SŸ)'*??ß¾}µµµvvv 
…ÂÆÆæVÁœœœuëÖYíØÖDXR©üù矟|òIOOO??¿õë×oÚ´iôèÑ&îž]ZZêåå)½ßÈÎήYŸ7Þyç£GîÑ£GXXاŸ~ª»„h¸ÏþýûW¯^““ãèèµpá©S§þüóÏÆüàƒî¼óÎÈÈHggçÉ“'K×{_{íµüüü!C†Œ9òž{îÑMºpáÂ;vôêÕKÿ^\ÝqÙØØÜyçÁÁÁîîî7n4ý”ÖD¦œ1+hõd6côDÕÖÖ¾òÊ+}úôquu]¾|ù¶mÛÄ-þ‚{öìÙºu«åÐÚ_?E@'²ÿþ)S¦´ú!€VqÐiœ>>‹-’²ëÙ³gƒƒƒÏ;زç'Ÿ|ÛÐÐÐê8ÿüç?ÓÓÓ/\¸`Ùr\Ç\.//¿|ùrxx¸´äààpúôéß0Ô‘#GÂÂÂÌZ  ²5ÞŤ§v{öì©kqqqù ò¾ýöÛû÷ï?~ü¸>vvvuuu¿¡HÀmÂ,7/wLvvvBTVVêZÔjµÔhº7ß|ó¯ýëüýý t«««³òmÞ …µo,gÆ®1c‡LÊŒÌÈŒÌhÍ;dRfdFf”óŒ2)3ZhF³ŒÓ1·@»¹¹y{{Ÿ[£çþûï7½}×®]-ŒŽŽÞµk×O?ýtâÄ —)S¦´ìsõêÕôôô‚‚‚íÛ·ïß¿ÿOúSûŽ»¹Ï?ÿ|Ô¨Qýû÷?|øðõë×·oß®Õj÷îÝkâîõõõ¦t›2eŠ.—••éNËÓO?=jÔ¨VÓï7ß|3a„ɓ'«TªÜÜÜÇÜðî+W®ÌÎÎ>pà@QQQ]]Ý´iÓ¤þmµ·ÎÊéWñÓOÖžQAdåêÕ«EEE)))ÞÞÞöööÁÁÁÏ?ÿ¼á]lllìõÜqǦ·wëÖ­å€#FŒ ìÕ«—¯¯¯¯¯¯J¥jÙgýúõ<ð@Ÿ>}ÂÃÓ““¿þúëVå‰'žèÓ§——׬Y³***¤öaƽôÒKÑÑÑ!!!C‡ýꫯší¨ÕjŸ~úéY³f¥§§;::<øõ×_—Ò¦a-Z4zôè   ;vܼyó¹çžó÷÷wss›8qâÅ‹[=RÝ-ÐvvvÒ9ijjÚ²eKbbëï“|ùå—gÏžý /øùùùùù=øàƒ†wÏÈÈX°`AHHˆ‡‡ÇêÕ«<(϶ÚeŽ ÈHïÞ½‡ ’’’²uëÖóçÏwTÙÙÙ}úôqrrzíµ×–.]j¸ó‘#GÂÂÂZ¶?úè£555çÎËËË»råÊŒ3t›vìØñá‡æå奦¦NŸ>½ÙŽgÏž-**š5kV³v…BaxØmÛ¶mÙ²åìÙ³S§N}ê©§.\¸pìØ±ÒÒÒ¡C‡ÆÅÅ555™rì}ô‘­­m«×½ëëë¿þúk{{û»îº«gÏž-“¿þîååå—/_—6988œ>}º­vSÊëÚÀ€Œ(ŠÃ‡3æoû[HHˆ¯¯ï[ÆÅ|çwÜõ讈šÒ¾{÷îVÇ|ä‘GN:uàÀ„„„VíÎÛo¿½ÿþÕ«W7k/((8tèЛo¾éâââææ–––öÙgŸ•––J[<<<¤‰Š‹‹ËÊÊô÷½víšÂÛÛ»åt†‡÷òòB\ºtéã?^»v­§§g÷îÝ—/_þã?æåå:¿ÊÈȘ={v÷îÝ[nª¬¬lhhøè£Þÿý+W®Lœ8ñ¡‡º|ùr[»WUU !zöì©ÛêââRUUÕV»)åum`@^\]]—-[öŸÿüG­V/Y²$%%¥­˜*yòÉ'sõ¸¸¸˜Þ>f̘>ø@ñ+µZ-õQ*•^^^#FŒxá…yä‘òòòV§~óÍ7W¬XqàÀÿf›JJJlmmýüü¤ÕHÒªî]VJ¥R¡Ñhô÷•²q³`iʰºÌ\XX¨P(¢££>>,,L©T.^¼ØÎÎîË/¿lkwéÅ`ú/ÇR«ÕÎÎÎmµ-¯Ë#2eoo?wîÜ~ýú:uÊ@7'''=ºg}Mi···Ÿ>}ºöWº¬£Õj«««[ŽË–-KKK;xðà!CZnõõõmhh¸té’´ZPP 5šràAAAþþþ7nlYŒáa¥{¤…ýúõB|õÕWç~UVV6nÜ8£S¯]»örµ¢ÙÉQ*•ºÎÒtºI[îîæææíí}òäIiU¥RUWW‡††¶ÕnÊÉéÚÀ€Œ\»vmþüù'OžÔh4•••/^Œˆˆ¶666ÖêÑjµ-[í¬k7¬©©iÅŠgÏž­¬¬ÌÍÍMLLô÷÷>^º8|ãÆ­[·644ž·ªªjË–-IIIú<óÌ3™™™ßÿ}]]ݪU«êëëÇŽk`÷¤¤¤´´4•JUQQ±pᨨ()B·ÕÞº~ý Wn~ýû[{F!ßdE©TþüóÏO>ùdqqq·nÝ7mÚ4zôhiëš5kÖ¬Y£ëüý÷ß·lܰaƒôú¨¶Ú:qâÄš5kÊËË=<ø÷¿ÿÝôó0~üøÃ‡/_¾ü¾ûî«®®îׯ_ll¬ôÊe‡Ý¸qã²eËF}õêU77·¨¨¨É“'žtãÆÎÎÎ111ú<û쳕••ãÆÓh4wÝu×îÝ»uñ»ÕÝ/^¬V«###kjj¢££³³³ ··î?þÜ…)¤?êta E×?F°>EëßnøÚw,_sÅ:nÈ `€,€²@~»={öŒ9RZ1bDfffÇÖÓÒíY•ébcc7oÞÜÑUüvýüw1`tç΋õððP*•œ;w®"&&fæÌ™ÍzŽ3æÙgŸBŒ?^¡P¬_¿^·éÌ™3 
…ÂÉÉIZ?~üóÏ?o`R­Vû /,]ºTZMMMÕ}P÷öa®ª6oÞéää$}²Hç§Ÿ~š:uª»»»³³sTTÔ‰'ZîÛØØ¸`ÁGGǸ¸¸²²2ÓÇ_ºté¢E‹êêêÚÍ?~<&&ÆÝÝ]©T-^¼øúõëfŸEÿü»¸¸(þ×¥K—Zîrøðáûî»O©TºººÎž=Ûð¾m[SιNÀËB‘hÕÿ zÅœ'Ùt`tzZ­v„ ½zõÊÍͽqãÆÎ;‡*„HLLü׿þ¥V«u=üñÇC‡%&þÿs z÷Ýwu[ß}÷Ýàà`ÓçÝ»wo]]ݘ1c¤Õ)S¦ÜÒîæU__ßj»¹ªrssKIIYµjU³ö¤¤¤²²²ÿþ÷¿¥¥¥aaa?üpccc³>+W®ÌÎÎ>pà@QQQ]]Ý´iÓL?44ÔÓÓó_ÿúWûAßçŸ>jÔ¨þýû>|øúõëÛ·o×jµ{÷î5q÷¶ÎvKú翬¬¬æWO?ýô¨Q£|||šõÿæ›o&L˜0yòd•J•››ûøãÞ·­skÊ9×)4ð7âüUkÏ(! Ó»zõjQQQJJŠ···½½}pp°tåvüøñ›6mÒõ\·nÝðáÃ¥x,„øÃþPXX˜——'„øå—_>øàƒøøxÓçݱcÇØ±c …´ª³kUUÕsÏ=׿ÿ=z„„„;v¬­F·ÞzkذaºÕÂÂB›¢¢"!ÄÍ›7Ÿ{î977·‰'^¼xQê3lذE‹=:((hÇŽÿøÇ?ú÷ïïâââéé9þü–U]½zõ‰'žèÓ§——׬Y³***tã¼ôÒKÑÑÑ!!!C‡ýꫯZìøñã{ì±~ýú5k?þüc=Ö·o_GGǤ¤¤«W¯–––6ë“‘‘±`Á‚Õ«Wø á}Û:·¦œs" ÓëÝ»÷!CRRR¶nÝzþüy]ûwܯ»Æ[WW÷þûïë§Žnݺ͚5Kê°mÛ¶¡C‡4Èôy¿ûî»¶.®Î˜1#//ïàÁƒUUUü±‡‡G[:O<ñĹsçrss¥Õ¬¬¬Ñ£Gûûû !žzê© .;v¬´´tèСqqqMMMR·mÛ¶mÙ²åìÙ³¡¡¡‹-Ú½{·Z­.((xì±ÇZVõè£ÖÔÔœ;w.//ïÊ•+3fÌÐmÚ±cLJ~˜———šš:}útÓO‚ ¶mÛVZZúóÏ?gddDDDx{{ëw(//¿|ùrxx¸´äààpúôiÓ§:tèwß}gÆÃ9{ölQQѬY³šµKË00¬îlO:ÕÀbÔG}dkk;eÊ”fíõõõ_ýµ½½ý]wÝÕ³gψˆˆ¯¿þÚÀ¾mÛöŸó®Š €NO¡P>|x̘1ûÛßBBB|}}ßzë-iÓܹsÏž=+]kݾ}{}}}³døÇ?þqóæÍµµµëÖ­KHH¸¥yoܸáììܲ½¸¸xçÎëÖ­ëׯŸB¡¸óÎ;ZmÔß«W¯^1116lBhµÚ÷ßΜ9BˆK—.}üñÇk×®õôôìÞ½ûòåËüñG骵">>ÞËËKÑ­[7­VûÝwßUVV:::þþ÷¿oVUAAÁ¡C‡Þ|óM77·´´´Ï>ûLwµ6!!A ä<òHqq±áGFõ9²©©©oß¾NNN;vìx÷Ýwu—Ä%UUUBˆž={êZ\\\¤F9;;ë.Úåp®]»&„hÔMVw¶ ÿ(FeddÌž=»{÷îÍÚ+++>úè£÷ßÿÊ•+'N|衇._¾ÜÖ¾mÛöŸó®Š €®ÀÕÕuÙ²eÿùÏÔjõ’%KRRRvïÞ-„èÛ·ïĉ×­['„X·nÝôéÓ•J¥þŽ 6lØk¯½væÌ™I“&ÝÒ¤½zõj5TÙÚÚ0Àhc3³gÏþðÃëëë÷ï߯V«¥z  EttôàÁƒK^^^XXX³ÆöNPP¿¿ÿÆ›µkµZÃÃêÎv[?ÊôéÓµ¿rqqiuöµk×>ðÀ­þ)D©TêV¥éôâfû¶unÛλ*0:½k׮͟?ÿäÉ“¦²²2##ãâÅ‹ºoÏŒ7ÎÝÝ}òäÉ÷Þ{oHHHËÝãââöîÝ»xñâ–›kõhµZý­±±±úWçtüüü&Nœ˜˜˜XRR¢Õjóóó Zml¶£ÍÌ™3ÿñ|üñÇÒýÏÒh111ñññÒuÈ7nlݺµ¡¡¡Ù¾ùùùûöí«­­µ³³óððP(666úF5oÞ¼ÊÊÊŠŠŠÔÔÔ &H7ôšB:ÒÛ¥³!„pww \»v­Z­®««{ûí·ëë률•““#]xB$%%¥¥¥©TªŠŠŠ… FEEI1O¿O«ãKöíÛÛ¬žöŽB¡X»víúõëçÏŸöìÙêêêsçνøâ‹[¶l1qX”–ªªª¶lÙ’””ÔV‡gžy&33óû￯««[µjU}}ýرc ìÛÖ¹m«½U¡ÍßEmqaíúÃK;h»:9#XŸH0þ°F“˜˜¨T*ÃÃÃ7oÞ¬ßaùòåBˆ¬¬,ýÆqãÆ½üòË͆ڵk—£££®C³ÿóüý÷ßëwnjj·ÕØL~~¾bÈ!úUUU©©©NNNýúõ›9sfCCƒV« Ý´i“ÔçôéÓÇïÙ³g¯^½ÂÂÂ>ùä“–U•––N›6­wïÞžžž3f̸víšÔ®?NMMâÇlV˜þ÷¢$ÒîçΛ8q¢«««““Óï~÷»;vHýçÎûÀHË 
óçÏwsssppˆ‰‰¹råJË>múôé¾}ûþòË/-ÏU{G«Õ;vìøƒ«««½½}``à¢E‹¤LÖÀÒŒþù×jµÿüç?½¼¼êëë[ö”455½úê«^^^NNNß|óá}Û:·mµwRæŠu íÿþ«ëQ(ºþ1€õ)Z¹…°9í;–¯Cå‚Í IDAT:ÚçŸþÚk¯>|¸£ é²âââ&Ož|K/¦F×c®X×õÃ!, ¬Æ\±Žg€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@ÈÂm€,XàáááèèWVVÖ²ÏæÍ›###œœlmm­_! 3ºíðÊ•+³³³8PTTTWW7mÚ´–}ÜÜÜRRRV­ZeýòÔmw5##cÑ¢E!!!BˆÕ«W«TªÀÀ@ý>ãÇB|òÉ'S" º½®———_¾|9<<\Z rpp8}útÇVèn¯\UU%„èÙ³§®ÅÅÅEjlÅÿZ²d‰Ô~òäI]Ÿüü|F£[Õߤ¿¬Ñhòóó[ÝÄŒÀŒ «Lt›#0#0#0#܆#$&&6 qÂLZ­Ö\cµ_yy¹»»ûÑ£GGŒ!µ8::nذáÑGmÙù“O>‰mhh0<¦Bq{#t ŠDã}´ïX¾ æŠu·×`777oooÝ_T*UuuuhhhÇVèn¯,„HJJJKKS©T .ŒŠŠ’Þ€•““³nÝ:©Occcmmm}}½¢¶¶¶¶¶¶#+t·]^¼xñ¤I“"##}}} Evv¶Ô¾gÏž­[·JË6lP*•“&MjllT*•J¥òúõëW2 èúÏÇò 0XÏ«éšÏ`!`€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²pkxݺucÇŽµP)Xέà^½zùûû[¦,H¡Õj;ºËR(ºþ1€õ)÷Ѿcù:€ ˜+Ö¹|ìØ±‚‚ÝjAAÁñãÇÛ?+Vf$Ï;·¡¡A·ÚÐÐoá’0?#בœœ4B¡VµZ­³³³F£±JmæÁ-Ð` Ü ¬ÆJ·@»»»—””èV‹‹‹]]]Û?+Vf$ÇÄÄÌž=ûÂ… Z­¶  `Μ9111Ö© 32€_}õU''§vëÖmÀ€=zôX±b…u*ÀŒLº‘º¸¸¸¨¨ÈßßßÏÏÏ 5™Ï€%ð 0°+=,„hjjR«Õ555~~~ íŸ+3€/^¼x÷ÝwGFFΜ9S±mÛ¶¹sçZ¥0ÌÉHNNN?~¼Z­îÕ«—"::úàÁƒÖ¨ ³²5¼ùèÑ£ü±´êêêZ^^nùª03#W€Õjµnµ¸¸ØÓÓÓÂ%`~Æ¿éÒ%!ÄÕ«W“““§L™b•Â0'#xÅŠ666¾¾¾ùùù}úôéÞ½û+¯¼bÊ0#“>¦téÒ¥ÂÂÂ~ýúñ`€„ï«1W¬3ò,‰F£©®®vpphÿ¬X™‘[ çÏŸâÄ !ħŸ~êîîîêêº{÷n«€9¹ŽìééYTT¤T*GŒ1{öl+VœýôÓ#FØÙÙÕÔÔ˜kâ•+Wfgg8p ¨¨¨®®nÚ´i¦÷IJJ*++ûïÿ[ZZöðÃ766š«0@—dä%X)))aaaZ­ö£>B|ýõ×AAAf™8##cÑ¢E!!!BˆÕ«W«TªÀÀ@Súœ?>99¹oß¾Bˆ¤¤¤7ß|³´´ÔÇÇÇ,…@ûq{0ÀmÈÈàyóæ>}úìÙ³±±±Bˆ~ýú­]»¶ý³–——_¾|9<<\Z rpp8}ú´‰},X°mÛ¶ÒÒÒŸþ9###""ÂÛÛ»ýUº0#Xqçwöïß_·|×]wµÖªª*!DÏž=u-...R£)}FŽÙÔÔÔ·o_''§;v¼ûî» …ÂÀtŠÿµdÉ©]ÿm^ùùùF·ª¿IY£Ñäçç·º‰AÝæGa`ÝæGÁŒÀŒÀŒÀ·á‰‰‰ÍBœ0“ŽyCryy¹»»ûÑ£GGŒ!µ8::nذáÑG5ÚgÊ”)÷Ýw_zzº££ã»ï¾û—¿üåÌ™3ÒÑ-ñhÖ'‡[ åpŒà6a¥·@[ˆ›››···.ñ«TªêêêÐÐPSúTTT\¼xñÙgŸuwwW*•)))uuuGµö1:•Ž ÀBˆ¤¤¤´´4•JUQQ±pᨨ(é X999ëÖ­3ÐÇÝÝ=00píÚµjµº®®îí·ß®¯¯ožhƤ\ZZzüøñÒÒR3N¼xñâI“&EFFúúú*Šììl©}Ïž=[·n5ÜçßÿþwEEÅ€ÜÜÜÞ{|œœš±6@×cäFêëׯϜ9s÷îÝÒêC=´iÓ&WWW«Ôf< Àúäð|¬ŽÜ&¬ô ð¼yóšššòòòjjjΜ9ÓØØ8oÞ¼öÏ €•ÙÞ¼wïÞ3gθ¹¹ !‚ƒƒ7oÞb•Â0'#W€µZ­­íÿ…d[[[n'tFFðý÷ß?gΜ’’!DIIÉœ9sî¿ÿ~«€9 Ào¼ñÆõë×ýüüºuëæççW^^žžžnÊ0#CÏ×ÕÕ}õÕW‡*((())ñõõ °Ze˜‘¡wIkµÚ!C†üðÃÖ,Èìø ë“Ã'‚äpŒà6aÏ ) OOÏ+W®´:–‘Ï ýþ÷¿9rd|||ß¾} 
…Ô8}útË€9¹Ž|ø¾ûîS*•®®®³gÏ6KI€.ÌH>zôèÒ¥Kmll¤UWW×òòr³L¼råÊìììÕÕÕM›6Íô>ß|óÍ„ &Ož¬R©rssüq³”èÂl ovttT«Õ½{÷–V‹‹‹===Í2qFFÆ¢E‹BBB„«W¯V©T¦ôyùå—gÏžý /HÝüüüÌR  3r8&&&>>þÒ¥KBˆ«W¯&''O™2¥ý³–——_¾|9<<\Z rpp8}ú´)}êëë¿þúk{{û»îº«gÏž_ýµáéÿkÉ’%RûÉ“'u}òóó5nU“þ²F£ÉÏÏou#0#0‚þ&2o †™x&Žf¢Ûá·`F`F`F`„Î5Bbbb³'ÌD¡Õj l®ªªzê©§vìØ!M·qãFGGÇvÎZXXðÃ?I-ÞÞÞK—.7Ú'66ÖÃÃÃÛÛ{×®]ƒ~ã7^ýõ³gÏz{{·~„ #Çf§H4ÞGûÎí:c\œI3öÞn¶ 2W¬3rØÙÙyûöí%%%‡***Ú¶m[ûÓ¯4¬¢²²R×¢V«¥F£} E|||XX˜R©\¼x±Ý—_~Ùþª]˜ñï !|||FŽiÆGmÝÜܼ½½u—¼U*Uuuuhh¨)}”J¥þ£ÂÒ¥i3^tIF^‚¥ÕjwîÜyâÄ ýû¶ÓÓÓÛ?qRRRZZÚØ±c=<<.\%ÅÚœœœ7n$$$èóÌ3ϬZµjòäÉo¼ñF}}ýرcÛ_  3€“““·nÝm–;Ÿõ-^¼X­VGFFÖÔÔDGGgggKí{öì)**’p[}ž}öÙÊÊÊqãÆi4š»îºk÷îÝ^^^æ-ÐÅy’¸gÏž'Nœ4hÕ 2;^‚Àúx Ö­Í`•^‚åâââááÑþièXFð²eËæÏŸ¯V«­S b$‡‡‡:t¨W¯^Nz¬Sfdä%X?þxDDÄ;ï¼cö—``MF°J¥úöÛo•J¥uªÀBŒÜ=bÄ•JeR°#W€£££'Ožœ˜˜Ø·o_]ãôéÓ-\àW¦|”h»ñ/ÀHÎÉÉéѣLJ~¨ßHt:Fpnn®uêÀ¢Œ< @×ÐúàÌÌ̈ˆˆàààÌÌÌ–[ããã-\fÖfvqq!ºŒÖð·ß~Ûl€NÍÈ3ÀS§NmÖm±b°#øÔ©SÍZNœ8a±b°”6?ƒ”••%„Ðh4Ò‚D¥RõéÓÇòU`fmà5kÖ!*++¥!ÄwÜáååõÞ{ïY©4̧Í,Ýêüüóϧ§§[±,ÂÈ3À¤_@×`$Ð5€²@ÈB›/ÁÒWVV¦Ñht«´X=X„‘|èС™3gë7jµZK–€ù¹:99yùòå5z¬Sfdüè3fX¡,ÊÈ`//¯k×®Y§,ÇÈàðð𨨨„„www]ãôéÓ-\ff$öÙgÝ»wÏÊÊÒo$:#877×:u`QFž–”––?~¼´´ÔÒÕ`!Fðõë×z衾}û>¼oß¾?üpEE…u*ÀŒŒàyóæ555åååÕÔÔœ9s¦±±qÞ¼yÖ© 32ò ðÞ½{Ïœ9ãææ&„Þ¼ysHHˆU ÀœŒ\Öjµ¶¶ÿ’mmmµZ­…KÀüŒàûï¿Μ9%%%Bˆ’’’9sæÜÿýV) s2€ßxãëׯûùùuëÖÍÏϯ¼¼<==Ý:•`FFžîÓ§Ï¡C‡ JJJ|}}¬Sæe$Kˆ¾€N­õœ™™œ™™Ùrk||¼…«ÀÌÚ À...`@—ÑzþöÛo›-Щy ôÔ©S›µDGG[¬,ÅH>uêT³–'NX¬,¥Í·@gee !4´ Q©T}úô±|U˜Y›xÍš5BˆÊÊJiAqÇwxyy½÷Þ{V* ói3K·:?ÿüóéééV¬‹h3K–,Y¢V«›5º¸¸X¬,ÂHîÕ«WËF­Vk™b°#¸¤¤D·|ýúõÕ«Wÿþ÷¿·pI`fšZ#컉n6V)ÇHöññÑ_ÎÊÊŠŠŠJII±pU`6ŠD“ºiß±pèhF¾ÜŒ­­mee¥…JÀrŒ\þâ‹/t˵µµ»víòôô´pI˜Ÿ‘ÿûßÓÓÓÍR  Ë3€cbbfÏž}á­V[PP0gΜ˜˜˜öÏZ^^~ùòåððpi5((ÈÁÁáôéÓ&öÑjµsçÎ}íµ×ÜÜÜL™Nñ¿–,Y"µŸ¼ý%º0#W€“““ǯV«{õê%„ˆŽŽ>xð Y&^¼xñ¤I“"##}}} Evv¶Ô¾gÏž­[·èãàààó+///!DïÞ½{öìi–ª]•‘'‰ÝÜÜJKK»wï>xðàsçÎ !zôèaöw„Z/ÁdŽ—`YdF^‚¬È\±ÎÈ`GGGµZ­[-..öôôlÿ¬X™ñïÇÇÇ_ºtIqõêÕäää)S¦X¥0ÌÉH^±b…¯¯o~~~Ÿ>}ºwïþÊ+¯X§2ÌÈøw€·oß~éÒ¥ÂÂÂ~ýú™ë;ÀX™‘ÜØØhcc#½rYj¹y󦓓“å ÀœŒÜ=vìØ+W®èVóòòÂÃÃ-\æg$÷ë×oذaû÷ïB¼÷Þ{O>ù¤U ÀœŒÜ•••••vþüù]»vEEEY§2ÌÈÈ`!ÄðáÃûôéóí·ßŽ1âž{î±BM˜‘¼iÓ¦áÇ?ñÄ/^Ôh4ááágΜ±Ne˜‘‘[ SSS?þøã±cÇ !>ÿüóåË—ß{ï½UUUV© E¢IÝ´ïX¸ 
+2€sss½¼¼¤å;î¸ãÿý¿ÿwß}÷Y¾*̬Í[ ¿øâ‹¦¦&)ýVTT455 !‹‹‹­WfÒfŽŽŽ®««“–ï¼óΟ~úIQ__?{öl+•€ù 4] `€,z ô‚ lll„?ÿüó’%Kœœœ­UæÔf7nÜ?þ(-GEE]ºtI×nº0«6ðçŸnÍ:°(žÈ `€,€²@È `€,€²@È `€,€²@È `€,Øvt€<Äřԭ÷v ×ÈW€²@È `€,€²@È `€,Øvth[\œñ>Û·[¾è ¸ ®À­àR @§E0€ÎŒ[ ²@È < ´I‘hR7í;®€9p `€,€²@ÈB‡àÆÆÆ xxx8::ÆÅÅ•••™ÞçÅ_:t¨ƒƒƒÏ3ÏÉÉÉéééÖ¯йtÌàòòòË—/‡‡‡K«AAA§OŸ¾Õ>Bˆ#GŽ„……žNñ¿–,Y"µŸ__ßÚÚÚ_~ù¥£ŽÐYtØ[ /^¬V«###kjj¢££³³³¥ö={ö%$$´ÕçæÍ›iiiBˆÁƒK»ØÙÙÕÖÖvÐq:‡ À666«W¯^½zu³öÌÌLÃ}œœœx©€>±kv¶¢{‡ýÿ>¸]ð?„žósÆûðbv C–LûÊ‹0í£D€Ã'‚·‚ Ì„8*„0ûçÇæÓaoÀšÀYàh ¦ÜÍέì@çÂ`üì½y\•ÕÚÿ¿6( ;ÄGp894‹'r@Ë13µÁ2ÒR½ÌS¨œæŽOÚà@*IV¯èÒƒ}ÔÒN–Á1³Ô4ÐÒ@¡@7£‚hm¼Ü¿öÃÃcv¯µö}]Ÿ÷_››^¼ä³¯µÖµÖ` €hØ™Ë9r×Y!V€0`,@ 4@”ººÿã?ðó¾¾J¾À+ÀÏåìWBl´n˺z#àõ` Ê\ÎU®·¹0€:0 ‚€`Z ¾ !+0^Ž*È`pÞ:½œe ¬Q.À^Ïå G7l¾¾ò¿ƒ·Î»€ÿA.§JS_¢ýú«hÖLµì †ÜKp 4` €`ö€Q©»}¯‘Çðpß ´@`V€€ ƒõXú¹œ«ûZZzoŸz£B0`0ÀŸ`€Rì{d°;ƒ+K1{C°°`,@ 4@8 è+ÀX€`)¨? GRÀ.ç ÐÊ/µÖà À0åW·8ëþ?þ› %ßÊ•ùðÁ¯@}OŸÿ ÕFô‚0àOs9K£Bˆ–X:ÁÖà$F€&Žp!0ù ì‚úé~,0x3¸ 0À´@¬ãr¶a¯ÐÀÐÃQ€0`†ú’q&3Þö`ÀX€0` €`ÀX€°õ<÷ÜsŠ‹-Rl\õ! Þð*8Ä‚` [ÏóÏ?¯Øˆ0^…úPoxbA° €°À–àúúúÙ³gGDD8Î1cÆ?~\÷wÀÛ±åxÑ¢EYYYŸ}öYaa᯿þz×]wéþŽx;MtWÂêÕ«çÎÛ³gO!Ä‹/¾Ø½{÷C‡ÅÆÆêþ¾x1†Ý(++Büç?ÿñ< ÌÊʺدû àÏbÉpÒ~+À'OžB„„„xž„††š/ÆÀ„÷ !ª««=OªªªÌ‡ÀŰß8,,¬mÛ¶ß|óùá¡C‡NŸ>Ý«W/½ß/ÇaÇá ¬]»vóæÍIIIÕÕÕÛ·o×ýMðjì·Xñü£ªª*>>¾®®.111++K÷wÀÛ±å 0ðG±ß`à À |Ÿ{î9ÝßÞEQQÑþýû}}}•]¯#ŒàbÔ××Â#ŒÚLàð¿Fíb”‚¬à†K£]Œ§N;v¬Ãáp8BˆqãÆÕÔÔÈÁã•Ñë’È0ê’š,^¼ØÇǧiÓ¦›6m2 #++kÙ²e0££ú7e-Rÿ+a„Ñ.FIà,kHOO¿Äg§L™£-ŒsæÌÉÏÏýõ×o¾ùæO>ùdêÔ©±±±K—.µ\#ŒWFFF†ùâ§Ÿ~Z¹reRRRçÎKJJÖ®];f̘—^zÉr£.©I×®]?ûì³½{÷¾øâ‹Ÿ}öÙ‘#GFŒ‘ŸŸ#Œ0ª7ªSÖ"åð¿Fíb”…î8^DÇŽ;fFll¬a%%%íÛ·‡F/4ÆÇÇïÞ½Ûóá‰' $Õ¨EÚ¯_?Ã0êëëÛµkgƹs箺ê*a„Q‹‘ þW£]Œ’À!XVRPPðÅ_ÀhSã‰'¢££=¶hÑâÔ©S0Âè…Æï¾û®wïÞž#""Nž<)Õ¨E\^^îãóÿ¿U:t(44FaÔb4Q_(–rø_ #Œv1J`k8qâÄÀ;vì8hРŽ;<ØårÁh;c‹-*++Í×õõõóæÍ»é¦›`„Ñ :uZµj•çí[·ò÷³¨—vëÖ-!!aÙ²eµµµ¯¼òÊÈ‘#'L˜#Œ0j1ªSÖ"åð¿Fíb”ö[Ã=÷ÜsöìÙW^y¥]»vÇŽ›9s¦Óé|çw`´—ñþûï9rä¸qã"##ëêê:tèðÿþßÿ‹‰‰Fo3îØ±cøðámÛ¶5·ã>|ø£>Š—gÔ"½ñÆÍ‡#<<|àÀ3fÌðóóƒFÕÕ¿)k‘rø_ #Œv1J`kˆŒŒüá‡BBBÌ«ªªâââJKKa´—±¾¾Þ0Œ&Mš|üñÇ‘‘‘ùË_|}}åé`„ñÏàr¹²³³‹‹‹£¢¢ÆŽ.Û¨K ðÔ¿)ë’hƒhkp8õõõžëëëÍÛP`´—ñ‹/¾hÒ¤‰bèС½{÷öõõ-))F/4 !"""† 
’˜˜8dÈeQÅÒï¾ûî‡~0_üøãôŒ&EEE;vì(**R£ÓblȬY³`´Qý›²)b‡’±!ôþ$y­A×é[ĸï¾ûn»í¶¼¼¼ºººï¿ÿ~èС“&M‚ÑvÆ   ùó盫y†alݺµeË–0Âè…ÆãÇ0@a¼ tâÄ ©F-Òo¼ñ“O>1 cãÆ~~~+V¬ f¬®®=z´Âì=ztuu51cNNΨQ£®»îºk~Çßßß|£Œêß”µH;4Œþ$9%°5¸\®[o½Õ3­0lذòòrmg-iÿŒ2Œjjj=q»Ý0Âè…Fò›ñøðÓO?}þùç?ýôIc—.]Š‹‹?üðÃ[n¹Å0ŒÃ‡ÇÅÅÁh£–7e•;0z¿Qÿ)¢¢¢~þùgÃ0ºtéRVVæyîr¹ºté£]ŒØÊÊÊýë_ô¤7\Ã0Ö¯_o¾ `T¹”zc¿~ý 误o×®açλꪫ`´‹QË›²ÆJ±CÃHøO’•QMdï1¦ÍÈ‘#Ç¿`Á‚Ç{ì®»îzúé§Û·oôèÑyóæÍ;F»{÷î}‰ÏîÝ»F½ÄسgÏæäälÛ¶mÛ¶mß~ûmÇŽï¸ã©FõÒ)S¦\ðy÷îÝ/ö)ÛgÍšÕ¼yó£G¶k×îØ±c3gΜ5kÖ;ï¼#Ã¥Ë\^^f~xèСÐÐPy:­Eý›².© b‡†‘ðŸ$+£,tÀíÍÉ“'§OŸ~±ÿ÷0ÚÅøöï<ÿüó‘‘‘)))éééóçÏïСƒ¤ƒ(`„ñO’˜˜Ø¬Y³ÈÈÈ»ï¾ûõ×_7ïæ•)yÔ_.¥Þø÷¿ÿ½GK—. {ùå—;wîüøãÃh£ú7e]R> v`´‹QÃ0..àqêÔ©³gÏ6z£½Œýû÷_¶lÙ5×\c~èr¹î¾ûîmÛ¶Á£·###Ïœ93iÒ¤ÄÄÄ[n¹%((HžK¯”<­ZµÊËËkÑ¢…ùayyyÏž=KJJ(o¼ñFó…Ãá8pàŒ3üüü`´—Qý›².)y;0ÚÅ( €ø‚ƒƒ+++}}}=O®¿þú]»vÁ£· Ãøî»ï¶mÛ¶uëÖ;wöèÑcðàÁÏ?ÿ¼<£é›Ì¥ö–«7Þÿýeee/½ôR‡~úé§Ç{¬U«Vo¾ù&%#6±CÃÀ%ðÑý Ûžž^__ßèá–-[ämáPoìÔ©ÓªU«<nݺUö Œ0^‡£W¯^?þø›o¾ùÊ+¯¸\®yóæI5j‘&7`Ò¤I¿þúëðáÉ_~ùeÃ0ºwïÞ¬Y³=zøúú¾üòËÄŒØÄ #—+Ààrq8uuu fee-_¾<''‡†qÇŽÇoÛ¶mçÎKJJ>üÑGÅÇÇËpÁãŸaÓ¦MæIT?ÿüó€;vì(ϨKÚ—ËuÏ=÷lݺ•ž±¤¤¤¨¨(::ºuëÖ²]ê–Ô8y‚ر©‘ß$£$p 4ølÞ¼¹iÓ¦ Ÿ”••íÛ·Œñ¯ýë‘#G²³³‹‹‹GŒ1vìXÙ`„ñÊ>|xppðœ9sfÍšåt:¥ºôJ!uϘãwß}еk×Ö­[üøã]ºt!fLNNö¼v¹\kÖ¬3f Œ¶3ò±cS#‡?IFYh8x‹"§Njx¡Ù¹sçJKK‰…¾Œ‘§N"oäÀ[o½5qâÄÖ­[ûûû8ðŸÿüç®]»èIëPYYùæ›oöìÙ“˜ñÆoüä“O ÃØ¸q£ŸŸ_@@ÀŠ+ˆqâĉÁƒÃh;£úÂC‹±CÃØ’’ V°5L›6ÍsëIyyy=„;vþøã!C†„„„<òÈ#擞={9r„’‘ü~õóiÒDCžË–vîÜYê×÷cLLÌÆ].WÏž=CBBjkkýýý‰›5kÖðÃèèè·ß~FÛÕZ¤ˆF’Œ’ÀØš6mzîÜ9!Ä™3göïßß·o_!DxxxMM £çPŸŠŠŠ“'O{.4'c¬©©©««‹ˆˆ0?4 ãĉ‘‘‘”ŒBˆ˜˜˜mÛ¶%$$øúúNŸ>]QTT$õw«Þ8nÜ8y_ÜKŒ€óæÍ»óÎ; ÃØ°aƒâû￟8q"1ã?þèy$;â`”„úÂC—”<ˆíb”…ÎågB\wÝu«V­2 cíÚµQQQæÃ¼¼¼N:‘1ºÝîùóçGEEyþñDEE-X°àü­ö5²Ú˽ÿþˆˆˆñãÇ/[¶¬[·nÓ§O§gT†z# Dmmmuu5m#y8œ/¨¾ðÐ%åb‡bG[Ã;ï¼ãããÓ±cGŸÔÔTóá+¯¼òàƒ’1&''ÇÄĤ¦¦ææææåååææ._¾<::zöìÙdŒörûûûŸ9sÆ|]PPpÇwtèÐá¾ûî“÷¶¤Þˆ0@1‚Áù‚ê ]Rl‡Ø‘À–ñïÿ{Á‚›6m¢j ;xð`£‡û÷ï'clÖ¬ÙÉ“'Í××^{mVV–aEEE”Œp¹\äLî#á`4 Î4t”:ꥈ`˜ÄŽ 0—K```yyy£‡'Nœ $c 6'¬««ó÷÷ÿñÇÍ×dŒ¬(///((8ÿ_#“ûH8 „ùùùÑÑÑ o7-))iÖ¬#;À. 
v®‚e ·ÞzëÅ>µeËÆÄÄĤ¤¤Å‹ÇÆÆšOòóó“““‡ "C§Å»nݺiÓ¦­[·.""Â<˜±   mÛ¶dŒ¨¯¯_´hQZZZqq±ù$**jÚ´i)))¾¾¾4ŒÛ·oÏÈÈ0_/]º4$$ä›o¾Y¶lÙœ9sÞ{ï=F-RFà|AFõ…‡)b‡ˆ2F)è“áÑßy衇Í×3gΔ÷Vo,--MHHB8Î6mÚ8N!DBB‚¼FõF{¹9Àa¿º–æy{Ô9ô[rXR ÎT_xh‘"vh;4Œ’ÀØbJJJ"##Í׿ýö›‚)ÅÆüüüÌÌÌ×^{-333??_ªK‹‘ü^npد®¥yžÃõFUΤaô ¾ÔQ)EìÐ0"vh%°ÅTWW™¯«ªªäíVÕh¶†Ã¼/‡ýêLî#á`äP¥qXRc…–ÂC™±CÈØ—`뉌ŒÌÈÈ0÷öîÝ›¤‘*C/#‡yßÑ£G5ªá’l^^Þ°aÃn¿ýv2F&÷‘p0r¨Ò8,©qCKá¡FŠØ¡aDì€K€°õ¤¦¦ !|}}›4i²aÃ’FªpØËÍaÞ—Ã~uƒÇ}$Œª4KjÜÐRx(“"v;à`,…½{÷®[·îü=”Œ´!¼—›Ã¼¯ ùýꀪ4Kj ÑRx Ú±Ä #¸b†aÈ}z``àK/½$„¨¨¨¸ù替ÿþûŽ;nÞ¼¹k×®4Œ iÕªÕK/½t÷Ýw/^¼8++kÏž=4Œ×_ýƒ>8mÚ´7Þxã™gž)**Bäçç9òðáÃ4Œ˜5k–ùâôéÓï¼óÎC=$„0 cùòåòR]½”ƒQ J#«9bÚ vÈɃعb0—K·nÝ222®¹æ!Ä3Ï<óé§Ÿ®X±bÙ²eÕÕÕ’.jWolÈ«¯¾úÈ#øúú:ŽÌÌ̱cÇÒ02™÷åCiiiïÞ½KKK…n·»iÓ¦ R]½”ƒ‘0–Ô¸Ís±ck;4Œ’h¢û¶áèÑ£žÜ›7ož={vß¾}.\اO2Ɔ̘1£ÿþùùù}úô‰%c¼çž{Z·nÝhÖ3ýLÃÈŠÀÀÀÓ§O›¯kkkIJ 9Tiqqqæ‹ó¸dè´·oßž‘‘a¾^ºtiHHÈ7ß|³lÙ²9sæHš±UoäbÇÖFÄ £$°lWjjj®ºê*•Fìv[Súv}-RªF&ý–&„—Ô?$„¸îºëfÏž=~üøâââ>}ú¸\.F-¨¯v‚ر¯ÑbÇÖFYÈ8Y‹!kÖ¬q»ÝnÞ¼9##C’Qñøã7z˜””4wî\IFgrwÓ06„ö}$|Œ„Ÿ÷P]]d¾®ªª $cäp¯ŒúRÇÐQí4±CÀˆØ±µQ>:Ýy衇Ì?ã†TWW¯\¹Ržtýúõ .løäž{îùè£$éfÍš5cÆŒN:M™2%%%Å|øñǛןÒ0r îwÚµk·cÇóullìÇLƸ}ûö{î¹Ç|íiÑéß¿ÿœ9sÈ2cÆŒ½{÷fdd8p@Áfu]RòFÂý–‚ƒƒßyçsçέZµJÁž1eÆØØØuëÖ !Ö­[ѹsg!DAAAÛ¶mɵ”:ByµÓÄ#bÇÖFYèA‘ýÁÿ&55ÕétÊ3æççGGG¯X±Â󰤤¤Y³f’Œƒ«áYAxÞ—Ï]Ç€‘‘‘õõõ‹-êÝ»7I#Õ%5—Žª/u MÕ+;ö5rˆI`l æ¿ò "ÏXWWwèСV­Z­\¹Ò|¸k×®¶mÛJ2œ:uJ—MA¶6’P­Ò±wïÞuëÖûì³&LX¾|ù¤I“F%ÕH›[/#“ûH8Ü0d ©gçr |¯ŒúRGè¨v;àÊ@ìØ €-CqxíÙ³'11Ñ|°qãÆ&Mš8Ž´´4IƆ<ýôÓ'Nôóó{úé§Ÿzê)’F5øûû;!DÏž=wíÚåv»—.]zíµ×*8‹0î:BlÞ¼ùÃÿMYYÙ¾}ûä…üqZZšy†§IÏž=9"ϨþÇTÿ3*F}•–žž^__ßèá–-[ÞyçIF-C5ÚsÄêÇiZªÄŽå v¤B;v¤ { šBù=u~~~æµ+†a>|Øì˜­««Söÿ”äò.—KÞê!Ü$xÜG¢þÇTÿ3rè·4«ffföë×Ož±]»v ,høpëÖ­={ö”dlÕ{eÔ—:†Žj±#ÄŽl¨ÆŽ$0¶õáÕ¾}ûÿüç?æë>ø OŸ>†Ú0I˜lpR î:–ÁßÚeã‘îß¿?""büøñË–-ëÖ­ÛôéÓe•á%?#±*M‘ýÁÿ&55ÕétÊ3ªª5„ä±úRÇÐQí v$;²!;’À`ÉÔ‡×Ì™3ûôéóÅ_lÛ¶­k×®O?ý´ðŸFý ebbbZZšùzÛ¶mS ÉÈaÞ·!‘‘‘õõõ‹-êÝ»7£–°¿¿ÿ™3gÌ×wÜqG‡îˆ<ã& IDAT»ï¾êêjIFõ?¦–Ÿ‘|•ÆaIêKCGµƒØ‘dDìïƒ%kP^·Ýv›Ãáp8wÞygMMan·û•W^‘dä€úÊðððŸ~úÉ|ýå—_¶lÙrÆŒæÙ’Œæ}Bµ)ˆI»>‡“C•ÆaIêKCGµƒØ‘dDìï÷Û›“'Oúøøàú8«P+¯¿¿ÿ‰'BBB„GŽIHH(..>sæŒ9'*È»Ži™ÜÉáÇt8uuuê8pà€âããW¯^}Ë-·H:çVýÏP]]íïï/„(,,|üñÇ÷ìÙÓ¿ÿÔÔÔàà`eß° •ÕbGª±¼ 
€­¤¼¼¼yóæ>>JÏÖ>sæLmmmHHH“&MTz þMÈŒËcÇŽ%$$<óÌ3Ó§OB|ýõ×cÆŒ)..–aŒ‰‰ÉÌ̼ñÆ…~øá3Ï<óí·ßÊûúú^ðS’n{Rÿ[å€ÃáxüñÇ_|ñņ|ðÁ–-[Ê;B|È!wÞyçÃ?,„øôÓOׯ_ïùÔêÕ«eÕÿ˜Z~FòUZYYYxx¸Œ¯ Ô£¥Ô «ÄŽ ;À»Ð»M†ƒšK=:t8|øð[o½Õ½{÷Ñ£G—””È“¾úê«Ý»w7/ÑiÚ´éÀwîÜ)OÇaï¨`°—[ )ˆBGc¹–Ž}ò»8ô[â|A¨ÿ­j)u åÕb‡ˆù­bl #FŒ˜8qbnnîÝwß°bÅŠøøøqãÆI2.X° mÛ¶Ï>ûì³Ï>Û¶mÛÇ{lÆŒN§sÇŽ’Œ8Û;Êa/·ú°–mcälî#!Ùz⨟ÍÄù‚2P_ꚪÄ;2Pÿ[•ÀÖТE‹ŠŠ Ã0Nœ8!„øùçŸ ÃøùçŸÃÂÂ$Û´ióÝwß™¯¿ýöÛ¿üå/†a¼üòËýû÷—d Î ÔEuuµ²w#ó¾œî#¡}Ç*Ã’‡9bõ¥Ž¡©ÚAìXbGbG[ÃUW]uöìYã÷À2×ñjjj<7‚Ê3†qúôéàà`Ã0Ž= É(œhRVVV__/Uq>uuueee¿ýö›l‡y_õ¨ÿ­ji,ר±O{W‡*ü’‡9bõ¥Ž¡©ÚAìXbGbG[Cß¾}W­ZeÆŠ+Ú·oÿè£}úôU«VíÙ³çóÏ?—aŒ=zôœ9s ÃX´hч~˜››{ìØ±Þ½{WTTÈ0:Žººº€€€ 8pÀ€ñññ«W¯¾å–[V®\)Ãøè£îرcÙ²e¿þúëßþö· &Ì›7ïÌ™3æê¨ £çg”ñÅ/mTö[9rdhhèßþö·+VüòË/Bˆ;ï¼333³M›6ï¾û® £báÂ…«V­š2eŠ"==}„ gÏž}óÍ7·lÙÒ¿Fõ?¦úŸQ'Ožôññ¹êª«”ËËË›7oîãã£Ì(„8sæLmmmHHH“&M¤ŠjjjTþ2õ¢¾ÔQ)Eìȱc-¬bÇb4¾I¢þWªÌ(ìå°—›Ã¼¯`ÐÄä>»Lh÷[rXR Îô ¥zT&EìH±c!¬bÇZ0¶Ú`ò{G {¹u¦ kar ‡] ª4œ/(õsÄh€;’@ìX«Ø±›}»ÞOee%U£`°wÔá½Ü&´ç}9ìWgr ‡æ8Ti–Ԙ̛¨/uTJ;2@ìÈ€UìX‹Í¾] ÁàÌ@„Wò9Ìûrh br ‡] ª4Kj¬æˆiƒØ‘bGˆ+‡`YCïÞ½/ñÙ½{÷0TWWûûû ! üñ={öôïß?55588Ør¢²²òÞ{ïݲe‹bìØ±o¾ù¦Ó鬯¯_±bŬY³d=>ÌŒÃñ‡£]»v?üð“O>éy¸mÛ¶¿ÿýïû÷ï—aŒˆˆØµkW‡„999wÜqǸqãêëëW¯^-ïë?þ¸oß¾~ýúµiÓÆ0ŒŸþ¹U«V²Üøå—_¶mÛ&„4hPÛ¶m¥ºLÔÿ˜ŠÆ   òòr???ód¸šš§ÓY[[Ûºuë“'OJ5 !êêêZµjU]]}ìØ±ØØX³Ž±œ/(èþ|Aõ…‡.)bGžQ v¬ƒCìÈBÓÀ›Ï=÷\ëÖ­Ÿ|òÉ5kÖ<ùä“­Zµzî¹çÞþF]¨Ü;êAýŸ†2#‡y_Á©)ˆö}$Œú-9,©q8_PKᡱÚAìXbGbG[C||üž={<~óÍ7ñññÄŒ&´÷Žz ¼—MA2ÐØ¤e“ð ‘z#‡*ÍÀù‚ÒP9G¬¥ðÐUíˆKAìPBËÒ”µ`l AAAn·Ûó¡Ûí–z«#‡½£à0ï+°_œ”¶‘O•FxIÍ„ö±úÂC—Ô±c-ˆIÐŽI`l ½{÷67û™¼úê«æ%#‡3{]F󾬚‚0&c$_¥¤ÿ?r˜#V_xè’šþçê±ck#‡Ø‘Á²†;v >¼}ûö111EEE›6mЧd ;|øpóæÍ].WË–-þùç6mÚüòË/W_}uYY™ cÛ¶m·lÙò—¿üE±gÏžI“&}÷Ýw¯¼òJvvöŽ;dŸþù´´´|0&&¦°°ðõ×_Ÿ6mZ§NÌÏNœ8‘€Q08ÞC'Ožôññ¹êª«TJ«ªªBCCUµH9 Ÿ½ÇÁÈá|Aõ…‡.© bF/7rˆI`l.—+;;»¸¸8**jìØ±áááČΠ4µöœ9ùí·ßΜ9óË/¿”áÒeô‘‘!i€í%ÆòòòæÍ›ûøø(3 !Μ9S[["û4f@ÂU#‡9b¡£ÔÑ%eá?IF&±# €ÁåÂá@ùàààÊÊJ___óÃúúúæÍ›KÞë2z ü–pèСѣG:t¨C‡[·nÍÉÉyá…ºté²zõêV­ZI’®\¹òÕW_ÍÏÏ7 £iÓ¦ýë_ÿùÏÞpà 
’tLî#á`ô@øOÒá%5sÄL@ìH±c!ˆ+kÖ`n¤lÄÎ;7lØðâ‹/îܹ“€ñ©§ž?~|rrrÓ¦M¿üòË)S¦,[¶,""âÃ?´Üe²dÉ’Ûn»mÉ’%Bˆ³gÏnÚ´IqðàÁÑ£GK2vêÔ)==}êÔ©æ‡iii;w–äÒeä@rròu×]÷Æo¬X±bòäÉBˆéÓ§gffΜ9SeSРAƒä53æÍó’P/å`ôPYY©À¢×¨¾]_™±k×®k×®6mZzzzûöíŸ|òIsޏOŸ>’ŒW_}õÒ¥KÍ9âW^y¥GæófÍšI2ª/<´H;ÄŒˆ Q;’À °5¤§§ŸÿpÊ”)yyy¹¹¹fYlw£`°w”Ã^n„ç}94iižç°G@ã®ÂpXRÛ¸qãøñãýýý=sÄ;wî4爯¿þzËuBˆÜÜÜÛn»Í\Š1çˆû÷ïÿÉ'Ÿ¬[·î7ÞaÔRx¨—"vh€Ø±\'tÄŽ$0Ú{GÉïåö€¦ F¡ª)HKó<‡=êª4œ/( &ç *±ƒØ±‹Q v®©gLsæÑG¥jTÿÏÿPÁ‚Ã]ÇLî#á`|î¹çZ·nýä“O®Y³æÉ'ŸlÕªÕsÏ=÷öïÐ0ÆÇÇïÙ³Çóá7ß|/C¤Ñèý2Ô—: ¤ˆFÄ1£µ`Ørss_xá…’’’sçΙO8гgO!ÄîÝ»i=nå°—›Ã¼/‡¦ &÷‘p0rè·ä°¤æð[¤–ÂC½±CÈØ!f´{÷ÞC·nÝFg^ -„˜;wî /¼ „xàh=þ3ã°—MA’PßÄä>òFUZŸ>}¦M›æ9íoåÊ•éééß~û-%£Âo‘Z -RÄ#b‡˜Ñb4¬:S$44´Ñ“N:3z¨¬¬T#Òh$ š‚ˆÝáÐoùÅ_õìÙsĈ=zôþòË/‰=¨¯¬”µ« vh=ŽIØ|øî5$&&nݺµá“qãÆ­_¿ž’‘'³fÍZºt)%#‡y_„çD™ÜGÂÁÈ¡ßR0XRó@ø|A-…‡z)bGˆ©ŽI` .{G9ìåFS #“ûH8§* ï±# Äð0— ‡½£ör3™÷5!<ÀFpXRã0G €@ì v.Ä#a1¶nݺiÓ&Ï?ý;î¸cæÌ™Ï>û,%cii©9øô°`Á©‡Š©7þõ¯=räˆ9 ;bij°ê*++Õˆ4=¨o××"¥gäP¥]l «{÷î’V·ÔÇŒs‰[F¶ vìhDìÐ0J‹àrá°w{¹]`r #“~Kòp¸W† ˆÄŽ]@ì\1ƒË…ÕÞQÂp˜÷åÐÄä>FžÐ[Rã0GÌÄU;v4J`p¹°Ú;Jó¾ö«7oÞ¼Q»uçÎ>l¹H¯”ƒñ|èUi–Ô0GLÄ #b‡†QØ .V{G s‰=*Ý»w§aä°_ýÚk¯mô¤OŸ>òtº¤Œ¬™ÌŽ2•UšTãäÉ“Guûí·7\àzä‘Gd¸t—/_>|øðÔÔÔ†3¶òtZŒL@ìÐ0"vh%`¸CoÞMAÀFpè·d²¤†{e€]@ìÐ0 ÄΕ‚0¸\8ìåš‚h8Ti8_ §OŸÎÎÎŽïСm)m;4ŒàŠA ´5¤§§'%%yÖL¶lÙR^^~ï½÷Ò0r8Pžh ¢adàÐoÙ¨(BÈ. 
Õ™ÌWTTLž<9<<ü‰'žøÛßþæIu©h‘Ò±CÃÈ$vd€`kp8uuu fee-_¾<''‡†Ѐü¯@S@-ÎB÷îÝ{ß¾}Ó§O?uêÔ믿ޱcG"íR¼&±# €­Áápdgg7mÚ´áãGÎ;·¦¦††ñ|èíåš‚\æX´¬¬L±nݺ'žx"99Yöª¬)€0[ƒÃáhÔìÁívÓ0rØ; h€¦ €vèÍ—–––”” <øûï¿7Ÿ”••Íš5«¾¾þ³Ï>£$À¦Ð‹I`°eÔÔÔ4jH&fä°w”Ã^n`¿:@1î•iݺu£ Ð"ÀpˆI`Ø.¸#—˜‘ÃÞQìåpp¸WfïÞ½'Nœ˜0aÂùK¯ž[ÐiH°bG[CYY™â3oÔ9ìå¹—›h H…ñ¢ººúå—_~þùç¥Z´KÕ_¼„«žÀÀ$vd€0ÿ‡½Üà°_]Kó<‡=Ø•® sÄ|(..îÔ©“Ê‹—Ô;W ÀÖ0eÊ”¡C‡Ž7ް‘ZÙ9À¡)HKó<‡=Ø•ÀÅÐRxh‘ª¿x W=É+ùàbà,k(,,lÔ@Ï€](--5Ÿ,X uwŠz£bóæÍšçËÊÊöíÛGLÊÁH¬äÓ@KᡱÚiÛ¶íûï¿¿nݺAƒ©¹xI¥‘Ãà°¢¢bòäÉ*×ÕÕÁb~Çår‘7r`ðàÁžÜyçÄŒBß‹@IÊÁ¸fÍ·ÛÝèáæÍ›322È…uuuffföë׌¡¤¤äÛo¿mÑ¢EÉïìß¿РA·Ür £aEEE~~~mÚ´IMM=wîœ<‘^cXXXqqñÈ‘#o¹å–#GŽÐ3‚+-ÐSQQqòäÉààà-ZP5´£¥yžÃ/1kóÆù‚ÄÐRx(“^bÕNRͬÞ(x´y›Æ²²2!ĺuëžxâ ÙëêêàÊ@ ´5Ô××/Z´(--­¸¸Ø|5mÚ´”””‹qd;#8ìåF/"Þ‡6oõ'VàŒ ËÑRx¨—îÙ³çb/IB½Ñá6ïÒÒR³É®´´T1`À€>ø`Ö¬Y6lô{VoW ÀÖ’’²aÆ”””¾}û†††VUUíÞ½{É’%ÕÕÕ‹/¦a䇽Ü=ôÐĉÕ ÕÕÕ+W®”4Uoä€ËåRvšz)£à18¬©©Qü‹Uo$–ÂC½´wïÞÕÕÕ3gÎTvϰz#‡ÁaëÖ­½z#¸bÐm ááá999±±± 8p`À€.—‹†нˆx^Òt #ø?ÑRx Ú‘‡6ï½{÷^l]]ÒDƒz#¸b°l uuuFFFž>}šŒ‘´÷rsXn"“ûH89Àd%Ÿ¸b|tDHLLLJJ:tèçI~~þ<0dÈ2ÆôôôúúúF·lÙòÎ;ï1Ö××/X° :::,,¬C‡aaaÑÑÑ .<ÿÛ°¯QQSSã¾”Œäar #‡Áaxx¸J#ÔZ¤b§wïÞ7Üpƒ9Tk£"$$äù矗÷õ½Áxúô錌Œ‚‚ÂF)è:~𥥥 B§ÓÙ¦M§Ó)„HHH(--%c î±HNNމ‰IMMÍÍÍÍËËËÍÍ]¾|yttôìÙ³É/ø[•Šz#À«˜Fý–Û¶mS©Ób<®³:JÅRÄ #“Ø1!|•,t/ASCý¯T™Q0h=zô¨Q£<èy’——7lذÛo¿Œ‘CS‡^DõÍóZ¤Œ¬(///(((//§gBø^2ƆjÙ RÄ1ÇNIIÉ·ß~Û¢E‹’ßÙ¿ÿ Aƒn¹å2FI`l1ÛÚÈa/÷ AƒÒÒÒ$}q/1^ð_±ýêaaa çMLöïß.ɨEÊÁèp•æv»çÏŸå™|ŠŠZ°`ÁùÓFö5rx‹l¨&,EìÐ02‰Å œd–TÑ .G•FFFnß¾]å^#õF&MAä{™ÜGÂÁÈ¡ß2%%eÆ )))}ûö ­ªªÚ½{÷’%Kª««/^LÃȀءaä;®³’…î85*++É9ðöÛo“7ž÷ zÕ7Ïk‘r0rè·ä°¤ær¹$}eï1zÐRx(“"vh9ÄŽaUUUÏ<󌼯ï F` .{G=¨ŸRfdÒD¾‘É}$Œª´ÀÀÀóg¾Nœ8HÆȀءaDì€K€hk(..~â‰'¾þúëS§N5|HÉÈáÌ@ph â€úæy-RFý–‰‰‰III‹/ö\ù–ŸŸŸœœ»wï^Æó!<æ°—›Ã¼/‡¦ &÷‘p0r¨Ònºé¦;v¨\àRoô@ï-RKᡱÚAìÐ0"vˆ­-ЊäädóÅO?ý´råʤ¤¤Î;—””¬]»v̘14Œ¬àÐÊŽ¦ F-Íó„Ûæ59ô[Þu×]III*¸Ô £¥ðÐXí vh;àØ{øî=ôïßÙ²e×\sù¡Ëåºûî»·mÛFÉègÚó¾Ð¼ý––Ôª©.Åh)<4V;„AìÐ06TSIØû»÷‚ƒƒ+++}}}=O®¿þú]»vQ2 )ˆ†‘É}$Œª´ªªªóJÖTol¨¦:G¬¥ðP/Eìȱ#±# ´@[C§NV­ZõÈ#˜nݺUv¬ÞHxï(«½Üh ¢“ãX99ô[ª¯“4Vf„Xõ…‡)b‡†‘ð_¢7¨m=úX¶Š;v >¼mÛ¶æ•ÇôÑGñññ”Œ„Ï ÌÈÈ0_\p¯ÑK/½DÀèüoC5Õ`l‡%5ÂsÄÔº¤€ˆ¨7Jµše¸\®ìììââ⨨¨±cdž‡‡3rØ;Êa/7š‚ˆÝáP¥¥§§_â³2–¿Ô Ï7D}©£KJÄbÇ.FI`l 
ß}÷]@@@×®]…n·»K—.ÄŒöŽb/7ðr˜ÜGÂÁèC•ÆsÄê •RÄŽÄŽ]à;’À`k˜:uê¼yóºvíúÞ{ïM˜0ÁÇÇgÉ’%ž-+4ŒöŽrØËÍaÞ—pS“ûH8=pØpÈ÷ʨ/ív»ƒƒƒ©9ì%¼—MH7 IDATAR³NKS“ûHÈ ÷[zèÓ§ÏäÉ“.p™‡ÆQ2r8_Pè(u´H;Œˆ2±#´@[F`` m#‡3}||Ξ=»sçÎýë_!!!²›=”ÑD̨¥yžÃõFÂý––/_>|øðW_}µáiÄŒÞ"…ŽRG½±CÈءa”…ÆÕg¼>}údgg§¥¥™-Ö555-[¶$fDS Ô·ëk‘r0rè·4 ãĉ«W¯~ê©§V¯^ír¹dë´ ;4Œb\´@ƒÿgzøðÃï¼óNÃ06lØ0räÈ]»veeeImÖUoDS¦ &÷‘p0 ý–æW×®]…n·»K—.ÄŒ€ ˆFĸƒÿV{Gƒ½Ü¸ë˜Æ~u&÷‘p0r¨Ònºé¦yóæ%&&¾÷Þ{&LðññY²d‰gRÌÖFVsÄL@ìÐ0"vlm”ŽÞh`#8œÈ4@}ó¼)#‡~˺º:Ã0 ”‘‘‘——CÃøöï<ÿüó‘‘‘)))éééóçÏïСƒ¤ÃŠÕ¹Ø¡aDìØÚ( €- OŸ>í/ £½£TØ·oß¡C‡Ì×?ýôÓ?ü@ÏÈ&÷‘p0®Ò<4oÞÜ0Œ3gÎ8Ϊª*ó{ f$?G¬¥ðÐ"EìÐ0"vh%S - ººzõêÕûì¤I“390uêÔyóæuíÚUYS2#«¦ #FTTTxšç¯¿þú믿^žN—”ƒ‘ðáóbbb6nÜèr¹zöìR[[ëïïOÌøÝwß5 „ˆˆˆ“'OR2j)<´H;4ŒˆFI`lW_}õ­·Þz±Ïzn±µQð8Pžùùùýë_…©©©k×®5Û:VfLNN6_\p÷¸å:-Ɔp¸„ƒ‘C•6oÞ<ÏiBˆï¿ÿÞsM#ù9b-…‡©@ì0"vhe¡kéØì%š‚h8ô[†Q[[[]]-Û¢ÑÈä^@Ä #bçŠÁ)ÐSQQqòäÉààà-Z3’?3°oß¾—ø ín4¥O?ý´ËåZ»víÎ;kkk;vìxüøqËEÁÁÁ•••¾¾¾ž'×_ý®]»(%È>Ïò÷ÊxP_êè’±C>±c-h¶†úúúE‹¥¥¥›O¢¢¢¦M›–’’Ò°&¶µ‘ðÞQ&{¹ÑDÃ(A¾ß’æŒíÔ©S…•••j®9UiT_xè’r±C±# ­ëÏtHNNމ‰IMMÍÍÍÍËËËÍÍ]¾|yttôìÙ³ÉÉŸxûí·_â³·Ýv£ š‚^‡{eÔº¤Ø±# ´@[CxxxNNNlllÇ0`€Ëå¢aäp5< í¦ ]Íóä÷hùÅ„††––– <8))É<í¯  €’Q}á¡^ŠØ6‚CìH-ÐÖPWWÑèaddäéÓ§É9œè!##Cv›®v#aÈ71¹„ƒý2ê õRİbG[CbbbRRÒâÅ‹=“”ùùùÉÉÉC† !cä°wÔÃ}÷ݧx8ªÞHòûÕ™ÜGÂÁÈ&KjæˆÕꥈ v$¡qiÊZÐm ÇŸ0aÂçŸît:CBBª««kkk²²²"##i§3Õê„AS° ª´N:½úê«ûì¤I“,?^½Qñá‡zflG޹k×®¬¬¬—^zÉr‘F£–ÂC‹”6ˆÄŽŒ’@Ím%Ü·oŸyL¯^½âââ虀°­aµ_]Kó<‡=jŒª´1cÆlܸñbŸ6lئM›ìn4a2G¬¥ðÐ"EìØ×ˆØ‘ë¬PspªªªBCCé9Ìû w{Ð2uÂa†H‘O•€í@ìØ×À¥ÁئL™2tèÐqãÆ6p˜÷Ìš‚0&cäÕ%5&h)<4V;ˆ vÀPrÙ} ”––FÛÈ5kÖ¸ÝîF7oÞœ‘‘AÈ»Žå¡Þh¢%ÆÕK9ß~ûmòFÿ £¥ðÐXípøçŠØ¡aL5ð?8Žººº€€€†³²²–/_ž““CÈ¡¾]_‹”ƒ‘Ãr# bF»Á× YLEE…yHC‹-(™ìBlÞ¼¹iÓ¦ Ÿ”••íÛ·O†K—ÑMA¶æÜ¹sB•Õ’© #vA}©£EŠØ€*˜¢°†úúúE‹¥¥¥›O¢¢¢¦M›–’’âëëKÀÈdï¨Ãá¸ØoÏív[®Óbl¨Æœ¨}3fÌ5×\óÔSO5|¸sçέ[·>ýôÓd¤éééIIIþF¶lÙR^^~ï½÷Ò0zàð'IuIɱúRG‹±C̈ر¯Q¨G­aöìÙ6lHNNîÛ·¯yýÉîÝ»—,Yr×]w-^¼˜€‘É™lH–ŠzcC5ù·=´nÝúßÿþw·nÝ„uuuï¿ÿþ„ :4räÈ~øŒ”Õ®ªU˜Ì«/u´H;ÄŒTa;’@=j ááá999±±± 8p`À€.—‹†‘3ÆÏÏÏår…„„!JJJúôéSZZZYYÙ¦M›ºº:2R‡Ã‘ÝhÀÑ£GçÎ[SSCÃÈòKjL戵ꥈ vhÄŽ$PZƒÓé,**j´/ÅårÅÄÄÔÖÖÒ0z ºÏYQVV.U¡Ýèê¼/“¦ ¶mÛnÚ´©W¯^Bˆ;w0 ¦¦¦¨¨èÚk¯-++³\§KÊaWù*M0[R#Œ–ÂC½±CÈØ—`k0ïzY¼x±g’2???99ÙÏÏï“%ö2’ßç܃|ò0i 
º÷Þ{?¾jÕ*!IJeËV¯^ýÄOWTT¼÷Þ{–ëtI94ep¨ÒX-©>_P}á¡Eê%! ÞˆØ±ÑáØ‘ÀÖpüøñ &|þùçN§3$$¤ººº¶¶6!!!+++22’†‘ü>gÁcO~Þ—ISÐáÇ ßÿý={ö<ûì³]»v]¿~}çÎ-×é’zI](ÛH¾Jã°¤ÖPMuw‰úÂC‹±Cƈءd´{÷ÞÆÁƒ÷íÛg®ãõêÕ+..Ž’‘Ã>gƒ|ó¾Lp»ÝùùùmÚ´ £*å°+C•ÆaDÑPM»U_ê(–"vÈ;”ŒÖbïïÞ !Ü:ËaŸ3‡A>‡y_š‚˜\ÈIUcC5ùJ±cw8üIr06T“‹1€¸ÝîùóçGEEy~±QQQ ,p»ÝdŒ£G5jÔÁƒ=Oòòò† fîÏ¡a ,//oôðĉdŒBß‹@ÆØP-[¡Ë¨>´H'Ožüî»ïJúâ^bBÔÕÕÑ6º\.•:-F•••Tˆ2FÄ £±#‰&V¨™’’’²aÆ”””F¬ÕÕÕ’YÕÓÒÒ&L˜×hÎÚµkeè´“’’Î?lcÈ!dŒBˆššÅ3”êäQŸZ¤………•••2¾²÷].—â¿õFO{§²E<õFêï8UfDìH±#Ä1£Åè!,,¬áB¥ÉþýûÃÃÃÉMòóó333_{íµÌÌÌüü|©.õÆÒÒÒ„„!„ÓélÓ¦ÓéB$$$”––’1 ó¾ ÕTZ@Wì»Ã¡IjÍš5çñÍ›7gdd1"v€@ìÐ0J`kàÐ:ËÚƒ|4Ñ0jI±S^^^PPp¾€‘C¿errrLLLjjjnnn^^^nnîòåË£££gÏžMÆxÁ™¾ÌÌÌ~ýú‘1"vÈ;4ŒbG6ßÁì5p¸xÊ”)C‡7nœŒ/î%F„3ce$ “ 99ÜL6xðàñãÇ?üðÃ2¾¸—q¾ #b±c##b‡†Qß”àÐ:;hР´´4I_ÜKŒf894©O-RSøàÐ$%œ/ˆØ!cäb‡†QX¶Ú÷s€Ã=ÀŒ|î:&!'‡)|„›284Iñ¹s±CÀè±ck#ŸØ±­Ão`KïŠáp˜£";;ûƒÿMjjªÓé$cd‡)|Mš¤§óiƒØ¡aDìÐ0J+ÀУG›o¾¹}ûö)))T‚Ç®§ÓYTTÔhVÒårÅÄÄÔÖÖÂh£Ãá¸Ø¿·ÛMèe‡¼z)‡)|M&´›¤ÊÊÊ<— ¨A±QKá¡EŠØ¡a4AìØÝ( ­Ão">ðÀ:u"l4xìŠ=zô¨Q£.Wæåå 6Ì|s‚Ñ.FÁ`NTýy-RSøš2XAµIJKá¡EŠØ¡adÕØ‘Ààrá—Þö89 €YAûf2ý–î\áÐSÊ ÄŽÝˆFI4±~M¥®®.""¢ÑÃÈÈÈÓ§O“1FFFnß¾]eû Œ2p¹\ŠOhPoô år)ÅÒ¸¸¸–-[R5&&&&%%ßý8dÈ2ÆÂÂÂÊÊJI_ÜKŒ)))6lHIIiÔáY]]-©ÃS½‘ˆ»;4Œ²Ð=¶Cë,+84Ì6j™…å0ÙŒ“ZÀ•Á¡IŠ ˆF v® €ÁåB>.»wï>mÚ´E‹Éøâ^b4x¼µs0j¹7’ÃAº.ä¤Ýoé𜇞R& v( ÄŽÍ’Àü1Ç%3ƒÑFF-³°&›5No®Ò8ÌI¡IŠ ˆFÄ £$0ÂqÉoíŒZfa9L6ãBNp˜“"ß$ÅÄ #b‡†Q[Ú5kÎÿÞ¼ysFF#‡¸ä‡·vF-³°&›Õ9Tiæ¤L7I: -RÄ #b‡’Ñr0¶q¡KP233ûõëGÆÈ!.9Àá­ƒQË,,‡Éf\È)sRP_xh‘"vh;à8 ÃàOãp8²³³›6mÚðáÑ£GçÎ[SSCÞ““ã9¿ÞäÀ p¹\4Œ8~üø„ >ÿüs§ÓR]]][[›•• £]Œ&*/—Ò(¥mt:EEEn=q¹\111µµµ4ŒæLÐù7 øùùmܸ‘†qÊ”)C‡7nœŒ/î%Fõ…‡.©@ìØßˆØ¡a”ÀÖàp8|}}/ø)·ÛMÃÈ!.ù@û­€C•ÆaNjðàÁãÇøá‡e|q/1ª/lù÷)£Tid :ï&x €;ÄŒL ;’ÀØÊÊÊÂÃÃiMhÇåßh³²²–/_ž““CÀÈä­}öìÙ6lHNNîÛ·ohhhUUÕîÝ»—,Yr×]w-^¼˜†»€y7ónBSá¡«Ú¶±C&v$0ÿ‡ÃÌ8€Û€]àP¥aÞ ónÀ«@ìÐ0 ÄΟAýÁÓ$™ºàD(,,¬¬¬$lÜ´iÓ™3gÒÓÓ MjjjÜ‚’‘<‰‰‰III‡ò<ÉÏÏà† BÆhо}û7ÞxCåŠz#êêê"""=ŒŒŒ<}ú4£úRG—”6ˆ2pˆI €ÿÃafàp`À«ÀE Å v® €-ç°Ù>‡™q€ömçÎBøø(íâÑ"À°šwÓRx Ú ¬bÇZ0¶œÃF Ld/g̘1×\sÍSO=ÕðáÎ;·nÝúôÓO“‘’¿™ ƒö¼›–ÂC½±ìíØ‘…ÆýÇ”HNNމ‰IMMÍÍÍÍËËËÍÍ]¾|yttôìÙ³É9àv»çÏŸåù‰ŠŠZ°`Ûí&c\³fÍù_|óæÍ4ŒLlkÕªU^^žùúôéÓ™™™†a´H9Lá4(--MÒ÷#æÝh;dŒˆF±# €­¡´´4!!Aát:Û´iãt:… 
¥¥¥dŒâ’ÃDãg(¥¢Þ¨>´H9Lásón4Œˆ2F vh%k¬„öõ9àp0#ŸËÈ_Èž““Ûðá àr¹h=¾EÏœíZ¼x±ç›ŸŸŸœœìçç·qãFFÄ)b‡†±CÃ( ­Ãoj;vlÇŽÅÅÅ„é]1&ùùù™™™¯½öZfff~~>Œ¶3r8° ¦ð9ô[rh’ OL´*¥ˆFÄ £$0¶†S§N;Öáp8!ĸqãjjjˆ9Ä%+ÈOdÐ6ªß!¯EÊ¡cŸO¿%æÝìnT_xh‘"vhM;v7J`k˜={öˆ#Ž?{ôèÑ[o½õÑG%f$—III—øì½÷ÞKÀhð˜Èà`T¿C^‹”Ã>‡“ZXAxÞM}á¡EŠØ¡adáØ‘ÀÖбcÇcÇŽ†kFIIIûöí‰ÉÇe§NþsqZµjEÀh0˜È`bdí)|ý–.à0料ðÐ%5;ö7"vh%°5\uÕUæ 3 Ïž=Û¢E bFòqÙ¾}{ç%!`4Ld01zÐ2 Ëa²Y™‘C¿%‡K8Ì»©/œ˜‘C\r€üD£–YX“Í8©\æÝÔZ¤ˆFpˆI`l ÷ÝwŸÙöвeË   «¯¾º  €˜qIŒZfa9L6㤩^Rã0料ðÐ"EìP2ˆ›%°5¸Ýîß~ûÍ0Œ-[¶ìÙ³GA+¼z£ “¸$ ‡‰ F-³°&›Yu³+3rXRã0殮ðP/EìÐ0"vh%0øÃŽK>p˜È mÔ2 Ëa²™C7»z#‡%5ónL@ìÐ0"vh%°ÅTTT<òÈ#$âƒðd–YX“ͺÙÕù,©Ñžwó ¾ÔQ)EìÐ0"v(-`‹)** #iä—äÁ]Ç2tZŒZfa9L6sèfWoä°¤Æ õ¥ŽJ)b‡†±.Ã0 ¬£¸¸¸wïÞeeeôŒááá999±±± 8p`À€.—‹†‘<;wÎÈȸØgÇŒSRRbw£böìÙ6lHNNîÛ·ohhhUUÕîÝ»—,Yr×]w-^¼Ør£ÉÁƒ÷íÛwòäÉààà^½zÅÅÅÉsi”Ò6:΢¢¢-Z4|èr¹bbbjkkiÍ…¬Å‹{ò¸víÚ‹}vâĉ—X²‹ñ‹/¾Ø¹sçÅ>ûé§ŸZ«ÓbB¤¤¤lذ!%%¥ÑIxÕÕÕ’NÂSoäb±c£@ìü Ðm1'Ož|õÕWÿñÐ3ÖÔÔ<ðÀÙÙÙBÃ0Æ÷Æo8NJFP ‡~˘˜˜K·­ÖÔÔØÝ(˜]” ¾ÔÑ%% bG vìc”Ààr™3gN~~þ믿~óÍ7òÉ'S§N]ºtéÿ×ÞýÅ4y، ÚZ-4@D¬:eà²,ƈ\€Ë2X¢™bpqÞÊ’nêŒgV®ŒQ!!š.t¸£&N3$½ Ñf,¥Ê’Q!ÕÂ〤ð»h~ü ¿Aô¡íóœÓ÷ëb!ç!ùp1¿9ßó<ç™!dXÛ†4’d–– Ò“à^Ȳ# ÊŽf4À±1>>®(Ê| qâÒ¥KÿüóO‡Ãát:»»»ûúú>û쳸~º“øDH Ö¶ôgï)zL<ô „ e'Nh€ccË–-k×®=|øðÛƒçæÍ›GŽ‘#qÁ‚ÿþû¯¢(ÑvttttÉ’%/_¾ŒG–^‰kÛ/.JHüÄC¯P@ÉPvâ„86–,Yrûöí+V(ЇÛÛÛ«««ý~UUÕÇåHÌËË{ðàAVV–Óéüûï¿=úàÁƒ«W¯Æ#K¯D4“û^™ÄO<ô "wÙ‰àØHKK ƒ™™™Š¢<þ¼¤¤¤¯¯o``Àn·‡Ãa9¿þú몪ªíÛ·Ûl¶p8\PPðûï¿çççÇ#K¯D@¥¥¥¡Ph†_ˆÇN]BGâ'z…× Å†Õj ÅÅÅŠ¢<~üXUÕH$¢ªjüŽ,N|â… ¢Ë%/^´Ùl«W¯ž3gNœ²ôJ„ ªjccãtOwïÞ-M( „$9{/ñ½BãK’²'4À±±qãÆƒž;wNQ”–––7oÞ;v¬··wýúõÒ$N6Ÿ›6mŠS„Ö¬YóùçŸO÷tíÚµÒ„BH’KG?ñÐ+0¾$);qÂ'бÑÓÓSQQæÏŸßÞÞ~ÿþý£Gµ¶¶Ê‘8i``àÇ<}út\SôMà%ÉÙ{ºLü§ÄO<ô  7Vñc£ªªÊd28pàôéÓÏž=SeÛ¶m—.]²Ûí¿þú«‰sçþß‘i‘H$zBÕG}ôàÁƒxÄé’‡7À#ñ½BÈILl˜Íæžžž¬¬¬ m~cšIDAT`0¸hÑ¢§OŸÚíögÏž­Y³&Nî&>qŸ@Æ188h2™’!€¾t™xè8Û +>ŽÑÑÑè•t ,P%zc{ffæèè¨4‰Œc|||||\Q”D6¢º„0]&ÌvÄ÷ÇFQQÑùóç÷ïßßÜÜœ——wèСo¿ýöܹs%%%Ò$0Ž­[·®]»öðáÃoz<ž›7o9rD¦PÀøJKKC¡Ð ¿DOTtšx0ÛþS’”8áèØ¸råÊŽ;>üðÃÔÔÔ{÷îÕÖÖz<«ÕzõêÕO?ýTŽÄI¯^½:sæÌ÷ß×}ƒ[²dÉíÛ·W¬X¡(J8noo¯®®öûýUUU>”)0¾¥K—ž9sfº§»wïîïï=QÑiâ¡ãl0²$);qB3=òz½ëÖ­³ÛíOŸ>]¼xñÛÇ8IÀ ÒÒÒ‚Á`ôkÀçÏŸ—””ôõõ 
Øíöp8,S(`|[¶l¹råÊtO¿øâ‹k×®‰ž¥ËăÙðÿ%OÙ‰`¼«èÞ¿DÞ—øD@999×®]+..VÅãñ”—— =yòä“O>‰ßÁ0º„Ä­E ìÝ»w†§»ví’ QQ”­[·þüóÏS=ÏñãÇã§K" Š79Ì;wõêÕ¡P(,\¸0;;[ÖPºÓeâ!Э*Â`¼«ÄoÿcÃ!0H$R__ïp8ÌfsAAÙlv8'NœˆD"’…b>€¡PvÞ—Øï¯‘H‰ßþdžC`:ß}÷][[[]]]ii©Édìììlhhøê«¯Ün·L¡1DŒ÷øíl8þ“ÅbéèèX¾|ùÛƒ]]]åååÁ`P¦P@ÍÍÍ555sæÌy{ðÆ/_¾Ü¹s§‰ …²£Ÿ@ã=Ì;×n·geeIœ![­Ö)ƒ6›mddD²P@ûöí{óæÍ”AUUÏž=+M"C¡ìhÆ!XxW~¿ÿË/¿ôûý7oÞìèèøå—_–-[ÖØØ¸xñb9QTTTÔÔÔ¸ÝîÉ÷±>Ÿ¯®®®²²R²P@ׯ_OMM}{äÅ‹^¯W¦D†BÙцO ñ®ªªªL&ÓNŸ>ýìÙ3EQ¶mÛvéÒ%»Ýþ믿ʑˆ¢¿¿¿ººúÎ;ééé™™™ªª—••]¾|Ùf³É !%%eÊg“ÆÆÆäH`(”Íh€ñ®ÌfsOOOVVV0\´hÑÓ§Oívû³gÏÖ¬Y3ó5}%béîîöz½¯^½Z¸paqq±Óé”50¸”””p8Æ»^À»`ÁEQ233£ÿ•&‹ÓéL|ÿ©K(@LpÞUQQÑùóçEinnÎËË;tèßï?tèPII‰4‰€(jkk[[[“!B0Lð[‘Ä'0ÊŽf¼Æ»:|øðŽ;êêêRSSïÝ»W[[{êÔ)«ÕzõêUiQÄ_C¯K( ‹Åý! E7dggK–ÀP(;š±ïáÑ£G^¯wݺuv»}bbâéÓ§‹/ž;7ŽË(‰Oà}E"‘“'O655õööFGrss÷ïßïr¹¦;3F¸D†BÙÑŒFïaÙ²eË–-‹þœ’’’››+_" ]VaE_úbÎårµµµ¹\®ÒÒR“É488ØÙÙÙÐРªªÛí–#€¡Pv´›ÞAMMÍ OwîÜ)A" ±±±ãÇ¿½$”››[__?66&Y( ³ÙÜÝÝ=eð¯¿þ²X,Ò$0ÊŽf¼Æ;¹{÷®Çã™îé­[·$H¢Ë*¬>>>>ž ¡€šššTUu:999+W®nll”&€¡Pv4c0ˆgË–-k×®=|øðÛƒçæÍ›GŽ‘)Hww·×ë}õêÕÂ… ‹‹‹N§|‰ …²£ 0ˆgÉ’%·oß^±b…¢(áp¸½½½ººÚï÷WUU=|øP¦P€¢ñ¤¥¥ƒÁÌÌLEQž?^RRÒ××700`·ÛÃá°L¡€ñ­ZµjÆ yyy.—KÖD†BÙ™ ö€x¬Vk ˆþüøñcUU#‘ˆªªééé’…Æwíڵׯ_777KœÀP(;³Á`ÏÎ;ûûûÏ;§(Ê©S§øá‡ÞÞÞP(ôÛo¿É C4À žžžžŠŠŠ@ 0þüöööû÷ï=z´¨¨¨µµµ°°P¦P€¢!ù|>»Ýn6›åˆ`X(Š^E-w(Àìqˆ'‰Ô××;³Ù\PP`6›lj'"‘ˆd¡14Wï?ðÞ\.W[[›Ëå*--5™Lƒƒƒ ªªºÝn™BbˆO @<‹¥££cùòåovuu•——ƒA™B!477×ÔÔÌ™3çíÁ7n¼|ùrçÎr$0ÊŽf| â ‡ÃV«uÊ Íf‘,¾}ûÞ¼y3ePUÕ³gÏJ“ÀP(;šñ 4ˆ§¢¢¢¦¦ÆívO¾õù|uuu•••’…¢¸~ýzjjêÛ#/^¼ðz½2%0ÊŽ6| âéïﯮ®¾sçNzzzff¦ªªÃÃÃeee—/_¶Ùl2…BHII™òYऱ±19 eG3`Uww·×ëÞHT\\ìt:e  .%%%Ï›7OâD†BÙÑŒ`V˜‰H0ÊŽf‚â©­­mmmM†P@Á`0Á“ÂÄ'0ÊŽf‚â É Áb±D…BÑ ÙÙÙ’%0ÊŽf¼ñüñÇß|óM2„BˆD"õõõ‡Ãl6˜Íf‡ÃqâĉH$"M"C¡ìhÆ`˜.«°¢/ý1çr¹ÚÚÚ\.Wii©ÉdìììlhhPUÕívË‘ÀP(;šqˆ'‰œŸ¯®®.--íÊ•+r$0ÊŽf‚âÑeVš¥_ æšššTUu:999+W®nll”&€¡Pv4ã 0ˆG—UXi–~8éîîöz½Ñ#⊋‹N§|‰ …²£ 0ˆ§¿¿¿ººúÎ;ééé™™™ªª—••]¾|Ùf³É C4À *]Va%Xúb®¶¶vÓ¦MÛ·o—8€¡Pv4ãh•ÓéL|ÿ©K(`p@```@îD†BÙÑŒ7À ˜½{÷ž?~º§»víjii‘# ¶x ‚¹{÷®Çã™îé­[·¤ „ …¢²³³eM`(”÷Å`L~~þ‹/fø…¡¡!9BQD"‘“'O655õööFGrss÷ïßïr¹æÌ™#G"C¡ìhÆ`L H’P@.—«­­Íår•––šL¦ÁÁÁÎÎΆ†UUÝn·‰ …²£o€fÅb±tttL^‘ÕÕÕU^^ åH`(”Í>Ðû[8¶Z­Sm6ÛÈȈ4‰ …²£ 0À¬TTTÔÔÔøýþÉŸÏ·gÏžÊÊJi 
eG3`€YijjRUÕétfddäääddd¬\¹rxx¸±±QšD†BÙÑŒ=À1ÐÝÝíõz£·ƒ;Nù eG`@RàhíöîÝ;ÃÓ]»vIÀP(;³Á=ÀÚݽ{×ãñL÷ôÖ­[$0ÊÎlð 4€vùùù/^¼˜á†††DO`(”Ù $ö’ 0 )Ð’ 0 )Ð’ 0 )Ð’ 0 )Ð’ 0 )Ð’ 0 )Ð’ 0 )Ð’ 0 )Ð’ÂÿÆv ª$*™IEND®B`‚sleef-3.5.1/doc/html/osi_logo.png000066400000000000000000000046541373003144100166540ustar00rootroot00000000000000‰PNG  IHDR´ÎeZ5¦PLTESÿÿÿ=¦9ÖÙÖ=j9t–œšNk"­I» í´¶´Ú(sšn?O@ïŒÓÚ’¹d!j˜uûI¢§¥ÛK‰§µÛg^{Nk·½óœÖ%:º_]’‡,zí|=´ÙŠD± dÈ¢#ƒX¬„9S/‚ qуÐC)rvÑ•ÐC*òä¢k¡‡Tdå¢w¦^JÄF±Ô´+±Q,5-ŽÅ†•º\l‹¾WRëô ú$¥î¥Ø]}©¹/öy±It••š‹hVìE]A‘鸎#Fߦ;E&[Ç“¢Ø`l"Ž=ÐôÚ¼?_"w>U%HNŽååG¬¿S‘"È'þúþKÉ,g ð)^»ÀŽì^gѵ'aË9”wFÁ;ºJ ï=td¯–Ã,èÑj ­‘9›@Ÿ:=ßdbC¦uC§õPÁĆHk6jÝú™Ø­*LÐd+†zXEÂt´x‘^[U9B#Õ39"1hÑïTª¶í×¹™ÈY`¢Ò%Ú¹Zz(Có,mÆq(7L‚¨Ð,Zäsµ ýévè' o/þËaœŸ|å%J«q^¡oºðóTüQß…²VÐ ôܦ;颼íB·Ýws·Fl5,Ž8ÑG,zpб|0cÖwšƒ'g Á«²!ØÝÛôÑ]o¤íõóó—ܱL ëìšôU¼­p“žµÜ5#ö:ŠåN¬¤Öm<“L§Š™»Hw|ˆ¤Ó6­g®ÿ‘FÃÌõŸÙçØÝûÇÂü« bÐW-kú2ì>^¸×ZÊþ6}‡sï)–é¯ö@È.ÍÉ®(T“Ÿ6Š"ŽìuDë8Aÿ¶8îÐÐþŒ<ô)ƒ^˜ß¦4¦¶¼'“£j¬€x–aVÉ¥ÔDk±ˆ¥ÔzçNÍKyaÑPj¢³™Í(Š8ÆëÆ3%Þé÷@OYD3ýB&ŠlGZ¢™~}ºÌE9¿­aý8¶Ôƒh£ü(2ŽB¢½qŽ#€Z Àü Zßi¦f¾^ߎ"±µ¡·Q;€Ðùh Lóûh †ÃGÛA£IÍ×AÌÉDûdËõãØ2ˆšDŸ_J£9CÍŠ 6Š~[ÞË tE·ÅÜò:í£¹PÀDÑcC§shº}gËÜÆ‘Nk4ÕFŸŒ¢m½‘NçÑÌí0íB¿x :~ëº%aSqlÆ¢úm»ý9ƒæÆÒOx+ý?韟/¯Û…f$ÑË·‚ð¿w«íÛÛåásMIÀ–{mÞÉ«¿Ú·›ÞFq0àÍ2“½æ¾ï' úór &¹.Ëd{D ×"úòõwžÇNH½E“6;—U­ ¿üjcÇXV{êJC=ü„¦G±½Qô–l‘§Å´X„›±**žûcc¦çãX>Ÿ™c&mn~F“¶5g]øF=„HiíÑó{þLUÂ-Ñ‹V×Ãêš¶RyþhÜ?Ì(ªpS,Š…OÏÛÃÌÖ—i•„‹(oŸž·ÍÌ—Ê—IÃð…èZ=õëN¢Ò¸KIêi ÚuÏRì©7'z ˆ(êu-7¤k, ôËI4D¿ƒŠˆÚ )<XÐW§±qtQ‘µ8µsÆÕ‘.ðyÏŸZ|0œÓ-n*ìë”hôO$ŽqزTÄñÃν-[Šô» ?Ò¼¨©4>‘FrFKÃVÜ¡'^þ¸–VH쇕Í3N­(²¿ðHs]bÃØ"“Tä ÐðeZsa †+ îKBÎ1/é"ÿœæò¼]ñºz ’FÚöi……ÜÓ'ºÎänÌ0ÑÒP×§˜$UèP> Û…A£~~3Ÿ.÷\-!éêët~éÁÑ©4is¤›|Z&_ö*=H‚£9ÌÓ{·>Mê¨<ºv´:ÒiÉÁ8Ú óiixÎã+Y>ÝXÚM¨i‰‡1µÍáR34ÿØJ5O 8šÀŸ8ÃLtdUc'ß¶Ô<‚ãÔ>t}¢[ž8M´b‰Ù‚SÇì8ûO,¡"…Ä-NtÁ×QÍ•wƒÐÔÆFR,3šë¬ðÔãÆ§±ìïPÕèZ4\ly‡úHkÄÝ]JôYw‡ÆÒk|_d CÂ1þMHÝrfGøÔ:šMIV5Dšîy‘d:dË«ðªª9Wݘéaÿ–ûíþVb½ï¥ë϶ÄÓ¾ãÕí{Cަ²tC¤J§”æ?|Ëí…_EÐëÛ_»YX7½];”Käe½‘'Ú.@¥ñhU ½¹žFãÑÞIëšÂn`ÚK`³®ß˜ MäÑWìùÊÒ§%qŽV¥¹l†ŒÉWà?Њj™ #include #include #include typedef struct { double x, y; } double2; double2 dd(double d) { double2 r = { d, 0 }; return r; } int64_t d2i(double d) { union { double 
f; int64_t i; } tmp = {.f = d }; return tmp.i; } double i2d(int64_t i) { union { double f; int64_t i; } tmp = {.i = i }; return tmp.f; } double upper(double d) { return i2d(d2i(d) & 0xfffffffff8000000LL); } double clearlsb(double d) { return i2d(d2i(d) & 0xfffffffffffffffeLL); } double2 ddrenormalize(double2 t) { double2 s = dd(t.x + t.y); s.y = t.x - s.x + t.y; return s; } double2 ddadd(double2 x, double2 y) { double2 r = dd(x.x + y.x); double v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y.x - v) + (x.y + y.y); return r; } double2 ddmul(double x, double y) { double2 r = dd(x * y); r.y = fma(x, y, -r.x); return r; } double2 ddmul2(double2 x, double2 y) { double2 r = ddmul(x.x, y.x); r.y += x.x * y.y + x.y * y.x; return r; } // This function computes remainder(a, PI/2) double2 modifiedPayneHanek(double a) { double table[4]; int scale = fabs(a) > 1e+200 ? -128 : 0; a = ldexp(a, scale); // Table genration mpfr_set_default_prec(2048); mpfr_t pi, m; mpfr_inits(pi, m, NULL); mpfr_const_pi(pi, GMP_RNDN); mpfr_d_div(m, 2, pi, GMP_RNDN); mpfr_set_exp(m, mpfr_get_exp(m) + (ilogb(a) - 53 - scale)); mpfr_frac(m, m, GMP_RNDN); mpfr_set_exp(m, mpfr_get_exp(m) - (ilogb(a) - 53)); for(int i=0;i<4;i++) { table[i] = clearlsb(mpfr_get_d(m, GMP_RNDN)); mpfr_sub_d(m, m, table[i], GMP_RNDN); } mpfr_clears(pi, m, NULL); // Main computation double2 x = dd(0); for(int i=0;i<4;i++) { x = ddadd(x, ddmul(a, table[i])); x.x = x.x - round(x.x); x = ddrenormalize(x); } double2 pio2 = { 3.141592653589793*0.5, 1.2246467991473532e-16*0.5 }; x = ddmul2(x, pio2); return fabs(a) < 0.785398163397448279 ? 
dd(a) : x; } int main(int argc, char **argv) { double a = ldexp(6381956970095103.0, 797); if (argc > 1) a = atof(argv[1]); printf("a = %.20g\n", a); // mpfr_set_default_prec(2048); mpfr_t pi, pio2, x, y, r; mpfr_inits(pi, pio2, x, y, r, NULL); mpfr_const_pi(pi, GMP_RNDN); mpfr_mul_d(pio2, pi, 0.5, GMP_RNDN); // mpfr_set_d(x, a, GMP_RNDN); mpfr_remainder(r, x, pio2, GMP_RNDN); mpfr_printf("mpfr = %.64RNf\n", r); // double2 dd = modifiedPayneHanek(a); mpfr_set_d(x, dd.x, GMP_RNDN); mpfr_add_d(x, x, dd.y, GMP_RNDN); mpfr_printf("dd = %.64RNf\n", x); mpfr_sub(x, x, r, GMP_RNDN); mpfr_abs(x, x, GMP_RNDN); mpfr_div(x, x, r, GMP_RNDN); double err = mpfr_get_d(x, GMP_RNDN); printf("error = %g\n", err); } sleef-3.5.1/doc/html/ppc64.xhtml000066400000000000000000006104011373003144100163370ustar00rootroot00000000000000 SLEEF - Math library reference (POWER)

SLEEF Documentation - Math library reference (POWER)

Table of contents

Data types for PowerPC 64 architecture

Sleef_vector_float_2

Description

Sleef_vector_float_2 is a data type for storing two vector float values, which is defined in sleef.h as follows:

typedef struct {
  vector float x, y;
} Sleef_vector_float_2;

Sleef_vector_double_2

Description

Sleef_vector_double_2 is a data type for storing two vector double values, which is defined in sleef.h as follows:

typedef struct {
  vector double x, y;
} Sleef_vector_double_2;

Trigonometric Functions

Vectorized double precision sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sind1_u10purec(double a);
double Sleef_sind1_u10purecfma(double a);
double Sleef_cinz_sind1_u10purec(double a);
double Sleef_finz_sind1_u10purecfma(double a);

vector double Sleef_sind2_u10(vector double a);
vector double Sleef_sind2_u10vsx(vector double a);
vector double Sleef_sind2_u10vsxnofma(vector double a);
vector double Sleef_cinz_sind2_u10vsxnofma(vector double a);
vector double Sleef_finz_sind2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sin_u10 with the same accuracy specification.


Vectorized single precision sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sinf1_u10purec(float a);
float Sleef_sinf1_u10purecfma(float a);
float Sleef_cinz_sinf1_u10purec(float a);
float Sleef_finz_sinf1_u10purecfma(float a);

vector float Sleef_sinf4_u10(vector float a);
vector float Sleef_sinf4_u10vsx(vector float a);
vector float Sleef_sinf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_sinf4_u10vsxnofma(vector float a);
vector float Sleef_finz_sinf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u10 with the same accuracy specification.


Vectorized double precision sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sind1_u35purec(double a);
double Sleef_sind1_u35purecfma(double a);
double Sleef_cinz_sind1_u35purec(double a);
double Sleef_finz_sind1_u35purecfma(double a);

vector double Sleef_sind2_u35(vector double a);
vector double Sleef_sind2_u35vsx(vector double a);
vector double Sleef_sind2_u35vsxnofma(vector double a);
vector double Sleef_cinz_sind2_u35vsxnofma(vector double a);
vector double Sleef_finz_sind2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sin_u35 with the same accuracy specification.


Vectorized single precision sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sinf1_u35purec(float a);
float Sleef_sinf1_u35purecfma(float a);
float Sleef_cinz_sinf1_u35purec(float a);
float Sleef_finz_sinf1_u35purecfma(float a);

vector float Sleef_sinf4_u35(vector float a);
vector float Sleef_sinf4_u35vsx(vector float a);
vector float Sleef_sinf4_u35vsxnofma(vector float a);
vector float Sleef_cinz_sinf4_u35vsxnofma(vector float a);
vector float Sleef_finz_sinf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u35 with the same accuracy specification.


Vectorized double precision cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cosd1_u10purec(double a);
double Sleef_cosd1_u10purecfma(double a);
double Sleef_cinz_cosd1_u10purec(double a);
double Sleef_finz_cosd1_u10purecfma(double a);

vector double Sleef_cosd2_u10(vector double a);
vector double Sleef_cosd2_u10vsx(vector double a);
vector double Sleef_cosd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_cosd2_u10vsxnofma(vector double a);
vector double Sleef_finz_cosd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cos_u10 with the same accuracy specification.


Vectorized single precision cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_cosf1_u10purec(float a);
float Sleef_cosf1_u10purecfma(float a);
float Sleef_cinz_cosf1_u10purec(float a);
float Sleef_finz_cosf1_u10purecfma(float a);

vector float Sleef_cosf4_u10(vector float a);
vector float Sleef_cosf4_u10vsx(vector float a);
vector float Sleef_cosf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_cosf4_u10vsxnofma(vector float a);
vector float Sleef_finz_cosf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u10 with the same accuracy specification.


Vectorized double precision cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_cosd1_u35purec(double a);
double Sleef_cosd1_u35purecfma(double a);
double Sleef_cinz_cosd1_u35purec(double a);
double Sleef_finz_cosd1_u35purecfma(double a);

vector double Sleef_cosd2_u35(vector double a);
vector double Sleef_cosd2_u35vsx(vector double a);
vector double Sleef_cosd2_u35vsxnofma(vector double a);
vector double Sleef_cinz_cosd2_u35vsxnofma(vector double a);
vector double Sleef_finz_cosd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cos_u35 with the same accuracy specification.


Vectorized single precision cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_cosf1_u35purec(float a);
float Sleef_cosf1_u35purecfma(float a);
float Sleef_cinz_cosf1_u35purec(float a);
float Sleef_finz_cosf1_u35purecfma(float a);

vector float Sleef_cosf4_u35(vector float a);
vector float Sleef_cosf4_u35vsx(vector float a);
vector float Sleef_cosf4_u35vsxnofma(vector float a);
vector float Sleef_cinz_cosf4_u35vsxnofma(vector float a);
vector float Sleef_finz_cosf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u35 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincosd1_u10purec(double a);
Sleef_double2 Sleef_sincosd1_u10purecfma(double a);
Sleef_double2 Sleef_cinz_sincosd1_u10purec(double a);
Sleef_double2 Sleef_finz_sincosd1_u10purecfma(double a);

Sleef_vector_double_2 Sleef_sincosd2_u10(vector double a);
Sleef_vector_double_2 Sleef_sincosd2_u10vsx(vector double a);
Sleef_vector_double_2 Sleef_sincosd2_u10vsxnofma(vector double a);
Sleef_vector_double_2 Sleef_cinz_sincosd2_u10vsxnofma(vector double a);
Sleef_vector_double_2 Sleef_finz_sincosd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincos_u10 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float2 Sleef_sincosf1_u10purec(float a);
Sleef_float2 Sleef_sincosf1_u10purecfma(float a);
Sleef_float2 Sleef_cinz_sincosf1_u10purec(float a);
Sleef_float2 Sleef_finz_sincosf1_u10purecfma(float a);

Sleef_vector_float_2 Sleef_sincosf4_u10(vector float a);
Sleef_vector_float_2 Sleef_sincosf4_u10vsx(vector float a);
Sleef_vector_float_2 Sleef_sincosf4_u10vsxnofma(vector float a);
Sleef_vector_float_2 Sleef_cinz_sincosf4_u10vsxnofma(vector float a);
Sleef_vector_float_2 Sleef_finz_sincosf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u10 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_double2 Sleef_sincosd1_u35purec(double a);
Sleef_double2 Sleef_sincosd1_u35purecfma(double a);
Sleef_double2 Sleef_cinz_sincosd1_u35purec(double a);
Sleef_double2 Sleef_finz_sincosd1_u35purecfma(double a);

Sleef_vector_double_2 Sleef_sincosd2_u35(vector double a);
Sleef_vector_double_2 Sleef_sincosd2_u35vsx(vector double a);
Sleef_vector_double_2 Sleef_sincosd2_u35vsxnofma(vector double a);
Sleef_vector_double_2 Sleef_cinz_sincosd2_u35vsxnofma(vector double a);
Sleef_vector_double_2 Sleef_finz_sincosd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincos_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_float2 Sleef_sincosf1_u35purec(float a);
Sleef_float2 Sleef_sincosf1_u35purecfma(float a);
Sleef_float2 Sleef_cinz_sincosf1_u35purec(float a);
Sleef_float2 Sleef_finz_sincosf1_u35purecfma(float a);

Sleef_vector_float_2 Sleef_sincosf4_u35(vector float a);
Sleef_vector_float_2 Sleef_sincosf4_u35vsx(vector float a);
Sleef_vector_float_2 Sleef_sincosf4_u35vsxnofma(vector float a);
Sleef_vector_float_2 Sleef_cinz_sincosf4_u35vsxnofma(vector float a);
Sleef_vector_float_2 Sleef_finz_sincosf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u35 with the same accuracy specification.


Vectorized double precision sine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sinpid1_u05purec(double a);
double Sleef_sinpid1_u05purecfma(double a);
double Sleef_cinz_sinpid1_u05purec(double a);
double Sleef_finz_sinpid1_u05purecfma(double a);

vector double Sleef_sinpid2_u05(vector double a);
vector double Sleef_sinpid2_u05vsx(vector double a);
vector double Sleef_sinpid2_u05vsxnofma(vector double a);
vector double Sleef_cinz_sinpid2_u05vsxnofma(vector double a);
vector double Sleef_finz_sinpid2_u05vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinpi_u05 with the same accuracy specification.


Vectorized single precision sine function of πa with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sinpif1_u05purec(float a);
float Sleef_sinpif1_u05purecfma(float a);
float Sleef_cinz_sinpif1_u05purec(float a);
float Sleef_finz_sinpif1_u05purecfma(float a);

vector float Sleef_sinpif4_u05(vector float a);
vector float Sleef_sinpif4_u05vsx(vector float a);
vector float Sleef_sinpif4_u05vsxnofma(vector float a);
vector float Sleef_cinz_sinpif4_u05vsxnofma(vector float a);
vector float Sleef_finz_sinpif4_u05vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinpif_u05 with the same accuracy specification.


Vectorized double precision cosine function of πa with 0.506 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cospid1_u05purec(double a);
double Sleef_cospid1_u05purecfma(double a);
double Sleef_cinz_cospid1_u05purec(double a);
double Sleef_finz_cospid1_u05purecfma(double a);

vector double Sleef_cospid2_u05(vector double a);
vector double Sleef_cospid2_u05vsx(vector double a);
vector double Sleef_cospid2_u05vsxnofma(vector double a);
vector double Sleef_cinz_cospid2_u05vsxnofma(vector double a);
vector double Sleef_finz_cospid2_u05vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cospi_u05 with the same accuracy specification.


Vectorized single precision cosine function of πa with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_cospif1_u05purec(float a);
float Sleef_cospif1_u05purecfma(float a);
float Sleef_cinz_cospif1_u05purec(float a);
float Sleef_finz_cospif1_u05purecfma(float a);

vector float Sleef_cospif4_u05(vector float a);
vector float Sleef_cospif4_u05vsx(vector float a);
vector float Sleef_cospif4_u05vsxnofma(vector float a);
vector float Sleef_cinz_cospif4_u05vsxnofma(vector float a);
vector float Sleef_finz_cospif4_u05vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cospif_u05 with the same accuracy specification.


Vectorized double precision combined sine and cosine function of πa with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincospid1_u05purec(double a);
Sleef_double2 Sleef_sincospid1_u05purecfma(double a);
Sleef_double2 Sleef_cinz_sincospid1_u05purec(double a);
Sleef_double2 Sleef_finz_sincospid1_u05purecfma(double a);

Sleef_vector_double_2 Sleef_sincospid2_u05(vector double a);
Sleef_vector_double_2 Sleef_sincospid2_u05vsx(vector double a);
Sleef_vector_double_2 Sleef_sincospid2_u05vsxnofma(vector double a);
Sleef_vector_double_2 Sleef_cinz_sincospid2_u05vsxnofma(vector double a);
Sleef_vector_double_2 Sleef_finz_sincospid2_u05vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospi_u05 with the same accuracy specification.


Vectorized single precision combined sine and cosine function of πa with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float2 Sleef_sincospif1_u05purec(float a);
Sleef_float2 Sleef_sincospif1_u05purecfma(float a);
Sleef_float2 Sleef_cinz_sincospif1_u05purec(float a);
Sleef_float2 Sleef_finz_sincospif1_u05purecfma(float a);

Sleef_vector_float_2 Sleef_sincospif4_u05(vector float a);
Sleef_vector_float_2 Sleef_sincospif4_u05vsx(vector float a);
Sleef_vector_float_2 Sleef_sincospif4_u05vsxnofma(vector float a);
Sleef_vector_float_2 Sleef_cinz_sincospif4_u05vsxnofma(vector float a);
Sleef_vector_float_2 Sleef_finz_sincospif4_u05vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u05 with the same accuracy specification.


Vectorized double precision combined sine and cosine function of πa with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_double2 Sleef_sincospid1_u35purec(double a);
Sleef_double2 Sleef_sincospid1_u35purecfma(double a);
Sleef_double2 Sleef_cinz_sincospid1_u35purec(double a);
Sleef_double2 Sleef_finz_sincospid1_u35purecfma(double a);

Sleef_vector_double_2 Sleef_sincospid2_u35(vector double a);
Sleef_vector_double_2 Sleef_sincospid2_u35vsx(vector double a);
Sleef_vector_double_2 Sleef_sincospid2_u35vsxnofma(vector double a);
Sleef_vector_double_2 Sleef_cinz_sincospid2_u35vsxnofma(vector double a);
Sleef_vector_double_2 Sleef_finz_sincospid2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospi_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine function of πa with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_float2 Sleef_sincospif1_u35purec(float a);
Sleef_float2 Sleef_sincospif1_u35purecfma(float a);
Sleef_float2 Sleef_cinz_sincospif1_u35purec(float a);
Sleef_float2 Sleef_finz_sincospif1_u35purecfma(float a);

Sleef_vector_float_2 Sleef_sincospif4_u35(vector float a);
Sleef_vector_float_2 Sleef_sincospif4_u35vsx(vector float a);
Sleef_vector_float_2 Sleef_sincospif4_u35vsxnofma(vector float a);
Sleef_vector_float_2 Sleef_cinz_sincospif4_u35vsxnofma(vector float a);
Sleef_vector_float_2 Sleef_finz_sincospif4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u35 with the same accuracy specification.


Vectorized double precision tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_tand1_u10purec(double a);
double Sleef_tand1_u10purecfma(double a);
double Sleef_cinz_tand1_u10purec(double a);
double Sleef_finz_tand1_u10purecfma(double a);

vector double Sleef_tand2_u10(vector double a);
vector double Sleef_tand2_u10vsx(vector double a);
vector double Sleef_tand2_u10vsxnofma(vector double a);
vector double Sleef_cinz_tand2_u10vsxnofma(vector double a);
vector double Sleef_finz_tand2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tan_u10 with the same accuracy specification.


Vectorized single precision tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_tanf1_u10purec(float a);
float Sleef_tanf1_u10purecfma(float a);
float Sleef_cinz_tanf1_u10purec(float a);
float Sleef_finz_tanf1_u10purecfma(float a);

vector float Sleef_tanf4_u10(vector float a);
vector float Sleef_tanf4_u10vsx(vector float a);
vector float Sleef_tanf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_tanf4_u10vsxnofma(vector float a);
vector float Sleef_finz_tanf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u10 with the same accuracy specification.


Vectorized double precision tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_tand1_u35purec(double a);
double Sleef_tand1_u35purecfma(double a);
double Sleef_cinz_tand1_u35purec(double a);
double Sleef_finz_tand1_u35purecfma(double a);

vector double Sleef_tand2_u35(vector double a);
vector double Sleef_tand2_u35vsx(vector double a);
vector double Sleef_tand2_u35vsxnofma(vector double a);
vector double Sleef_cinz_tand2_u35vsxnofma(vector double a);
vector double Sleef_finz_tand2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tan_u35 with the same accuracy specification.


Vectorized single precision tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_tanf1_u35purec(float a);
float Sleef_tanf1_u35purecfma(float a);
float Sleef_cinz_tanf1_u35purec(float a);
float Sleef_finz_tanf1_u35purecfma(float a);

vector float Sleef_tanf4_u35(vector float a);
vector float Sleef_tanf4_u35vsx(vector float a);
vector float Sleef_tanf4_u35vsxnofma(vector float a);
vector float Sleef_cinz_tanf4_u35vsxnofma(vector float a);
vector float Sleef_finz_tanf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u35 with the same accuracy specification.

Power, exponential, and logarithmic function

Vectorized double precision power function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_powd1_u10purec(double a, double b);
double Sleef_powd1_u10purecfma(double a, double b);
double Sleef_cinz_powd1_u10purec(double a, double b);
double Sleef_finz_powd1_u10purecfma(double a, double b);

vector double Sleef_powd2_u10(vector double a, vector double b);
vector double Sleef_powd2_u10vsx(vector double a, vector double b);
vector double Sleef_powd2_u10vsxnofma(vector double a, vector double b);
vector double Sleef_cinz_powd2_u10vsxnofma(vector double a, vector double b);
vector double Sleef_finz_powd2_u10vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_pow_u10 with the same accuracy specification.


Vectorized single precision power function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_powf1_u10purec(float a, float b);
float Sleef_powf1_u10purecfma(float a, float b);
float Sleef_cinz_powf1_u10purec(float a, float b);
float Sleef_finz_powf1_u10purecfma(float a, float b);

vector float Sleef_powf4_u10(vector float a, vector float b);
vector float Sleef_powf4_u10vsx(vector float a, vector float b);
vector float Sleef_powf4_u10vsxnofma(vector float a, vector float b);
vector float Sleef_cinz_powf4_u10vsxnofma(vector float a, vector float b);
vector float Sleef_finz_powf4_u10vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_powf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_logd1_u10purec(double a);
double Sleef_logd1_u10purecfma(double a);
double Sleef_cinz_logd1_u10purec(double a);
double Sleef_finz_logd1_u10purecfma(double a);

vector double Sleef_logd2_u10(vector double a);
vector double Sleef_logd2_u10vsx(vector double a);
vector double Sleef_logd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_logd2_u10vsxnofma(vector double a);
vector double Sleef_finz_logd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log_u10 with the same accuracy specification.


Vectorized single precision natural logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_logf1_u10purec(float a);
float Sleef_logf1_u10purecfma(float a);
float Sleef_cinz_logf1_u10purec(float a);
float Sleef_finz_logf1_u10purecfma(float a);

vector float Sleef_logf4_u10(vector float a);
vector float Sleef_logf4_u10vsx(vector float a);
vector float Sleef_logf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_logf4_u10vsxnofma(vector float a);
vector float Sleef_finz_logf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_logd1_u35purec(double a);
double Sleef_logd1_u35purecfma(double a);
double Sleef_cinz_logd1_u35purec(double a);
double Sleef_finz_logd1_u35purecfma(double a);

vector double Sleef_logd2_u35(vector double a);
vector double Sleef_logd2_u35vsx(vector double a);
vector double Sleef_logd2_u35vsxnofma(vector double a);
vector double Sleef_cinz_logd2_u35vsxnofma(vector double a);
vector double Sleef_finz_logd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log_u35 with the same accuracy specification.


Vectorized single precision natural logarithmic function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_logf1_u35purec(float a);
float Sleef_logf1_u35purecfma(float a);
float Sleef_cinz_logf1_u35purec(float a);
float Sleef_finz_logf1_u35purecfma(float a);

vector float Sleef_logf4_u35(vector float a);
vector float Sleef_logf4_u35vsx(vector float a);
vector float Sleef_logf4_u35vsxnofma(vector float a);
vector float Sleef_cinz_logf4_u35vsxnofma(vector float a);
vector float Sleef_finz_logf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u35 with the same accuracy specification.


Vectorized double precision base-10 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log10d1_u10purec(double a);
double Sleef_log10d1_u10purecfma(double a);
double Sleef_cinz_log10d1_u10purec(double a);
double Sleef_finz_log10d1_u10purecfma(double a);

vector double Sleef_log10d2_u10(vector double a);
vector double Sleef_log10d2_u10vsx(vector double a);
vector double Sleef_log10d2_u10vsxnofma(vector double a);
vector double Sleef_cinz_log10d2_u10vsxnofma(vector double a);
vector double Sleef_finz_log10d2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log10_u10 with the same accuracy specification.


Vectorized single precision base-10 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_log10f1_u10purec(float a);
float Sleef_log10f1_u10purecfma(float a);
float Sleef_cinz_log10f1_u10purec(float a);
float Sleef_finz_log10f1_u10purecfma(float a);

vector float Sleef_log10f4_u10(vector float a);
vector float Sleef_log10f4_u10vsx(vector float a);
vector float Sleef_log10f4_u10vsxnofma(vector float a);
vector float Sleef_cinz_log10f4_u10vsxnofma(vector float a);
vector float Sleef_finz_log10f4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log10f_u10 with the same accuracy specification.


Vectorized double precision base-2 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log2d1_u10purec(double a);
double Sleef_log2d1_u10purecfma(double a);
double Sleef_cinz_log2d1_u10purec(double a);
double Sleef_finz_log2d1_u10purecfma(double a);

vector double Sleef_log2d2_u10(vector double a);
vector double Sleef_log2d2_u10vsx(vector double a);
vector double Sleef_log2d2_u10vsxnofma(vector double a);
vector double Sleef_cinz_log2d2_u10vsxnofma(vector double a);
vector double Sleef_finz_log2d2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log2_u10 with the same accuracy specification.


Vectorized single precision base-2 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_log2f1_u10purec(float a);
float Sleef_log2f1_u10purecfma(float a);
float Sleef_cinz_log2f1_u10purec(float a);
float Sleef_finz_log2f1_u10purecfma(float a);

vector float Sleef_log2f4_u10(vector float a);
vector float Sleef_log2f4_u10vsx(vector float a);
vector float Sleef_log2f4_u10vsxnofma(vector float a);
vector float Sleef_cinz_log2f4_u10vsxnofma(vector float a);
vector float Sleef_finz_log2f4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log2f_u10 with the same accuracy specification.


Vectorized double precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log1pd1_u10purec(double a);
double Sleef_log1pd1_u10purecfma(double a);
double Sleef_cinz_log1pd1_u10purec(double a);
double Sleef_finz_log1pd1_u10purecfma(double a);

vector double Sleef_log1pd2_u10(vector double a);
vector double Sleef_log1pd2_u10vsx(vector double a);
vector double Sleef_log1pd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_log1pd2_u10vsxnofma(vector double a);
vector double Sleef_finz_log1pd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log1p_u10 with the same accuracy specification.


Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_log1pf1_u10purec(float a);
float Sleef_log1pf1_u10purecfma(float a);
float Sleef_cinz_log1pf1_u10purec(float a);
float Sleef_finz_log1pf1_u10purecfma(float a);

vector float Sleef_log1pf4_u10(vector float a);
vector float Sleef_log1pf4_u10vsx(vector float a);
vector float Sleef_log1pf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_log1pf4_u10vsxnofma(vector float a);
vector float Sleef_finz_log1pf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log1pf_u10 with the same accuracy specification.


Vectorized double precision base-e exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_expd1_u10purec(double a);
double Sleef_expd1_u10purecfma(double a);
double Sleef_cinz_expd1_u10purec(double a);
double Sleef_finz_expd1_u10purecfma(double a);

vector double Sleef_expd2_u10(vector double a);
vector double Sleef_expd2_u10vsx(vector double a);
vector double Sleef_expd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_expd2_u10vsxnofma(vector double a);
vector double Sleef_finz_expd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp_u10 with the same accuracy specification.


Vectorized single precision base-e exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_expf1_u10purec(float a);
float Sleef_expf1_u10purecfma(float a);
float Sleef_cinz_expf1_u10purec(float a);
float Sleef_finz_expf1_u10purecfma(float a);

vector float Sleef_expf4_u10(vector float a);
vector float Sleef_expf4_u10vsx(vector float a);
vector float Sleef_expf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_expf4_u10vsxnofma(vector float a);
vector float Sleef_finz_expf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expf_u10 with the same accuracy specification.


Vectorized double precision base-2 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_exp2d1_u10purec(double a);
double Sleef_exp2d1_u10purecfma(double a);
double Sleef_cinz_exp2d1_u10purec(double a);
double Sleef_finz_exp2d1_u10purecfma(double a);

vector double Sleef_exp2d2_u10(vector double a);
vector double Sleef_exp2d2_u10vsx(vector double a);
vector double Sleef_exp2d2_u10vsxnofma(vector double a);
vector double Sleef_cinz_exp2d2_u10vsxnofma(vector double a);
vector double Sleef_finz_exp2d2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp2_u10 with the same accuracy specification.


Vectorized single precision base-2 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_exp2f1_u10purec(float a);
float Sleef_exp2f1_u10purecfma(float a);
float Sleef_cinz_exp2f1_u10purec(float a);
float Sleef_finz_exp2f1_u10purecfma(float a);

vector float Sleef_exp2f4_u10(vector float a);
vector float Sleef_exp2f4_u10vsx(vector float a);
vector float Sleef_exp2f4_u10vsxnofma(vector float a);
vector float Sleef_cinz_exp2f4_u10vsxnofma(vector float a);
vector float Sleef_finz_exp2f4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp2f_u10 with the same accuracy specification.


Vectorized double precision base-10 exponential function with 1.09 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_exp10d1_u10purec(double a);
double Sleef_exp10d1_u10purecfma(double a);
double Sleef_cinz_exp10d1_u10purec(double a);
double Sleef_finz_exp10d1_u10purecfma(double a);

vector double Sleef_exp10d2_u10(vector double a);
vector double Sleef_exp10d2_u10vsx(vector double a);
vector double Sleef_exp10d2_u10vsxnofma(vector double a);
vector double Sleef_cinz_exp10d2_u10vsxnofma(vector double a);
vector double Sleef_finz_exp10d2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp10_u10 with the same accuracy specification.


Vectorized single precision base-10 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_exp10f1_u10purec(float a);
float Sleef_exp10f1_u10purecfma(float a);
float Sleef_cinz_exp10f1_u10purec(float a);
float Sleef_finz_exp10f1_u10purecfma(float a);

vector float Sleef_exp10f4_u10(vector float a);
vector float Sleef_exp10f4_u10vsx(vector float a);
vector float Sleef_exp10f4_u10vsxnofma(vector float a);
vector float Sleef_cinz_exp10f4_u10vsxnofma(vector float a);
vector float Sleef_finz_exp10f4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp10f_u10 with the same accuracy specification.


Vectorized double precision base-e exponential function minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_expm1d1_u10purec(double a);
double Sleef_expm1d1_u10purecfma(double a);
double Sleef_cinz_expm1d1_u10purec(double a);
double Sleef_finz_expm1d1_u10purecfma(double a);

vector double Sleef_expm1d2_u10(vector double a);
vector double Sleef_expm1d2_u10vsx(vector double a);
vector double Sleef_expm1d2_u10vsxnofma(vector double a);
vector double Sleef_cinz_expm1d2_u10vsxnofma(vector double a);
vector double Sleef_finz_expm1d2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expm1_u10 with the same accuracy specification.


Vectorized single precision base-e exponential function minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_expm1f1_u10purec(float a);
float Sleef_expm1f1_u10purecfma(float a);
float Sleef_cinz_expm1f1_u10purec(float a);
float Sleef_finz_expm1f1_u10purecfma(float a);

vector float Sleef_expm1f4_u10(vector float a);
vector float Sleef_expm1f4_u10vsx(vector float a);
vector float Sleef_expm1f4_u10vsxnofma(vector float a);
vector float Sleef_cinz_expm1f4_u10vsxnofma(vector float a);
vector float Sleef_finz_expm1f4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expm1f_u10 with the same accuracy specification.


Vectorized double precision square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sqrtd1_u05purec(double a);
double Sleef_sqrtd1_u05purecfma(double a);
double Sleef_cinz_sqrtd1_u05purec(double a);
double Sleef_finz_sqrtd1_u05purecfma(double a);

vector double Sleef_sqrtd2_u05(vector double a);
vector double Sleef_sqrtd2_u05vsx(vector double a);
vector double Sleef_sqrtd2_u05vsxnofma(vector double a);
vector double Sleef_cinz_sqrtd2_u05vsxnofma(vector double a);
vector double Sleef_finz_sqrtd2_u05vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrt_u05 with the same accuracy specification.


Vectorized single precision square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sqrtf1_u05purec(float a);
float Sleef_sqrtf1_u05purecfma(float a);
float Sleef_cinz_sqrtf1_u05purec(float a);
float Sleef_finz_sqrtf1_u05purecfma(float a);

vector float Sleef_sqrtf4_u05(vector float a);
vector float Sleef_sqrtf4_u05vsx(vector float a);
vector float Sleef_sqrtf4_u05vsxnofma(vector float a);
vector float Sleef_cinz_sqrtf4_u05vsxnofma(vector float a);
vector float Sleef_finz_sqrtf4_u05vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u05 with the same accuracy specification.


Vectorized double precision square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sqrtd1_u35purec(double a);
double Sleef_sqrtd1_u35purecfma(double a);
double Sleef_cinz_sqrtd1_u35purec(double a);
double Sleef_finz_sqrtd1_u35purecfma(double a);

vector double Sleef_sqrtd2_u35(vector double a);
vector double Sleef_sqrtd2_u35vsx(vector double a);
vector double Sleef_sqrtd2_u35vsxnofma(vector double a);
vector double Sleef_cinz_sqrtd2_u35vsxnofma(vector double a);
vector double Sleef_finz_sqrtd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrt_u35 with the same accuracy specification.


Vectorized single precision square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sqrtf1_u35purec(float a);
float Sleef_sqrtf1_u35purecfma(float a);
float Sleef_cinz_sqrtf1_u35purec(float a);
float Sleef_finz_sqrtf1_u35purecfma(float a);

vector float Sleef_sqrtf4_u35(vector float a);
vector float Sleef_sqrtf4_u35vsx(vector float a);
vector float Sleef_sqrtf4_u35vsxnofma(vector float a);
vector float Sleef_cinz_sqrtf4_u35vsxnofma(vector float a);
vector float Sleef_finz_sqrtf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u35 with the same accuracy specification.


Vectorized double precision cubic root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cbrtd1_u10purec(double a);
double Sleef_cbrtd1_u10purecfma(double a);
double Sleef_cinz_cbrtd1_u10purec(double a);
double Sleef_finz_cbrtd1_u10purecfma(double a);

vector double Sleef_cbrtd2_u10(vector double a);
vector double Sleef_cbrtd2_u10vsx(vector double a);
vector double Sleef_cbrtd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_cbrtd2_u10vsxnofma(vector double a);
vector double Sleef_finz_cbrtd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrt_u10 with the same accuracy specification.


Vectorized single precision cubic root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_cbrtf1_u10purec(float a);
float Sleef_cbrtf1_u10purecfma(float a);
float Sleef_cinz_cbrtf1_u10purec(float a);
float Sleef_finz_cbrtf1_u10purecfma(float a);

vector float Sleef_cbrtf4_u10(vector float a);
vector float Sleef_cbrtf4_u10vsx(vector float a);
vector float Sleef_cbrtf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_cbrtf4_u10vsxnofma(vector float a);
vector float Sleef_finz_cbrtf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u10 with the same accuracy specification.


Vectorized double precision cubic root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_cbrtd1_u35purec(double a);
double Sleef_cbrtd1_u35purecfma(double a);
double Sleef_cinz_cbrtd1_u35purec(double a);
double Sleef_finz_cbrtd1_u35purecfma(double a);

vector double Sleef_cbrtd2_u35(vector double a);
vector double Sleef_cbrtd2_u35vsx(vector double a);
vector double Sleef_cbrtd2_u35vsxnofma(vector double a);
vector double Sleef_cinz_cbrtd2_u35vsxnofma(vector double a);
vector double Sleef_finz_cbrtd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrt_u35 with the same accuracy specification.


Vectorized single precision cubic root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_cbrtf1_u35purec(float a);
float Sleef_cbrtf1_u35purecfma(float a);
float Sleef_cinz_cbrtf1_u35purec(float a);
float Sleef_finz_cbrtf1_u35purecfma(float a);

vector float Sleef_cbrtf4_u35(vector float a);
vector float Sleef_cbrtf4_u35vsx(vector float a);
vector float Sleef_cbrtf4_u35vsxnofma(vector float a);
vector float Sleef_cinz_cbrtf4_u35vsxnofma(vector float a);
vector float Sleef_finz_cbrtf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u35 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_hypotd1_u05purec(double a, double b);
double Sleef_hypotd1_u05purecfma(double a, double b);
double Sleef_cinz_hypotd1_u05purec(double a, double b);
double Sleef_finz_hypotd1_u05purecfma(double a, double b);

vector double Sleef_hypotd2_u05(vector double a, vector double b);
vector double Sleef_hypotd2_u05vsx(vector double a, vector double b);
vector double Sleef_hypotd2_u05vsxnofma(vector double a, vector double b);
vector double Sleef_cinz_hypotd2_u05vsxnofma(vector double a, vector double b);
vector double Sleef_finz_hypotd2_u05vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypot_u05 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_hypotf1_u05purec(float a, float b);
float Sleef_hypotf1_u05purecfma(float a, float b);
float Sleef_cinz_hypotf1_u05purec(float a, float b);
float Sleef_finz_hypotf1_u05purecfma(float a, float b);

vector float Sleef_hypotf4_u05(vector float a, vector float b);
vector float Sleef_hypotf4_u05vsx(vector float a, vector float b);
vector float Sleef_hypotf4_u05vsxnofma(vector float a, vector float b);
vector float Sleef_cinz_hypotf4_u05vsxnofma(vector float a, vector float b);
vector float Sleef_finz_hypotf4_u05vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u05 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_hypotd1_u35purec(double a, double b);
double Sleef_hypotd1_u35purecfma(double a, double b);
double Sleef_cinz_hypotd1_u35purec(double a, double b);
double Sleef_finz_hypotd1_u35purecfma(double a, double b);

vector double Sleef_hypotd2_u35(vector double a, vector double b);
vector double Sleef_hypotd2_u35vsx(vector double a, vector double b);
vector double Sleef_hypotd2_u35vsxnofma(vector double a, vector double b);
vector double Sleef_cinz_hypotd2_u35vsxnofma(vector double a, vector double b);
vector double Sleef_finz_hypotd2_u35vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypot_u35 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_hypotf1_u35purec(float a, float b);
float Sleef_hypotf1_u35purecfma(float a, float b);
float Sleef_cinz_hypotf1_u35purec(float a, float b);
float Sleef_finz_hypotf1_u35purecfma(float a, float b);

vector float Sleef_hypotf4_u35(vector float a, vector float b);
vector float Sleef_hypotf4_u35vsx(vector float a, vector float b);
vector float Sleef_hypotf4_u35vsxnofma(vector float a, vector float b);
vector float Sleef_cinz_hypotf4_u35vsxnofma(vector float a, vector float b);
vector float Sleef_finz_hypotf4_u35vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u35 with the same accuracy specification.

Inverse Trigonometric Functions

Vectorized double precision arc sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_asind1_u10purec(double a);
double Sleef_asind1_u10purecfma(double a);
double Sleef_cinz_asind1_u10purec(double a);
double Sleef_finz_asind1_u10purecfma(double a);

vector double Sleef_asind2_u10(vector double a);
vector double Sleef_asind2_u10vsx(vector double a);
vector double Sleef_asind2_u10vsxnofma(vector double a);
vector double Sleef_cinz_asind2_u10vsxnofma(vector double a);
vector double Sleef_finz_asind2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asin_u10 with the same accuracy specification.


Vectorized single precision arc sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_asinf1_u10purec(float a);
float Sleef_asinf1_u10purecfma(float a);
float Sleef_cinz_asinf1_u10purec(float a);
float Sleef_finz_asinf1_u10purecfma(float a);

vector float Sleef_asinf4_u10(vector float a);
vector float Sleef_asinf4_u10vsx(vector float a);
vector float Sleef_asinf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_asinf4_u10vsxnofma(vector float a);
vector float Sleef_finz_asinf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u10 with the same accuracy specification.


Vectorized double precision arc sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_asind1_u35purec(double a);
double Sleef_asind1_u35purecfma(double a);
double Sleef_cinz_asind1_u35purec(double a);
double Sleef_finz_asind1_u35purecfma(double a);

vector double Sleef_asind2_u35(vector double a);
vector double Sleef_asind2_u35vsx(vector double a);
vector double Sleef_asind2_u35vsxnofma(vector double a);
vector double Sleef_cinz_asind2_u35vsxnofma(vector double a);
vector double Sleef_finz_asind2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asin_u35 with the same accuracy specification.


Vectorized single precision arc sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_asinf1_u35purec(float a);
float Sleef_asinf1_u35purecfma(float a);
float Sleef_cinz_asinf1_u35purec(float a);
float Sleef_finz_asinf1_u35purecfma(float a);

vector float Sleef_asinf4_u35(vector float a);
vector float Sleef_asinf4_u35vsx(vector float a);
vector float Sleef_asinf4_u35vsxnofma(vector float a);
vector float Sleef_cinz_asinf4_u35vsxnofma(vector float a);
vector float Sleef_finz_asinf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u35 with the same accuracy specification.


Vectorized double precision arc cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_acosd1_u10purec(double a);
double Sleef_acosd1_u10purecfma(double a);
double Sleef_cinz_acosd1_u10purec(double a);
double Sleef_finz_acosd1_u10purecfma(double a);

vector double Sleef_acosd2_u10(vector double a);
vector double Sleef_acosd2_u10vsx(vector double a);
vector double Sleef_acosd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_acosd2_u10vsxnofma(vector double a);
vector double Sleef_finz_acosd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acos_u10 with the same accuracy specification.


Vectorized single precision arc cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_acosf1_u10purec(float a);
float Sleef_acosf1_u10purecfma(float a);
float Sleef_cinz_acosf1_u10purec(float a);
float Sleef_finz_acosf1_u10purecfma(float a);

vector float Sleef_acosf4_u10(vector float a);
vector float Sleef_acosf4_u10vsx(vector float a);
vector float Sleef_acosf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_acosf4_u10vsxnofma(vector float a);
vector float Sleef_finz_acosf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u10 with the same accuracy specification.


Vectorized double precision arc cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_acosd1_u35purec(double a);
double Sleef_acosd1_u35purecfma(double a);
double Sleef_cinz_acosd1_u35purec(double a);
double Sleef_finz_acosd1_u35purecfma(double a);

vector double Sleef_acosd2_u35(vector double a);
vector double Sleef_acosd2_u35vsx(vector double a);
vector double Sleef_acosd2_u35vsxnofma(vector double a);
vector double Sleef_cinz_acosd2_u35vsxnofma(vector double a);
vector double Sleef_finz_acosd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acos_u35 with the same accuracy specification.


Vectorized single precision arc cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_acosf1_u35purec(float a);
float Sleef_acosf1_u35purecfma(float a);
float Sleef_cinz_acosf1_u35purec(float a);
float Sleef_finz_acosf1_u35purecfma(float a);

vector float Sleef_acosf4_u35(vector float a);
vector float Sleef_acosf4_u35vsx(vector float a);
vector float Sleef_acosf4_u35vsxnofma(vector float a);
vector float Sleef_cinz_acosf4_u35vsxnofma(vector float a);
vector float Sleef_finz_acosf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u35 with the same accuracy specification.


Vectorized double precision arc tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atand1_u10purec(double a);
double Sleef_atand1_u10purecfma(double a);
double Sleef_cinz_atand1_u10purec(double a);
double Sleef_finz_atand1_u10purecfma(double a);

vector double Sleef_atand2_u10(vector double a);
vector double Sleef_atand2_u10vsx(vector double a);
vector double Sleef_atand2_u10vsxnofma(vector double a);
vector double Sleef_cinz_atand2_u10vsxnofma(vector double a);
vector double Sleef_finz_atand2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan_u10 with the same accuracy specification.


Vectorized single precision arc tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_atanf1_u10purec(float a);
float Sleef_atanf1_u10purecfma(float a);
float Sleef_cinz_atanf1_u10purec(float a);
float Sleef_finz_atanf1_u10purecfma(float a);

vector float Sleef_atanf4_u10(vector float a);
vector float Sleef_atanf4_u10vsx(vector float a);
vector float Sleef_atanf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_atanf4_u10vsxnofma(vector float a);
vector float Sleef_finz_atanf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u10 with the same accuracy specification.


Vectorized double precision arc tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_atand1_u35purec(double a);
double Sleef_atand1_u35purecfma(double a);
double Sleef_cinz_atand1_u35purec(double a);
double Sleef_finz_atand1_u35purecfma(double a);

vector double Sleef_atand2_u35(vector double a);
vector double Sleef_atand2_u35vsx(vector double a);
vector double Sleef_atand2_u35vsxnofma(vector double a);
vector double Sleef_cinz_atand2_u35vsxnofma(vector double a);
vector double Sleef_finz_atand2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan_u35 with the same accuracy specification.


Vectorized single precision arc tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_atanf1_u35purec(float a);
float Sleef_atanf1_u35purecfma(float a);
float Sleef_cinz_atanf1_u35purec(float a);
float Sleef_finz_atanf1_u35purecfma(float a);

vector float Sleef_atanf4_u35(vector float a);
vector float Sleef_atanf4_u35vsx(vector float a);
vector float Sleef_atanf4_u35vsxnofma(vector float a);
vector float Sleef_cinz_atanf4_u35vsxnofma(vector float a);
vector float Sleef_finz_atanf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u35 with the same accuracy specification.


Vectorized double precision arc tangent function of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atan2d1_u10purec(double a, double b);
double Sleef_atan2d1_u10purecfma(double a, double b);
double Sleef_cinz_atan2d1_u10purec(double a, double b);
double Sleef_finz_atan2d1_u10purecfma(double a, double b);

vector double Sleef_atan2d2_u10(vector double a, vector double b);
vector double Sleef_atan2d2_u10vsx(vector double a, vector double b);
vector double Sleef_atan2d2_u10vsxnofma(vector double a, vector double b);
vector double Sleef_cinz_atan2d2_u10vsxnofma(vector double a, vector double b);
vector double Sleef_finz_atan2d2_u10vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2_u10 with the same accuracy specification.


Vectorized single precision arc tangent function of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_atan2f1_u10purec(float a, float b);
float Sleef_atan2f1_u10purecfma(float a, float b);
float Sleef_cinz_atan2f1_u10purec(float a, float b);
float Sleef_finz_atan2f1_u10purecfma(float a, float b);

vector float Sleef_atan2f4_u10(vector float a, vector float b);
vector float Sleef_atan2f4_u10vsx(vector float a, vector float b);
vector float Sleef_atan2f4_u10vsxnofma(vector float a, vector float b);
vector float Sleef_cinz_atan2f4_u10vsxnofma(vector float a, vector float b);
vector float Sleef_finz_atan2f4_u10vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u10 with the same accuracy specification.


Vectorized double precision arc tangent function of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_atan2d1_u35purec(double a, double b);
double Sleef_atan2d1_u35purecfma(double a, double b);
double Sleef_cinz_atan2d1_u35purec(double a, double b);
double Sleef_finz_atan2d1_u35purecfma(double a, double b);

vector double Sleef_atan2d2_u35(vector double a, vector double b);
vector double Sleef_atan2d2_u35vsx(vector double a, vector double b);
vector double Sleef_atan2d2_u35vsxnofma(vector double a, vector double b);
vector double Sleef_cinz_atan2d2_u35vsxnofma(vector double a, vector double b);
vector double Sleef_finz_atan2d2_u35vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2_u35 with the same accuracy specification.


Vectorized single precision arc tangent function of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_atan2f1_u35purec(float a, float b);
float Sleef_atan2f1_u35purecfma(float a, float b);
float Sleef_cinz_atan2f1_u35purec(float a, float b);
float Sleef_finz_atan2f1_u35purecfma(float a, float b);

vector float Sleef_atan2f4_u35(vector float a, vector float b);
vector float Sleef_atan2f4_u35vsx(vector float a, vector float b);
vector float Sleef_atan2f4_u35vsxnofma(vector float a, vector float b);
vector float Sleef_cinz_atan2f4_u35vsxnofma(vector float a, vector float b);
vector float Sleef_finz_atan2f4_u35vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u35 with the same accuracy specification.

Hyperbolic function and inverse hyperbolic function

Vectorized double precision hyperbolic sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sinhd1_u10purec(double a);
double Sleef_sinhd1_u10purecfma(double a);
double Sleef_cinz_sinhd1_u10purec(double a);
double Sleef_finz_sinhd1_u10purecfma(double a);

vector double Sleef_sinhd2_u10(vector double a);
vector double Sleef_sinhd2_u10vsx(vector double a);
vector double Sleef_sinhd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_sinhd2_u10vsxnofma(vector double a);
vector double Sleef_finz_sinhd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sinhf1_u10purec(float a);
float Sleef_sinhf1_u10purecfma(float a);
float Sleef_cinz_sinhf1_u10purec(float a);
float Sleef_finz_sinhf1_u10purecfma(float a);

vector float Sleef_sinhf4_u10(vector float a);
vector float Sleef_sinhf4_u10vsx(vector float a);
vector float Sleef_sinhf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_sinhf4_u10vsxnofma(vector float a);
vector float Sleef_finz_sinhf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sinhd1_u35purec(double a);
double Sleef_sinhd1_u35purecfma(double a);
double Sleef_cinz_sinhd1_u35purec(double a);
double Sleef_finz_sinhd1_u35purecfma(double a);

vector double Sleef_sinhd2_u35(vector double a);
vector double Sleef_sinhd2_u35vsx(vector double a);
vector double Sleef_sinhd2_u35vsxnofma(vector double a);
vector double Sleef_cinz_sinhd2_u35vsxnofma(vector double a);
vector double Sleef_finz_sinhd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sinhf1_u35purec(float a);
float Sleef_sinhf1_u35purecfma(float a);
float Sleef_cinz_sinhf1_u35purec(float a);
float Sleef_finz_sinhf1_u35purecfma(float a);

vector float Sleef_sinhf4_u35(vector float a);
vector float Sleef_sinhf4_u35vsx(vector float a);
vector float Sleef_sinhf4_u35vsxnofma(vector float a);
vector float Sleef_cinz_sinhf4_u35vsxnofma(vector float a);
vector float Sleef_finz_sinhf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_coshd1_u10purec(double a);
double Sleef_coshd1_u10purecfma(double a);
double Sleef_cinz_coshd1_u10purec(double a);
double Sleef_finz_coshd1_u10purecfma(double a);

vector double Sleef_coshd2_u10(vector double a);
vector double Sleef_coshd2_u10vsx(vector double a);
vector double Sleef_coshd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_coshd2_u10vsxnofma(vector double a);
vector double Sleef_finz_coshd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_coshf1_u10purec(float a);
float Sleef_coshf1_u10purecfma(float a);
float Sleef_cinz_coshf1_u10purec(float a);
float Sleef_finz_coshf1_u10purecfma(float a);

vector float Sleef_coshf4_u10(vector float a);
vector float Sleef_coshf4_u10vsx(vector float a);
vector float Sleef_coshf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_coshf4_u10vsxnofma(vector float a);
vector float Sleef_finz_coshf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_coshd1_u35purec(double a);
double Sleef_coshd1_u35purecfma(double a);
double Sleef_cinz_coshd1_u35purec(double a);
double Sleef_finz_coshd1_u35purecfma(double a);

vector double Sleef_coshd2_u35(vector double a);
vector double Sleef_coshd2_u35vsx(vector double a);
vector double Sleef_coshd2_u35vsxnofma(vector double a);
vector double Sleef_cinz_coshd2_u35vsxnofma(vector double a);
vector double Sleef_finz_coshd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_coshf1_u35purec(float a);
float Sleef_coshf1_u35purecfma(float a);
float Sleef_cinz_coshf1_u35purec(float a);
float Sleef_finz_coshf1_u35purecfma(float a);

vector float Sleef_coshf4_u35(vector float a);
vector float Sleef_coshf4_u35vsx(vector float a);
vector float Sleef_coshf4_u35vsxnofma(vector float a);
vector float Sleef_cinz_coshf4_u35vsxnofma(vector float a);
vector float Sleef_finz_coshf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_tanhd1_u10purec(double a);
double Sleef_tanhd1_u10purecfma(double a);
double Sleef_cinz_tanhd1_u10purec(double a);
double Sleef_finz_tanhd1_u10purecfma(double a);

vector double Sleef_tanhd2_u10(vector double a);
vector double Sleef_tanhd2_u10vsx(vector double a);
vector double Sleef_tanhd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_tanhd2_u10vsxnofma(vector double a);
vector double Sleef_finz_tanhd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_tanhf1_u10purec(float a);
float Sleef_tanhf1_u10purecfma(float a);
float Sleef_cinz_tanhf1_u10purec(float a);
float Sleef_finz_tanhf1_u10purecfma(float a);

vector float Sleef_tanhf4_u10(vector float a);
vector float Sleef_tanhf4_u10vsx(vector float a);
vector float Sleef_tanhf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_tanhf4_u10vsxnofma(vector float a);
vector float Sleef_finz_tanhf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_tanhd1_u35purec(double a);
double Sleef_tanhd1_u35purecfma(double a);
double Sleef_cinz_tanhd1_u35purec(double a);
double Sleef_finz_tanhd1_u35purecfma(double a);

vector double Sleef_tanhd2_u35(vector double a);
vector double Sleef_tanhd2_u35vsx(vector double a);
vector double Sleef_tanhd2_u35vsxnofma(vector double a);
vector double Sleef_cinz_tanhd2_u35vsxnofma(vector double a);
vector double Sleef_finz_tanhd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_tanhf1_u35purec(float a);
float Sleef_tanhf1_u35purecfma(float a);
float Sleef_cinz_tanhf1_u35purec(float a);
float Sleef_finz_tanhf1_u35purecfma(float a);

vector float Sleef_tanhf4_u35(vector float a);
vector float Sleef_tanhf4_u35vsx(vector float a);
vector float Sleef_tanhf4_u35vsxnofma(vector float a);
vector float Sleef_cinz_tanhf4_u35vsxnofma(vector float a);
vector float Sleef_finz_tanhf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u35 with the same accuracy specification.


Vectorized double precision inverse hyperbolic sine function

Synopsis

#include <sleef.h>

double Sleef_asinhd1_u10purec(double a);
double Sleef_asinhd1_u10purecfma(double a);
double Sleef_cinz_asinhd1_u10purec(double a);
double Sleef_finz_asinhd1_u10purecfma(double a);

vector double Sleef_asinhd2_u10(vector double a);
vector double Sleef_asinhd2_u10vsx(vector double a);
vector double Sleef_asinhd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_asinhd2_u10vsxnofma(vector double a);
vector double Sleef_finz_asinhd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic sine function

Synopsis

#include <sleef.h>

float Sleef_asinhf1_u10purec(float a);
float Sleef_asinhf1_u10purecfma(float a);
float Sleef_cinz_asinhf1_u10purec(float a);
float Sleef_finz_asinhf1_u10purecfma(float a);

vector float Sleef_asinhf4_u10(vector float a);
vector float Sleef_asinhf4_u10vsx(vector float a);
vector float Sleef_asinhf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_asinhf4_u10vsxnofma(vector float a);
vector float Sleef_finz_asinhf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinhf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic cosine function

Synopsis

#include <sleef.h>

double Sleef_acoshd1_u10purec(double a);
double Sleef_acoshd1_u10purecfma(double a);
double Sleef_cinz_acoshd1_u10purec(double a);
double Sleef_finz_acoshd1_u10purecfma(double a);

vector double Sleef_acoshd2_u10(vector double a);
vector double Sleef_acoshd2_u10vsx(vector double a);
vector double Sleef_acoshd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_acoshd2_u10vsxnofma(vector double a);
vector double Sleef_finz_acoshd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic cosine function

Synopsis

#include <sleef.h>

float Sleef_acoshf1_u10purec(float a);
float Sleef_acoshf1_u10purecfma(float a);
float Sleef_cinz_acoshf1_u10purec(float a);
float Sleef_finz_acoshf1_u10purecfma(float a);

vector float Sleef_acoshf4_u10(vector float a);
vector float Sleef_acoshf4_u10vsx(vector float a);
vector float Sleef_acoshf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_acoshf4_u10vsxnofma(vector float a);
vector float Sleef_finz_acoshf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acoshf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic tangent function

Synopsis

#include <sleef.h>

double Sleef_atanhd1_u10purec(double a);
double Sleef_atanhd1_u10purecfma(double a);
double Sleef_cinz_atanhd1_u10purec(double a);
double Sleef_finz_atanhd1_u10purecfma(double a);

vector double Sleef_atanhd2_u10(vector double a);
vector double Sleef_atanhd2_u10vsx(vector double a);
vector double Sleef_atanhd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_atanhd2_u10vsxnofma(vector double a);
vector double Sleef_finz_atanhd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic tangent function

Synopsis

#include <sleef.h>

float Sleef_atanhf1_u10purec(float a);
float Sleef_atanhf1_u10purecfma(float a);
float Sleef_cinz_atanhf1_u10purec(float a);
float Sleef_finz_atanhf1_u10purecfma(float a);

vector float Sleef_atanhf4_u10(vector float a);
vector float Sleef_atanhf4_u10vsx(vector float a);
vector float Sleef_atanhf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_atanhf4_u10vsxnofma(vector float a);
vector float Sleef_finz_atanhf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanhf_u10 with the same accuracy specification.

Error and gamma function

Vectorized double precision error function

Synopsis

#include <sleef.h>

double Sleef_erfd1_u10purec(double a);
double Sleef_erfd1_u10purecfma(double a);
double Sleef_cinz_erfd1_u10purec(double a);
double Sleef_finz_erfd1_u10purecfma(double a);

vector double Sleef_erfd2_u10(vector double a);
vector double Sleef_erfd2_u10vsx(vector double a);
vector double Sleef_erfd2_u10vsxnofma(vector double a);
vector double Sleef_cinz_erfd2_u10vsxnofma(vector double a);
vector double Sleef_finz_erfd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erf_u10 with the same accuracy specification.


Vectorized single precision error function

Synopsis

#include <sleef.h>

float Sleef_erff1_u10purec(float a);
float Sleef_erff1_u10purecfma(float a);
float Sleef_cinz_erff1_u10purec(float a);
float Sleef_finz_erff1_u10purecfma(float a);

vector float Sleef_erff4_u10(vector float a);
vector float Sleef_erff4_u10vsx(vector float a);
vector float Sleef_erff4_u10vsxnofma(vector float a);
vector float Sleef_cinz_erff4_u10vsxnofma(vector float a);
vector float Sleef_finz_erff4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erff_u10 with the same accuracy specification.


Vectorized double precision complementary error function

Synopsis

#include <sleef.h>

double Sleef_erfcd1_u15purec(double a);
double Sleef_erfcd1_u15purecfma(double a);
double Sleef_cinz_erfcd1_u15purec(double a);
double Sleef_finz_erfcd1_u15purecfma(double a);

vector double Sleef_erfcd2_u15(vector double a);
vector double Sleef_erfcd2_u15vsx(vector double a);
vector double Sleef_erfcd2_u15vsxnofma(vector double a);
vector double Sleef_cinz_erfcd2_u15vsxnofma(vector double a);
vector double Sleef_finz_erfcd2_u15vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erfc_u15 with the same accuracy specification.


Vectorized single precision complementary error function

Synopsis

#include <sleef.h>

float Sleef_erfcf1_u15purec(float a);
float Sleef_erfcf1_u15purecfma(float a);
float Sleef_cinz_erfcf1_u15purec(float a);
float Sleef_finz_erfcf1_u15purecfma(float a);

vector float Sleef_erfcf4_u15(vector float a);
vector float Sleef_erfcf4_u15vsx(vector float a);
vector float Sleef_erfcf4_u15vsxnofma(vector float a);
vector float Sleef_cinz_erfcf4_u15vsxnofma(vector float a);
vector float Sleef_finz_erfcf4_u15vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erfcf_u15 with the same accuracy specification.


Vectorized double precision gamma function

Synopsis

#include <sleef.h>

double Sleef_tgammad1_u10purec(double a);
double Sleef_tgammad1_u10purecfma(double a);
double Sleef_cinz_tgammad1_u10purec(double a);
double Sleef_finz_tgammad1_u10purecfma(double a);

vector double Sleef_tgammad2_u10(vector double a);
vector double Sleef_tgammad2_u10vsx(vector double a);
vector double Sleef_tgammad2_u10vsxnofma(vector double a);
vector double Sleef_cinz_tgammad2_u10vsxnofma(vector double a);
vector double Sleef_finz_tgammad2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tgamma_u10 with the same accuracy specification.


Vectorized single precision gamma function

Synopsis

#include <sleef.h>

float Sleef_tgammaf1_u10purec(float a);
float Sleef_tgammaf1_u10purecfma(float a);
float Sleef_cinz_tgammaf1_u10purec(float a);
float Sleef_finz_tgammaf1_u10purecfma(float a);

vector float Sleef_tgammaf4_u10(vector float a);
vector float Sleef_tgammaf4_u10vsx(vector float a);
vector float Sleef_tgammaf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_tgammaf4_u10vsxnofma(vector float a);
vector float Sleef_finz_tgammaf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tgammaf_u10 with the same accuracy specification.


Vectorized double precision log gamma function

Synopsis

#include <sleef.h>

double Sleef_lgammad1_u10purec(double a);
double Sleef_lgammad1_u10purecfma(double a);
double Sleef_cinz_lgammad1_u10purec(double a);
double Sleef_finz_lgammad1_u10purecfma(double a);

vector double Sleef_lgammad2_u10(vector double a);
vector double Sleef_lgammad2_u10vsx(vector double a);
vector double Sleef_lgammad2_u10vsxnofma(vector double a);
vector double Sleef_cinz_lgammad2_u10vsxnofma(vector double a);
vector double Sleef_finz_lgammad2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_lgamma_u10 with the same accuracy specification.


Vectorized single precision log gamma function

Synopsis

#include <sleef.h>

float Sleef_lgammaf1_u10purec(float a);
float Sleef_lgammaf1_u10purecfma(float a);
float Sleef_cinz_lgammaf1_u10purec(float a);
float Sleef_finz_lgammaf1_u10purecfma(float a);

vector float Sleef_lgammaf4_u10(vector float a);
vector float Sleef_lgammaf4_u10vsx(vector float a);
vector float Sleef_lgammaf4_u10vsxnofma(vector float a);
vector float Sleef_cinz_lgammaf4_u10vsxnofma(vector float a);
vector float Sleef_finz_lgammaf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_lgammaf_u10 with the same accuracy specification.

Nearest integer function

Vectorized double precision function for rounding to integer towards zero

Synopsis

#include <sleef.h>

vector double Sleef_truncd2(vector double a);
vector double Sleef_truncd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_trunc with the same accuracy specification.


Vectorized single precision function for rounding to integer towards zero

Synopsis

#include <sleef.h>

vector float Sleef_truncf4(vector float a);
vector float Sleef_truncf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_truncf with the same accuracy specification.


Vectorized double precision function for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

vector double Sleef_floord2(vector double a);
vector double Sleef_floord2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_floor with the same accuracy specification.


Vectorized single precision function for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

vector float Sleef_floorf4(vector float a);
vector float Sleef_floorf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_floorf with the same accuracy specification.


Vectorized double precision function for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

vector double Sleef_ceild2(vector double a);
vector double Sleef_ceild2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ceil with the same accuracy specification.


Vectorized single precision function for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

vector float Sleef_ceilf4(vector float a);
vector float Sleef_ceilf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ceilf with the same accuracy specification.


Vectorized double precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

vector double Sleef_roundd2(vector double a);
vector double Sleef_roundd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_round with the same accuracy specification.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

vector float Sleef_roundf4(vector float a);
vector float Sleef_roundf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_roundf with the same accuracy specification.


Vectorized double precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

vector double Sleef_rintd2(vector double a);
vector double Sleef_rintd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_rint with the same accuracy specification.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

vector float Sleef_rintf4(vector float a);
vector float Sleef_rintf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_rintf with the same accuracy specification.

Other function

Vectorized double precision function for fused multiply-accumulation

Synopsis

#include <sleef.h>

vector double Sleef_fmad2(vector double a, vector double b, vector double c);
vector double Sleef_fmad2_vsx(vector double a, vector double b, vector double c);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fma with the same accuracy specification.


Vectorized single precision function for fused multiply-accumulation

Synopsis

#include <sleef.h>

vector float Sleef_fmaf4(vector float a, vector float b, vector float c);
vector float Sleef_fmaf4_vsx(vector float a, vector float b, vector float c);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaf with the same accuracy specification.


Vectorized double precision FP remainder

Synopsis

#include <sleef.h>

vector double Sleef_fmodd2(vector double a, vector double b);
vector double Sleef_fmodd2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmod with the same accuracy specification.


Vectorized single precision FP remainder

Synopsis

#include <sleef.h>

vector float Sleef_fmodf4(vector float a, vector float b);
vector float Sleef_fmodf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmodf with the same accuracy specification.


Vectorized double precision FP remainder

Synopsis

#include <sleef.h>

vector double Sleef_remainderd2(vector double a, vector double b);
vector double Sleef_remainderd2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_remainder with the same accuracy specification.


Vectorized single precision FP remainder

Synopsis

#include <sleef.h>

vector float Sleef_remainderf4(vector float a, vector float b);
vector float Sleef_remainderf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_remainderf with the same accuracy specification.


Vectorized double precision function for multiplying by integral power of 2

Synopsis

#include <sleef.h>

vector double Sleef_ldexpd2(vector double a, vector int b);
vector double Sleef_ldexpd2_vsx(vector double a, vector int b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ldexp with the same accuracy specification.


Vectorized double precision function for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

vector double Sleef_frfrexpd2(vector double a);
vector double Sleef_frfrexpd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_frfrexp with the same accuracy specification.


Vectorized single precision function for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

vector float Sleef_frfrexpf4(vector float a);
vector float Sleef_frfrexpf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_frfrexpf with the same accuracy specification.


Vectorized double precision function for obtaining integral component of an FP number

Synopsis

#include <sleef.h>

vector int Sleef_expfrexpd2(vector double a);
vector int Sleef_expfrexpd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expfrexp with the same accuracy specification.


Vectorized double precision function for getting integer exponent

Synopsis

#include <sleef.h>

vector int Sleef_ilogbd2(vector double a);
vector int Sleef_ilogbd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ilogb with the same accuracy specification.


Vectorized double precision signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef_vector_double_2 Sleef_modfd2(vector double a);
Sleef_vector_double_2 Sleef_modfd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_modf with the same accuracy specification.


Vectorized single precision signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef_vector_float_2 Sleef_modff4(vector float a);
Sleef_vector_float_2 Sleef_modff4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_modff with the same accuracy specification.


Vectorized double precision function for calculating the absolute value

Synopsis

#include <sleef.h>

vector double Sleef_fabsd2(vector double a);
vector double Sleef_fabsd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fabs with the same accuracy specification.


Vectorized single precision function for calculating the absolute value

Synopsis

#include <sleef.h>

vector float Sleef_fabsf4(vector float a);
vector float Sleef_fabsf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fabsf with the same accuracy specification.


Vectorized double precision function for copying signs

Synopsis

#include <sleef.h>

vector double Sleef_copysignd2(vector double a, vector double b);
vector double Sleef_copysignd2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_copysign with the same accuracy specification.


Vectorized single precision function for copying signs

Synopsis

#include <sleef.h>

vector float Sleef_copysignf4(vector float a, vector float b);
vector float Sleef_copysignf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_copysignf with the same accuracy specification.


Vectorized double precision function for determining maximum of two values

Synopsis

#include <sleef.h>

vector double Sleef_fmaxd2(vector double a, vector double b);
vector double Sleef_fmaxd2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmax with the same accuracy specification.


Vectorized single precision function for determining maximum of two values

Synopsis

#include <sleef.h>

vector float Sleef_fmaxf4(vector float a, vector float b);
vector float Sleef_fmaxf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaxf with the same accuracy specification.


Vectorized double precision function for determining minimum of two values

Synopsis

#include <sleef.h>

vector double Sleef_fmind2(vector double a, vector double b);
vector double Sleef_fmind2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmin with the same accuracy specification.


Vectorized single precision function for determining minimum of two values

Synopsis

#include <sleef.h>

vector float Sleef_fminf4(vector float a, vector float b);
vector float Sleef_fminf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fminf with the same accuracy specification.


Vectorized double precision function to calculate positive difference of two values

Synopsis

#include <sleef.h>

vector double Sleef_fdimd2(vector double a, vector double b);
vector double Sleef_fdimd2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fdim with the same accuracy specification.


Vectorized single precision function to calculate positive difference of two values

Synopsis

#include <sleef.h>

vector float Sleef_fdimf4(vector float a, vector float b);
vector float Sleef_fdimf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fdimf with the same accuracy specification.


Vectorized double precision function for obtaining the next representable FP value

Synopsis

#include <sleef.h>

vector double Sleef_nextafterd2(vector double a, vector double b);
vector double Sleef_nextafterd2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_nextafter with the same accuracy specification.


Vectorized single precision function for obtaining the next representable FP value

Synopsis

#include <sleef.h>

vector float Sleef_nextafterf4(vector float a, vector float b);
vector float Sleef_nextafterf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_nextafterf with the same accuracy specification.

sleef-3.5.1/doc/html/purec.xhtml000066400000000000000000002261151373003144100165260ustar00rootroot00000000000000 SLEEF - Math library reference (scalar)

SLEEF Documentation - Math library reference

Table of contents

Data types

Sleef_double2

Description

Sleef_double2 is a generic data type for storing two double-precision floating point values, which is defined in <sleef.h> as follows:

typedef struct {
      double x, y;
} Sleef_double2;

Sleef_float2

Description

Sleef_float2 is a generic data type for storing two single-precision floating point values, which is defined in <sleef.h> as follows:

typedef struct {
      float x, y;
} Sleef_float2;

Sleef_longdouble2

Description

Sleef_longdouble2 is a generic data type for storing two extended-precision (80-bit) floating point values, which is defined in <sleef.h> as follows:

typedef struct {
      long double x, y;
} Sleef_longdouble2;

Trigonometric Functions

Sleef_sin_u10, Sleef_sinf_u10 - sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sin_u10(double a);
float Sleef_sinf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the sine function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_sin_u35, Sleef_sinf_u35 - sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sin_u35(double a);
float Sleef_sinf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the sine function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_cos_u10, Sleef_cosf_u10 - cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cos_u10(double a);
float Sleef_cosf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the cosine function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_cos_u35, Sleef_cosf_u35 - cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cos_u35(double a);
float Sleef_cosf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the cosine function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_sincos_u10, Sleef_sincosf_u10 - evaluate sine and cosine functions simultaneously with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincos_u10(double a);
Sleef_float2 Sleef_sincosf_u10(float a);

Link with -lsleef.

Description

Evaluates the sine and cosine functions of a value in a simultaneously, and stores the two values in the x and y elements of the returned value, respectively. The error bound of the returned values is 1.0 ULP. If a is a NaN or infinity, a NaN is returned.


Sleef_sincos_u35, Sleef_sincosf_u35 - evaluate sine and cosine functions simultaneously with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincos_u35(double a);
Sleef_float2 Sleef_sincosf_u35(float a);

Link with -lsleef.

Description

Evaluates the sine and cosine functions of a value in a simultaneously, and stores the two values in the x and y elements of the returned value, respectively. The error bound of the returned values is 3.5 ULP. If a is a NaN or infinity, a NaN is returned.


Sleef_sincospi_u05, Sleef_sincospif_u05, Sleef_sincospil_u05 - evaluate sin( πa ) and cos( πa ) for given a simultaneously with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincospi_u05(double a);
Sleef_float2 Sleef_sincospif_u05(float a);
Sleef_longdouble2 Sleef_sincospil_u05(long double a);

Link with -lsleef.

Description

Evaluates the sine and cosine functions of πa simultaneously, and stores the two values in the x and y elements of the returned value, respectively. The error bounds of the returned values are max(0.506 ULP, DBL_MIN) if a is in [-1e+9, 1e+9] for the double-precision function, or max(0.506 ULP, FLT_MIN) if a is in [-1e+7, 1e+7] for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is returned. If a is a NaN or infinity, a NaN is returned.


Sleef_sincospi_u35, Sleef_sincospif_u35, Sleef_sincospil_u35 - evaluate sin( πa ) and cos( πa ) for given a simultaneously with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincospi_u35(double a);
Sleef_float2 Sleef_sincospif_u35(float a);
Sleef_longdouble2 Sleef_sincospil_u35(long double a);

Link with -lsleef.

Description

Evaluates the sine and cosine functions of πa simultaneously, and stores the two values in the x and y elements of the returned value, respectively. The error bound of the returned values is 3.5 ULP if a is in [-1e+9, 1e+9] for the double-precision function, or if a is in [-1e+7, 1e+7] for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is returned. If a is a NaN or infinity, a NaN is returned.


Sleef_sinpi_u05, Sleef_sinpif_u05 - evaluate sin( πa ) for given a with 0.506 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sinpi_u05(double a);
float Sleef_sinpif_u05(float a);

Link with -lsleef.

Description

These functions evaluate the sine function of πa. The error bounds of the returned values are max(0.506 ULP, DBL_MIN) if a is in [-1e+9, 1e+9] for the double-precision function, or max(0.506 ULP, FLT_MIN) if a is in [-1e+7, 1e+7] for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is returned. If a is a NaN or infinity, a NaN is returned.


Sleef_cospi_u05, Sleef_cospif_u05 - evaluate cos( πa ) for given a with 0.506 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cospi_u05(double a);
float Sleef_cospif_u05(float a);

Link with -lsleef.

Description

These functions evaluate the cosine function of πa. The error bounds of the returned values are max(0.506 ULP, DBL_MIN) if a is in [-1e+9, 1e+9] for the double-precision function, or max(0.506 ULP, FLT_MIN) if a is in [-1e+7, 1e+7] for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is returned. If a is a NaN or infinity, a NaN is returned.


Sleef_tan_u10, Sleef_tanf_u10 - tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_tan_u10(double a);
float Sleef_tanf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the tangent function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_tan_u35, Sleef_tanf_u35 - tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_tan_u35(double a);
float Sleef_tanf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the tangent function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.

Power, exponential, and logarithmic functions

Sleef_pow_u10, Sleef_powf_u10 - power functions

Synopsis

#include <sleef.h>

double Sleef_pow_u10(double x, double y);
float Sleef_powf_u10(float x, float y);

Link with -lsleef.

Description

These functions return the value of x raised to the power of y. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_log_u10, Sleef_logf_u10 - natural logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log_u10(double a);
float Sleef_logf_u10(float a);

Link with -lsleef.

Description

These functions return the natural logarithm of a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_log_u35, Sleef_logf_u35 - natural logarithmic functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log_u35(double a);
float Sleef_logf_u35(float a);

Link with -lsleef.

Description

These functions return the natural logarithm of a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_log10_u10, Sleef_log10f_u10 - base-10 logarithmic functions

Synopsis

#include <sleef.h>

double Sleef_log10_u10(double a);
float Sleef_log10f_u10(float a);

Link with -lsleef.

Description

These functions return the base-10 logarithm of a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_log2_u10, Sleef_log2f_u10 - base-2 logarithmic functions

Synopsis

#include <sleef.h>

double Sleef_log2_u10(double a);
float Sleef_log2f_u10(float a);

Link with -lsleef.

Description

These functions return the base-2 logarithm of a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_log1p_u10, Sleef_log1pf_u10 - logarithm of one plus argument

Synopsis

#include <sleef.h>

double Sleef_log1p_u10(double a);
float Sleef_log1pf_u10(float a);

Link with -lsleef.

Description

These functions return the natural logarithm of (1+a). The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_exp_u10, Sleef_expf_u10 - base-e exponential functions

Synopsis

#include <sleef.h>

double Sleef_exp_u10(double a);
float Sleef_expf_u10(float a);

Link with -lsleef.

Description

These functions return the value of e raised to a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_exp2_u10, Sleef_exp2f_u10 - base-2 exponential functions

Synopsis

#include <sleef.h>

double Sleef_exp2_u10(double a);
float Sleef_exp2f_u10(float a);

Link with -lsleef.

Description

These functions return 2 raised to a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_exp10_u10, Sleef_exp10f_u10 - base-10 exponential functions

Synopsis

#include <sleef.h>

double Sleef_exp10_u10(double a);
float Sleef_exp10f_u10(float a);

Link with -lsleef.

Description

These functions return 10 raised to a. The error bound of the returned value is 1.09 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_expm1_u10, Sleef_expm1f_u10 - base-e exponential functions minus 1

Synopsis

#include <sleef.h>

double Sleef_expm1_u10(double a);
float Sleef_expm1f_u10(float a);

Link with -lsleef.

Description

These functions return the value one less than e raised to a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_sqrt_u05, Sleef_sqrtf_u05 - square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sqrt_u05(double x);
float Sleef_sqrtf_u05(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of sqrt and sqrtf functions. The error bound of the returned value is 0.5001 ULP. These functions do not set errno nor raise an exception.


Sleef_sqrtf_u35 - square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sqrtf_u35(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of sqrt and sqrtf functions. The error bound of the returned value is 3.5 ULP. These functions do not set errno nor raise an exception.


Sleef_cbrt_u10, Sleef_cbrtf_u10 - cube root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cbrt_u10(double a);
float Sleef_cbrtf_u10(float a);

Link with -lsleef.

Description

These functions return the real cube root of a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_cbrt_u35, Sleef_cbrtf_u35 - cube root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cbrt_u35(double a);
float Sleef_cbrtf_u35(float a);

Link with -lsleef.

Description

These functions return the real cube root of a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_hypot_u05, Sleef_hypotf_u05 - 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_hypot_u05(double x, double y);
float Sleef_hypotf_u05(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of hypot and hypotf functions. The error bound of the returned value is 0.5001 ULP. These functions do not set errno nor raise an exception.


Sleef_hypot_u35, Sleef_hypotf_u35 - 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_hypot_u35(double x, double y);
float Sleef_hypotf_u35(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of hypot and hypotf functions. The error bound of the returned value is 3.5 ULP. These functions do not set errno nor raise an exception.

Inverse Trigonometric Functions

Sleef_asin_u10, Sleef_asinf_u10 - arc sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_asin_u10(double a);
float Sleef_asinf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the arc sine function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_asin_u35, Sleef_asinf_u35 - arc sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_asin_u35(double a);
float Sleef_asinf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the arc sine function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_acos_u10, Sleef_acosf_u10 - arc cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_acos_u10(double a);
float Sleef_acosf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the arc cosine function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_acos_u35, Sleef_acosf_u35 - arc cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_acos_u35(double a);
float Sleef_acosf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the arc cosine function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_atan_u10, Sleef_atanf_u10 - arc tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atan_u10(double a);
float Sleef_atanf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the arc tangent function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_atan_u35, Sleef_atanf_u35 - arc tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atan_u35(double a);
float Sleef_atanf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the arc tangent function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_atan2_u10, Sleef_atan2f_u10 - arc tangent functions of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atan2_u10(double y, double x);
float Sleef_atan2f_u10(float y, float x);

Link with -lsleef.

Description

These functions evaluate the arc tangent function of (y / x). The quadrant of the result is determined according to the signs of x and y. The error bounds of the returned values are max(1.0 ULP, DBL_MIN) and max(1.0 ULP, FLT_MIN), respectively. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_atan2_u35, Sleef_atan2f_u35 - arc tangent functions of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atan2_u35(double y, double x);
float Sleef_atan2f_u35(float y, float x);

Link with -lsleef.

Description

These functions evaluate the arc tangent function of (y / x). The quadrant of the result is determined according to the signs of x and y. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.

Hyperbolic functions and inverse hyperbolic functions

Sleef_sinh_u10, Sleef_sinhf_u10 - hyperbolic sine functions

Synopsis

#include <sleef.h>

double Sleef_sinh_u10(double a);
float Sleef_sinhf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the hyperbolic sine function of a value in a. The error bound of the returned value is 1.0 ULP if a is in [-709, 709] for the double-precision function or [-88.5, 88.5] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 1.0 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_sinh_u35, Sleef_sinhf_u35 - hyperbolic sine functions

Synopsis

#include <sleef.h>

double Sleef_sinh_u35(double a);
float Sleef_sinhf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the hyperbolic sine function of a value in a. The error bound of the returned value is 3.5 ULP if a is in [-709, 709] for the double-precision function or [-88, 88] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 3.5 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_cosh_u10, Sleef_coshf_u10 - hyperbolic cosine functions

Synopsis

#include <sleef.h>

double Sleef_cosh_u10(double a);
float Sleef_coshf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the hyperbolic cosine function of a value in a. The error bound of the returned value is 1.0 ULP if a is in [-709, 709] for the double-precision function or [-88.5, 88.5] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 1.0 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_cosh_u35, Sleef_coshf_u35 - hyperbolic cosine functions

Synopsis

#include <sleef.h>

double Sleef_cosh_u35(double a);
float Sleef_coshf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the hyperbolic cosine function of a value in a. The error bound of the returned value is 3.5 ULP if a is in [-709, 709] for the double-precision function or [-88, 88] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 3.5 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_tanh_u10, Sleef_tanhf_u10 - hyperbolic tangent functions

Synopsis

#include <sleef.h>

double Sleef_tanh_u10(double a);
float Sleef_tanhf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the hyperbolic tangent function of a value in a. The error bound of the returned value is 1.0 ULP for the double-precision function or 1.0001 ULP for the single-precision function. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_tanh_u35, Sleef_tanhf_u35 - hyperbolic tangent functions

Synopsis

#include <sleef.h>

double Sleef_tanh_u35(double a);
float Sleef_tanhf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the hyperbolic tangent function of a value in a. The error bound of the returned value is 3.5 ULP for both the double-precision and single-precision functions. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_asinh_u10, Sleef_asinhf_u10 - inverse hyperbolic sine functions

Synopsis

#include <sleef.h>

double Sleef_asinh_u10(double a);
float Sleef_asinhf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the inverse hyperbolic sine function of a value in a. The error bound of the returned value is 1.0 ULP if a is in [-1.34e+154, 1.34e+154] for the double-precision function or 1.001 ULP if a is in [-1.84e+19, 1.84e+19] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 1.0 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_acosh_u10, Sleef_acoshf_u10 - inverse hyperbolic cosine functions

Synopsis

#include <sleef.h>

double Sleef_acosh_u10(double a);
float Sleef_acoshf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the inverse hyperbolic cosine function of a value in a. The error bound of the returned value is 1.0 ULP if a is in [-1.34e+154, 1.34e+154] for the double-precision function or 1.001 ULP if a is in [-1.84e+19, 1.84e+19] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 1.0 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_atanh_u10, Sleef_atanhf_u10 - inverse hyperbolic tangent functions

Synopsis

#include <sleef.h>

double Sleef_atanh_u10(double a);
float Sleef_atanhf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the inverse hyperbolic tangent function of a value in a. The error bound of the returned value is 1.0 ULP for the double-precision function or 1.0001 ULP for the single-precision function. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.

Error and gamma functions

Sleef_erf_u10, Sleef_erff_u10 - error functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_erf_u10(double x);
float Sleef_erff_u10(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of erf and erff functions. The error bound of the returned value is 1.0 ULP. These functions do not set errno nor raise an exception.


Sleef_erfc_u15, Sleef_erfcf_u15 - complementary error functions

Synopsis

#include <sleef.h>

double Sleef_erfc_u15(double x);
float Sleef_erfcf_u15(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of erfc and erfcf functions. The error bound of the returned value for the DP function is max(1.5 ULP, DBL_MIN) if the argument is less than 26.2, and max(2.5 ULP, DBL_MIN) otherwise. For the SP function, the error bound is max(1.5 ULP, FLT_MIN). These functions do not set errno nor raise an exception.


Sleef_tgamma_u10, Sleef_tgammaf_u10 - gamma functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_tgamma_u10(double x);
float Sleef_tgammaf_u10(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of tgamma and tgammaf functions. The error bound of the returned value is 1.0 ULP. These functions do not set errno nor raise an exception.


Sleef_lgamma_u10, Sleef_lgammaf_u10 - log gamma functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_lgamma_u10(double x);
float Sleef_lgammaf_u10(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of lgamma and lgammaf functions. The error bound of the returned value is 1.0 ULP if the argument is positive. If the argument is larger than 2e+305 for the DP function and 4e+36 for the SP function, it may return infinity instead of the correct value. The error bound is max(1 ULP, 1e-15) for the DP function and max(1 ULP, 1e-8) for the SP function, if the argument is negative. These functions do not set errno nor raise an exception.

Nearest integer functions

Sleef_trunc, Sleef_truncf - round to integer towards zero

Synopsis

#include <sleef.h>

double Sleef_trunc(double x);
float Sleef_truncf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of trunc and truncf functions. These functions do not set errno nor raise an exception.


Sleef_floor, Sleef_floorf - round to integer towards minus infinity

Synopsis

#include <sleef.h>

double Sleef_floor(double x);
float Sleef_floorf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of floor and floorf functions. These functions do not set errno nor raise an exception.


Sleef_ceil, Sleef_ceilf - round to integer towards plus infinity

Synopsis

#include <sleef.h>

double Sleef_ceil(double x);
float Sleef_ceilf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of ceil and ceilf functions. These functions do not set errno nor raise an exception.


Sleef_round, Sleef_roundf - round to integer, ties round away from zero

Synopsis

#include <sleef.h>

double Sleef_round(double x);
float Sleef_roundf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of round and roundf functions. These functions do not set errno nor raise an exception.


Sleef_rint, Sleef_rintf - round to integer, ties round to even

Synopsis

#include <sleef.h>

double Sleef_rint(double x);
float Sleef_rintf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of rint and rintf functions. These functions do not set errno nor raise an exception.

Other functions

Sleef_fma, Sleef_fmaf - fused multiply and accumulate

Synopsis

#include <sleef.h>

double Sleef_fma(double x, double y, double z);
float Sleef_fmaf(float x, float y, float z);

Link with -lsleef.

Description

These functions compute (x × y + z) without rounding, and then return the rounded value of the result. These functions may return infinity with a correct sign if the absolute value of the correct return value is greater than 1e+300 and 1e+33, respectively. The error bounds of the returned values are 0.5 ULP and max(0.50001 ULP, FLT_MIN), respectively.


Sleef_fmod, Sleef_fmodf - FP remainder

Synopsis

#include <sleef.h>

double Sleef_fmod(double x, double y);
float Sleef_fmodf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of fmod and fmodf functions, if |x / y| is smaller than 1e+300 and 1e+38, respectively. The returned value is undefined, otherwise. These functions do not set errno nor raise an exception.


Sleef_remainder, Sleef_remainderf - FP remainder

Synopsis

#include <sleef.h>

double Sleef_remainder(double x, double y);
float Sleef_remainderf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of remainder and remainderf functions, if |x / y| is smaller than 1e+300 and 1e+38, respectively. The returned value is undefined, otherwise. These functions do not set errno nor raise an exception.


Sleef_ldexp, Sleef_ldexpf - multiply by integral power of 2

Synopsis

#include <sleef.h>

double Sleef_ldexp(double m, int x);
float Sleef_ldexpf(float m, int x);

Link with -lsleef.

Description

These functions return the result of multiplying m by 2 raised to the power x. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_frfrexp, Sleef_frfrexpf - fractional component of an FP number

Synopsis

#include <sleef.h>

double Sleef_frfrexp(double x);
float Sleef_frfrexpf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of frexp and frexpf functions. These functions do not set errno nor raise an exception.


Sleef_expfrexp, Sleef_expfrexpf - exponent of an FP number

Synopsis

#include <sleef.h>

int Sleef_expfrexp(double x);
int Sleef_expfrexpf(float x);

Link with -lsleef.

Description

These functions return the exponent returned by frexp and frexpf functions as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_ilogb, Sleef_ilogbf - integer exponent of an FP number

Synopsis

#include <sleef.h>

int Sleef_ilogb(double x);
int Sleef_ilogbf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of ilogb and ilogbf functions. These functions do not set errno nor raise an exception.


Sleef_modf, Sleef_modff - integral and fractional value of FP number

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_modf(double x);
Sleef_float2 Sleef_modff(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of modf and modff functions. These functions do not set errno nor raise an exception.


Sleef_fabs, Sleef_fabsf - absolute value

Synopsis

#include <sleef.h>

double Sleef_fabs(double x);
float Sleef_fabsf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of fabs and fabsf functions. These functions do not set errno nor raise an exception.


Sleef_fmax, Sleef_fmaxf - maximum of two numbers

Synopsis

#include <sleef.h>

double Sleef_fmax(double x, double y);
float Sleef_fmaxf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of fmax and fmaxf functions. These functions do not set errno nor raise an exception.


Sleef_fmin, Sleef_fminf - minimum of two numbers

Synopsis

#include <sleef.h>

double Sleef_fmin(double x, double y);
float Sleef_fminf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of fmin and fminf functions. These functions do not set errno nor raise an exception.


Sleef_fdim, Sleef_fdimf - positive difference

Synopsis

#include <sleef.h>

double Sleef_fdim(double x, double y);
float Sleef_fdimf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of fdim and fdimf functions. These functions do not set errno nor raise an exception.


Sleef_copysign, Sleef_copysignf - copy sign of a number

Synopsis

#include <sleef.h>

double Sleef_copysign(double x, double y);
float Sleef_copysignf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of copysign and copysignf functions. These functions do not set errno nor raise an exception.


Sleef_nextafter, Sleef_nextafterf - find the next representable FP value

Synopsis

#include <sleef.h>

double Sleef_nextafter(double x, double y);
float Sleef_nextafterf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of nextafter and nextafterf functions. These functions do not set errno nor raise an exception.

sleef-3.5.1/doc/html/s390x.xhtml000066400000000000000000006216041373003144100163000ustar00rootroot00000000000000 SLEEF - Math library reference (S390X)

SLEEF Documentation - Math library reference (S390X)

Table of contents

Data types for System/390 architecture

Sleef_SLEEF_VECTOR_FLOAT_2

Description

Sleef_SLEEF_VECTOR_FLOAT_2 is a data type for storing two __vector float values, which is defined in sleef.h as follows:

typedef struct {
  __vector float x, y;
} Sleef_SLEEF_VECTOR_FLOAT_2;

Sleef_SLEEF_VECTOR_DOUBLE_2

Description

Sleef_SLEEF_VECTOR_DOUBLE_2 is a data type for storing two __vector double values, which is defined in sleef.h as follows:

typedef struct {
  __vector double x, y;
} Sleef_SLEEF_VECTOR_DOUBLE_2;

Trigonometric Functions

Vectorized double precision sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sind1_u10purec(double a);
double Sleef_sind1_u10purecfma(double a);
double Sleef_cinz_sind1_u10purec(double a);
double Sleef_finz_sind1_u10purecfma(double a);

__vector double Sleef_sind2_u10(__vector double a);
__vector double Sleef_sind2_u10zvector2(__vector double a);
__vector double Sleef_sind2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_sind2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_sind2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sin_u10 with the same accuracy specification.


Vectorized single precision sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sinf1_u10purec(float a);
float Sleef_sinf1_u10purecfma(float a);
float Sleef_cinz_sinf1_u10purec(float a);
float Sleef_finz_sinf1_u10purecfma(float a);

__vector float Sleef_sinf4_u10(__vector float a);
__vector float Sleef_sinf4_u10zvector2(__vector float a);
__vector float Sleef_sinf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_sinf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_sinf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u10 with the same accuracy specification.


Vectorized double precision sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sind1_u35purec(double a);
double Sleef_sind1_u35purecfma(double a);
double Sleef_cinz_sind1_u35purec(double a);
double Sleef_finz_sind1_u35purecfma(double a);

__vector double Sleef_sind2_u35(__vector double a);
__vector double Sleef_sind2_u35zvector2(__vector double a);
__vector double Sleef_sind2_u35zvector2nofma(__vector double a);
__vector double Sleef_cinz_sind2_u35zvector2nofma(__vector double a);
__vector double Sleef_finz_sind2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sin_u35 with the same accuracy specification.


Vectorized single precision sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sinf1_u35purec(float a);
float Sleef_sinf1_u35purecfma(float a);
float Sleef_cinz_sinf1_u35purec(float a);
float Sleef_finz_sinf1_u35purecfma(float a);

__vector float Sleef_sinf4_u35(__vector float a);
__vector float Sleef_sinf4_u35zvector2(__vector float a);
__vector float Sleef_sinf4_u35zvector2nofma(__vector float a);
__vector float Sleef_cinz_sinf4_u35zvector2nofma(__vector float a);
__vector float Sleef_finz_sinf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u35 with the same accuracy specification.


Vectorized double precision cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cosd1_u10purec(double a);
double Sleef_cosd1_u10purecfma(double a);
double Sleef_cinz_cosd1_u10purec(double a);
double Sleef_finz_cosd1_u10purecfma(double a);

__vector double Sleef_cosd2_u10(__vector double a);
__vector double Sleef_cosd2_u10zvector2(__vector double a);
__vector double Sleef_cosd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_cosd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_cosd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cos_u10 with the same accuracy specification.


Vectorized single precision cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_cosf1_u10purec(float a);
float Sleef_cosf1_u10purecfma(float a);
float Sleef_cinz_cosf1_u10purec(float a);
float Sleef_finz_cosf1_u10purecfma(float a);

__vector float Sleef_cosf4_u10(__vector float a);
__vector float Sleef_cosf4_u10zvector2(__vector float a);
__vector float Sleef_cosf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_cosf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_cosf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u10 with the same accuracy specification.


Vectorized double precision cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_cosd1_u35purec(double a);
double Sleef_cosd1_u35purecfma(double a);
double Sleef_cinz_cosd1_u35purec(double a);
double Sleef_finz_cosd1_u35purecfma(double a);

__vector double Sleef_cosd2_u35(__vector double a);
__vector double Sleef_cosd2_u35zvector2(__vector double a);
__vector double Sleef_cosd2_u35zvector2nofma(__vector double a);
__vector double Sleef_cinz_cosd2_u35zvector2nofma(__vector double a);
__vector double Sleef_finz_cosd2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cos_u35 with the same accuracy specification.


Vectorized single precision cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_cosf1_u35purec(float a);
float Sleef_cosf1_u35purecfma(float a);
float Sleef_cinz_cosf1_u35purec(float a);
float Sleef_finz_cosf1_u35purecfma(float a);

__vector float Sleef_cosf4_u35(__vector float a);
__vector float Sleef_cosf4_u35zvector2(__vector float a);
__vector float Sleef_cosf4_u35zvector2nofma(__vector float a);
__vector float Sleef_cinz_cosf4_u35zvector2nofma(__vector float a);
__vector float Sleef_finz_cosf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u35 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincosd1_u10purec(double a);
Sleef_double2 Sleef_sincosd1_u10purecfma(double a);
Sleef_double2 Sleef_cinz_sincosd1_u10purec(double a);
Sleef_double2 Sleef_finz_sincosd1_u10purecfma(double a);

Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincosd2_u10(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincosd2_u10zvector2(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincosd2_u10zvector2nofma(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_cinz_sincosd2_u10zvector2nofma(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_finz_sincosd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincos_u10 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float2 Sleef_sincosf1_u10purec(float a);
Sleef_float2 Sleef_sincosf1_u10purecfma(float a);
Sleef_float2 Sleef_cinz_sincosf1_u10purec(float a);
Sleef_float2 Sleef_finz_sincosf1_u10purecfma(float a);

Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincosf4_u10(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincosf4_u10zvector2(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincosf4_u10zvector2nofma(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_cinz_sincosf4_u10zvector2nofma(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_finz_sincosf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u10 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_double2 Sleef_sincosd1_u35purec(double a);
Sleef_double2 Sleef_sincosd1_u35purecfma(double a);
Sleef_double2 Sleef_cinz_sincosd1_u35purec(double a);
Sleef_double2 Sleef_finz_sincosd1_u35purecfma(double a);

Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincosd2_u35(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincosd2_u35zvector2(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincosd2_u35zvector2nofma(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_cinz_sincosd2_u35zvector2nofma(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_finz_sincosd2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincos_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_float2 Sleef_sincosf1_u35purec(float a);
Sleef_float2 Sleef_sincosf1_u35purecfma(float a);
Sleef_float2 Sleef_cinz_sincosf1_u35purec(float a);
Sleef_float2 Sleef_finz_sincosf1_u35purecfma(float a);

Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincosf4_u35(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincosf4_u35zvector2(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincosf4_u35zvector2nofma(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_cinz_sincosf4_u35zvector2nofma(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_finz_sincosf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u35 with the same accuracy specification.


Vectorized double precision sine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sinpid1_u05purec(double a);
double Sleef_sinpid1_u05purecfma(double a);
double Sleef_cinz_sinpid1_u05purec(double a);
double Sleef_finz_sinpid1_u05purecfma(double a);

__vector double Sleef_sinpid2_u05(__vector double a);
__vector double Sleef_sinpid2_u05zvector2(__vector double a);
__vector double Sleef_sinpid2_u05zvector2nofma(__vector double a);
__vector double Sleef_cinz_sinpid2_u05zvector2nofma(__vector double a);
__vector double Sleef_finz_sinpid2_u05zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinpi_u05 with the same accuracy specification.


Vectorized single precision sine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sinpif1_u05purec(float a);
float Sleef_sinpif1_u05purecfma(float a);
float Sleef_cinz_sinpif1_u05purec(float a);
float Sleef_finz_sinpif1_u05purecfma(float a);

__vector float Sleef_sinpif4_u05(__vector float a);
__vector float Sleef_sinpif4_u05zvector2(__vector float a);
__vector float Sleef_sinpif4_u05zvector2nofma(__vector float a);
__vector float Sleef_cinz_sinpif4_u05zvector2nofma(__vector float a);
__vector float Sleef_finz_sinpif4_u05zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinpif_u05 with the same accuracy specification.


Vectorized double precision cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cospid1_u05purec(double a);
double Sleef_cospid1_u05purecfma(double a);
double Sleef_cinz_cospid1_u05purec(double a);
double Sleef_finz_cospid1_u05purecfma(double a);

__vector double Sleef_cospid2_u05(__vector double a);
__vector double Sleef_cospid2_u05zvector2(__vector double a);
__vector double Sleef_cospid2_u05zvector2nofma(__vector double a);
__vector double Sleef_cinz_cospid2_u05zvector2nofma(__vector double a);
__vector double Sleef_finz_cospid2_u05zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cospi_u05 with the same accuracy specification.


Vectorized single precision cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_cospif1_u05purec(float a);
float Sleef_cospif1_u05purecfma(float a);
float Sleef_cinz_cospif1_u05purec(float a);
float Sleef_finz_cospif1_u05purecfma(float a);

__vector float Sleef_cospif4_u05(__vector float a);
__vector float Sleef_cospif4_u05zvector2(__vector float a);
__vector float Sleef_cospif4_u05zvector2nofma(__vector float a);
__vector float Sleef_cinz_cospif4_u05zvector2nofma(__vector float a);
__vector float Sleef_finz_cospif4_u05zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cospif_u05 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincospid1_u05purec(double a);
Sleef_double2 Sleef_sincospid1_u05purecfma(double a);
Sleef_double2 Sleef_cinz_sincospid1_u05purec(double a);
Sleef_double2 Sleef_finz_sincospid1_u05purecfma(double a);

Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincospid2_u05(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincospid2_u05zvector2(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincospid2_u05zvector2nofma(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_cinz_sincospid2_u05zvector2nofma(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_finz_sincospid2_u05zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospi_u05 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float2 Sleef_sincospif1_u05purec(float a);
Sleef_float2 Sleef_sincospif1_u05purecfma(float a);
Sleef_float2 Sleef_cinz_sincospif1_u05purec(float a);
Sleef_float2 Sleef_finz_sincospif1_u05purecfma(float a);

Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincospif4_u05(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincospif4_u05zvector2(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincospif4_u05zvector2nofma(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_cinz_sincospif4_u05zvector2nofma(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_finz_sincospif4_u05zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u05 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_double2 Sleef_sincospid1_u35purec(double a);
Sleef_double2 Sleef_sincospid1_u35purecfma(double a);
Sleef_double2 Sleef_cinz_sincospid1_u35purec(double a);
Sleef_double2 Sleef_finz_sincospid1_u35purecfma(double a);

Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincospid2_u35(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincospid2_u35zvector2(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_sincospid2_u35zvector2nofma(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_cinz_sincospid2_u35zvector2nofma(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_finz_sincospid2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospi_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_float2 Sleef_sincospif1_u35purec(float a);
Sleef_float2 Sleef_sincospif1_u35purecfma(float a);
Sleef_float2 Sleef_cinz_sincospif1_u35purec(float a);
Sleef_float2 Sleef_finz_sincospif1_u35purecfma(float a);

Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincospif4_u35(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincospif4_u35zvector2(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_sincospif4_u35zvector2nofma(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_cinz_sincospif4_u35zvector2nofma(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_finz_sincospif4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u35 with the same accuracy specification.


Vectorized double precision tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_tand1_u10purec(double a);
double Sleef_tand1_u10purecfma(double a);
double Sleef_cinz_tand1_u10purec(double a);
double Sleef_finz_tand1_u10purecfma(double a);

__vector double Sleef_tand2_u10(__vector double a);
__vector double Sleef_tand2_u10zvector2(__vector double a);
__vector double Sleef_tand2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_tand2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_tand2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tan_u10 with the same accuracy specification.


Vectorized single precision tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_tanf1_u10purec(float a);
float Sleef_tanf1_u10purecfma(float a);
float Sleef_cinz_tanf1_u10purec(float a);
float Sleef_finz_tanf1_u10purecfma(float a);

__vector float Sleef_tanf4_u10(__vector float a);
__vector float Sleef_tanf4_u10zvector2(__vector float a);
__vector float Sleef_tanf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_tanf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_tanf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u10 with the same accuracy specification.


Vectorized double precision tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_tand1_u35purec(double a);
double Sleef_tand1_u35purecfma(double a);
double Sleef_cinz_tand1_u35purec(double a);
double Sleef_finz_tand1_u35purecfma(double a);

__vector double Sleef_tand2_u35(__vector double a);
__vector double Sleef_tand2_u35zvector2(__vector double a);
__vector double Sleef_tand2_u35zvector2nofma(__vector double a);
__vector double Sleef_cinz_tand2_u35zvector2nofma(__vector double a);
__vector double Sleef_finz_tand2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tan_u35 with the same accuracy specification.


Vectorized single precision tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_tanf1_u35purec(float a);
float Sleef_tanf1_u35purecfma(float a);
float Sleef_cinz_tanf1_u35purec(float a);
float Sleef_finz_tanf1_u35purecfma(float a);

__vector float Sleef_tanf4_u35(__vector float a);
__vector float Sleef_tanf4_u35zvector2(__vector float a);
__vector float Sleef_tanf4_u35zvector2nofma(__vector float a);
__vector float Sleef_cinz_tanf4_u35zvector2nofma(__vector float a);
__vector float Sleef_finz_tanf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u35 with the same accuracy specification.

Power, exponential, and logarithmic functions

Vectorized double precision power function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_powd1_u10purec(double a, double b);
double Sleef_powd1_u10purecfma(double a, double b);
double Sleef_cinz_powd1_u10purec(double a, double b);
double Sleef_finz_powd1_u10purecfma(double a, double b);

__vector double Sleef_powd2_u10(__vector double a, __vector double b);
__vector double Sleef_powd2_u10zvector2(__vector double a, __vector double b);
__vector double Sleef_powd2_u10zvector2nofma(__vector double a, __vector double b);
__vector double Sleef_cinz_powd2_u10zvector2nofma(__vector double a, __vector double b);
__vector double Sleef_finz_powd2_u10zvector2(__vector double a, __vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_pow_u10 with the same accuracy specification.


Vectorized single precision power function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_powf1_u10purec(float a, float b);
float Sleef_powf1_u10purecfma(float a, float b);
float Sleef_cinz_powf1_u10purec(float a, float b);
float Sleef_finz_powf1_u10purecfma(float a, float b);

__vector float Sleef_powf4_u10(__vector float a, __vector float b);
__vector float Sleef_powf4_u10zvector2(__vector float a, __vector float b);
__vector float Sleef_powf4_u10zvector2nofma(__vector float a, __vector float b);
__vector float Sleef_cinz_powf4_u10zvector2nofma(__vector float a, __vector float b);
__vector float Sleef_finz_powf4_u10zvector2(__vector float a, __vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_powf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_logd1_u10purec(double a);
double Sleef_logd1_u10purecfma(double a);
double Sleef_cinz_logd1_u10purec(double a);
double Sleef_finz_logd1_u10purecfma(double a);

__vector double Sleef_logd2_u10(__vector double a);
__vector double Sleef_logd2_u10zvector2(__vector double a);
__vector double Sleef_logd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_logd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_logd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log_u10 with the same accuracy specification.


Vectorized single precision natural logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_logf1_u10purec(float a);
float Sleef_logf1_u10purecfma(float a);
float Sleef_cinz_logf1_u10purec(float a);
float Sleef_finz_logf1_u10purecfma(float a);

__vector float Sleef_logf4_u10(__vector float a);
__vector float Sleef_logf4_u10zvector2(__vector float a);
__vector float Sleef_logf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_logf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_logf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_logd1_u35purec(double a);
double Sleef_logd1_u35purecfma(double a);
double Sleef_cinz_logd1_u35purec(double a);
double Sleef_finz_logd1_u35purecfma(double a);

__vector double Sleef_logd2_u35(__vector double a);
__vector double Sleef_logd2_u35zvector2(__vector double a);
__vector double Sleef_logd2_u35zvector2nofma(__vector double a);
__vector double Sleef_cinz_logd2_u35zvector2nofma(__vector double a);
__vector double Sleef_finz_logd2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log_u35 with the same accuracy specification.


Vectorized single precision natural logarithmic function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_logf1_u35purec(float a);
float Sleef_logf1_u35purecfma(float a);
float Sleef_cinz_logf1_u35purec(float a);
float Sleef_finz_logf1_u35purecfma(float a);

__vector float Sleef_logf4_u35(__vector float a);
__vector float Sleef_logf4_u35zvector2(__vector float a);
__vector float Sleef_logf4_u35zvector2nofma(__vector float a);
__vector float Sleef_cinz_logf4_u35zvector2nofma(__vector float a);
__vector float Sleef_finz_logf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u35 with the same accuracy specification.


Vectorized double precision base-10 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log10d1_u10purec(double a);
double Sleef_log10d1_u10purecfma(double a);
double Sleef_cinz_log10d1_u10purec(double a);
double Sleef_finz_log10d1_u10purecfma(double a);

__vector double Sleef_log10d2_u10(__vector double a);
__vector double Sleef_log10d2_u10zvector2(__vector double a);
__vector double Sleef_log10d2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_log10d2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_log10d2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log10_u10 with the same accuracy specification.


Vectorized single precision base-10 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_log10f1_u10purec(float a);
float Sleef_log10f1_u10purecfma(float a);
float Sleef_cinz_log10f1_u10purec(float a);
float Sleef_finz_log10f1_u10purecfma(float a);

__vector float Sleef_log10f4_u10(__vector float a);
__vector float Sleef_log10f4_u10zvector2(__vector float a);
__vector float Sleef_log10f4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_log10f4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_log10f4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log10f_u10 with the same accuracy specification.


Vectorized double precision base-2 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log2d1_u10purec(double a);
double Sleef_log2d1_u10purecfma(double a);
double Sleef_cinz_log2d1_u10purec(double a);
double Sleef_finz_log2d1_u10purecfma(double a);

__vector double Sleef_log2d2_u10(__vector double a);
__vector double Sleef_log2d2_u10zvector2(__vector double a);
__vector double Sleef_log2d2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_log2d2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_log2d2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log2_u10 with the same accuracy specification.


Vectorized single precision base-2 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_log2f1_u10purec(float a);
float Sleef_log2f1_u10purecfma(float a);
float Sleef_cinz_log2f1_u10purec(float a);
float Sleef_finz_log2f1_u10purecfma(float a);

__vector float Sleef_log2f4_u10(__vector float a);
__vector float Sleef_log2f4_u10zvector2(__vector float a);
__vector float Sleef_log2f4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_log2f4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_log2f4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log2f_u10 with the same accuracy specification.


Vectorized double precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log1pd1_u10purec(double a);
double Sleef_log1pd1_u10purecfma(double a);
double Sleef_cinz_log1pd1_u10purec(double a);
double Sleef_finz_log1pd1_u10purecfma(double a);

__vector double Sleef_log1pd2_u10(__vector double a);
__vector double Sleef_log1pd2_u10zvector2(__vector double a);
__vector double Sleef_log1pd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_log1pd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_log1pd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log1p_u10 with the same accuracy specification.


Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_log1pf1_u10purec(float a);
float Sleef_log1pf1_u10purecfma(float a);
float Sleef_cinz_log1pf1_u10purec(float a);
float Sleef_finz_log1pf1_u10purecfma(float a);

__vector float Sleef_log1pf4_u10(__vector float a);
__vector float Sleef_log1pf4_u10zvector2(__vector float a);
__vector float Sleef_log1pf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_log1pf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_log1pf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log1pf_u10 with the same accuracy specification.


Vectorized double precision base-e exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_expd1_u10purec(double a);
double Sleef_expd1_u10purecfma(double a);
double Sleef_cinz_expd1_u10purec(double a);
double Sleef_finz_expd1_u10purecfma(double a);

__vector double Sleef_expd2_u10(__vector double a);
__vector double Sleef_expd2_u10zvector2(__vector double a);
__vector double Sleef_expd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_expd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_expd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp_u10 with the same accuracy specification.


Vectorized single precision base-e exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_expf1_u10purec(float a);
float Sleef_expf1_u10purecfma(float a);
float Sleef_cinz_expf1_u10purec(float a);
float Sleef_finz_expf1_u10purecfma(float a);

__vector float Sleef_expf4_u10(__vector float a);
__vector float Sleef_expf4_u10zvector2(__vector float a);
__vector float Sleef_expf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_expf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_expf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expf_u10 with the same accuracy specification.


Vectorized double precision base-2 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_exp2d1_u10purec(double a);
double Sleef_exp2d1_u10purecfma(double a);
double Sleef_cinz_exp2d1_u10purec(double a);
double Sleef_finz_exp2d1_u10purecfma(double a);

__vector double Sleef_exp2d2_u10(__vector double a);
__vector double Sleef_exp2d2_u10zvector2(__vector double a);
__vector double Sleef_exp2d2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_exp2d2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_exp2d2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp2_u10 with the same accuracy specification.


Vectorized single precision base-2 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_exp2f1_u10purec(float a);
float Sleef_exp2f1_u10purecfma(float a);
float Sleef_cinz_exp2f1_u10purec(float a);
float Sleef_finz_exp2f1_u10purecfma(float a);

__vector float Sleef_exp2f4_u10(__vector float a);
__vector float Sleef_exp2f4_u10zvector2(__vector float a);
__vector float Sleef_exp2f4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_exp2f4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_exp2f4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp2f_u10 with the same accuracy specification.


Vectorized double precision base-10 exponential function with 1.09 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_exp10d1_u10purec(double a);
double Sleef_exp10d1_u10purecfma(double a);
double Sleef_cinz_exp10d1_u10purec(double a);
double Sleef_finz_exp10d1_u10purecfma(double a);

__vector double Sleef_exp10d2_u10(__vector double a);
__vector double Sleef_exp10d2_u10zvector2(__vector double a);
__vector double Sleef_exp10d2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_exp10d2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_exp10d2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp10_u10 with the same accuracy specification.


Vectorized single precision base-10 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_exp10f1_u10purec(float a);
float Sleef_exp10f1_u10purecfma(float a);
float Sleef_cinz_exp10f1_u10purec(float a);
float Sleef_finz_exp10f1_u10purecfma(float a);

__vector float Sleef_exp10f4_u10(__vector float a);
__vector float Sleef_exp10f4_u10zvector2(__vector float a);
__vector float Sleef_exp10f4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_exp10f4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_exp10f4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp10f_u10 with the same accuracy specification.


Vectorized double precision base-e exponential function minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_expm1d1_u10purec(double a);
double Sleef_expm1d1_u10purecfma(double a);
double Sleef_cinz_expm1d1_u10purec(double a);
double Sleef_finz_expm1d1_u10purecfma(double a);

__vector double Sleef_expm1d2_u10(__vector double a);
__vector double Sleef_expm1d2_u10zvector2(__vector double a);
__vector double Sleef_expm1d2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_expm1d2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_expm1d2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expm1_u10 with the same accuracy specification.


Vectorized single precision base-e exponential function minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_expm1f1_u10purec(float a);
float Sleef_expm1f1_u10purecfma(float a);
float Sleef_cinz_expm1f1_u10purec(float a);
float Sleef_finz_expm1f1_u10purecfma(float a);

__vector float Sleef_expm1f4_u10(__vector float a);
__vector float Sleef_expm1f4_u10zvector2(__vector float a);
__vector float Sleef_expm1f4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_expm1f4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_expm1f4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expm1f_u10 with the same accuracy specification.


Vectorized double precision square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sqrtd1_u05purec(double a);
double Sleef_sqrtd1_u05purecfma(double a);
double Sleef_cinz_sqrtd1_u05purec(double a);
double Sleef_finz_sqrtd1_u05purecfma(double a);

__vector double Sleef_sqrtd2_u05(__vector double a);
__vector double Sleef_sqrtd2_u05zvector2(__vector double a);
__vector double Sleef_sqrtd2_u05zvector2nofma(__vector double a);
__vector double Sleef_cinz_sqrtd2_u05zvector2nofma(__vector double a);
__vector double Sleef_finz_sqrtd2_u05zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrt_u05 with the same accuracy specification.


Vectorized single precision square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sqrtf1_u05purec(float a);
float Sleef_sqrtf1_u05purecfma(float a);
float Sleef_cinz_sqrtf1_u05purec(float a);
float Sleef_finz_sqrtf1_u05purecfma(float a);

__vector float Sleef_sqrtf4_u05(__vector float a);
__vector float Sleef_sqrtf4_u05zvector2(__vector float a);
__vector float Sleef_sqrtf4_u05zvector2nofma(__vector float a);
__vector float Sleef_cinz_sqrtf4_u05zvector2nofma(__vector float a);
__vector float Sleef_finz_sqrtf4_u05zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u05 with the same accuracy specification.


Vectorized double precision square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sqrtd1_u35purec(double a);
double Sleef_sqrtd1_u35purecfma(double a);
double Sleef_cinz_sqrtd1_u35purec(double a);
double Sleef_finz_sqrtd1_u35purecfma(double a);

__vector double Sleef_sqrtd2_u35(__vector double a);
__vector double Sleef_sqrtd2_u35zvector2(__vector double a);
__vector double Sleef_sqrtd2_u35zvector2nofma(__vector double a);
__vector double Sleef_cinz_sqrtd2_u35zvector2nofma(__vector double a);
__vector double Sleef_finz_sqrtd2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrt_u35 with the same accuracy specification.


Vectorized single precision square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sqrtf1_u35purec(float a);
float Sleef_sqrtf1_u35purecfma(float a);
float Sleef_cinz_sqrtf1_u35purec(float a);
float Sleef_finz_sqrtf1_u35purecfma(float a);

__vector float Sleef_sqrtf4_u35(__vector float a);
__vector float Sleef_sqrtf4_u35zvector2(__vector float a);
__vector float Sleef_sqrtf4_u35zvector2nofma(__vector float a);
__vector float Sleef_cinz_sqrtf4_u35zvector2nofma(__vector float a);
__vector float Sleef_finz_sqrtf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u35 with the same accuracy specification.


Vectorized double precision cubic root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cbrtd1_u10purec(double a);
double Sleef_cbrtd1_u10purecfma(double a);
double Sleef_cinz_cbrtd1_u10purec(double a);
double Sleef_finz_cbrtd1_u10purecfma(double a);

__vector double Sleef_cbrtd2_u10(__vector double a);
__vector double Sleef_cbrtd2_u10zvector2(__vector double a);
__vector double Sleef_cbrtd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_cbrtd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_cbrtd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrt_u10 with the same accuracy specification.


Vectorized single precision cubic root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_cbrtf1_u10purec(float a);
float Sleef_cbrtf1_u10purecfma(float a);
float Sleef_cinz_cbrtf1_u10purec(float a);
float Sleef_finz_cbrtf1_u10purecfma(float a);

__vector float Sleef_cbrtf4_u10(__vector float a);
__vector float Sleef_cbrtf4_u10zvector2(__vector float a);
__vector float Sleef_cbrtf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_cbrtf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_cbrtf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u10 with the same accuracy specification.


Vectorized double precision cubic root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_cbrtd1_u35purec(double a);
double Sleef_cbrtd1_u35purecfma(double a);
double Sleef_cinz_cbrtd1_u35purec(double a);
double Sleef_finz_cbrtd1_u35purecfma(double a);

__vector double Sleef_cbrtd2_u35(__vector double a);
__vector double Sleef_cbrtd2_u35zvector2(__vector double a);
__vector double Sleef_cbrtd2_u35zvector2nofma(__vector double a);
__vector double Sleef_cinz_cbrtd2_u35zvector2nofma(__vector double a);
__vector double Sleef_finz_cbrtd2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrt_u35 with the same accuracy specification.


Vectorized single precision cubic root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_cbrtf1_u35purec(float a);
float Sleef_cbrtf1_u35purecfma(float a);
float Sleef_cinz_cbrtf1_u35purec(float a);
float Sleef_finz_cbrtf1_u35purecfma(float a);

__vector float Sleef_cbrtf4_u35(__vector float a);
__vector float Sleef_cbrtf4_u35zvector2(__vector float a);
__vector float Sleef_cbrtf4_u35zvector2nofma(__vector float a);
__vector float Sleef_cinz_cbrtf4_u35zvector2nofma(__vector float a);
__vector float Sleef_finz_cbrtf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u35 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_hypotd1_u05purec(double a, double b);
double Sleef_hypotd1_u05purecfma(double a, double b);
double Sleef_cinz_hypotd1_u05purec(double a, double b);
double Sleef_finz_hypotd1_u05purecfma(double a, double b);

__vector double Sleef_hypotd2_u05(__vector double a, __vector double b);
__vector double Sleef_hypotd2_u05zvector2(__vector double a, __vector double b);
__vector double Sleef_hypotd2_u05zvector2nofma(__vector double a, __vector double b);
__vector double Sleef_cinz_hypotd2_u05zvector2nofma(__vector double a, __vector double b);
__vector double Sleef_finz_hypotd2_u05zvector2(__vector double a, __vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypot_u05 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_hypotf1_u05purec(float a, float b);
float Sleef_hypotf1_u05purecfma(float a, float b);
float Sleef_cinz_hypotf1_u05purec(float a, float b);
float Sleef_finz_hypotf1_u05purecfma(float a, float b);

__vector float Sleef_hypotf4_u05(__vector float a, __vector float b);
__vector float Sleef_hypotf4_u05zvector2(__vector float a, __vector float b);
__vector float Sleef_hypotf4_u05zvector2nofma(__vector float a, __vector float b);
__vector float Sleef_cinz_hypotf4_u05zvector2nofma(__vector float a, __vector float b);
__vector float Sleef_finz_hypotf4_u05zvector2(__vector float a, __vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u05 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_hypotd1_u35purec(double a, double b);
double Sleef_hypotd1_u35purecfma(double a, double b);
double Sleef_cinz_hypotd1_u35purec(double a, double b);
double Sleef_finz_hypotd1_u35purecfma(double a, double b);

__vector double Sleef_hypotd2_u35(__vector double a, __vector double b);
__vector double Sleef_hypotd2_u35zvector2(__vector double a, __vector double b);
__vector double Sleef_hypotd2_u35zvector2nofma(__vector double a, __vector double b);
__vector double Sleef_cinz_hypotd2_u35zvector2nofma(__vector double a, __vector double b);
__vector double Sleef_finz_hypotd2_u35zvector2(__vector double a, __vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypot_u35 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_hypotf1_u35purec(float a, float b);
float Sleef_hypotf1_u35purecfma(float a, float b);
float Sleef_cinz_hypotf1_u35purec(float a, float b);
float Sleef_finz_hypotf1_u35purecfma(float a, float b);

__vector float Sleef_hypotf4_u35(__vector float a, __vector float b);
__vector float Sleef_hypotf4_u35zvector2(__vector float a, __vector float b);
__vector float Sleef_hypotf4_u35zvector2nofma(__vector float a, __vector float b);
__vector float Sleef_cinz_hypotf4_u35zvector2nofma(__vector float a, __vector float b);
__vector float Sleef_finz_hypotf4_u35zvector2(__vector float a, __vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u35 with the same accuracy specification.

Inverse Trigonometric Functions

Vectorized double precision arc sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_asind1_u10purec(double a);
double Sleef_asind1_u10purecfma(double a);
double Sleef_cinz_asind1_u10purec(double a);
double Sleef_finz_asind1_u10purecfma(double a);

__vector double Sleef_asind2_u10(__vector double a);
__vector double Sleef_asind2_u10zvector2(__vector double a);
__vector double Sleef_asind2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_asind2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_asind2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asin_u10 with the same accuracy specification.


Vectorized single precision arc sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_asinf1_u10purec(float a);
float Sleef_asinf1_u10purecfma(float a);
float Sleef_cinz_asinf1_u10purec(float a);
float Sleef_finz_asinf1_u10purecfma(float a);

__vector float Sleef_asinf4_u10(__vector float a);
__vector float Sleef_asinf4_u10zvector2(__vector float a);
__vector float Sleef_asinf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_asinf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_asinf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u10 with the same accuracy specification.


Vectorized double precision arc sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_asind1_u35purec(double a);
double Sleef_asind1_u35purecfma(double a);
double Sleef_cinz_asind1_u35purec(double a);
double Sleef_finz_asind1_u35purecfma(double a);

__vector double Sleef_asind2_u35(__vector double a);
__vector double Sleef_asind2_u35zvector2(__vector double a);
__vector double Sleef_asind2_u35zvector2nofma(__vector double a);
__vector double Sleef_cinz_asind2_u35zvector2nofma(__vector double a);
__vector double Sleef_finz_asind2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asin_u35 with the same accuracy specification.


Vectorized single precision arc sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_asinf1_u35purec(float a);
float Sleef_asinf1_u35purecfma(float a);
float Sleef_cinz_asinf1_u35purec(float a);
float Sleef_finz_asinf1_u35purecfma(float a);

__vector float Sleef_asinf4_u35(__vector float a);
__vector float Sleef_asinf4_u35zvector2(__vector float a);
__vector float Sleef_asinf4_u35zvector2nofma(__vector float a);
__vector float Sleef_cinz_asinf4_u35zvector2nofma(__vector float a);
__vector float Sleef_finz_asinf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u35 with the same accuracy specification.


Vectorized double precision arc cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_acosd1_u10purec(double a);
double Sleef_acosd1_u10purecfma(double a);
double Sleef_cinz_acosd1_u10purec(double a);
double Sleef_finz_acosd1_u10purecfma(double a);

__vector double Sleef_acosd2_u10(__vector double a);
__vector double Sleef_acosd2_u10zvector2(__vector double a);
__vector double Sleef_acosd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_acosd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_acosd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acos_u10 with the same accuracy specification.


Vectorized single precision arc cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_acosf1_u10purec(float a);
float Sleef_acosf1_u10purecfma(float a);
float Sleef_cinz_acosf1_u10purec(float a);
float Sleef_finz_acosf1_u10purecfma(float a);

__vector float Sleef_acosf4_u10(__vector float a);
__vector float Sleef_acosf4_u10zvector2(__vector float a);
__vector float Sleef_acosf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_acosf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_acosf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u10 with the same accuracy specification.


Vectorized double precision arc cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_acosd1_u35purec(double a);
double Sleef_acosd1_u35purecfma(double a);
double Sleef_cinz_acosd1_u35purec(double a);
double Sleef_finz_acosd1_u35purecfma(double a);

__vector double Sleef_acosd2_u35(__vector double a);
__vector double Sleef_acosd2_u35zvector2(__vector double a);
__vector double Sleef_acosd2_u35zvector2nofma(__vector double a);
__vector double Sleef_cinz_acosd2_u35zvector2nofma(__vector double a);
__vector double Sleef_finz_acosd2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acos_u35 with the same accuracy specification.


Vectorized single precision arc cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_acosf1_u35purec(float a);
float Sleef_acosf1_u35purecfma(float a);
float Sleef_cinz_acosf1_u35purec(float a);
float Sleef_finz_acosf1_u35purecfma(float a);

__vector float Sleef_acosf4_u35(__vector float a);
__vector float Sleef_acosf4_u35zvector2(__vector float a);
__vector float Sleef_acosf4_u35zvector2nofma(__vector float a);
__vector float Sleef_cinz_acosf4_u35zvector2nofma(__vector float a);
__vector float Sleef_finz_acosf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u35 with the same accuracy specification.


Vectorized double precision arc tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atand1_u10purec(double a);
double Sleef_atand1_u10purecfma(double a);
double Sleef_cinz_atand1_u10purec(double a);
double Sleef_finz_atand1_u10purecfma(double a);

__vector double Sleef_atand2_u10(__vector double a);
__vector double Sleef_atand2_u10zvector2(__vector double a);
__vector double Sleef_atand2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_atand2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_atand2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan_u10 with the same accuracy specification.


Vectorized single precision arc tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_atanf1_u10purec(float a);
float Sleef_atanf1_u10purecfma(float a);
float Sleef_cinz_atanf1_u10purec(float a);
float Sleef_finz_atanf1_u10purecfma(float a);

__vector float Sleef_atanf4_u10(__vector float a);
__vector float Sleef_atanf4_u10zvector2(__vector float a);
__vector float Sleef_atanf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_atanf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_atanf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u10 with the same accuracy specification.


Vectorized double precision arc tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_atand1_u35purec(double a);
double Sleef_atand1_u35purecfma(double a);
double Sleef_cinz_atand1_u35purec(double a);
double Sleef_finz_atand1_u35purecfma(double a);

__vector double Sleef_atand2_u35(__vector double a);
__vector double Sleef_atand2_u35zvector2(__vector double a);
__vector double Sleef_atand2_u35zvector2nofma(__vector double a);
__vector double Sleef_cinz_atand2_u35zvector2nofma(__vector double a);
__vector double Sleef_finz_atand2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan_u35 with the same accuracy specification.


Vectorized single precision arc tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_atanf1_u35purec(float a);
float Sleef_atanf1_u35purecfma(float a);
float Sleef_cinz_atanf1_u35purec(float a);
float Sleef_finz_atanf1_u35purecfma(float a);

__vector float Sleef_atanf4_u35(__vector float a);
__vector float Sleef_atanf4_u35zvector2(__vector float a);
__vector float Sleef_atanf4_u35zvector2nofma(__vector float a);
__vector float Sleef_cinz_atanf4_u35zvector2nofma(__vector float a);
__vector float Sleef_finz_atanf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u35 with the same accuracy specification.


Vectorized double precision arc tangent function of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atan2d1_u10purec(double a, double b);
double Sleef_atan2d1_u10purecfma(double a, double b);
double Sleef_cinz_atan2d1_u10purec(double a, double b);
double Sleef_finz_atan2d1_u10purecfma(double a, double b);

__vector double Sleef_atan2d2_u10(__vector double a, __vector double b);
__vector double Sleef_atan2d2_u10zvector2(__vector double a, __vector double b);
__vector double Sleef_atan2d2_u10zvector2nofma(__vector double a, __vector double b);
__vector double Sleef_cinz_atan2d2_u10zvector2nofma(__vector double a, __vector double b);
__vector double Sleef_finz_atan2d2_u10zvector2(__vector double a, __vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2_u10 with the same accuracy specification.


Vectorized single precision arc tangent function of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_atan2f1_u10purec(float a, float b);
float Sleef_atan2f1_u10purecfma(float a, float b);
float Sleef_cinz_atan2f1_u10purec(float a, float b);
float Sleef_finz_atan2f1_u10purecfma(float a, float b);

__vector float Sleef_atan2f4_u10(__vector float a, __vector float b);
__vector float Sleef_atan2f4_u10zvector2(__vector float a, __vector float b);
__vector float Sleef_atan2f4_u10zvector2nofma(__vector float a, __vector float b);
__vector float Sleef_cinz_atan2f4_u10zvector2nofma(__vector float a, __vector float b);
__vector float Sleef_finz_atan2f4_u10zvector2(__vector float a, __vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u10 with the same accuracy specification.


Vectorized double precision arc tangent function of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_atan2d1_u35purec(double a, double b);
double Sleef_atan2d1_u35purecfma(double a, double b);
double Sleef_cinz_atan2d1_u35purec(double a, double b);
double Sleef_finz_atan2d1_u35purecfma(double a, double b);

__vector double Sleef_atan2d2_u35(__vector double a, __vector double b);
__vector double Sleef_atan2d2_u35zvector2(__vector double a, __vector double b);
__vector double Sleef_atan2d2_u35zvector2nofma(__vector double a, __vector double b);
__vector double Sleef_cinz_atan2d2_u35zvector2nofma(__vector double a, __vector double b);
__vector double Sleef_finz_atan2d2_u35zvector2(__vector double a, __vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2_u35 with the same accuracy specification.


Vectorized single precision arc tangent function of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_atan2f1_u35purec(float a, float b);
float Sleef_atan2f1_u35purecfma(float a, float b);
float Sleef_cinz_atan2f1_u35purec(float a, float b);
float Sleef_finz_atan2f1_u35purecfma(float a, float b);

__vector float Sleef_atan2f4_u35(__vector float a, __vector float b);
__vector float Sleef_atan2f4_u35zvector2(__vector float a, __vector float b);
__vector float Sleef_atan2f4_u35zvector2nofma(__vector float a, __vector float b);
__vector float Sleef_cinz_atan2f4_u35zvector2nofma(__vector float a, __vector float b);
__vector float Sleef_finz_atan2f4_u35zvector2(__vector float a, __vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u35 with the same accuracy specification.

Hyperbolic function and inverse hyperbolic function

Vectorized double precision hyperbolic sine function

Synopsis

#include <sleef.h>

double Sleef_sinhd1_u10purec(double a);
double Sleef_sinhd1_u10purecfma(double a);
double Sleef_cinz_sinhd1_u10purec(double a);
double Sleef_finz_sinhd1_u10purecfma(double a);

__vector double Sleef_sinhd2_u10(__vector double a);
__vector double Sleef_sinhd2_u10zvector2(__vector double a);
__vector double Sleef_sinhd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_sinhd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_sinhd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic sine function

Synopsis

#include <sleef.h>

float Sleef_sinhf1_u10purec(float a);
float Sleef_sinhf1_u10purecfma(float a);
float Sleef_cinz_sinhf1_u10purec(float a);
float Sleef_finz_sinhf1_u10purecfma(float a);

__vector float Sleef_sinhf4_u10(__vector float a);
__vector float Sleef_sinhf4_u10zvector2(__vector float a);
__vector float Sleef_sinhf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_sinhf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_sinhf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sinhd1_u35purec(double a);
double Sleef_sinhd1_u35purecfma(double a);
double Sleef_cinz_sinhd1_u35purec(double a);
double Sleef_finz_sinhd1_u35purecfma(double a);

__vector double Sleef_sinhd2_u35(__vector double a);
__vector double Sleef_sinhd2_u35zvector2(__vector double a);
__vector double Sleef_sinhd2_u35zvector2nofma(__vector double a);
__vector double Sleef_cinz_sinhd2_u35zvector2nofma(__vector double a);
__vector double Sleef_finz_sinhd2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sinhf1_u35purec(float a);
float Sleef_sinhf1_u35purecfma(float a);
float Sleef_cinz_sinhf1_u35purec(float a);
float Sleef_finz_sinhf1_u35purecfma(float a);

__vector float Sleef_sinhf4_u35(__vector float a);
__vector float Sleef_sinhf4_u35zvector2(__vector float a);
__vector float Sleef_sinhf4_u35zvector2nofma(__vector float a);
__vector float Sleef_cinz_sinhf4_u35zvector2nofma(__vector float a);
__vector float Sleef_finz_sinhf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic cosine function

Synopsis

#include <sleef.h>

double Sleef_coshd1_u10purec(double a);
double Sleef_coshd1_u10purecfma(double a);
double Sleef_cinz_coshd1_u10purec(double a);
double Sleef_finz_coshd1_u10purecfma(double a);

__vector double Sleef_coshd2_u10(__vector double a);
__vector double Sleef_coshd2_u10zvector2(__vector double a);
__vector double Sleef_coshd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_coshd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_coshd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic cosine function

Synopsis

#include <sleef.h>

float Sleef_coshf1_u10purec(float a);
float Sleef_coshf1_u10purecfma(float a);
float Sleef_cinz_coshf1_u10purec(float a);
float Sleef_finz_coshf1_u10purecfma(float a);

__vector float Sleef_coshf4_u10(__vector float a);
__vector float Sleef_coshf4_u10zvector2(__vector float a);
__vector float Sleef_coshf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_coshf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_coshf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_coshd1_u35purec(double a);
double Sleef_coshd1_u35purecfma(double a);
double Sleef_cinz_coshd1_u35purec(double a);
double Sleef_finz_coshd1_u35purecfma(double a);

__vector double Sleef_coshd2_u35(__vector double a);
__vector double Sleef_coshd2_u35zvector2(__vector double a);
__vector double Sleef_coshd2_u35zvector2nofma(__vector double a);
__vector double Sleef_cinz_coshd2_u35zvector2nofma(__vector double a);
__vector double Sleef_finz_coshd2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_coshf1_u35purec(float a);
float Sleef_coshf1_u35purecfma(float a);
float Sleef_cinz_coshf1_u35purec(float a);
float Sleef_finz_coshf1_u35purecfma(float a);

__vector float Sleef_coshf4_u35(__vector float a);
__vector float Sleef_coshf4_u35zvector2(__vector float a);
__vector float Sleef_coshf4_u35zvector2nofma(__vector float a);
__vector float Sleef_cinz_coshf4_u35zvector2nofma(__vector float a);
__vector float Sleef_finz_coshf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic tangent function

Synopsis

#include <sleef.h>

double Sleef_tanhd1_u10purec(double a);
double Sleef_tanhd1_u10purecfma(double a);
double Sleef_cinz_tanhd1_u10purec(double a);
double Sleef_finz_tanhd1_u10purecfma(double a);

__vector double Sleef_tanhd2_u10(__vector double a);
__vector double Sleef_tanhd2_u10zvector2(__vector double a);
__vector double Sleef_tanhd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_tanhd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_tanhd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic tangent function

Synopsis

#include <sleef.h>

float Sleef_tanhf1_u10purec(float a);
float Sleef_tanhf1_u10purecfma(float a);
float Sleef_cinz_tanhf1_u10purec(float a);
float Sleef_finz_tanhf1_u10purecfma(float a);

__vector float Sleef_tanhf4_u10(__vector float a);
__vector float Sleef_tanhf4_u10zvector2(__vector float a);
__vector float Sleef_tanhf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_tanhf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_tanhf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_tanhd1_u35purec(double a);
double Sleef_tanhd1_u35purecfma(double a);
double Sleef_cinz_tanhd1_u35purec(double a);
double Sleef_finz_tanhd1_u35purecfma(double a);

__vector double Sleef_tanhd2_u35(__vector double a);
__vector double Sleef_tanhd2_u35zvector2(__vector double a);
__vector double Sleef_tanhd2_u35zvector2nofma(__vector double a);
__vector double Sleef_cinz_tanhd2_u35zvector2nofma(__vector double a);
__vector double Sleef_finz_tanhd2_u35zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_tanhf1_u35purec(float a);
float Sleef_tanhf1_u35purecfma(float a);
float Sleef_cinz_tanhf1_u35purec(float a);
float Sleef_finz_tanhf1_u35purecfma(float a);

__vector float Sleef_tanhf4_u35(__vector float a);
__vector float Sleef_tanhf4_u35zvector2(__vector float a);
__vector float Sleef_tanhf4_u35zvector2nofma(__vector float a);
__vector float Sleef_cinz_tanhf4_u35zvector2nofma(__vector float a);
__vector float Sleef_finz_tanhf4_u35zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u35 with the same accuracy specification.


Vectorized double precision inverse hyperbolic sine function

Synopsis

#include <sleef.h>

double Sleef_asinhd1_u10purec(double a);
double Sleef_asinhd1_u10purecfma(double a);
double Sleef_cinz_asinhd1_u10purec(double a);
double Sleef_finz_asinhd1_u10purecfma(double a);

__vector double Sleef_asinhd2_u10(__vector double a);
__vector double Sleef_asinhd2_u10zvector2(__vector double a);
__vector double Sleef_asinhd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_asinhd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_asinhd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic sine function

Synopsis

#include <sleef.h>

float Sleef_asinhf1_u10purec(float a);
float Sleef_asinhf1_u10purecfma(float a);
float Sleef_cinz_asinhf1_u10purec(float a);
float Sleef_finz_asinhf1_u10purecfma(float a);

__vector float Sleef_asinhf4_u10(__vector float a);
__vector float Sleef_asinhf4_u10zvector2(__vector float a);
__vector float Sleef_asinhf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_asinhf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_asinhf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinhf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic cosine function

Synopsis

#include <sleef.h>

double Sleef_acoshd1_u10purec(double a);
double Sleef_acoshd1_u10purecfma(double a);
double Sleef_cinz_acoshd1_u10purec(double a);
double Sleef_finz_acoshd1_u10purecfma(double a);

__vector double Sleef_acoshd2_u10(__vector double a);
__vector double Sleef_acoshd2_u10zvector2(__vector double a);
__vector double Sleef_acoshd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_acoshd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_acoshd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic cosine function

Synopsis

#include <sleef.h>

float Sleef_acoshf1_u10purec(float a);
float Sleef_acoshf1_u10purecfma(float a);
float Sleef_cinz_acoshf1_u10purec(float a);
float Sleef_finz_acoshf1_u10purecfma(float a);

__vector float Sleef_acoshf4_u10(__vector float a);
__vector float Sleef_acoshf4_u10zvector2(__vector float a);
__vector float Sleef_acoshf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_acoshf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_acoshf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acoshf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic tangent function

Synopsis

#include <sleef.h>

double Sleef_atanhd1_u10purec(double a);
double Sleef_atanhd1_u10purecfma(double a);
double Sleef_cinz_atanhd1_u10purec(double a);
double Sleef_finz_atanhd1_u10purecfma(double a);

__vector double Sleef_atanhd2_u10(__vector double a);
__vector double Sleef_atanhd2_u10zvector2(__vector double a);
__vector double Sleef_atanhd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_atanhd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_atanhd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic tangent function

Synopsis

#include <sleef.h>

float Sleef_atanhf1_u10purec(float a);
float Sleef_atanhf1_u10purecfma(float a);
float Sleef_cinz_atanhf1_u10purec(float a);
float Sleef_finz_atanhf1_u10purecfma(float a);

__vector float Sleef_atanhf4_u10(__vector float a);
__vector float Sleef_atanhf4_u10zvector2(__vector float a);
__vector float Sleef_atanhf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_atanhf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_atanhf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanhf_u10 with the same accuracy specification.

Error and gamma function

Vectorized double precision error function

Synopsis

#include <sleef.h>

double Sleef_erfd1_u10purec(double a);
double Sleef_erfd1_u10purecfma(double a);
double Sleef_cinz_erfd1_u10purec(double a);
double Sleef_finz_erfd1_u10purecfma(double a);

__vector double Sleef_erfd2_u10(__vector double a);
__vector double Sleef_erfd2_u10zvector2(__vector double a);
__vector double Sleef_erfd2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_erfd2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_erfd2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erf_u10 with the same accuracy specification.


Vectorized single precision error function

Synopsis

#include <sleef.h>

float Sleef_erff1_u10purec(float a);
float Sleef_erff1_u10purecfma(float a);
float Sleef_cinz_erff1_u10purec(float a);
float Sleef_finz_erff1_u10purecfma(float a);

__vector float Sleef_erff4_u10(__vector float a);
__vector float Sleef_erff4_u10zvector2(__vector float a);
__vector float Sleef_erff4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_erff4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_erff4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erff_u10 with the same accuracy specification.


Vectorized double precision complementary error function

Synopsis

#include <sleef.h>

double Sleef_erfcd1_u15purec(double a);
double Sleef_erfcd1_u15purecfma(double a);
double Sleef_cinz_erfcd1_u15purec(double a);
double Sleef_finz_erfcd1_u15purecfma(double a);

__vector double Sleef_erfcd2_u15(__vector double a);
__vector double Sleef_erfcd2_u15zvector2(__vector double a);
__vector double Sleef_erfcd2_u15zvector2nofma(__vector double a);
__vector double Sleef_cinz_erfcd2_u15zvector2nofma(__vector double a);
__vector double Sleef_finz_erfcd2_u15zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erfc_u15 with the same accuracy specification.


Vectorized single precision complementary error function

Synopsis

#include <sleef.h>

float Sleef_erfcf1_u15purec(float a);
float Sleef_erfcf1_u15purecfma(float a);
float Sleef_cinz_erfcf1_u15purec(float a);
float Sleef_finz_erfcf1_u15purecfma(float a);

__vector float Sleef_erfcf4_u15(__vector float a);
__vector float Sleef_erfcf4_u15zvector2(__vector float a);
__vector float Sleef_erfcf4_u15zvector2nofma(__vector float a);
__vector float Sleef_cinz_erfcf4_u15zvector2nofma(__vector float a);
__vector float Sleef_finz_erfcf4_u15zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erfcf_u15 with the same accuracy specification.


Vectorized double precision gamma function

Synopsis

#include <sleef.h>

double Sleef_tgammad1_u10purec(double a);
double Sleef_tgammad1_u10purecfma(double a);
double Sleef_cinz_tgammad1_u10purec(double a);
double Sleef_finz_tgammad1_u10purecfma(double a);

__vector double Sleef_tgammad2_u10(__vector double a);
__vector double Sleef_tgammad2_u10zvector2(__vector double a);
__vector double Sleef_tgammad2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_tgammad2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_tgammad2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tgamma_u10 with the same accuracy specification.


Vectorized single precision gamma function

Synopsis

#include <sleef.h>

float Sleef_tgammaf1_u10purec(float a);
float Sleef_tgammaf1_u10purecfma(float a);
float Sleef_cinz_tgammaf1_u10purec(float a);
float Sleef_finz_tgammaf1_u10purecfma(float a);

__vector float Sleef_tgammaf4_u10(__vector float a);
__vector float Sleef_tgammaf4_u10zvector2(__vector float a);
__vector float Sleef_tgammaf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_tgammaf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_tgammaf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tgammaf_u10 with the same accuracy specification.


Vectorized double precision log gamma function

Synopsis

#include <sleef.h>

double Sleef_lgammad1_u10purec(double a);
double Sleef_lgammad1_u10purecfma(double a);
double Sleef_cinz_lgammad1_u10purec(double a);
double Sleef_finz_lgammad1_u10purecfma(double a);

__vector double Sleef_lgammad2_u10(__vector double a);
__vector double Sleef_lgammad2_u10zvector2(__vector double a);
__vector double Sleef_lgammad2_u10zvector2nofma(__vector double a);
__vector double Sleef_cinz_lgammad2_u10zvector2nofma(__vector double a);
__vector double Sleef_finz_lgammad2_u10zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_lgamma_u10 with the same accuracy specification.


Vectorized single precision log gamma function

Synopsis

#include <sleef.h>

float Sleef_lgammaf1_u10purec(float a);
float Sleef_lgammaf1_u10purecfma(float a);
float Sleef_cinz_lgammaf1_u10purec(float a);
float Sleef_finz_lgammaf1_u10purecfma(float a);

__vector float Sleef_lgammaf4_u10(__vector float a);
__vector float Sleef_lgammaf4_u10zvector2(__vector float a);
__vector float Sleef_lgammaf4_u10zvector2nofma(__vector float a);
__vector float Sleef_cinz_lgammaf4_u10zvector2nofma(__vector float a);
__vector float Sleef_finz_lgammaf4_u10zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_lgammaf_u10 with the same accuracy specification.

Nearest integer function

Vectorized double precision function for rounding to integer towards zero

Synopsis

#include <sleef.h>

__vector double Sleef_truncd2(__vector double a);
__vector double Sleef_truncd2_zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_trunc with the same accuracy specification.


Vectorized single precision function for rounding to integer towards zero

Synopsis

#include <sleef.h>

__vector float Sleef_truncf4(__vector float a);
__vector float Sleef_truncf4_zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_truncf with the same accuracy specification.


Vectorized double precision function for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

__vector double Sleef_floord2(__vector double a);
__vector double Sleef_floord2_zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_floor with the same accuracy specification.


Vectorized single precision function for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

__vector float Sleef_floorf4(__vector float a);
__vector float Sleef_floorf4_zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_floorf with the same accuracy specification.


Vectorized double precision function for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

__vector double Sleef_ceild2(__vector double a);
__vector double Sleef_ceild2_zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ceil with the same accuracy specification.


Vectorized single precision function for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

__vector float Sleef_ceilf4(__vector float a);
__vector float Sleef_ceilf4_zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ceilf with the same accuracy specification.


Vectorized double precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

__vector double Sleef_roundd2(__vector double a);
__vector double Sleef_roundd2_zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_round with the same accuracy specification.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

__vector float Sleef_roundf4(__vector float a);
__vector float Sleef_roundf4_zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_roundf with the same accuracy specification.


Vectorized double precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

__vector double Sleef_rintd2(__vector double a);
__vector double Sleef_rintd2_zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_rint with the same accuracy specification.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

__vector float Sleef_rintf4(__vector float a);
__vector float Sleef_rintf4_zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_rintf with the same accuracy specification.

Other function

Vectorized double precision function for fused multiply-accumulation

Synopsis

#include <sleef.h>

__vector double Sleef_fmad2(__vector double a, __vector double b, __vector double c);
__vector double Sleef_fmad2_zvector2(__vector double a, __vector double b, __vector double c);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fma with the same accuracy specification.


Vectorized single precision function for fused multiply-accumulation

Synopsis

#include <sleef.h>

__vector float Sleef_fmaf4(__vector float a, __vector float b, __vector float c);
__vector float Sleef_fmaf4_zvector2(__vector float a, __vector float b, __vector float c);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaf with the same accuracy specification.


Vectorized double precision FP remainder

Synopsis

#include <sleef.h>

__vector double Sleef_fmodd2(__vector double a, __vector double b);
__vector double Sleef_fmodd2_zvector2(__vector double a, __vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmod with the same accuracy specification.


Vectorized single precision FP remainder

Synopsis

#include <sleef.h>

__vector float Sleef_fmodf4(__vector float a, __vector float b);
__vector float Sleef_fmodf4_zvector2(__vector float a, __vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmodf with the same accuracy specification.


Vectorized double precision IEEE remainder

Synopsis

#include <sleef.h>

__vector double Sleef_remainderd2(__vector double a, __vector double b);
__vector double Sleef_remainderd2_zvector2(__vector double a, __vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_remainder with the same accuracy specification.


Vectorized single precision IEEE remainder

Synopsis

#include <sleef.h>

__vector float Sleef_remainderf4(__vector float a, __vector float b);
__vector float Sleef_remainderf4_zvector2(__vector float a, __vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_remainderf with the same accuracy specification.


Vectorized double precision function for multiplying by integral power of 2

Synopsis

#include <sleef.h>

__vector double Sleef_ldexpd2(__vector double a, __vector int b);
__vector double Sleef_ldexpd2_zvector2(__vector double a, __vector int b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ldexp with the same accuracy specification.


Vectorized double precision function for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

__vector double Sleef_frfrexpd2(__vector double a);
__vector double Sleef_frfrexpd2_zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_frfrexp with the same accuracy specification.


Vectorized single precision function for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

__vector float Sleef_frfrexpf4(__vector float a);
__vector float Sleef_frfrexpf4_zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_frfrexpf with the same accuracy specification.


Vectorized double precision function for obtaining integral component of an FP number

Synopsis

#include <sleef.h>

__vector int Sleef_expfrexpd2(__vector double a);
__vector int Sleef_expfrexpd2_zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expfrexp with the same accuracy specification.


Vectorized double precision function for getting integer exponent

Synopsis

#include <sleef.h>

__vector int Sleef_ilogbd2(__vector double a);
__vector int Sleef_ilogbd2_zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ilogb with the same accuracy specification.


Vectorized double precision signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_modfd2(__vector double a);
Sleef_SLEEF_VECTOR_DOUBLE_2 Sleef_modfd2_zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_modf with the same accuracy specification.


Vectorized single precision signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_modff4(__vector float a);
Sleef_SLEEF_VECTOR_FLOAT_2 Sleef_modff4_zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_modff with the same accuracy specification.


Vectorized double precision function for calculating the absolute value

Synopsis

#include <sleef.h>

__vector double Sleef_fabsd2(__vector double a);
__vector double Sleef_fabsd2_zvector2(__vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fabs with the same accuracy specification.


Vectorized single precision function for calculating the absolute value

Synopsis

#include <sleef.h>

__vector float Sleef_fabsf4(__vector float a);
__vector float Sleef_fabsf4_zvector2(__vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fabsf with the same accuracy specification.


Vectorized double precision function for copying signs

Synopsis

#include <sleef.h>

__vector double Sleef_copysignd2(__vector double a, __vector double b);
__vector double Sleef_copysignd2_zvector2(__vector double a, __vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_copysign with the same accuracy specification.


Vectorized single precision function for copying signs

Synopsis

#include <sleef.h>

__vector float Sleef_copysignf4(__vector float a, __vector float b);
__vector float Sleef_copysignf4_zvector2(__vector float a, __vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_copysignf with the same accuracy specification.


Vectorized double precision function for determining maximum of two values

Synopsis

#include <sleef.h>

__vector double Sleef_fmaxd2(__vector double a, __vector double b);
__vector double Sleef_fmaxd2_zvector2(__vector double a, __vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmax with the same accuracy specification.


Vectorized single precision function for determining maximum of two values

Synopsis

#include <sleef.h>

__vector float Sleef_fmaxf4(__vector float a, __vector float b);
__vector float Sleef_fmaxf4_zvector2(__vector float a, __vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaxf with the same accuracy specification.


Vectorized double precision function for determining minimum of two values

Synopsis

#include <sleef.h>

__vector double Sleef_fmind2(__vector double a, __vector double b);
__vector double Sleef_fmind2_zvector2(__vector double a, __vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmin with the same accuracy specification.


Vectorized single precision function for determining minimum of two values

Synopsis

#include <sleef.h>

__vector float Sleef_fminf4(__vector float a, __vector float b);
__vector float Sleef_fminf4_zvector2(__vector float a, __vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fminf with the same accuracy specification.


Vectorized double precision function to calculate positive difference of two values

Synopsis

#include <sleef.h>

__vector double Sleef_fdimd2(__vector double a, __vector double b);
__vector double Sleef_fdimd2_zvector2(__vector double a, __vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fdim with the same accuracy specification.


Vectorized single precision function to calculate positive difference of two values

Synopsis

#include <sleef.h>

__vector float Sleef_fdimf4(__vector float a, __vector float b);
__vector float Sleef_fdimf4_zvector2(__vector float a, __vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fdimf with the same accuracy specification.


Vectorized double precision function for obtaining the next representable FP value

Synopsis

#include <sleef.h>

__vector double Sleef_nextafterd2(__vector double a, __vector double b);
__vector double Sleef_nextafterd2_zvector2(__vector double a, __vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_nextafter with the same accuracy specification.


Vectorized single precision function for obtaining the next representable FP value

Synopsis

#include <sleef.h>

__vector float Sleef_nextafterf4(__vector float a, __vector float b);
__vector float Sleef_nextafterf4_zvector2(__vector float a, __vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_nextafterf with the same accuracy specification.

sleef-3.5.1/doc/html/sleef.css000066400000000000000000000032361373003144100161370ustar00rootroot00000000000000p.funcname { font-family: "Times New Roman", times, serif; font-size:1.2em; font-weight: normal; margin-top: 0.3cm; margin-bottom: 0.3cm; margin-left: 0.0cm; text-indent:0pt; } p.header { font-family: arial, sansserif; font-size:1.1em; font-weight: bold; margin-top: 1.0cm; margin-bottom: 0.3cm; margin-left: 0.0cm; text-indent:0pt; } p.synopsis { font-family: Ubuntu, arial, sansserif; font-size:1.0em; font-weight: normal; margin-top: 0.3cm; margin-bottom: 0.3cm; margin-left: 0.6cm; margin-right: 0.2cm; padding-left: 0.1cm; padding-right: 0.1cm; text-indent:0pt; } p.footer { color:#808080; font-family: "Times New Roman", times, serif; font-weight: normal; font-style: normal; font-size:0.7em; text-align:center; margin-top: 1.6cm; } pre.command { font-family: Consolas, Courier, sansserif; font-size: 12pt; background-color:#000000; color:#d0d0d0; margin: 1.5em 0.5cm 2.5em 0.5cm; padding: 0.1cm 0.2cm 0.1cm 0.2cm; box-shadow: 2px 2px 2px #888; white-space: pre-wrap; } pre.white { background-color:white; color:black; font-family: Ubuntu, arial, sansserif; } i.var { font-family: "Times New Roman", times, serif; font-weight: bold; color:#a00000; } i.math { font-family: "Times New Roman", times, serif; font-weight: normal; font-style:normal; } b.func { font-family: Ubuntu, arial, sansserif; font-weight: normal; color:#008040; } b.type { font-family: Ubuntu, arial, sansserif; font-weight: normal; color:#0050a0; } sleef-3.5.1/doc/html/sleeflogo2.png000066400000000000000000000544071373003144100171040ustar00rootroot00000000000000‰PNG  IHDRàÁÝU0PLTE(*'786EGDSURdfcrtqƒ€“•’¤¦£·¹µÆÈÅÖÙÕçéæýÿüÄ´½\ pHYs  šœX}IDATx^ìÒ!Â0Åñ&\`% µaÀ8 ƒ! Ã-hì,vG`ÁÔ“Ñ©e_@4¤)æýôËSAðK€ € @ûÐW­o†ÂXæ“4馃éêHA´—¥hé¬s àk .ÞQ €Ï#ñi˜S£Tµ‚«\ä¤RbJ—š8åC޹S$¾éeÍb&k bž‰t°!'wéAFÌÞÃelˆKú°íÍÎÝ„ÆU…a'±*Jå ˆ.Š:»pSnêÆM%Ý–”èÆ….~¤¥. 
#j‚ñ£bhb0 M)µ) M,jýBƒ†ŠÒ6˜DIˆ6ÍÜ›´ÉÌÜóØ&gæÞsç¼'÷*AÏ=\BÎož;0òÅÔz¼fVÚHœž«_)…B ”¼Ö@A)t‹XßÅÖúV›¦•žDCÐLÏŸ¾Ì3uG¹s= øÜ `ÎL¹óÔ´ÌU¡(R\À?ÿŸ èÎ 2쉱€öÝ:€Áª¯å¾G¶·…à]|èºüg:EªÌƒgüëÇ_i§únÿÄ£ÕJz v èÆºã8jßÉ5iV:ÉXæ×Áj¡-}€ïçÒØ-Àô’{i­;§ ’s!\:€ðÇ_Ê6Õ·õÕ ¿V²xq¿þb›¯ïá{¯—Íf9eñt·þ[®¹]]ÓПø`—Þ࣠×|låšmÙ¬p›ô{ØŸª²Ë ÀÜË"v;ž>».÷µS´{É÷G‡û^茨=¨›AÍ,OGN÷=ÕPõÝ~A‘2Sˆ$ý_FŽì >ÙCµn‹hð8!/tê×ï‰J°nþma¨öí#ayyŸ0TÉùC €ãš§çb’ß¶;€¦ìE¤K‚Bmƒ®ê>Ýüõb}]ðŒU_ 3@@~,ŒU €@0`^@U¹ÃÔ¥ð~húŠÁ‹éY¬·¹ â¾|€r/ ä)óªþ`´%õ_ž‚®=ÔØÝ†[¹êæ¬S‡ÅÙÈ/ùQõ8·ø |áF;Iµvð¾%Ù†˜.Q¨ÝHP? –  ÌóâgÀJŽ¿€:ÀpÕµ³k)Aßo7©¡–1$¨bý,€˜µ(ó,€8k±€XpÃMS­-±àUjèF$*o°,XÁˆYÀ%‹DÐå"Ì—.·qÏ÷V$ê¬@ô³¢`y@éY, fÀ¸uÛÔü6­º ±u™§’ߌ ÀeÀÏlþĈ¼Å"ðÀ˜;ðþ0ï¥÷X²ˆ à´ @é±öÛ, Î9€ X§q¹€çÌç˯jp–ðšÕ8Äø›Í¢,@ª¹µ§n MÊóð Õwɲ(;8—­–à_V ˆÂ(ýâÄJ““¾/‘z¬i» Íú‘T›MŸ2(‰’ N¦=€W¬   ??:RkxpxðØÈ¿ø»@ŸS­-†-`-à"Õ7†då¬.s–…ÀÀE» r `0Z£78xøè‘ë9ö½ÔâÝZwrH›™/±2IzVÑÅX±X €Wíol €³wv±mdU¿M ›ò!™"Dx#!!´HámwÙ‡ˆ§*âC+ÁC­f BK>$`Næ‰ÀCìq¶Š°“ÝmÅ“=Ûmá±î¦Õ®àa6l +»1_¡ù°ï0{|ææº¹ã̹¹×mþjUOe'jõÓïœsï ­Øö¢ãòy;oC|W7²$/¦eý|߀îRTÅ„÷ë`­`{ôØ.K¯Ä;|Ã&~碙kG à¾ÝÛ ØLj -³èñÉ¡!(àDßiÔÔ€î9|6’øt—ôðNɧ°*Á‹ðç rµw5€oàÐÁ7 »¥€ô/y;R–7ä,ê€ðÜ´þÜ‘à; ØH¨–í YKP‡_wÑÃ/ÞŠ7uï]:"ôàn߀î¸rkE,c(“`|p2ú*Çûu5 {߀î9|ºëФ•ü=³å=øR“€)[ܪô!X àEPÓÐÝ’àM ÜQ àæµö€Qðà3¦½ßs¶(rÆá-",Áwô7àÚ ¹-Á€tT%€´Ô)­ù¬a¤Ò†aŒÙ¢¬Ôå®Â ˆ»!P³2ŠoÀ½¾Ýy•^ƒÊZÌ­ˆ´ßvñÂ?ïD䟼O[ºKøº£øt×X±!y{%‡OàMq#ïažÒ¶t7Ц`ÈE ÜV`#Ϙ(wYê À¿éoÀm \—`ÀfB€´µþ2iX¶Ÿ€0í(2Óƒ®€®Súö€M Þ•`@w\€´Õz…×´ý,D°P‚½a)þ:B;®½Ý$Þ ÿl|ºXµ½dýÆoº½ íÇïÈ9Eüaø\O0¥#Ú÷€î ¾éˆ®«°™÷ýH4,[ÇqüÕk À¯DÞAÒØ€þ¦—¨º Þ—|Ù€ÿS -¶ù³ÌyZ*D†ÏÈc‘‡æ!M{@Þ Á5àž+óx§…ØqJ-Wðìñ9ÿÖÛ€<Bp è– …å—ÀJ‡8@. 
>ÐKãëu ºÂìôhÀ‡@í˰–‡uXú3z0xy Àè¸4}oÀ]UT`óàPÑfÀEÇK)TWчòBÔ­ÖGú°<6 -CÅõc1%Ø·€N>|}é@4䩨›q÷€ýe@8hÏû´{0¸ÈXÞ_¼ŠÑÎôø4¿¿€ýÚ’‡Ô€µL ‡ÐñR .LËÎ-_vãg¾G^þ££éææfÙ€›^êiÍ ä˜pÁð \ź).úRô6¨R›°qï‡&}Àa/_F6`㺗ºéu§ÈBLá \h_øOz‰P„¯’p†"ÎïÑÅ€PTO¡MÁ€Ôg øæéáÓÃWTHËN)€ ’â¦`á*4hLy¾†t 5úãÔh ¨K¸bZÇð åF €ö›1˜XvÖêLÁc"á:8˰»Ýøaòqñº~¼+ÀudBÃýŠ\£ ЗXÖ£ rB!7-„£1[½þX…%ýzÀ-m ¾$À㪠زüÙÙTÇ€Ö\f×3Áçã‘^ OEZ‡9©•×%pI†Ϫ2`¹EŒÀ€œÀEL€4×0VÅ '®DyºçI­zÀ›—aÀ¤²4 ˆ_L­¬pÎ1› °–=;é]¼„òx@ÈÇDCä7¼<£•çñ§`:*Á€Í„"VA€–Oa9@á xßñ™¬íeåÙ0Ÿ¸1¢¤<‹oÀ&‘`À]¢Æ€Ôq‚Î-^ 4}˜Æ2"s0€¡f/×1öâ ƒ1¬¥Ä€Í|w‰Þ%j X 0hà@3 8 0œÕËk(7CÎÄЖŠp7?oÉ0à†"–Kùvå4-ŸC¦ÖA¸À`X¶É­¡<›òdp›àp]†¯ª1`Ó)·_¦Z ãd´ ìØ.XÁùiXÏÕû©ÜàœQcÀªÓ1×Xxoâ^Õ„m¹&`øÀœÿž8c0ä[ýdÀ« —aÀQ%¤N) a…Î#xDšs”‚}g_†’Ý®ê÷\¬ãœ‰†|±Þ?=à8¾›I l$”°æäš!M_fK°Ç 0éÌ!±§ÈuÝ °à¸C$ð.QbÀJ‹Ÿ ´m†îl`” ±:W¦CðŸÇϺ÷€ÐÄ¢OÁD‚7ˆ ÒÐ1„±ðk1[éhüÁN¨Oy/_ÆxJ4›¾ÒÜ ø\’aÀy%¬»pÐçAÓ7ÁÈ`@hÆ8K1ã¤{>|¾/zÀ|éˆÒ¤–(s–Š»C“M¡„AÂŸŽ±% ásâ»u S<ú¼M$p›¨0`Ó)uÊ®Ðb+êt·å?v š½§3¼¤Š¥@POàãokßÞ"øü‘`ÀKD…+Ü‹™jC¯a¨µºuÁ É‚÷Óxq;ò‘—t7à<>€tT‚鈶Kè7€vРL^€­ä-à‡—Ímrÿ ü¨®g} ¾E$ðOD…-…e“ö…íeÒ‰u[æ 0Ó4ÂE»¸¿è6ô,9 O¯élÀ_tÒsрЪª0`µ`:\f3Až5-Ö€¼m@0à‚ÁcCî rP>¹ªoønÀ[D‚OT¶jh–,È0{ÿeÀe®sXÃZà î~0dð¼®lú²Â‚w“߀ï¢Â€–ÃÒ € †ùa²›Y¹ÅnÒŽ×^"fàÇzö€ô—„`°ù,Á7`u„(1`Åghß™ûÙ‹ÜQ‘Az&4`;N¸Ç);œ ¡«?#ØÒ·À©x|k„(1 uBš@4Ýy>ã×`Šö}4§­Ø'SÿHù‚n=àk?ü, LÁµâsIB° H;BˆÖZË@›'óY» %›ÏðÇÜÝÔÎy´{“ø|jM¹†!Ë€§áK>J q HVœò«Á/çwÏûßO‰+Ž[ÀWP»Uà‚Íf^ø4Îáü[D”Çëêz@>HòA0à‡B ¾™ Ò¶ÅXJ&:/Ó݇à#@À”ußþ+÷Ðù)åÌš  c æ‚c@>ª Øpœ"2 Û¹Ö~Ç8ÂÂm„LYÈ€/‚«I"ÊuU= þD‰+mŠÒŒ¦²¡bÜ}† ¤Ë2 90rŸŽu.ð "Ê“õÀ€{‹iÙq@UBÙ \ÊwÐ Þ:7Ñu„ÎÅ8ý"Ì·„<$l};gd ‹¤Æ=ŸKÚ´À…̃¡@ú"Ì÷úƀǬúu”[…Y0lÀɹîƒc3Ű˷€1ok„Nw@ä ´BË*·HÐ¥ wÆ›üYp´e§Œ; ÄAëÐk6¬*ýÝj‡»Qî!gbÀV†ßo›pÀEãˆÊŸI ¤'e–ɈªÐQg0œ- ¿ç°UØÑsÌxѸbš!“à•²)-«øýˆ6Ó¡{ˆ!¾,bvÀÏ ùBÁ7 f"@ÖC@¨ªâi#‘¢µL¹`Æ(€tÜaÀ–>€esÚP0‘Âñh„7¼7¾¦’pS‰(³! 
´¬`op¨BÌ(«þë‘,â{ø‹bL îï¹î€×”ˆ2»CU‚(­4¸ÕvÀ ­‹9ümÊˤy°=žI¥Fœu@1 .Ƨ§†  (‘Ó:)Èà€xë€Øöà°Çv×Cdµå3X§DLë¼±0Ü-‰óXy³à-ä^: ˜—ÎÐ9^DØÀ 7%]7æ€ÛFyü ²9àß ƒ(&ÁÓE@ ý ö*ÙeŠ1¼¬@À2Ä Îj޹ê€bœиB> ¦Oñ×15 Ÿ§ XްeÁhH;ê¸" ž%&“[°¦P;{˜ÚS ÐÒ<ˆÖ»aVl`ÒaD\PpµlXó »R?9LfŒÉñÝð8 ¼z„À›hr×±\’ÜͰdÞ¡€p4,d\·fCge@¼á¬Yðy謢 SÂÔŒ„|°7RT΃&p‘@¼á¬8ÇvÖ‘Ïà&LQV`hž0UWýt;¢}>›!ñŸr€ø¹I·°¹—çe—3Tì =¦¨•Áõoй%Tcr@d×w³ØÀ°eÁ[ø¿sÒqI+n(L@ȧ=€u¾ð:'€8îý”Ó/âvÀu¸£29š· %¶ö³Ú~õrݾzÏOá߀@t.c'óu@¸‚ŠT²jÀy@€B“‚)Ñl1`;à ’¥‡Ù²`¬5W·ÀÇ)ÏÙ°Ðu½ß¨.Áü³aðõ±ˆÐ;Á耷yPè €ôdʲ U|ÀQí~¬¾Ù0—PçðSTŠØ¼ˆ‚ÃØÚ]U`‘º Ίê4€}nð~6 s8gÇà"*ElYðžÈQDŒË|Ï8à« çÜjºNrŀʎq˜]Lˆ’©ï¦ÀÜa³Wm…€iÇ4f¦jløOoOóˆ¦>ob(˜ãxî€E)€eK’kR8'¾¾'àŠ‘Ùœ`ð:lÌ‚ŸÒlON;àú€ùSXƒ‰KªmhN¡¹ ¦VŠ@Ìå€xÓIÀ»"ÍbQp@¨ìý)§Ï‡ë;P¡³²$LD°¾‡ð_k Þú¡p Ë!€aFÀIÍU½ÇésÁáÜSQ¤ÁZÕiì×kÉ?Á vWt[î¿Ãø1êïN; ”¬Á ¸¬Ð^µ \¡FXWor¥¼Cñˆ1¤ð¸f¾ßilï-«³§$kp! 5Í+xwDêŠ$g!¤]µN8¢Ù´ØãsˆŸI;퀭=_€§ã“à·ìñ5VËQEk:“7‡Ã5@É9z*΀Ó¸ Œp +°5ÍÅ—÷¡5êÐ“ŠžÕ™}îªÍ ¼Œ›$ÚÇ4»€u@”Õ,ÛS%P˜µÑ¯¾õúÔ’*V9è—€šûÕ}ဪ›à-Ä©C ½‘8€Óð‰XWmè+8 ÷$¹Ç¿¯#òª tBcN’Ê·r×bÃ#Ä °¬ÓŽ'§º–€×P‡ŽÏ‚‘ƒXTHî”æŒM¾g#½E –Kø.¹XzÈÜ fä°ÂCŸMÀÒ) €„€õª@è¹®ÒK\[ý€öYÈz’}²¶žVuݧ”S™øÿÛ·àún`wÀ¢Î)8¨¥—÷(ÏÒ¡½ñš`-^ib¹TMY?$ècbx±-w±’Wa`Ùªêû§£/)°·Â‘»ýA\pÔ ²,ЂÞ=÷*<ôP2b#dZ¥ ¸jÀº€½*½Xƒ ßò‚i(Mةʇ&A?`Ac-¥ÏHw OmÀ5³²„2´M-Ë„îSùŽÓزS¿1MÂBg!—i'Êyª™Š€ÐXü©*¨– ’IðB@›š“ºô$mmý>Ñ  ¼Óð–l¹Fzˆ2À,a˜ˆd•iù\þ<<¡{°CÁÎX ˆn,”ÞOÔõ” æË¬ @|&”–/náïˆ<úQ(šˆö¨§ž€Ø .å HB@»òåbüÎÞ2Ž\È*Tóš÷3žœ ¸*@»à[Œ¢ßµRÀµ€ã|í;ýGc^­úA›Ž¤bb^5:ïš žR=ŽK@RÀ¥M hc’ÇûìvtxG1SÀþ] ZÞU H)(ÚX…ä›ÖÀþÝ>ûgÊ|Ýh'LHÀuç& ¹a”# ÆÎYùæƒ!‹Ï€Ë•²b^â[ÅÉ¢uÇ& >fD¯€—ÁI… Òú\óÞLИ¢˜›€F'`Ò·œ€x/óXŽ„ÎPTíŸü5 Ð+|—I øÚhM°ìàüÏÞä6CaqÎÀ `ÁK¶4½@ÕŠ-JS!±Lk§°ca›Âºã¶HIáÍ„@¦;„4á¥h»©g<µÿ—}ÔÊŸÞË›y¶g©1¬úŒ¬‚ý¬'ñèEþªøØÝ¥TÇ­Öæå?Óêö}»åèCÙêCkÓɧµCÿôžÕßð¼B€c­å9€†«Ò+̲z?~ò´àíð÷ãE÷ÿË€kuh¹cÿGûÜËÚëwž‘ÜÚÒ4VmZ¿—%»Î Þº¿Ó½:üðp`àöqF–9N³q??½Bøð+Mc-¨µÇ6¸ÕšàVÝòpóÒŠOÁ/¶-ú¸°F} yPS ®g켋fI È7@Þ xÐØ„(ß%˜ëhºlÔ S1"jÿ£ùˆ&¤QG€ü(ô¥@Ü[ø&„WT‚å€:– €ë«_:@9 µìUØ)%˜nUPšÇkTèb™¯ (i¾+æ1(Á^2 Wvã5aþ@º°Ÿ.©)e•KÛù.ê<2à  !:hè¯MGE˜ˆ&€öP8ågð£h”`/ùÅ~T¾ج 4ÔôÈ6cc:k_`â¨üRˆˆ÷fâp"¸S@ògðuÐKÉØn™ÍþtÔ;“p@%,[:_MtÐ}0)g{•T€Q’€c#@é¶ –†®:® €ù àÆ"€¢ e9*À"ò#b-(…rÙƒèâˆî´rÝ|¸ g.›`¥W®ÙY 6  pZ€©©‰ðY ~щ £„,¯ü…s`žgY6LÓÁ 
ßëi­»ýI Ò4e“Èsø¨ @å¬éèËdGNèe£tÐK)9LÌÿ›ÝîŽÇÇép˜eåÞü€?§ÛöT&î 0}a¹9àõô4””’QˆŽ^<þM@ãàxºü»ö%X¸+À°Ô˜Fƒ~÷Ÿ=-¥àlœËD)­´ML :Àßìarã6…÷´šì&»HâÄl çhï9Ss[äbSKA²àD$Ø¢ÈáóüpÕTÉ.ûó{ÝXõ–@ÐdÓ‚Up©—›,=¢®…#¥ÔáÊÌ*/ú æÀì@Š›XŒ"Ëß3F¥,zGú¤T:F&mUf­²Ïgq>Æ¥b;ȼó‰Àbóì|Q!&N€¨ûQZcg†3€/î(ÄïQq÷±K4>‘Þ÷Ñk(¹wXHœˆÜ –B™®#(œtG!?ù(b 0žh¹Þ¶©,‹â5ûT-à§ïuïRöÏhOfßÜQˆ€ñO4¤åÉŒe“ç¦V–þã¥A)5‡”ÂýŸ Ló@gß?M¢o<¬KE`/DÃCqd/7:m‹Gûµ“QjfÙ‘v­0 g+7üâƒÎ}¤ö6ׯë‚£;µGOê Hî§Û"I0]y—…3€úÀŸñˆ™ö»^-Wk¿a¤„Ø£Ÿ®È©õ  ›Ã|ñ ϤoEÄvë5+Eaï¿Yu[á à·€?ßF¦œ"€´$¢ õ.›‡Ö3€nsëõ[ %à¤eÚªÂÀn`°–>Oªî>ަº–ü„èâÐ2È«,³}òt|%A`P$1’0H©Sï•gB}Øù/¦à›ÖÊ÷7Í—À‘¯¬D|gpˆØõ’úJí{’‹jZV$XÀà}Ò€èË· PR(¿Òòz]T“0#­Hž_³-Ùxd¨T?ˆHlgP_NÆ5Êãp›‘^„À>KÀ$J2¤ùèÛT„ÈàƒÄE!Ö Ç à7: Ï­oþ€éÑsDÈ/ckñøJ³çf&ðõÀ~ G"vµÆÅ¸ØPŒÀ74”gZ° ô±?à7#…Ø6èÑC(ꨮ +ûAXy¸H[ëK²N¡•8«ð3ÆØKPŸŸóZëü •©•eiaL38µÍ†òpõ±1ÙÇ¡b^PÚ.„.¤ÄàWr8¤†/€-G[»“þ²,ªÖËejÙ»Tƒ¤0¢/žÀfCu¸ú8ͲÓïbU¶èº•ðˆ«¶üùV~iž?ŸKĉ2`(¥éÊÚ!w;ðt‹‚]¾0È\ºø¥Ÿöõ“/ì%«QÓ7£Þ×EOä{Ç3©ç_wøn‘CK¦–ZùÄ~úTn{úCª òAþ¡MMb­²¬ýnox™¯”ÊV«!Œ{Y¤í/-6eÏqSùA¨P0ô$ŽhµZåy¾ÙªŠê¨¢–{¢ aw¤ ¶¸Í¸2X²$0HAnÏšõ–GD"sû ¹ø7sKuŸz*Od.yx‰~«‚yÖà ïIô*%ÊfÿbÊ``Dwúš§ËÌògÂæÝRÿˆºOð©À÷ Èb¸øqýÊGýÅÆÐ%Áå ÂZrŠ&ò{#€S¸dIàö?]M „ä 1ÌÕU yîs¿+OdEË‹ˆ¶òã‹^ú±$T‡Ø q!Ø„?€üE ¼TïíàÝÃDâ\oRN˜X t²—2@ÚÐøñ·%DE¨|}Pý£{AœBã"¬6X±ô@L¡{Ð5¾ƒ3˜†ï, zL³¢œ€É Ú"P_ÀûžxuMo(‹çì ¥Ohâ脲 ÀŽ –± ü{Ð3k&É Û5£RbõØà»ó%~•/_Zi¹½J¹ Í '¬0Ê4V‰Õ¹”qxûõ§„Ôc¹½Z¯Ý Š8q¤.K oi€¿,þCNºÝÊÁX|^›A 8Wº Nø³ ÀW†áÑ‚ü-þK?·õêUÙz0ºt ޶7Æcj5øÞC”þjŸàÞ%ø£ÜŽCEžoÍ Ú×Û–Áwç$vYN‚ѯùjßÙ! 
NÞÆTúO¨±ãÒ›ë–pÁU:91˼uOpo‘|ÚW=ˆÂsw°õí‰pÕâ'0¾7øf«3þ&½ù[ü›¹ý~>H·$MLÉÖ«`â•°ëB,€ÄJ€Ê·Ytö wÛ‘ª*L`KHæUÓ¹ÿkÐb;€Â³©õå&iÓÃvÄr6èKJÖkìJÚï…ÇFߘ çï÷…ÕçðÖÁh×jG®¢x6 ¢wÿìœð:ùKàÁÈySv•€²ØNAEn|Àv¸ü¥Üf=[  ‹–fšþ ¿ÄJ/àuÎgðt<âd]‹Æ`þ\Ø:|œ}ô9‰RÑ‘8 @>~@§¥u!‚øs`sìê× 3H£…’\ò­ tW/µs@ž v äÏðâfêþçTl‚ÊA@I:Íð½±R(ÚzÓ²ÀW†¥@@ô$…#®þàoò *!ˆî‚@pˆÃíçc@Î…'µ|WN ðןŸÞýÇtËÁÆ(&ÊýáKº8’@"ƒ‡Ðo‹¼=òévâª6] 6ù™· ÒÎÏŸð»Èàt»81@'°7ÛLŸA’j€gm›C8  »°Òµäï«3@bû`ʪrs¾ ª„A¨ˆ)ç€×•Á¿tà’8~›r9H¯+„È}ó‡ÔùìÿÙ;Ÿ['b ŒS t¡' §PA²äÙAâ†4c( ³ €xzw ¼ ¢²í8–YO3וV+å§ïóüñd À““WŠ+7-€;ûÕ>–·éÆ?¿ÎrýY`=XN€Ýb\„ú§oÅe2VC®5A>Ny.Z›?·4þ&&·ä„B›œ‰ ?%¿—ZäÏdp‚A’À¨—BylLv`?ð4̤îÕ ÿ^ ÂõKËŠ ‹¸üÏ· ÞX•{@žHÐÓ?·²DÊŠKcò´B OþÄ"ÒŸ@ð¡ô¦8y$ä4D?WÊŸY± t‰ êG`Nî„(wC˜?A«L&:Åà8½–)¤! ÅßâŒûç‡OE¥$T7"§;P”@%þÜ&}ú¼TÿÌŠ ¤†/j2¸ceóñ†Ô—À혿euЬÅkIšH~ n1úÚbÑU`¬O'A†Hz†¿ºÖßfüÙH·n+K€ÿÏô*¡X]FPÀ0ð~ÀéøØD×WN2à×FWMŸŽpz]GäÇ:Þª,k'p¬=ZF‚ vÙUÐÇdG´TŒ¦Öü¹n´Ä°2,#!q‹ #ÚRÏð0·^¯]‹ e$™Y|l×!Fñ&à‘%°¥þ-:;αŸ 3ð‚ØTC›·Uöû6ågŽg^<~ÿ<ÓWøÅsY1An}ÍíÅÉgÄáŽÌJ`ßÐÝò¢€_6OŸ<ºÿb–w¿|øàñÝÌI~%4@ß¿ÀŒÎg¿™;ûضª»¯'}AÛ-Úh€™Ö ±)BHtCMͦ‰i­5Цj" cZ@Hm6¶&ü1Ú±GÔþƒ%Ó&5˜²¤CL‹]š¢½;MLѦ- M…z}cdMs}M\û<=ÇçøØ9Ç—Ü{}m~BÊÍíkq?ýþ^Ϲ:ýëþ¨€¹È*‹ç@m+ñÁ~+}ŸTTu2èiq<¾(É$‡Öwÿ«fÀ;ÉGÈv„¼ö ÀwÍ&AßǤšØûb¿J0c¥ã -€úZ`º^üõ9 _ú®¸}œS‡Y%Ì*“2ËO¦ÂP2íþó¥¿i&°Œþ|Ê ‘`v€U‡Ž’‹X§®mónI®e\ 3‹°pÈ—=’Å•P‹3Þù“ÞØZ$€„I`Úßö¯tÀ‹Þ„²Ð…à3ÖìŸL ¾ø ˜­NXdúÿÀ¬ƒHë€Àý{Éÿ® ¾ÝÛ»! 
àŠÑ…©î@Íøúβ«XÊQñÀ¼‰!BÞºönß{$²"SWÉÊËŽ)ú,jø§Ú}ŽÀ0úéÏ¿ @býäÌYŒ—?€ÚUòåy ñk>kK±¨sú• „2Šs€M§z€uì`¯ž‡1E!±êÕ*N;C'íÂçO8N´êç©ÕŽ¿^=ÝqGÀ,-²¡2ÛvÅYµ › =ÜþR¸õGŸVî{d+³T§üð( «>­bg;XÊf]*í¡B˜²ë@YNûþ¹ y2°–0;^í5 IÜÇgq'`”kŒÉAåï?ü;iÃP±_%þ8Œ@ÝZÅ“Î$P>"=–raR&ߒȉÅHflþÍÄ=ÊŸ>$jgJ)ÆESø:Ðl•þ¾€ ïŽ¶Ñ „ÛY™p^lš¥Ú•Pn&Šù`‚¤nVôÄ.|*o8@P£N–«Úç!#z=ëí‹ÅÜfÒîwØ~ †ݶ ”asw›@§L3›‡å²êˆãƒÖ©‰pKÔ翾&$Ž5P–gÕŠüq´œ˜—NX»šcKo_<î"ûÖçtÀR¸GHå}œCGp#Y¾w€–è6†æÆ*`ÞTôÁ™Iª(v‹I%¥?¶Ç5V¼¨:au;çÞ¸»èO݉mbqN A“€å@€óèÜ,•!ãÚ…–|ÆhC0¯ð^Ÿ`PráÊ3 mÕR-;ÊN°#É!…@EÒºãF_<ÏFe™ù"BJ H< dÒ¿X¦( Û«€@KC&øir¾›*£0©oçIùSÖO(ºÉ„wètLß;þw¾$¬ß-ܬƀóè¸@¸é£lUÌÑü¸¡ ˜š3Û8é¹Ìk¡Œ½"nëKÉÝÙ…·úí¬=€-å¯MïS^ýöa¿qMF_’—ÌÁfÑ©1Ë€2 T<°Ê`|øiðó¾ ½‰»´«Y `!¬Œ-W¬` C¶ xr,¾å{À鯬Xñ¹ îFv|5!Ó7]~G±“÷-Ó\ñ‰Ç87éÁ-wŽ’üÏ®]õÎ ³.Nm´´Å_€Ð˜¨Íž|p…¹âú'ÅÿõÁG7”ûé0 '¦Û`¬ ]?ùÆþÂO| Ì‚<êÖŠo*l û5€ñÙÐigC`+}uÆ3éº.žPtÚu‰¼-ÝìÈÞbî¶Áú7€àQTÐÅ€›(M¢*8d£€”‡Ù)`άðá0»R”šíAÑáç»ðù}o?"†·Â À7ØkØ1® 5"zhlÕX¸Æ/¿F0C»8m‚5y|µS/%ë å@¦•Š'‹¹Ù†HˆHH¨-Ã0ÌŠnúX KtY°D`™m xèÁr7ÇÛ¸ß.ü+ „NàöðBŽ!hÑÑîÔg~à±¶Ð#@(ð‹)rÆÃ"‰ßp×WÀ×6Üõ :ŒXö ¼„tê—Öt YÿëÓuÚ=01N(ÃÀ¿*A {È̓h¤(N„©1 s€ýEjš-­ÊÏj±Ï‚óa ðMêjMáŸBѬ(ðq6$ÆÏµó‚8poÓÄû@¼'›ƒÇq¥¼ï³'æK¡˜Å|nˆ!º Ƹð-Æ“<£÷?->T|bÊ€ä¤z$°¯‚?G‰Þ˜= „L­’Bh9ˆ6”t5Ù) #J(ÊÛÏ‹ÜùXBÄ0,&ÐâbQÀØDÞ§W(DMBÀûµYð»À„Â_V voe×ðA >C\X˜z×@u^gÛ°½fÆj’Zd©òò"akU+Z±Ú+ 9(DGÉqoæø` ëÍt’9>6Wú€mì_Åi†í0ŋǖ¶8 UZœs#Kæd è¿ùŽ`Æ€’ÀòDäQ×øuWÆnÖ1øT¶š &ûÏAì°¡ºe¯€Ç˳àR%¨•áM¡ˆ¦€,{àWfе³l¤p Ù§,ÕÖ fé‹tq=}„]«nú î´\HòêD·[JxZœ‹ h««$!ŒâôY;TTðŒ à~þ¢|ñ²ù1á2²E’‚@æm_xÈbΚŸíºJÛ ™—×ÝËǃşÓÛ3¤žöΘN÷’ÙI…À˜7Ü—óúšÜ=(Ú­J È]ð<ÏBöÒ»ªÏ‚e+ıž¥xs-ÁònRV hò,$onÕ*àAYîûÿºG©o„f&“~G€@ÇŘOnø¾x*=âùAü3€™1ª‹;L>¢­Ä¤xg„ ËﳪHzxyY­>\ÃÏZ8€í¤A6;ãCm01å @I ë‚`wa‰}³^6Sé”Ç@ÕwÒzZˆ½ åÔ+`[õ ¡‡• xžThRäï鿈Ñ* ±oÛé°ñ6ãd‰ýj3x“c•@@ýæJOLx©¾|P6Â|ZÄ€TsÌVb¯€Qàj›UqgÑ_µ8º@_çU€^ɰî|Àª¢€Àsüœ6l¼:ä0·H”¹H²Ú3SÕ¦ìž%^ìX X®Ä€sX'¢*¬'¶Yð¼\“¤UÀ]Ò*àEM/¸‹gªʉeÏ/©¶*n? 
Âü÷‚ýßjË^•^WÍ”"è8°ˆ';À”LP±F³7L»è¾bÈ^O €ô XˆbT«€s2 |-†ì ÛÀUÕÖ–ÊE/×øÐ n@£Î~»QÔ`hÂé^K ú}¼ˆNb³žÃÕBS–m Xè·X¯€s@F«€g˜\ ”…f[ÌwVSÀ9 $}õV ØÈÒÌ!ûÕíöµ^Ô"+r¶±êUâÙNP©µµ)À3œ¶%À¬ÐJMü¸b¸_:M#S©€D°Ý±@óa;Y2Õ°†‘ZjdÄ€nŸzçe¶I‡RëM¤ì½j³ãÜâLÿ?{×ÅÆÏì–Ýf L-A¡Ì¢@é¡”’^z©í5èêcÑ…ˆÀ¤|-‚,¢«[ ”Km7Lrµ z±Ù&º`*etq­yèâÿ¿îîôË7gwFq¡‰ ö{9sΜË,û›ß7çÌ|¿ó›ô‡ì£„òlÏð»uÆ<ä”y6¦§¯üµà|nu6ëA†WU Æ€%ÀÙø¼ç3 ˜¬¼õI2 Í˜vÞ7±2B²y\…«ý èU´{T×õhÿ¤ªÃÄKÝÍñ èÕÚ‹;*³ØþÓ«mÇétÈ·¥T²ñ"YmH*˜?‚?ß'ìÝoÊY‹Y™k6cª ᯌüV"=•LHIBd9,nãý#Þ/0^ Ö_fýÃF<™t:Ò†oÞ¼ FÖ¾@Ô¥öJµÝa•%&Ç])…(×ÓâIò(zÛÁ®ÀƒÕ6C&Oò¹îM„o_›ô oý/Í©6±¹Ô)ž H­t¬½Ô½KQ D°ß{r™=Àì!8ñvw¡èW7vó_`Râ#ø7­¬¬þ  ¦#@WtQ—©˜L0h-Écñ0_:µ‚£m§ë ÅE—I3«{~Ì•:¸l„v¸“CŒKúÆ+I!ÿݧ dn·=!"åg¨›i0\`Ù§ÂÇ3g![eÎ9©0ðUÞ*H> L‡ɤÞÌ0ËgA¡€íÔpm;Š˜žî,T¼Qup¿J XÇÛUÂM»omãÇì1~hu—eXRm5"M)wEC=WEeUQXê³nñ’ Æ€ká Ò >Æ‘£¼7­PÆËS'ýö]m­áùüpº°œá`sÞƒ²˜Án”Mz{©^ºÌÛk{rÌv:e ŒèQ=©.v]ózŒQÒ2¶ƒ0›®-,áT¼qú+ßÓàWu̽BÚCæ·‚rë¤Fß`+ÏíâðÉÜVé6kf&7ª{Ϧ ß½¤„— ð1y‹Fq®&À¬-•• ë[¢Ó´’wk5-à“Ç¡{ÂL«—¥R….0¨Òm!áç»ÇeåXÈa+Æ[D($fB @­¦ÌiàmXIŒÔ\–¿’jßò‡L ‡M'X䜋D´Ø•g[$xùµ¸Tã{÷óEÇqük«”R1Þ º·uèd&;tˆÂÃzé s°¥[]#»ˆ7û×Ö¨‹ÙBÕa+èPbB“„‚¥»‡Hwçù6ÁÖ3„³ó8=ãìç½€øâ³3°»€qÌùüê ÿçž0³3ýèŸxy“#Y›®ÙüÙXˆ„¢ ‡ò¬)DòãøZ@The} ê=ÎZ@T*3Ûš.UŒcÌs©2ëÑ5åÙê` QÇ±Ž‰^¢Ôõæ2k ѹa= úJD©Ö"q¹ý`žu€È™þÈÚ@„ Bˆ!Dˆ!Dˆ"DË%íß@oÔðÃƒÑ‘ó§´ p^NŽæOsƒ€N{“| p}ÙƒWY¹Eà ¢Vnˆ–Ì;èK °‡©©ÈªU rkãFÈISs9Ñ…?Þ¼åçF8€¶AtHù|bþv<°ì•[#¢|¢ÞÏf6¾®q€ áVˆhO}þ®øÎ—b7+Ž X53_™Ù-ù¦J¢‘¯.P5ÀŸòÙÓiò5ÆnŸâ¸€kä¯Èª½!¢³‰ø¬‡ÜR¹\.köA:ápo}ÇO’[‹{¾Œyä/ÀÅ8´;Éíèï³fK€ª x1ÑyåVíEiåà ¡®D\_߇kjÝóÝû°G€,ÞwÖ.ÙJ¥•K âª“~œvÊitm5@§Ï<'mÙ²æÎ¥6›¢[lãï3˪ƒ•—ÇI§ÚgöËlØÇI8,®E°€ëžg[Ú#¹ÞDZ¾oÏÕV#ZÀ!¢V¨"òÍÐW=·jß.pB´ êŠ χ˜^ °ñ[°¥°ê8ࢠ˜áÄ|"òHp—Ú,I€œŽ÷Ú€¶Úʰàò#¢TÉò,D{ hm!@¶îgYC- ó\Ý> Èo±€ÁzíùoŠî(Pàú6ZÀU•kÀF]@~1»´<yrvÛ, Ý Ð+$ž (‹ó]°<\þ¢îê£ë(®ûì{zOrDÛ5c\Ûy–›”¦Ø–)Ð ™8¸´,9''RIzÊiAVJ@6œZ‚„Êp°-h8Æ&¶äô€ $ÂG’žÔ¯Â´´Äö³eC`oc[d›­Á²ÐÓ{»½³÷Þ™·Ï+ÿùæ½ÑîìÌ;¿ùÍ™»³ €Å§í³àhGçš´ù™Gš›:ïÞ 'ÒÏ| ó䄞vž…'Ç |ç±5?¦(ç{|Çã}qµoåñíMü$ ÌñMæ¢ðížÎ-V0çgìI°¸×÷X„¦ÞÙäû¨ó‹ž†`NÌáõMk¶r<¬B¬ÌêÛ…N»2gÁãFΫ?ú¬„苞[GÔð²) ü¥€ùw-«KÚ²Þw-[d¶“Êž2ÜìÅ{:ï"Ù !W‰2rgs¤ç[7Ìý ’ÊEu¤M@X.Ó®ŽdÀ«ð‚Ït¾/ ü±åÙ7^3÷÷]Gºk¥s2Ëzˆ­Ô›2ß•i¯Ti3ó1m•gw¼ÖÙ ÉÿŠßàuœ‚?˜ ÿíêª]ç 
!fD_Y‘Gá÷Æk£84Å+¤ç[×Ì##òù*›Ëmt‹T.‡Z¤.‚b;;ïéAŠ5öHÏËêÌëço„‹ 5H…u5A¾$SLQX•ižIäh?‰=>{ùsºÃ€¸Ë ªí%¤”€C^ºxï?¦í’ðÈ}¿ó~Z±DI‹ïy ®"m9 ß/’ó‹É€CU ø”…\BKÖ’<fBczÙiØBZ(ó*•v¶»K ¸_p˜Oߦž—´w¸_^N36d‡J Ø—È¿ùå•têYSNbC¸X=—Ïéú jÖþRŸšâõ(ýE–—?ªð}S装å”dŠ\eÁGÇ=]p;DßZ*Ý;,7Ÿ1àßW-v)ÏμÈðLå74në=\¤m?Ç/c¾!Û£¦HhkdÏ#cƒ[lAF2àöª° ¨þ×¹OÙ5¾kJæM*н#üñaaÜæk-üóýŒàœ6À]"½Ó-¶á?î›Röô ¯6WxrÿÑIš2zS»=ùî0e!Ò­q‘Ô”¼ô=Sð>•DètÈÏhšDµ€Ã¥>5½Rz÷EªÎòV5«pjâ*Ûýµ T&ŸCtƒž`àVmIp¤È ¯€Î;-#¾¡p;hÑ#´ë<¶î~!’ë 䜓(¡z—¸â™GC tY#¡ Ã)$ŽdÀ¶0:ÝÞSS¤åÿ%ø¼që¿IèÍúDLf‡©zÓâ=”7Ñùç¿e’nã¦gîC–rßZ·ĺJßæ‰³çiïÙãõÛ%î,”¨M#é@ôb;@,Ë“ï×%eäDy°9¯Á¥%ÆMžŒ Õü×½•C‘-®[c5Hñ€í¼Ó†Ä*ÏD¨}¬ÂôRa uˤ…cB¶"‡`§Þ ê>˜Q7û¼ÖHÒ@4ß? Eµ"8 ëfnž„2@v´¬2oÆQ38-ħ%ZaœapŒ|;ývCaSÛ=°‰ªŽK 烪 Ge—@»"Kù^˜X+Û€ã²Ål)þ«edyk7 Vn“s?ÁRˆ: 0Ï9vH’¦’6w¨ Û$aq]A‚*µr5&ý=]Q‘U¯ÓPkq›UøSP¡'õ>ôö-fÄ@E2à°Hä´- ÔZžÌŒQ¬<RB„šuª •ù‘š»Rö„V^ii޾ϒ«¦efÁ/ˆ0BaÔe–Èߣ‚D<ÂÆŸc ÑENi›n5³·ñŸâþ#åâèe˜}BœGoTP˜aK«n!ÄLnGMä,øîFƒÈ®¬)6ß>O¦\„+Ž«Ïáï0Ç,¹‰TóH‘°” m|š{Ôòè´YpõîÝ»‘ª77q£¹Ÿò”·òRz›Vk3÷tK›išIí·vñk#-öÃ~ äÛ!ü H¼[ëCä„p)ƒþjl9À1s6‘£ÆMXšYÒZâ7¶º¼Üúd­ÅdI€ë:ô’É< XÈ0hð¬!Ï‘!Rw/V‹»D–j,Ñ!án£j-TÆ êÝ}•6‹©S§ jBÁRð Ž©òÄêÑ€ÀÄÇæ•V¾˜TÌÙŠ]ÝÈR>­!4P¾æõ(™¤5MSÍGÉFÿ„3ÎùžÆLˆf øAü7|%öȲó*ˆ® §Ík¨CË–Xn6 Ä5>ÊöƒÃ]¾CS| ¨oõ+Jø¹™TH÷Zâ$ÅÆ ‚)h< ̛}ž]‡Íëâ0oŠ4é'bÀ}% ¨Q-½µ‡ðDìG¾7šŸVÍÂÏ#Pqm†GÔ1ÍG‡ñ߀f@FÉjîøÕçW9¬Bd̰üÄ kŠ/±%MÀ/²6ÛÎÐÉHɘÀS¬WÕ€W£ k±î—US›7?v‡f@ÏÈÁUTp¯økîÅQ$ଠ•ÝN éò ¨óA¥½GKB X³yó¦[ H¢°@"‡¤Fh <âÍB¤KŠ%«ÐÂ!x–*a À€ùL2K 2ûsø˜ë4Ä×4—wÞµÌ$ùÈTAÅ€Kð³¨í4ó£éŽU—°in¸Û"‰U~½"¢]È<€!a½øjˆc@N;LÈX Á5J{W£…f”c@z)éá° ¨OZ «L3` æ§9¨ =‡*§°;Ì€î‰Ã<à&t‡bPµ{kÃ[q¶ãŒÔsõކm@Í€m(¾²Þ3 É]£aŽéÒ &\ "þÄ®à½à‰Œb@·…ƶnßœGD<rx?–5y½ƒ= 0 £Œ ñ>b@z*@§ž,~ˆ!Þ†v‡Ãó‘ X¤,œ–æè½àƒÁ!8Ì€zâµ_ ý£-n4C%rÙ]>þ–¦Y¸ÏX¹ѽzMk;íGÕ·êtÎ[™xä´¯e&e2qzIÃPW$F}6 YFS1à”P3 ‚! 
¦N«8?š9àŽíŸ HOå*ú¢X8û$d¾€ý÷«° Èoâsÿi p!þB˜¶µrýÿW3àQ¬p¾dM½ð¨)&Ë€Øp:+Nø~[©Qre¼az} H–‚]¥tK‡àKÑ$à@(9Ë€Àöt$ò2á˜aGÐmòÙ€d¡•aÀþÀ•aÀA²9xÉ?’ǽ*_±îX§5Ž#Ô†“¬`ç7KÅ´»ã©ò21ís`@"µZÚH)ç8è³‰Õæy¡nQÝ¢¾0™±DÁ)5 ÄÚ€P·'Lã³0,Û€î(Vj+½‚Ýh°›@Ê!lò²$ ÙÉ€¼÷ýç• À1b@m¨*ÞÂ팛SZ ÊKvNœƒ H»º)Ü n,çx È€Á(ãÀ6`˜ó8äoæoÆ×ìèY0OE{1ƒ²Þ0ñ6 p~ Ò ?ÕÉ€nž|Ú+tÎO³J[}TŸãü«†!/ž=×”K,ÇägÁS¬´O¶ˆ*«ŽN÷1`45 = !ÈøÒÆ0àñ¥ÂXé_†ad¿ øûr ¯–µ»!MŒ ˆëâôTpRN ïÏè÷U™ èxç>Ÿd<] nÜüVÜo ‹g!Ùþ™*Åœ82öS‹MˆsdÀ3âë“y+®?e£ È£QŒ È è ÏH,戯hp?Ü ÖP3¬îùqŒŸ#€Òœà0&aÑ¢fÁã4ê¿ BT0»–rÝkiÖû"eXd@ ¥¾ Ó“±Û±bÐɤ™Ñzš3IÐížO½ˆ“wûQ Hx^_ß Àc“˜ŸtK÷Œó§™³þIªðTµ£íŠ‚‹ ¨‹MR&n<• ¥×o€Úû)~\0!£xÌC®hâÃÚÜ9Y€³±šz`ûÚùÒ0fÈ,ý©(°‘ gÅí„@mTN2 €X:ŽLäºe~”´”‡]± 8ÈânhÈ(™OÙHò®ö,˜ © ¦&Á€ã8w‰f@FŽ4†jˆµŸ;†oo(o’KšÓ^¿-<k÷O¼Wã–c@F¦‘tc8Ê]¤Xž ¦>‰üám^áµAÀ•ñÇϰ ø1¤à*Î#ºäTì$Sº\ÌD2 ·3’G7­Nù"‰815Êf@Ò8SCë€,ä;‰4—·©#Ë8YUÖ܈±ôÑ6 –$¦Äð §ÑÞ0þCÿ‹²£ô ø»YépE€Ä€lŽœ‰J; 2mkӦТ\|'v!/øÈ¦"«¾‘ØGj®æ–VÌ,˜ç/þJçƒÁ³ZJ¡€mà è¶)!ˆ\YœEUcÀýÔ°Â6 îEx]p»#h+8a7y(i•MãbŠ‚ù {P =†…#I´÷£¹ù°m­xZ¤á5vD/¡I»ÛêÆà¦›C#TÂçR§7)· „‚c’_çä Ÿ§þ´â=ÃöÁàóTéš{B;{Ã7‘ÅTh&ß+¤à.©*‡†ëóNæ—ñÌï–Œ™ˆZt е*êˆÞV7ˆÓ\e”QeÄǾ„™ÖømB‡9ÙáUœ¼ÉÎêJ }¨ÂÓlÄ ŠY•@V‘¸ã'Útc¿¢G“n¯éœ¦@‹k¹§¯e‹ÄxÖu^Jù•/×ù?…é„8ÛÝG­eñÎsÜ>&Pl9_{{Î"˜ö¶’FN* 4+ºÔN—©ùV=iq{W—~¿¤1t¬ŸºCq–3,ٕο¼Ïí%à2y‡ºyˆ ,E2n¶‹‚Dc$¡¬¥” ›ïÞ[yþ€'vÃ^¾õ3Ý‘œ'5üENÒý×nuJ‰Dÿë†y—? *Ü}˜Ý!­ÄÆ[7,§³Ínløâà÷]. ÈÐ¥={= ]yØÇ‹÷)Ÿ|&rêq²a¿³¨nñzŒâõ=®CY\¿=dCÊ´(ßPµñà\¼¿añÝxæÃ7ëßf˪/û*©•£O Ö3â¬8¿·¨á:Èè—{l׎*S=ÚPw¹” uzCÃòguí-jkær­B­Uò|êwÏÿ·w÷¾aÇw½Ü‰j‚D$T:Ñi7 ‰‚HÔ ¡ÐµF)¢Ôˆ†žÒ[èÏET»Ó¹ànžÛ0ĺîñä÷©§ØM¾™dgîfTÞœ|äEߤ—æKÔ)1Ǧ”§…ªû*¢V$çæïc/rÆ~>ªM~YËfà|ö1mù-¬ùqÈYü~ò”ïÞ[¿ØåÏñô5NxåZ¼µA¨È¬w5ý͘ëCe¨‡ :É Ä{ «¤< $Y£ÂÙEP‘ØÛþX ºI²õ¶2ÙxjÂ's¸ÍéeŒ]2µðjríþy}Ê“¡`¼´³Y4P“ç(— eWâÌ“d¼åÖÍT…·Ç·÷·V›gKöâî†ÑÖ›éQví–hvÑýdêaç†K¥ÞÉ ’îa¢gÊ @}lå!ù’[-ý!@@€ €¼,‡¨\cIEND®B`‚sleef-3.5.1/doc/html/sleeflogo3.png000066400000000000000000000517641373003144100171100ustar00rootroot00000000000000‰PNG  IHDRvp÷Ë0PLTEÿÿÿŽŽŽšššººº°°°eee¥¥¥ÉÉÉ”””ïïïÛÛÛ888yyyPPP3Y]? 
pHYs  šœSjIDATx^ìÙOHQÀñw’»Æ¬–7—.uˆêbD"¤‰â"ˆ‰Ñ©tafQ’Õú;xˆ ¶Ôl„MÌÕ"¼äØ#È“›ž:­¨A§vµÍÙ˜·þ¶·tVçsØËêÛ7óûÎÌ¢v W¶È}­žÈl Ûn €†«›Ù0¿v« ö;€‘ê18§O‡`±_f$ãz;è†dY> œNY¤„¤srA*À0(äY craîZ3€IÙÌ9[®BJÁ@‰Èo:ÌHA®‚á)ˆ³ çIa>Y2€ÉÉ`º’¤Æm àÀ.à›x™ ÍÕý}“Y<;ÉGþc¡ à±rvâs1¢GHR‰u°8Ä=õz*[Ž©Äðu„Y;;i…¤ÌUîfð†Œ(N§t|9óœÑµ;È‚>lF¿íü²Î™™‰&1€!ØGk8Öy3Z4IŒœÑPóë¼8ÚGQ Pƒ*Ñ͆À õ3o(Æðø®uåúW'¤Ùþ+yr*ÎPÚ¹9£}N †/þÐP©AÿÃ*ÞÇ£çq¶(@%¦£õÿfP<)&œÐmóüvAÎ&šÑòˆtŸ¡8ôr€ ÆP<é¶Š@¤ lgŠ¢€é†ÔÔÉi‚te+Y&Ô«’4¿òÊÕCË#´À›ð¡— 8ŠUÁ>,úàÑ}Gã{ÏÄ¢½ŒpJB×^OOŒ©(äµ* 3ÞGñP%¢›*Æ À•ºÎÇñ€Є?@>Cž^0¼<ŽP<Í­@O¢û09%ñûÝn·×[“|õû»#f7ð–èJ€¼BŒÜ„<ÑÅìHKâ\ €¸Ta0ŠìÃÔ”p‡»Ò««mÛPãéž €Iø‹¸ 0z/Ë„(÷5Á‚|õ å±Q& QX8€2 ؇yÐpGmk«>ú–òò>9©¯¼Íëöt+A:à® G€ðà€Lô|Ð-$GÃ9 5ŠxCe €?Bm4;€àöî74Ž´ ø|õí2irÙ{WÒø.DFqÀÃ0âwpKõr»n±WŒûa冃é2TȇJŠ‹i°î|Èí匦MY!ÔžYÿWN½Sh5 ¸pˆËz_Epæ?ï&³3™îû$“y>mËd[òüöyŸ÷ÏÌ.— ™ä^i5q-É0ê—VÚüOäœÞ½`Á—höoqa|ˆâ˜H€ßMp7´”€ŸM€¿wÄ6ÊU]v²O“O HR}á>ŽBÀ“×ï·ð1.ŒðOSV …n"€³)ð?I°”@Á:Jh§¬;yV0¶q$S’äKÎ8°Ño‚E€ÇÎ(€äm¤¿òO“ÝøÕDO§€ö\H€þ ­ªî¦_´E•ˆ¹¾°ßLé½sÁR2€&ùT&̤Àß0 šÉ ÜS5Q­ £˜’dTÀÇ€;Á"Nœ­ÍdÇ ’L¤0m%¸• êˆæß0ŒROUEUë÷ñˆpHÆeX´¬½¿Uœ9þ-vèßìø«GíÏ¿Y¯ŠªêP5'@¾<ZÖÂ:;Óìø]›G`ýœ$µZ¯UžSIÔ°ÑÉ€"¹BøUî7ñsõì ;€3;€B÷Hl”u©%Ú uw ˆ­‚ä 0ë&è:þa‹(¹vfø/°@‹ìгG pS—šbÏMzýºÀŽ ƒ  “èß%üæ²°É€_eÀï‚   "zYŸ÷Ôâ{€º¡óçÁ¬Rñ×d ße0`Ú‚°CòïÕýµÃT*zS˜_€ðl OgͲÈ[ìÐ"Ú8ù'Ahñ#@¥R½ŽjA`/Å^~>{üvü";~ rQMÀtî>qc(ƒø® “!€M«¶ÁРÙSƒ¬}ªWê.»~ÖÛ ÓÀøx-{S;€³ÎX°ÖÏ‘  S䊀Xa/4N¸ênö vÓ P7u…?é‚>@‚ ¢—°ó 6€?%Õßìð{ìŀ߃€–«RST‡H>-ZBUè »ƒ“Vxœ'éªàçFü«?bp@Û= àÞ,‰ôõˆ v£§ uæNÈøh‚¥B7{?2 ã#€ïF½Íàtç¦îú©7ã½àö€ºâPµâµ…€§nvx7w##4`0Ýï÷?`}e9X¢ZoÄ0Ã9€!Ô~‘ì È}?yÏø½k£ 5eL)Q|ö耹ÌÙÀ%ÝÒýàøcâvË:)4ÜÓ`1Za hHé½ y¡Ór¿Lš@8Ph8-~ÝWm3TëUÝôzÀ¾W5Šl[ƒùnªSýSÐN¬7Ê €†£—‚Ú(BаM¦'ºhrÉSî0§PãÅ«570®¬êØ$èò¡|¹°œü`´— €™‡~“Àç=.2@;Æ}PËnÀn±Ø÷Fè"PÉ éïa¶Óƒtwv½– Ò|þž ù¯† ù'Þ°U6$²`“\㾪yì¸ü+n5 ù§×™×f‹O&ÿ‚rÀ-vhÀ 4€‚Û’E@;͇-Ø1Û@zÓî’/_†ïŒ7þm¶³°`à,4€õ²î èÓv0R¼Ðý£‚ûCNú õñûÀ«ÜçâÉÙØcP°Ü€¶*ºI @-ÐW5’W­w€94´l:4›ã—€ÜPü-öm&ÿàì£;´È ÏXPx±ª˜Mï ¨àЂAmîÀÐ:° ((sÏ‚f ò é`¹Å]†ÎFýØ7ƒò]v–€¸»€ î-Lò©ÉÏùY%Aä((ÝJá˜á[ÐpñÚ¸SÁ™ýjKþ¦‡ã0m±¸`r[À’è ö-¿õûÄuº(Ù ‚áê6ý—},Ÿg½3ˆ>)2{T';€»Ð@ž´€s{Ý–DuThÊ^¨^Ç•jÁø% »@î·í`Àóò]X[2$©ù8G²ax§šÁß`52o´ñÚ¸%à6·_À¯Û'À‹ž±@ 
›U2$‰3ýõ`»ÿ^ØÝ×0¢Q©T½%@?Ìf€ iÜp—;¿hŸ³ìàMÀz¥ê  †E ãË@î! chPm™h ­"®]aëi¼}ÿ„ÈØ<ø `ÙPý>^òh÷ú¸ Ȇ_DÕ–Þ8?JKÀüx ÂÈŠøûwO€3;€;ö@Ê•à$†5o}?À¬»é'%à µ¦ˆ#ôllŽYf¹H¼s¥}ì¸ ß…@G$Ñà¯à‘“¿èÎe^ôì!ä-¦‡ Ç?^ogÝfPèØä  åJx3€ 9 )˜ˆï•hçGÍôÝjq…õT§þÙÉÀd—À›€W@l”+_"eŸhù{‚1Ö¼üë.–æ"›Gª&˜ Æc”€ØïʽÿRÖv9fèòPëdM—¶ä5&¢³Àù ´p@。j›.‘ây†§…GâW¯·3PXdðc À÷8HˆÌ0mJb𙬑 î…š‚åß៼gŒŽñmßTx$}½%€mŽú<Ç`ª  P&·vùu!84zXó;@¥`{dh`(/s£#÷~';Sv?·Ø Os`èô€Nâœ×Ž #@u_þ½ícÍ6KX [ÇK ˜ìÆ xç…¬ä_å˜ü°Ë±xÙ‚°Eîmé÷Ït·¤ÑIþIï@G_b:GßÊúöÿ8V…/?映¯Z(€ÿ³w¶1ne7G¤±ƒ oÇ®ÍZº°…XÍí¡«‘äôC”J Qd‰)ð!*P\"ëê´!€¥ ¥´a—ôdàˆ me!"”&ˆŠA¸á]¼(q¥“ˆšÃ|«’TÂëÙÛÿÞ­gf=3™Êý¿Úñ9~~;ó¼Í3fikÙqCïø€âõ!¯ßž·Ôæì*–üCä<À¨eù‚§hÊ~XYÏ!-<Šëè`¥Ê-<XN€k°þôÁ¡"¦¬X¬Äâ3¡ùQÙ/ØÊáúÍö€ îÄ–€cªrð5 ‰ð ¡p©¨BN–vUàZ#½%¿0KFÅåßÑ€‰¨ j7,Û#ÖÆ'”eÙûź¯2x¿{”zŒÉR¯³ yÀ^(ЬU ìÒm`ЧjN(`kJ¬Ax ÃÙo«` €áîLø‰ù€VÁ ÕílOÅš¿¦"*|ºY{$ €øp ë„NA5,p’ø€…°†ï9 š©„¹ÿjX*WÉëPᵄ ÿvˆz´+ ¸¤B}XK`( uÃúvXü+»¤ ²?”p‘Š` V…ÍfèúuSK˜%ÀÈœXŒÛfÖ›ˆP«áeP…µ£ª‡-€”„D4ý:Ã|X€%V]AäÂöŸ°Ѓ“íD s€0Ð"É 1k@ö‰›@îâ/Q²Ñgü³µ¸€ X ý€Úˆz Xpƒ Dã{}Q>š¡ê¢ÓA_;CÔžó±GZB¼=öyS €_À'»ÑR\`‘“þÉöŸ0 LÜW!.¦äaD‚2ï§z‚¹+ZSÁ'žnɦ‚Ÿù’X&ðx¤3§Ÿm©àD©Dbü0ã ÞþCÀA à¢\ìâC~HÝ“ßæk-§ä‹AƤkÇrVpH„º°RÿDE ¯ÒpQ?ÈÇìHPHÏ}ŽFÀg4÷ì–/›ùjàuÕ@ƒ¸¡ÙÚ01Rÿsê­]ÞœÅgÂ-…HPLæwZ´%@/«[òA§„€Ž©`²DúûGBŠI+@>z¶…ÅJˆ•ùÞ$j‚bºÿwý ¸[sOàAyÌŽ<ÆF%À@à&]@øy7:l1DN-`$7PD?ºA‰ƒôpTV¾'ð…–"¦,ó»¶ z|^ØçZtÍ$& ¡ ¬çþÓ·U\ó H³#Àª–<æ% `ÍM]¬ Œ—H»']‰2p%d>v–âê<´[£# ´]dHê¯0Si[ùQZ @ÞÖp£Á]…½³m+¡<Ü@1_í \Ô À1a ÏÉ sΰH ؤôõÞdv(òÐó«p¡cÀ €5-y^TÀjé³ÈD'¼ÚÖ‚ 0¾8nïÙwmd}ÔØJT'[H@î²^ÌŽ<ë`ìQ@Ù‰î¶í¸4 ZDÂUžÁDÉÊ%†ÏëÀX,"6+àd ÎÝL@gÁóŽG|yŒ¯,`H Ðna Žž’> Q0ÖÉ¡¸ä €çÄe "…ee?6jêàe<¨€µª¨Æ(& A €¨`í—4>v?(·ãö·JrHT2è`Ò],{€ìŲ˜­× À“B×.Ž1qÈ*–íR\€ÛY Wÿ€áÐgõ`väX·XEžîå€E-`\hAòÎÿÿ ‹6Å€.)`²ŽwY´RðNW SJöcÞð'-@å0Î.†aÑð‰Hú–â’€Ž=À8Ôg¡øn+-¿M/ŒxI“ä!e$dãŸûó߸pá÷õF­ôõ°]Õ1š.ŒîàЗëØ0Š ¦QÅq’íë9oz™™Xü—Fݳ®ŒˆÕ?®qJç‚ÐuuŽó¦u­[À‹ ¸oq\5×nÍXD°Œ,ÖþTü}€dP¶®€áZ­¶M€ïu?£) À{ä€ض ¶ PíÚHeƒÝ#~Þ:À“«ßf7‡¢”_zR s’ä@"Í„tN¹J÷Àÿáß»$ ÀpÔÊ æ@Ò^ } ÍÏŸk¦¿U&·E óÙ½{W¸>) €y×Àê€tƒ¾Ø’®àò­s¼„%DÄ´ëõ€!ñ ' À^ ä7S0Ľ^VI9ØlI€s¯ €?ø Ýœ.•»Œ`|œ 8 °¡’xÛ¸Nžk>C2¬Åï+€Ù’yíMš0Nn-cºs¥_€À0 h81çCwðÀAW'šÂ%ÓôåL OŠ€?uNX:·O7“ 
«6%pÜÁh0ßf~ÓOQé}ƒ€tS(>å²8ÏÃ@Â\Âù}QàNÇe€ÀÛ㼯¸¡ž€å~Ê-àºëp´@€ÃXBDÀßËmÑ À‰É­U°00Žvà@Ô\gÁ³·ñúýÞó¸" n(Ý$ €qVŒÉê 8ï…[¡’Q f„Û©S¶Ç'ÅnàÃòÃÀÜ> ÂYK;Ä@Bò² ÏxL—@ v¿Ée€—vh¤ÉÛÅ1ŸúýYŠlT¦®Sðyy$ì!¢wÝBb׃WúEe‡@#95š_:ö¦šø 4€(·×àöÚ7¸ƒëà¿«€ÓÈÁ HÀ±k‘³þCEn9ÖUªVã`83D¾¹C€2·€lÉ4²é¶ØŸ³0džޕ4-ËÆ×Å0;<²~º›Í¶0¾Á2Ž-‡žjÀ Ìô` ‡‚Ù†-r7°’ò€ÐaTy8ý(Š=G[ágPàW¥7gˆêâL½Õ À¡^O8HFØÒw Î0ÖäÀë {‹´0Ûñœ €+®›¬ûÒÄú ‡ {Ù ˜â…ŒSŒ ÿž »-èó˜+ €¹‡@·}ùŒo0Ä©&â?«€­»·o€SAåQîÖNÙ8{ÀÔ¶4äõ€N|•{þ¾W û¢êh\óêXŸ—ŒæpM=fP p)A€=€mØ¢Kq9àCœg›è/>ÏýÎÕ9ÇF†|†÷…Õ—@n_ª GûüHb}¡Ø® €¤˜©}{Û{?36õP<÷{9­ÈËþÌÞC½¹þrª ÇúYŸÓ Dô7¿UÀ‰RàâQ@3ˆ=^€·]Œù  W|Îâ{"àé¦è(V Ê}œÎ4csdcÖ7ÈÔSÝ‘þÖñŠzÌÒ®N, ЯX}^-]_°—b€>Tgl¬ü‹âͼtZˆ€wð@[¾Ð/‡Î§UôNú7à ;8…펖 tQ9$ *ýpŠÞøD9•UV€ñm, ‹ýí÷óð!ý#ÿ#vP»yiŽâ1ÎÍtpÒèÜ—~ÃXD®bÔ%HAšP1È%N…Ž×&v¥ ï ä8Ñà_jl„Æ%y^ÆA¨˜Ü{ùaÀpû ëèbf%/….Xš+’»ûfÐîÛ P«M…€!ì| o?Ó9‡²û¿ÛEŸ3?„>j`u‡R¯Ã“ÇiLz&fbæ7È}‹ê €„‹ ýÊ×Ò˜ÀKqÙ¹`oÌgæç ?'W?ó*ž î' Ó&®á×3Њäu<ü@äéØ:Äeù¯fŠÛÑÏS\L8CŠ@G`´'þGÝÝ„6Dž‹ø»HÐCOº%PL¥ZP)Ò‹ˆ %´ê©=ô 1=ˆøƒ(¡ŠT„ÒöàAP*Jë„%Aƒ“úh›M¡‚šP R¡ˆÞ‚T‹J­ßtvòº™¦ñ]É&óeçÍÌ΀F/ ¾Ådµ¹lòî o{ €rš ô œï8"û:-"‰üÕ!zõ6ëгÈ-¼qlÊièØ”ãuh™O¦*€Ûâ_ÊÀ€þsÒìz¢ü ÉêXÌ0bìÿ-(Nßw1êù¹‹õàüÊÒì%ÒQàÊ¿‚ü¦.¤@€ ×™RÓô¢ õgS)‹€ÙQUNV€¶Ð€iü8ì„êC²c`7„Ø I¯Í °N€å@Xœ.²¬ûƒÌdpüïÚÝŸpnÿ#u¦KðöàU< ,€Ëã 91=l9µÿBÛK0V:mk¹ø›€¼ð”`<. |Ħïhw€ˆ"}Ü£@C%è-/€_ì1‹#7ÇýRL8¼L.L³…'f0¤ hÀpUð<&Ü¥É)Rl‘9¡l?p&îÖ…qR,^¤0v“*•IR夸ê¸@êp‰$ÏZ^K#)³c°~,˲{ ï§§ÿÓšƒ €Ü3 dò¢B€Öëõ/wßûþ;µ.ïý¥?Dj¼®‡°íý¿×µùJþéuùõnOû|mä‹ pV@^)=†öà.€z¬^›*+|ûòÍž¿´ ô,Fõ¶µGõy%ÿôÚÜ)cý§/À…þvI~Ð)P‘ ü¹þ`|x#r€špÊ€3E€§#óÕËóx›|÷áxyˆò{Z L2óº®10âg;í¹ ø?ð<· @Íeê ÀR`CxZ7»üa9€Üê 0%¯‚Æð`¥¨wD¦VúîP C¿ Žß¤)Æ6f¢’nC€*ÀÓ !<­E9šþ7ÆØú0Hd†z„ôYÐ<^u Û€˜wvwŽ?ú ‡ÌH, _sLáÈR ïª– Ð}˜Gœ½)6g€úÏeØh*xZ£Ý)àí}@ XÏ\@*!ù¬ÑàÐw€)( ªÄu Ì‘†^Ã!Àoçº+@T‰Ë@—mð<×aúVrJ #  O‚#À pñ "1•·M©„dp¬!Àw€»d€¾1R`õÀ ÐÕvr (-ÙÒ1mn ðƒ)€îŠ8ì*À¤ºùK†z:/Žn ðtb»’*b» ‘ž^3c€§õH€´@îIY>s°×.‘ži“g^€>ÒÝH¥ØvØE¡Kýák¤'lfð´ÎFB€×U¯ H±ãIpèRè¢ ¦í^€á@(´×_*á7ÍÍ^€=Ö )€Ì€îPØgˆ*‰›hžV{”d•@ZˆAáUÒi¨x0à\'}˜ºè([€àJ'å`A"#€óHA!3 09P ¸æŒv»xœn « –€“ê`Ï€’Nüä°‚Ï«Õj1,VŒår¾é ÿå^€›"ãc¼F^à ËÐU  ’#Qo\£H¢ð·›ºKWHð†Á€}kÜ–.Üà d0 p¥ƒ†Kd"ŒÈ3`[yÌH7’f„ÒŒH}¤À?'Ìþ…™¹ÀL`?é‚|e ? 
~mì¶ËB¦#ŒÌ&δEíï­y’ь¶ÒÅ,H’$e2€¸K¦ ]%@GÀýa0{P ãtØØYö l ”CD¿¯¤”6¤ ˜§-Àâ,X`¶·d‘QSÏ‚-L¨Ñ€rÙ|9ƒ€ròˆ}Õ91À…ô€ï ãS´@¾;˜ `év:ö9²Î"Rm_÷EY9JÍ…m”@*‹ïÌ@¶öã‚S |aÀcõe{ œ§À´ô‘Àš—ºéž¯~Þ÷Å^`Èh ×_Än: "=ŠfÀrÁI ¡s½{ wŸ)Hœ3 ô.¬®eØc¡}<”‘/-ƒÛE2´Vß.ß ˜e4È(ùp5¿>%ζ'Aûk\0'™í\1—Ê(Ð^l²–$)o׋‚—KÔ¯¸s`À> <(¶Roµú "P ÆËù©ð/wç¯â¸’…ñ}…Éw¦†{Q¢ µƒP¸ Ù GÂ7›À¡(n¸àÀlg—¥’gÃt4°™}†›èV§ª¤O.ëŸ-iÚîÃܦ¹Ó¸Ç:?}ß9§Jå1 ">ëœ(€Ý¿¸t €P¯Å>Ї:œ§ ÛÔ|±Ü-›òé툃eK}à5Õ—1½uaû·Àb’‹£DI¸¨ €´®õ3ðåù g¥{EæÒê¥ `K ¼cQnˆaÿ†@øYïÉNàãð&àzî{u“úA-+ïÒ¨e{Y ¼îÀÔÓ‚7À; @¾ÉÚðûš€Ç~øh “'¿VUŸ„txåt'[ùôÇ÷}¯!–$ C"(ûDš.Þ j`dF§؃›6¢@[•õöü ý1¬GÇ$ã1\ãÊPh‘ƒ<ƒ‚·€ê¡ÏͰî3tÀãR™ôÉIuH&öUڣȶS›×„»JGÁP¾Š7"߃C¬&½ ¼X €h0N8²0þZ¬ªoM9ç‰aX¹•oòõq¯ó"”šq©Ë‰Â`ýKêDiuùMðB4¤çщËkÕ÷ya¦&W×€ˆTàÓ:/"®’oÛ"ó2×Vfå¹u\7ÇM¾±ŒzXY&DàdŒ­V0„‘'Ü9Ôòl†6° wë€ê8c"û¶ËO–QæÚ4ZãH3®MÁ(PÿFßÔV‚‚;Àš,À¤‘žfý9áØE î,²cÕ`%6Ù á°H‚É¥?äyNy¦(`Œ‚i0 q·¸Ó˜¡<ö;ÀÓŨ&–O9u“*÷¶£Y @Äb¶*¢ø*ÓÇâï8§*!?V¦„ Öúƒñž€éá°óW­#ãÆ€Êû™8gºHâF¤‚CSEx1qà 6GO%>ÓA€ žp,vÐ6… €éä`“ÀÚý/„ßÊ‹Ôe”¸ÆW¨¶Lá˜g‰…8 ar ö·€Ønœ¯Š€á»ظñ·Ý¶ÌÿjÅlyóç-Ù‡¥÷GÌHL8œ„‚Mjˆm¹÷ðÛ ðîÐR•„«î1@¯DÆ¥ñiçáîO¬üXȶÉÏD_ö÷a[ø¤"b^xÒ Ž„„ÐY0ø¶¿°1ø"œxd ˆ.ÐóËÚùÏqók÷}—ê7ŽöQUFu%HEA †7},Ëéßn €¯T&MYЀ ¦r€?1 ßç@`~P¨…æHãùèŽßô¼×øºóšª@•Y¶êÞ ÐÀÓÅ@¢“yvªþZÇGïEøèbÈu9 ؤªNƒ¸ã¥ÝH  ss€çW÷˜< ½TúÖóCà¡€Ô“s¦q' W®•gòu“ôüÌÑ€â¸tÿþ¢ÆCßòKð¦œÆÁü àœHâå凸«ŠåÒëˆE@Øìf§œÄ“8@†,ɹ—«³>FvE5v ¬xвSq÷б]RlUº·Å zx„Áî;(‚ª@Û¸‚wO@d­3 ¿v¬6joŒÄÓ]D679Gí î7ðö–{ ª@xÀ%$(ÀHp(ÿ¹ÁO*¥ü˲?6`Í=‚d€:ÐVú^àùwÀŠÅö5^Pè‘ö p¦X NÙïÖ¦žÿªò÷–PþÉú¥®ªÀ÷Á„¨A|ª1vºýXÙvü/@U`K`ð^€N8cK+7‹üçÙ¿vãys=´It{M p9¼ Z/ÐJǨ¬Sî€0²OßÀEÀ%¸šQ¼9´Ž™éØÚðÇsYo¶§5Ñt4‡‚T—à”7¡™5cwP(ø®ñ³E@›&÷Å4%À/¹I}¥Ì?Oñ¢A‡öO8=kuʵ\pŠ€ÍÕz‹o‚"Û…ìÀ×-Š€¡u ›èÄãÀÊÍÔµHsÐþ‹k¹Dúgh¬˜MV¥ÀäE1¹,\nª@Q`¬GÚT× fÕ €Ô5 z¨ÇM‹ÿÊé×SøO_vèS* . 
¸Q!Çk8À •€“Äš­5 ëq€c†ïRW¯8 Ÿ^ú!‡]Mó 4áŠaërJ‡·XR¾‚= ã&Éù8W8À/ëkcºg+ñ§ï^% ºë„sÖ†Œn&@psnà ìø{Š€Ií' iZ7¼¼´Öëê»Ì5R'ÓßjifÖ¯ èÀ@N¬ âF ‚rìðŠÿ¶ð€½`ÒdñÅ`+Ž/@òš–«±nÿ×c`Ûv Á†EøA­Àu2ùí¸A¹¿Ãù@ÐîÔ™ÕTl÷°Júò¿) €'ª”( +_?¾µ®#˶ ÕÊ }Q QHÅ5‚àÝÁCÐ ç<1y{à\,Çêd*+7éy 9äÿ– Qqšê øa‚×uƒU×x^àW…i; @ýtÒ<ãô%µm.;Jg ùýX´3 ŸUæ<=—0`+P‚×2Y€‡-<ࢀ¼ ä] €øR ãhþî…Tß²ê73€!Á«¬ÕwøÆÿ áOÆÕñè\v:´Uû¼j‹Ò_3€/sÉÿø¥Ã!åd >%DMˆþðg.EðwðîëH0þ}f]¯öŸ£z¥•5aô{‹:0¤3T2 ·†u(× ØOù—} Ó`fO @Ôoë¬!ÿÚðçöt`·mÝMØÔuû… Ô… †aÌáiþYø¡Àb‡>àªxÔÚ@Öo`]€û¿ÕXt1`#œs4'@k0ÿb ?àÁB¦Áá(pâØœ-8¼G÷§ 0¦-iÕ¨ÇÌ; üòÁxÑù[€¿Ò‚Ð0ÓxXþ!Ÿµüãs£î ÚÆ„5ТàêhB0OUˆ ‡P€q€HY•ÿhHþ×åBðç´Ìÿ$£â~°@­l¾Ð|–a¡Oxr~¦÷§xÀ“1&Ì”‰ú—ý>(ÿyéÚj£âý3Ð+BlÑêsΉüÊÚµãÜ– ô«1€£xÏ€ŒM¦xØp¾ÜWþ1*ÞíúeÀ©ë@¨0kAà‡ÌŲ.¨€Msò02SæíßÝÉJ®Æ;ˆ·û>šŠù7µ#}ãx€˜5ÿ€4‘`:Xü½ÇhÙN´Õ"l l ³cG?‰€zê[à…<ÀÊ?z@Ó(˜¹çücJØ(ÀLªÀž ë¸zV¨Ö5œz@Íëùº :¬‘þˆ`Ðÿ;gà\MpÆA{7 ‹iLä_xï‹Þÿÿì]=ˆM@LeÀàpÁ`>èDáiÓ8¼ôŽ907Q6Y‡9Ìü‘3¦£ŽÄL4:0ð'¹ÄLá0ÕK ÅÈÈ@·¶wßtï 38ÝìZ¨Á]P÷^UõTå.‹˜dè”æEèüƒý®É?:Å­Þ”c®n‚™BîŸ4Æb¡>ò¯8"ì:`HP‚˜6¸ ]ÿõÿ¨†PI†€@ch ™´È>Z?àXžÍA˜ ƒ‚@€ž€ßÿí¿QŒ'å®%„43j ؂ݥû6eÑAþH1 „À†2€h29däÅuä•0P½‚ÆÅ@KÀVS]·7™/×´€¦ ã$¸«€cÿFþ?SÈhÝþŠåMªÇVv°ú¯š£0(pc&ûÞ¤<;Œâ È·ÂçLùSbBþWËR`ëšû™¡ÙVØ6è%¢Bð¦äxæve¸üƒ6åüFèQ¼Ê±l84io!¦º¶¡yûpš.vñ(ÝÀ°üš€‚¤©‡ Ïy`”@sw QŠ£8•¡µÔšMEÖÑžR< ^ "ÿ %Í×½ €ûoˆ¤Vΰ ¯« ÜŒK.ÿˆÌö_±8Cá"lþÇCF(×==Î %Z €òzØäÝÀ³í8@ÿ°i¶ŒÀ¬ $Fôç’=§ofQùã]CЧ¯/€€Í ô1ftmPñ¬¿pÿ/Š“Òõ9­ˆ´ZÐ@¶Øa® ßÈç °wX$ÿcZÅ;d0kå>qRº]¢4ÖTã5iFZ -5èLô+iýn%˜äEÀü]šêÝô?2´ÏטýBve˜Ó•\í™!žaCÈR3Èc;öC ɨ‚íÝôß'Fyfîw†©…åÁ¤„„ý ¦JïùG°ERf€u•ë fwLUð€YAæÃlKÍ ”‚þÊ?Ÿþqå0^ãÀcÒ=D:1Ã3†!(_Lïîò™`KX%ðÂ_þ³#£%eëxLš”@©â3{j˜Ò¤Gv€ìbO`• H&ÒkþQ²Íð_L¥ ±­h€¬0Q] :ð¿q€õÀ²€Q:)¼çl0m€Ö^ |À'Qù+` OI òúO@!€ª^@’KùÇ @q.ú6Oh+R’ø ¡A’w¤3* K‡²M€´3/HS"‚ùu@;ß w.ÿ!Š ïÀ«/Gq|ëô4:ë‡O¢Ow<¸Q˜PÖ}cåÌ È´!À6–EWQ@' SïU@ðI£ˆ!–E ¾G7ãøòqó€G§ï£ècŒèFî•FmíS%1 GÐ’ ðà€›ê( VŠÖôï SfTˬnü÷óÝ«ñÆN?ÍâšùÛÒIÿ>ŒãöM,„ùÿ…ê\Œ¢¿1þÿ‰œ?~{?ŠÎxš…W´2L ÈdÄÍ쟣ʠ *¨*+EàÛ4óÿ‹·sãªðQ ;¶®k·›YD‹#¶%E–i‘j”¥V1•#¹rE“°ùC!'‘³,b”H‹cꕈT‰¡iÚ:Æ`¤@IB<© „Ʀ£4/¿²B(8®µ?*µIÃ9÷Îcwæ:ž¯÷üðŽggîŒö|÷Þó¸ªþrû€Nß)€è  Œ=ÒòwƆG„¼ÉóÏØJ~ÿÿ>c3#–|ô͘ͳ·¬,£óSåà4¼Õáe &¼“ùs¥õ/—óÆ‹Ó òÁÙøN´-Y€@ N`5×äfCTò1Øöol†Ëûž˜1ëëà yû;3JŽê†eòcß3—»·GñïM戺 
ø%ö¹ûlP“ЦnV¦ËºLu‚Y3àH¼„3s$uðdªhôå†=ÝvòÞ¨èŸRD¸OØÍûþ:ú§yY| ¿¦wéAßã ¶zê1¸uJÛe3åg°ϳþ:ÀMÜKñ)J4rü[p‰.ò€—èó÷e Ö¨Æ”w]“~DÀÛ Ä=K •ø‡©vsC|.v7ᄨ%Éem†î ;îíÖé3@ëï¬DúOùýÑ{…iõ ~jˆt3’A@±5¼¶‹Ãý»ä—:Kü’9²FÙK¿.÷CÕ¢„lt#û’l±|SõTŠcP¼d¹ AÜ ?£¹Å]¸vÊm¹þs}\ÿþ#j>Ã’0!þ·ô¥WíËÆÖ‰ãߢó€wX‘´4}’• €Ó°‚•_Dìô8h ˆ»ºéœ?ì\b6v’LÜd0ß;𜀧30K|È¥ƒì¿8¾]€@ݦZ«X©Í–¼¢ÃRMGÇwìÇQ”â*ÿ àe9R6¶ÀÒÅ ÒÆÀ î’ âæt¦m]‚=E4ÎýÊ8íðÖã@ê ”°Z¢ÙÙÙül}~í¬£}yý§Ý`Òé ³@j†Ä€©‹ÎqMõf/¬àJEXËÙâ ™º÷‘‹ÓX’y6xÊZ³1RmCäf@šn„ªÿ}Tÿ¥-¥©†€«]¶F.´Hx€¬€J0_ìŒa&‘6 ð‰J6—s´_¼o…µ´{J´Ä0ïÃ… JXRÿyÒ?ߦ!@ à˜­×)×ÛVàãçd°ÀöÊð± L,IÐÜÑ€·º…>éÚþB0¿)r@¬2ga$߀ф,ª?Ø4ðc¶ ÷0pÞÔô6)û.ù@-Ù•ÿÕÌÏS'K[Cù«''ž¯¹Š*cOn Œ€˜D A'kgãÁ1ê°v6ŒþÓi4°þœ:h«ð´€:Ãlzc㛤<P¥IP±>õýÊàèê»?&<ý|œ©[ïòxž¾Ûñ(¿<“è¬ïV²û¹èz’MŒÀ C" ]€*ŒK‹[vÿï»OŠ¢ÚŸ_¥aQ7Êi $H ÁD8}^÷ÑŽ>Ì °ÌˆÀtü7•nŒgyÆ"Ø2à5íyé9±T‹«"š€:¼»[ @tçȸqÅàuŠ6OR.ï7#£ãJÿ× ¨Fn^Þ©Œè0Iꬻç×ÑAÙ:NrLük¢ïFF PFFÞà·€b(£[±¨È…ÉÆ2#¢Èɰø¾æ2i„`X:>‹6$ÚÿÁ—¯Õ.7 ·Ý{Èô–0)1 Jˆ‘Îß1ø¼ò¶pACÊF”—u ÂÔÇàß™Ÿêð&½Æ.<ÿ¢ß§?—¯3ö‡ÀCD&Y¦1u L´%ÞÊQ¬Ô”?ñ¢Êm àft —,Ùb>p\õëCØù 8¸4× ôß4ÊN¿÷“üØ @ ˆ0Që59ªð‚Ü8Þb Œ}îÔÁ¨¤±®‰\g/dtñ穯ã÷»'dÀ»x…òu.5ª{D÷)À’LæZ,F3P¯Ê´(Jy‘Šš.ûpòƒä“…yÀ€  ±>`бª­ëï ± D/ L~YÀ1€³t˜Ü&€ð™–À¢†ÜÒ;ckði=Ã@/D†¸Ò$‚}0öh ÀlçS?ï6‚¿ÒS÷À¿ˆ¯à-TÔtù]´ÛÅX¡ð8ΰ ä¬lŸßêO+ÏÆ©þ7fƒ"_EùŽ€•ÿåcF·ê#vÌå,"Ý‚•ˆ&€;/œÏVP†øó§ÍäávŸÀ (ªÑä ÖPø7Ç@i*.j±æˆèL8YE €@À6Àw70{¤-š'ý‡]¬¸|( ªÃ2n²9ØÃo”´öŠdÂiº¨VG¶x˜ãG¶aN5[GÍ€xª €O –Xð]‡EQN˜a1Z‚°Sx‹ÓqÀ‚ÐŽøk:òGÚ8+Ÿ¥›Ão«nívêò‡^ØÏAY·å*› ”¦Ð1þùlÝ@¬ ÈK<ï F@EÀ¹E‰{r’-Ù!Ìš‡¢’çr8ðÜ™¢’µOï;ÔÏB‹:5m»ƒsþ°cjû\į–Ó$¸º€Ø Lô0 lR§vp_Å»9bä€)Ù¢DPæcž‘¤§!D -¬é¢™¿—¤šOþŸ½s «ˆâøÁGõÃΖcjçIýЪØ`-Š‚5A«1ø.–¶>bÀíÖ‚Á¨!Û«h° õ*­ñFkWŠ„\¥—X4JÓŒÕjE›]CýP U4¸õqÏ™Lfïvü´ %BîänîÜ…óãÌÜsæÌõ±FlZpŸ€…ãmèœðEß  €Í?ºHNßq%íI¸Ûõf˜œ|kHÍÖÄ¢©øÐEï)¥‚Z²Cb¥Ø±FÀîbaU­W€Ë‚i½“=yþGò¹ÿó‹Š.dá/×SÀ)¾SÚwà)9ç«\9 ëKaž?‹°|¥ $µ‚ˆ0 cŒJ`ñ6èThB èœs’„«ð XY0Ùt©`Ê*à ‹€§@,wà©õ®8À™¶ü: Kî5dºã§º˜}U d7¡ˆÀ ¯Ø²UÞ{*À˜U´—pˆóÐàdißÀN(€î>7˜€¾.'[VL iSx|k Pz€7¿Ï”ס¶X¹d£ñርeu<¼ïf;¹=ÀëÀW[LaÈ5ð}ý·µ€·ø¹Ó À"jñÉlá„"G„ÚkÖ@ê·g¯$2Zq°; °*j4+ªêÉN &9óò.ˆ€å¶òbŽN2J ³ {yâfxCw³À@=ðEå,¤ïÂà‰¶ZÀJ1ƒÙݱÂ0°ZzÞ¤߆¯už…ìÄ-Å6¨›b eéMiØ7ÞS¢˜( 3Œì"cZ—^¥o`‡=ë|6 ´ñ…´Ÿ€óƒ^¤=Îfþ´ö°ò}«, À<²³,°™x 7d/À>qÀã®W`{iÿéDùô$ 
Û¯ecIè½JsC9If"àŸ:ֵīŒ› (swü­àKö4‰ƒB+»b Ç}†tÊtUÌ®.¿eƒðEk6ûW^DK_êÁ=WG©c«Q®Û‡è5¿¬RKoDìº;€ú_2*t¢Uóe*†©/“yâWâÑ*?:ĤV \ ã9DùÌ$À§-¾æE<øXÇpìrún{–ŒÛ­[JrLÈ]ý¥¡!?‘Ó˜-uµàxK…>`€ôvc÷ý«æ¯&§: $ñ;Þ£¸1‚¸¦ñÜQýçGˆpó|woZIý/ñZn:жëéÿR±¹v‹‡Uþ‚%QŠË›¼Å~.)í€ø¶«D±¡ƒ÷˜±úÎÒÇË^êJ S·BþXSÄ Øê¹«±ÅsT¤Ö‡¼äW Ú-b¦p­iàŽÎ€éóÎl3öÇDrª<±¿Dw¶ò‘ïß •:êúãmfošüùÄD~[CŸŸ ®tW/ÆxÛ•Pâ~jÖñÙ9*Ô@½®K~Yš5¬!nqƒåÍœÇÐ(À¤ÂÁ0;óúæ‚‹(ŸkšÐK$eÈÒÖí¿•òærðº¬/ííX»š–ýV>·››&îÊ•võ{÷ÚÄpüôbS^Iuÿ\µTij¡‚/=ò£‡x¶‚ѧ€‚ô`,ï‚[l¤cá×ä$Ha/‚Ø-ñâŠBƒ×5Kò àºÙÙ¦lÆyßKÒ’tæða(¼aûO)ÿ@‰Å+`Bâ¯Ë®o¤l ²WÙÆÜÙ€óF®à„B\-¡«¸ù <õÖE}Ü[Ž.¹ €“¥™º €“Fô®³8Q½ï.®¹·uÝÀ‰—5Ÿ˜Ä1Žp €càÇ8À1Žp @´ïn˜ü‚lÏ,ŸÜÁ¡‹x›Q•|ÌÚʯ9ÌÑJ{ý­mþgã ¿k·àæhy£­ylX@<˜ê SRKYÐí± ã \AT‹E(Oõö0çõ´P² €lAo}&ÁÓ02ù¦êHgàS1Ö ·×˜§U€Q«ˆs| ÛEýà=ãtdŸ¼YMÐø¹ õ͘`ØnS¼Ç<8d j9èŽj~Þê`=€Wt™_ >a¦šº;Ö£t<A;Éžœ€ÎÒâ*0]옆7–ÀÒ´8â[¸C_Ǿ‰ÙºÝ€3@ΆÛhؘî.-3^Ncœ˜Øš³ Àÿ4¬Ý ö@¶ºšPÔ0ckภ/€hØx@K/˜(Ó`#”5´ @N¬€aO €¨  Lp)Ê6GéÀzÁðÑÐ-§ìÒî814°êâ . @^ÇaÐüêÜ  |LL¨þ”Ÿ €^@ÏÐï{ˆäi0*a{”J þxQZŠJy¯Ÿ@%þL^¨!;ªgñÙ…¸øjÝøñ7sgUuà3`Ó*{“ÞdëÝh;ãÔ(ˆ¢3dÊþP[Jª™¶£0,ŽøIÅvtF,µ«36 Ä?¶ƒÓ›HPH#[Ô¦¶JB .¹C QÌMˆÖÖIAEJchû~ݽûúgï½Ù=çžóžó>ç=ç¾÷ä¾aŒ´mÛCK]0¤ûõB»÷üò‹$NDËŒmÓÛýÏDÔAx©¹çõ>ÿ‡g_t5D³ÇœÕ¾Ñû¨Žp þvAÖÏ¡\¨ó¶çÿ¦OFÇÀÞÞhOlv:²%jÁ'˳L÷´'®&Iž¶Ô9 ÌmèÇ+HøOLãÖè¹ÔЮ¿ §Sð~Ε^ÿlï3/ÚL<¾( ïš±ûëÚ‹Úø–C_ãSJ} •‘5"ÙïØCßÔY´Ÿ.ªC)䥻¿k¼ ,€„ýèPxâhÀ@uYDÙŠ½ú@¥Eß½pRGùŽßÒ Ï(ßGÃé »Äà );àSãŽi³”Í™&SˆÖ¹¸^â>…)6WQ)ì½.‘¡·³h(ùyŒYç‡OŠdßÅuÐ;Œ_†6HP’.,2 yz).\ŒW¤py±C…‹æ[Õ‡çÉ·r›¥FÂz§T6î‹Å;Ÿ™$áåÇ+ë½quK¬me•–ðǰwbN€®Ú"wÝj5¨³w]æ3¾8_DKG|Mì*R¢ À‹XçÑn¥Ái®ÉSÄ€¾ÏÊ€Wsn:³Ü„Ömm–†'³™:ÍŒ(ÏÆR.†-<ÐDiï&àBÄñR m z{_Ãê¯@} A-€,€Šbâš(ņ~‰ ÙNL |E+åjÀÀ DÏ2c«ÒÀvxåô&ºrôõ@G—–(·¦¤TˆÇ´¾ƒ´¸œ™—5À‹1¶Ý@-v­?BŸÏZ<è/[x)iâHÃáçÐß& ã1ñ©Ò±ô) +€W蕾kéeÑíPÚ©ñçþ ÂÝ`ßÛs»T­oBvˆšf3q8úfÏVÈj‚$-<`•¨Îż~ñÆ”Ú2r'ŒÈ2‹¸A«éÉÔÒ/í’c¿˜H°f÷hþò3´iÄ2µ.`Ì×x«LÆ6À!É–«a“MÅ¿µ =딚C^rì¿rËSX`ÚC¶|¬ˆØv ©B 8`€t.8Ž’*¨Óüu–÷¡Ö* ¹†"ˆr›(s2ÆÚ$’z”Ž+«RÚHˆ”íƒZ‚<||%®PïÑe4IO!X+lƒ'$ï+ŸtÆbùÅ™JÀcñ¶3>h“°l?€©T Ž0–‹b·9‹@DÞZ‰òJzJ¶Ï¡Ø4”3@› €/o‹ —‹y5lÐJI\ŠrØ/k‚˜D…WÃÑÁL VÕ-å>¨'³~ U©™ Ððùn$¯¥…ðÄuïeJËkmð U­w’¢Œ§u“2\ª(µ äá|¿´Â_üË¥cá.†÷é¯>÷â•rgC%rûWpû·“³°‹rЃ À Õ'<¢r™™f…mšÏQ«*=fÁ0FÏb§¤X¾rlåBA–ûRè$ 
AÙÎ\ÄVš/*§hm¾˜:΋źD€d^€xÆ•€>´:”¨”Ó‰PGpJ´°IÇÔ†šø•¾Ca½™Ò;æ˜ xî{xÍ‚¹p*Ww™ò¨¥õØL*×à@‰Ó‚?–ÎIÐ v3{ Ͷã1n~%~6Ñ3]Ÿ…tøtŽd`T&Åã܇¬œê$lÛN Vî`qAyŠe ù¾Èü‰º^BƒT2PëhVGPÒ¾ŠùÚ÷P"^Û®¡O€»"+ë5²YdZS`$B™.9¬![>ÈV!–Ïo¯^ý“ˆýOh6c¿”ß÷»YØ÷cÔ|ålÄ(O`ÃêÕ?·’X¨æ‰MM È\Nî•b¼JÕ \øt";ï[¦§ œæ˜xªO xõêDz0éÆœâ;à”@'®šdÆ ÌE+Dž‹F³îŒU½Ô>ýÄé¸óª='F~6Í€ŸðÅ-Àui‹À Ðè¾Dzt8#­2“°rÊ €´¡- ½–Úá?_—ê ü“•¿¥ð ™é4c¢TWß,ã îÍ‚{Ð.`çN£&N†§Îç€ x,’¤¤y¼¥¹‚;’ðGTÉñ°ÔœÌäC˜e΀-ÑtºiY0NÀ8‘‹U‰)^æT$n±Ü9Ô'@MSc÷‡ ìY€{7䣸;eÓÑmT› ßI‹—ç¹ ž™‹+1ò½åpQŸÉ‘€J'ÇlOÚZ:e˜QÖ€.${œˆ{#Sà;xüÚéd8.–Òúë ÌA@§U`åܸnGn™€öóê­ÞüèRš‰*¾(_4€9Yù[ÙèTêAɳ2Ò†â[ÒðQÒUz•ù gÀM¼T©ÁTÄÇ=.°§Ûî@]ÖwºäͰ/ûm [-í^³"?ŒSjw^µ¦àŸq Š/q2äÀFÙØ)m¨I@ÂêFÕT:¥y¼P¨ñ8P`O_2å!hЬ–Aò–©ç ‹yä (_ûä<ü”€Žkw›®˜7•8‰ç€½¿© ­©4)eÂ0߆ÿ{ ðdðøcgP!Yé’IúK4,aV´ÊžÌ ÷û¸I„óÀØdÛöáprœ¼Î]~-7$ùäìÔ"šiÈß­åáLûf€tÎ ÀvgXšaH£ WMÓ…eH§§Ù·w¬tDü.ÃzN„ñüèûƈ–êvo¿šöZÏ €AÒà,ƒGô €&Ê@VÖ:tte¸ ÜCn o+u¨ ,€l—øÚÛŽJMÏ€ø<.Ë_ŒünÙ¸T± Ò‹r@⽟‰/ž‘颿 `¯ÒZ³ðFFGðä#"ó±…äãhøRúy2€‚]3ó K æ SÕ¹ž@±¢FOfhšU†3pZšWª£¤j*;_Ïí Tõ"_:•Ø$ñ…S¨”:Pp VMÊk©Óyét)ÍyÈŠÒ“ÈÀ~u,Ê J 8ô:#¬ü³pfh%ÎÚìÎè ü«xî[3ÀuØu@™’¦x4€šD¼‹ÁÝœ ÀB®£B FLU'Å]—Uւ什R¥Ü_AØìo8šχ~§ñWfF|-$ȼ¤(+ͲÑ̨H€OÇ .}W*Ò¾3;âîo·Ô¡t KèÛ’½L wÐc,‹eJ€ hE¡¬:•ƒb©ƒ…µXl²Àd±×Ä(2Ý•Áíì£ÛBÓ?ÈÌÈ¥¤œs‡AX¢¥4þÿöìæÄ:–Þ“Pî;ÔÅLL •<@|ɪtžŸ†MT‘]#“ªÀêš+J®ç6L‰usðâ£l9äÒâä¡1E-TïJêz"[›"ùhˆHÙ.4s7체ߦ°ƒ›{™š, îP¢3c¹âhËa̪>x¿ƒN ñ±.‹ñ×}íEb¬³±;H ÚÕçs9g6‚¬:Ù ÷}¡Äè£&×@Ö$ÈòV©·ÃÀ™¶Tª–RDë7íc8* Ó—‹Å@ ršg387ʶ¡Í–¹©)Ñw`Ä’—$]É7ç^lߘlç(›ÒÉv±‹6„êÛË'NùÒ,ÏI¨`ãÚ³<6NrbÙL< }‹E%ãá…4!pÔc}"™q ‹].bß¾¾ Ò<5ÿkïþ]¢8€¿É)Q¾SãÝWAê?àà$ÎE¨£éâPh'ÁAóW¸h¡8)ué.B-µþ N…Ó¤X0ÙîQ¼Ïz.ðåîñ /S1%L¬5uñõ{Ú†8¬+d¼FÚ!´¼¿õÈ*ÒÛö…Z›õc8oTð‡ŽÞ5äø&ݽ~îäxYtÂÉŽ—f΃ê_N =IO~$€Ÿ¿mEµEƒ¬ü ÀNöömL~ÖüØ¥ì¢9eð”À§°jwfO€4‘8«`UþT2yƒY¶HÅ%Ý cäÅî{ô¨l̺ ·öÓu®Õ<ÑpéfÙxUr^Ã˃x¨ÀªD䯋6¦ê¯ï¨^¢G · ZåkrïwºàÅýuL“/¦Uc©¾óIEND®B`‚sleef-3.5.1/doc/html/sleeflogo3.svg000066400000000000000000012327051373003144100171200ustar00rootroot00000000000000 image/svg+xml sleef-3.5.1/doc/html/texlike.css000066400000000000000000000125011373003144100165010ustar00rootroot00000000000000body { margin-left: 1.5cm; border-left: 0.0cm; padding-left: 0.0cm; margin-right: 1.5cm; 
border-right: 0.0cm; padding-right: 0.0cm; margin-top: 1.0cm; padding-top: 0.1cm; margin-bottom: 0.5cm; padding-bottom: 0.1cm; font-size:12.5pt; } h1 { font-family: arial, sansserif; font-weight: bold; font-style: italic; font-size:1.8em; margin-top: 0.8cm; margin-left: 0.0cm; } h2 { font-family: arial, sansserif; font-weight: bold; font-style: normal; font-size:1.6em; margin-top: 2.0cm; margin-bottom: 0.5cm; margin-left: 0.0cm; } h3 { font-family: arial, sansserif; font-weight: bold; font-style: normal; font-size:1.2em; margin-top: 0.9cm; margin-bottom: 0.5cm; margin-left: 0.0cm; } h4 { font-family: arial, sansserif; font-weight: bold; font-style: normal; margin-top: 0.7cm; margin-left: 0.0cm; margin-bottom: 0.2cm; padding-bottom: 0.0cm; } p { font-family: "Times New Roman", times, serif; font-weight: normal; font-style: normal; margin-top: 0.0cm; padding-top: 0.0cm; margin-bottom: 0.0cm; padding-bottom: 0.0cm; text-indent:16pt; margin-left: 0.0cm; } p.noindent { text-indent:0pt; } span.normal { font-family: "Times New Roman", times, serif; font-weight: normal; font-style: normal; } ul { list-style-type: disc; font-family: "Times New Roman", times, serif; font-weight: normal; font-style: normal; margin-top: 0.0cm; padding-top: 0.0cm; margin-bottom: 0.0cm; padding-bottom: 0.0cm; margin-left: 0.8cm; padding-left: 0.0cm; } ul.circle { list-style-type: circle; } ul.square { list-style-type: square; } ul.none { list-style-type: none; margin-left: 0.0cm; } ol { font-family: "Times New Roman", serif; font-weight: normal; font-style: normal; margin-left: 0.8cm; padding-left: 0.0cm; } a:link { margin-left: 0cm; color: black; text-decoration: none; } a:visited { margin-left: 0cm; color: black; text-decoration: none; } a:hover { margin-left: 0cm; color: black; text-decoration: underline; } a:article { margin-left: 0cm; color: black; text-decoration: none; } a.underlined:link { text-decoration: underline; } a.nothing:hover { text-decoration: none; } i { font-family: "Times 
New Roman", times, serif; font-weight: normal; } b { font-family: arial, sansserif; font-weight: normal; } hr { margin-top: 0.8cm; margin-bottom: 0.5cm; padding-top: 0cm; padding-bottom: 0cm; } // table { margin-left:auto; margin-right:auto; } td.caption { font-family: times, serif; color: black; } td { font-family: times, serif; } table.lt { border-collapse: collapse; border-style: none; } td.lt- { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-width: 1px; border-style: none; padding-left=0.2cm; padding-right=0.2cm; } td.lt-r { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-right-style: solid; border-width: 1px; border-color: black; } td.lt-l { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-left-style: solid; border-width: 1px; border-color: black; } td.lt-lr { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-right-style: solid; border-left-style: solid; border-width: 1px; border-color: black; } td.lt-b { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-width: 1px; border-color: black; } td.lt-hl { margin: 0px; border-style: none; border-bottom-style: solid; border-width: 1px; border-color: black; height: 0px; } td.lt-bl { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-left-style: solid; border-width: 1px; border-color: black; } td.lt-br { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-right-style: solid; border-width: 1px; border-color: black; } td.lt-blr { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-left-style: solid; border-right-style: solid; border-width: 1px; border-color: black; } // 
pre.white { font-family: arial, sansserif; font-size:1.0em; font-weight: normal; background-color:white; overflow: auto; margin: 0.6cm; margin-top: 1.0cm; padding: 0.1cm; color:black; } pre.code { font-family:arial, sansserif; font-size:9pt; font-weight: normal; background-color:#fbf8ef; box-shadow: 3px 3px 3px #aaa; overflow: auto; margin: 1.0cm 1.5cm 1.0cm 1.5cm; padding: 1em 1em 2em 1.1em; counter-reset: line; color:black; } code { font-family:"Consolas", arial, sansserif; font-size:9pt; counter-increment:line; } code:before { content: counter(line); display: inline-block; border-right: 1px solid #c0a0a0; padding: 0 0.5em 0 0.5em; margin-right: 0.5em; min-width: 2em; text-align: right; font-size:9pt; } sleef-3.5.1/doc/html/trigdp.png000066400000000000000000001311171373003144100163260ustar00rootroot00000000000000‰PNG  IHDR óúX¬bKGDÿÿÿ ½§“ IDATxœìÝ{\”eúøñ{å0ˆÈA@"A µHW6ÑÓrCRËJM…/R•Çܶ—x®]ب¯%™YïWÌs?×sß×}’ÏI¦R©Ý]æ@ ° À›@ ° À›@ ° À›@ ° À›@ ° À›@ ° À›@ ° À›@ ° À›`—œœÜÑ9ð;#GŽB >ÜÌÇvåL·há 0 Ãœ={vÊ”)^^^NNNýúõ‹‹‹“Ú/^éãããëë;gΜêêjÝë ¦»Û¡C‡®]»v̘1NNNááá?ýôÓ‡~Ô½{÷ ¨T*ÃWÌoG“CkvÛæb:tÙ²ecÆŒ Ù»wo{ ¥é­·ÞêÓ§›››··÷¢E‹tϱÍA 0 côìÙsðàÁIII;wî\[[»{÷n//¯ö¦“––öú믟;w.;;[kïc=ÖÐÐpöìÙüüüK—.Íš5K÷:Ø­bÓ¦Mo¼ñFuuuxxø#ݺnÍÏÏïúõëÕÕÕR Üæ­ªRUüÍ7ß踖UGLKKËìÙ³'OžüÝwßmÚ´iÞ¼y:âËÊÊšššŠŠŠ‚ƒƒ5Û»uëvóæM鳿-ÊwÝÕö-HMMM.\ð÷÷BIí寥½n dÈŠ™APPP›‹)„ÉdÒjÊ”)S¦Lùõ×_7oÞüè£^½zõv¿A@'Ã=À€ŽqåÊ•E‹:uª®®®¦¦&==½´´ÔÀáôë×ïøÃâÅ‹®^½ºråÊÖ1111ñññêkwîÜÙÔÔd`Ìš5k.\¸°eË–íÛ·KÏ—Ò8yòäùóç———«TªÂÂB©$»çž{öíÛ'„hjjJMMUëëë«P(Zçúh̘1ž••UQQáëë)=ßÈÁÁA+fË–- 3fL·n݆ öÙgŸ©O!êŽ9tèPJJJvv¶\.ŠŠZºtéôéÓoܸ¡£Ï?þxÀ€‘‘‘®®®S§N•Î÷®[·®°°pðàÁ÷ßÿ½÷Þ«téÒ¥{÷îíÑ£‡æµ¸êyÙÙÙ 0 44ÔÓÓsË–-†/©Žn dÈŠ™A›‹©EïBݼyóÕW_õññqww_³fÍ®]»œÅm~ƒû÷ïß¹s§ég 0™ê·×`¥:4mÚ´6_„ Æ`€U:uêTaa¡ââÅ‹¯¼òÊO<ÑÑKÇC°VéÒ¥KÓ¦M»víZ·nÝþô§?½öÚk°t\ ° \ ° À›@ ° À›@ ° À›@ ° À›@ ° W777/Y²ÄËËK.—ÇÆÆVVV“˜˜(Ó––fÞÜ–Ëâ àõë×geeåää”””466Θ1ã¶b~óüóÏ›1q€E³ïè´¥§§/[¶,,,L‘’’ªP(h`Œ££c‡d°d–u¸ªªêâÅ‹ÒfHHˆ³³ó™3g ÉÎÎö÷÷>|øŠ+êëëÍ™<À’YÖàÚÚZ!D÷îÝÕ-nnnR£!1Ó§OŸ5k–OAAÁ¢E‹Îž=›••ÕÞXFŸÀèT*ÕwbY°«««¢¦¦FÝ¢T*¥FCbÆ'µôíÛ×ÉÉi„ 7nÜËåmŽÕØØh”ÔK&“™g 
Î:“²–±˜cuà@æˬ“š¯?Fõ®1⛲’±˜cuà@æ‹IYËXfž”Qú±¬K =<<üüüN:%m*ŠúúúðððÛBtíÚU¥R577›!m€å³¬X‘˜˜˜ššªP(ª««—.]%=Ý*;;;##CwLFFFqqqMMÍñãÇ“’’&Nœ¨uö`³,®^¾|ù£> “ÉÔ7ñîß¿çκc>ùä“ûgÏž3fÌ=zôÖ­[;fËc¾‹¶-Mg½2¾óŤ¬e,&ÅX89Çâ`k‹IYËXrRæ‹IYËXLÊBƲKNN¾ó^¬ÑÊ•+Í6÷1cƘg Î:“²–±˜cuà@æËl­ü§þ˜ä?g,¾)k‹I1Vdα˜”µŒe¾ÿ'©|ã 0Êlg€°pÆ*ß,î`LLbäÈ‘™™™þ}G'À¬Îž=ûòË/;vìúõë~~~QQQï¿ÿ¾bâĉƒ JKKÓ ž8qâþýû5[<8~üxÛ|ðÁ/¿üR+Õ«WôÑGååår¹üþûïOMM ÖŠyùå—?ûì³óçÏ»»»?òÈ#¯½öšÑ_qzòäɵk×;vìÆAAAS¦LY´h‘§§§‡X¼xqhh¨ôÙÍÍ­¦¦Fsoyy¹¿¿룎=úç?ÿùÔ©SNNN111|ðŽÃ›››_~ùåÍ›7×××O˜0!==ÝÛÛ[Ñ^{ÛžyF\ºtÇÓ½½z‰·ß6ëˆBÎ6E¥RMš4©Gyyy×®]ûôÓO‡ ¢ûçž{®AÃ<`xû¾}ûZw½oß¾_~ù%77×ÍÍmÚ´i­c._¾œ––VTT´gÏžC‡=ÿüów6om_~ùåèÑ£ûôésôèÑ«W¯îÙ³G¥R8pÀÀÃoݺeHØ´iÓÔpee¥zYžyæ™Ñ£G·Yýþë_ÿš4iÒÔ©S E^^ÞO<¡ûðõë×geeåää”””466Θ1CŠo¯½mf®~…¿übî…À€M¹|ùrIIIRR’ŸŸŸ££chhè‹/¾¨û;;;G wÝu—áí]ºtiÝáÈ‘#Ø£G€€€€€…BÑ:fÓ¦MãÆóññ‰ˆˆX°`Á·ß~Ûæ\ž|òI__ß9sæTWWKíC‡ýóŸÿ6dÈo¾ùFë@•JõÌ3ÏÌ™3'---44T.—4èµ×^“ªMÝ.[¶l̘1!!!{÷î½~ýú /¼äáá1yòäÒÒÒ6gª¾ÚÁÁAZ“–––íÛ·ÏŸßö£_y啹sç¾ôÒK&LÐ}xzzú’%K¼¼¼RRR>,­g{í6ް!={ösæL{톤׹Q6D&“=ztìØ±ûÛßÂÂÂÞÖw+æ»ï¾ë©A}FÔö/¾ø¢Í>yä‘Ó§Oçää$$$´Yܪ½óÎ;‡JIIÑj/**:räÈ›o¾éæææáᑚšúùçŸWTTH{¼¼¼¤ÊÊÊ*++5½råŠÂÏϯõpº»÷õõB\¸pa÷îÝ7nôööîÚµëš5k~þùçüü|]ëø›ôôô¹sçvíÚµõ®ššš¦¦¦;v|øá‡—.]š>~ذaNNNË—/wppøúë¯Û;\z0˜æÃ±”J¥««k{ízÓëô(€åèè×»wïÓ§Oësqqñ× ¾××vGGÇ™3gª~£.’ÕT*U}}}›¥ãªU«RSS>ÜÃÃÃÏÏïÔ©SÒ¦B¡¨¯¯o¯ÝÅéÜ(€råÊ•E‹:uª®®®¦¦&==½´´tÔ¨QÒÞæææ›T*UëÆæææ6ƒÕíºµ´´¬]»¶   ¦¦&//oþüùAAAƒ BdgggddHa‹/~ÿý÷<póæÍ_ýU«ŸàààÑ£G/\¸°¦¦¦ººzñâÅ“&M’®OÖK&“mܸqÓ¦M‹-*((¨¯¯—^ µ}ûv» Œ‰‰‰—N_»vmçÎMMMºÇ­­­Ý¾}{bb¢Ž˜gŸ}633ó‡~hll|ýõ×oݺ5~üx‡'&&¦¦¦*Šêêê¥K—FEEI%t{ímëÝ[wæÆ×§¹GBð`À¦899ݸqã©§ž*++ëÒ¥ËÀ?úè£1cÆH{7lذaÃuð?üкñƒ>Õ^»^¹¹¹6l¨ªªòòò3fLff¦½½½bÿþý%%% ׯ_OMMBH…±ÂÁÁáæÍ›Zýdee½ôÒK Éd&Løûßÿnø:Lœ8ñèÑ£kÖ¬ùãÿX__ß»wï)S¦H\6°Û-[¶¬Zµj̘1—/_öððˆŠŠš:uªîA·lÙâêê£#æ¹çž«©©yðÁëêêî¾ûî/¾øB]~·yøòåË•JedddCCCtttVV–îö¶ýþåϘLú¥Ž ’Élwî« kû¿£z×ôyÐÑŒU¾q 4À&Pl0À&Pl0ðßÛ¿ÿý÷ß/}9rdfffÇæÓšefe¸)S¦lݺµ£³øïYûúw2Àè Ξ=;eÊ///''§~ýúÅÅÅ !bbbfÏž­9vìØçž{N1qâD™L¶iÓ&õ®üQ&“¹¸¸H›'N|ñÅu ªR©^z饕+WJ›‹/V¿P×r+«­[·FFFº¸¸H¯,Rûå—_¦OŸîéééêê•››ÛúØæææ%K–xyyÉåòØØØÊÊJÃû_¹rå²eËï| ZNž<ãéééää²|ùò«W¯}Íõwss“ýÞ… 
ZrôèÑ?þñNNNîîîsçÎÕ}l{kkÈš«¿"dóÍú_ÿW¹È†£€ÕS©T“&MêÑ£G^^Þµk×>ýôÓ!C†!æÏŸÿÉ'Ÿ(•JuäÏ?ÿ|äÈ‘ùóÿÿ{ÆBBBÞ{ï=õÞ÷Þ{/44Ôðq8ÐØØ8vìXisÚ´i·u¸qݺu«Ívceåáá‘””ôúë¯kµ'&&VVVþç?ÿ©¨¨6lØÃ?ÜÜܬ³~ýú¬¬¬œœœ’’’ÆÆÆ3fÞxx¸··÷'Ÿ|rçSÐôå—_Ž=ºOŸ>G½zõêž={T*Õ <¼½ÕnMsý+++~óÌ3ÏŒ=Úßß_+þ_ÿúפI“¦NªP(òòòžxâ ÝǶ·¶†¬¹Z±ñ =Î]6÷ˆ `X½Ë—/—””$%%ùùù9::†††Jgn'NœèååõÑG©#322FŒ!•ÇBˆ?ýéOÅÅÅùùùBˆ_ýõã?Ž7|ܽ{÷Ž?^&“I›š»ÖÖÖ¾ð }úôéÖ­[XX؉'ÚkT{ûí·‡ªÞ,..¶³³+))B\¿~ý…^ òðð˜}ÜÜܼ½½-ZÔ:«Ë—/?ùä“>>>¾¾¾sæÌ©®®V÷óç?ÿ9:::,,lÈ!ß|óMëÉNœ8ññÇïÝ»·Vû¹sçüñ^½zÉåòÄÄÄË—/WTThŤ§§/Y²$,,ÌËË+%%åðáà …ÂÀþ…ÑÑÑ{÷îmÝþ_OG¥R=óÌ3sæÌIKK •Ëåƒ zíµ×¤jSG·š«ÝÞ—¢Esý[ZZ¶oß®þEŒ¦W^yeîܹ/½ôR`````à„ tÛÞÚ²æ6ˆV¯gÏžƒNJJÚ¹sç¹sçÔíwÝuW||¼úoccã‡~¨YutéÒeΜ9RÀ®]»† Ò¿ÃÇýþûïÛ;¹:kÖ¬üüüÇ×ÖÖîÞ½ÛËË«½Fµ'Ÿ|òìÙ³yyyÒææÍ›ÇŒ$„xúé§ÏŸ?âÄ‰ŠŠŠ!C†ÄÆÆ¶´´Ha»víÚ¾}{AAAxxø²e˾øâ ¥RYTTôøã·Îê±Çkhh8{öl~~þ¥K—fÍš¥ÞµwïÞmÛ¶åçç/^¼xæÌ™†/Â’%KvíÚUQQqãÆôôôQ£FùùùiTUU]¼x1""BÚ qvv>sæŒáC 2äûï¿7ât JJJæÌ™£Õ.ý.CG·êÕž>}ºŽ/E¯;vØÛÛO›6M«ýÖ­[ß~û­££ãÝwßݽ{÷Q£F}ûí·:Žmomï|Í;+ `X=™LvôèѱcÇþío xûí·¥]qqqÒ¹Ö={öܺuK«2üŸÿùŸ­[·Þ¼y3###!!á¶Æ½víš««këö²²²O?ý4##£wïÞ2™lÀ€ÁÁÁm6jÕ£G˜˜˜>ø@¡R©>üðÃyóæ !.\¸°{÷î7z{{wíÚuÍš5?ÿü³tÖZïëë+„èÒ¥‹J¥úþûïkjjärù}÷ݧ•UQQÑ‘#GÞ|óM777ÔÔÔÏ?ÿ\}¶6!!A*Èy䑲²2Ý·Œjºÿþû[ZZzõêåââ²wïÞ÷Þ{O}J\R[[+„èÞ½»ºÅÍÍMj4«««ú4¬Q¦såÊ!„V¡nH·êÕÖý¥è•žž>wîÜ®]»jµ×ÔÔ455íØ±ãÃ?¼téÒäÉ“zè¡‹/¶wl{k{çkÞYQ 3pww_µjÕ¿ÿýo¥R™œœœ””ôÅ_!zõê5yò䌌 !DFFÆÌ™3œœ4ìÛ·ïСC×­[÷ã?>ú裷5h=Ú,*JJJìííûöí«·QËܹs·mÛvëÖ­C‡)•J)Ÿââb™L=hРAƒ <ØÙÙY]’©«¸>}úìØ±cÓ¦M~~~øÃ>ûì3­ÎËËËíííÕ—¥MOOO郴>uuu†¬@KKËøñãýýý¯\¹R__¿páÂÈÈÈ_~ùE3FúAMMºE©T¶ù‹ƒöÔÖÖº»»q:Rm¬UXÒ­zµÛûR>þøcõCª4o>×ôý÷ßçææ¶yý³\.—ÉdñññÆ srrZ¾|¹ƒƒÃ×_ÝÞ±í­í¯ygE €NÅÑÑ1..®wïÞ§OŸ–Z²²²NŸ>““ÓfÕ‘°fÍšÙ³g;88ÜÖXÆ ûé§ŸZ·555émÔíàà°o߾͛7Ϙ1C*Þ¤Ûb¿ù曳¿©¬¬|ðÁ¥C4O·N™2嫯¾ªªªš3gΣ>ªUõ455©;,ep[SÖR]]]ZZúÜsÏIÏRNJJjllüî»ï4c<<<üüüN:%m*ŠúúúððpÃGÉÏÏ6l˜VãL'$$$((hË–-Zí*•Jw·êÕnïK™9s¦ê7nnnm޾qãÆqãÆµù«''§ª7¥á4¿b­cÛ[Û;_óΊVïÊ•+‹-:uêT]]]MMMzzzii©úÝ3>ø §§çÔ©Sÿð‡?„……µ><66öÀË—/o½«¹¹ù¦•J¥¹wÊ”)šgçÔ'Ož<þüòòr•JUXXXTTÔf£Övvv³gÏ~ë­·vïÞ-]ÿ,õ/‡¼víÚÎ;›šš´Ž-,,úHŠ9sæÌˆ#ºwïÞ£GaÆýóŸÿlUEEÅŒ3zöìéíí=kÖ¬+W®Híšý444!~þùg­Ä4ß%‘?{öìäÉ“ÝÝÝ]\\†¾wï^)>..nܸqÒ禦¦E‹yxx8;;ÇÄÄ\ºt©uL{ýŸ9s¦W¯^¿þúk뵺“é¨Tª'NüéOrwwwtt8pà²eˤ 
éVÇ—¢EsýU*ÕÿþïÿúúúÞºu«u¤¤¥¥eõêÕ¾¾¾...£Fú׿þ¥ûØöÖ¶½v+e¬òM¦úý/±l‡Lf»sXY×ijS½kú<èôå—_®[·îèÑ£H§;uêÔÛz05:c•o¶[R,0c•ovpssó’%K¼¼¼äry’[ IDATlll›Y×£R©ÆŽ+“ÉÔ¯JKLL”iHKK3ÇLÖ Ã àõë×geeåää”””466Θ1ãvcÞ~ûm{{{­C~óüóÏ›p«¢]@šMzzú²eˤ§ð¥¤¤„††* ­ç’éˆ)**úûßÿ¾oß>­çøÙÙÙ9::šqëÐ1g€«ªª.^¼!m†„„8;;Ÿ9sÆÀ•J·nÝ:­ž³³³ýýý‡¾bÅŠúúzÓO`:¦®­­BtïÞ]Ýâææ&5óöÛowëÖ­õUÓÓ§Oß½{÷‘#GV­ZµcÇŽ¹sçêNCö{ÉÉÉR»ú…ÑBˆÂÂB͈kîÒü\WW'=°žèèèÁè=è`E³ z z z0°‡ääd­bMIÇ< ¹ªªÊÓÓó»ï¾9r¤Ô"—Ë?øàƒÇ{LoLDDÄÿøÇ“'Oúùù]ºtÉ××÷ôéÓC‡Õâ믿ž0aBmm­\.o3ž °p<‰u?ÚÃÃÃÏÏO]å+ŠúúúððpCbŽ;võêÕððpOOÏÁƒ !ÆŽ»bÅ ­!ºvíªR©š››M?€è°³ kÖ¬Ù´iÓ_|áåå5wîÜšššÃ‡ !²³³¯]»–Ð^L}}}uuµÔÉ•+W†¾ÿþ#FtïÞ=###::ÚÝݽ  à™gžñõõýüóÏÛK€3À Ç`$Æ*ß:ì)ÐË—/W*•‘‘‘ ÑÑÑYYYRûþýûKJJ¤¸ÍgggggçÿŸ½½½¢gϞҭŸ|òÉ+¯¼R[[ëë룾§Û= Ê`€…ã 0ë¾3£Ø `€M Ø `€M Ø `€M Ø `€M Ø `€M Ø `€M°ïèæ&›¯?Fõ®éó0/Îl0À&Pl0À&Pl0À&Pl0À&Pl0À&Pl0À&Pl0À&Pl0À&X\ÜÜܼdÉ///¹\Yy»1*•jìØ±2™,//Ï\Y,ÅÀëׯÏÊÊÊÉÉ)))illœ1cÆíƼýöÛöööæÊ`,®NOO_²dIXX˜——WJJÊáÇ …á1EEEÿûßÓÒÒÌž8À¢YV\UUuñâňˆi3$$ÄÙÙùÌ™3ƨTª¸¸¸uëÖyxx2œì÷’““¥öS§N©c ëêêÔ›š»4?×ÕÕ¶¹‹èèÁÒz0…ϦzÐÁŠfAô@ô@ô``ÉÉÉZÅš0™J¥2V_w®¸¸888ø§Ÿ~ ‘ZüüüV®\oH̆ 8ðé§Ÿ^ºtÉ××÷ôéÓC‡mo,™Ì²æf#›¯?Fõ®éó€>|SHŒU¾YÖ½²®®®Bˆššu‹R©”õƯ_¿þäÉ“æJ`M,ëh???õéo…BQ__nH̱cÇ®^½îéé9xð`!ÄØ±cW¬Xaæ)¶ì×&QwS×\u€dYg€…‰‰‰©©©ãÇ÷òòZºtiTTÔÀ…ÙÙÙ×®]KHHh/& `̘1R'W®\>|xVVÖˆ#:p.€­q\ ?†Ë5a"\- ?Õa"W/_¾\©TFFF644DGGgeeIíû÷ï/))‘ à6cœ¥`é5H={öìÞ½{Í`Yl÷AP<ËŠèý ¿ÿ³ü²ÖZtÊoŠI@gÂ@h1VùfY÷`"À›@ ° À›@ ° À›@ ° À›@ ° À›`ßÑ €E“Í×£z×ôyàŽq`(€6`(€6`(€6`(€6`(€6áö àŒŒŒñãÇ›(Lçö à=z™&LÈþ¶¢§OŸ>}út¥€éè9|âĉ¢¢"õfQQÑÉ“'MœƧ§Ž‹‹kjjRo655ÅÇÇ›8%ŒOO\RRÒ¿õfÿþý‹‹‹MœƧ§öôô,//Wo–••¹»»›8%ŒOO3wîÜóçÏ«Tª¢¢¢yóæÅÄĘ'3ŒHO¼zõj—~ýúuéÒ¥o߾ݺu[»v­Qnnn^²d‰———\.­¬¬4|øðZ[[+„èÞ½»ºÅÍÍMj40æ‘G9}útNNNBB°aäÆéÓ§ïÞ½ûÈ‘#«V­Ú±cÇܹsu§!û½ääd©ýÔ©Sê˜Âºº:õ¦æ.ÍÏuuu………mî¢cõ ƒÍ¢Ó÷` ŸÅ1# œÅ³„YÞƒ!3²üYüߦÍ‚èèá¿èA+š=ÜnÉÉÉZÅš0™J¥Ò±ÛÃ㢢¢k×®ƒ :{ö¬¢[·nš‰þwªªª<==¿ûî;õÅÌr¹üƒ>xì±Çn+FñóÏ?:´¬¬ÌÃÃC³ý믿ž0aBmm­\.o3™LÏÜa9dóõ¨Þ5KÐGï7%¬ð˲¾Ip ´Ì€K -kR°¾oÊrR`~B‹±Ê7=g€år¹R©To–••y{{ßù¨~~~ê*_¡PÔ×ׇ‡‡ßnŒB¥RÕ××WTThµwíÚU¥R577ßy¶€N@ÿ{€ããã/\¸ „¸|ùò‚ 4¹|'SSS EuuõÒ¥K£¢¢¤'`egggddèˆiiiY»vmAAAMMM^^Þüù󃂂 $„ÈÈÈ(..®©©9~üxRRÒĉ]]]’-ÀÚé)€×®]kggPXXèããÓµk×W_}Õ(/_¾üÑGŒŒ 
ÉdYYYRûþýûÕOun/&77÷ðòòzøá‡{õêõÕW_ÙÛÛ !>ùä“ûgÏž3fÌ=zôÖ­[’* 0èBê .÷îÝ;00Ð 9™÷[î¶òvë›÷·I€µà ´«|Óÿ`!„¿¿¿¿¿]]]}}½³³ó €™é¹zÑ¢E¹¹¹BˆÏ>ûÌÓÓÓÝÝý‹/¾0Kb“žøã?BˆÞ½{oܸÑ,‰ÐF†Óÿ¤´ù+¢ç)ÐtÀ›@ ° À'Ož¬¨¨0u6˜ˆžøêÕ«=ôP¯^½FŒÑ«W¯‡~¸ººÚ<™`Dz à… ¶´´äçç744üøãÍÍÍ .4Of‘ž× 8pàÇôððB„††nݺ5,,Ì,‰`LzΫT*{ûÿ+’íííU*•‰SÀøôÀ<ðÀ¼yóÊËË…åååóæÍ{àÌ’Ƥ§~ã7®^½Ø¥K—ÀÀÀªªª´´4ód€éº¸±±ñ›o¾9räHQQQyyy@@@pp°Ù2Àˆtîҥˊ+„ÁÁÁQQQT¿ë¥«–ÉdÞÞÞ—.]2[6˜ˆž× Ýwß}÷ß|||¯^½d2™Ô8sæLÓ'€1é)€÷ïß/—Ë·oß®ÙH °:z ༼<óä€Iéy Ò‰'ŠŠŠÔ›EEE'Ož4qJŸž3Àqqq»wïVo655ÅÇÇÿç?ÿ1qVtv±±úczî1}Ø=pIIIÿþýÕ›ýû÷/..6qJ¸#²ùzTïš%°0z.öôô,//Wo–••¹»»›8%ŒOO3wîÜóçÏ«Tª¢¢¢yóæÅÄĘ'3ŒHO¼zõj—~ýúuéÒ¥o߾ݺu[»v­y2ÀˆôÀ®®®ÿøÇ?JKK:TZZºwïÞnݺ™4¡æææ%K–xyyÉåòØØØÊÊJÃcV¯^=`À'''OOÏØØXÍçWlœžX8zôèÀÀ@Sg#„X¿~}VVVNNNIIIccãŒ3 ‰ŽŽÞ·oß/¿ü’››ëææ6mÚ43$ ° m?:33sÔ¨Q¡¡¡™™™­÷ÆÇÇ›.¡ôôôeË–……… !RRRBCC ÅÀ ‰9r¤àêê°sçNÓå °.mŸÎÌÌüé§Ÿ¤­™.›ªªª‹/FDDH›!!!ÎÎÎgΜ1<&++ËÇÇÇÅÅeݺu+W®Ô=œì÷’““¥öS§N©c ëêêÔ›š»4?×ÕÕ¶¹«£zÐÁŠf¡¹ËÀYø,:}²YÜnÚºYéw¡ƒ%̸ߦÎâÎYÂ,èèÁBzÐÁŠfÑéÿMK†ôœœ¬U¬ #‘©T*cõu犋‹ƒƒƒúé§©ÅÏÏoåÊ•šçœuÇ444(•ÊÒÒÒ-[¶L:uܸqí%“YÖÜ¥S¾¸SNªSÒûM Kû²bcõ†ÈzîÑä,üجNù/%ëû©3VùÖö%ÐÅÕÕUQSS£nQ*•R£1NNNNNN¾¾¾C‡-++óðð0Gê˦§V©TŸ~úinn®æéé´´4eãáááççwêÔ)én^…BQ__~»1RæõõõÀ¡÷)Ð ,ˆ‹‹;wîÜu &M(11155U¡PTWW/]º4**JzVvvvFF†Ž˜–––µk×ÔÔÔäååÍŸ??((hРA&Í`-ôœÞºuknnnÿþýÍ“bùòåJ¥222²¡¡!:::++Kjß¿IIIBB‚Ž˜ÜÜÜ 6TUUyyy3&33ÓÞÞ²®ñt=õ¡›››———yR‘ØÙÙ¥¤¤¤¤¤hµk>}ºÍ˜»îºkÏý ,vÌLÏ%ЫV­Z´h‘R©4O6˜ˆž8""âÈ‘#=zôpÑ`žÌ0"=—@?ñÄ£Fz÷Ýwår¹yÀôÀ …âøñãNNNæÉÑs ôÈ‘# …yRÀtôœŽŽŽž:uêüùó{õê¥nœ9s¦‰³ÀÈôÀÙÙÙݺuÛ¶m›f#0Àêè)€óòòÌ“&¥ç`: `€M Ø `€MÐó,Ieee]]z³_¿~&Ë“ÐS9rdöìÙeeeš*•Ê”)ð{±±úczî1}Àºé¹zÁ‚kÖ¬©®®nÐ`žÌ0"ý—@Ïš5Ë y`RzÎûúú^¹rÅ<©`:zÎGDDDEE%$$xzzªgΜiâ¬02=ðçŸÞµk×Í›7k6R¬Žž8//Ï~ü¸Ö¬÷l0À&Pl0À&è/€[ZZþóŸÿìß¿_ÑÔÔÔÜÜlú¬02=piié=÷Ü9{öl!Ä®]»âââÌ’Ƥ§^°`Áĉ•Je=„ÑÑч6ÊÀÍÍÍK–,ñòò’Ëå±±±•••†Ç¼üòËC† qvvö÷÷öÙgkkk¥öÄÄD™†´´4£¤ èÚ~°Úwß}·{÷n;;;iÓÝݽªªÊ(¯_¿>+++''ÇÛÛ{Μ93fÌÈÉÉ10æòåËiiiƒ.//Ÿ5kÖóÏ?ÿá‡J‡$&&¾ñÆÒç.]º%UB«?¦çÓçü—ôÀr¹\©TöìÙSÚ,++óöö6ÊÀéééË–- B¤¤¤„††*гiÓ&)ÀÇÇgÁ‚šgzíìì’! 3Ñs tLLL||ü… „—/_^°`Á´iÓî|Ôªªª‹/FDDH›!!!ÎÎÎgΜ¹Ý!ıcdž ¦ÞÌÎÎö÷÷>|øŠ+êëëu§!û½ääd©ýÔ©Sê˜Âºº:õ¦æ.ÍÏuuu………mîê¨t°¢Yhî2pF> ­ d ³0. 
ù.îd ­YŸ(ÿÊÈfaÜYÂ,Œþ7ËfAô@ÒƒV4‹NÿoZz0¤‡ääd­bM‰L¥RéØ][[ûôÓOïÝ»W266vË–-r¹üG-..þé§ŸBBB¤??¿•+WÆÇÇßVÌ;3œœ|òäÉ   !Ä×_íèèèããSPP°hÑ¢¡C‡feeµ—ƒL¦gîVJ6_O€ê]³äaT¶9)aió2àh™—@3)“cRícR:%þ¥[`¬òMÏ%Ю®®{öì¹páBqqqïÞ½ï|H©[!DMMºE©TJ†Ç¼ùæ›ýë_srr¤êW1nÜ8éCß¾}œœ&L˜pãÆ;/×€þ÷ !üýýï¿ÿ~cU¿B???õin…BQ__nx̪U«RSS>Û(¯JLLLMM?~¼——×Ò¥K£¢¢¤'`egg_»v-!!AGÌâÅ‹³³³<póæM™Læàà „ÈÈÈˆŽŽvww/((HJJš8q¢ÖYe€ÍÒS/X°`çÎÑÑÑF¿xùòåJ¥222²¡¡!::Z}³îþýûKJJ¤¸Í˜ëׯ§¦¦ ! $âààpóæM!Ä'Ÿ|òÊ+¯ÔÖÖúúúÆÄĨj€žxëÖ­¹¹¹ýû÷7úÀvvv))))))Zí™™™ºc\\\Ú»ûùÀFÏÐ9è¹ØÍÍÍËËË<©`:z àU«V-Z´H©Tš'LDOqäÈ‘=z¸h0Of‘ž{€Ÿxâ‰Q£F½ûM`ÕôÀ …âøñãNNNæÉÑs ôÈ‘# …yRÀtôœŽŽŽž:uêüùó{õê¥nœ9s¦‰³ÀÈôÀÙÙÙݺuÛ¶m›f#0Àêè)€óòòÌ“&¥§ð߈ÕÓséóðÚ.€333Gš™™Ùzo||¼‰³ÀÈÚ-€ÝÜÜ(€FÛðñãǵ>¦ÂÕÂÌBÏ{€§OŸ®Õm²d0=ðéÓ§µZrssM– ¦ÒîS 7oÞ,„¨««“>H …é³ÀÈÚ-€7lØ „¨©©‘>!îºë.__ß÷ßßL©`<íÀÒ¥Î/¾øbZZšóÀ$ôÜLõ èôÀtÀ›@ ° í>KSeee]]z³_¿~&Ë“ÐS9rdöìÙeeeš*•Ê”)¡}±±zöì1K`}ô\½`Á‚5kÖTWW7h0Of‘þK gÍše†<0)=g€}}}¯\¹bžT0=g€#""¢¢¢<==Õ3gÎ4qV™žøóÏ?ïÚµëæÍ›5)€VGÏ%Ðym1iBÍÍÍK–,ñòò’Ëå±±±•••†Ç¼üòËC† qvvö÷÷öÙgkkkMš*ÀŠè)€%'Ož¬¨¨0u6Bˆõë×geeåää”””466Θ1Ãð˜Ë—/§¥¥íÙ³çСCÏ?ÿ¼X=ðÕ«Wzè¡^½z1¢W¯^?üpuuµIJOO_²dIXX˜——WJJÊáÇ …1›6m7nœODDÄ‚ ¾ýö[“¦ °"z à… ¶´´äçç744üøãÍÍÍ .4]6UUU/^Œˆˆ6CBBœÏœ9s»1BˆcÇŽ 6Ìt©¬‹žøÀ[·n¬»úØ=ðo¼qõêÕÀÀÀ.]ºVUU¥¥¥™4¡ÄÄÄÔÔT…BQ]]½téÒ¨¨¨ !²³³322tÇ,^¼øý÷ß?xð`@@ÀÍ›7ýõW“¦ °"öºwûøø9r¤¨¨¨¼¼< 88ØÔ -_¾\©TFFF644DGGgeeIíû÷ï/))IHHh/æúõë©©©BˆAƒI‡888ܼyÓÔ ¬‚žXl†ÒWbgg—’’ÒúùU™™™ºc\\\¬ïž^€¹´]gffŽ5*44T³ìT‹7qVX®Nù¼lA»°››0ÀæðpfaÀÙ€j»>~ü¸Ö¬šž§@OŸ>]«%::ÚdÉ`*z àÓ§Okµäææš,L¥Ý§@oÞ¼YQWW'}( Óg€‘µ[oذAQSS#}BÜu×]¾¾¾ï¿ÿ¾™RÀxÚ-€¥K_|ñÅ´´43æ€I´[K’““•J¥V£›››Éò€NJïËuöðfÓÒS÷èÑ£u£J¥2M2˜Šž¸¼¼\ýùêÕ«)))÷ÝwŸ‰SÀøôÀþþþšŸ7oÞ•””dâ¬02=ïÖboo_SSc¢T0=g€¿úê+õç›7oîÛ·ÏÛÛÛÄ)`|z à9sæ¨?wëÖíÞ{ïݼy³IÀôÀ.\0O˜”ž{€Oœ8QTT¤Þ,**:yò¤‰SÀøôÀqqqMMMêͦ¦Y| IDAT¦øøx§€ñé)€KJJú÷ï¯Þìß¿qq±‰SÀøôÜìééY^^(m–••¹»»›>+@[bcõìÙc¦Œ8–¹è93wîÜóçÏ«Tª¢¢¢yóæÅÄĘ'3ŒHÏàÕ«WÏš5«_¿~vvvÍÍÍ111k×®5Ofl—Ù~¯ [¢§vuuýÇ?þQVVVRR¤¾ë¢çh!DKK‹R©lhh ljjjnn6CZ—ž¸´´ôž{Œœ={¶b×®]qqqfI cÒS/X°`âĉJ¥²GBˆèèèÇ›#/ŒJÏ=Àß}÷ÝîÝ»íìì¤Mww÷ªª*Óg°<® X=°\.W*•={ö”6ËÊʼ½½MŸØ"Ù|=ªwÍ’@'¥§Ž‰‰‰çw„—/_^°`Á´iÓ̒ܽW* .V²9zî^»v­]@@@aa¡O×®]_}õUód€é)€]]]÷ìÙS^^~äÈ‘’’’]»vÉår£ 
ÜÜܼdÉ///¹\YixÌÖ­[###]\\ìíw;11Q¦!--Í(©€.±±úÿƒÐSKoýõ÷÷¿ÿþû…ׯ_7ÊÀëׯÏÊÊÊÉÉ)))illœ1c†á1III¯¿þzëC~óüóÏ%U@' §?~ü¥K—Ô›ùùùF8==}É’%aaa^^^)))‡V(ÆLœ8ññÇïÝ»wëníìì£~x5z àÞ½{:ôСCBˆ÷ßÔ¨QO=õÔZUUuñâEu-âìì|æÌ™Ûi-;;Ûßßøðá+V¬¨¯¯×,û½ääd©ýÔ©Sê˜Âºº:õ¦æ.ÍÏuuu………mî2z:hö ƒ%ÌBs׳„YwFZ,áϤqÝùJen3k=̶’–ö7Kw²ùB÷ÂŒ³Œò“Ö@–öçÁÒ~NÒ=Ѓ)zÐÁŠfaøÏIãþ¤5Û¿iï|% dÎÓýÏCrr²V±ö_dÕ&™J¥Ò±y󿤤¤aÆ;wnÛ¶mQQQw>jqqqppðO?ý"µøùù­\¹2>>Þð˜þóŸS¦LijjRòõ×_;::úøø,Z´hèСYYYíå “éŸ»Å1à•›Ö÷n‡õÔót¾N9)aȼÌùlC³Mʱ:餌ó÷—IYÀX7)šõýóÏz'%¬ð_JæË'¥±Ê7=¯ABŒ1ÂÇÇçøñã“'O¾÷Þ{ï|H!„«««¢¦¦FÝ¢T*¥ÆÛŠÑ2nÜ8éCß¾}œœ&L˜pãÆ c=µ øìÝ{\”uÞ?þÏ2rP@”ƒA˜"ÒŠÚÁÄ]65+ïÒt5­­- ]­Ö\Ŭ=Õnº¦ÑA%Ãr;<¢Û‹{k›Ç½­T½m=ƒ˜ Š&r•ƒ1xýþ˜ïÎEE“ëóžy¿çõük¸ yÍËàóöãuq ¸s†"x˜k\ýÉ'ŸÜyç>úè‰'Ο?ûí·>|¸ý©!!!‘‘‘ÎÓÜEEEõõõýû÷ÿ¡_ÓÃ07ñ¸ÆàôôôÜÜÜ#F(¥6nÜ8wîÜ»îºëܹsíž:uêo¼1bĈ°°°^x!%%¥OŸ>J©5kÖÔÔÔL™2¥¯innnjjjjjRJ566*¥üüü”Rï½÷Þ=÷ÜÓµk×ÂÂÂéÓ§ßwß}mŸ1øÁÜêbW¥Ôuüº„9AìJ\æg€÷íÛçØý*¥:tèð‡?üáoû›)Á/½ôÒ¸qã’““£££-‹ó—u7mÚ´zõê¶¿æÃ?ìÔ©Ó¸qãš››;uêÔ©S§ÊÊJ¥TNNÎwÜÑ­[·I“&ýä'?ùôÓOMy© ÀUÏoݺuذa=zôPJUWWwèС¹¹ùäÉ“¦{yyeddddd´:¾|ùòk~MZZZËÛe9mÞ¼Ù”×ò\õ ð=÷Üóý÷ß;÷îÝû»ï¾SJ555¥¦¦½4ó\ãh®ý6HW„÷¡^p×ó5÷ÝwŸRê³Ï>sÉ+pS”¿.‹&€s¯ pUUÕéÓ§o¿ývLJ}ûöõ÷÷ß¿Ë ðõ|»±üòÚ_c,Óÿ:<˜{]}îÜ9¥TPPóHpp°ãàúšëdùO¯¼òŠãø×_íüš£Gž?ÞùaËOµ|Ü~çÏŸ?zô蟼ÕkhCËghÃÕZ´z íw’æµñ'y®ÿÿ…¾ï‡Vnìû¡A¦?´ä&?Y&)üd]ëÚà&ßæþd1ý~¸Îÿ¼VÚhqcß7ÐâÆ¾n¬Å üI¶ÿûá:Ûµ¿ÅõÿI¶ÿûá@²Ÿ¬6˜ò“Õþהּ×Oë÷ƒº¾ÿ›¦»þŸ‹W^y¥Õfͬ×`1 ìçj¿ªªªÐÐÐþóŸƒv±Z­~øáÃ?|ý_óÙgŸ=ôÐCv»½í,‹Å¤î×q]™¥Ûµ/÷º®3À×q]Ù5Ï6›DYê:²Ì ¢»Ь?@nÿ§ÌÊYJæÏ¯ÈR„‹e)~—@£”î,”jOmþ¦äú 7ËbYêšA&mßÜë pHHHdd¤s÷_TTT__ß¿ÿú5­¸×ï+¥¦NúÆoŒ1",,ì…^HIIqürïš5kjjj¦L™ÒÆ×477755555)¥•R~~~.mÌà·µs¯3ÀJ©—^ziܸqÉÉÉÑÑÑ‹eÕªUŽã›6mZ½zuÛ_óá‡vêÔiܸqÍÍÍ:uêÔ©See¥kj€›q»3À^^^­Ž/_¾üš_“–––––¦ý%Cnw@l€À#` `ðØ€GÀ<6Àà°àíê×ËòËk±Lÿëà g€À#` —@€+Q^×}Í,\@ ΀GÀ`¸œ,…Vp6g€À#à 0€Ép²À=á 0xl€À#` `ð¸ ÈDv"Üî€ œ€3Àpís˜ï(_3þÖ€“¥àB8 ×ö½ÝÕ¯ Ý°€ 0xl€À#` `ðØ€GpÙ¸¹¹yöìÙaaaV«uìØ±gÏž½þ¯¹Úñ©S§ZZxûí·éú€{sÙxþüù«V­Ú¶m[IIÉ÷ß?iÒ¤ëÿš6þÛ©S§6üÛ¯~õ+¢2àö¼]œ••5gΜÄÄD¥TFFFBBBQQQŸ>}®çkÚøo½¼¼üüü\QÜškÎWUU>}úöÛow|Ø·o_ÿýû÷_Ï×´ýß®Y³&**jàÀ/¿ür}}=U!pw®ÙŸ;wN)ä<ì8xͯiã¿0aBnnî—_~ù§?ýiåÊ•©©©m¿ Ëzå•WÇ¿þúkç×=zôüùóÎ[~ªåãö;þüÑ£G¯øä­^CZ>C®Ö¢Õkh¿ëü“47¨?ISÐ|?´rcßí 
2ýû¡%7ùÉ21Há'ëZÏÐ7ù~0÷'KÞ÷þ}ûœ±Òš„•ö‡’÷“Eöw7ù~ÀJ{ý¸d‰\ÕE–¢Ä(KÜK‹ààಲ2??¿#F¤¦¦8pÔ¨QÅÅŬ³D–zî¹çŠŠŠ²²²n¾ùæãÇOž<ùÖ[o]´h‘éA”Y"KõíÛwôèÑñññ‹ÅqdΜ9 ,PJ=ùä“L³D–RX”˜Qf¡T;a|pɹª‹,…E‰Q–®ÞËÔ¥KÃ0­Vkmm­aAAAܳD–ŠŽŽ>yò¤óÃ’’’˜˜A”Y"K·:§#ˆ2Kd)‹“ Ê,”j'Œ.Y"Wu‘¥°(1ÊÒAÔû»˜˜˜µk×VTT$&&ÕÕÕùúúrÏYª¦¦&**ÊùaÏž=+++uQf‰,uÛm·µ:2`ÀA”Y"K),JL‚(³Pª0>¸d‰\ÕE–¢Ä(K WïÀeZ¿~½¯¯¯Ïºuë ÃØµk×Ì™3¹g‰,•˜˜XTTäü°¨¨¨_¿~:‚(³D–2 ãøñãšžÜUY"KaQbD™…Rí„ñÁ(Käª.¯%FY:`¬K]]¾Û9¸*K^©]»v}ûí·Î¿ýöÛüü|îY"K-\¸°C‡;vüüóÏ ÃXµjÕ¢E‹tQf‰,å€E‰EeJµÆ—,‘«ºÈR%VY¦ÃÀ8~üø—_~yüøqIYÂJÝrË-§Núì³Ï~úÓŸ†qìØ±øøxîY"K€&îŸ%rUY <î­ÅàÁƒ/?˜ŸŸŸ“““‘‘‘ŸŸÏ1Kd©òòòI“&mÛ¶ÍÛÛÛn·>|ÅŠaaaf=¿K²D–JNNÞ¹sç¥K—bccOœ8aF``àùóçM¢ÌYJaQbD™…Rí„ñÁ%Käª.²%FY:à&XZ¤¥¥]ñxBBÂÕ>åþY"K͘1£K—.'NœèÙ³çÉ“'§OŸ>cÆŒO?ýÔÄú,‘¥«ªªBBB›žBœ%²”¢Ä$ˆ2 ¥Ú ãƒK–ÈU]d),JŒ²´pÙ¹g7Э[7ÇÝÛjjjÂÃùg‰,õë_ÿº_¿~o¿ývHHÈ›o¾Ù«W¯Y³fé¢ÌY L‡ñÁ%Käª.²x8\ ­{÷î]»vu|XUU•˜˜xæÌÖY"K9/¶±X,¡¡¡Ã† {öÙg}||L¢ÌY L‡ñÁ%Käª.²x8l€µHJJºüà¾}ûXg‰,õ‹_ü¢²²ò7Þˆ=~üøÌ™3»wïþÑG™D™%²0‚E‰EeJµÆ£,`‹£,ð;ÀZ¤§§;WTT¼ÿþûcÇŽåž%²Ô›o¾ùøã'$$8>5jÔ›o¾©#ˆ2Kd)`‹‹ Ê,”j'ŒFYÀ%FY:à 0…ŠŠŠG}tË–-’²$•:sæLiiittt=4EÐg +%òß5E–º%A”Y(uc0>Ü?Käª.²T+X”e™g€)„……iúµfÉ(uàÀ??¿Þ½{÷èÑ£¸¸ø›o¾¹å–[tQf‰,%òß5E–º%A”Y(õCa|pɹª‹,Õ %FYæpñM¸„jh¡¦¦æ£>JLLäž%²ÔàÁƒ7oÞlÆÚµk}||üüü–,Y¢#ˆ2Kd©VÊËËGŒAD™%¦%A”Y(ÕNŒ²Z³ª»$Hk%FY:`¬E«eˆŽŽþâ‹/¸g‰,ÔÐÐ`Æðáó³³ bbbtQf‰,u¹~ýúÑQfÉ(…E‰EeJµÆ£¬Vd¬ê® Ò—…E‰Q–¸Z‹o¾ùÆù8 <<\@–ÈR:tðóó»xñb~~þ_ÿú×   ššîY"K566¶|ü?ÿó?‹EGe–ÈR ‹“ Ê,”j'Œ.Y"Wu‘¥°(1ÊÒ`-zõê%/Kd©˜˜˜µk×VTT$&&ÕÕÕùúúrÏYªS§N-?ŒŽŽþä“OtQf‰,¥þýó[ZZZRR¢ï{2ˆ2‹¾‘ヲÆ—,‘«ºÈRŒ²´põ)hWZ¿~½¯¯¯Ïºuë ÃØµk×Ì™3¹g‰,õM eee:"è³D–2 Ãf³3F)åå奔3fŒÍfcD™EY ¸Àøà’%rUY ãƒQ–؃§«««#û¡%ËY ¸xöÙgGŒqìØ±K—.;vìî»ïž>}:ë Ê,ÊRÀÆ£,pŒ²tÀû€™zöì¹sçÎèèhLJ'NœøéOZ\\Ì7ˆ2‹²€gÂø`”¥6À`¦€€€sçÎ9ï\bF``àùóçùQfQ–ðLŒ²tÀM°´¸ï¾û®ö©72ÍY LóÍ7ßôîÝÛñá7ß|sÓM7±¢Ì¢,…ñÁ"L‡ñÁ(KœÖÂb±LŸ>½Õ­Û ÃX¼x±éàdY"K€évïÞzóÍ7;><~üxEEÅwÞÉ7ˆ2‹²Æ‹ 0Æ£,°ÖÂb±455y{ÿÇ v»ÝÞ±cGÞ&Kd)àge9—––FGGÇÆÆêx~ú Ê,š ŒAÀˆÈU]d)ŒFYæÂX ??¿šššVïfV__ÒÐÐÀ4Kd)‘˺ÈR8ƒÄ(«¼¼|Ò¤IÛ¶móöö¶ÛíÇ_±bEXX˜¹)”A”Y”¥0>X)Œ>Y"Wu‘¥0>eé€ °±±±ÿûßZ<|øðƒ>xüøq¦Y"K‰\Ö¥–Â$.Y>úèÅ‹ßzë­ž={žúhddäéÓ§³³³?ú裼¼<¾Y"Kùúú^þoßÿ½ŸŸŸ¹A”Y"KõèÑ£¸¸¸ÕÉ–âââ=z˜D™%²”RÊb±477;?lnnnuâ…]ee)ŒA 
ãƒO–ÈU]d)ŒFYZ¸z.Öš5k"""œÎkÖ¬áž%¯TLLÌáÇ[|øÞ{ï}â‰'LO¡ ¢Ì¢,e`|0 Âøà’%rUY ãƒQ–ØkÔÜÜ\PP°cÇŽ‚‚‚ææfYÂJ‰\ÖE–ÊÏÏ÷òòš>>ëÖ­3 c×®]3gÎ4=…2ˆ2‹²TK,‚0>¸d [Õ‰ƒ²0>eé€ 0…}ûö­X±âÈ‘ŒUú IDAT#’²„•’´¬Óg…‡‡ggg777ÏŸ??))I_e–°Ruuu6›MÇ3»*ˆ2‹²TKî„ñÁ(KتND…ñÁ(ËtØü?’–uú Ê,œAb”æÂøà’%rUY <6ÀÿÈe]d)gXe€‰0>e‰\ÕE–Oƒ·Aøÿíß¿¿°°pÀ€}úô“%²€ÇÂø`”n`ð\ý€ .ˆÌ1ÈÆæS8¬ÅòåËSSS½¼¼ZܸqcUUÕÏþs¦Y‹eÖ¬Y->õÔSݺu{íµ×L "ÎsÝwß}WûÔÆ9QfQ–"”3‘l|`Nð…ñÁ(Kl€µ°X, ~~~-®ZµjñâÅ;wîdše±Xzöì9eÊ”ßþö·Îƒ[·nýõ¯}ðàAƒˆ³\åÂ… ;w–—e±X¦OŸn±XZ4 cñâÅæN² Ê,âRdãƒr&ÒŒÌ)ð"GÆ£,°ÖÂb±äæævìØ±åÁ'NÌ™3Çô‹¦È²,KaaáÈ‘#_xá…çž{Îq°¬¬ìæ›o®¯¯71ˆ8K‘³ WUðÊjjjòöönyÐn·wìØÑôaOD™E\Šl|PÎDšñ9ÕNU\²DŽ*ŒFYZÜkÚã(¥¼®‚o–Rª¡¡¡¨¨¨{÷îK—.uܽ{wdd¤¹A”Yï¿ÿ¾ÝnoupÆ ÙÙÙæQf)¥fÍšÕê`jjêœ9sÌ ¢Ìr|K´:¸råÊ!C†˜D™¥”êÙ³çܹs[ܲeKbb¢¹AÄY¾¾¾õõõ­ÖÕÕùùù1 ¢Ì¢,E9>(g"Íø œ‰dãƒr&bTqÉ9ª0>eé€ °W\•¸g9ƒ<öðÃ/Z´¨oß¾Ó¦Mã›…YÅ%K)•››»þ?effZ­Vsƒ(³”R………ÑÑÑK–,qø`È!Lƒ(³(K‰D6>(ç”"”3£ŠK–¼Q…ñÁ(K WïÀeª¨¨™%ÂÅêL²®ªà“•ŸŸïåå5yòämÛ¶=ztÛ¶mO?ý´——×®]»˜QfQ–"˜SíD9>ä]¬N™…QÅ% ãƒQ–Økqþüy‘Yò`VqÉùÊ××·±±Ññ¸¸¸xܸq±±±?þ¸ÍfceÆš5k"""œÿÒ±fÍÖA”YdAdãsªèÇeF•›gIUŒ²L‡»@k!òŽy#GŽ?~ü”)S”RÿøÇ?Ö¬YãüTVV–‰A”YW|sMȲüüül6›¯¯¯Rª¤¤dÖ¬Y{÷î:thfff`` Ó¬ÊÊÊÐÐPŸÐM²»téRQQQuuu×®]ûôéÓ¡CîA”Y4A"ß2—l|PÎD²ñA91ªeI…ñÁ(Ëd®Þˤ$ÞÚ!44ôøñãŽÇ;vìèÖ­Û³Ï>;uêTßEdY¸X \U`:²ñA9ÉÆåLÄÅê\`T°€3ÀZˆ|Ë\__ßòòò   ¥Ô·ß~›’’rêÔ©ÆÆFÇ­ùL ¢Ìù>„"᪠FY÷ÝwßÕ>µqãFŽA”Y”¥D¾e.Ùø œ‰dãsª0ª¸da|0ÊÒÏ©jnw±›7oÞ»ï¾ë8¢ûŽyº³zôèQXXèx\XXØ­[7sŸß%Yééé­>õÔS/¾ø"߬‘#G¾÷Þ{ŽÇÿøÇ?¦¶`nqÖš5kæÍ›×òÈ£>ú÷¿ÿÝô ²¬½{÷ÞsÏ=ŽÇ~~~k×®õöö¶X,Ë–-37ˆ8+þßzöì¹}ûvÇã>}úlÚ´‰iee)E8ªÈ‚ÈÆåL$”3£ŠK–ÈQ…ñÁ(K —žKI¼µÃôéÓ ðÕW_mݺµwïÞ¿ÿýï ÃhhhÐñ]D–¥p±:“,%ñ}}||jkk;温¦Ÿ)Ê,§3g΄‡‡;755éË" ¢Ì""”3‘l|PÎD²ñA91ª¸dÉUŒ²LÄãU²#òŽyÕÕÕ÷ß¿Åb±X,ãÇ¿pá‚av»ý­·Þ27ˆ2 ³ŠK–’ø†7ÝtÓ?ÿùOÇãõë×0ÀÐöŠ2ËÉf³8×ÖÖúûûs¢Ì""”3‘l|PÎD²ñA91ª¸dÉUŒ²L„ 0ü06›ì¾ Y˜U\²pU£¬–ÂÃó³³›››çÏŸŸ””$ ˆ2‹²”0d£Š&ˆl|PÎDŒ*.YâGÆ£,³`¬Qeeess³¼¬†††ÊÊʦ¦&Y˜U\²pU£¬–233•R^^^ÞÞÞ999‚(³h‚ÈÆåœ2GA.Vç’…QÅ(Ë ãƒQ–Y°ÖâÈ‘#}úôQJÅÆÆ;vìã?NHH3fÌ™3gXg½óÎ; ‹E)Õ±cÇaÆåç盞B™…YÅ(K*aWU\nß¾}+V¬8r䈘 Ê,­AdãƒrN„£Š,«3Ê’Jö¨Âø`”e ¼ ’>ø`ppð3Ï<³dÉ’ï¾ûN)5~üø•+WFDD¬^½ši–ãîiiiJ©åË—Oœ8ñâÅ‹}ôÑÆ‡jbq–TçÎëСÍZeUUUuéÒ…æÖ)³ëêê‚‚‚¼½½%e›#”3‘l|`N™£ŠQFÈáê¸L]»v­®®6 
£¼¼\)uúôiÃ0NŸ>Â7+""âÀŽÇÿú׿n½õVÃ0Þ|óÍ¡C‡šDœeàbuY¸ª‚W°@6>(g"Ùø žS.Vç…QÅ+ <6ÀZtîÜùâŋƿÃÄq±Í… œ÷Iã˜å 2 £¾¾>00Ð0Œ'Nøùù™D™…YÅ%ëxì±Çòòòyä‘””””””%K–$''O˜0oÖܹs###_~ùå—_~922ræÌ™Ï>û¬Õjݾ}»¹AÄYÀýø œ‰†æñA9q±:—,Œ*FYàɰÖbàÀï¾û®aK–,¹é¦›žþù#GŽ<ÿüó?ùÉOøf 2dÁ‚†a\ºtiÞ¼ywÝu—a'NœèÒ¥‹¹A”Y˜U\²pU£,à‚l|PÎD²ñA9ÉÆåLĨ⒅Qò`¬Enn®···Õj >tèÐàÁƒ•Raaa»ví⛵sçÎÀÀÀÐÐÐÐÐЀ€ÇØØ´iÓ“O>inef—,\UÁ(K$²;²PÞú…l|PÎD²ñA9q±:—,Œ*FYòPŽújš `]Ž=ºfÍÇð¸téRii©¾ß9!Ë:}úôÇüñÇŸ:uJÇóÓgaVqÉÂUŒ²Þÿ}»ÝÞêà† ²³³™†¡”š5kV«ƒ©©©sæÌaä@6>(g"Ù¨" ÂÅê\²0ª¸dQ޲,ÊñA<ªL‡ °^Ÿ|ò‰¼,I¥0«¸d᪠FYêßo%ÚÒÊ•+‡ Â4ȑճgϹsç¶<¸eË–ÄÄD¦A-a|¸y.Vç’…QÅ%‹x|ÍD²ñá’Qe"l€õ¢¼Ï6Y–¤R˜UŒ²pU—,¥Tnnîúÿ”™™iµZ™9² £££—,Yâ¸d [Õ‰ƒ²(ÇYåø ÌÒ—@ká¸ÞùÏÃÿú׿¦OŸ¾cÇÖY"K8ƒÄ(Ëb±ôìÙsÊ”)¿ýío·nÝúë_ÿúàÁƒƒ”Raaa»wïŽUJíܹsܸq&LhnnÎÊÊ2wŒ’)Œ&AÀˆÈU]d)ÊñA–E9>(³tÀ`-8pë­·:?ìß¿ÿ¸g‰,%rYY ge)¥6mÚ4räÈ   çž{Îq$11ñÛo¿åtîܹ®]»:wïÞ½cÇŽ™™™¦ÿS7YÂø`¤0>ød‰\ÕE–R„ãƒ,‹r|Pfé€ °qqqË—/ÿå/éøpÙ²e½zõâž%²”Èe]d©-[¶|þùçοČ7núôé/¿ü²éA”Y"K9ÄÄÄlݺ5%%ÅËËkÚ´iJ©ÒÒRç°äÔ£GÂÂÂÁƒ+¥ »uëfzqÂø`¤0>ød‰\ÕE–R„ãƒ,‹r|Pfi¡ñ-–<ØW_}˜˜øÀôë×/00pÇŽܳD–JNNÞ»w¯óï¿þ:99YGe–ÈRv»Ýù¡ÝnÐD™%²”!ô-s§OŸ>`À€¯¾újëÖ­½{÷þýïoFCCƒéc”,ÈÀø`d`|ðɹª‹,E9>Ȳ(Çe–<^%GåååYYY¿ûÝï²²²***ddÉ+%rYY*))Éñ‹%ï¼ó΀tQf‰,e†¯¯occ£ãqqqñ¸qãbccüq›ÍÆ4È0Œêêêûï¿ßb±X,–ñãÇ_¸pÁ0 »ÝþÖ[o1 rÀø`„ñÁ%Käª.²åø ˢģÊt؃G¹¬‹,…3HŒ²³ÙlçÏŸ—,`|pɹª‹,%åøà;ªph-×Ä·’ŸŸŸ“““‘‘‘ŸŸÏ1Kd©íÛ·ÿ×ý×M7ÝS\\\ZZúùçŸ'''›õü.ÉYJ)UQQ‘››{êÔ©¨¨¨ŸýìgZßVž,Kd)‡ªªª.]ºtèÐAk eCccc]]]PP··Þ›ha|°R¬²D®ê"K)ÚñA™E6§ˆ³L„ °Ë—/¿ü`ZZZAAA^^^ZZÇ,‘¥”Ðe]d)ࢨ¨h̘1EEE±±±[¶lÙ¹sç‚ n¹å–¬¬¬îÝ»s rXºté;ï¼SXXhFÇŽüãÿùϾóÎ;ùa|°rÀø`”îr|Pf‘â,Óa à £¬|088ø™gžY²dÉwß}§”?~üÊ•+#""V¯^Í1H)5oÞ¼wß}×±«Y¾|ùĉ/^¼øÑGmܸqèСƒÀt"Wu‘¥(ÇYåøà>ª8­fmÆŒo¿ý¶°,¥D.ë"K]ítJBB‚égZȲD–RJååå;v¬K—.½zõêÖ­ÛéÓ§#""Æ÷£ýˆiRjéÒ¥7nt¼ïë˜1cžxâ‰ÄÅŽôÒKÛ·oçtEî„ñÁ%Käª.²åø Ë¢®Uí‡3ÀZäåå-X°àÌ™3—.]r9tèPbb¢RjÏž=L³D–Â…yŒ²€‹€€€ªª*ŸÆÆÆN:]¸pÁjµÖÕÕõèÑãܹsƒZf)¥ºwïn³ÙNž<Ù§OÇ?° RL‚Æ«,`Á%ãƒl&*ýãƒ2Kl€µèÛ·ïèÑ£ããã-‹ãÈœ9s,X ”zòÉ'™f‰,|á ’Ûf 4hòäÉS§NÍÌÌÌÈÈx衇¦M›öî»ïîÝ»÷Ë/¿ä¤”JNN3fÌ /¼`Æüùó?ûì³¼¼¼“'O&%%UWWs RL‚€5«º«‚ôeQ޲,ÊñA™¥…Kî=-^ppp«#qqqܳD–ºÜóÏ?OD™% ÔÎ;G}ûí·ú7___ǾY"K†‘››ëíímµZƒƒƒ:䏸0,,l×®]Lƒ ÃØ¹sg```hhhhhh@@ÀöíÛ ÃØ´iÓ“O>É4ÈÀø`tEî™%rUYŠr|eQŽÊ,ð;ÀZÜvÛm­Ž 
0€{–ÈRW¼†mÇŽŠêÂ<Y"K=ýôÓ£G~衇Zžlyî¹çLŒ ÏYJ)5vìØ‚‚‚ýû÷2$"""//ïôéÓÝ»w7ý=È‚”RC† ),,ܺu«Rjøðá‘‘‘J©‘#GŽ9’iÂø`¤0>ød‰\ÕE–¢dY”ãƒ2K\  æqÉêÒ¥KMMMË#½zõ:v옉ôY"Kµ”ýØci ¢Ì¢,nãƒK–ÈU]d)'ŒFYfrõ)hWÂ…y\²FŒÑêÈøñãuQf‰,ÕÙˆ¡œe"K›Ãøà’%rUYÊ ãƒQ–‰:¸në àz¸0KÖ–-[ZY³fŽ Ê,‘¥Àt\²D®ê"K‡Ã%Р…ÅB4bÈ‚(³(Kx&ŒFY&Â`Тկr ¢Ì¢,à™0>e™ˆå®à‡ÂÛ €™’’’Úøì¾}ûØQfQ–ðLŒ²tÀÌ4vìØeË–=õÔS111%%%ùË_¦NÇ7ˆ2‹²€gÂø`”¥…«n? "%''ïÝ»×ùá×_œœÌ:ˆ2‹²”auuuŸ|òÉñãÇõEQfQ–a|0ÊÒ`-Þÿ}»ÝÞêà† ²³³ùf‰,¦ hùók·ÛXQfQ–2 £´´ÔÇÇ'"""33óÒ¥K‚(³(K€‰0>e逻@k1yò䦦¦Vm6ÛÒ¥Kùf‰,¦‹‹‹[¾|¹óÃeË–õêÕ‹uee)‡€€€Ý»woÚ´iذaÇD™EY X¨¯¯ÏÎÎ...–”%¯Æ£,-\½—I)•››»þ?effZ­V¾Y"K"/@YÊ0Œ¯¾ú* 11ñèׯ_``àŽ;LO¡ ¢Ì¢,eFiiiHHˆãñÿ÷ÇÄÄh:™ID™EYJäuÝ"³p±‹,ŒFY:àm´°X,^^^Wü”Ýngš%²ÔòåËSSS[emܸ±ªªêç?ÿ¹‰A”Y"KY,–††??¿–W­Zµxñâ;wšD™%²”CEEEnnî©S§¢¢¢~ö³Ÿ…††šAD™ETVVvæÌ™#F>|Øq¤²²rÆŒÍÍÍÛ¶mãD™EYJ)uêÔ©¸¸¸ÐÐÐßüæ7Ï<óŒÅb1=‚8HjÖ©S§’’’öïß?mÚ´óçÏÿå/¹ù曹g‰,…ñÁ(ËtØkqÅ¿krÏòœRÜw RKåæævìØ±åÁ'NÌ™3çÂ… &Qf‰,Œ´±0÷ïdA”Y”¥¶:|²A•••J©+Vüæ7¿IOO×´å&ËY <Þ <݆ Zí *++÷ïßÏ:Kd© &˜þœ.ÏYjðàÁ—ÌÏÏÏÉÉÉÈÈÈÏÏgD™EYjïÞ½ååå'NÔqÆÒ%A”Y”¥œ"##×­[·bÅŠáÇkÝ Ë*++«¨¨0 £¬¬L)u÷Ýw¯_¿~ÆŒ999:.v ÉY ãƒQ–ØkQQQAs¦”2Kd)%t"²Ô… Ⱦ+ȲD–JKK»âñ„„„«}Ê̓(³(K%%%Ùl¶éÓ§'%%™ûÌ® ¢Ì¢,…­—¬=z´z Y–ÈRŒ²tÀ%ÐàÑ<çºnîY(Å( Ì%òºn‘Yûöí»Úu¦ÿC Y–ÈRàápX‹´´´{ï½—æÌY–ÈRÀ…È D–º¢3f¼ýöÛ’‚(³ôa|°RB¯ë™…‹eµ‚ñÁ(«ý°Ö¢¤¤¤¦¦FX–ÈR"w "KQÞ],Kd)¥T^^Þ‚ Μ9séÒ%Ç‘C‡íرC)µgÏŽA”Y”¥0>X)luXeýñÔBœ%¯Æ£,p 40€3HŒ²úöí;zôèøøxçe‡sæÌY°`RêÉ'ŸäD™EY Ì%rUY ãƒQ–´o;ìqªªªŠ‹‹«ªª$eI*õôÓO¯^½ZÓ“»*Kd©áÇ/[¶Œ ˆ2Kd)Ã0‚ƒƒ[‰‹‹cD™EYÊ ãÃ̓0>¸d‰\ÕE–Âø`”¥CWoÀejnnž;wntttHHHlllHHHttô¼yóš››ùf‰,… ó¸dmݺuÊ”)A”Y"K)¥n»í¶VG À:ˆ2‹²Æ‹ …ñÁ'Käª.²Æ£,p ´³gÏÎÉÉIOO8p`pppmmíž={^ýõI“&-\¸i–ÈRÀNuuõ¹sç»ví*&Kd)p,‚€#‘«ºÈRà¡\} Z¦#GŽ´:xðàÁÐÐP¾Y"K9áÂ<7ϲÛí¯¾újTT”síŠŠŠš;w®Ýnç›%²0‚ñÁ"¨%Œ7Ϲª‹,`-üýý/_^ËËËýýýùf‰,%rYY*===&&&333//¯   //oñâÅÑÑѳgÏ67ˆ2Kd)`ãƒEñÁ'Käª.²x8l€µ3fÌèÑ£[þÛpAAÁ¨Q£zè!¾Y"K‰\ÖE–Â$FYÀÆ‹ ãƒO–ÈU]d)ðpØkQVV–’’¢”²Z­V«U)•’’RVVÆ7Kd)‘˺ÈR8ƒÄ( ¸Àø`d`|ðɹª‹,7ÁÒèÈ‘#û÷ïwüÿþýãããd +eµZKKK[Ýb¡¢¢"&&¦®®Ži–ÈRŽ“* .ìÓ§ãHaaazzºÏÚµkM ¢ÌY ØÁøpÿ Œ.Y"Wu‘¥ÀÓ¹lëí>ùäyY’JáÂ<.Y8ƒÄ( ØÁøpó Œ.Y"Wu‘¥ÀÃá °^ ÝŸ0Y–¤RgÏž8qâ—_~iµZƒ‚‚l6[]]]JJʪU«ÂÃÙf‰,å€3HŒ²€Œ7Âø`”¥„®ê"KÇÂX/ {Y"—u‘¥”RÙÙÙ=ö˜¾çwI–ÈRÀÆ‹ 
ŒFY"Wua¥êëësss“““cccµQf‰,¥õ)gCù'L–%².Ìã’%òÛOX©÷ßÿò7Ù°aCvv6Ó Ê,ÊRNøVgd`|ðÉ·ºûg•––úøøDDDdff^ºtIF–ÈR:t ßr{”ššyY"K=þøã4A”Y"Kû›|xzzú3Ïk,‘¥”R¡¡¡;wîìÓ§O˃‡ºûî»+**8QfQ–.0>¸d‰\ÕE–Âø`”¥.6SÛo½oß>ŽY"K9áÂ<.Y(Å(«¡¡!,,¬ÕÁðððúúz¦A”YA,‚Z¢Ä% ¥¸da|0ÊÒg€Í”íxpüøñ¥K—¦¦¦öêÕëÌ™3|ðÁرcßxã ŽY"K€>=ôa .tþÛpaaazzºÏÚµk9Qfa|°}0>eé€ °C‡]´hÑ AƒVTT<òÈ#[·ne%²¸9œAb”åtöìÙ‰'~ùå—V«5((Èf³ÕÕÕ¥¤¤¬Zµ*<<œcee)ŒAàþD®ê"K9a|0ÊÒ—@kqàÀ–?ÌaaaçÎãž%©”Èe]d©ôôtǃ+žl1+…8Kd©–ÂÃÿøâ‹#GŽìß¿ßñîýû÷çD™EY ãÃ̓0>¸d‰\ÕE–rÂø`”¥6ÀZÄÅŽûî»Ï=÷œãÃ-[¶è;ÓN–%©”Èe]d©Ç{Ìñ`èСÿûß'[~ùË_>òÈ#&Qf‰,u¹øøø={öLžød‰\ÕE–rÂø`”e"–/Úý eé€ °ùX]]ÝÆ”””°ËY tHKKD™E„ñÁ"tÀø`”¥.6_\\Ü;ï¼sµÏ>ñÄgÏže—%²€ÇÂø`¦Ã`óýèG?ºï¾û®öÙAƒqÌY ¸À$FYÀÆ‹ `Däª.²΃繬‹,…3H,²’’’Úøì¾}ûÚAD™EY xÁøà’%oU§ ¢ÉÂø`”¥ÎëU]]}îܹÀÀÀ®]»ŠÉSÊf³eee]í³O<ñÇ,‘¥p‰EVzzºãÁñãÇ—.]šššÚ«W¯3gÎ|ðÁcÇŽ5%‚8ˆ2‹²Ôå0>Ü9ãƒK–¼U2ˆ& ãƒQ–^®}b©ìvû«¯¾åüsŽŠŠš;w®Ýnç›%¯ÔC=ÔÆgï¿ÿ~ŽY"K/ÉÉÉ{öìq~X^^>|øpÖA”Y”¥0>Xa|0Ê.0>eé€ °ééé111™™™yyyyyy‹/ŽŽŽž={6ß,‘¥€ªªªââ⪪*IYÂJ´ÚÜ~ûí¬ƒ(³(Ka|°Ž„­êÄAYŒ²tÀX‹#GŽ´:xðàÁÐÐP¾Y"K9IZÖ郲p‰QVRRÒ’%KœnÞ¼ù¶Ûn3=…2ˆ2‹²Æ‹ –0>Üød‰\ÕE–R¬²Lg1 ÃÕ¯A Ù³gçä䤧§8088¸¶¶vÏž=¯¿þú¤I“.\È4Kd)`ge›Ãø`‰\ÕE–åêk°eùöî"K9UUU_þ>¬³$•²Ûí¯¾újTT”síŠŠŠš;w®Ýnç›%²0‚ñÁ"¨%Œ7Ϲª‹,`-D¾½»ÈR"—u‘¥ÒÓÓcbb233óòò òòò/^={ölsƒ(³D–F0>X|²D®ê"K‡ÃX ‘oï.²”Èe]d)œAb”\`|°20>ød‰\ÕE–‡ßÖâìÙ³'NüòË/­VkPPÍf«««KIIYµjUxx8Ó,‘¥BCCwîÜÙ§OŸ–:t÷ÝwWTT˜D™%²”Õj---mõ»@111uuu&Qf‰,5pàÀêêê6¾ ¤¤„Wee)…ñÁ$Ha|ðɹªË+…ñÁ(Kl€5"{ËuÊ,a¥ä-ë”A”YŽ“* .tþ ¦°°0==ÝÇÇgíÚµ&Qf‰,÷Î;ï\í³O<ñÄÙ³gyQfQ–rÂøpÿ Œ.Y"Wuy¥0>eéâªSÏBҭ胲pa—¬²²²””¥”Õjˆˆ°Z­J©”””²²2sƒ(³D–jûýý÷ßÏ.ˆ2‹²TKn„ñÁ%Käª.¯Æ£,M°ÖBä­D–’·¬Sg†QXX¸råÊ÷Þ{oåÊ•………:"è³D–0>X¬² ¡«ºÈRà±p ´xÏCFY æ±Êù>„"KQf¡ÔÁø`ä„ñÁ( ‹—,”b”e&WïÀeyÇ<‘¥œpaž›gá ²\D™EY ãƒEPKnž…E‰KJ1ÊÒ`-ðž‡\²D."K‰|à ‘¥(³Pª0>X|²°(qÉB)FY:`¬…È[;ˆ,%r±Y gå Ê,ÊR,‚ Œ>YX”¸d¡£,°ÖBä­D–¹Xˆ,…3HÈraee)ŒAÆŸ,,J\²PŠQ–¸ –F"oí ¬”È÷!YJÞûRIÍB)S`|¸Æ—,,J\²PŠQ–ä[n7‚ ó¸dá ²\D™Eü~-ÀÆ—,,J\²PŠQ–8l¾~ýúýä'?¹é¦›^|ñE1Y"K)¥Îž=;qâÄ/¿üÒjµÙl¶ººº”””U«V…‡‡3ÍYÊgå Ê,‚ ŒAŒ²%>Y(Å(Ë\Ø›ïĉ¯¼òÊöíÛ;&&Kd)'‘‹…ÈR`ŒA-a|0Êw† 0¸;œAb”¥”JKK»÷Þ{'L˜ &ˆ2‹²˜Häª.²”Âø`•¥CW¿—éׯߴiÓ^{í5IY"K}þùçË—/×D™%²”CIIIMM¤ Ê,ÊRÀÆ—,‘«ºÈR ãƒU–8 ž 
æ1ÊðLŒ²€l€@‹êêjǯ۵z÷¾A”Y”¥<Æ£,áh0Sssóܹs£££CBBbccCBB¢££çÍ›×ÜÜÌ4ˆ2‹²€gÂø`”¥ƒ·«_€puuuAAAÞÞrþ¨E–³¼øâ‹999/¾øâÀƒƒƒkkk÷ìÙóúë¯Ûl¶… r ¢Ì¢,å$rUY LñÁ(K —¾ ±dï¼óNBB‚ÅbQJuìØqذaùùù:‚Þÿ}»ÝÞêà† ²³³MÏY ÌräÈ‘V<Ê4ˆ2‹²”Aµª/é"K€‰0>eé€K µ˜7oÞŸÿüç &üሌŒüÕ¯~Õ·oßáÃ‡ïØ±Ãô¬É“'755µ:h³Ù–.]jnÈRÀNcccUU•Ýnwõ 1“°R aaa­†‡‡×××3 ¢Ì¢,E¶ªS.é"K;ÂVuI¥0>eiáê¸Lp<þ׿þuë­·†ñæ›o:Ôô,¥Tnnîúÿ”™™iµZÍ YÊ©¡¡¡²²²©©IÓó»„°R"/@Yj̘1£GnùoãFz衇˜QfQ–"[Õ)—t‘¥œ„­êÂJ‰\Õå•Âø`”¥6ÀZtîÜùâÅ‹ŽÇõõõ†aœ8qÂÏÏÏô,¥”×U˜$²”!qY7$–š;wnddäË/¿üòË/GFFΜ9óÙgŸµZ­Û·o77È0 ¥TCCC«ƒ+W®2dˆ¹A"K†QVV–’’¢”²Z­V«U)•’’RVVÆ4ˆ2‹²ÙªN¹¤‹,e½®£ª=0ªÚãƒQ–x$-’““ÇŒó /†1þüÏ>û,//ïäÉ“IIIÕÕÕæfY,–††???sŸör"KÍ›7ïÝwßMKKSJ-_¾|âĉ/^üè£6nÜ8tèPs³®XjÕªU‹/Þ¹s§‰A"KEFFnܸñÖ[oUJíÝ»÷‰'ž8pàÀ[o½•››»}ûvƒ”R‹%77·cÇŽ-ž8qbΜ9.\01Hd)§#GŽìß¿ßñîýû÷בBD™ED¶ª“-éJh)²UlIWUí†QÕ~Œ²Ì… °yyy÷ß¿RêâÅ‹ŸþùСC7oÞ¼bÅŠ?üÐÜ,²,²”Èe]d©€€€ªª*Ç·_CCC÷îÝm6ÛÉ“'ûôéÓÐÐ`bRÊb±xyy]ñSæþî“ÈR-á=Ý<ˆlU§Ü+Š,E¶ªSn?0ªÚ £ÊŒ²ÌäÊÓÏ¢>}úã?þøãO:¥5¨¢¢Bëó·$¯”ÈëºE–2dÈ‚ øtéÒ¼yóîºë.Ã0Nœ8Ñ¥KsƒŒ«\‚¥ƒÈR†aØíöW_}5**Ê9h¢¢¢æÎ{ù%ˆ\‚(³(KT«:åœ2$–y]7FU;aTµÆ£,°&rYYjçΡ¡¡¡¡¡Ž_=Ú´iÓ“O>izJµSzzzLLLfff^^^AAA^^ÞâÅ‹£££gÏžÍ4ˆ2‹²pA¶ªS.Uí„RíñÁ(Kl€µxúé§W¯^-,Kd)‘˺ÈR†Ä  ¡¥ðž‡,‚ Œ&AáªN¹¤cTµFU{`|0ÊÒÁûÆ.œ†¶•””ÔÔÔËYjÈ!………[·nUJ ><22R)5räÈ‘#GšžUQQAóÛb"K)¥"""~ñ‹_…††¤8ˆ,…÷˜)ÂUrIǨj'ŒªöÀø`”¥…«wà׆3HŒ²ðž‡,‚Àt"Wu‘¥0>eéÐÁÕp™4½­HÛª««KJJLG"'‘¥ÒÒÒÖ¬Y£éÉ]•%²Î 1ÊZ¶l™Íf‹ïܹsdddçÎêêê²²²˜QfQ–¢_Õu/éJh)Œ.Y"Wu‘¥0>eé€K µ˜5kVFFF˃O=õT·nÝ^{í5s³š››çÏŸ¿lÙ²S§N9ŽDEEM:õÅ_¼Ú­äoŒÈR"—u‘¥—ÉÑ ËYJ)þÅ_¼= Yee)²UlIWBKa|pɹª‹,…ñÁ(K WŸ‚–I)Õ³gϹsç¶<¸eË–ÄÄDÓ³ÈîÃ&²pqþüyúЪªªââ⪪*MÏ/²0B¶ªS.é"K"Wu‘¥ÀÃa¬…Rª°°0::zÉ’%΃gΜéÔ©“éYd÷aYJä².²”RjÖ¬Y­¦¦¦Î™3Çô,²w·Y*!!aêÔ©óçÏ7÷i]D™EYÊ \Õ)ï*²ýªN°ýÀ¨j'Œª†ñÁ(Kl€µPJ544uïÞ}éÒ¥Žƒ»wŒ4=Ëßßÿò¥¼¼¼ÜßßßÜ ©¥„-ë†ÜRò.@Yª¤¤äÉ'ŸŒ‹‹3÷i]D™EYÊ \ÕÉ–tCn)šUlI70ªÚ £ê†a|0ÊÒ`-Ô¿ßqîàÁƒaaa?üð¢E‹úöí;mÚ4Ó³ÈîÃ&µ”°eÝ[JÞ"K#d«:åýB¥–’w]7FU;aTÜ0l€µðõõmllt<...7n\llìã?n³ÙLÏ*++KIIQJY­Öˆˆ«Õª”JII)++37Hd)‘˺ÔR"/@W ![ÕÉ–tCh)‘×ucTµFÀ ÃXˆÂ•+W¾÷Þ{+W®,,,tõË1A)‘˺àR†¸ 䕸̩&õºnŒªöÀ¨¸aØkTYYÙÜÜLGsÇ}Ú0ŒÓ§O‡„„˜žEvk‘¥„]×í ¬”È D–rxçw,‹RªcǎÆ ËÏÏ7=åý÷ß¿üõoذ!;;Ûô,Cb)²UrIYÊAØuÝU7 £ªh–tƒvUYJl€µèܹóÅ‹ ÃhhhPJ]¸pÁ0Œ 
.˜žEvk‘¥D.ë"K=ðÀ=öX^^Þ#<’’’’’òÿµw÷¡UŸ÷ÿÇ/ktmîŒIÔ,'1i'Í™÷“ l*–.kÇVV+.8ÐÒÄ:ÖI¡ÞÌX`MÚU¡uZ‡usW²?¾eh[a˜´H«ÎšdehîL0&6&1M¼¾vÈ×Õüøó9×§Ÿ×çùøcd'’ëz1÷º|ç\çœÇvïÞýè£VVVz»uø’¡¬µuuu‘H¤¶¶¶¶¶6‰¼üòË/¾øbFFFcc£· ™ÿ¼†p¢#GŽ”——{» å¬Õ]¾µ’d(É{ÝUIâ¨J†³J·[]2TŠ0§Ä²eËöíÛg­Ý½{wIIÉK/½ÔÒÒòÒK/­\¹Ò󵜽µƒd(ÉZ— %yA2”µ¶°°ðÂ… ±¯?úè£E‹Ykßxã+Vx»1¦¡¡áþ¯={ödddx» å¬Õ]¾µ’d(g­îò7eUIâ¨J†³J·[]2TŠ0§DCCCZZZFFFNNÎÅ‹—/_nŒ™5kÖ™3g<_ËÙ[;H†’¬uÉP’$CÙ ¹¬µ·nÝÊÎζֶµµÝÿýÞ.dŒ™zÞ.dEC9ku—oA$Jò^7GU’8ª’á¬Ò­ÃV— •" À©ò¯ýë/ùK¬ÐïܹÓÑÑñÅ_¤n97J»Ö•BI^@ e­-//ýõ×­µwîÜ©¯¯ä‘G¬µmmm3gÎôv¡/½‚•"’¡¬ÛVwöήz¡$ïusT%‰£*Î*Ý:luÉP)Â,¢½½½±±±³³ÓïxÉA(ÉZ— %yA2”µöÔ©SÙÙÙùùùùùùYYY±W?~¼ªªÊÛ…\À’¡ãœJ˜ä½nŽª$qT%ÃY¥[‡­.*E€½W]]=Éw×­[çírƒƒƒkÖ¬™2eJì=ß*++c¿Fõ–d(+Zë’¡¬â+ÊZÛÕÕuèСC‡¥t0èííMÝÿob¡\¶º³J— eEïusTy‚£*an*ݺ=ª$C¥Ÿì½yóæ>|ø^ß]½zuww·‡Ëmݺµ¹¹ùí·ß^¹rå{ï½÷ /”••íܹÓÃ%Œh¨˜O?ýôüùóååå………ÖÚ®®®‚‚‚Ô}rš›Ï!” åXGGG[[Ûƒ>‰DüÞ‹g$C!1.[ÝY¥K†ŠqÙêÎ*£*y’­. _-¾ŽßšJJJ2&åír=ôP{{»µ¶¬¬ÌZÛÝÝ]RRâíV4”/¸˜É ’¡b~úÓŸ=z4E?Ü—…\®ål!—­î¬Ò%C¹'yNYŽª„H†²Z+Rõk¶0»råŠËå®]»V\\ÿ¯¹¹¹ƒƒƒž¯"jÆ ¼×wׯ_?É3 ªªªjhh0ÆXk+++ÿð‡?Ä.byH2ÔÉ“'OŸ>}¯ïž8qÂÛå~ó›ßŒŽŽöôôÄŸØ©©©ñü‰ÉP1W®\¹qãF*~²_ ¹\ËåBV‰qSéF4”ËVwSé†£Ê UÉàøÐZ©ÀèÀ+))9wîÜÌ™3£Ñè'Ÿ|R[[{îܹwß}×ï}%ÅM(É{Ý’¡JKK¯_¿>Éòp¹o|ãÿûß‹‹‹£ÑhKKKOOÏòåË=ÿ‡µd(àKqN%Cò^7GUò8ª€Äùúü3<ðì³ÏÆ.!Ìž=;++kñâÅ—/_ö{SÉrJò^·d(Ç233c_ÄBŽŽæææúº#¸ 588˜¢Ÿ|/}}}—/_þï9ñd(g8§’!y¯›£*yUÉp_é6õ­.*E€oll,ö^|ÇŽûøãÇÆÆüÞ‘$CqVÅܹsûûû­µeeeccc555O=õ”ß›J–ËPƘM›6Ýõ`uuõ/ùKo{å•WŠŠŠâ¿¹þßöÈIDATÒ-**ª««KEcH†rF²Ò%CIVºÍÅQ• g•n¶ºd¨aᬠž­J’1fîܹuuu|ÿý÷.\èíB›7o.--ݳgOSSÓ¥K—šššvíÚU\\¼eËo²¢¡€»HVºÍÅQ• g•n¶ºd¨aÖÑßßÿ‹_üÂï]xL)gUPH>±ã2”1¦¹¹¹¸¸x÷îÝñ»»»xàoÊËËkii¹ëÁþóŸùùùÞ.dEC¹§TéqJ¡$+ÝŠæâ¨J†³J·[]2TŠ0ëèèèÈËËó{S ÅY…0ÆŒŒŒ´¶¶ìÝ»7öà‡~‰D¼](==ý¿_wtíÚµôôto²¢¡ÜSªô8¥Pª•®š sVéÖa«K†J‘ûRóÖZî6uêÔ´´4cÌ÷¿ÿý¥K—N:Õïy@2TÜ76nÜè÷.<æ,ÔÃ?üþûï×ÖÖ®]»v×®]Ï=÷ÜÓO?ííO<ñDuuukkkü‘æææªªª'Ÿ|ÒÛ…â$Cqª•®š+†£*a*Ý8ouÉPžc¼žÿèííµÖƾîëëó{_I‘ ÇYÃÃÃþóŸýÞ…Ç„úÚ×¾6eÊcÌÂ… ?üðñ±±;w~ûÛßþíoëíBû÷ïˆF£™™™‘H$33sþüùÃÃÃo½õ–· ÑPÎHVºd¨8ÉJ7¢¹8ªà¬ÒÃV— •"|pàÅ~©3>>û½æÂ… Ï;çߦ’%*®³³séÒ¥“°^à*(ôBµ´´œ?þæÍ›ÙÙÙK–,‰F£~ïÈb¡$+]2Tœ^QÄHæ"T ˆµzLpC¥ý¿ÿ¾ÚÆÆÆb_(•…d(EOOOì‹ø;ƘiÓ¦åååùº¯¤¸Õ××7sæÌûîKù=£h4:{öìØœ›››Òµ$C9 Yé’¡ UÉsVéÆa«K†òW 
G$ïËI†*úï|ç;ýýý±¯+**üÞWR\†jmmF£ùùùóæÍû÷¿ÿý§?ýiÁ‚Ï<óLü_6^¯««+..ÎËË{ðÁóòòŠ‹‹ëëëÇÇǽ]Ȉ†î"YéF4GU2œUºqØê’¡R„+Ð:$ ­Jò¾œd¨8¥¿~qBýèG?ÊÉÉùùϾ{÷î«W¯c~üã9r¤°°ðèÑ£.´eË–wÞygóæÍË–-ËÉÉùì³ÏΞ=»cÇŽŸüä'Û·o÷p!#Ê=þ?õ§Z骹b”þÆ¥:”³J7[]2TªøñÖÓH‰W_}Õï]xL2”ÒgfÄ*(„ÊÍÍíïï·Ö^»vÍÓÕÕe­íêêò|]—ŸC(Ê=ÉJ— %Ù~V4¡à¬Ò­ÃV— •"\Ö‘ý«_ýÊï]xL2 íöíÛƘ¬¬,cÌŒ3bÿyûömo™5kÖ]Ι3çÖ­[Þ.dDC¹'Yé’¡mÎ*Ý8luÉP)¬£¿¿ÿÊ•+ýýý~oÄK’¡ÙÙÙ›6mò{sêá‡>xð 1æÀ%%%555­­­555ßúÖ·¼]ÈåçJ†rO²Ò%C!@8ªà¬ÒÃV— •*~?d½òÊ+EEEñÿM‹ŠŠêêêÆÆÆüÞZâ$CÅIÞ—“ …Ä444¤¥¥eddäää\¼xqùòåÆ˜Y³f9sÆÛ…zzz{ì1cLFFFaaaì7ß=öXOO· YÑPÎHVºd¨8ÕJWÍ…ÿ_Î*Ý:luÉP)›`^à_†þe$C!púûûƒøæþ“sêÓO?=þ|yyyaa¡µ¶«««  `âûÐxÈÙçJ†rC²Ò%C!ˆ8ªã²Ò«V— •¾Žßð@Ð_†þ¥$CÅõõõ]¾|¹¯¯ÏïxI)”ä;’¡bÚÛÛ;;;ýÞˆ—ÄBIVºd¨8¥JŸH)—d«K†Škõ˜à†b¼ôôôÿ®òk×®¥§§û²OH†’¬uÉP›7o.--ݳgOSSÓ¥K—šššvíÚU\\¼eË¿·–8g¡ª««'ùîºuë<\kpppÍš5S¦L™2eŠ1¦²²rhhÈß'Ê%ÉJ— %YéV4GUÂ\VºuÕê’¡R‡+Ð÷Ì3ÏXk·oß^VV{¤¹¹yóæÍÓ§Oÿë_ÿêïÞ&Jò¾œd¨üüüS§NÅÿîÅ\¼xññÇïííõkWIrjÞ¼y‡¾×wW¯^ÝÝÝíÕZ[·nmnn~ûí·W®\ùÞ{ï½ð eee;wîôêçÇI†rI²Ò%CIVºÍÅQ•0—•n\µºd¨òuü†‚þ2ô/%Jò¾œd(É'vœ…*))ɘ”‡k=ôÐCíííÖÚ²²2kmwwwII‰‡??N2”K’•.J²Ò­h.Žª„¹¬tëªÕ%C¥Ï‹ðËÐïM,TFFFGGÇ]oçÐÛÛ[ZZ:<<ì×®’$Jò‰ÉPYYYƒƒƒÆ˜h4ÚÒÒrûöí¯ýë}}}~ï+)’¡bÄ*=F,”d¥Ñ\’­.J²ÕÊ×ñ^¹~ýú_|á÷F¼¤jÕªUO?ýôÄ_B_ºté‡?üa¬îJ2”ä;’¡æÎÛßßo­-++«©©yê©§üÞT²$CÅ)UzœR(ÉJ·¢¹$[]2”d«=°‚ßýîwóçϽ }Ú´ißýîwOŸ>í÷¦’¥J²Ö%CÅ4779rä÷¿ÿý‘#Gš››ýÞŽ7ÄB=ûì³GµÖΞ=;++kñâÅ—/_ö{SÉ’ e+Ý*†R­tÕ\V®ÕcÄBI¶zÐCq:ðêëë÷íÛ÷üóÏc8°víÚÑÑÑ?þñÇŽ[±b…ß»Kd¨±ûr1’¡Œ1ŸþùðððŒ3R÷zî)…·Ö¦¥¥?~|Μ9‹-š:uªß›J–d(ÉJ— £Z骹”Z=N)”d«>”¯ã7Ço$ïæÍ›ñ· ˆF£mmmƘ5kÖœ={Ö×}%E2”1fïÞ½ ,HOOÏÏÏOOO¯¨¨8sæŒß›J–^¨úúúW_}µ²²ò׿þu$Ù¸qã7¿ùÍŠŠŠüã~o-q’¡ânܸ±qãF¿wá1¥P’•.Ê(VzŒ^.ÉV— §ÔêqA å÷Žd•——¿þúëÖÚ;wîÔ××?òÈ#ÖÚ¶¶¶™3gú½µÄI†ª««‹D"µµµµµµ‘Häå—_~ñÅ322ýÞZâ$CI>±#*®££#//Ïï]xL)”d¥K†’¬t+šK²Õ%CÅ)µz\@C1Þ©S§²³³óóóóóó³²²bm~üøñªª*¿·–8ÉP’µ.*33stt4öõ­[·²³³­µmmm÷ß¿¯ûJŠd¨¸€À“S %Yé’¡$+ÝŠæ’luÉPqJ­ÐP¼ –‚«W¯~ðÁÆ˜ŠŠŠH$â÷v¼¡*++«¯¯oúôéÆ˜‘‘‘‚‚‚ööö²²²‘‘¿w— ÉP>úèªU«¶nÝj­}íµ×Þ}÷ݦ¦¦ööö¥K—ö÷÷û½»I†êéé‰}ÑÝÝý½ï}ï“O>1ÆL›6-//Ï×}%E2”Q¬t£J²Òh.ÉV— %Ùêåëø „ˆä}9ÉP’OìH†š:1&öÅ’%KüÞWR$C!($+ÝŠæ’luÉP’­ôP ÀÁV]]=Éw×­[çl'’ eEk]2”µ¶««ëСC‡êììô{/ž‘ Ð+X““ %Yé’¡¬n¥«æ’luÉP12­>Q@Cq:ØæÍ›wøðá{}wõêÕÝÝÝ.÷ã 
ÉP1z÷åŒh(KggçÒ¥K¯_¿î÷F¼$J²Ò%CŨVºj.ˆL«OÐP ÀÁVZZ:ùß¹¡¡!g›ñŠd(ņ ¿7@"FFFfÍšu׃sæÌ¹uë–/ûñ„d(-ÉVz(`@ =ñÄÕÕÕ­­­ñGš››«ªªž|òIw•$ÉPZ’­ôP À€@Ú¿ÿÀÀ@4ÍÌÌŒD"™™™óçÏ~ë­·üÞZâ$C@hI¶zÐCñ`@€µ´´œ?>ö1 K–,‰F£~ïÈ’¡ ´$[=¸¡€ÁöùçŸϘ1#-Mç£ $C@hI¶z@CqT{÷î]°`Azzz~~~zzzEEÅ™3güÞT²$C@hI¶z CiX ®¾¾~ß¾}Ï?ÿ¼1æÀk×®­¨¨8vìØŠ+üÞ]‚$C@hI¶zÐCqH‘H䨱c‹-2Æ|üñÇÏ=÷Ü… Þ|ó͆††ÆÆF¿w— ÉPZ’­ôP À€@ÊÊÊêëë›>}º1fdd¤  ``` ½½½¬¬lddÄïÝ%H2„–d«=¯ÒâÅ‹wîÜiŒ±Ö¾ùæ› ,ˆ=þÀøº¯¤H†€Ð’lõ ‡â5À€@Ú±cÇ~ðƒ;vcFGGÿö·¿cZZZV­Zå÷Ö' BK²ÕƒŠ+Ѐ ºzõê|`Œ©¨¨ˆD"~oÇ’¡ ´$[=С€¡Àk€Á³aÆI¾»~ýzg;ñd(-ÉVÅk€ÁsòäÉÓ§Oßë»'Nœp¹¯H†€Ð’luP\Oiiéõë×'ùCCCÎ6ãÉPZ’­.Š ¼ À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0BáØ ³Ÿ3×IEND®B`‚sleef-3.5.1/doc/html/trigsp.png000066400000000000000000001227071373003144100163520ustar00rootroot00000000000000‰PNG  IHDR óúX¬bKGDÿÿÿ ½§“ IDATxœìÝ{\”eþÿñkå0ˆÈA@"jbhEnâ‰Ô²Ô2W] V(ËJQ²¶]Y»ðÚZHé`Š®âæ·òPž:¨-&n”gCTp€C`~Ü¿ïÄaî1ffæõ|ìs_÷u_×çº'ÛÞÞ‡Qh4€µ»©» ÀÀ›@Ø0À&€6 ° `€M l`À›@Ø0À&€6 ° `€M l`ìV­ZÕÝ5 ¢¢¢„·Þz«™í.&ª™aЃ+À³:wîÜôéÓ½¼¼œœœ† ’ µ§¤¤DGGwom&uûí·gggk7µ^ Û†e @gì»»€ Ñh4S§N7n\~~¾‡‡GQQѤ]3gÎìÞÚÌÌDëµâa¯_¿Þ«W/S”°\˜Ï¥K—JJJ–,YâçççèèöÔSOI»¢¢¢²²²¤Ï#GŽ|öÙgcccÃÃÃGŒñÙgŸIí.\¸çž{ÜÜÜBBB6oÞ¬P(®\¹ÒfŠŸ~úéÉ'Ÿ òðð˜6mZiiiû2:ìSYYéííýÞ{ïI},X0~üø––=cÖÕÕ=ù䓃 êÓ§OxxøÉ“'…þþþŸ|ò‰Ô¡¢¢B¡P¨Õê§žzª   55uÈ!³fÍj³ÞK—.Í™3ÇÇÇÇ××wÁ‚555úσ–þaG޹~ýúqãÆ999EDD|÷Ýwo¿ývPPPß¾}/^¬Ñh ?c†|;º œZwØOæÈ‘#SSSǺgÏžÎN”®W_}uРAnnnÞÞÞË–-Ó¿Æ'X10À|ú÷ï?|øð%K–ìØ±ãüùózzîÙ³çý÷ß/((HII™;w®ÔøàƒúøøTVV~ñÅÚ¤ÚÆþð‡~øáäÉ“•••#FŒˆomm5¤¯¯ïÖ­[/^|öìÙììì}ûömÛ¶ÍÎÎNϘóæÍ+((8räH]]ÝîÝ»½¼¼:[NFFFxxøK/½tþüù;w¶Ùûàƒ666ž;w®  àâÅ‹óæÍÓ V±yóæ—_~¹¦¦&""âþûï?zôh~~~^^^NNÎ|`økCU†O­«³“¹k×®mÛ¶={vÖ¬YzN”¤°°055õã?V«ÕEEE=ôþ5þ ¬„3ª®®~þùço¿ývÿ×^{Mj=zô›o¾)}Žˆˆxùå—¥ÏÒU¾‹/~ÿý÷Bˆêêj©ýÈ‘#BˆË—/ë[^^.„(++“ú477»¸¸œ9sF·ý}þò—¿ :´OŸ>Ÿ~ú©þþÒUÄóçÏ·Y ŸŸßÁƒu½zõªF£¹í¶Û¶lÙ¢í¦­ù‡~B”––JíBˆü±³óÐfºÎ†•饗¤ÏÒEi©F3{öì?ýéOž±öÃÊVeÈÔºÃvv2#""6lØ }Ös¢´ŠŠŠ¶nݪV«µ7ú ¬ÏÌÊÝÝ}Íš5kÖ¬¹víÚÖ­[-Z(•J¥Réææ¦Ý¬¯¯×³:Ý3Öž!UÉN­«¤¤¤³“éçç'}Т$ƒ Ú¾}ûk¯½¶hÑ¢#FüéOº÷Þ{Mý 
,Ð=Ö¯_úôéö¸=??¿Ÿ~ú©¦¦FÊÀ>ª*¥âÏ>ûLϽ¬zú´¶¶ÎŸ?Ú´iÇß¼yó#<¢§YYYsssQQQpp°n{Ÿ>}®]»&}Ö}Dù¦›:~ì(  ¹¹¹¢¢Âßß_QTT$5vV k CΘux2… …Bú`à‰š>}úôéÓùå—ìììxàÊ•+7ú ¬ÏÌçòåËË–-;uêT}}}mmí¦M›JKK ü!œ!C†Üyç)))W®\Y½zuû>qqq‰‰‰Ú{wìØÑÜÜl`ŸuëÖUTT¼óÎ;Û¶m“Þ/¥§``à´iÓ’’’ÊËË5Maa¡Én»í¶½{÷ !š››ÓÓÓµóúúúªTªö5;véÒ¥µµµ555)))S§NÕ½ª©_gÃÈ3fL]†œ¨Âƒ^»vÍÁÁÁËËK¡PØÙÙÝè7¸sçÎÌÌL³­`N`€ù899ýüóÏ¿ÿýï½½½7oÞüî»ïŽ7ÎÀÃsrr*++}}}ÇŒ#½ßÈÁÁ¡MŸwÞyçæ›o7n\Ÿ>}Fõá‡j/!êïsèС´´´;w*•ʘ˜˜+VÌš5ëçŸÖ3æ{ï½wóÍ73ÆÕÕuÆŒÒõÞ^x¡°°pøðáwÝu×í·ß®tÅŠ{öìéׯŸî½¸ÚuÙÙÙÝ|óÍaaažžžï¼óŽá§Tϰ2䌙A‡'³ ÙuíÚµçŸÞÇÇÇÝÝ}ݺu»vírvv7ø î߿ǎ¦_1 (4ÿý),È¡C‡fΜÙáátˆ+À‹qêÔ©ÂÂB!Ä… ž{~¸»+–„—`,ÆÅ‹gΜyõêÕ>}úÜwß}/¾øbwW, ·@l·@l`À›@Ø0À&€6 ° `€M lB·à–––åË—{yy)•Êøøøªª*Ãû¸¹¹)~­¢¢Â¼å,L·à 6äää>|¸¤¤¤©©iöìÙ†÷©ªªjü¯G}tìØ±þþþæ-`a¦[&ö÷÷OMM}â‰'„gÏž ;wî\HHÈ õihhðóó{ýõ×çÌ™cæú–¥{®WWW_¸p!22RÚ uvv>sæÌöÙ¾}»½½ýÌ™3ÍS6ÀrÙwˬuuuBˆ¾}ûj[ÜÜܤÆê³iÓ¦… öîÝ[Ï\MMMF)Ð-Œrór÷`WWW!Dmm­¶E­VK†÷ùúë¯óòò¶mÛ¦®¦¦&³Ýæ­P˜ï–r³Íe•‹²Ö¹¬rQÌeA1—eÍe•‹²Ö¹¬rQÌeA1—eÍe•‹’æ2Ê8Ýs ´‡‡‡ŸŸß©S§¤M•JÕÐÐqC}6nÜ8qâÄÁƒ›­l€åê¶·@'''§§§«Tªššš+VÄÄÄHo·Ú¹sgff¦þ>BˆºººmÛ¶%''wWýËÒmxåÊ•<ðÀ˜1c ENNŽÔ¾ÿþ;vèï#„xçw\]]ãâ⺡t€ê¶ŸA2+¾ ž§˜«»&b.ËšË*Å\4sYÐDÌeYsY墘˂&²Ð¹ìV­ZÕõQz²Õ«W›sãÆ³¾¹¬rQÖ:—U.й,h"沬¹¬rQÖ:—U.й,h"沬¹¬rQÆŠu\ôhÆŠuÝö 0æD“ˆŠŠÊÊÊêî*ðì»»fuîܹgžyæ‹/¾øé§ŸüüübbbÞzë-!Ä”)S† –‘‘¡ÛyÊ”)û÷ï×m9xðà¤I“ lŸ\RRÒÔÔ4{öl©gí3súBüø£¹gB€›réÒ¥’’’%K–øùù9::†……=õÔSú±³³sÔqÓM7ÞÞ«W¯öFEE…„„ôë×/ @¥Rµï³yóæ‰'úøøDFF.^¼øóÏ?ïp-sæÌññññõõ]°`AMMÔ>räÈgŸ}6666<<|ĈŸ}öY›5Í£>º`Á‚ŒŒŒ°°0¥R9lذ_|QJ›z†MMM7n\hhèž={~úé§'Ÿ|2((ÈÃÃcÚ´i¥¥¥®T{ ´ƒƒƒtNZ[[·mÛ–””ÔáÙ~î¹ç.\øôÓOÞ}÷Ýúß´iÓòåËÃÃý¼¼ÒÒÒŽ9"ÏÎÚm°!ýû÷>|ø’%KvìØqþüùî*#''ÇÇÇÇÅÅå…^X½zµþÎ_|ñŨQ£Ú·?øàƒçÎ+((¸xñâ¼yó´»öìÙóþû襤¤Ì;·ÍgÏž-))Y°`A›v…B¡Ø]»vmÛ¶íìÙ³³fÍúÃþðÃ?œ|øð¢E‹: ·ZÿøÇ?:”––Ö¦½¨¨èèÑ£¯¼òŠ›››‡‡GzzúG}TYY)í]´h‘———4QYYYUU•î±—/_BøùùµŸNÿ°‰‰‰¾¾¾BˆŠŠŠÝ»woܸÑÛÛ»wïÞëÖ­ûþûï ôÇÿÚ´iÓÂ… {÷îÝ~WmmmssóöíÛß~ûí‹/N›6íž{î¹páBg‡×ÕÕ !úöí«ÝëææVWW×Y»!åY70`[ÜÝÝ׬Yóïÿ[­V¯ZµjÉ’%ÅTÉïÿû|nnn†·?þ½÷ÞSü—Z­–ú899ùúúFEE=ýôÓ÷ßuuu‡S¿òÊ+ëׯ?|øpPPP›]åååöööÒæàÁƒ¥FiSû.+'''!D}}½î±R6n, V›™‹‹‹ Ellì°aÆ 6|øpgggmNÖã믿ÎËËÓÞÀÜæä(•J…B‘˜˜8jÔ(''§•+W:88|úé§.½L÷åXjµÚÕÕµ³vÙò¬°QŽŽŽ <}ú´žn...þ:´ÏúÒîèè8wî\ÍiC²–F£ihhè0:®Y³&==ýÈ‘#Ço¿7  
¹¹¹¢¢BÚ,**’ YxhhhPPÐ;ï¼Ó¾ýÃJ÷H !(„øì³ÏÎýWUUÕäÉ“e§Þ¸qãĉ¥\-„hsrœœœBBB´¥é´“¶?ÜÃÃÃÏÏïÔ©SÒ¦J¥jhhˆˆˆè¬Ý“cÝÀ€ ¹|ùò²eËN:U___[[»iÓ¦ÒÒÒèèhioKKË5¦}cKKK‡µíúµ¶¶®_¿þìÙ³µµµùùùIIIAAAÆ Bìܹ333Sê–’’òÖ[où¤žZZZ¦OŸîããÓ«W¯Ì™3çüùóÒ®„„„‰'JE¶ÜÁÁ¡ýP•••³gÏîß¿¿··÷¼yó._¾,µGDD¼ûî»ÒçÆÆF!Ä÷ßßþð“'OÞwß}îîŽ!!!©©©Ò† «ÑhêêêRRR‚ƒƒ]\\8þüæææ6SŒ=úÍ7ßÔnþýï÷õõ½~ýºžóÓÚÚºvíZ___—èèè/¿üRÿáÍÍÍË–-óððpvvŽ‹‹»xñ¢þv e¬X§Æ²b …õ¯¬˜±b}ׇƢèøW!Eó†éëÀñ 0À&€6 ° `à·Û¿ÿ]wÝ%}ŽŠŠÊÊÊêÞzÚë™Unúôé[·níî*~;K?ÿV† kpîܹéÓ§{yy999 2$!!A7þü6=Çÿøã !¦L™¢P(6oÞ¬Ýõí·ß* isÊ”)O=õ”žI5ÍÓO?½zõji3%%Eûƒº=‡±ªÚºuë˜1c\\\ìíõ&ÝüqÖ¬Yžžž®®®111yyyímiiY¾|¹———R©Œ¯ªª2|üÕ«W§¦¦655u} m|õÕWqqqžžžNNN¡¡¡+W®¼råŠÑgÑ=ÿnnnŠ_«¨¨hȱcÇ~÷»ß999¹»»/\¸Pÿ±[CιVðsB‘dÖÿ }Þ˜'Ùp`X1b„"))éŸÿü§Z­Ööüþûï=š”ôÿ_¹úæ›oj÷¾ùæ›aaa†Ï{àÀ¦¦¦ñãÇK›3gμ¡Ãëúõë¶«*%K–¼ôÒKmÚ“““«ªªþóŸÿTVVŽ5êÞ{ïmiiiÓgÆ 999‡.))ijjš={¶áãGDDx{{ÿóŸÿìútíÛ·oìØ±ƒ :vìØ•+Wrss5Í <¼³³Ýžîù¯ªªjü¯G}tìØ±þþþmúùå—S§N1c†J¥ÊÏÏøá‡õÛÙ¹5äœk?øË8ÉÜ3JÀ°x—.]*))Y²d‰ŸŸŸ££cXX˜tåvÊ”)^^^ï¾û®¶gffæèÑ£¥x,„¸ï¾ûŠ‹‹ „¿üòË{ï½—˜˜hø¼{öì™4i’B¡6uov­««{òÉ' Ô§OŸððð“'OvÖ¨õúë¯9R»Y\\lggWRR"„øé§Ÿž|òÉ   iÓ¦•––J}FŽ™šš:nܸÐÐÐ={ö¼ú꫃ rssóöö^¶lYûª.]º4gÎ__ß ÔÔÔhÇyöÙgcccÃÃÃGŒñÙgŸµ_ì”)Szè¡¶i?þüC=4`À¥R™œœ|éÒ¥ÊÊÊ6}6mÚ´|ùòððp//¯´´´#GލT*ÇBÄÆÆîÙ³§}ûo^ŽF£yôÑG,X‘‘¦T*‡ öâ‹/JiSϰºg»³/¥ ÝóïàààèèèèèØÚÚºmÛ6í_Äèzî¹ç.\øôÓOÞ}÷ÝúíìÜrÎm¯ÿþÇ_²dÉŽ;Ο?¯m¿é¦›µ×x›ššÞ~ûmÝÔÑ«W¯ HvíÚ5bĈ¡C‡>ï×_ÝÙÅÕyóæ9r¤®®n÷îÝ^^^5jÍ™3çܹsùùùÒfvvö¸qã‚‚‚„øÃ~øá‡“'OVVVŽ1">>¾µµUê¶k×®mÛ¶={6"""55õã?V«ÕEEE=ôPûª|ðÁÆÆÆsçÎ\¼xqÞ¼yÚ]{öìyÿý÷ RRRæÎkøIX¾|ù®]»*++þùçM›6EGGûùùév¨®®¾páBdd¤´êìì|æÌç1bÄ×_mÄåœ={¶¤¤dÁ‚mÚ¥¿ËÐ3¬ölÏš5KÏ—"kûöíööö3gÎlÓ~ýúõÏ?ÿÜÑÑñ–[néÛ·ottô矮çØÎÎm×Ϲµ"Àâ)ŠcÇŽ?þoû[xxx@@À믿.íJHH8{ö¬t­577÷úõëm’áÿøÇ­[·^»v-33sÑ¢E74ïÕ«W]]]Û·—••}ðÁ™™™T(7ß|sppp‡ºGõë×/..nË–-BFóöÛo?òÈ#BˆŠŠŠÝ»woܸÑÛÛ»wïÞëÖ­ûþû參ÖBˆÄÄD___!D¯^½4Í×_][[«T*ï¸ãŽ6U=zô•W^qssóððHOOÿ裴Wk-Z$òûï¿¿¬¬Lÿ#£ºîºë®ÖÖÖ¸¸¸ìÙ³çÍ7ßÔ^—ÔÕÕ !úöí«mqss“ äêꪽ k”å\¾|YÑ&¨2¬ölëÿRdmÚ´iáÂ…½{÷nÓ^[[ÛÜܼ}ûö·ß~ûâŋӦM»çž{.\¸ÐÙ±Û®ŸskE€5pww_³fÍ¿ÿýoµZ½jÕª%K–|üñÇBˆL›6-33S‘™™9wî\'''Ý}øðáSÇ¢E‹Ö­[7þ|‡škÔ¨Qß}÷]ûö   æææ¢¢"ÙÆ6bccöîÝ›={öl)¼IÅ~öÙgçþ«ªªjòäÉÒ!º—[§OŸþÉ'ŸTWW/X°àh“úš››µ¯–* ¸¡%·QSSSZZúøãKïR^²dISSÓñãÇuûxxxøùù:uJÚT©T 
†ÏRPP0jÔ¨6]YNhhhPPÐ;ï¼Ó¦]£ÑèV{¶;ûRæÎ«ù/77·g߸qãĉ;ü«''§í¦4îWÜæØÎÎm×Ϲµ"Àâ]¾|yÙ²e§Nª¯¯¯­­Ý´iSii©ö·g&Ožìéé9cÆŒ;ï¼3<<¼ýáñññX¹reû]---×th4ݽӧO×½:§8mÚ´¤¤¤òòrFSXXXTTÔac›íììæÏŸÿꫯîÞ½[ºÿY-...11QºyõêÕ;v477·9¶°°ðàÁƒ×®]sppðòòR(vvvº‚ƒƒÇŽ»téÒÚÚÚššš”””©S§J7ôB:ÒÛ¥³!„ðôô Ù¸q£Z­njjúÇ?þqýúu)híܹSºð.„HNNNOOW©T555+V¬ˆ‰‰‘bžnŸÇ—,mŽ=úÍ7ß”>«ÕêÇ{, ÀÅÅEzásgm !†®ÛXWW—’’ìââ2pàÀùóç777k4šˆˆˆwß}WêsæÌ™Ñ£G÷íÛ·_¿~£FúßÿýßöUUVVΞ=»ÿþÞÞÞóæÍ»|ù²Ô®;Ncc£âûï¿oS˜îïEI¤ÃÏ;7mÚ4www—[o½uÏž=Rÿ„„„‰'JŸ›››—-[æáááììwñâÅö}:ÿÌ™3 øå—_ÚŸ«®,G£Ñœø lŸûî»O©TþùÏ–r¬F£éß¿zzúüùó;œ‹[ „[ hϲoöðððóóÓ&~•JÕÐÐaH'''ÝG…¥¿ 0â_ ¬R·½:999==]¥RÕÔÔ¬X±"&&Fе;wîÌÌÌÔßç±ÇËÊÊúæ›oššš^zé¥ëׯOš4©»°öÝ5ñÊ•+Õjõ˜1ccccsrr¤öýû÷—””,Z´HOŸÇ¼¶¶vòäÉõõõ·ÜrËÇìëëÛ] Xë>–g€„g€hϲŸÀÌÀ›@Ø0À&€6¡Û~ `Ñ,îÇ ¸ ° \ /ß'7×ôuôt\Ø0À&€6 ° `€M l?ƒÀ–Èþb?`½¸ ° \Ѓ(’d:hÞ0K°F\Ø0À&€6 ° `€M l`À›@Ø0À&€6 ° `€M l`À›@Ø0À&€6 ° `€M l`À›@Ø0À&€6áÆpffæ¤I“LT ¦sc¸_¿~AAA¦©²¿¡Þ³fÍš5k–‰JÀtd®Ÿkuuõ… "##¥ÍÐÐPggç3gÎÞgçÎþþþ·Þzë_þò—††ýÓ)~mÕªURû©S§´} ëëëµ›º»t?×××v¸‹,bõðUèî2P_…­Œ0c†˜3Gßÿ {цşF`F`½#¨‡¯Â¦FÐï7Ô””Ô&Ä8—,…F£Ñ³ûÉ'Ÿ,((ÈÌÌ ...þãÿ8|øðW_}µ‹³÷Ýw¡¡¡R‹ŸŸßêÕ«uï¯ÖÓçÓO?uttôññ9{öì²eËFŽ™““Óé 2k`Câãe:äæš¥£’]”°ÌuY%¾,Eù/Kó†1Š€žJ‘$Ó ö²_–0Ò÷e¬X'sŸÕÚµkçÍ›7dÈ;;»–––¸¸¸õë×w}VWWW!Dmm­¶E­VK†ô™8q¢Ô2xð`''§»ï¾ûçŸV*•]/ `­dnvuuý׿þUZZzèСÒÒÒ={öôéÓ§ë³zxxøùùi/y«Tª†††ˆˆˆí#„èÝ»·F£iiiézU+&€…­­­jµº±±100°¹¹ÙXQ3999==]¥RÕÔÔ¬X±"&&Fz»ÕÎ;333õ÷ÉÌÌ,..®­­=qâÄ’%K¦L™Òæê1mÈàÒÒÒÛn»m̘1óçÏBìÚµ+!!Á(¯\¹ò3fL@@€B¡Ð>Ä»ÿþ;vèïóÏþóŽ;îèß¿ÿìٳǎ»uëV£”°b2OO›6mĈëÖ­>|ø¹sçjjjn½õÖ’’s•g¼ Àÿá%Xè^¼ ÀK°,ˆµ½ëøñã»wï¶³³“6ÝÝÝ«««»>+f&s ´R©T«ÕÚͲ²2ooo—€ñÉุ¸ÄÄÄŠŠ !Ä¥K—/^pàÀ·ß~ëáá!„ Ûºukxx¸Y À˜dnÖh4ööÿ’ííí5‰KÀød®O˜0á‘GÉÈÈ(//ê©§&L˜`žÊt§øxù>ýsM_`42W€_~ùå+W®öêÕ+00°ºº:##Ã<•`Dú®755}öÙgG-***//6[eè'û&6^Ãté»Ü«W¯¿üå/Bˆààà˜˜Ò/Àré À …ÂÛÛûâÅ‹f«‘y ÖwÜq×]w%&&0@¡PHsçÎ5}a“LÞ¿¿R©Ü¶m›n#`qdp~~¾yêÀ¤dðÉ“'½¼¼´¯¿***ºråÊwÜaú aÈïåæò{¹@÷“ À »wïÖn677'&&þç?ÿ1qU`=dºOðë}f!€KJJ†ªÝ:thqq±‰K‚MãW=˜ˆ¾ŸABxzz–——k7ËÊÊÜÝÝM\Æ'€ãââ.\øÃ?h4𢢢Gy$..Î<•`D2xíÚµ...C† éÕ«×àÁƒûôé³~ýzóT€É<ìêêú¯ý«¬¬¬¤¤$(((00Ðø //¯¾¾^Û˜‘‘aâª02™¼xñâ;vÄÆÆ*•Jó€)Èà­[·æåå 
:Ô<Õ`"2Ï»¹¹yyy™§LG&¯Y³fÙ²ejµÚ<Õ`"2822òèÑ£ýúõsÑa”‰[ZZ–/_îåå¥T*ãã㫪ªn´F£?~¼B¡ÈÏÏ7JI+&€~øáèèèO~Í(oذ!''çðáÃ%%%MMM³gϾÑ>¯¿þº½½Ì3ÌHd¤J¥:qâ„“““Ñ'Þ´iSjjjxx¸"---,,L¥R…„„ا¨¨èþçöîÝ+í@?™+ÀQQQ*•Êè³VWW_¸p!22RÚ uvv>sæŒ}4MBB /¼àááaôÚVI&ÇÆÆÎ˜1ã¯ýë{:º>k]]¢oß¾Ú777©Ñ>¯¿þzŸ>}:¼kºCŠ_[µj•Ô~êÔ)mŸÂÂÂúúzí¦î.ÝÏõõõ………îbc` ² «µîïBž°Š6#¨§}zF0®øÏd×õ„UXýŸ,›Á@=|65‚´ «Á@=|Vÿ_ †e¿¡†¤¤¤6!ÎÀ¹d)4žÝ#GŽlߨõ—NUWW{zz?~<**JjQ*•[¶lyðÁeûDFFþîw¿ûꫯüüü.^¼èëë{úôéë”(2kD¢H’é yÃ,u.>^¾On®éë06Ö¥è/¿.Ëû¾ø²z¾,«ü7†E~Y¶JöŸ@Á—ÕcXë—e³ë²ÊE Ö¥"#Å:™g€Mô‚e??¿S§NIáV¥R544DDDÒç‹/¾¸råŠÔY:ãÇ_²dÉêÕ«MQ*›b­ÿç!€M'999==}Ò¤I^^^+V¬ˆ‰‰‘ÞnµsçΫW¯.Z´¨³>ãÆ“¹|ùò­·Þš““3zôèîZn€!K ¸ô¿A·à•+WªÕê1cÆ466ÆÆÆæääHíû÷ï/))‘p‡}œÿõööBˆþýûë>* @{Ý€íììÒÒÒÒÒÒÚ´geeÉöÑòññáù^€!dÞ €u0è pUU•î««‡ b²z0 ™|ôèÑùóç—••é6r×1ÀâÈܽxñâuëÖÕÔÔ4ê0Oe‘ü-ÐóæÍ3CøC~.(—Ÿ € sØ××÷òåËæ)Ó‘¹³hÑ"OOOmãܹsM\F&€?úè£Þ½{gggë6€G&ççç›§LJæ`¬CÇW€³²²¢££Ã²²²ÚïMLL4qUY§ØÍÍ °à'N´ù€Eã`€M l`À›ÐñK°tµ¶¶TVVNž<¹¹¹Y¡PØÙÙ™¡2Àj(’äûhÞ0}€m“¹\ZZzÛm·3fþüùBˆ]»v%$$˜¥0ŒI&/^¼xÊ”)jµº_¿~BˆØØØ#GŽ˜£.ŒJæèãÇïÞ½[{ϳ»»{uuµé«ÀÈd®+•JµZ­Ý,++óöö6qIŸLŽ‹‹KLL¬¨¨B\ºtiñâÅ3gÎ4Ka“L^¿~½]@@@aa¡OïÞ½ŸþyóT€É<ìêêš››[QQQ\\ø //¯¾¾^Û˜‘‘aâª02™¼xñâ;vÄÆÆ*•Jó€)Èà­[·æåå :Ô<Õ`"2ovssóòò2O)˜ŽL^³fͲeËÔjµyªÀDdpddäÑ£Gûõëç¢Ã<•`D2Ï?üðÃÑÑÑo¼ñ/ÁX4™¬R©Nœ8áäädžj0™[ £¢¢T*•yJÀtd®ÇÆÆÎ˜1#))iÀ€Úƹs皸*tE’|ͦ¯L@&ïܹ³OŸ>ï¿ÿ¾n#ÜÙLE ˜LÎÏÏ7O˜”Ì3ÀX‡Ž¯geeEGG‡……eeeµß›˜˜hâª0²N°››`5:À'Nœhó‹&ó ð¬Y³Ú´ÄÆÆš¬LE&Ÿ>}ºMK^^žÉŠÀT:ý¤ììl!D}}½ôA¢R©|||L_FÖi~íµ×„µµµÒ!ÄM7ÝäëëûÖ[o™©4Œ§Ó,ÝêüÔSOedd˜±LBæ`ӥߖ––åË—{yy)•Êøøøªª*Ãû¬]»öæ›ovrròôôŒ/**2Q‘«!€MgÆ 999‡.))ijjš={¶á}bcc÷îÝûã?æåå¹¹¹Íœ9Ó¼µ,O§·@›Ú¦M›RSSÃÃÃ…iiiaaa*•*$$Ä>QQQRWW×€€€;v˜¿~€eéž+ÀÕÕÕ.\ˆŒŒ”6CCCÏœ9cxŸœœ—^xaõêÕú§SüÚªU«¤öS§NiûÖ××k7uwé~®¯¯/,,ìp—ÑG躞° Ý]ÆÕ]«h3BW–Ð^Oûg²ëzÂ*ø“uCzÈŸ,ݺ®'¬ÂêÿduýÛ´ Uü†o³‡¯â·ý{²‡¯¢³ô° Uü¶OöðUð'Ë"Vñþd‰_ÿ7³5$%%µ qÎ%K¡Ñhd;UUUé=dÈ.ÎZ\\üÝwß…††J-~~~«W¯NLL4°Occ£Z­.--}çwf̘1qâÄÎæR( ZcÏ/ÛEÑ?W¶æ ccDÆX—U.J°.³‘[—U.JXëºrsI2],oQúeÀò¾,È.J°®Ã*%lx]V¹(ÁºôOd¤X's ôÑ£GçÏŸ_VV¦ÛØõ‰]]]…µµµÚµZ-5ØÇÉÉÉÉÉÉ×××ÃÃcäÈ‘eee]¬ `Ådn^¼xñºuëjjjut}V???í%o•JÕÐÐq£}„¦¡¡¡²²²ëU¬˜ü3ÀóæÍëׯŸ££Lœœœœžž®R©jjjV¬X#½kçΙ™™zú´¶¶®_¿þìÙ³µµµùùùIIIAAAÆ 3JUk%€}}}/_¾lЉW®\ùÀŒ3& 
@¡PäääHíû÷ï×¾Õ¹³>yyy&Lðòòº÷Þ{ ðÉ'ŸØÛwÛë¬A&7FFFÆÄÄ,Z´ÈÓÓSÛ8wîÜ®Olgg—–––––Ö¦=++KŸ›nº)7Wþ!è’ À}ôQïÞ½³³³u€0'™œŸŸož:0)ù—` !*++¿úê+Þ´ °\2øÊ•+÷ÜsÏ€F=`À€{ï½·¦¦Æ<•`D2xéÒ¥­­­ß~ûmKKËÒ¥KÍSF$ó ð¾ýö[!DXXØÖ­[ÃÃÃÍRÆ$sX£ÑèþÄ®½½½F£1qIŸLž0aÂ#}ºMK^^žÉŠÀT:} tvv¶¢¾¾^ú Q©T>>>¦¯ #ë4¿öÚkBˆÚÚZéƒ⦛nòõõ}ë­·ÌTÆÓi–nu~ê©§222ÌX&Ñi–¬ZµJ­V·itss3Y=˜„Lîׯ_ûFFcšb0™\^^®ý|åÊ•´´´;î¸ÃÄ%`|2Øßß_÷svvvLLÌ’%KL\F&ó;ÀmØÛÛ×ÖÖš¨LGæ ð'Ÿ|¢ý|íÚµ½{÷z{{›¸$ŒO&/X°@û¹OŸ>·ß~{vv¶I ÀdpEE…yêÀ¤dž>yòdQQ‘v³¨¨è«¯¾2qIŸLNHHhnnÖn677'&&š¸$ŒO&—”” :T»9tèÐââb—€ñÉ`OOÏòòrífYY™»»»‰KÀødp\\ÜÂ… øáFSTTôÈ#ÄÅÅ™§2ŒH&¯]»ÖÅÅeÈ!½zõ}Ö¯_ožÊ0"™ŸAruuý׿þUVVVRRhž²0.™+ÀBˆÖÖVµZÝØØØÜÜÜÒÒb†²0.™\ZZzÛm·3fþüùBˆ]»v%$$˜¥0ŒI&/^¼xÊ”)jµº_¿~BˆØØØ#GŽ˜£.ŒJæàãÇïÞ½ÛÎÎNÚtww¯®®6}U™Ì`¥R©V«µ›eeeÞÞÞ&. ã“ÿàÄÄÄŠŠ !Ä¥K—/^>>'NœˆŠŠºýöÛ5qKKËòå˽¼¼”Je|||UU•á}žyæ™#F8;;ûûû?öØcuuuƪ `­dð»ï¾;zôè9sæ”––Ö××GFF~ûí·F™xÆ 999‡.))ijjš={¶á}.]º”‘‘QTT”››{èС'žxÂ(%¬˜Ì-Ð)))»wïž4i’bß¾}ëÖ­»óÎ;rÅuÓ¦M©©©áááBˆ´´´°°0•JbHŸÍ›7K|||/^œ‘‘ÑõzÖMæ p~~¾”~…7ÝtÓŸÿüç={öt}Öêêê .h_(êìì|æÌ™í#„øâ‹/FÕõ’Ö­ÓüÉ'Ÿ´¶¶úúú !jjjZ[[…---eee]ŸUº†Ü·o_m‹››[› ˆôùÇ?þqèС´´4ýÓ)~mÕªURû©S§´} ëëëµ›º»t?×××v¸Ëè#t]OX…î.ãê®U´¡+Kh¯§ý3Ùu=aüɺ!=äO–î]×Vaõ²ºþmZÐ*~÷ÙÃWñÛþ=ÙÃWÑÙzXÐ*~Û¿'{ø*ø“e«ø ²Ä¯ÿ›ÙÀ’’’Ú„8ç’¥Ðh4ïP(…žžžùùùþþþ×®]srrêìÃUWW{zz?~<**JjQ*•[¶lyðÁ ïóÊ+¯üõ¯=pàÀðáÃõ­PÑé{®øxÙ.Šþ¹²}4o£#2ƺ¬rQ‚u™Üº¬rQÂZו›«H’éby‹²Ð/Ë–÷e@vQ‚uõV¹(aÃë²ÊE Ö¥"#Å:ù·@›‚‡‡‡ŸŸŸ6ñ«Tª†††ˆˆÃû¬Y³&==ýÈ‘#úÓ/’î ÀBˆäääôôt•JUSS³bÅŠ˜˜é X;wîÌÌÌÔß'%%å­·Þ:xð`@@Àµk×~ùå—îZÀRȼÚtV®\©V«ÇŒÓØØ›““#µïß¿¿¤¤dÑ¢Eõùé§ŸÒÓÓ…Æ “qpp¸víZ7­:bÀ®"WþNW‘¾¼|ùr;;;!ÄÏ?ÿ¼jÕ*—––cMlgg—––ÖþýUYYYúû¸¸¸XÞ3½€îÖižsÖÀvð²_³Ô+×ïh„ IDATm/ÁÀœÀ›@Ø„nû$z~³ ›A@†"I¾æ Ó׺†[ 6 ° `€M l`x 4t^, `f\Ø0À&€6g€‹—ï“›kú:~ ®l`À›@Ø0À&€6 ° `€M l‚}wòI24o˜¥X2®lW€Ë/ß'7×ôu‰+À›@Ø0À&ð 0¬”ìã²<+ Ø®l`À›@Ø0À&€6 ° `€M l`À›`ßݬŠ"I¾æ Ó×ÐØæÅÇË÷ÉÍ5}`ZÜ ° Ý€[ZZ–/_îåå¥T*ãã㫪ª ï³uëÖ1cƸ¸¸ØÛs`n 6lÈÉÉ9|ø°··÷‚ fÏþì½{xTå¹þÿ„$dÈ’˜ƒ –€ÐR`›*•*A+JÄ"… Zë—VB•ƒŠ9(ZÙbå´ao°‚õD`£Í&äD ‰eÂûûc]_v¤ÌZï¬÷žûó‡×ÌJ®ù¬'fî‡'ï;kþôÓO¯ó{ÂÃçL™R^^þôÓO{ãÜ ±/üø%!„B!WÃkð²eËfΜ™œœ,„X°`A×®]óóó;wî|=ßs÷Ýw !>üðC¯œ¹™ŠB!„€wàŠŠŠ3gÎôîÝÛxÚ¥K—   
C‡5€¯ç{”‹EB!„BˆÎxç3ÀçÏŸB„††º„……ÿ­ï¹Nÿ—?ýéOÆñ¯¾úÊý=ÇŽ»pá‚ûiã/5~|m®ç.\¸pìØ±+~©É9xÎužƒ¹\ã'i.×þIzþ ×ùóß<ëŸà:ÏÁ\Ñ5~žcú;ëßÃw–)xþκ~”ý$}öeJÊ™û ×@£*nàÿ¦Í«¸±œ´y7Ðq4ªâÆrÒæUð¥Eʦ¤'žx¢Éw®ŸÄ!¥4ëµ®ŸŠŠŠˆˆˆ/¿ü²OŸ>ƧÓù׿þõ¸þïùðÇêr¹®ír8LªQå ðu¸mÚu]Û’us]ï^ëŸtñ–F®r¾Y¿˜?@T—Y¿¶*ÊD—Í0í]l'P?"YdQ‡ë‚,Jذ•Ø©m™5Öyg8<<<66Ö=ñççç_¼x±{÷îÿî÷B!„B!׉×.‚5qâÄW^yeàÀ‘‘‘Ï<óLjjªñáÞuëÖ;wîñÇ¿Æ÷444\ºtéÒ¥KBˆúúz!D`` · !„B!„èê:¹^»ðsÏ=7|øð~ýúÅÇÇ;Ž5kÖÇ·mÛ¶víÚkÏ_ÿú×–-[>¼¡¡¡eË–-[¶,//÷N„B!„B4Ák+ÀÍ›7_°`Á‚ š_±bÅO~ÏøñãÇoù)B!„BÂk+À„B!„BˆJ8B!„Bñ 8B!„Bñ ¼ö`B!„B±Šë¸‡­¸Ž{Ø08B!„BˆpØÖn&„B!„âp&„B!„âp&„B!„âp&„B!„âp&„B!„âð*ЄB!„\×s±ß&]ìWå……mU—Y"B®W€ !„B!„ø€ !„B!„øÜM|Ç?ý=r¹õçA!„ø0?ÙŽÙ‹íÿíD0àL!„B4ç'?VjÖge¯Š„ØÀ„B!^ÅV× ¯«ÄkB”ÃØLø?B!v3•Õ"ÁEEBÑ^‹B!„BˆOÀ˜B!„BˆOÀ-ЄB‚&„r58B!> ?VJ!Ä×àhB!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`BˆÝùÓŸþäíS v„¿äŠðC#ø?‹\þbKáL±;þóŸ½} ÄŽðƒ\þbhÿg‘+Â_ b)€ !„B!„øZÀ 3f̈ŒŒt:Æ ;{ö¬·ÏˆB!„BˆÝÑr~饗֬Yóé§ŸþóŸÿ=z´·ÏˆB!„BˆÝñóö ÜË–-›9sfrr²bÁ‚]»vÍÏÏïܹ³·Ï‹B!„Bˆ‘ºQ^^.„øòË/ÝG‚‚‚Ö¬Ysµï÷ö˜B!„Bˆ§˜2Nê·|þüy!Dhh¨ûHXX˜qðŠp&„B!„"tü pHHˆ¢ººÚ}¤ªªÊ8H!„B!„\ ýàðððØØØ¯¾úÊxšŸŸñâÅîÝ»{÷¬!„B!„؇Ž;„333ßyç-[¶DFF¦§§WWWöÙgÞ>)B!„B!¶F¿Ï !ž{ªª~ýúÕÕÕýú׿^³f·ÏˆB!„BˆÝÑr˜B!„BùwÑï3À„B!„BÈ À˜B!„BˆOÀØ#>|ìØ1ãqAAÁñãÇé²³KQTT´sç΢¢"K-tYÁÔ©SÁD0.eïbÔh¢K‘b—j¼+sÁ÷,T@Ϣ˾Hâ}úôùøã¥”7nô÷÷ \¼x1]ötUWW§¥¥ !š7o.„HKK«®®¶BD—)ìÞ½{È!½{÷¾í_4»”½‹!£‰.DŠ]¨ñ®ÌÙ³P]=‹.Û‹`yDXXXIII``àÀÓÓÓ{öì9xðà‚‚ºlèzê©§òóó—-[vóÍ7Ÿ®ÒÒÒÑ£Gúé§~~~.—kÀ€|ðAdd¤Y¯O—¹ôë×o÷îÝ—/_îСéS§¤”!!!.\ÐWìRö.†Œ&º4)v¡Æ»2dÏBuAö,ºl /‚åãÇ¿âñ®]»^íKtyË5uêÔÖ­[Ÿ:uª]»vß}÷Ý”)S¦Núþû ËDBBB***ÂÃçùùùaaaZ‹€]ÊÞÅÑD—F"Å.ÔxWæ‚ìY¨.ÈžE—}ñÚÚ3!jiÛ¶­q¡vƒsçÎEEEÑe[×ïÿûnݺ-\¸0<<üÕW_íØ±ãôéÓµ»!V€ïÊ\= ÕÅžETÂ-ÐÄWˆŽŽÎÉÉiÓ¦ñ´¢¢"99¹¸¸˜.{ºÜ»kGDDÄwÞ9yòd}EÀ.Bˆ Æ»2dÏBu±g•pöˆ”””[B—ç>|800°S§N7ÝtSAAÁñãÇo¹å+Dt™׎4r5AYb`D]úЬv¡Æ»2dÏBuùBÏ¢ËFxù"\šS׈sçνûî»ÉÉÉtÙÓÕ§OŸ?þXJ¹qãFÿÀÀÀÅ‹[!¢Ë JKKˆ$Br){CF]‰»Pã]™Ëzª £gÑe[8{D“¿&ÄÇÇöÙgtÙÓZWW'¥0`ÀªU«rrr¬ÑeݺuÁ¸”½‹!£‰.DŠ]¨ñ®Ìå#= 
Õгè²-ÜíÇw?ŽŠŠ¢Ë¶®fÍšþðÃ{öìù¯ÿú¯ÐÐÐsçÎÑe[W}}}ãÇÿó?ÿãp8´»”½‹!£‰.DŠ]¨ñ®ÌÙ³P]=‹.ÛÂØ#:vìH—.®„„„7–••%''‡††ÖÖÖÐe[WË–-?ï½÷´»ŒwqQQQaa¡u¿*Eð.5(sA%pã]™ ²g¡º {]öÅÛKЄ(bóæÍþþþ›6m’RîÝ»wÚ´itÙÖu¼%%%YTŠ€]ÕÕÕiiiBˆæÍ› !ÒÒÒª««µ»ˆF Æ»2dÏBuAö,ºl `âCÔÖÖ*{ÒE|‡É“'8ðĉ—/_>qâÄwÜ1eÊ­EÀ.¢¨ñ®ÌÅžE~ j¼£º¬€÷&„âíÚµÛ½{w||¼ñôÔ©S¿úÕ¯ ô»!ÄÇAwT—pöˆšššºººÈÈHã©”²´´Ô¢‚ÓE±'ÁÁÁçÏŸw_°DJráÂ}EÀ.Èx‡,Šb¨ñŽê²‚fÞ>½™1cÆË/¿l<®¬¬¼õÖ[£££;F— ]„+HHHh|AÈãÇ·oß^k° 2Þ!‹"„Xj¼£º¬€°G|öÙg=ôñxáÂ…¡¡¡_}õUÿþýŸyæºlè"QSSSVVæ~*¥<{ö¬Ö"`×Ûo¿íç÷ÿßSÀÏÏïí·ßÖZ삌wÈ¢ˆ^ Æ;d/FwT—%¨ú°1&-[¶<þ¼ñ¸W¯^kÖ¬‘REDDÐe7×… JKKÝO/_¾lÝeéòœ‰'º/×YQQÑ­[7!ÄÍ7ßœŸŸ¯©ØepòäÉÏ?ÿüäÉ“½¾z¤ 2Þ!‹’¸ñ®ÌÙ³P]À=‹.ÂØ#BBBªªª¤”uuuÇ7Òe7dÃv%%%íß¿ßxü /ôíÛ÷«¯¾zä‘GÒÒÒ4»Îž={Çw!Œ¿0 ñ?:u» ã²(‰ï3j¼CöbÔxGuY`èÝ»÷Ò¥K¥”ï¼óN\\œq0'''11‘.»¹ °‹kG¹|ðÁáÇŸ:uJJyêÔ©´´´‡zÈt‹J° 2Þ!‹’¸ñ9S¡Æ;d/FwT—pöˆ÷ß¿Y³f7ß|s³fͲ²²Œƒ¯½öÚ¸qãè²› ²a»¸v¤‘«mÛ¶†ËàܹsQQQ¦[TŠ€]ñY”ÄwÈ™ 5Þ!{1j¼£º¬€°§üãÿÈÌÌüè£è²¹ ²a»¸v¤‘+**ª¢¢Âý´¼¼<::Út‹J°K"Æ»J‘Jj¼CÎT¨ñÙ‹QãÕe€‰¯Ù0€]\;ÒÈ5f̘{î¹'''§®®î›o¾¹ë®»}ôQÓ-*EÀ.¢¨ñ9S¡Æ;d/FwT—8¤”Ö_j–»ï¾ûj_Úºu+]¶rýío3fLBBBaaá¢E‹&Ož,„X¸pá‘#GL¿t;]¦ðé§Ÿfgg÷ìÙóž{î1ýŽ"Bu•——3Æý†‚‚‚.^¼h<®­­ ¢Ën.ȆìJJJ2üx™ES°ë‰'žxñÅ;uêôßÿýߣFjÖ¬ÙüùóŸzê)}EÀ.7Hñ®^¤Æ…ï3j¼CöbÔxGuYW€M#::ú•W^yðÁçÍ›·fÍšÐe+ê9T—®ÙßVRR8pàÀôôôž={<¸  À\‹J°«10ñî‘j¼Cîwïê]H=‹.ûbε´ˆ”YYYBˆæÍ›ûùù­_¿ž.ÛºŠ‹‹Ý7+»té’¥ïº<§ºº:88Øx\UU¤»ÏÕºuk)e}}½Óé4n:jºE¥ØÕ¼xW)RéBwe.Èž…êBêYtÙn6É“'÷ïß?77·G;w¦Ë¶.°=rð®   ÷ßÿÁ\ºt©ñ­Ex®„„„7–••%''‡††ÖÖÖ˜nQ)v5/ÞUŠTºPãroÊ¡C‡Ôì‘£‹Bˆç Æ»2{!Ä€0!„B!„Ÿ ™·O€Ø‘ššH!„<س!„\?€=bÅŠ Mnݺõý÷ß×Úœ‘‘Ñäà¸qãž}öY­]„âã@¶-ö,B!×`˜0a‚qC¹ÆTWW/Y²Dk—bݺusæÌi|䡇úûßÿ®»‹/‚ºNUSSSVVæ~*¥<{ö¬Ö"`jÛbÏ"Ä ”µÈžE—máì)[¶lùðÿR^^~èÐ!Ý]Û¶m[¾|¹qÕDƒäääo¿ýVw¨3•²µ#î­ðœ3f¼üòËÆãÊÊÊ[o½5:::11ñرcšŠ€]´m±giäL¹·B(l%=‹.ûâ­ËOc „h~twÕÕÕåççGGG/Y²Ä8¸oß¾ØØX}]o½õ–ËåjrpË–-«V­2W¤Ø%„˜>}z“ƒééé3gÎÔÝUWW×äàêÕ«ûöí«©Èpµk×.33³ñÁíÛ·'''kíJJJÚ¿¿ñø…^èÛ·ïW_}õÈ#¤¥¥i*vA¶-Èž%Ù¶ôIå­¯mAö,ºl `¸b!¹Ž9ùÀ¼þúë]ºt™4i’¾.6'í\6lØüÉÊÊr:šŠ Wnnn||üâŋ݋‹‹[¶l©µ«eË–çÏŸ7÷êÕkÍš5RÊ¢¢¢ˆˆMEÀ.ȶÙ³$Û–>"©¼•àµ-ÈžE—mñ³f]™èM@@€ÃáB$''ïÛ·oúôé 
.ìß¿¿{·ƒ¦®-[¶´hÑ¢ñK7þ)smÛ¶mРA¡¡¡O=õ”qÄÒ½Ê\#GŽ´âe½(B$$$ìØ±#55µyóæ“&MBµiÓFkW‹-._¾,„¨¯¯?räHÏž=…¦ï?T&vA‚Ú³Û–>"¡¶•@¶-¼žE—}ñö®7eee.HâÆ?‰»÷O(_;Ré[§êÝ»÷Ò¥K¥”ï¼óN\\œq0'''11QS°‹mK#ضtIнRa+ìYtÙÀqáÂH$lNšº¬Fåÿ¬€€€úúzãqAAÁðáÃ;tè0f̘êêj­]ï¿ÿ~³fÍn¾ùæfÍšeee_{íµqãÆi*v±miÛ–."‰û?KY+ìYtÙ‡”RÍR3$‡cúôé ,h|pܸqmÛ¶5}7”J× AƒFŒñøã !>ùä“uëÖ¹¿´lÙ2M]‡£®®.00ÐÄ×´ƒ+00°ºº: @QXX8}úôôïß?+++$$D_WyyyDD„¹¯é]6Ÿ~úivvvÏž=ï¹ç ª ²mAö,Á¶¥H¨m%l[žïÀ.óñö®7ôúC'Ož4ïÚµ«mÛ¶“'Ož8q¢¿0Ê\Üø§ÊÖŽ¸HE| ȶÙ³$Û–Vpo!Á`p8¹¹¹ƒ zæ™gÜ×W())¹ùæ›/^¼¨¯+  ´´444Tñí·ß¦¦¦ž>}º¾¾Þ¸Ÿ¦®šššV­Z™ø‚6q¡¢líˆ{+<çî»ï¾Ú—¶nݪ£ØÙ¶ {–`ÛÒ È½Ba+ìYtÙ–fÞ>í1®Y7gΜ¥K—G¬¾¦«×M7Ý”››k<ÎÍÍmÛ¶­é õ.•÷XWé4hЛo¾i<þä“O&6Bk—bݺusæÌi|䡇úûßÿ®¯èÀ¿þõ¯Ç7nôóós8Ë—/×Ú•ô/Úµk·sçNãqçηmÛ¦©Ø%ÛdÏl[úˆ ”µ•.e­²gÑe_¼ºþ¬=ôúCS¦LéÑ£Ç_|±cÇŽN:½ð Rʺº:+~a”¹âÆ?‰»÷O Þ{Ðßß¿ªªÊx|âÄ ãJ¤½³TºÜGEE/]ºdK™ÏÙ¶ {–dÛÒG$qo󮬕`÷,ºì†gi[P¯éZYYyÏ=÷8‡Ã1bĈšš)¥Ëåzíµ×ôu±9éåˆ÷Éhß¾ý—_~i<Þ¼ys=¤e?@•.7ÕÕÕÁÁÁÆãªªª   ÝEx.ȶÙ³$Û–>"‰{KBe­»gÑe78“«R]]­ìB \lNz¹”­qo…¹DEE­Zµª¡¡á¥—^JII»ÀëY’mK‘Ý[!¶øžE—­àlååå x.)e]]]yyù¥K—\lNz¹ ï=ˆºNÕ˜¬¬,!DóæÍýüüÖ¯_ ‚t¡¶-¤ž%Ù¶ôIнRa+ïYtÙ À‘——×¹sg!D‡Nœ8±råÊ®]»¦¥¥kí’R¾ñÆ]»vu8Bˆ-ZÜyç{öì±B¤ÌÅæ¤— ¼uª&ÈË˃!¹PÛ^Ï’l[úˆàQÖJ°{]ö·Aòˆûî»/,,ìÉ'Ÿ\¼xñ÷ß/„1bÄêÕ«cbbÖ®]«¯Ë¸bçøñã…+V¬5jÔ?üðî»ïnݺµÿþúº€9þ|³fÍÔÜÄB¥«¢¢¢uëÖÍšY~½ze"ƒúúúÚÚÚÐÐP???$±?m‹=KG”µÈž¥Ø%¶ö,¢oOàzÓ¦M›ÊÊJ)eii©âÌ™3RÊ3g΄‡‡k튉‰9|ø°ñøë¯¿¾õÖ[¥”¯¾újÿþýµvIÜnµ#î­ >dÛîY’mKêÞ ©°•°gepöˆV­ZýðÃò_)1v×ÔÔÔ¸¯‡¦»KJyñâÅ)å©S§õu±9éåº÷Þ{~øáììì|055555uñâÅýúõ9r¤¦")efffllì¬Y³fÍš;mÚ´É“';Î;wjí"Ù¶ {–dÛÒJ¤²•@¶-ö,¢ÀѳgÏ¥K—J)/^ܾ}û§Ÿ~://ïé§Ÿ¾ýöÛµvõíÛwîܹRÊË—/Ï™3ç—¿ü¥”òÔ©S­[·Ö×Åæ¤—KÙÚ÷VŸ²mAö,ɶ¥H‚î­ [ {Q `ذaƒŸŸŸÓé ;zôhŸ>}„‘‘‘{÷îÕÚµ{÷ˆˆˆˆàà`£UlÛ¶mìØ±úºØœôr©_;âÞ Ò•bQé‚l[=K²rËöK IDATmé#’ {+¤ÂVžå9¨mË 8{ʱcÇÖ­[g´ŠË—/Y÷!•®3gά\¹råÊ•§OŸ¶H¡ØÅ椗KÙÚ÷VxÎ[o½år¹šܲe˪U«4I)…Ó§Oor0==}æÌ™Z»$hÛÂëY’mK‘Ý[!¶Èž¥ØܶL‡°9¼÷Þ{tÙÜÅæ¤—KÙÚ÷VxŽø×D³zõê¾}ûj*2\íÚµËÌÌl|pûöíÉÉÉZ»Ü Å»z‘Û–." 
º·B*l%=K½ »m™`sPy=mºn 6'½\RáÚ÷VxˆbÆ ›ÿ/YYYN§SS‘áÊÍÍ_¼x±û`qqqË–-µv5–Z÷âÞrŶ¥‹Èro…TØJðz–zvÛ2ÀæÖtQ]lNz¹ ¸vd—¢ùUÐT$ÿõgûüüüèèè%K–÷íÛ«µ«±Ôº÷– ¯(¶-]DnÀâ]½ ©g©wa·-álxMØÙ0€]ü§³ý]WÜ⥵¨±ëÈ‘#‘‘‘<ðÀ믿ޥK—I“&iíj,µîŽå‚,JâÆ;ÒLåõ—é]ì•V¢Ò…Ú¶LÄ!¥ÄcªªªÂÂÂèÒÂåp¨ûµ§K#dQj\‡£®®.00ÐR‹J‘"00°ºº: @QXX8}úôôïß?+++$$D_—Èx‡,JÀ%†zdQ¨.¤ž¥Øß¶L„0ñ9 ]‰ð\ååå–*‹±`‰¡ÞYª ©g)v‘ëÇÏÛ' 7)))×øêÁƒ鲕‹èȹsçÀDx.5ÛV)2Þ!‹"šïê]H=K±‹\?€=bذaË—/7n\BBBaaáÛo¿=qâÄÄÄDºìé2€lÀ.e› •‰ð\ÁÁÁÓ§O_°`AãƒãÆkÛ¶íË/¿¬£H1hР#F<þøãBˆO>ùdݺuî/-[¶L_d¼CÕÔxGš©Ü€Å»zRÏRìBm[VÀ-Ðalvwÿ=øë¯¿ž2eÊ®]»è²§‹h׎4r9ŽvíÚ=þøãÏ?ÿ¼ûàŽ;~ÿûß9rDG‘"22rß¾}:tBìÞ½{øðá#GŽlhhX¶l™é}S¥ 2Þ!‹"zï½Xe+aÛ²'\öˆÃ‡ßzë­î§Ý»w?|ø0]ötA6 `׎4r !¶mÛ6hРÐÐЧžzÊ8’œœüí·ßê+:þ|›6mŒÇÑÑÑ-Z´ÈÊʪ¯¯·âoÛ*]ñY”ÀwÈ™ 5Þ!{±PØJTºPÛ–pöˆÄÄÄ+V<ñÄÆÓåË—wìØ‘.{º °kûöí}ô‘ûŸ/ÇŸ2eʬY³ô»„ ;vìHMMmÞ¼ù¤I“„EEEî覛nÊÍÍíÓ§"77·mÛ¶¦+¼â‚ŒwÈ¢n¼CÎT¨ñÙ‹…ÂV¢Ò…Ú¶,ÁÂ[,ù_|ñEppprrò½÷ÞÛ­[·]»vÑeOW¿~ý8à~úÕW_õë×Ï ]¦ìr¹ÜO].Wpp°Ö"`—Pu?@e")å”)SzôèñÅ_ìØ±£S§N/¼ð‚”²®®Îо©ÒïEIÜxWæ‚ìY¨.Èž¥Ø…Ú¶¬@³´3¥¥¥Ë–-ûãÿ¸lÙ²²²2ºlë‚lÀ®””ã“$o¼ñF=´»êëëÇÇïСØ1cª««5I)+++ï¹ç‡Ãáp8FŒQSS#¥t¹\¯½öšÖ.‰ï*E*]¨ñ9S¡Æ;d/VÙJضì `â+@6 `׎4rS]]}áÂ<ÑÔx‡œ©Pã²cöu=ð*Ðaì}ož={Ö¯_¿`Á‚={öÐe×Î;ó›ß´oß>!!¡   ¨¨è£>êׯŸY¯O—锕•mذáôéÓqqq÷ß¿u·’W&v !***Z·nݬY3K-*Eõõõµµµ¡¡¡~~–_5C 2Þ!‹¸ñ®Ì…Ú³P]¨=K±KÀµ-+àì+V¬øñÁñãÇçäädgg?ž.[¹ °‹èB~~~ZZZ~~~‡¶oß¾{÷î¹sçÞrË-Ë–-‹ŽŽÖQd°dÉ’7Þx#77WJÙ¢E‹ÿøÿøË_þò‹_üÂt‘Jd¼Ce€ï¨3Ñ•­„mËžp&„Ø®iäºï¾ûž|òÉÅ‹ÿý÷Bˆ#F¬^½:&&fíÚµ:Š„sæÌYºt©1ϬX±bÔ¨Q?üðûᄏuëÖþýûëë"„Xj¼Cöb•­„mËžè´Z­S§N]¸p!]6tA6 `×ÕRºvíjî‹2°+;;ûĉ­[·îرcÛ¶mÏœ93|øðŸýìgšŠ„K–,Ùºu«qÇ×´´´G}ôðáɉ‰Ï=÷ÜÎ;õuý€x÷¢ÈRj¼CÎT¨ñÙ‹U¶¶-{Â`ÈÎΞ;wnqqñåË—#GMNNBìß¿Ÿ.[¹P÷È¡ºˆFWTTøûû×××·lÙ²¦¦ÆétÖÖÖÞtÓMçÏŸ×QÔØ%„¨««‹ŽŽ®®®þî»ï:wîlÜéASd¼C%pãro<ѯ´¶-[ÁØ#ºté2dȤ¤$‡Ãa™9sæÜ¹s…cÇŽ¥Ën.¢5\;²­ë¶Ûn›0aÂĉ³²²,X0tèÐI“&-]ºôÀŸþ¹Ž"!D¿~ýÒÒÒžyæ)åK/½ôá‡fgg÷Ýw)))•••úº ã²(¢;ñîE@ÏRìBm[–à•kOÃÖäHbb"]¶u5áé§ŸV#¢ëؽ{÷!Cz÷î}Û¿0h*vmذÁÏÏÏét†……=zÔØv¹wï^MERÊÝ»w‡„„DDDDDDïܹSJ¹mÛ¶±cÇj킌wÈ¢®F¼{ÑгP]=K± µmY?ì½zõjr¤GtÙÓuÅ}k»víªöÈÑõoñØc 2dèС—Yžzê)s-*EÀ®aÆåää:t¨oß¾111ÙÙÙgΜ‰ŽŽ6ý¦ÊDBˆ¾}ûæææîØ±C1`À€ØØX!Ä Aƒ ¤µ 2Þ!‹¸ñ®ÌÙ³P]=K± µmY·@_uª«uëÖçÎk|¤cÇŽ'Nœ0×¢Rìr³jÕª‡~ØR…b°‹ØÔx‡Üï½Ø j¼£ºÌÄÛKЄ(uªkàÀMŽŒ1Bk°Ë²ž¢²y¡ºˆýAwȽñ¨ñÙ‹Ý 
Æ;ªËDšyoô&D)¨{äP]Û·oordݺuZ‹€]„+@wȽñ¨ñÙ‹ áhB!æàp(ê)ÊDÀ.BñqPãÕe"\&„bM>Á vBˆƒï¨.Ñrj'„B!„Bþ]x$B!‘’’r¯øàøCFF†ES7¤KeQÄÇámˆ±eË–&ó@yyù¡C‡è²§käÈ‘V¼¬E¨®>}úüøàž={Ö¯_¿`Á‚={öh'v8p ´´tÔ¨QV,WzËYTcbcc7mÚôÁ 0Àê‘Ò¥@TRRRVV&¥,))BÜqÇ›7ož:uêúõë­Ø†€çRYj¼£º¬€°G”••©Y&¥Ë g`WMMšß e"T×øñã¯x¼k×®Wû’ÍEÀ®”””êêê)S¦¤¤¤˜ûÊ^tA%@ç•.•EÝtÓMMX¤KeQ¨ñŽê²n&¾ä¾nº4»!V€ºµroüÁƒ¯¶5Àô¿•@ºTEW€=büøñwÝu—š51ºˆO¡lkê~•®3uêÔ… "‰`\ñY”ÀÝÚ ¹7uä>Žƒï¾ãòÀQXXxîÜ9º´p¡Î¨.e—TyÝBTWvvöܹs‹‹‹/_¾l9zôè®]»„û÷ï×Q삌wÈ¢蜣ҥx¦ ýóŸÿ¬@„êR&BwT—p 4!ÄŽpíH#W—.]† ’””äÞp8sæÌ¹sç !ÆŽ«£ØE±Ôx‡ìŨñŽê²µ·†¥¢¢¢   ¢¢‚.Ûº{ì±µk×Zôât™Î€–/_Ž$v………59’˜˜¨µØå)ÞÕ‹Ô¸Pã]™ ²g¡º {]¶¥™·p½ihhÈÌÌŒïСCxxx||üœ9sè²› uªkÇŽ?þ8’ØÕ«W¯&Gzô衵ØïE Üx‡Üï½5ÞQ]VÀ-Ð1cÆŒõë×gddôìÙ3,,¬ªªjÿþýóçÏ=zô¼yóè²›‹èHeeåùóçCBBÚ´iƒ!vûïEMAwÈ^L|o/AëMxxx^^^“ƒGމˆˆ Ë†.°=r¨.—Ë5{ö츸8wXÅÅÅeffº\.MEÀ.¢ñYTcÀâ]½ ©g¡ºØ³ˆJ8{DPPÐó´´´4((ˆ.»¹ °+###!!!+++;;;''';;{Ñ¢Eñññ3fÌÐTì"ïEIÜx‡œ©Pã²ÂØ#ÒÒÒ† ÒøÁ999ƒ:t(]vsA6 `׎4r€ŒwÈ¢$n¼CÎT¨ñÙ‹ áì%%%©©©B§Óãt:…©©©%%%tÙÍÙ0€]\;ÒÈE42Þ!‹’¸ñ9S¡Æ;d/&„Á2¼¼¼C‡ÙïÞ½{RR]6t9΢¢¢&×T(++KHH¨­­¥Ën.c9eÞ¼y;w6ŽäææfddøûûoܸQG°‹hX¼+)s¡Æ»2dÏBu±g¥xmôÆâ½÷Þ£Ëæ.Ô=r¨.®iä":‚ïêEj\¨ñ¹75Þ!{1!\6‡CÝO’®ãìÙ³£FúüóÏNghhhuuummmjjêš5k¢¢¢è²›Ë€kG¹ˆ^ Å»z‘j¼+s÷,T{Q`skºÀ.ȆìB¬ZµêᇶT¡Xì"ï*E*]¨ñŽ:S¡Æ;X/¾xñ↠úõëסCºìï2ÕKΠ¨üIÒå!`{äà]Ê~1 ÛÕ¸Þzë­ßVdË–-«V­ÒTìröK¨X¤Ø…ï{ãQ ÁÞÅEEEþþþ111YYY—/_¦Ëæ.Ói¦~ä†äܹstéâ3fŒ]ÄG˜0aÂ¥K—𬮮^²d‰¦"`—Èx‡,JàÆ»2{ù1ÁÁÁûöíÛ¶mÛwÞyòäIºlî2?oŸaaatiä"„˜Ë–-[Z´hÑøHyyù¡C‡ô» ã²(BˆEÄÆÆnÚ´éƒ>0`@FFÆ“O>ép8è²­ËDø`8}úôþð‡ÿýßÿ½páBãƒtÙÓeù!1`WUU•šh*á¹GóæÍ¯ø%—Ë¥£ØïE55Þ!?ïê] D%%%ÅÅÅüæ›oŒ#åååS§NmhhøôÓOé²›Ë 8{ÄwÞ™˜˜8dÈ–-[º8.{º °‹Ø‡ÃQWW#vAÆ;dQAw¤™ŠhÄ5–(M‹è²'ÜígÏžýÇ?þA—..Ô=r`.®iä"ïE5,ÞÕ»zªKeQ(--5j”‚µJºì ` q¹\~~*~ŒtydÃv=òÈ#‰‰‰óæÍk¼Ì¢µØUVV¦fõR™ØïE Üx‡œ©Pã²§¤¤TWWO™2%%%….û»¬€[ =âõ×_ß¿FFFxx¸û`\\]6t¡î‘CuuëÖÍýÁKQ&v€ŒwÈ¢n¼CîGwÈ^Lïì*¤tyH×®]­xYº,¢OŸ>—.]B»{ì±µk×"‰€]ñY”Äwe.Èž…ê‚ìYtÙnöÔ[BºP÷È¡ºFžž®`™E™ØUXX¨æ¬L삌wÈ¢n¼CîGwÈ^Œï¨.+àhâ+ î‘Cu]ñƒVä•2°‹b¨ñ¹75Þ!{1!€=¢OŸ>×øêž={è² ²a»ªªª~|Њ+y*»ÜTVVž?>$$¤M›6"<d¼C%pãr¦BwÈ^ì,Þá]&Â-Ð1~üxºtq¡î‘Cu!ÝÞÕÐÐðÒK/-_¾Ü}yÕ¸¸¸‰'>ûì³Í›7×Q삌wÈ¢n¼CîGwÈ^Œï¨.+à 0!ÄŽpíH#׌3Ö¯_Ÿ‘‘ѳgϰ°°ªªªýû÷ÏŸ?ôèÑóæÍÓQì"„Xj¼CöbÔxGuY`â+@6 
`׊+®ñUa”‰€]»wïîܹsãƒG½ãŽ;ÊÊÊt»ˆF Æ;äL…ï½5ÞQ]VÀ-Ð7µoú|ðàAºlå2@Ý#G—."`W]]]ddd“ƒQQQ/^ÔT„炌wÈ¢ƒšŒwº¼(pñﲮ߫V­2œ¸cǺìé"ö‡kG¹Üœ={vÔ¨QŸþ¹Óé ­®®®­­MMM]³fMTT”Ž"`d¼CE´5Þ!{±ÔxGuY·@{ÄáÇ¿u###ÏŸ?O—­\ Ø•‘‘a<¸â2‹Ž"`—›¨¨¨Ï>û,//ïСCÆíºwïž””¤¯Ø…ïêEj\¨ñ9S¡Æ;d/vƒï¨.+à쉉‰K—.}ê©§Œ§Û·o·nE®²a»~øaãAÿþýÿþ÷¿»—Yžxâ‰|PG°« IIIû÷ïŸ0a‚¥•"HR¼«©q¡Æ;äL…ク xñŽí2I<à‹/¾NJJº÷Þ{o»í¶ÐÐÐ]»vÑeOW¿~ýöïßï~ZZZ:`À+Dt™Bpp°Ëåj|¤wïÞZ‹€]n”õ•Í ÌïEIÜxWæ‚ìY¨.ìžE—Ýhæ½ÑÿøÿøöÛo§Nš’’2a„'Nôë×.{ºÀöÈÁ»Œe÷S«×Žˆ€]D# ã²(ï{ãQã²¢åÔn:”ŸŸo<>yòä±cÇè²­+%%eñâÅî§üq¯^½è²­‹kG¹Ü(ë)*›˜ 2Þ!‹’¸ñ®ÌÙ³P]Ø=‹.»¡åIÛ‡>}ú|üñÇRÊ7úûû6ŽZºlå‚lÀ.)eiié²eËþøÇ?.[¶¬¬¬ @ì28wJž 2Þ!‹’¸ñŽ:S¡Æ;d/6‹wx—‰ð6HVRR8pàÀôôôž={<¸  €.{ºÊÊÊ6lØpúô鸸¸ûï¿?"" ]¦pøðáÀÀÀN: ! \.×-·Ü¢µØE42Þ!‹2€Œw•.¼ž…êbÏ"Jñö®7­[·–RÖ××;Ϊª*)ehh(]öt¡î‘CuqíH#WQQј1c’’’b¡µØïEIÜx‡Üï½5ÞQ]VÀØ#zôè±aÆåË—ÿâ¿RÖÔÔ´mÛ–.{º °+44´®®NJ9`À€U«Våää$$$h-vÝqÇãÇß´iÓöFh-vAÆ;dQ7Þ!g*Ôx‡ìŨñŽê²À±y󿀀ÿM›6I)÷îÝ;mÚ4ºìé‚lÀ.®iäêÚµ«E¯ì-° 2Þ!‹’¸ñ9S¡Æ;d/FwT—pö”ÚÚÚêêjºìï‚lÀ.®iäêÓ§Ï¥K—,zq¯ˆ€]1ÞUŠTºPãr¦BwÈ^Œï¨.+ðóögµ'((ˆ.-\ 7n,++KNN ­­­  Ë¶®_|qĈRÊõë× !¾ù曇~Xk°kôèÑéééáááîƒqqqúŠ€]1ÞUŠTºPã]™ ²g¡º {]öÅ{³7!JAÝ#‡ê’\;ÒÇ¥¬¹¨ìb¨.¢¨ñ¹7^‚Æ»J^Ϣ˶ð6Hć¸xñ¢Ëå ¡K Ñ…ªªª ÓWì"zïÊ\ìYäÇ Æ;ªË 8B!„Bñ ø`B!ѧOŸk|uÏž=Ú‰€]„âã Æ;ªË 8ß8={ö¬¬¬¼Æ7Òe!Ä"Æ&BuAÆ;dQ„뀌w`—p ô“˜˜øÆo\í«>úèÙ³g鲋B|Èx‡,ŠBˆupøÆùÙÏ~v÷Ýw_í«·Ýv]¶ràÚ‘F.¢ñYÑ Ôx‡ìÅ„®²a»¸v¤…+%%å_=xð ç Å"`ÑÔx‡œ©ðâ]± ¬gÑe¸l•••çÏŸ iÓ¦ ]6tUWW/[¶ìj_}ôÑG鲕‹kGZ¸222Œ'Ož\²dIzzzÇŽ‹‹‹ßyçaÆ™¢P,vý˜x÷ŠH 5Þ•¹ {ª ¬gÑ¥Þ½ ±î¸\®Ù³gÇÅŹžqqq™™™.—‹.[¹†z¯ÞsÏ=tÙÊEô¢_¿~û÷ïw?---0`€Ö"`^¼«©t¡Æ»2{¹¨ñŽê²À‘‘‘‘•••“““½hÑ¢øøø3fÐeCÑ‘ŠŠŠ‚‚‚ŠŠ ž+88¸É¿þ{÷î­µØïEM‹wõ.¤žE—máìáááyyyM9r$""‚.º ž‹kG¹RRR/^ì~úñÇ÷êÕËt‹J° 2Þ!‹j X¼«w!õ,TdϢ˶pöˆ   çiiiiPP]vsA6 `׎4r}ñÅÁÁÁIII÷Þ{ïm·Ýºk×.Ó-*EÀ.Èx‡,JâÆ;äL…ï½5ÞQ]VÀØ#ÒÒÒ† ÒøÁ999ƒ¾ö'OèòŠ ²a»¸v¤‘KJYZZºlÙ²?þñË–-+++³B¡X„ꂌwÈ¢$n¼CÎT¨ñÙ‹%h¼»L‡°G”””¤¦¦ !œNgLLŒÓéB¤¦¦–””Ðe7dÃvqíH#סC‡òóóÇ'Ožûì³¼¼¼C‡?>$$¤{÷îIIItÙÓe””déë«Aº6nÜXVV–œœZ[[ µØïE ÜxWæîY¨.°žE—}ñöN!W€kG¹6oÞàïï¿iÓ&)åÞ½{§M›¦µØE±Ôx‡ìŨñŽê²®›Cee¥ñÇÅ6mÚÐeO×øñãïºë®‘#GZñât™׎4rÝ{u••.—+$$DñóŸÿüç?ÿ¹Ö"<Â+ IDAT`—¤xW/RãBwe.Èž…ê‚ìYtÙ–fÞ>½ihhÈÌÌŒïСCxxx||üœ9sè²› ²a»vìØñøã#‰€]Bˆ   £ ˆP]ñY”ÀwÈ™ 
5Þ!{±w`—é8¤”Þ>™1cÆúõë322zöìVUUµÿþùóç=zÞ¼ytÙÍEt„kG¹ˆýŒwÈ¢ˆ¦ Æ;d/&¾‹·÷`ë äã]?¾7 ]¶r¹\®Ù³gÇÅŹÃ*...33Óåri*v€ŒwÈ¢ïê]H= ÕÅžETÂØ# oê‚lÀ®ŒŒŒ„„„¬¬¬ìì윜œìììE‹ÅÇÇϘ1CS°‹hd¼C%qãr¦BwÈ^L`€¼q<ª ²a»¸v¤‘‹hd¼C%qãr¦BwÈ^L?ìgÏž5jÔçŸît:CCC«««kkkSSS׬YE—­\»wïîܹsãƒG½ãŽ;ÊÊÊLÑe N§³¨¨¨ÉG€ÊÊÊjkku¡ºzöìYYYyo(,,ÔKì ñY”Àwe.Èž…êÂëYtÙÀ& òëtÝ0 Øe,§Ì›7Ïýo—ÜÜÜŒŒ ÿ7ê(Bu%&&¾ñÆWûê£>zöìY½DÀ.7`ñ®X¤Ì…ïx3•w•.¼žE—­ñÖÒ3s¡î‘Cu•””¤¦¦ !œNgLLŒÓéB¤¦¦–””h*Bu]ûÿþ=÷Ü£ØÕ¤xW/RãBwȽññ®Ò…׳è²3€=ò¢¨.Ȇì2ÈÍÍ]½zõ›o¾¹zõêÜÜ\‹,*EÀ.¢ñY”Äw¼™Ê j¼CöbâËp ´G Þ{Õ%÷Èa»è½éÒE„炌wȢܠÆ;äÞx—ê]EÑe;¼=ë ä…ø€]`{äP]\;¢Ë‹"`d¼CÕ°xWïBêY¨.È¢è²-€=õÞƒ.Ô\@uAÞ'ƒ.]DÀ.Èx‡,JâÆ;äL…šŒwº¼î²ÀyÑTj. º¸vD—EÀ.Èx‡,JâÆ;äL…šŒwº¼î²ÀyÑTj. º¸vD—EÀ.Èx‡,JâÆ;äL…šŒwº¼î²^Ë /炼É!° ïÞƒti$v€Å»b‘2j¼ó>ÀtyQD—v.KP>râP÷È¡º¸vD—EÀ.¢¨ñ¹751ïtyÝe\¾qºuëvûí··oßþÙgŸ¥Ëþ®³gÏŽ5êóÏ?w:¡¡¡ÕÕÕµµµ©©©kÖ¬‰ŠŠ¢Ën.®ÑåEž 2Þ!‹2@we.àž…ê‚,Š.ÂøÆ9uêÔŸþô§;wž8q‚.û» PsÕE1Èx‡,ª1¨ñ9SB´€0!ÄvpíH#—büøñwÝu×È‘#aDÀ.Bˆé Æ;d/¸ñŽê²‚fÞ>BTЭ[·I“&½üòËtiáúè£êëëW¬X#v ! Ï;‡$v]@we.Èž…êBíYtÙ®Ÿuª‹B|Ôx‡ÜOÑ À„BÌ¡²²Òø ]“ûŽè+vBˆƒï¨.áhB!ÑÐЙ™Þ¡C‡ðððøøø9sæ444h*vBˆƒï¨.+ðóö €P___[[êçgÕôòåËBˆfÍ”þ͵.Bˆ‰<ûì³ëׯöÙg{öìVUUµÿþùóçWWWÏ›7OG°Ë d¼CE1ÔxGuY‚WïBŒÀo¼ÑµkW‡Ã!„hÑ¢ÅwÞ¹gÏ+DC‡={v“ƒ_~ùå‹/¾h…µ.Bˆé„‡‡çåå59xäÈ‘ˆˆMEÀ. ïEB¬5ÞQ]VÀ?azÄœ9sþò—¿Œ9òÿý¿ÿû»ßý®K—. 
صk—é®={öÜÿýÆãººº5kÖ!Z·nýÞ{ï™îB­‹èH}}}EE…Ëå²èõ/_¾l,é¨Äꢄںêêê"##›ŒŠŠºxñ¢¦"`d¼CE4,ÞÝ õbÔxGuY‚·'p½‰‰‰9|ø°ñøë¯¿¾õÖ[¥”¯¾újÿþýMwµhÑ¢ªªÊxüý÷ßGEEI)+++Mw¡ÖePWWW^^~éÒ%‹^_JÙÐÐÐÐÐ`Ýë_¼ºÔ,épo…礥¥ 2¤ñƒsrr}Új×¥K—>\^^nµH"ÖÅæä9*ëR¶¤Ã½žãr¹fÏž羺D\\\ff¦ËåÒTì’ˆñ.A‹BwÈ™ 5Þ!{1j¼£º¬€0ñØœX]lNž£².eK:Ü[A|°x7À+ 5Þ!g*Ôx‡ìÅ„pöˆ¼¼¼Î; !:tèpâĉ•+WvíÚ5--­¸¸Øt—Êk9@ÖÅæä9Àý r‘JâÖEnÈx‡,JâÆ;g*S€ŒwÈ¢ˆ qH)¹Qî»ï¾°°°'Ÿ|rñâÅßÿ½bĈ«W¯Ž‰‰Y»v­¹®3f¬_¿>##£gÏžaaaUUUû÷ïŸ?þèÑ£çÍ›g® µ.Åh|ðk¢²®ŠŠŠÖ­[7kÖÌ:…›ÊÊJ£¨6mÚX*RY”PX—¢¾¾¾¶¶644ÔÏÏϊ׿|ù²BÙÎÀꢄں ã²(¯Ù¶P{–m[H=K°mÙoOàzÓ¦M›ÊÊJ)eii©âÌ™3RÊ3g΄‡‡›îRy-Ôº$â9¼º”-épo…)¼ñÆ]»vu8Bˆ-ZÜyç{öì1Ý2tèÐÙ³g79øå—_¾øâ‹¦»¤ª¢¤Úº ã²(7xñn ².¤ž%AÛdÏ’l[v…°G´jÕê‡~RÖÕÕ !jjj¤”555ÁÁÁ¦»T^˲.6'ÏQY×½÷ÞûðÃggg?øàƒ©©©©©©‹/îׯßÈ‘#Í©¼O†²¢¤Úº233cccgÍš5kÖ¬ØØØiÓ¦Mž<ÙétîܹÓ\QtttNNŽñøâÅ‹«W¯–RæååÝrË-护¢¤Úº ã²(‰ï3j¼CöbÔxG­Ë 8{DÏž=—.]*¥\¼xqûöíŸ~úé¼¼¼§Ÿ~úöÛo7Ý¥òZu±9yŽÊº”-épo…çÄÄÄ>|Øxüõ×_ßzë­RÊW_}µÿþæŠZ´hQUUe<þþû¢¤”•••护¢¤Úº ã²(‰ï3j¼CöbÔxG­Ë 8{Ć üüüœNgXXØÑ£Gûôé#„ˆŒŒÜ»w¯é.•×r€¬‹ÍÉsTÖ¥lI‡{+<Ç]—”òâÅ‹!!!RÊS§N™Þcbb3A­‹ÜñY”ÔxWYdÏ ñY±#^¿õ¦}ûöÎkb®îæ›oþî»ï¤”;w–R·oßÞ\…j]êÜø'•Ô¥rI‡{+<ç±Ç[»v­E/îª 2Þ!‹òm ¬gIжÙ³è²-VýÑ(,,T©+--w?mӦͅ ¬AÖ5nܸwÞyçj_}øá‡¯±€pÔÔÔŒ;vÆ B)åÈ‘#ÿú׿›¯Ìµ®/¾øbÏž=Wûê'Ÿ|b¢ëÅ_üá‡JJJÜë9Ï?ÿ¼ë9*‹ ëBž;wΊWö–ÕïE ÜxWYdÏ m ²gÑe[¸ZÚ·oðàÁÖ­['%%}óÍ7³fÍ:xðà‡~èíóò5u¡î‘C­+!!¡¼¼üßPSSc–+11ñ³Ï>‹OJJÊËË+))éÓ§ÿ¤VY”PX!W²m)+ 5ÞUÖÙ³hÛbÏ"Jñêú3ù73fŒ±Ù mÛ¶ÁÁÁ?ûÙÏ ¼}R& ¦.Ô=r¨u©¤U«VÆ£¨~ø¡M›6^=#sPY×… ,zå«QQQQPPð㜘ˆú¢¤’ºTÙ¶”…ï*ë‚ìY´ma÷,ɶe38kƒËå2.»·uëÖ¸\.oŸ‘9@ÖÙœ$h]íÚµ«¬¬”RvîÜÙår=ÿüó¿ùÍo¼}R& ².!ÄôéÓ›LOOŸ9s¦¹"—Ë5{ö츸8÷ßpãââ233­È eEIµu©2Þ!‹’ ñY”m[=K²mÙÀ„˜ds’ uA.RIµu !Úµk—™™ÙøàöíÛ“““Íedd$$$deeeggçäädgg/Z´(>>~ÆŒ护¢¤Úº¹"ñY”m[=K²mÙÀúQYYùÔSOyû,Ì©.Èæ$AëB]ÏQY—"777>>~ñâÅîƒÅÅÅ-[¶4Wž——×äà‘#G"""ÌI…EIµuy¤xwVd¼C%AÛdÏ’l[v…°~…‡‡{û,Ì©.Èæ$që""„¨««ËÏÏŽŽ^²d‰qpß¾}±±±æŠ‚‚‚~üA£ÒÒÒ   sERaQRm]^)ÞÝ€ïEÏAwÔº¬ ™5—Ö"ħiÞ¼¹ŸŸŸâ®»îJIIiÞ¼¹·ÏÈPë28wîÜï~÷;oŸ…ù(««S§NÛ·oŸ5kÖ¨Q£-Zôè£2Ä\ůýëôôôüü|÷‘ÜÜܱcÇ4È\‘E oÔEH 㲨Æ@¶-¤ž%ضì `m(ùeeeRJãqEE…·ÏËSPë2€lN´®ÚÚÚ>øÀÛga> ê p8Bˆäää}ûö¹\®… öêÕëå—_6W´|ùòêêꤤ¤V­ZÅÆÆ¶jÕªk×®µµµË–-3W$%ÔÖ¥Èx‡,ª1ñY”m[H=K°mÙÞXŒ¿b444ÈLNN>xð ÷NÊPë28}útJJʵo£§#uA%ëÊËË;tèÐùóçCBBºwïž””äí32¼º 
㲨Æà%†-J€ÖY^¼è[—ßO ±.—Ëx ¨u]())1¸×s„-Z´÷êyyŠúº***Z·nݬ™å‹’’’Ú¶mktÜ6mÚXêRV”P[— ã²(¢m ¸g ¶-ûÁ-Є˜ê9ȺâþEïÞ½+++Ç ðöyyŠÊºòóó“’’""":vìøí·ßþçþg·n݆êþY444dffÆÇLJ‡‡wèÐ!<<<>>~Μ9 抄¢„Úº¹"ñY”m[=K°mÙnÖÔ¿:#Õ…ºGµ.¤ßÀÆ(¨ë¾ûî {òÉ'/^üý÷ß !FŒ±zõꘘ˜µkך(š1cÆúõë322zöìVUUµÿþùóç=zÞ¼y&Š„Â¢„Úº¼ä› ¬(Èx‡,ª1`¿„H=K°mÙo\zšxDuuõ_þòoŸ…ù@ÖvŸ 7uA%•ÔÕ¦M›ÊÊJ)eii©âÌ™3RÊ3gΘîUyãAeEIýo¨ø“@Æ;dQ4 !‹’ u!õ,ɶeW¸Z?BBBž{î9oŸ…ù ÖE<ÿüç?N§"88Xjü÷Ÿÿü§¹¢ºººÈÈÈ&£¢¢.^¼h®H(,J¨­Ë+@Æ;dQ„ø¨ñŽZ—pÖÊÊÊÂÂÂÊÊJoŸˆÉ ÖEt!$$dúôéÞ> óQPW§NÞyç!ÄŠ+Ú·oÿüóÏççç?ÿüó=zô0W¤òƃʊúßPñ'ŒwÈ¢ˆ^@¶-¤ž%ضl‹·— Éõâr¹fÏžçþ—™™ér¹¼}jZ—ê9ԺȱaÃ???§ÓvôèÑ>}ú!"##÷îÝk®¨¤¤$55Uát:cbbŒ?u§¦¦–””˜+’ ‹’jëR d¼CÕÈx‡,ŠÜ0¨ñŽZ—ð"XÚ ýÇͯj]D;*++u¼”ÿO¢¦®ãÇ:t¨oß¾111RÊ3gÎDGG7¾‰(»ñ Ê¢„Î7T¼ñYÑȶ…׳Û– ñêøMþ tÿ¸ùÕ@­Ë ¢¢¢   ¢¢ÂÛ'b2Hu¡®ç Ö%¥üî»ïvîÜyúôioŸˆÉàÕïE5)ÞÝ€ïE¹Á‹w}ë⬠AAAÿ_{wšeÝÇüÞ“I”³HKj•S„èe‘›uPô"d½A2hÅ “: "B!;Ij„$ØÑEDPAdÑ[ÛZ­½ẞƒöøôàžzv=÷åÿ÷ÿ|ŽÆ}Ëöûâø^ûÝ×u_÷v÷èèèµ×^[ÉùÄO\úÂó¹sç}ôÑz¿§+d®¨çsBæºãŽ;&&&Š¢hkk»xñâ[o½õØcU=T ¢æ*"Ö{4TÈzªZï!Cqë=õ\à” .]ºôÖ[oݹsçüQõ8¥‰—+jGÍUÅÀÀ@__ßÇÜ××700Põ8¥ –ë¹çžûâ‹/Š¢¸ùæ››››ï¹çžï¾û®ê¡J5W±Þ‹ ¡BÖ{ÈPó‚Õ{]¼PQë=õ\.NÉÐÐPGGÇéÓ§_yå•©©©={ö¬]»¶ê¡J5W¼käêâåšýòË/ׯ_¿fÍšªg)S¼\sssEQ,Y²¤¿¿ÕªUwß}÷UW]UõP%ˆš«´ÞC†ª‹Wïµ ¡âÕ{-h¨¨õž|®J×oþžÁÁÁ+VÔ¿þì³Ïê÷‹ðòsÈ\!¯‘+‚æ y>§ˆ›‹„„¬÷¡Š õ2T´ÞC†âÊôªpþª‘‘‘±±±¢(FFFFFF6lØðÕW_íß¿¿³³³êÑ%j®‰‰‰^xáصkWè:‹¨¹š››?ÞßßßÙÙyáÂ…ªÇ)MÔ\?ÿüók¯½Võå –+d½‡ U²ÞC†ª Yï!CÕ«÷y©æªn÷æï‰úŸ5WýÁÐÐÐã?þÐC}ûí·UOT޹¢žÏ‰š«ø÷h‘˲ÞC†ª \ïÁBAë=d¨yÁê}^¢¹–,¼så8yòäèèèæÍ›:Tõ,eŠš«®¥¥åÀŸþyWWWOOÏ«¯¾ÚÔÔTõP%ˆ”ëÒó9µZ­~>§»»{ß¾}IÿNFÍEBBÖ{ÈP—ŠTïó‚… Yï!CqÅr¬”LNN¾ÿþûï¼óNÕƒ”,^®‘‘‘ááá7ž={¶þÈøøxww÷ÜÜ\Ò=2×%]!sÕÿ0ªÕj—þ^}õÕ+V¬¨t®ÅŠš«±ÞkAC…¬÷¡jAë=d¨ZÜzO=—ʵÇCæ:uêÔåÎçtttT2R)BæZ²ä_W-ÍÍÍÕï9y×]w:uªº¡J5 Yï!CÕ‚Ö{ÈPµ¸õžz.—@§aÛ¶mŸ|òÉåžÝ²eËÞ½{9OY¢æŠz\È\“““Û·oOúûŸBæºxñbý‹úçÐŒW;OYBæ Yï!CÕ…¬÷¡jAë=d¨ZÐz¯¥ŸËœ†Ã‡;vìrÏ'j.àÿ!d½‡ U Zï!Cq%³CÉ¢öxÔ\¤hbbâ×_]¾|ù7ÞXõ,eŠš‹+\ÈzŠtE­÷DsY€HÃÜÜܻᄏ{÷î¡¡¡ú#·ÝvÛË/¿ÜÛÛ[ÿ †DE͹¨õžz.ï ½½½ûöíëíí½ï¾ûn¸á†_~ùåĉï½÷ÞäääŽ;ªžî5@æ¢Ö{깜 +W®ú¨êÑ%j.€ÌE­÷Ôsy0)9þüéӧ럻pï½÷¶··W=Q9¢æÈ\ÔzO7—»@ŒÙÙÙ'N¬_¿~Íš5UÏR¦¨¹2µÞ“ÎuÕÛo¿]õ ð—üøã]]]}}}×\sÍý÷ßßÔÔTõD刚 sQë=é\Þ @Jš››?ÞßßßÙÙyáÂ…ªÇ)MÔ\™‹Zïéæ²˜–––¼øâ‹]]]»ví 
s3‹¨¹2µÞÍå&X¤adddxxxãÆgÏž­?2>>ÞÝÝ=77wèСjg[Œ¨¹2µÞSÏe ¼Å(écYÔ\™‹Zï©çrhÒpòäÉÑÑÑÍ›7'ñó_5@æ¢Ö{ê¹,À¤¡££crrrûöíUÏR¦¨¹2µÞSÏåh²à.Ð$`Û¶m <»eË–†MR®¨¹2µÞär 4 8|øð±cÇ.÷ìÁƒ9L‰¢æÈ\ÔzË%Ð$ µµu|||0==ݰaJ5@æ¢Ö{€\`²à=ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @þ ºBuûIEND®B`‚sleef-3.5.1/doc/html/tutorial.c000066400000000000000000000031301373003144100163270ustar00rootroot00000000000000// gcc tutorial.c -lsleef -lsleefdft -lm #include #include #include #include #include #include "sleef.h" #include "sleefdft.h" #define THRES 1e-4 typedef double complex cmpl; cmpl omega(double n, double kn) { return cexp((-2 * M_PI * _Complex_I / n) * kn); } void forward(cmpl *ts, cmpl *fs, int len) { for(int k=0;k THRES) || (fabs(sy[(i*2+1)] - cimag(fs[i])) > THRES)) { success = 0; } } printf("%s\n", success ? "OK" : "NG"); free(fs); free(ts); Sleef_free(sy); Sleef_free(sx); SleefDFT_dispose(p); exit(success); } sleef-3.5.1/doc/html/vectest.c000066400000000000000000000005131373003144100161430ustar00rootroot00000000000000#include #include #include #include #define N 256 __attribute__ ((__aligned__(16))) double a[N]; int main(int argc, char **argv) { srand(time(NULL)); for(int i = 0;i < N;i++) a[i] = rand(); for(int i = 0;i < N;i++) a[i] = sin(a[i]); for(int i = 0;i < N;i++) printf("%g\n", a[i]); } sleef-3.5.1/doc/html/x86.xhtml000066400000000000000000014757571373003144100160600ustar00rootroot00000000000000 SLEEF - Math library reference(x86)

SLEEF Documentation - Math library reference(x86)

Table of contents

Function naming convention

The naming convention for the vectorized math functions is shown in Fig. 3.1 and 3.2. The function name is a concatenation of the following items, in this order.


  • String "Sleef_".
  • Name of the corresponding double precision function in math.h.
  • Data type specifier of a vector element: "d" for double precision functions and "f" for single precision functions.
  • The number of elements in a vector.
  • Accuracy specifier: the string "_u" followed by 10 times the maximum error over the typical input domain, in ULP (two digits). This field is omitted from the name if the function is expected to always return the correctly rounded value.
  • Vector extension specifier.
    • (Nothing) : Dispatcher automatically chooses the fastest available vector extension
    • sse2 : SSE2
    • sse4 : SSE4.1
    • avx2128 : AVX2+FMA3 instructions used for 128-bit computation
    • avx : AVX
    • fma4 : AMD FMA4
    • avx2 : AVX2+FMA3
    • avx512f : AVX512F
  • Attributes.
    • c : The function gives bit-wise consistent results across all platforms
    • f : The function requires FMA support and gives bit-wise consistent results across all platforms
    • i : Infinity is correctly handled
    • n : NaN is correctly handled
    • z : Negative zero is correctly handled

naming convention
Fig. 3.1: Naming convention of vectorized functions


naming convention with attributes
Fig. 3.2: Naming convention of vectorized functions with attributes

Data types for x86 architecture

Sleef___m128_2

Description

Sleef___m128_2 is a data type for storing two __m128 values, which is defined in sleef.h as follows:

typedef struct {
  __m128 x, y;
} Sleef___m128_2;

Sleef___m128d_2

Description

Sleef___m128d_2 is a data type for storing two __m128d values, which is defined in sleef.h as follows:

typedef struct {
  __m128d x, y;
} Sleef___m128d_2;

Sleef___m256_2

Description

Sleef___m256_2 is a data type for storing two __m256 values, which is defined in sleef.h as follows:

typedef struct {
  __m256 x, y;
} Sleef___m256_2;

Sleef___m256d_2

Description

Sleef___m256d_2 is a data type for storing two __m256d values, which is defined in sleef.h as follows:

typedef struct {
  __m256d x, y;
} Sleef___m256d_2;

Sleef___m512_2

Description

Sleef___m512_2 is a data type for storing two __m512 values, which is defined in sleef.h as follows:

typedef struct {
  __m512 x, y;
} Sleef___m512_2;

Sleef___m512d_2

Description

Sleef___m512d_2 is a data type for storing two __m512d values, which is defined in sleef.h as follows:

typedef struct {
  __m512d x, y;
} Sleef___m512d_2;

Trigonometric Functions

Vectorized double precision sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sind1_u10purec(double a);
double Sleef_sind1_u10purecfma(double a);
double Sleef_cinz_sind1_u10purec(double a);
double Sleef_finz_sind1_u10purecfma(double a);

__m128d Sleef_sind2_u10(__m128d a);
__m128d Sleef_sind2_u10sse2(__m128d a);
__m128d Sleef_sind2_u10sse4(__m128d a);
__m128d Sleef_sind2_u10avx2128(__m128d a);
__m128d Sleef_cinz_sind2_u10sse2(__m128d a);
__m128d Sleef_cinz_sind2_u10sse4(__m128d a);
__m128d Sleef_finz_sind2_u10avx2128(__m128d a);

__m256d Sleef_sind4_u10(__m256d a);
__m256d Sleef_sind4_u10avx(__m256d a);
__m256d Sleef_sind4_u10avx2(__m256d a);
__m256d Sleef_sind4_u10fma4(__m256d a);
__m256d Sleef_cinz_sind4_u10avx(__m256d a);
__m256d Sleef_finz_sind4_u10avx2(__m256d a);
__m256d Sleef_finz_sind4_u10fma4(__m256d a);

__m512d Sleef_sind8_u10(__m512d a);
__m512d Sleef_sind8_u10avx512f(__m512d a);
__m512d Sleef_sind8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_sind8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_sind8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sin_u10 with the same accuracy specification.


Vectorized single precision sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sinf1_u10purec(float a);
float Sleef_sinf1_u10purecfma(float a);
float Sleef_cinz_sinf1_u10purec(float a);
float Sleef_finz_sinf1_u10purecfma(float a);

__m128 Sleef_sinf4_u10(__m128 a);
__m128 Sleef_sinf4_u10sse2(__m128 a);
__m128 Sleef_sinf4_u10sse4(__m128 a);
__m128 Sleef_sinf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_sinf4_u10sse2(__m128 a);
__m128 Sleef_cinz_sinf4_u10sse4(__m128 a);
__m128 Sleef_finz_sinf4_u10avx2128(__m128 a);

__m256 Sleef_sinf8_u10(__m256 a);
__m256 Sleef_sinf8_u10avx(__m256 a);
__m256 Sleef_sinf8_u10avx2(__m256 a);
__m256 Sleef_sinf8_u10fma4(__m256 a);
__m256 Sleef_cinz_sinf8_u10avx(__m256 a);
__m256 Sleef_finz_sinf8_u10avx2(__m256 a);
__m256 Sleef_finz_sinf8_u10fma4(__m256 a);

__m512 Sleef_sinf16_u10(__m512 a);
__m512 Sleef_sinf16_u10avx512f(__m512 a);
__m512 Sleef_sinf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_sinf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_sinf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinf_u10 with the same accuracy specification.


Vectorized double precision sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sind1_u35purec(double a);
double Sleef_sind1_u35purecfma(double a);
double Sleef_cinz_sind1_u35purec(double a);
double Sleef_finz_sind1_u35purecfma(double a);

__m128d Sleef_sind2_u35(__m128d a);
__m128d Sleef_sind2_u35sse2(__m128d a);
__m128d Sleef_sind2_u35sse4(__m128d a);
__m128d Sleef_sind2_u35avx2128(__m128d a);
__m128d Sleef_cinz_sind2_u35sse2(__m128d a);
__m128d Sleef_cinz_sind2_u35sse4(__m128d a);
__m128d Sleef_finz_sind2_u35avx2128(__m128d a);

__m256d Sleef_sind4_u35(__m256d a);
__m256d Sleef_sind4_u35avx(__m256d a);
__m256d Sleef_sind4_u35avx2(__m256d a);
__m256d Sleef_sind4_u35fma4(__m256d a);
__m256d Sleef_cinz_sind4_u35avx(__m256d a);
__m256d Sleef_finz_sind4_u35avx2(__m256d a);
__m256d Sleef_finz_sind4_u35fma4(__m256d a);

__m512d Sleef_sind8_u35(__m512d a);
__m512d Sleef_sind8_u35avx512f(__m512d a);
__m512d Sleef_sind8_u35avx512fnofma(__m512d a);
__m512d Sleef_cinz_sind8_u35avx512fnofma(__m512d a);
__m512d Sleef_finz_sind8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sin_u35 with the same accuracy specification.


Vectorized single precision sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sinf1_u35purec(float a);
float Sleef_sinf1_u35purecfma(float a);
float Sleef_cinz_sinf1_u35purec(float a);
float Sleef_finz_sinf1_u35purecfma(float a);

__m128 Sleef_sinf4_u35(__m128 a);
__m128 Sleef_sinf4_u35sse2(__m128 a);
__m128 Sleef_sinf4_u35sse4(__m128 a);
__m128 Sleef_sinf4_u35avx2128(__m128 a);
__m128 Sleef_cinz_sinf4_u35sse2(__m128 a);
__m128 Sleef_cinz_sinf4_u35sse4(__m128 a);
__m128 Sleef_finz_sinf4_u35avx2128(__m128 a);

__m256 Sleef_sinf8_u35(__m256 a);
__m256 Sleef_sinf8_u35avx(__m256 a);
__m256 Sleef_sinf8_u35avx2(__m256 a);
__m256 Sleef_sinf8_u35fma4(__m256 a);
__m256 Sleef_cinz_sinf8_u35avx(__m256 a);
__m256 Sleef_finz_sinf8_u35avx2(__m256 a);
__m256 Sleef_finz_sinf8_u35fma4(__m256 a);

__m512 Sleef_sinf16_u35(__m512 a);
__m512 Sleef_sinf16_u35avx512f(__m512 a);
__m512 Sleef_sinf16_u35avx512fnofma(__m512 a);
__m512 Sleef_cinz_sinf16_u35avx512fnofma(__m512 a);
__m512 Sleef_finz_sinf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinf_u35 with the same accuracy specification.


Vectorized double precision cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cosd1_u10purec(double a);
double Sleef_cosd1_u10purecfma(double a);
double Sleef_cinz_cosd1_u10purec(double a);
double Sleef_finz_cosd1_u10purecfma(double a);

__m128d Sleef_cosd2_u10(__m128d a);
__m128d Sleef_cosd2_u10sse2(__m128d a);
__m128d Sleef_cosd2_u10sse4(__m128d a);
__m128d Sleef_cosd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_cosd2_u10sse2(__m128d a);
__m128d Sleef_cinz_cosd2_u10sse4(__m128d a);
__m128d Sleef_finz_cosd2_u10avx2128(__m128d a);

__m256d Sleef_cosd4_u10(__m256d a);
__m256d Sleef_cosd4_u10avx(__m256d a);
__m256d Sleef_cosd4_u10avx2(__m256d a);
__m256d Sleef_cosd4_u10fma4(__m256d a);
__m256d Sleef_cinz_cosd4_u10avx(__m256d a);
__m256d Sleef_finz_cosd4_u10avx2(__m256d a);
__m256d Sleef_finz_cosd4_u10fma4(__m256d a);

__m512d Sleef_cosd8_u10(__m512d a);
__m512d Sleef_cosd8_u10avx512f(__m512d a);
__m512d Sleef_cosd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_cosd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_cosd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cos_u10 with the same accuracy specification.


Vectorized single precision cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_cosf1_u10purec(float a);
float Sleef_cosf1_u10purecfma(float a);
float Sleef_cinz_cosf1_u10purec(float a);
float Sleef_finz_cosf1_u10purecfma(float a);

__m128 Sleef_cosf4_u10(__m128 a);
__m128 Sleef_cosf4_u10sse2(__m128 a);
__m128 Sleef_cosf4_u10sse4(__m128 a);
__m128 Sleef_cosf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_cosf4_u10sse2(__m128 a);
__m128 Sleef_cinz_cosf4_u10sse4(__m128 a);
__m128 Sleef_finz_cosf4_u10avx2128(__m128 a);

__m256 Sleef_cosf8_u10(__m256 a);
__m256 Sleef_cosf8_u10avx(__m256 a);
__m256 Sleef_cosf8_u10avx2(__m256 a);
__m256 Sleef_cosf8_u10fma4(__m256 a);
__m256 Sleef_cinz_cosf8_u10avx(__m256 a);
__m256 Sleef_finz_cosf8_u10avx2(__m256 a);
__m256 Sleef_finz_cosf8_u10fma4(__m256 a);

__m512 Sleef_cosf16_u10(__m512 a);
__m512 Sleef_cosf16_u10avx512f(__m512 a);
__m512 Sleef_cosf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_cosf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_cosf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cosf_u10 with the same accuracy specification.


Vectorized double precision cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_cosd1_u35purec(double a);
double Sleef_cosd1_u35purecfma(double a);
double Sleef_cinz_cosd1_u35purec(double a);
double Sleef_finz_cosd1_u35purecfma(double a);

__m128d Sleef_cosd2_u35(__m128d a);
__m128d Sleef_cosd2_u35sse2(__m128d a);
__m128d Sleef_cosd2_u35sse4(__m128d a);
__m128d Sleef_cosd2_u35avx2128(__m128d a);
__m128d Sleef_cinz_cosd2_u35sse2(__m128d a);
__m128d Sleef_cinz_cosd2_u35sse4(__m128d a);
__m128d Sleef_finz_cosd2_u35avx2128(__m128d a);

__m256d Sleef_cosd4_u35(__m256d a);
__m256d Sleef_cosd4_u35avx(__m256d a);
__m256d Sleef_cosd4_u35avx2(__m256d a);
__m256d Sleef_cosd4_u35fma4(__m256d a);
__m256d Sleef_cinz_cosd4_u35avx(__m256d a);
__m256d Sleef_finz_cosd4_u35avx2(__m256d a);
__m256d Sleef_finz_cosd4_u35fma4(__m256d a);

__m512d Sleef_cosd8_u35(__m512d a);
__m512d Sleef_cosd8_u35avx512f(__m512d a);
__m512d Sleef_cosd8_u35avx512fnofma(__m512d a);
__m512d Sleef_cinz_cosd8_u35avx512fnofma(__m512d a);
__m512d Sleef_finz_cosd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cos_u35 with the same accuracy specification.


Vectorized single precision cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_cosf1_u35purec(float a);
float Sleef_cosf1_u35purecfma(float a);
float Sleef_cinz_cosf1_u35purec(float a);
float Sleef_finz_cosf1_u35purecfma(float a);

__m128 Sleef_cosf4_u35(__m128 a);
__m128 Sleef_cosf4_u35sse2(__m128 a);
__m128 Sleef_cosf4_u35sse4(__m128 a);
__m128 Sleef_cosf4_u35avx2128(__m128 a);
__m128 Sleef_cinz_cosf4_u35sse2(__m128 a);
__m128 Sleef_cinz_cosf4_u35sse4(__m128 a);
__m128 Sleef_finz_cosf4_u35avx2128(__m128 a);

__m256 Sleef_cosf8_u35(__m256 a);
__m256 Sleef_cosf8_u35avx(__m256 a);
__m256 Sleef_cosf8_u35avx2(__m256 a);
__m256 Sleef_cosf8_u35fma4(__m256 a);
__m256 Sleef_cinz_cosf8_u35avx(__m256 a);
__m256 Sleef_finz_cosf8_u35avx2(__m256 a);
__m256 Sleef_finz_cosf8_u35fma4(__m256 a);

__m512 Sleef_cosf16_u35(__m512 a);
__m512 Sleef_cosf16_u35avx512f(__m512 a);
__m512 Sleef_cosf16_u35avx512fnofma(__m512 a);
__m512 Sleef_cinz_cosf16_u35avx512fnofma(__m512 a);
__m512 Sleef_finz_cosf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cosf_u35 with the same accuracy specification.


Vectorized double precision combined sine and cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincosd1_u10purec(double a);
Sleef_double2 Sleef_sincosd1_u10purecfma(double a);
Sleef_double2 Sleef_cinz_sincosd1_u10purec(double a);
Sleef_double2 Sleef_finz_sincosd1_u10purecfma(double a);

Sleef___m128d_2 Sleef_sincosd2_u10(__m128d a);
Sleef___m128d_2 Sleef_sincosd2_u10sse2(__m128d a);
Sleef___m128d_2 Sleef_sincosd2_u10sse4(__m128d a);
Sleef___m128d_2 Sleef_sincosd2_u10avx2128(__m128d a);
Sleef___m128d_2 Sleef_cinz_sincosd2_u10sse2(__m128d a);
Sleef___m128d_2 Sleef_cinz_sincosd2_u10sse4(__m128d a);
Sleef___m128d_2 Sleef_finz_sincosd2_u10avx2128(__m128d a);

Sleef___m256d_2 Sleef_sincosd4_u10(__m256d a);
Sleef___m256d_2 Sleef_sincosd4_u10avx(__m256d a);
Sleef___m256d_2 Sleef_sincosd4_u10avx2(__m256d a);
Sleef___m256d_2 Sleef_sincosd4_u10fma4(__m256d a);
Sleef___m256d_2 Sleef_cinz_sincosd4_u10avx(__m256d a);
Sleef___m256d_2 Sleef_finz_sincosd4_u10avx2(__m256d a);
Sleef___m256d_2 Sleef_finz_sincosd4_u10fma4(__m256d a);

Sleef___m512d_2 Sleef_sincosd8_u10(__m512d a);
Sleef___m512d_2 Sleef_sincosd8_u10avx512f(__m512d a);
Sleef___m512d_2 Sleef_sincosd8_u10avx512fnofma(__m512d a);
Sleef___m512d_2 Sleef_cinz_sincosd8_u10avx512fnofma(__m512d a);
Sleef___m512d_2 Sleef_finz_sincosd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincos_u10 with the same accuracy specification.


Vectorized single precision combined sine and cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float2 Sleef_sincosf1_u10purec(float a);
Sleef_float2 Sleef_sincosf1_u10purecfma(float a);
Sleef_float2 Sleef_cinz_sincosf1_u10purec(float a);
Sleef_float2 Sleef_finz_sincosf1_u10purecfma(float a);

Sleef___m128_2 Sleef_sincosf4_u10(__m128 a);
Sleef___m128_2 Sleef_sincosf4_u10sse2(__m128 a);
Sleef___m128_2 Sleef_sincosf4_u10sse4(__m128 a);
Sleef___m128_2 Sleef_sincosf4_u10avx2128(__m128 a);
Sleef___m128_2 Sleef_cinz_sincosf4_u10sse2(__m128 a);
Sleef___m128_2 Sleef_cinz_sincosf4_u10sse4(__m128 a);
Sleef___m128_2 Sleef_finz_sincosf4_u10avx2128(__m128 a);

Sleef___m256_2 Sleef_sincosf8_u10(__m256 a);
Sleef___m256_2 Sleef_sincosf8_u10avx(__m256 a);
Sleef___m256_2 Sleef_sincosf8_u10avx2(__m256 a);
Sleef___m256_2 Sleef_sincosf8_u10fma4(__m256 a);
Sleef___m256_2 Sleef_cinz_sincosf8_u10avx(__m256 a);
Sleef___m256_2 Sleef_finz_sincosf8_u10avx2(__m256 a);
Sleef___m256_2 Sleef_finz_sincosf8_u10fma4(__m256 a);

Sleef___m512_2 Sleef_sincosf16_u10(__m512 a);
Sleef___m512_2 Sleef_sincosf16_u10avx512f(__m512 a);
Sleef___m512_2 Sleef_sincosf16_u10avx512fnofma(__m512 a);
Sleef___m512_2 Sleef_cinz_sincosf16_u10avx512fnofma(__m512 a);
Sleef___m512_2 Sleef_finz_sincosf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincosf_u10 with the same accuracy specification.


Vectorized double precision combined sine and cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_double2 Sleef_sincosd1_u35purec(double a);
Sleef_double2 Sleef_sincosd1_u35purecfma(double a);
Sleef_double2 Sleef_cinz_sincosd1_u35purec(double a);
Sleef_double2 Sleef_finz_sincosd1_u35purecfma(double a);

Sleef___m128d_2 Sleef_sincosd2_u35(__m128d a);
Sleef___m128d_2 Sleef_sincosd2_u35sse2(__m128d a);
Sleef___m128d_2 Sleef_sincosd2_u35sse4(__m128d a);
Sleef___m128d_2 Sleef_sincosd2_u35avx2128(__m128d a);
Sleef___m128d_2 Sleef_cinz_sincosd2_u35sse2(__m128d a);
Sleef___m128d_2 Sleef_cinz_sincosd2_u35sse4(__m128d a);
Sleef___m128d_2 Sleef_finz_sincosd2_u35avx2128(__m128d a);

Sleef___m256d_2 Sleef_sincosd4_u35(__m256d a);
Sleef___m256d_2 Sleef_sincosd4_u35avx(__m256d a);
Sleef___m256d_2 Sleef_sincosd4_u35avx2(__m256d a);
Sleef___m256d_2 Sleef_sincosd4_u35fma4(__m256d a);
Sleef___m256d_2 Sleef_cinz_sincosd4_u35avx(__m256d a);
Sleef___m256d_2 Sleef_finz_sincosd4_u35avx2(__m256d a);
Sleef___m256d_2 Sleef_finz_sincosd4_u35fma4(__m256d a);

Sleef___m512d_2 Sleef_sincosd8_u35(__m512d a);
Sleef___m512d_2 Sleef_sincosd8_u35avx512f(__m512d a);
Sleef___m512d_2 Sleef_sincosd8_u35avx512fnofma(__m512d a);
Sleef___m512d_2 Sleef_cinz_sincosd8_u35avx512fnofma(__m512d a);
Sleef___m512d_2 Sleef_finz_sincosd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincos_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_float2 Sleef_sincosf1_u35purec(float a);
Sleef_float2 Sleef_sincosf1_u35purecfma(float a);
Sleef_float2 Sleef_cinz_sincosf1_u35purec(float a);
Sleef_float2 Sleef_finz_sincosf1_u35purecfma(float a);

Sleef___m128_2 Sleef_sincosf4_u35(__m128 a);
Sleef___m128_2 Sleef_sincosf4_u35sse2(__m128 a);
Sleef___m128_2 Sleef_sincosf4_u35sse4(__m128 a);
Sleef___m128_2 Sleef_sincosf4_u35avx2128(__m128 a);
Sleef___m128_2 Sleef_cinz_sincosf4_u35sse2(__m128 a);
Sleef___m128_2 Sleef_cinz_sincosf4_u35sse4(__m128 a);
Sleef___m128_2 Sleef_finz_sincosf4_u35avx2128(__m128 a);

Sleef___m256_2 Sleef_sincosf8_u35(__m256 a);
Sleef___m256_2 Sleef_sincosf8_u35avx(__m256 a);
Sleef___m256_2 Sleef_sincosf8_u35avx2(__m256 a);
Sleef___m256_2 Sleef_sincosf8_u35fma4(__m256 a);
Sleef___m256_2 Sleef_cinz_sincosf8_u35avx(__m256 a);
Sleef___m256_2 Sleef_finz_sincosf8_u35avx2(__m256 a);
Sleef___m256_2 Sleef_finz_sincosf8_u35fma4(__m256 a);

Sleef___m512_2 Sleef_sincosf16_u35(__m512 a);
Sleef___m512_2 Sleef_sincosf16_u35avx512f(__m512 a);
Sleef___m512_2 Sleef_sincosf16_u35avx512fnofma(__m512 a);
Sleef___m512_2 Sleef_cinz_sincosf16_u35avx512fnofma(__m512 a);
Sleef___m512_2 Sleef_finz_sincosf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincosf_u35 with the same accuracy specification.


Vectorized double precision sine functions of πa (sinpi) with 0.506 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sinpid1_u05purec(double a);
double Sleef_sinpid1_u05purecfma(double a);
double Sleef_cinz_sinpid1_u05purec(double a);
double Sleef_finz_sinpid1_u05purecfma(double a);

__m128d Sleef_sinpid2_u05(__m128d a);
__m128d Sleef_sinpid2_u05sse2(__m128d a);
__m128d Sleef_sinpid2_u05sse4(__m128d a);
__m128d Sleef_sinpid2_u05avx2128(__m128d a);
__m128d Sleef_cinz_sinpid2_u05sse2(__m128d a);
__m128d Sleef_cinz_sinpid2_u05sse4(__m128d a);
__m128d Sleef_finz_sinpid2_u05avx2128(__m128d a);

__m256d Sleef_sinpid4_u05(__m256d a);
__m256d Sleef_sinpid4_u05avx(__m256d a);
__m256d Sleef_sinpid4_u05avx2(__m256d a);
__m256d Sleef_sinpid4_u05fma4(__m256d a);
__m256d Sleef_cinz_sinpid4_u05avx(__m256d a);
__m256d Sleef_finz_sinpid4_u05avx2(__m256d a);
__m256d Sleef_finz_sinpid4_u05fma4(__m256d a);

__m512d Sleef_sinpid8_u05(__m512d a);
__m512d Sleef_sinpid8_u05avx512f(__m512d a);
__m512d Sleef_sinpid8_u05avx512fnofma(__m512d a);
__m512d Sleef_cinz_sinpid8_u05avx512fnofma(__m512d a);
__m512d Sleef_finz_sinpid8_u05avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinpi_u05 with the same accuracy specification.


Vectorized single precision sine functions of πa (sinpi) with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sinpif1_u05purec(float a);
float Sleef_sinpif1_u05purecfma(float a);
float Sleef_cinz_sinpif1_u05purec(float a);
float Sleef_finz_sinpif1_u05purecfma(float a);

__m128 Sleef_sinpif4_u05(__m128 a);
__m128 Sleef_sinpif4_u05sse2(__m128 a);
__m128 Sleef_sinpif4_u05sse4(__m128 a);
__m128 Sleef_sinpif4_u05avx2128(__m128 a);
__m128 Sleef_cinz_sinpif4_u05sse2(__m128 a);
__m128 Sleef_cinz_sinpif4_u05sse4(__m128 a);
__m128 Sleef_finz_sinpif4_u05avx2128(__m128 a);

__m256 Sleef_sinpif8_u05(__m256 a);
__m256 Sleef_sinpif8_u05avx(__m256 a);
__m256 Sleef_sinpif8_u05avx2(__m256 a);
__m256 Sleef_sinpif8_u05fma4(__m256 a);
__m256 Sleef_cinz_sinpif8_u05avx(__m256 a);
__m256 Sleef_finz_sinpif8_u05avx2(__m256 a);
__m256 Sleef_finz_sinpif8_u05fma4(__m256 a);

__m512 Sleef_sinpif16_u05(__m512 a);
__m512 Sleef_sinpif16_u05avx512f(__m512 a);
__m512 Sleef_sinpif16_u05avx512fnofma(__m512 a);
__m512 Sleef_cinz_sinpif16_u05avx512fnofma(__m512 a);
__m512 Sleef_finz_sinpif16_u05avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinpif_u05 with the same accuracy specification.


Vectorized double precision cosine functions of πa (cospi) with 0.506 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cospid1_u05purec(double a);
double Sleef_cospid1_u05purecfma(double a);
double Sleef_cinz_cospid1_u05purec(double a);
double Sleef_finz_cospid1_u05purecfma(double a);

__m128d Sleef_cospid2_u05(__m128d a);
__m128d Sleef_cospid2_u05sse2(__m128d a);
__m128d Sleef_cospid2_u05sse4(__m128d a);
__m128d Sleef_cospid2_u05avx2128(__m128d a);
__m128d Sleef_cinz_cospid2_u05sse2(__m128d a);
__m128d Sleef_cinz_cospid2_u05sse4(__m128d a);
__m128d Sleef_finz_cospid2_u05avx2128(__m128d a);

__m256d Sleef_cospid4_u05(__m256d a);
__m256d Sleef_cospid4_u05avx(__m256d a);
__m256d Sleef_cospid4_u05avx2(__m256d a);
__m256d Sleef_cospid4_u05fma4(__m256d a);
__m256d Sleef_cinz_cospid4_u05avx(__m256d a);
__m256d Sleef_finz_cospid4_u05avx2(__m256d a);
__m256d Sleef_finz_cospid4_u05fma4(__m256d a);

__m512d Sleef_cospid8_u05(__m512d a);
__m512d Sleef_cospid8_u05avx512f(__m512d a);
__m512d Sleef_cospid8_u05avx512fnofma(__m512d a);
__m512d Sleef_cinz_cospid8_u05avx512fnofma(__m512d a);
__m512d Sleef_finz_cospid8_u05avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cospi_u05 with the same accuracy specification.


Vectorized single precision cosine functions of πa (cospi) with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_cospif1_u05purec(float a);
float Sleef_cospif1_u05purecfma(float a);
float Sleef_cinz_cospif1_u05purec(float a);
float Sleef_finz_cospif1_u05purecfma(float a);

__m128 Sleef_cospif4_u05(__m128 a);
__m128 Sleef_cospif4_u05sse2(__m128 a);
__m128 Sleef_cospif4_u05sse4(__m128 a);
__m128 Sleef_cospif4_u05avx2128(__m128 a);
__m128 Sleef_cinz_cospif4_u05sse2(__m128 a);
__m128 Sleef_cinz_cospif4_u05sse4(__m128 a);
__m128 Sleef_finz_cospif4_u05avx2128(__m128 a);

__m256 Sleef_cospif8_u05(__m256 a);
__m256 Sleef_cospif8_u05avx(__m256 a);
__m256 Sleef_cospif8_u05avx2(__m256 a);
__m256 Sleef_cospif8_u05fma4(__m256 a);
__m256 Sleef_cinz_cospif8_u05avx(__m256 a);
__m256 Sleef_finz_cospif8_u05avx2(__m256 a);
__m256 Sleef_finz_cospif8_u05fma4(__m256 a);

__m512 Sleef_cospif16_u05(__m512 a);
__m512 Sleef_cospif16_u05avx512f(__m512 a);
__m512 Sleef_cospif16_u05avx512fnofma(__m512 a);
__m512 Sleef_cinz_cospif16_u05avx512fnofma(__m512 a);
__m512 Sleef_finz_cospif16_u05avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cospif_u05 with the same accuracy specification.


Vectorized double precision combined sine and cosine functions of πa (sincospi) with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincospid1_u05purec(double a);
Sleef_double2 Sleef_sincospid1_u05purecfma(double a);
Sleef_double2 Sleef_cinz_sincospid1_u05purec(double a);
Sleef_double2 Sleef_finz_sincospid1_u05purecfma(double a);

Sleef___m128d_2 Sleef_sincospid2_u05(__m128d a);
Sleef___m128d_2 Sleef_sincospid2_u05sse2(__m128d a);
Sleef___m128d_2 Sleef_sincospid2_u05sse4(__m128d a);
Sleef___m128d_2 Sleef_sincospid2_u05avx2128(__m128d a);
Sleef___m128d_2 Sleef_cinz_sincospid2_u05sse2(__m128d a);
Sleef___m128d_2 Sleef_cinz_sincospid2_u05sse4(__m128d a);
Sleef___m128d_2 Sleef_finz_sincospid2_u05avx2128(__m128d a);

Sleef___m256d_2 Sleef_sincospid4_u05(__m256d a);
Sleef___m256d_2 Sleef_sincospid4_u05avx(__m256d a);
Sleef___m256d_2 Sleef_sincospid4_u05avx2(__m256d a);
Sleef___m256d_2 Sleef_sincospid4_u05fma4(__m256d a);
Sleef___m256d_2 Sleef_cinz_sincospid4_u05avx(__m256d a);
Sleef___m256d_2 Sleef_finz_sincospid4_u05avx2(__m256d a);
Sleef___m256d_2 Sleef_finz_sincospid4_u05fma4(__m256d a);

Sleef___m512d_2 Sleef_sincospid8_u05(__m512d a);
Sleef___m512d_2 Sleef_sincospid8_u05avx512f(__m512d a);
Sleef___m512d_2 Sleef_sincospid8_u05avx512fnofma(__m512d a);
Sleef___m512d_2 Sleef_cinz_sincospid8_u05avx512fnofma(__m512d a);
Sleef___m512d_2 Sleef_finz_sincospid8_u05avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincospi_u05 with the same accuracy specification.


Vectorized single precision combined sine and cosine functions of πa (sincospi) with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float2 Sleef_sincospif1_u05purec(float a);
Sleef_float2 Sleef_sincospif1_u05purecfma(float a);
Sleef_float2 Sleef_cinz_sincospif1_u05purec(float a);
Sleef_float2 Sleef_finz_sincospif1_u05purecfma(float a);

Sleef___m128_2 Sleef_sincospif4_u05(__m128 a);
Sleef___m128_2 Sleef_sincospif4_u05sse2(__m128 a);
Sleef___m128_2 Sleef_sincospif4_u05sse4(__m128 a);
Sleef___m128_2 Sleef_sincospif4_u05avx2128(__m128 a);
Sleef___m128_2 Sleef_cinz_sincospif4_u05sse2(__m128 a);
Sleef___m128_2 Sleef_cinz_sincospif4_u05sse4(__m128 a);
Sleef___m128_2 Sleef_finz_sincospif4_u05avx2128(__m128 a);

Sleef___m256_2 Sleef_sincospif8_u05(__m256 a);
Sleef___m256_2 Sleef_sincospif8_u05avx(__m256 a);
Sleef___m256_2 Sleef_sincospif8_u05avx2(__m256 a);
Sleef___m256_2 Sleef_sincospif8_u05fma4(__m256 a);
Sleef___m256_2 Sleef_cinz_sincospif8_u05avx(__m256 a);
Sleef___m256_2 Sleef_finz_sincospif8_u05avx2(__m256 a);
Sleef___m256_2 Sleef_finz_sincospif8_u05fma4(__m256 a);

Sleef___m512_2 Sleef_sincospif16_u05(__m512 a);
Sleef___m512_2 Sleef_sincospif16_u05avx512f(__m512 a);
Sleef___m512_2 Sleef_sincospif16_u05avx512fnofma(__m512 a);
Sleef___m512_2 Sleef_cinz_sincospif16_u05avx512fnofma(__m512 a);
Sleef___m512_2 Sleef_finz_sincospif16_u05avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincospif_u05 with the same accuracy specification.


Vectorized double precision combined sine and cosine functions of πa (sincospi) with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_double2 Sleef_sincospid1_u35purec(double a);
Sleef_double2 Sleef_sincospid1_u35purecfma(double a);
Sleef_double2 Sleef_cinz_sincospid1_u35purec(double a);
Sleef_double2 Sleef_finz_sincospid1_u35purecfma(double a);

Sleef___m128d_2 Sleef_sincospid2_u35(__m128d a);
Sleef___m128d_2 Sleef_sincospid2_u35sse2(__m128d a);
Sleef___m128d_2 Sleef_sincospid2_u35sse4(__m128d a);
Sleef___m128d_2 Sleef_sincospid2_u35avx2128(__m128d a);
Sleef___m128d_2 Sleef_cinz_sincospid2_u35sse2(__m128d a);
Sleef___m128d_2 Sleef_cinz_sincospid2_u35sse4(__m128d a);
Sleef___m128d_2 Sleef_finz_sincospid2_u35avx2128(__m128d a);

Sleef___m256d_2 Sleef_sincospid4_u35(__m256d a);
Sleef___m256d_2 Sleef_sincospid4_u35avx(__m256d a);
Sleef___m256d_2 Sleef_sincospid4_u35avx2(__m256d a);
Sleef___m256d_2 Sleef_sincospid4_u35fma4(__m256d a);
Sleef___m256d_2 Sleef_cinz_sincospid4_u35avx(__m256d a);
Sleef___m256d_2 Sleef_finz_sincospid4_u35avx2(__m256d a);
Sleef___m256d_2 Sleef_finz_sincospid4_u35fma4(__m256d a);

Sleef___m512d_2 Sleef_sincospid8_u35(__m512d a);
Sleef___m512d_2 Sleef_sincospid8_u35avx512f(__m512d a);
Sleef___m512d_2 Sleef_sincospid8_u35avx512fnofma(__m512d a);
Sleef___m512d_2 Sleef_cinz_sincospid8_u35avx512fnofma(__m512d a);
Sleef___m512d_2 Sleef_finz_sincospid8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincospi_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine functions of πa (sincospi) with 3.5 ULP error bound

Synopsis

#include <sleef.h>


Sleef_float2 Sleef_sincospif1_u35purec(float a);
Sleef_float2 Sleef_sincospif1_u35purecfma(float a);
Sleef_float2 Sleef_cinz_sincospif1_u35purec(float a);
Sleef_float2 Sleef_finz_sincospif1_u35purecfma(float a);

Sleef___m128_2 Sleef_sincospif4_u35(__m128 a);
Sleef___m128_2 Sleef_sincospif4_u35sse2(__m128 a);
Sleef___m128_2 Sleef_sincospif4_u35sse4(__m128 a);
Sleef___m128_2 Sleef_sincospif4_u35avx2128(__m128 a);
Sleef___m128_2 Sleef_cinz_sincospif4_u35sse2(__m128 a);
Sleef___m128_2 Sleef_cinz_sincospif4_u35sse4(__m128 a);
Sleef___m128_2 Sleef_finz_sincospif4_u35avx2128(__m128 a);

Sleef___m256_2 Sleef_sincospif8_u35(__m256 a);
Sleef___m256_2 Sleef_sincospif8_u35avx(__m256 a);
Sleef___m256_2 Sleef_sincospif8_u35avx2(__m256 a);
Sleef___m256_2 Sleef_sincospif8_u35fma4(__m256 a);
Sleef___m256_2 Sleef_cinz_sincospif8_u35avx(__m256 a);
Sleef___m256_2 Sleef_finz_sincospif8_u35avx2(__m256 a);
Sleef___m256_2 Sleef_finz_sincospif8_u35fma4(__m256 a);

Sleef___m512_2 Sleef_sincospif16_u35(__m512 a);
Sleef___m512_2 Sleef_sincospif16_u35avx512f(__m512 a);
Sleef___m512_2 Sleef_sincospif16_u35avx512fnofma(__m512 a);
Sleef___m512_2 Sleef_cinz_sincospif16_u35avx512fnofma(__m512 a);
Sleef___m512_2 Sleef_finz_sincospif16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincospif_u35 with the same accuracy specification.


Vectorized double precision tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_tand1_u10purec(double a);
double Sleef_tand1_u10purecfma(double a);
double Sleef_cinz_tand1_u10purec(double a);
double Sleef_finz_tand1_u10purecfma(double a);

__m128d Sleef_tand2_u10(__m128d a);
__m128d Sleef_tand2_u10sse2(__m128d a);
__m128d Sleef_tand2_u10sse4(__m128d a);
__m128d Sleef_tand2_u10avx2128(__m128d a);
__m128d Sleef_cinz_tand2_u10sse2(__m128d a);
__m128d Sleef_cinz_tand2_u10sse4(__m128d a);
__m128d Sleef_finz_tand2_u10avx2128(__m128d a);

__m256d Sleef_tand4_u10(__m256d a);
__m256d Sleef_tand4_u10avx(__m256d a);
__m256d Sleef_tand4_u10avx2(__m256d a);
__m256d Sleef_tand4_u10fma4(__m256d a);
__m256d Sleef_cinz_tand4_u10avx(__m256d a);
__m256d Sleef_finz_tand4_u10avx2(__m256d a);
__m256d Sleef_finz_tand4_u10fma4(__m256d a);

__m512d Sleef_tand8_u10(__m512d a);
__m512d Sleef_tand8_u10avx512f(__m512d a);
__m512d Sleef_tand8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_tand8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_tand8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tan_u10 with the same accuracy specification.


Vectorized single precision tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_tanf1_u10purec(float a);
float Sleef_tanf1_u10purecfma(float a);
float Sleef_cinz_tanf1_u10purec(float a);
float Sleef_finz_tanf1_u10purecfma(float a);

__m128 Sleef_tanf4_u10(__m128 a);
__m128 Sleef_tanf4_u10sse2(__m128 a);
__m128 Sleef_tanf4_u10sse4(__m128 a);
__m128 Sleef_tanf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_tanf4_u10sse2(__m128 a);
__m128 Sleef_cinz_tanf4_u10sse4(__m128 a);
__m128 Sleef_finz_tanf4_u10avx2128(__m128 a);

__m256 Sleef_tanf8_u10(__m256 a);
__m256 Sleef_tanf8_u10avx(__m256 a);
__m256 Sleef_tanf8_u10avx2(__m256 a);
__m256 Sleef_tanf8_u10fma4(__m256 a);
__m256 Sleef_cinz_tanf8_u10avx(__m256 a);
__m256 Sleef_finz_tanf8_u10avx2(__m256 a);
__m256 Sleef_finz_tanf8_u10fma4(__m256 a);

__m512 Sleef_tanf16_u10(__m512 a);
__m512 Sleef_tanf16_u10avx512f(__m512 a);
__m512 Sleef_tanf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_tanf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_tanf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tanf_u10 with the same accuracy specification.


Vectorized double precision tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_tand1_u35purec(double a);
double Sleef_tand1_u35purecfma(double a);
double Sleef_cinz_tand1_u35purec(double a);
double Sleef_finz_tand1_u35purecfma(double a);

__m128d Sleef_tand2_u35(__m128d a);
__m128d Sleef_tand2_u35sse2(__m128d a);
__m128d Sleef_tand2_u35sse4(__m128d a);
__m128d Sleef_tand2_u35avx2128(__m128d a);
__m128d Sleef_cinz_tand2_u35sse2(__m128d a);
__m128d Sleef_cinz_tand2_u35sse4(__m128d a);
__m128d Sleef_finz_tand2_u35avx2128(__m128d a);

__m256d Sleef_tand4_u35(__m256d a);
__m256d Sleef_tand4_u35avx(__m256d a);
__m256d Sleef_tand4_u35avx2(__m256d a);
__m256d Sleef_tand4_u35fma4(__m256d a);
__m256d Sleef_cinz_tand4_u35avx(__m256d a);
__m256d Sleef_finz_tand4_u35avx2(__m256d a);
__m256d Sleef_finz_tand4_u35fma4(__m256d a);

__m512d Sleef_tand8_u35(__m512d a);
__m512d Sleef_tand8_u35avx512f(__m512d a);
__m512d Sleef_tand8_u35avx512fnofma(__m512d a);
__m512d Sleef_cinz_tand8_u35avx512fnofma(__m512d a);
__m512d Sleef_finz_tand8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tan_u35 with the same accuracy specification.


Vectorized single precision tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_tanf1_u35purec(float a);
float Sleef_tanf1_u35purecfma(float a);
float Sleef_cinz_tanf1_u35purec(float a);
float Sleef_finz_tanf1_u35purecfma(float a);

__m128 Sleef_tanf4_u35(__m128 a);
__m128 Sleef_tanf4_u35sse2(__m128 a);
__m128 Sleef_tanf4_u35sse4(__m128 a);
__m128 Sleef_tanf4_u35avx2128(__m128 a);
__m128 Sleef_cinz_tanf4_u35sse2(__m128 a);
__m128 Sleef_cinz_tanf4_u35sse4(__m128 a);
__m128 Sleef_finz_tanf4_u35avx2128(__m128 a);

__m256 Sleef_tanf8_u35(__m256 a);
__m256 Sleef_tanf8_u35avx(__m256 a);
__m256 Sleef_tanf8_u35avx2(__m256 a);
__m256 Sleef_tanf8_u35fma4(__m256 a);
__m256 Sleef_cinz_tanf8_u35avx(__m256 a);
__m256 Sleef_finz_tanf8_u35avx2(__m256 a);
__m256 Sleef_finz_tanf8_u35fma4(__m256 a);

__m512 Sleef_tanf16_u35(__m512 a);
__m512 Sleef_tanf16_u35avx512f(__m512 a);
__m512 Sleef_tanf16_u35avx512fnofma(__m512 a);
__m512 Sleef_cinz_tanf16_u35avx512fnofma(__m512 a);
__m512 Sleef_finz_tanf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tanf_u35 with the same accuracy specification.

Power, exponential, and logarithmic functions

Vectorized double precision power functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_powd1_u10purec(double a, double b);
double Sleef_powd1_u10purecfma(double a, double b);
double Sleef_cinz_powd1_u10purec(double a, double b);
double Sleef_finz_powd1_u10purecfma(double a, double b);

__m128d Sleef_powd2_u10(__m128d a, __m128d b);
__m128d Sleef_powd2_u10sse2(__m128d a, __m128d b);
__m128d Sleef_powd2_u10sse4(__m128d a, __m128d b);
__m128d Sleef_powd2_u10avx2128(__m128d a, __m128d b);
__m128d Sleef_cinz_powd2_u10sse2(__m128d a, __m128d b);
__m128d Sleef_cinz_powd2_u10sse4(__m128d a, __m128d b);
__m128d Sleef_finz_powd2_u10avx2128(__m128d a, __m128d b);

__m256d Sleef_powd4_u10(__m256d a, __m256d b);
__m256d Sleef_powd4_u10avx(__m256d a, __m256d b);
__m256d Sleef_powd4_u10avx2(__m256d a, __m256d b);
__m256d Sleef_powd4_u10fma4(__m256d a, __m256d b);
__m256d Sleef_cinz_powd4_u10avx(__m256d a, __m256d b);
__m256d Sleef_finz_powd4_u10avx2(__m256d a, __m256d b);
__m256d Sleef_finz_powd4_u10fma4(__m256d a, __m256d b);

__m512d Sleef_powd8_u10(__m512d a, __m512d b);
__m512d Sleef_powd8_u10avx512f(__m512d a, __m512d b);
__m512d Sleef_powd8_u10avx512fnofma(__m512d a, __m512d b);
__m512d Sleef_cinz_powd8_u10avx512fnofma(__m512d a, __m512d b);
__m512d Sleef_finz_powd8_u10avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_pow_u10 with the same accuracy specification.


Vectorized single precision power functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_powf1_u10purec(float a, float b);
float Sleef_powf1_u10purecfma(float a, float b);
float Sleef_cinz_powf1_u10purec(float a, float b);
float Sleef_finz_powf1_u10purecfma(float a, float b);

__m128 Sleef_powf4_u10(__m128 a, __m128 b);
__m128 Sleef_powf4_u10sse2(__m128 a, __m128 b);
__m128 Sleef_powf4_u10sse4(__m128 a, __m128 b);
__m128 Sleef_powf4_u10avx2128(__m128 a, __m128 b);
__m128 Sleef_cinz_powf4_u10sse2(__m128 a, __m128 b);
__m128 Sleef_cinz_powf4_u10sse4(__m128 a, __m128 b);
__m128 Sleef_finz_powf4_u10avx2128(__m128 a, __m128 b);

__m256 Sleef_powf8_u10(__m256 a, __m256 b);
__m256 Sleef_powf8_u10avx(__m256 a, __m256 b);
__m256 Sleef_powf8_u10avx2(__m256 a, __m256 b);
__m256 Sleef_powf8_u10fma4(__m256 a, __m256 b);
__m256 Sleef_cinz_powf8_u10avx(__m256 a, __m256 b);
__m256 Sleef_finz_powf8_u10avx2(__m256 a, __m256 b);
__m256 Sleef_finz_powf8_u10fma4(__m256 a, __m256 b);

__m512 Sleef_powf16_u10(__m512 a, __m512 b);
__m512 Sleef_powf16_u10avx512f(__m512 a, __m512 b);
__m512 Sleef_powf16_u10avx512fnofma(__m512 a, __m512 b);
__m512 Sleef_cinz_powf16_u10avx512fnofma(__m512 a, __m512 b);
__m512 Sleef_finz_powf16_u10avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_powf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_logd1_u10purec(double a);
double Sleef_logd1_u10purecfma(double a);
double Sleef_cinz_logd1_u10purec(double a);
double Sleef_finz_logd1_u10purecfma(double a);

__m128d Sleef_logd2_u10(__m128d a);
__m128d Sleef_logd2_u10sse2(__m128d a);
__m128d Sleef_logd2_u10sse4(__m128d a);
__m128d Sleef_logd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_logd2_u10sse2(__m128d a);
__m128d Sleef_cinz_logd2_u10sse4(__m128d a);
__m128d Sleef_finz_logd2_u10avx2128(__m128d a);

__m256d Sleef_logd4_u10(__m256d a);
__m256d Sleef_logd4_u10avx(__m256d a);
__m256d Sleef_logd4_u10avx2(__m256d a);
__m256d Sleef_logd4_u10fma4(__m256d a);
__m256d Sleef_cinz_logd4_u10avx(__m256d a);
__m256d Sleef_finz_logd4_u10avx2(__m256d a);
__m256d Sleef_finz_logd4_u10fma4(__m256d a);

__m512d Sleef_logd8_u10(__m512d a);
__m512d Sleef_logd8_u10avx512f(__m512d a);
__m512d Sleef_logd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_logd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_logd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log_u10 with the same accuracy specification.


Vectorized single precision natural logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_logf1_u10purec(float a);
float Sleef_logf1_u10purecfma(float a);
float Sleef_cinz_logf1_u10purec(float a);
float Sleef_finz_logf1_u10purecfma(float a);

__m128 Sleef_logf4_u10(__m128 a);
__m128 Sleef_logf4_u10sse2(__m128 a);
__m128 Sleef_logf4_u10sse4(__m128 a);
__m128 Sleef_logf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_logf4_u10sse2(__m128 a);
__m128 Sleef_cinz_logf4_u10sse4(__m128 a);
__m128 Sleef_finz_logf4_u10avx2128(__m128 a);

__m256 Sleef_logf8_u10(__m256 a);
__m256 Sleef_logf8_u10avx(__m256 a);
__m256 Sleef_logf8_u10avx2(__m256 a);
__m256 Sleef_logf8_u10fma4(__m256 a);
__m256 Sleef_cinz_logf8_u10avx(__m256 a);
__m256 Sleef_finz_logf8_u10avx2(__m256 a);
__m256 Sleef_finz_logf8_u10fma4(__m256 a);

__m512 Sleef_logf16_u10(__m512 a);
__m512 Sleef_logf16_u10avx512f(__m512 a);
__m512 Sleef_logf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_logf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_logf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_logf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_logd1_u35purec(double a);
double Sleef_logd1_u35purecfma(double a);
double Sleef_cinz_logd1_u35purec(double a);
double Sleef_finz_logd1_u35purecfma(double a);

__m128d Sleef_logd2_u35(__m128d a);
__m128d Sleef_logd2_u35sse2(__m128d a);
__m128d Sleef_logd2_u35sse4(__m128d a);
__m128d Sleef_logd2_u35avx2128(__m128d a);
__m128d Sleef_cinz_logd2_u35sse2(__m128d a);
__m128d Sleef_cinz_logd2_u35sse4(__m128d a);
__m128d Sleef_finz_logd2_u35avx2128(__m128d a);

__m256d Sleef_logd4_u35(__m256d a);
__m256d Sleef_logd4_u35avx(__m256d a);
__m256d Sleef_logd4_u35avx2(__m256d a);
__m256d Sleef_logd4_u35fma4(__m256d a);
__m256d Sleef_cinz_logd4_u35avx(__m256d a);
__m256d Sleef_finz_logd4_u35avx2(__m256d a);
__m256d Sleef_finz_logd4_u35fma4(__m256d a);

__m512d Sleef_logd8_u35(__m512d a);
__m512d Sleef_logd8_u35avx512f(__m512d a);
__m512d Sleef_logd8_u35avx512fnofma(__m512d a);
__m512d Sleef_cinz_logd8_u35avx512fnofma(__m512d a);
__m512d Sleef_finz_logd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log_u35 with the same accuracy specification.


Vectorized single precision natural logarithmic functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_logf1_u35purec(float a);
float Sleef_logf1_u35purecfma(float a);
float Sleef_cinz_logf1_u35purec(float a);
float Sleef_finz_logf1_u35purecfma(float a);

__m128 Sleef_logf4_u35(__m128 a);
__m128 Sleef_logf4_u35sse2(__m128 a);
__m128 Sleef_logf4_u35sse4(__m128 a);
__m128 Sleef_logf4_u35avx2128(__m128 a);
__m128 Sleef_cinz_logf4_u35sse2(__m128 a);
__m128 Sleef_cinz_logf4_u35sse4(__m128 a);
__m128 Sleef_finz_logf4_u35avx2128(__m128 a);

__m256 Sleef_logf8_u35(__m256 a);
__m256 Sleef_logf8_u35avx(__m256 a);
__m256 Sleef_logf8_u35avx2(__m256 a);
__m256 Sleef_logf8_u35fma4(__m256 a);
__m256 Sleef_cinz_logf8_u35avx(__m256 a);
__m256 Sleef_finz_logf8_u35avx2(__m256 a);
__m256 Sleef_finz_logf8_u35fma4(__m256 a);

__m512 Sleef_logf16_u35(__m512 a);
__m512 Sleef_logf16_u35avx512f(__m512 a);
__m512 Sleef_logf16_u35avx512fnofma(__m512 a);
__m512 Sleef_cinz_logf16_u35avx512fnofma(__m512 a);
__m512 Sleef_finz_logf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_logf_u35 with the same accuracy specification.


Vectorized double precision base-10 logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log10d1_u10purec(double a);
double Sleef_log10d1_u10purecfma(double a);
double Sleef_cinz_log10d1_u10purec(double a);
double Sleef_finz_log10d1_u10purecfma(double a);

__m128d Sleef_log10d2_u10(__m128d a);
__m128d Sleef_log10d2_u10sse2(__m128d a);
__m128d Sleef_log10d2_u10sse4(__m128d a);
__m128d Sleef_log10d2_u10avx2128(__m128d a);
__m128d Sleef_cinz_log10d2_u10sse2(__m128d a);
__m128d Sleef_cinz_log10d2_u10sse4(__m128d a);
__m128d Sleef_finz_log10d2_u10avx2128(__m128d a);

__m256d Sleef_log10d4_u10(__m256d a);
__m256d Sleef_log10d4_u10avx(__m256d a);
__m256d Sleef_log10d4_u10avx2(__m256d a);
__m256d Sleef_log10d4_u10fma4(__m256d a);
__m256d Sleef_cinz_log10d4_u10avx(__m256d a);
__m256d Sleef_finz_log10d4_u10avx2(__m256d a);
__m256d Sleef_finz_log10d4_u10fma4(__m256d a);

__m512d Sleef_log10d8_u10(__m512d a);
__m512d Sleef_log10d8_u10avx512f(__m512d a);
__m512d Sleef_log10d8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_log10d8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_log10d8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log10_u10 with the same accuracy specification.


Vectorized single precision base-10 logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_log10f1_u10purec(float a);
float Sleef_log10f1_u10purecfma(float a);
float Sleef_cinz_log10f1_u10purec(float a);
float Sleef_finz_log10f1_u10purecfma(float a);

__m128 Sleef_log10f4_u10(__m128 a);
__m128 Sleef_log10f4_u10sse2(__m128 a);
__m128 Sleef_log10f4_u10sse4(__m128 a);
__m128 Sleef_log10f4_u10avx2128(__m128 a);
__m128 Sleef_cinz_log10f4_u10sse2(__m128 a);
__m128 Sleef_cinz_log10f4_u10sse4(__m128 a);
__m128 Sleef_finz_log10f4_u10avx2128(__m128 a);

__m256 Sleef_log10f8_u10(__m256 a);
__m256 Sleef_log10f8_u10avx(__m256 a);
__m256 Sleef_log10f8_u10avx2(__m256 a);
__m256 Sleef_log10f8_u10fma4(__m256 a);
__m256 Sleef_cinz_log10f8_u10avx(__m256 a);
__m256 Sleef_finz_log10f8_u10avx2(__m256 a);
__m256 Sleef_finz_log10f8_u10fma4(__m256 a);

__m512 Sleef_log10f16_u10(__m512 a);
__m512 Sleef_log10f16_u10avx512f(__m512 a);
__m512 Sleef_log10f16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_log10f16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_log10f16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log10f_u10 with the same accuracy specification.


Vectorized double precision base-2 logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log2d1_u10purec(double a);
double Sleef_log2d1_u10purecfma(double a);
double Sleef_cinz_log2d1_u10purec(double a);
double Sleef_finz_log2d1_u10purecfma(double a);

__m128d Sleef_log2d2_u10(__m128d a);
__m128d Sleef_log2d2_u10sse2(__m128d a);
__m128d Sleef_log2d2_u10sse4(__m128d a);
__m128d Sleef_log2d2_u10avx2128(__m128d a);
__m128d Sleef_cinz_log2d2_u10sse2(__m128d a);
__m128d Sleef_cinz_log2d2_u10sse4(__m128d a);
__m128d Sleef_finz_log2d2_u10avx2128(__m128d a);

__m256d Sleef_log2d4_u10(__m256d a);
__m256d Sleef_log2d4_u10avx(__m256d a);
__m256d Sleef_log2d4_u10avx2(__m256d a);
__m256d Sleef_log2d4_u10fma4(__m256d a);
__m256d Sleef_cinz_log2d4_u10avx(__m256d a);
__m256d Sleef_finz_log2d4_u10avx2(__m256d a);
__m256d Sleef_finz_log2d4_u10fma4(__m256d a);

__m512d Sleef_log2d8_u10(__m512d a);
__m512d Sleef_log2d8_u10avx512f(__m512d a);
__m512d Sleef_log2d8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_log2d8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_log2d8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log2_u10 with the same accuracy specification.


Vectorized single precision base-2 logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_log2f1_u10purec(float a);
float Sleef_log2f1_u10purecfma(float a);
float Sleef_cinz_log2f1_u10purec(float a);
float Sleef_finz_log2f1_u10purecfma(float a);

__m128 Sleef_log2f4_u10(__m128 a);
__m128 Sleef_log2f4_u10sse2(__m128 a);
__m128 Sleef_log2f4_u10sse4(__m128 a);
__m128 Sleef_log2f4_u10avx2128(__m128 a);
__m128 Sleef_cinz_log2f4_u10sse2(__m128 a);
__m128 Sleef_cinz_log2f4_u10sse4(__m128 a);
__m128 Sleef_finz_log2f4_u10avx2128(__m128 a);

__m256 Sleef_log2f8_u10(__m256 a);
__m256 Sleef_log2f8_u10avx(__m256 a);
__m256 Sleef_log2f8_u10avx2(__m256 a);
__m256 Sleef_log2f8_u10fma4(__m256 a);
__m256 Sleef_cinz_log2f8_u10avx(__m256 a);
__m256 Sleef_finz_log2f8_u10avx2(__m256 a);
__m256 Sleef_finz_log2f8_u10fma4(__m256 a);

__m512 Sleef_log2f16_u10(__m512 a);
__m512 Sleef_log2f16_u10avx512f(__m512 a);
__m512 Sleef_log2f16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_log2f16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_log2f16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log2f_u10 with the same accuracy specification.


Vectorized double precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log1pd1_u10purec(double a);
double Sleef_log1pd1_u10purecfma(double a);
double Sleef_cinz_log1pd1_u10purec(double a);
double Sleef_finz_log1pd1_u10purecfma(double a);

__m128d Sleef_log1pd2_u10(__m128d a);
__m128d Sleef_log1pd2_u10sse2(__m128d a);
__m128d Sleef_log1pd2_u10sse4(__m128d a);
__m128d Sleef_log1pd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_log1pd2_u10sse2(__m128d a);
__m128d Sleef_cinz_log1pd2_u10sse4(__m128d a);
__m128d Sleef_finz_log1pd2_u10avx2128(__m128d a);

__m256d Sleef_log1pd4_u10(__m256d a);
__m256d Sleef_log1pd4_u10avx(__m256d a);
__m256d Sleef_log1pd4_u10avx2(__m256d a);
__m256d Sleef_log1pd4_u10fma4(__m256d a);
__m256d Sleef_cinz_log1pd4_u10avx(__m256d a);
__m256d Sleef_finz_log1pd4_u10avx2(__m256d a);
__m256d Sleef_finz_log1pd4_u10fma4(__m256d a);

__m512d Sleef_log1pd8_u10(__m512d a);
__m512d Sleef_log1pd8_u10avx512f(__m512d a);
__m512d Sleef_log1pd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_log1pd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_log1pd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log1p_u10 with the same accuracy specification.


Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_log1pf1_u10purec(float a);
float Sleef_log1pf1_u10purecfma(float a);
float Sleef_cinz_log1pf1_u10purec(float a);
float Sleef_finz_log1pf1_u10purecfma(float a);

__m128 Sleef_log1pf4_u10(__m128 a);
__m128 Sleef_log1pf4_u10sse2(__m128 a);
__m128 Sleef_log1pf4_u10sse4(__m128 a);
__m128 Sleef_log1pf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_log1pf4_u10sse2(__m128 a);
__m128 Sleef_cinz_log1pf4_u10sse4(__m128 a);
__m128 Sleef_finz_log1pf4_u10avx2128(__m128 a);

__m256 Sleef_log1pf8_u10(__m256 a);
__m256 Sleef_log1pf8_u10avx(__m256 a);
__m256 Sleef_log1pf8_u10avx2(__m256 a);
__m256 Sleef_log1pf8_u10fma4(__m256 a);
__m256 Sleef_cinz_log1pf8_u10avx(__m256 a);
__m256 Sleef_finz_log1pf8_u10avx2(__m256 a);
__m256 Sleef_finz_log1pf8_u10fma4(__m256 a);

__m512 Sleef_log1pf16_u10(__m512 a);
__m512 Sleef_log1pf16_u10avx512f(__m512 a);
__m512 Sleef_log1pf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_log1pf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_log1pf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log1pf_u10 with the same accuracy specification.


Vectorized double precision base-e exponential functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_expd1_u10purec(double a);
double Sleef_expd1_u10purecfma(double a);
double Sleef_cinz_expd1_u10purec(double a);
double Sleef_finz_expd1_u10purecfma(double a);

__m128d Sleef_expd2_u10(__m128d a);
__m128d Sleef_expd2_u10sse2(__m128d a);
__m128d Sleef_expd2_u10sse4(__m128d a);
__m128d Sleef_expd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_expd2_u10sse2(__m128d a);
__m128d Sleef_cinz_expd2_u10sse4(__m128d a);
__m128d Sleef_finz_expd2_u10avx2128(__m128d a);

__m256d Sleef_expd4_u10(__m256d a);
__m256d Sleef_expd4_u10avx(__m256d a);
__m256d Sleef_expd4_u10avx2(__m256d a);
__m256d Sleef_expd4_u10fma4(__m256d a);
__m256d Sleef_cinz_expd4_u10avx(__m256d a);
__m256d Sleef_finz_expd4_u10avx2(__m256d a);
__m256d Sleef_finz_expd4_u10fma4(__m256d a);

__m512d Sleef_expd8_u10(__m512d a);
__m512d Sleef_expd8_u10avx512f(__m512d a);
__m512d Sleef_expd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_expd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_expd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_exp_u10 with the same accuracy specification.


Vectorized single precision base-e exponential functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_expf1_u10purec(float a);
float Sleef_expf1_u10purecfma(float a);
float Sleef_cinz_expf1_u10purec(float a);
float Sleef_finz_expf1_u10purecfma(float a);

__m128 Sleef_expf4_u10(__m128 a);
__m128 Sleef_expf4_u10sse2(__m128 a);
__m128 Sleef_expf4_u10sse4(__m128 a);
__m128 Sleef_expf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_expf4_u10sse2(__m128 a);
__m128 Sleef_cinz_expf4_u10sse4(__m128 a);
__m128 Sleef_finz_expf4_u10avx2128(__m128 a);

__m256 Sleef_expf8_u10(__m256 a);
__m256 Sleef_expf8_u10avx(__m256 a);
__m256 Sleef_expf8_u10avx2(__m256 a);
__m256 Sleef_expf8_u10fma4(__m256 a);
__m256 Sleef_cinz_expf8_u10avx(__m256 a);
__m256 Sleef_finz_expf8_u10avx2(__m256 a);
__m256 Sleef_finz_expf8_u10fma4(__m256 a);

__m512 Sleef_expf16_u10(__m512 a);
__m512 Sleef_expf16_u10avx512f(__m512 a);
__m512 Sleef_expf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_expf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_expf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_expf_u10 with the same accuracy specification.


Vectorized double precision base-2 exponential functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_exp2d1_u10purec(double a);
double Sleef_exp2d1_u10purecfma(double a);
double Sleef_cinz_exp2d1_u10purec(double a);
double Sleef_finz_exp2d1_u10purecfma(double a);

__m128d Sleef_exp2d2_u10(__m128d a);
__m128d Sleef_exp2d2_u10sse2(__m128d a);
__m128d Sleef_exp2d2_u10sse4(__m128d a);
__m128d Sleef_exp2d2_u10avx2128(__m128d a);
__m128d Sleef_cinz_exp2d2_u10sse2(__m128d a);
__m128d Sleef_cinz_exp2d2_u10sse4(__m128d a);
__m128d Sleef_finz_exp2d2_u10avx2128(__m128d a);

__m256d Sleef_exp2d4_u10(__m256d a);
__m256d Sleef_exp2d4_u10avx(__m256d a);
__m256d Sleef_exp2d4_u10avx2(__m256d a);
__m256d Sleef_exp2d4_u10fma4(__m256d a);
__m256d Sleef_cinz_exp2d4_u10avx(__m256d a);
__m256d Sleef_finz_exp2d4_u10avx2(__m256d a);
__m256d Sleef_finz_exp2d4_u10fma4(__m256d a);

__m512d Sleef_exp2d8_u10(__m512d a);
__m512d Sleef_exp2d8_u10avx512f(__m512d a);
__m512d Sleef_exp2d8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_exp2d8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_exp2d8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_exp2_u10 with the same accuracy specification.


Vectorized single precision base-2 exponential functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_exp2f1_u10purec(float a);
float Sleef_exp2f1_u10purecfma(float a);
float Sleef_cinz_exp2f1_u10purec(float a);
float Sleef_finz_exp2f1_u10purecfma(float a);

__m128 Sleef_exp2f4_u10(__m128 a);
__m128 Sleef_exp2f4_u10sse2(__m128 a);
__m128 Sleef_exp2f4_u10sse4(__m128 a);
__m128 Sleef_exp2f4_u10avx2128(__m128 a);
__m128 Sleef_cinz_exp2f4_u10sse2(__m128 a);
__m128 Sleef_cinz_exp2f4_u10sse4(__m128 a);
__m128 Sleef_finz_exp2f4_u10avx2128(__m128 a);

__m256 Sleef_exp2f8_u10(__m256 a);
__m256 Sleef_exp2f8_u10avx(__m256 a);
__m256 Sleef_exp2f8_u10avx2(__m256 a);
__m256 Sleef_exp2f8_u10fma4(__m256 a);
__m256 Sleef_cinz_exp2f8_u10avx(__m256 a);
__m256 Sleef_finz_exp2f8_u10avx2(__m256 a);
__m256 Sleef_finz_exp2f8_u10fma4(__m256 a);

__m512 Sleef_exp2f16_u10(__m512 a);
__m512 Sleef_exp2f16_u10avx512f(__m512 a);
__m512 Sleef_exp2f16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_exp2f16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_exp2f16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_exp2f_u10 with the same accuracy specification.


Vectorized double precision base-10 exponential functions with 1.09 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_exp10d1_u10purec(double a);
double Sleef_exp10d1_u10purecfma(double a);
double Sleef_cinz_exp10d1_u10purec(double a);
double Sleef_finz_exp10d1_u10purecfma(double a);

__m128d Sleef_exp10d2_u10(__m128d a);
__m128d Sleef_exp10d2_u10sse2(__m128d a);
__m128d Sleef_exp10d2_u10sse4(__m128d a);
__m128d Sleef_exp10d2_u10avx2128(__m128d a);
__m128d Sleef_cinz_exp10d2_u10sse2(__m128d a);
__m128d Sleef_cinz_exp10d2_u10sse4(__m128d a);
__m128d Sleef_finz_exp10d2_u10avx2128(__m128d a);

__m256d Sleef_exp10d4_u10(__m256d a);
__m256d Sleef_exp10d4_u10avx(__m256d a);
__m256d Sleef_exp10d4_u10avx2(__m256d a);
__m256d Sleef_exp10d4_u10fma4(__m256d a);
__m256d Sleef_cinz_exp10d4_u10avx(__m256d a);
__m256d Sleef_finz_exp10d4_u10avx2(__m256d a);
__m256d Sleef_finz_exp10d4_u10fma4(__m256d a);

__m512d Sleef_exp10d8_u10(__m512d a);
__m512d Sleef_exp10d8_u10avx512f(__m512d a);
__m512d Sleef_exp10d8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_exp10d8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_exp10d8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_exp10_u10 with the same accuracy specification.


Vectorized single precision base-10 exponential functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_exp10f1_u10purec(float a);
float Sleef_exp10f1_u10purecfma(float a);
float Sleef_cinz_exp10f1_u10purec(float a);
float Sleef_finz_exp10f1_u10purecfma(float a);

__m128 Sleef_exp10f4_u10(__m128 a);
__m128 Sleef_exp10f4_u10sse2(__m128 a);
__m128 Sleef_exp10f4_u10sse4(__m128 a);
__m128 Sleef_exp10f4_u10avx2128(__m128 a);
__m128 Sleef_cinz_exp10f4_u10sse2(__m128 a);
__m128 Sleef_cinz_exp10f4_u10sse4(__m128 a);
__m128 Sleef_finz_exp10f4_u10avx2128(__m128 a);

__m256 Sleef_exp10f8_u10(__m256 a);
__m256 Sleef_exp10f8_u10avx(__m256 a);
__m256 Sleef_exp10f8_u10avx2(__m256 a);
__m256 Sleef_exp10f8_u10fma4(__m256 a);
__m256 Sleef_cinz_exp10f8_u10avx(__m256 a);
__m256 Sleef_finz_exp10f8_u10avx2(__m256 a);
__m256 Sleef_finz_exp10f8_u10fma4(__m256 a);

__m512 Sleef_exp10f16_u10(__m512 a);
__m512 Sleef_exp10f16_u10avx512f(__m512 a);
__m512 Sleef_exp10f16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_exp10f16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_exp10f16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_exp10f_u10 with the same accuracy specification.


Vectorized double precision base-e exponential functions minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_expm1d1_u10purec(double a);
double Sleef_expm1d1_u10purecfma(double a);
double Sleef_cinz_expm1d1_u10purec(double a);
double Sleef_finz_expm1d1_u10purecfma(double a);

__m128d Sleef_expm1d2_u10(__m128d a);
__m128d Sleef_expm1d2_u10sse2(__m128d a);
__m128d Sleef_expm1d2_u10sse4(__m128d a);
__m128d Sleef_expm1d2_u10avx2128(__m128d a);
__m128d Sleef_cinz_expm1d2_u10sse2(__m128d a);
__m128d Sleef_cinz_expm1d2_u10sse4(__m128d a);
__m128d Sleef_finz_expm1d2_u10avx2128(__m128d a);

__m256d Sleef_expm1d4_u10(__m256d a);
__m256d Sleef_expm1d4_u10avx(__m256d a);
__m256d Sleef_expm1d4_u10avx2(__m256d a);
__m256d Sleef_expm1d4_u10fma4(__m256d a);
__m256d Sleef_cinz_expm1d4_u10avx(__m256d a);
__m256d Sleef_finz_expm1d4_u10avx2(__m256d a);
__m256d Sleef_finz_expm1d4_u10fma4(__m256d a);

__m512d Sleef_expm1d8_u10(__m512d a);
__m512d Sleef_expm1d8_u10avx512f(__m512d a);
__m512d Sleef_expm1d8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_expm1d8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_expm1d8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_expm1_u10 with the same accuracy specification.


Vectorized single precision base-e exponential functions minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_expm1f1_u10purec(float a);
float Sleef_expm1f1_u10purecfma(float a);
float Sleef_cinz_expm1f1_u10purec(float a);
float Sleef_finz_expm1f1_u10purecfma(float a);

__m128 Sleef_expm1f4_u10(__m128 a);
__m128 Sleef_expm1f4_u10sse2(__m128 a);
__m128 Sleef_expm1f4_u10sse4(__m128 a);
__m128 Sleef_expm1f4_u10avx2128(__m128 a);
__m128 Sleef_cinz_expm1f4_u10sse2(__m128 a);
__m128 Sleef_cinz_expm1f4_u10sse4(__m128 a);
__m128 Sleef_finz_expm1f4_u10avx2128(__m128 a);

__m256 Sleef_expm1f8_u10(__m256 a);
__m256 Sleef_expm1f8_u10avx(__m256 a);
__m256 Sleef_expm1f8_u10avx2(__m256 a);
__m256 Sleef_expm1f8_u10fma4(__m256 a);
__m256 Sleef_cinz_expm1f8_u10avx(__m256 a);
__m256 Sleef_finz_expm1f8_u10avx2(__m256 a);
__m256 Sleef_finz_expm1f8_u10fma4(__m256 a);

__m512 Sleef_expm1f16_u10(__m512 a);
__m512 Sleef_expm1f16_u10avx512f(__m512 a);
__m512 Sleef_expm1f16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_expm1f16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_expm1f16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_expm1f_u10 with the same accuracy specification.


Vectorized double precision square root functions with 0.5001 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sqrtd1_u05purec(double a);
double Sleef_sqrtd1_u05purecfma(double a);
double Sleef_cinz_sqrtd1_u05purec(double a);
double Sleef_finz_sqrtd1_u05purecfma(double a);

__m128d Sleef_sqrtd2_u05(__m128d a);
__m128d Sleef_sqrtd2_u05sse2(__m128d a);
__m128d Sleef_sqrtd2_u05sse4(__m128d a);
__m128d Sleef_sqrtd2_u05avx2128(__m128d a);
__m128d Sleef_cinz_sqrtd2_u05sse2(__m128d a);
__m128d Sleef_cinz_sqrtd2_u05sse4(__m128d a);
__m128d Sleef_finz_sqrtd2_u05avx2128(__m128d a);

__m256d Sleef_sqrtd4_u05(__m256d a);
__m256d Sleef_sqrtd4_u05avx(__m256d a);
__m256d Sleef_sqrtd4_u05avx2(__m256d a);
__m256d Sleef_sqrtd4_u05fma4(__m256d a);
__m256d Sleef_cinz_sqrtd4_u05avx(__m256d a);
__m256d Sleef_finz_sqrtd4_u05avx2(__m256d a);
__m256d Sleef_finz_sqrtd4_u05fma4(__m256d a);

__m512d Sleef_sqrtd8_u05(__m512d a);
__m512d Sleef_sqrtd8_u05avx512f(__m512d a);
__m512d Sleef_sqrtd8_u05avx512fnofma(__m512d a);
__m512d Sleef_cinz_sqrtd8_u05avx512fnofma(__m512d a);
__m512d Sleef_finz_sqrtd8_u05avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sqrt_u05 with the same accuracy specification.


Vectorized single precision square root functions with 0.5001 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sqrtf1_u05purec(float a);
float Sleef_sqrtf1_u05purecfma(float a);
float Sleef_cinz_sqrtf1_u05purec(float a);
float Sleef_finz_sqrtf1_u05purecfma(float a);

__m128 Sleef_sqrtf4_u05(__m128 a);
__m128 Sleef_sqrtf4_u05sse2(__m128 a);
__m128 Sleef_sqrtf4_u05sse4(__m128 a);
__m128 Sleef_sqrtf4_u05avx2128(__m128 a);
__m128 Sleef_cinz_sqrtf4_u05sse2(__m128 a);
__m128 Sleef_cinz_sqrtf4_u05sse4(__m128 a);
__m128 Sleef_finz_sqrtf4_u05avx2128(__m128 a);

__m256 Sleef_sqrtf8_u05(__m256 a);
__m256 Sleef_sqrtf8_u05avx(__m256 a);
__m256 Sleef_sqrtf8_u05avx2(__m256 a);
__m256 Sleef_sqrtf8_u05fma4(__m256 a);
__m256 Sleef_cinz_sqrtf8_u05avx(__m256 a);
__m256 Sleef_finz_sqrtf8_u05avx2(__m256 a);
__m256 Sleef_finz_sqrtf8_u05fma4(__m256 a);

__m512 Sleef_sqrtf16_u05(__m512 a);
__m512 Sleef_sqrtf16_u05avx512f(__m512 a);
__m512 Sleef_sqrtf16_u05avx512fnofma(__m512 a);
__m512 Sleef_cinz_sqrtf16_u05avx512fnofma(__m512 a);
__m512 Sleef_finz_sqrtf16_u05avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sqrtf_u05 with the same accuracy specification.


Vectorized double precision square root functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sqrtd1_u35purec(double a);
double Sleef_sqrtd1_u35purecfma(double a);
double Sleef_cinz_sqrtd1_u35purec(double a);
double Sleef_finz_sqrtd1_u35purecfma(double a);

__m128d Sleef_sqrtd2_u35(__m128d a);
__m128d Sleef_sqrtd2_u35sse2(__m128d a);
__m128d Sleef_sqrtd2_u35sse4(__m128d a);
__m128d Sleef_sqrtd2_u35avx2128(__m128d a);
__m128d Sleef_cinz_sqrtd2_u35sse2(__m128d a);
__m128d Sleef_cinz_sqrtd2_u35sse4(__m128d a);
__m128d Sleef_finz_sqrtd2_u35avx2128(__m128d a);

__m256d Sleef_sqrtd4_u35(__m256d a);
__m256d Sleef_sqrtd4_u35avx(__m256d a);
__m256d Sleef_sqrtd4_u35avx2(__m256d a);
__m256d Sleef_sqrtd4_u35fma4(__m256d a);
__m256d Sleef_cinz_sqrtd4_u35avx(__m256d a);
__m256d Sleef_finz_sqrtd4_u35avx2(__m256d a);
__m256d Sleef_finz_sqrtd4_u35fma4(__m256d a);

__m512d Sleef_sqrtd8_u35(__m512d a);
__m512d Sleef_sqrtd8_u35avx512f(__m512d a);
__m512d Sleef_sqrtd8_u35avx512fnofma(__m512d a);
__m512d Sleef_cinz_sqrtd8_u35avx512fnofma(__m512d a);
__m512d Sleef_finz_sqrtd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sqrt_u35 with the same accuracy specification.


Vectorized single precision square root functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sqrtf1_u35purec(float a);
float Sleef_sqrtf1_u35purecfma(float a);
float Sleef_cinz_sqrtf1_u35purec(float a);
float Sleef_finz_sqrtf1_u35purecfma(float a);

__m128 Sleef_sqrtf4_u35(__m128 a);
__m128 Sleef_sqrtf4_u35sse2(__m128 a);
__m128 Sleef_sqrtf4_u35sse4(__m128 a);
__m128 Sleef_sqrtf4_u35avx2128(__m128 a);
__m128 Sleef_cinz_sqrtf4_u35sse2(__m128 a);
__m128 Sleef_cinz_sqrtf4_u35sse4(__m128 a);
__m128 Sleef_finz_sqrtf4_u35avx2128(__m128 a);

__m256 Sleef_sqrtf8_u35(__m256 a);
__m256 Sleef_sqrtf8_u35avx(__m256 a);
__m256 Sleef_sqrtf8_u35avx2(__m256 a);
__m256 Sleef_sqrtf8_u35fma4(__m256 a);
__m256 Sleef_cinz_sqrtf8_u35avx(__m256 a);
__m256 Sleef_finz_sqrtf8_u35avx2(__m256 a);
__m256 Sleef_finz_sqrtf8_u35fma4(__m256 a);

__m512 Sleef_sqrtf16_u35(__m512 a);
__m512 Sleef_sqrtf16_u35avx512f(__m512 a);
__m512 Sleef_sqrtf16_u35avx512fnofma(__m512 a);
__m512 Sleef_cinz_sqrtf16_u35avx512fnofma(__m512 a);
__m512 Sleef_finz_sqrtf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sqrtf_u35 with the same accuracy specification.


Vectorized double precision cubic root functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cbrtd1_u10purec(double a);
double Sleef_cbrtd1_u10purecfma(double a);
double Sleef_cinz_cbrtd1_u10purec(double a);
double Sleef_finz_cbrtd1_u10purecfma(double a);

__m128d Sleef_cbrtd2_u10(__m128d a);
__m128d Sleef_cbrtd2_u10sse2(__m128d a);
__m128d Sleef_cbrtd2_u10sse4(__m128d a);
__m128d Sleef_cbrtd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_cbrtd2_u10sse2(__m128d a);
__m128d Sleef_cinz_cbrtd2_u10sse4(__m128d a);
__m128d Sleef_finz_cbrtd2_u10avx2128(__m128d a);

__m256d Sleef_cbrtd4_u10(__m256d a);
__m256d Sleef_cbrtd4_u10avx(__m256d a);
__m256d Sleef_cbrtd4_u10avx2(__m256d a);
__m256d Sleef_cbrtd4_u10fma4(__m256d a);
__m256d Sleef_cinz_cbrtd4_u10avx(__m256d a);
__m256d Sleef_finz_cbrtd4_u10avx2(__m256d a);
__m256d Sleef_finz_cbrtd4_u10fma4(__m256d a);

__m512d Sleef_cbrtd8_u10(__m512d a);
__m512d Sleef_cbrtd8_u10avx512f(__m512d a);
__m512d Sleef_cbrtd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_cbrtd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_cbrtd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cbrt_u10 with the same accuracy specification.


Vectorized single precision cubic root functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_cbrtf1_u10purec(float a);
float Sleef_cbrtf1_u10purecfma(float a);
float Sleef_cinz_cbrtf1_u10purec(float a);
float Sleef_finz_cbrtf1_u10purecfma(float a);

__m128 Sleef_cbrtf4_u10(__m128 a);
__m128 Sleef_cbrtf4_u10sse2(__m128 a);
__m128 Sleef_cbrtf4_u10sse4(__m128 a);
__m128 Sleef_cbrtf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_cbrtf4_u10sse2(__m128 a);
__m128 Sleef_cinz_cbrtf4_u10sse4(__m128 a);
__m128 Sleef_finz_cbrtf4_u10avx2128(__m128 a);

__m256 Sleef_cbrtf8_u10(__m256 a);
__m256 Sleef_cbrtf8_u10avx(__m256 a);
__m256 Sleef_cbrtf8_u10avx2(__m256 a);
__m256 Sleef_cbrtf8_u10fma4(__m256 a);
__m256 Sleef_cinz_cbrtf8_u10avx(__m256 a);
__m256 Sleef_finz_cbrtf8_u10avx2(__m256 a);
__m256 Sleef_finz_cbrtf8_u10fma4(__m256 a);

__m512 Sleef_cbrtf16_u10(__m512 a);
__m512 Sleef_cbrtf16_u10avx512f(__m512 a);
__m512 Sleef_cbrtf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_cbrtf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_cbrtf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cbrtf_u10 with the same accuracy specification.


Vectorized double precision cubic root functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_cbrtd1_u35purec(double a);
double Sleef_cbrtd1_u35purecfma(double a);
double Sleef_cinz_cbrtd1_u35purec(double a);
double Sleef_finz_cbrtd1_u35purecfma(double a);

__m128d Sleef_cbrtd2_u35(__m128d a);
__m128d Sleef_cbrtd2_u35sse2(__m128d a);
__m128d Sleef_cbrtd2_u35sse4(__m128d a);
__m128d Sleef_cbrtd2_u35avx2128(__m128d a);
__m128d Sleef_cinz_cbrtd2_u35sse2(__m128d a);
__m128d Sleef_cinz_cbrtd2_u35sse4(__m128d a);
__m128d Sleef_finz_cbrtd2_u35avx2128(__m128d a);

__m256d Sleef_cbrtd4_u35(__m256d a);
__m256d Sleef_cbrtd4_u35avx(__m256d a);
__m256d Sleef_cbrtd4_u35avx2(__m256d a);
__m256d Sleef_cbrtd4_u35fma4(__m256d a);
__m256d Sleef_cinz_cbrtd4_u35avx(__m256d a);
__m256d Sleef_finz_cbrtd4_u35avx2(__m256d a);
__m256d Sleef_finz_cbrtd4_u35fma4(__m256d a);

__m512d Sleef_cbrtd8_u35(__m512d a);
__m512d Sleef_cbrtd8_u35avx512f(__m512d a);
__m512d Sleef_cbrtd8_u35avx512fnofma(__m512d a);
__m512d Sleef_cinz_cbrtd8_u35avx512fnofma(__m512d a);
__m512d Sleef_finz_cbrtd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cbrt_u35 with the same accuracy specification.


Vectorized single precision cubic root functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_cbrtf1_u35purec(float a);
float Sleef_cbrtf1_u35purecfma(float a);
float Sleef_cinz_cbrtf1_u35purec(float a);
float Sleef_finz_cbrtf1_u35purecfma(float a);

__m128 Sleef_cbrtf4_u35(__m128 a);
__m128 Sleef_cbrtf4_u35sse2(__m128 a);
__m128 Sleef_cbrtf4_u35sse4(__m128 a);
__m128 Sleef_cbrtf4_u35avx2128(__m128 a);
__m128 Sleef_cinz_cbrtf4_u35sse2(__m128 a);
__m128 Sleef_cinz_cbrtf4_u35sse4(__m128 a);
__m128 Sleef_finz_cbrtf4_u35avx2128(__m128 a);

__m256 Sleef_cbrtf8_u35(__m256 a);
__m256 Sleef_cbrtf8_u35avx(__m256 a);
__m256 Sleef_cbrtf8_u35avx2(__m256 a);
__m256 Sleef_cbrtf8_u35fma4(__m256 a);
__m256 Sleef_cinz_cbrtf8_u35avx(__m256 a);
__m256 Sleef_finz_cbrtf8_u35avx2(__m256 a);
__m256 Sleef_finz_cbrtf8_u35fma4(__m256 a);

__m512 Sleef_cbrtf16_u35(__m512 a);
__m512 Sleef_cbrtf16_u35avx512f(__m512 a);
__m512 Sleef_cbrtf16_u35avx512fnofma(__m512 a);
__m512 Sleef_cinz_cbrtf16_u35avx512fnofma(__m512 a);
__m512 Sleef_finz_cbrtf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cbrtf_u35 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance functions with 0.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_hypotd1_u05purec(double a, double b);
double Sleef_hypotd1_u05purecfma(double a, double b);
double Sleef_cinz_hypotd1_u05purec(double a, double b);
double Sleef_finz_hypotd1_u05purecfma(double a, double b);

__m128d Sleef_hypotd2_u05(__m128d a, __m128d b);
__m128d Sleef_hypotd2_u05sse2(__m128d a, __m128d b);
__m128d Sleef_hypotd2_u05sse4(__m128d a, __m128d b);
__m128d Sleef_hypotd2_u05avx2128(__m128d a, __m128d b);
__m128d Sleef_cinz_hypotd2_u05sse2(__m128d a, __m128d b);
__m128d Sleef_cinz_hypotd2_u05sse4(__m128d a, __m128d b);
__m128d Sleef_finz_hypotd2_u05avx2128(__m128d a, __m128d b);

__m256d Sleef_hypotd4_u05(__m256d a, __m256d b);
__m256d Sleef_hypotd4_u05avx(__m256d a, __m256d b);
__m256d Sleef_hypotd4_u05avx2(__m256d a, __m256d b);
__m256d Sleef_hypotd4_u05fma4(__m256d a, __m256d b);
__m256d Sleef_cinz_hypotd4_u05avx(__m256d a, __m256d b);
__m256d Sleef_finz_hypotd4_u05avx2(__m256d a, __m256d b);
__m256d Sleef_finz_hypotd4_u05fma4(__m256d a, __m256d b);

__m512d Sleef_hypotd8_u05(__m512d a, __m512d b);
__m512d Sleef_hypotd8_u05avx512f(__m512d a, __m512d b);
__m512d Sleef_hypotd8_u05avx512fnofma(__m512d a, __m512d b);
__m512d Sleef_cinz_hypotd8_u05avx512fnofma(__m512d a, __m512d b);
__m512d Sleef_finz_hypotd8_u05avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_hypot_u05 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance functions with 0.5 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_hypotf1_u05purec(float a, float b);
float Sleef_hypotf1_u05purecfma(float a, float b);
float Sleef_cinz_hypotf1_u05purec(float a, float b);
float Sleef_finz_hypotf1_u05purecfma(float a, float b);

__m128 Sleef_hypotf4_u05(__m128 a, __m128 b);
__m128 Sleef_hypotf4_u05sse2(__m128 a, __m128 b);
__m128 Sleef_hypotf4_u05sse4(__m128 a, __m128 b);
__m128 Sleef_hypotf4_u05avx2128(__m128 a, __m128 b);
__m128 Sleef_cinz_hypotf4_u05sse2(__m128 a, __m128 b);
__m128 Sleef_cinz_hypotf4_u05sse4(__m128 a, __m128 b);
__m128 Sleef_finz_hypotf4_u05avx2128(__m128 a, __m128 b);

__m256 Sleef_hypotf8_u05(__m256 a, __m256 b);
__m256 Sleef_hypotf8_u05avx(__m256 a, __m256 b);
__m256 Sleef_hypotf8_u05avx2(__m256 a, __m256 b);
__m256 Sleef_hypotf8_u05fma4(__m256 a, __m256 b);
__m256 Sleef_cinz_hypotf8_u05avx(__m256 a, __m256 b);
__m256 Sleef_finz_hypotf8_u05avx2(__m256 a, __m256 b);
__m256 Sleef_finz_hypotf8_u05fma4(__m256 a, __m256 b);

__m512 Sleef_hypotf16_u05(__m512 a, __m512 b);
__m512 Sleef_hypotf16_u05avx512f(__m512 a, __m512 b);
__m512 Sleef_hypotf16_u05avx512fnofma(__m512 a, __m512 b);
__m512 Sleef_cinz_hypotf16_u05avx512fnofma(__m512 a, __m512 b);
__m512 Sleef_finz_hypotf16_u05avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_hypotf_u05 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_hypotd1_u35purec(double a, double b);
double Sleef_hypotd1_u35purecfma(double a, double b);
double Sleef_cinz_hypotd1_u35purec(double a, double b);
double Sleef_finz_hypotd1_u35purecfma(double a, double b);

__m128d Sleef_hypotd2_u35(__m128d a, __m128d b);
__m128d Sleef_hypotd2_u35sse2(__m128d a, __m128d b);
__m128d Sleef_hypotd2_u35sse4(__m128d a, __m128d b);
__m128d Sleef_hypotd2_u35avx2128(__m128d a, __m128d b);
__m128d Sleef_cinz_hypotd2_u35sse2(__m128d a, __m128d b);
__m128d Sleef_cinz_hypotd2_u35sse4(__m128d a, __m128d b);
__m128d Sleef_finz_hypotd2_u35avx2128(__m128d a, __m128d b);

__m256d Sleef_hypotd4_u35(__m256d a, __m256d b);
__m256d Sleef_hypotd4_u35avx(__m256d a, __m256d b);
__m256d Sleef_hypotd4_u35avx2(__m256d a, __m256d b);
__m256d Sleef_hypotd4_u35fma4(__m256d a, __m256d b);
__m256d Sleef_cinz_hypotd4_u35avx(__m256d a, __m256d b);
__m256d Sleef_finz_hypotd4_u35avx2(__m256d a, __m256d b);
__m256d Sleef_finz_hypotd4_u35fma4(__m256d a, __m256d b);

__m512d Sleef_hypotd8_u35(__m512d a, __m512d b);
__m512d Sleef_hypotd8_u35avx512f(__m512d a, __m512d b);
__m512d Sleef_hypotd8_u35avx512fnofma(__m512d a, __m512d b);
__m512d Sleef_cinz_hypotd8_u35avx512fnofma(__m512d a, __m512d b);
__m512d Sleef_finz_hypotd8_u35avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_hypot_u35 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_hypotf1_u35purec(float a, float b);
float Sleef_hypotf1_u35purecfma(float a, float b);
float Sleef_cinz_hypotf1_u35purec(float a, float b);
float Sleef_finz_hypotf1_u35purecfma(float a, float b);

__m128 Sleef_hypotf4_u35(__m128 a, __m128 b);
__m128 Sleef_hypotf4_u35sse2(__m128 a, __m128 b);
__m128 Sleef_hypotf4_u35sse4(__m128 a, __m128 b);
__m128 Sleef_hypotf4_u35avx2128(__m128 a, __m128 b);
__m128 Sleef_cinz_hypotf4_u35sse2(__m128 a, __m128 b);
__m128 Sleef_cinz_hypotf4_u35sse4(__m128 a, __m128 b);
__m128 Sleef_finz_hypotf4_u35avx2128(__m128 a, __m128 b);

__m256 Sleef_hypotf8_u35(__m256 a, __m256 b);
__m256 Sleef_hypotf8_u35avx(__m256 a, __m256 b);
__m256 Sleef_hypotf8_u35avx2(__m256 a, __m256 b);
__m256 Sleef_hypotf8_u35fma4(__m256 a, __m256 b);
__m256 Sleef_cinz_hypotf8_u35avx(__m256 a, __m256 b);
__m256 Sleef_finz_hypotf8_u35avx2(__m256 a, __m256 b);
__m256 Sleef_finz_hypotf8_u35fma4(__m256 a, __m256 b);

__m512 Sleef_hypotf16_u35(__m512 a, __m512 b);
__m512 Sleef_hypotf16_u35avx512f(__m512 a, __m512 b);
__m512 Sleef_hypotf16_u35avx512fnofma(__m512 a, __m512 b);
__m512 Sleef_cinz_hypotf16_u35avx512fnofma(__m512 a, __m512 b);
__m512 Sleef_finz_hypotf16_u35avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_hypotf_u35 with the same accuracy specification.

Inverse Trigonometric Functions

Vectorized double precision arc sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_asind1_u10purec(double a);
double Sleef_asind1_u10purecfma(double a);
double Sleef_cinz_asind1_u10purec(double a);
double Sleef_finz_asind1_u10purecfma(double a);

__m128d Sleef_asind2_u10(__m128d a);
__m128d Sleef_asind2_u10sse2(__m128d a);
__m128d Sleef_asind2_u10sse4(__m128d a);
__m128d Sleef_asind2_u10avx2128(__m128d a);
__m128d Sleef_cinz_asind2_u10sse2(__m128d a);
__m128d Sleef_cinz_asind2_u10sse4(__m128d a);
__m128d Sleef_finz_asind2_u10avx2128(__m128d a);

__m256d Sleef_asind4_u10(__m256d a);
__m256d Sleef_asind4_u10avx(__m256d a);
__m256d Sleef_asind4_u10avx2(__m256d a);
__m256d Sleef_asind4_u10fma4(__m256d a);
__m256d Sleef_cinz_asind4_u10avx(__m256d a);
__m256d Sleef_finz_asind4_u10avx2(__m256d a);
__m256d Sleef_finz_asind4_u10fma4(__m256d a);

__m512d Sleef_asind8_u10(__m512d a);
__m512d Sleef_asind8_u10avx512f(__m512d a);
__m512d Sleef_asind8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_asind8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_asind8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_asin_u10 with the same accuracy specification.


Vectorized single precision arc sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_asinf1_u10purec(float a);
float Sleef_asinf1_u10purecfma(float a);
float Sleef_cinz_asinf1_u10purec(float a);
float Sleef_finz_asinf1_u10purecfma(float a);

__m128 Sleef_asinf4_u10(__m128 a);
__m128 Sleef_asinf4_u10sse2(__m128 a);
__m128 Sleef_asinf4_u10sse4(__m128 a);
__m128 Sleef_asinf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_asinf4_u10sse2(__m128 a);
__m128 Sleef_cinz_asinf4_u10sse4(__m128 a);
__m128 Sleef_finz_asinf4_u10avx2128(__m128 a);

__m256 Sleef_asinf8_u10(__m256 a);
__m256 Sleef_asinf8_u10avx(__m256 a);
__m256 Sleef_asinf8_u10avx2(__m256 a);
__m256 Sleef_asinf8_u10fma4(__m256 a);
__m256 Sleef_cinz_asinf8_u10avx(__m256 a);
__m256 Sleef_finz_asinf8_u10avx2(__m256 a);
__m256 Sleef_finz_asinf8_u10fma4(__m256 a);

__m512 Sleef_asinf16_u10(__m512 a);
__m512 Sleef_asinf16_u10avx512f(__m512 a);
__m512 Sleef_asinf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_asinf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_asinf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_asinf_u10 with the same accuracy specification.


Vectorized double precision arc sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_asind1_u35purec(double a);
double Sleef_asind1_u35purecfma(double a);
double Sleef_cinz_asind1_u35purec(double a);
double Sleef_finz_asind1_u35purecfma(double a);

__m128d Sleef_asind2_u35(__m128d a);
__m128d Sleef_asind2_u35sse2(__m128d a);
__m128d Sleef_asind2_u35sse4(__m128d a);
__m128d Sleef_asind2_u35avx2128(__m128d a);
__m128d Sleef_cinz_asind2_u35sse2(__m128d a);
__m128d Sleef_cinz_asind2_u35sse4(__m128d a);
__m128d Sleef_finz_asind2_u35avx2128(__m128d a);

__m256d Sleef_asind4_u35(__m256d a);
__m256d Sleef_asind4_u35avx(__m256d a);
__m256d Sleef_asind4_u35avx2(__m256d a);
__m256d Sleef_asind4_u35fma4(__m256d a);
__m256d Sleef_cinz_asind4_u35avx(__m256d a);
__m256d Sleef_finz_asind4_u35avx2(__m256d a);
__m256d Sleef_finz_asind4_u35fma4(__m256d a);

__m512d Sleef_asind8_u35(__m512d a);
__m512d Sleef_asind8_u35avx512f(__m512d a);
__m512d Sleef_asind8_u35avx512fnofma(__m512d a);
__m512d Sleef_cinz_asind8_u35avx512fnofma(__m512d a);
__m512d Sleef_finz_asind8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_asin_u35 with the same accuracy specification.


Vectorized single precision arc sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_asinf1_u35purec(float a);
float Sleef_asinf1_u35purecfma(float a);
float Sleef_cinz_asinf1_u35purec(float a);
float Sleef_finz_asinf1_u35purecfma(float a);

__m128 Sleef_asinf4_u35(__m128 a);
__m128 Sleef_asinf4_u35sse2(__m128 a);
__m128 Sleef_asinf4_u35sse4(__m128 a);
__m128 Sleef_asinf4_u35avx2128(__m128 a);
__m128 Sleef_cinz_asinf4_u35sse2(__m128 a);
__m128 Sleef_cinz_asinf4_u35sse4(__m128 a);
__m128 Sleef_finz_asinf4_u35avx2128(__m128 a);

__m256 Sleef_asinf8_u35(__m256 a);
__m256 Sleef_asinf8_u35avx(__m256 a);
__m256 Sleef_asinf8_u35avx2(__m256 a);
__m256 Sleef_asinf8_u35fma4(__m256 a);
__m256 Sleef_cinz_asinf8_u35avx(__m256 a);
__m256 Sleef_finz_asinf8_u35avx2(__m256 a);
__m256 Sleef_finz_asinf8_u35fma4(__m256 a);

__m512 Sleef_asinf16_u35(__m512 a);
__m512 Sleef_asinf16_u35avx512f(__m512 a);
__m512 Sleef_asinf16_u35avx512fnofma(__m512 a);
__m512 Sleef_cinz_asinf16_u35avx512fnofma(__m512 a);
__m512 Sleef_finz_asinf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_asinf_u35 with the same accuracy specification.


Vectorized double precision arc cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_acosd1_u10purec(double a);
double Sleef_acosd1_u10purecfma(double a);
double Sleef_cinz_acosd1_u10purec(double a);
double Sleef_finz_acosd1_u10purecfma(double a);

__m128d Sleef_acosd2_u10(__m128d a);
__m128d Sleef_acosd2_u10sse2(__m128d a);
__m128d Sleef_acosd2_u10sse4(__m128d a);
__m128d Sleef_acosd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_acosd2_u10sse2(__m128d a);
__m128d Sleef_cinz_acosd2_u10sse4(__m128d a);
__m128d Sleef_finz_acosd2_u10avx2128(__m128d a);

__m256d Sleef_acosd4_u10(__m256d a);
__m256d Sleef_acosd4_u10avx(__m256d a);
__m256d Sleef_acosd4_u10avx2(__m256d a);
__m256d Sleef_acosd4_u10fma4(__m256d a);
__m256d Sleef_cinz_acosd4_u10avx(__m256d a);
__m256d Sleef_finz_acosd4_u10avx2(__m256d a);
__m256d Sleef_finz_acosd4_u10fma4(__m256d a);

__m512d Sleef_acosd8_u10(__m512d a);
__m512d Sleef_acosd8_u10avx512f(__m512d a);
__m512d Sleef_acosd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_acosd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_acosd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_acos_u10 with the same accuracy specification.


Vectorized single precision arc cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_acosf1_u10purec(float a);
float Sleef_acosf1_u10purecfma(float a);
float Sleef_cinz_acosf1_u10purec(float a);
float Sleef_finz_acosf1_u10purecfma(float a);

__m128 Sleef_acosf4_u10(__m128 a);
__m128 Sleef_acosf4_u10sse2(__m128 a);
__m128 Sleef_acosf4_u10sse4(__m128 a);
__m128 Sleef_acosf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_acosf4_u10sse2(__m128 a);
__m128 Sleef_cinz_acosf4_u10sse4(__m128 a);
__m128 Sleef_finz_acosf4_u10avx2128(__m128 a);

__m256 Sleef_acosf8_u10(__m256 a);
__m256 Sleef_acosf8_u10avx(__m256 a);
__m256 Sleef_acosf8_u10avx2(__m256 a);
__m256 Sleef_acosf8_u10fma4(__m256 a);
__m256 Sleef_cinz_acosf8_u10avx(__m256 a);
__m256 Sleef_finz_acosf8_u10avx2(__m256 a);
__m256 Sleef_finz_acosf8_u10fma4(__m256 a);

__m512 Sleef_acosf16_u10(__m512 a);
__m512 Sleef_acosf16_u10avx512f(__m512 a);
__m512 Sleef_acosf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_acosf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_acosf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_acosf_u10 with the same accuracy specification.


Vectorized double precision arc cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_acosd1_u35purec(double a);
double Sleef_acosd1_u35purecfma(double a);
double Sleef_cinz_acosd1_u35purec(double a);
double Sleef_finz_acosd1_u35purecfma(double a);

__m128d Sleef_acosd2_u35(__m128d a);
__m128d Sleef_acosd2_u35sse2(__m128d a);
__m128d Sleef_acosd2_u35sse4(__m128d a);
__m128d Sleef_acosd2_u35avx2128(__m128d a);
__m128d Sleef_cinz_acosd2_u35sse2(__m128d a);
__m128d Sleef_cinz_acosd2_u35sse4(__m128d a);
__m128d Sleef_finz_acosd2_u35avx2128(__m128d a);

__m256d Sleef_acosd4_u35(__m256d a);
__m256d Sleef_acosd4_u35avx(__m256d a);
__m256d Sleef_acosd4_u35avx2(__m256d a);
__m256d Sleef_acosd4_u35fma4(__m256d a);
__m256d Sleef_cinz_acosd4_u35avx(__m256d a);
__m256d Sleef_finz_acosd4_u35avx2(__m256d a);
__m256d Sleef_finz_acosd4_u35fma4(__m256d a);

__m512d Sleef_acosd8_u35(__m512d a);
__m512d Sleef_acosd8_u35avx512f(__m512d a);
__m512d Sleef_acosd8_u35avx512fnofma(__m512d a);
__m512d Sleef_cinz_acosd8_u35avx512fnofma(__m512d a);
__m512d Sleef_finz_acosd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_acos_u35 with the same accuracy specification.


Vectorized single precision arc cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_acosf1_u35purec(float a);
float Sleef_acosf1_u35purecfma(float a);
float Sleef_cinz_acosf1_u35purec(float a);
float Sleef_finz_acosf1_u35purecfma(float a);

__m128 Sleef_acosf4_u35(__m128 a);
__m128 Sleef_acosf4_u35sse2(__m128 a);
__m128 Sleef_acosf4_u35sse4(__m128 a);
__m128 Sleef_acosf4_u35avx2128(__m128 a);
__m128 Sleef_cinz_acosf4_u35sse2(__m128 a);
__m128 Sleef_cinz_acosf4_u35sse4(__m128 a);
__m128 Sleef_finz_acosf4_u35avx2128(__m128 a);

__m256 Sleef_acosf8_u35(__m256 a);
__m256 Sleef_acosf8_u35avx(__m256 a);
__m256 Sleef_acosf8_u35avx2(__m256 a);
__m256 Sleef_acosf8_u35fma4(__m256 a);
__m256 Sleef_cinz_acosf8_u35avx(__m256 a);
__m256 Sleef_finz_acosf8_u35avx2(__m256 a);
__m256 Sleef_finz_acosf8_u35fma4(__m256 a);

__m512 Sleef_acosf16_u35(__m512 a);
__m512 Sleef_acosf16_u35avx512f(__m512 a);
__m512 Sleef_acosf16_u35avx512fnofma(__m512 a);
__m512 Sleef_cinz_acosf16_u35avx512fnofma(__m512 a);
__m512 Sleef_finz_acosf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_acosf_u35 with the same accuracy specification.


Vectorized double precision arc tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atand1_u10purec(double a);
double Sleef_atand1_u10purecfma(double a);
double Sleef_cinz_atand1_u10purec(double a);
double Sleef_finz_atand1_u10purecfma(double a);

__m128d Sleef_atand2_u10(__m128d a);
__m128d Sleef_atand2_u10sse2(__m128d a);
__m128d Sleef_atand2_u10sse4(__m128d a);
__m128d Sleef_atand2_u10avx2128(__m128d a);
__m128d Sleef_cinz_atand2_u10sse2(__m128d a);
__m128d Sleef_cinz_atand2_u10sse4(__m128d a);
__m128d Sleef_finz_atand2_u10avx2128(__m128d a);

__m256d Sleef_atand4_u10(__m256d a);
__m256d Sleef_atand4_u10avx(__m256d a);
__m256d Sleef_atand4_u10avx2(__m256d a);
__m256d Sleef_atand4_u10fma4(__m256d a);
__m256d Sleef_cinz_atand4_u10avx(__m256d a);
__m256d Sleef_finz_atand4_u10avx2(__m256d a);
__m256d Sleef_finz_atand4_u10fma4(__m256d a);

__m512d Sleef_atand8_u10(__m512d a);
__m512d Sleef_atand8_u10avx512f(__m512d a);
__m512d Sleef_atand8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_atand8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_atand8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atan_u10 with the same accuracy specification.


Vectorized single precision arc tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_atanf1_u10purec(float a);
float Sleef_atanf1_u10purecfma(float a);
float Sleef_cinz_atanf1_u10purec(float a);
float Sleef_finz_atanf1_u10purecfma(float a);

__m128 Sleef_atanf4_u10(__m128 a);
__m128 Sleef_atanf4_u10sse2(__m128 a);
__m128 Sleef_atanf4_u10sse4(__m128 a);
__m128 Sleef_atanf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_atanf4_u10sse2(__m128 a);
__m128 Sleef_cinz_atanf4_u10sse4(__m128 a);
__m128 Sleef_finz_atanf4_u10avx2128(__m128 a);

__m256 Sleef_atanf8_u10(__m256 a);
__m256 Sleef_atanf8_u10avx(__m256 a);
__m256 Sleef_atanf8_u10avx2(__m256 a);
__m256 Sleef_atanf8_u10fma4(__m256 a);
__m256 Sleef_cinz_atanf8_u10avx(__m256 a);
__m256 Sleef_finz_atanf8_u10avx2(__m256 a);
__m256 Sleef_finz_atanf8_u10fma4(__m256 a);

__m512 Sleef_atanf16_u10(__m512 a);
__m512 Sleef_atanf16_u10avx512f(__m512 a);
__m512 Sleef_atanf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_atanf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_atanf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atanf_u10 with the same accuracy specification.


Vectorized double precision arc tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_atand1_u35purec(double a);
double Sleef_atand1_u35purecfma(double a);
double Sleef_cinz_atand1_u35purec(double a);
double Sleef_finz_atand1_u35purecfma(double a);

__m128d Sleef_atand2_u35(__m128d a);
__m128d Sleef_atand2_u35sse2(__m128d a);
__m128d Sleef_atand2_u35sse4(__m128d a);
__m128d Sleef_atand2_u35avx2128(__m128d a);
__m128d Sleef_cinz_atand2_u35sse2(__m128d a);
__m128d Sleef_cinz_atand2_u35sse4(__m128d a);
__m128d Sleef_finz_atand2_u35avx2128(__m128d a);

__m256d Sleef_atand4_u35(__m256d a);
__m256d Sleef_atand4_u35avx(__m256d a);
__m256d Sleef_atand4_u35avx2(__m256d a);
__m256d Sleef_atand4_u35fma4(__m256d a);
__m256d Sleef_cinz_atand4_u35avx(__m256d a);
__m256d Sleef_finz_atand4_u35avx2(__m256d a);
__m256d Sleef_finz_atand4_u35fma4(__m256d a);

__m512d Sleef_atand8_u35(__m512d a);
__m512d Sleef_atand8_u35avx512f(__m512d a);
__m512d Sleef_atand8_u35avx512fnofma(__m512d a);
__m512d Sleef_cinz_atand8_u35avx512fnofma(__m512d a);
__m512d Sleef_finz_atand8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atan_u35 with the same accuracy specification.


Vectorized single precision arc tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_atanf1_u35purec(float a);
float Sleef_atanf1_u35purecfma(float a);
float Sleef_cinz_atanf1_u35purec(float a);
float Sleef_finz_atanf1_u35purecfma(float a);

__m128 Sleef_atanf4_u35(__m128 a);
__m128 Sleef_atanf4_u35sse2(__m128 a);
__m128 Sleef_atanf4_u35sse4(__m128 a);
__m128 Sleef_atanf4_u35avx2128(__m128 a);
__m128 Sleef_cinz_atanf4_u35sse2(__m128 a);
__m128 Sleef_cinz_atanf4_u35sse4(__m128 a);
__m128 Sleef_finz_atanf4_u35avx2128(__m128 a);

__m256 Sleef_atanf8_u35(__m256 a);
__m256 Sleef_atanf8_u35avx(__m256 a);
__m256 Sleef_atanf8_u35avx2(__m256 a);
__m256 Sleef_atanf8_u35fma4(__m256 a);
__m256 Sleef_cinz_atanf8_u35avx(__m256 a);
__m256 Sleef_finz_atanf8_u35avx2(__m256 a);
__m256 Sleef_finz_atanf8_u35fma4(__m256 a);

__m512 Sleef_atanf16_u35(__m512 a);
__m512 Sleef_atanf16_u35avx512f(__m512 a);
__m512 Sleef_atanf16_u35avx512fnofma(__m512 a);
__m512 Sleef_cinz_atanf16_u35avx512fnofma(__m512 a);
__m512 Sleef_finz_atanf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atanf_u35 with the same accuracy specification.


Vectorized double precision arc tangent functions of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atan2d1_u10purec(double a, double b);
double Sleef_atan2d1_u10purecfma(double a, double b);
double Sleef_cinz_atan2d1_u10purec(double a, double b);
double Sleef_finz_atan2d1_u10purecfma(double a, double b);

__m128d Sleef_atan2d2_u10(__m128d a, __m128d b);
__m128d Sleef_atan2d2_u10sse2(__m128d a, __m128d b);
__m128d Sleef_atan2d2_u10sse4(__m128d a, __m128d b);
__m128d Sleef_atan2d2_u10avx2128(__m128d a, __m128d b);
__m128d Sleef_cinz_atan2d2_u10sse2(__m128d a, __m128d b);
__m128d Sleef_cinz_atan2d2_u10sse4(__m128d a, __m128d b);
__m128d Sleef_finz_atan2d2_u10avx2128(__m128d a, __m128d b);

__m256d Sleef_atan2d4_u10(__m256d a, __m256d b);
__m256d Sleef_atan2d4_u10avx(__m256d a, __m256d b);
__m256d Sleef_atan2d4_u10avx2(__m256d a, __m256d b);
__m256d Sleef_atan2d4_u10fma4(__m256d a, __m256d b);
__m256d Sleef_cinz_atan2d4_u10avx(__m256d a, __m256d b);
__m256d Sleef_finz_atan2d4_u10avx2(__m256d a, __m256d b);
__m256d Sleef_finz_atan2d4_u10fma4(__m256d a, __m256d b);

__m512d Sleef_atan2d8_u10(__m512d a, __m512d b);
__m512d Sleef_atan2d8_u10avx512f(__m512d a, __m512d b);
__m512d Sleef_atan2d8_u10avx512fnofma(__m512d a, __m512d b);
__m512d Sleef_cinz_atan2d8_u10avx512fnofma(__m512d a, __m512d b);
__m512d Sleef_finz_atan2d8_u10avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atan2_u10 with the same accuracy specification.


Vectorized single precision arc tangent functions of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_atan2f1_u10purec(float a, float b);
float Sleef_atan2f1_u10purecfma(float a, float b);
float Sleef_cinz_atan2f1_u10purec(float a, float b);
float Sleef_finz_atan2f1_u10purecfma(float a, float b);

__m128 Sleef_atan2f4_u10(__m128 a, __m128 b);
__m128 Sleef_atan2f4_u10sse2(__m128 a, __m128 b);
__m128 Sleef_atan2f4_u10sse4(__m128 a, __m128 b);
__m128 Sleef_atan2f4_u10avx2128(__m128 a, __m128 b);
__m128 Sleef_cinz_atan2f4_u10sse2(__m128 a, __m128 b);
__m128 Sleef_cinz_atan2f4_u10sse4(__m128 a, __m128 b);
__m128 Sleef_finz_atan2f4_u10avx2128(__m128 a, __m128 b);

__m256 Sleef_atan2f8_u10(__m256 a, __m256 b);
__m256 Sleef_atan2f8_u10avx(__m256 a, __m256 b);
__m256 Sleef_atan2f8_u10avx2(__m256 a, __m256 b);
__m256 Sleef_atan2f8_u10fma4(__m256 a, __m256 b);
__m256 Sleef_cinz_atan2f8_u10avx(__m256 a, __m256 b);
__m256 Sleef_finz_atan2f8_u10avx2(__m256 a, __m256 b);
__m256 Sleef_finz_atan2f8_u10fma4(__m256 a, __m256 b);

__m512 Sleef_atan2f16_u10(__m512 a, __m512 b);
__m512 Sleef_atan2f16_u10avx512f(__m512 a, __m512 b);
__m512 Sleef_atan2f16_u10avx512fnofma(__m512 a, __m512 b);
__m512 Sleef_cinz_atan2f16_u10avx512fnofma(__m512 a, __m512 b);
__m512 Sleef_finz_atan2f16_u10avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atan2f_u10 with the same accuracy specification.


Vectorized double precision arc tangent functions of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_atan2d1_u35purec(double a, double b);
double Sleef_atan2d1_u35purecfma(double a, double b);
double Sleef_cinz_atan2d1_u35purec(double a, double b);
double Sleef_finz_atan2d1_u35purecfma(double a, double b);

__m128d Sleef_atan2d2_u35(__m128d a, __m128d b);
__m128d Sleef_atan2d2_u35sse2(__m128d a, __m128d b);
__m128d Sleef_atan2d2_u35sse4(__m128d a, __m128d b);
__m128d Sleef_atan2d2_u35avx2128(__m128d a, __m128d b);
__m128d Sleef_cinz_atan2d2_u35sse2(__m128d a, __m128d b);
__m128d Sleef_cinz_atan2d2_u35sse4(__m128d a, __m128d b);
__m128d Sleef_finz_atan2d2_u35avx2128(__m128d a, __m128d b);

__m256d Sleef_atan2d4_u35(__m256d a, __m256d b);
__m256d Sleef_atan2d4_u35avx(__m256d a, __m256d b);
__m256d Sleef_atan2d4_u35avx2(__m256d a, __m256d b);
__m256d Sleef_atan2d4_u35fma4(__m256d a, __m256d b);
__m256d Sleef_cinz_atan2d4_u35avx(__m256d a, __m256d b);
__m256d Sleef_finz_atan2d4_u35avx2(__m256d a, __m256d b);
__m256d Sleef_finz_atan2d4_u35fma4(__m256d a, __m256d b);

__m512d Sleef_atan2d8_u35(__m512d a, __m512d b);
__m512d Sleef_atan2d8_u35avx512f(__m512d a, __m512d b);
__m512d Sleef_atan2d8_u35avx512fnofma(__m512d a, __m512d b);
__m512d Sleef_cinz_atan2d8_u35avx512fnofma(__m512d a, __m512d b);
__m512d Sleef_finz_atan2d8_u35avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atan2_u35 with the same accuracy specification.


Vectorized single precision arc tangent functions of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_atan2f1_u35purec(float a, float b);
float Sleef_atan2f1_u35purecfma(float a, float b);
float Sleef_cinz_atan2f1_u35purec(float a, float b);
float Sleef_finz_atan2f1_u35purecfma(float a, float b);

__m128 Sleef_atan2f4_u35(__m128 a, __m128 b);
__m128 Sleef_atan2f4_u35sse2(__m128 a, __m128 b);
__m128 Sleef_atan2f4_u35sse4(__m128 a, __m128 b);
__m128 Sleef_atan2f4_u35avx2128(__m128 a, __m128 b);
__m128 Sleef_cinz_atan2f4_u35sse2(__m128 a, __m128 b);
__m128 Sleef_cinz_atan2f4_u35sse4(__m128 a, __m128 b);
__m128 Sleef_finz_atan2f4_u35avx2128(__m128 a, __m128 b);

__m256 Sleef_atan2f8_u35(__m256 a, __m256 b);
__m256 Sleef_atan2f8_u35avx(__m256 a, __m256 b);
__m256 Sleef_atan2f8_u35avx2(__m256 a, __m256 b);
__m256 Sleef_atan2f8_u35fma4(__m256 a, __m256 b);
__m256 Sleef_cinz_atan2f8_u35avx(__m256 a, __m256 b);
__m256 Sleef_finz_atan2f8_u35avx2(__m256 a, __m256 b);
__m256 Sleef_finz_atan2f8_u35fma4(__m256 a, __m256 b);

__m512 Sleef_atan2f16_u35(__m512 a, __m512 b);
__m512 Sleef_atan2f16_u35avx512f(__m512 a, __m512 b);
__m512 Sleef_atan2f16_u35avx512fnofma(__m512 a, __m512 b);
__m512 Sleef_cinz_atan2f16_u35avx512fnofma(__m512 a, __m512 b);
__m512 Sleef_finz_atan2f16_u35avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atan2f_u35 with the same accuracy specification.

Hyperbolic functions and inverse hyperbolic functions

Vectorized double precision hyperbolic sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sinhd1_u10purec(double a);
double Sleef_sinhd1_u10purecfma(double a);
double Sleef_cinz_sinhd1_u10purec(double a);
double Sleef_finz_sinhd1_u10purecfma(double a);

__m128d Sleef_sinhd2_u10(__m128d a);
__m128d Sleef_sinhd2_u10sse2(__m128d a);
__m128d Sleef_sinhd2_u10sse4(__m128d a);
__m128d Sleef_sinhd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_sinhd2_u10sse2(__m128d a);
__m128d Sleef_cinz_sinhd2_u10sse4(__m128d a);
__m128d Sleef_finz_sinhd2_u10avx2128(__m128d a);

__m256d Sleef_sinhd4_u10(__m256d a);
__m256d Sleef_sinhd4_u10avx(__m256d a);
__m256d Sleef_sinhd4_u10avx2(__m256d a);
__m256d Sleef_sinhd4_u10fma4(__m256d a);
__m256d Sleef_cinz_sinhd4_u10avx(__m256d a);
__m256d Sleef_finz_sinhd4_u10avx2(__m256d a);
__m256d Sleef_finz_sinhd4_u10fma4(__m256d a);

__m512d Sleef_sinhd8_u10(__m512d a);
__m512d Sleef_sinhd8_u10avx512f(__m512d a);
__m512d Sleef_sinhd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_sinhd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_sinhd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sinhf1_u10purec(float a);
float Sleef_sinhf1_u10purecfma(float a);
float Sleef_cinz_sinhf1_u10purec(float a);
float Sleef_finz_sinhf1_u10purecfma(float a);

__m128 Sleef_sinhf4_u10(__m128 a);
__m128 Sleef_sinhf4_u10sse2(__m128 a);
__m128 Sleef_sinhf4_u10sse4(__m128 a);
__m128 Sleef_sinhf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_sinhf4_u10sse2(__m128 a);
__m128 Sleef_cinz_sinhf4_u10sse4(__m128 a);
__m128 Sleef_finz_sinhf4_u10avx2128(__m128 a);

__m256 Sleef_sinhf8_u10(__m256 a);
__m256 Sleef_sinhf8_u10avx(__m256 a);
__m256 Sleef_sinhf8_u10avx2(__m256 a);
__m256 Sleef_sinhf8_u10fma4(__m256 a);
__m256 Sleef_cinz_sinhf8_u10avx(__m256 a);
__m256 Sleef_finz_sinhf8_u10avx2(__m256 a);
__m256 Sleef_finz_sinhf8_u10fma4(__m256 a);

__m512 Sleef_sinhf16_u10(__m512 a);
__m512 Sleef_sinhf16_u10avx512f(__m512 a);
__m512 Sleef_sinhf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_sinhf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_sinhf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


double Sleef_sinhd1_u35purec(double a);
double Sleef_sinhd1_u35purecfma(double a);
double Sleef_cinz_sinhd1_u35purec(double a);
double Sleef_finz_sinhd1_u35purecfma(double a);

__m128d Sleef_sinhd2_u35(__m128d a);
__m128d Sleef_sinhd2_u35sse2(__m128d a);
__m128d Sleef_sinhd2_u35sse4(__m128d a);
__m128d Sleef_sinhd2_u35avx2128(__m128d a);
__m128d Sleef_cinz_sinhd2_u35sse2(__m128d a);
__m128d Sleef_cinz_sinhd2_u35sse4(__m128d a);
__m128d Sleef_finz_sinhd2_u35avx2128(__m128d a);

__m256d Sleef_sinhd4_u35(__m256d a);
__m256d Sleef_sinhd4_u35avx(__m256d a);
__m256d Sleef_sinhd4_u35avx2(__m256d a);
__m256d Sleef_sinhd4_u35fma4(__m256d a);
__m256d Sleef_cinz_sinhd4_u35avx(__m256d a);
__m256d Sleef_finz_sinhd4_u35avx2(__m256d a);
__m256d Sleef_finz_sinhd4_u35fma4(__m256d a);

__m512d Sleef_sinhd8_u35(__m512d a);
__m512d Sleef_sinhd8_u35avx512f(__m512d a);
__m512d Sleef_sinhd8_u35avx512fnofma(__m512d a);
__m512d Sleef_cinz_sinhd8_u35avx512fnofma(__m512d a);
__m512d Sleef_finz_sinhd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>


float Sleef_sinhf1_u35purec(float a);
float Sleef_sinhf1_u35purecfma(float a);
float Sleef_cinz_sinhf1_u35purec(float a);
float Sleef_finz_sinhf1_u35purecfma(float a);

__m128 Sleef_sinhf4_u35(__m128 a);
__m128 Sleef_sinhf4_u35sse2(__m128 a);
__m128 Sleef_sinhf4_u35sse4(__m128 a);
__m128 Sleef_sinhf4_u35avx2128(__m128 a);
__m128 Sleef_cinz_sinhf4_u35sse2(__m128 a);
__m128 Sleef_cinz_sinhf4_u35sse4(__m128 a);
__m128 Sleef_finz_sinhf4_u35avx2128(__m128 a);

__m256 Sleef_sinhf8_u35(__m256 a);
__m256 Sleef_sinhf8_u35avx(__m256 a);
__m256 Sleef_sinhf8_u35avx2(__m256 a);
__m256 Sleef_sinhf8_u35fma4(__m256 a);
__m256 Sleef_cinz_sinhf8_u35avx(__m256 a);
__m256 Sleef_finz_sinhf8_u35avx2(__m256 a);
__m256 Sleef_finz_sinhf8_u35fma4(__m256 a);

__m512 Sleef_sinhf16_u35(__m512 a);
__m512 Sleef_sinhf16_u35avx512f(__m512 a);
__m512 Sleef_sinhf16_u35avx512fnofma(__m512 a);
__m512 Sleef_cinz_sinhf16_u35avx512fnofma(__m512 a);
__m512 Sleef_finz_sinhf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinhf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic cosine functions

Synopsis

#include <sleef.h>

double Sleef_coshd1_u10purec(double a);
double Sleef_coshd1_u10purecfma(double a);
double Sleef_cinz_coshd1_u10purec(double a);
double Sleef_finz_coshd1_u10purecfma(double a);

__m128d Sleef_coshd2_u10(__m128d a);
__m128d Sleef_coshd2_u10sse2(__m128d a);
__m128d Sleef_coshd2_u10sse4(__m128d a);
__m128d Sleef_coshd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_coshd2_u10sse2(__m128d a);
__m128d Sleef_cinz_coshd2_u10sse4(__m128d a);
__m128d Sleef_finz_coshd2_u10avx2128(__m128d a);

__m256d Sleef_coshd4_u10(__m256d a);
__m256d Sleef_coshd4_u10avx(__m256d a);
__m256d Sleef_coshd4_u10avx2(__m256d a);
__m256d Sleef_coshd4_u10fma4(__m256d a);
__m256d Sleef_cinz_coshd4_u10avx(__m256d a);
__m256d Sleef_finz_coshd4_u10avx2(__m256d a);
__m256d Sleef_finz_coshd4_u10fma4(__m256d a);

__m512d Sleef_coshd8_u10(__m512d a);
__m512d Sleef_coshd8_u10avx512f(__m512d a);
__m512d Sleef_coshd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_coshd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_coshd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cosh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic cosine functions

Synopsis

#include <sleef.h>

float Sleef_coshf1_u10purec(float a);
float Sleef_coshf1_u10purecfma(float a);
float Sleef_cinz_coshf1_u10purec(float a);
float Sleef_finz_coshf1_u10purecfma(float a);

__m128 Sleef_coshf4_u10(__m128 a);
__m128 Sleef_coshf4_u10sse2(__m128 a);
__m128 Sleef_coshf4_u10sse4(__m128 a);
__m128 Sleef_coshf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_coshf4_u10sse2(__m128 a);
__m128 Sleef_cinz_coshf4_u10sse4(__m128 a);
__m128 Sleef_finz_coshf4_u10avx2128(__m128 a);

__m256 Sleef_coshf8_u10(__m256 a);
__m256 Sleef_coshf8_u10avx(__m256 a);
__m256 Sleef_coshf8_u10avx2(__m256 a);
__m256 Sleef_coshf8_u10fma4(__m256 a);
__m256 Sleef_cinz_coshf8_u10avx(__m256 a);
__m256 Sleef_finz_coshf8_u10avx2(__m256 a);
__m256 Sleef_finz_coshf8_u10fma4(__m256 a);

__m512 Sleef_coshf16_u10(__m512 a);
__m512 Sleef_coshf16_u10avx512f(__m512 a);
__m512 Sleef_coshf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_coshf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_coshf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_coshf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic cosine functions

Synopsis

#include <sleef.h>


double Sleef_coshd1_u35purec(double a);
double Sleef_coshd1_u35purecfma(double a);
double Sleef_cinz_coshd1_u35purec(double a);
double Sleef_finz_coshd1_u35purecfma(double a);

__m128d Sleef_coshd2_u35(__m128d a);
__m128d Sleef_coshd2_u35sse2(__m128d a);
__m128d Sleef_coshd2_u35sse4(__m128d a);
__m128d Sleef_coshd2_u35avx2128(__m128d a);
__m128d Sleef_cinz_coshd2_u35sse2(__m128d a);
__m128d Sleef_cinz_coshd2_u35sse4(__m128d a);
__m128d Sleef_finz_coshd2_u35avx2128(__m128d a);

__m256d Sleef_coshd4_u35(__m256d a);
__m256d Sleef_coshd4_u35avx(__m256d a);
__m256d Sleef_coshd4_u35avx2(__m256d a);
__m256d Sleef_coshd4_u35fma4(__m256d a);
__m256d Sleef_cinz_coshd4_u35avx(__m256d a);
__m256d Sleef_finz_coshd4_u35avx2(__m256d a);
__m256d Sleef_finz_coshd4_u35fma4(__m256d a);

__m512d Sleef_coshd8_u35(__m512d a);
__m512d Sleef_coshd8_u35avx512f(__m512d a);
__m512d Sleef_coshd8_u35avx512fnofma(__m512d a);
__m512d Sleef_cinz_coshd8_u35avx512fnofma(__m512d a);
__m512d Sleef_finz_coshd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cosh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic cosine functions

Synopsis

#include <sleef.h>


float Sleef_coshf1_u35purec(float a);
float Sleef_coshf1_u35purecfma(float a);
float Sleef_cinz_coshf1_u35purec(float a);
float Sleef_finz_coshf1_u35purecfma(float a);

__m128 Sleef_coshf4_u35(__m128 a);
__m128 Sleef_coshf4_u35sse2(__m128 a);
__m128 Sleef_coshf4_u35sse4(__m128 a);
__m128 Sleef_coshf4_u35avx2128(__m128 a);
__m128 Sleef_cinz_coshf4_u35sse2(__m128 a);
__m128 Sleef_cinz_coshf4_u35sse4(__m128 a);
__m128 Sleef_finz_coshf4_u35avx2128(__m128 a);

__m256 Sleef_coshf8_u35(__m256 a);
__m256 Sleef_coshf8_u35avx(__m256 a);
__m256 Sleef_coshf8_u35avx2(__m256 a);
__m256 Sleef_coshf8_u35fma4(__m256 a);
__m256 Sleef_cinz_coshf8_u35avx(__m256 a);
__m256 Sleef_finz_coshf8_u35avx2(__m256 a);
__m256 Sleef_finz_coshf8_u35fma4(__m256 a);

__m512 Sleef_coshf16_u35(__m512 a);
__m512 Sleef_coshf16_u35avx512f(__m512 a);
__m512 Sleef_coshf16_u35avx512fnofma(__m512 a);
__m512 Sleef_cinz_coshf16_u35avx512fnofma(__m512 a);
__m512 Sleef_finz_coshf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_coshf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic tangent functions

Synopsis

#include <sleef.h>

double Sleef_tanhd1_u10purec(double a);
double Sleef_tanhd1_u10purecfma(double a);
double Sleef_cinz_tanhd1_u10purec(double a);
double Sleef_finz_tanhd1_u10purecfma(double a);

__m128d Sleef_tanhd2_u10(__m128d a);
__m128d Sleef_tanhd2_u10sse2(__m128d a);
__m128d Sleef_tanhd2_u10sse4(__m128d a);
__m128d Sleef_tanhd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_tanhd2_u10sse2(__m128d a);
__m128d Sleef_cinz_tanhd2_u10sse4(__m128d a);
__m128d Sleef_finz_tanhd2_u10avx2128(__m128d a);

__m256d Sleef_tanhd4_u10(__m256d a);
__m256d Sleef_tanhd4_u10avx(__m256d a);
__m256d Sleef_tanhd4_u10avx2(__m256d a);
__m256d Sleef_tanhd4_u10fma4(__m256d a);
__m256d Sleef_cinz_tanhd4_u10avx(__m256d a);
__m256d Sleef_finz_tanhd4_u10avx2(__m256d a);
__m256d Sleef_finz_tanhd4_u10fma4(__m256d a);

__m512d Sleef_tanhd8_u10(__m512d a);
__m512d Sleef_tanhd8_u10avx512f(__m512d a);
__m512d Sleef_tanhd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_tanhd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_tanhd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tanh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic tangent functions

Synopsis

#include <sleef.h>

float Sleef_tanhf1_u10purec(float a);
float Sleef_tanhf1_u10purecfma(float a);
float Sleef_cinz_tanhf1_u10purec(float a);
float Sleef_finz_tanhf1_u10purecfma(float a);

__m128 Sleef_tanhf4_u10(__m128 a);
__m128 Sleef_tanhf4_u10sse2(__m128 a);
__m128 Sleef_tanhf4_u10sse4(__m128 a);
__m128 Sleef_tanhf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_tanhf4_u10sse2(__m128 a);
__m128 Sleef_cinz_tanhf4_u10sse4(__m128 a);
__m128 Sleef_finz_tanhf4_u10avx2128(__m128 a);

__m256 Sleef_tanhf8_u10(__m256 a);
__m256 Sleef_tanhf8_u10avx(__m256 a);
__m256 Sleef_tanhf8_u10avx2(__m256 a);
__m256 Sleef_tanhf8_u10fma4(__m256 a);
__m256 Sleef_cinz_tanhf8_u10avx(__m256 a);
__m256 Sleef_finz_tanhf8_u10avx2(__m256 a);
__m256 Sleef_finz_tanhf8_u10fma4(__m256 a);

__m512 Sleef_tanhf16_u10(__m512 a);
__m512 Sleef_tanhf16_u10avx512f(__m512 a);
__m512 Sleef_tanhf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_tanhf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_tanhf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tanhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic tangent functions

Synopsis

#include <sleef.h>


double Sleef_tanhd1_u35purec(double a);
double Sleef_tanhd1_u35purecfma(double a);
double Sleef_cinz_tanhd1_u35purec(double a);
double Sleef_finz_tanhd1_u35purecfma(double a);

__m128d Sleef_tanhd2_u35(__m128d a);
__m128d Sleef_tanhd2_u35sse2(__m128d a);
__m128d Sleef_tanhd2_u35sse4(__m128d a);
__m128d Sleef_tanhd2_u35avx2128(__m128d a);
__m128d Sleef_cinz_tanhd2_u35sse2(__m128d a);
__m128d Sleef_cinz_tanhd2_u35sse4(__m128d a);
__m128d Sleef_finz_tanhd2_u35avx2128(__m128d a);

__m256d Sleef_tanhd4_u35(__m256d a);
__m256d Sleef_tanhd4_u35avx(__m256d a);
__m256d Sleef_tanhd4_u35avx2(__m256d a);
__m256d Sleef_tanhd4_u35fma4(__m256d a);
__m256d Sleef_cinz_tanhd4_u35avx(__m256d a);
__m256d Sleef_finz_tanhd4_u35avx2(__m256d a);
__m256d Sleef_finz_tanhd4_u35fma4(__m256d a);

__m512d Sleef_tanhd8_u35(__m512d a);
__m512d Sleef_tanhd8_u35avx512f(__m512d a);
__m512d Sleef_tanhd8_u35avx512fnofma(__m512d a);
__m512d Sleef_cinz_tanhd8_u35avx512fnofma(__m512d a);
__m512d Sleef_finz_tanhd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tanh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic tangent functions

Synopsis

#include <sleef.h>


float Sleef_tanhf1_u35purec(float a);
float Sleef_tanhf1_u35purecfma(float a);
float Sleef_cinz_tanhf1_u35purec(float a);
float Sleef_finz_tanhf1_u35purecfma(float a);

__m128 Sleef_tanhf4_u35(__m128 a);
__m128 Sleef_tanhf4_u35sse2(__m128 a);
__m128 Sleef_tanhf4_u35sse4(__m128 a);
__m128 Sleef_tanhf4_u35avx2128(__m128 a);
__m128 Sleef_cinz_tanhf4_u35sse2(__m128 a);
__m128 Sleef_cinz_tanhf4_u35sse4(__m128 a);
__m128 Sleef_finz_tanhf4_u35avx2128(__m128 a);

__m256 Sleef_tanhf8_u35(__m256 a);
__m256 Sleef_tanhf8_u35avx(__m256 a);
__m256 Sleef_tanhf8_u35avx2(__m256 a);
__m256 Sleef_tanhf8_u35fma4(__m256 a);
__m256 Sleef_cinz_tanhf8_u35avx(__m256 a);
__m256 Sleef_finz_tanhf8_u35avx2(__m256 a);
__m256 Sleef_finz_tanhf8_u35fma4(__m256 a);

__m512 Sleef_tanhf16_u35(__m512 a);
__m512 Sleef_tanhf16_u35avx512f(__m512 a);
__m512 Sleef_tanhf16_u35avx512fnofma(__m512 a);
__m512 Sleef_cinz_tanhf16_u35avx512fnofma(__m512 a);
__m512 Sleef_finz_tanhf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tanhf_u35 with the same accuracy specification.


Vectorized double precision inverse hyperbolic sine functions

Synopsis

#include <sleef.h>

double Sleef_asinhd1_u10purec(double a);
double Sleef_asinhd1_u10purecfma(double a);
double Sleef_cinz_asinhd1_u10purec(double a);
double Sleef_finz_asinhd1_u10purecfma(double a);

__m128d Sleef_asinhd2_u10(__m128d a);
__m128d Sleef_asinhd2_u10sse2(__m128d a);
__m128d Sleef_asinhd2_u10sse4(__m128d a);
__m128d Sleef_asinhd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_asinhd2_u10sse2(__m128d a);
__m128d Sleef_cinz_asinhd2_u10sse4(__m128d a);
__m128d Sleef_finz_asinhd2_u10avx2128(__m128d a);

__m256d Sleef_asinhd4_u10(__m256d a);
__m256d Sleef_asinhd4_u10avx(__m256d a);
__m256d Sleef_asinhd4_u10avx2(__m256d a);
__m256d Sleef_asinhd4_u10fma4(__m256d a);
__m256d Sleef_cinz_asinhd4_u10avx(__m256d a);
__m256d Sleef_finz_asinhd4_u10avx2(__m256d a);
__m256d Sleef_finz_asinhd4_u10fma4(__m256d a);

__m512d Sleef_asinhd8_u10(__m512d a);
__m512d Sleef_asinhd8_u10avx512f(__m512d a);
__m512d Sleef_asinhd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_asinhd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_asinhd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_asinh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic sine functions

Synopsis

#include <sleef.h>

float Sleef_asinhf1_u10purec(float a);
float Sleef_asinhf1_u10purecfma(float a);
float Sleef_cinz_asinhf1_u10purec(float a);
float Sleef_finz_asinhf1_u10purecfma(float a);

__m128 Sleef_asinhf4_u10(__m128 a);
__m128 Sleef_asinhf4_u10sse2(__m128 a);
__m128 Sleef_asinhf4_u10sse4(__m128 a);
__m128 Sleef_asinhf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_asinhf4_u10sse2(__m128 a);
__m128 Sleef_cinz_asinhf4_u10sse4(__m128 a);
__m128 Sleef_finz_asinhf4_u10avx2128(__m128 a);

__m256 Sleef_asinhf8_u10(__m256 a);
__m256 Sleef_asinhf8_u10avx(__m256 a);
__m256 Sleef_asinhf8_u10avx2(__m256 a);
__m256 Sleef_asinhf8_u10fma4(__m256 a);
__m256 Sleef_cinz_asinhf8_u10avx(__m256 a);
__m256 Sleef_finz_asinhf8_u10avx2(__m256 a);
__m256 Sleef_finz_asinhf8_u10fma4(__m256 a);

__m512 Sleef_asinhf16_u10(__m512 a);
__m512 Sleef_asinhf16_u10avx512f(__m512 a);
__m512 Sleef_asinhf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_asinhf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_asinhf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_asinhf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic cosine functions

Synopsis

#include <sleef.h>

double Sleef_acoshd1_u10purec(double a);
double Sleef_acoshd1_u10purecfma(double a);
double Sleef_cinz_acoshd1_u10purec(double a);
double Sleef_finz_acoshd1_u10purecfma(double a);

__m128d Sleef_acoshd2_u10(__m128d a);
__m128d Sleef_acoshd2_u10sse2(__m128d a);
__m128d Sleef_acoshd2_u10sse4(__m128d a);
__m128d Sleef_acoshd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_acoshd2_u10sse2(__m128d a);
__m128d Sleef_cinz_acoshd2_u10sse4(__m128d a);
__m128d Sleef_finz_acoshd2_u10avx2128(__m128d a);

__m256d Sleef_acoshd4_u10(__m256d a);
__m256d Sleef_acoshd4_u10avx(__m256d a);
__m256d Sleef_acoshd4_u10avx2(__m256d a);
__m256d Sleef_acoshd4_u10fma4(__m256d a);
__m256d Sleef_cinz_acoshd4_u10avx(__m256d a);
__m256d Sleef_finz_acoshd4_u10avx2(__m256d a);
__m256d Sleef_finz_acoshd4_u10fma4(__m256d a);

__m512d Sleef_acoshd8_u10(__m512d a);
__m512d Sleef_acoshd8_u10avx512f(__m512d a);
__m512d Sleef_acoshd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_acoshd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_acoshd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_acosh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic cosine functions

Synopsis

#include <sleef.h>

float Sleef_acoshf1_u10purec(float a);
float Sleef_acoshf1_u10purecfma(float a);
float Sleef_cinz_acoshf1_u10purec(float a);
float Sleef_finz_acoshf1_u10purecfma(float a);

__m128 Sleef_acoshf4_u10(__m128 a);
__m128 Sleef_acoshf4_u10sse2(__m128 a);
__m128 Sleef_acoshf4_u10sse4(__m128 a);
__m128 Sleef_acoshf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_acoshf4_u10sse2(__m128 a);
__m128 Sleef_cinz_acoshf4_u10sse4(__m128 a);
__m128 Sleef_finz_acoshf4_u10avx2128(__m128 a);

__m256 Sleef_acoshf8_u10(__m256 a);
__m256 Sleef_acoshf8_u10avx(__m256 a);
__m256 Sleef_acoshf8_u10avx2(__m256 a);
__m256 Sleef_acoshf8_u10fma4(__m256 a);
__m256 Sleef_cinz_acoshf8_u10avx(__m256 a);
__m256 Sleef_finz_acoshf8_u10avx2(__m256 a);
__m256 Sleef_finz_acoshf8_u10fma4(__m256 a);

__m512 Sleef_acoshf16_u10(__m512 a);
__m512 Sleef_acoshf16_u10avx512f(__m512 a);
__m512 Sleef_acoshf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_acoshf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_acoshf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_acoshf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic tangent functions

Synopsis

#include <sleef.h>

double Sleef_atanhd1_u10purec(double a);
double Sleef_atanhd1_u10purecfma(double a);
double Sleef_cinz_atanhd1_u10purec(double a);
double Sleef_finz_atanhd1_u10purecfma(double a);

__m128d Sleef_atanhd2_u10(__m128d a);
__m128d Sleef_atanhd2_u10sse2(__m128d a);
__m128d Sleef_atanhd2_u10sse4(__m128d a);
__m128d Sleef_atanhd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_atanhd2_u10sse2(__m128d a);
__m128d Sleef_cinz_atanhd2_u10sse4(__m128d a);
__m128d Sleef_finz_atanhd2_u10avx2128(__m128d a);

__m256d Sleef_atanhd4_u10(__m256d a);
__m256d Sleef_atanhd4_u10avx(__m256d a);
__m256d Sleef_atanhd4_u10avx2(__m256d a);
__m256d Sleef_atanhd4_u10fma4(__m256d a);
__m256d Sleef_cinz_atanhd4_u10avx(__m256d a);
__m256d Sleef_finz_atanhd4_u10avx2(__m256d a);
__m256d Sleef_finz_atanhd4_u10fma4(__m256d a);

__m512d Sleef_atanhd8_u10(__m512d a);
__m512d Sleef_atanhd8_u10avx512f(__m512d a);
__m512d Sleef_atanhd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_atanhd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_atanhd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atanh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic tangent functions

Synopsis

#include <sleef.h>

float Sleef_atanhf1_u10purec(float a);
float Sleef_atanhf1_u10purecfma(float a);
float Sleef_cinz_atanhf1_u10purec(float a);
float Sleef_finz_atanhf1_u10purecfma(float a);

__m128 Sleef_atanhf4_u10(__m128 a);
__m128 Sleef_atanhf4_u10sse2(__m128 a);
__m128 Sleef_atanhf4_u10sse4(__m128 a);
__m128 Sleef_atanhf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_atanhf4_u10sse2(__m128 a);
__m128 Sleef_cinz_atanhf4_u10sse4(__m128 a);
__m128 Sleef_finz_atanhf4_u10avx2128(__m128 a);

__m256 Sleef_atanhf8_u10(__m256 a);
__m256 Sleef_atanhf8_u10avx(__m256 a);
__m256 Sleef_atanhf8_u10avx2(__m256 a);
__m256 Sleef_atanhf8_u10fma4(__m256 a);
__m256 Sleef_cinz_atanhf8_u10avx(__m256 a);
__m256 Sleef_finz_atanhf8_u10avx2(__m256 a);
__m256 Sleef_finz_atanhf8_u10fma4(__m256 a);

__m512 Sleef_atanhf16_u10(__m512 a);
__m512 Sleef_atanhf16_u10avx512f(__m512 a);
__m512 Sleef_atanhf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_atanhf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_atanhf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atanhf_u10 with the same accuracy specification.

Error and gamma functions

Vectorized double precision error functions

Synopsis

#include <sleef.h>

double Sleef_erfd1_u10purec(double a);
double Sleef_erfd1_u10purecfma(double a);
double Sleef_cinz_erfd1_u10purec(double a);
double Sleef_finz_erfd1_u10purecfma(double a);

__m128d Sleef_erfd2_u10(__m128d a);
__m128d Sleef_erfd2_u10sse2(__m128d a);
__m128d Sleef_erfd2_u10sse4(__m128d a);
__m128d Sleef_erfd2_u10avx2128(__m128d a);
__m128d Sleef_cinz_erfd2_u10sse2(__m128d a);
__m128d Sleef_cinz_erfd2_u10sse4(__m128d a);
__m128d Sleef_finz_erfd2_u10avx2128(__m128d a);

__m256d Sleef_erfd4_u10(__m256d a);
__m256d Sleef_erfd4_u10avx(__m256d a);
__m256d Sleef_erfd4_u10avx2(__m256d a);
__m256d Sleef_erfd4_u10fma4(__m256d a);
__m256d Sleef_cinz_erfd4_u10avx(__m256d a);
__m256d Sleef_finz_erfd4_u10avx2(__m256d a);
__m256d Sleef_finz_erfd4_u10fma4(__m256d a);

__m512d Sleef_erfd8_u10(__m512d a);
__m512d Sleef_erfd8_u10avx512f(__m512d a);
__m512d Sleef_erfd8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_erfd8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_erfd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_erf_u10 with the same accuracy specification.


Vectorized single precision error functions

Synopsis

#include <sleef.h>

float Sleef_erff1_u10purec(float a);
float Sleef_erff1_u10purecfma(float a);
float Sleef_cinz_erff1_u10purec(float a);
float Sleef_finz_erff1_u10purecfma(float a);

__m128 Sleef_erff4_u10(__m128 a);
__m128 Sleef_erff4_u10sse2(__m128 a);
__m128 Sleef_erff4_u10sse4(__m128 a);
__m128 Sleef_erff4_u10avx2128(__m128 a);
__m128 Sleef_cinz_erff4_u10sse2(__m128 a);
__m128 Sleef_cinz_erff4_u10sse4(__m128 a);
__m128 Sleef_finz_erff4_u10avx2128(__m128 a);

__m256 Sleef_erff8_u10(__m256 a);
__m256 Sleef_erff8_u10avx(__m256 a);
__m256 Sleef_erff8_u10avx2(__m256 a);
__m256 Sleef_erff8_u10fma4(__m256 a);
__m256 Sleef_cinz_erff8_u10avx(__m256 a);
__m256 Sleef_finz_erff8_u10avx2(__m256 a);
__m256 Sleef_finz_erff8_u10fma4(__m256 a);

__m512 Sleef_erff16_u10(__m512 a);
__m512 Sleef_erff16_u10avx512f(__m512 a);
__m512 Sleef_erff16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_erff16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_erff16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_erff_u10 with the same accuracy specification.


Vectorized double precision complementary error functions

Synopsis

#include <sleef.h>

double Sleef_erfcd1_u15purec(double a);
double Sleef_erfcd1_u15purecfma(double a);
double Sleef_cinz_erfcd1_u15purec(double a);
double Sleef_finz_erfcd1_u15purecfma(double a);

__m128d Sleef_erfcd2_u15(__m128d a);
__m128d Sleef_erfcd2_u15sse2(__m128d a);
__m128d Sleef_erfcd2_u15sse4(__m128d a);
__m128d Sleef_erfcd2_u15avx2128(__m128d a);
__m128d Sleef_cinz_erfcd2_u15sse2(__m128d a);
__m128d Sleef_cinz_erfcd2_u15sse4(__m128d a);
__m128d Sleef_finz_erfcd2_u15avx2128(__m128d a);

__m256d Sleef_erfcd4_u15(__m256d a);
__m256d Sleef_erfcd4_u15avx(__m256d a);
__m256d Sleef_erfcd4_u15avx2(__m256d a);
__m256d Sleef_erfcd4_u15fma4(__m256d a);
__m256d Sleef_cinz_erfcd4_u15avx(__m256d a);
__m256d Sleef_finz_erfcd4_u15avx2(__m256d a);
__m256d Sleef_finz_erfcd4_u15fma4(__m256d a);

__m512d Sleef_erfcd8_u15(__m512d a);
__m512d Sleef_erfcd8_u15avx512f(__m512d a);
__m512d Sleef_erfcd8_u15avx512fnofma(__m512d a);
__m512d Sleef_cinz_erfcd8_u15avx512fnofma(__m512d a);
__m512d Sleef_finz_erfcd8_u15avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_erfc_u15 with the same accuracy specification.


Vectorized single precision complementary error functions

Synopsis

#include <sleef.h>

float Sleef_erfcf1_u15purec(float a);
float Sleef_erfcf1_u15purecfma(float a);
float Sleef_cinz_erfcf1_u15purec(float a);
float Sleef_finz_erfcf1_u15purecfma(float a);

__m128 Sleef_erfcf4_u15(__m128 a);
__m128 Sleef_erfcf4_u15sse2(__m128 a);
__m128 Sleef_erfcf4_u15sse4(__m128 a);
__m128 Sleef_erfcf4_u15avx2128(__m128 a);
__m128 Sleef_cinz_erfcf4_u15sse2(__m128 a);
__m128 Sleef_cinz_erfcf4_u15sse4(__m128 a);
__m128 Sleef_finz_erfcf4_u15avx2128(__m128 a);

__m256 Sleef_erfcf8_u15(__m256 a);
__m256 Sleef_erfcf8_u15avx(__m256 a);
__m256 Sleef_erfcf8_u15avx2(__m256 a);
__m256 Sleef_erfcf8_u15fma4(__m256 a);
__m256 Sleef_cinz_erfcf8_u15avx(__m256 a);
__m256 Sleef_finz_erfcf8_u15avx2(__m256 a);
__m256 Sleef_finz_erfcf8_u15fma4(__m256 a);

__m512 Sleef_erfcf16_u15(__m512 a);
__m512 Sleef_erfcf16_u15avx512f(__m512 a);
__m512 Sleef_erfcf16_u15avx512fnofma(__m512 a);
__m512 Sleef_cinz_erfcf16_u15avx512fnofma(__m512 a);
__m512 Sleef_finz_erfcf16_u15avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_erfcf_u15 with the same accuracy specification.


Vectorized double precision gamma functions

Synopsis

#include <sleef.h>

double Sleef_tgammad1_u10purec(double a);
double Sleef_tgammad1_u10purecfma(double a);
double Sleef_cinz_tgammad1_u10purec(double a);
double Sleef_finz_tgammad1_u10purecfma(double a);

__m128d Sleef_tgammad2_u10(__m128d a);
__m128d Sleef_tgammad2_u10sse2(__m128d a);
__m128d Sleef_tgammad2_u10sse4(__m128d a);
__m128d Sleef_tgammad2_u10avx2128(__m128d a);
__m128d Sleef_cinz_tgammad2_u10sse2(__m128d a);
__m128d Sleef_cinz_tgammad2_u10sse4(__m128d a);
__m128d Sleef_finz_tgammad2_u10avx2128(__m128d a);

__m256d Sleef_tgammad4_u10(__m256d a);
__m256d Sleef_tgammad4_u10avx(__m256d a);
__m256d Sleef_tgammad4_u10avx2(__m256d a);
__m256d Sleef_tgammad4_u10fma4(__m256d a);
__m256d Sleef_cinz_tgammad4_u10avx(__m256d a);
__m256d Sleef_finz_tgammad4_u10avx2(__m256d a);
__m256d Sleef_finz_tgammad4_u10fma4(__m256d a);

__m512d Sleef_tgammad8_u10(__m512d a);
__m512d Sleef_tgammad8_u10avx512f(__m512d a);
__m512d Sleef_tgammad8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_tgammad8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_tgammad8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tgamma_u10 with the same accuracy specification.


Vectorized single precision gamma functions

Synopsis

#include <sleef.h>

float Sleef_tgammaf1_u10purec(float a);
float Sleef_tgammaf1_u10purecfma(float a);
float Sleef_cinz_tgammaf1_u10purec(float a);
float Sleef_finz_tgammaf1_u10purecfma(float a);

__m128 Sleef_tgammaf4_u10(__m128 a);
__m128 Sleef_tgammaf4_u10sse2(__m128 a);
__m128 Sleef_tgammaf4_u10sse4(__m128 a);
__m128 Sleef_tgammaf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_tgammaf4_u10sse2(__m128 a);
__m128 Sleef_cinz_tgammaf4_u10sse4(__m128 a);
__m128 Sleef_finz_tgammaf4_u10avx2128(__m128 a);

__m256 Sleef_tgammaf8_u10(__m256 a);
__m256 Sleef_tgammaf8_u10avx(__m256 a);
__m256 Sleef_tgammaf8_u10avx2(__m256 a);
__m256 Sleef_tgammaf8_u10fma4(__m256 a);
__m256 Sleef_cinz_tgammaf8_u10avx(__m256 a);
__m256 Sleef_finz_tgammaf8_u10avx2(__m256 a);
__m256 Sleef_finz_tgammaf8_u10fma4(__m256 a);

__m512 Sleef_tgammaf16_u10(__m512 a);
__m512 Sleef_tgammaf16_u10avx512f(__m512 a);
__m512 Sleef_tgammaf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_tgammaf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_tgammaf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tgammaf_u10 with the same accuracy specification.


Vectorized double precision log gamma functions

Synopsis

#include <sleef.h>

double Sleef_lgammad1_u10purec(double a);
double Sleef_lgammad1_u10purecfma(double a);
double Sleef_cinz_lgammad1_u10purec(double a);
double Sleef_finz_lgammad1_u10purecfma(double a);

__m128d Sleef_lgammad2_u10(__m128d a);
__m128d Sleef_lgammad2_u10sse2(__m128d a);
__m128d Sleef_lgammad2_u10sse4(__m128d a);
__m128d Sleef_lgammad2_u10avx2128(__m128d a);
__m128d Sleef_cinz_lgammad2_u10sse2(__m128d a);
__m128d Sleef_cinz_lgammad2_u10sse4(__m128d a);
__m128d Sleef_finz_lgammad2_u10avx2128(__m128d a);

__m256d Sleef_lgammad4_u10(__m256d a);
__m256d Sleef_lgammad4_u10avx(__m256d a);
__m256d Sleef_lgammad4_u10avx2(__m256d a);
__m256d Sleef_lgammad4_u10fma4(__m256d a);
__m256d Sleef_cinz_lgammad4_u10avx(__m256d a);
__m256d Sleef_finz_lgammad4_u10avx2(__m256d a);
__m256d Sleef_finz_lgammad4_u10fma4(__m256d a);

__m512d Sleef_lgammad8_u10(__m512d a);
__m512d Sleef_lgammad8_u10avx512f(__m512d a);
__m512d Sleef_lgammad8_u10avx512fnofma(__m512d a);
__m512d Sleef_cinz_lgammad8_u10avx512fnofma(__m512d a);
__m512d Sleef_finz_lgammad8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_lgamma_u10 with the same accuracy specification.


Vectorized single precision log gamma functions

Synopsis

#include <sleef.h>

float Sleef_lgammaf1_u10purec(float a);
float Sleef_lgammaf1_u10purecfma(float a);
float Sleef_cinz_lgammaf1_u10purec(float a);
float Sleef_finz_lgammaf1_u10purecfma(float a);

__m128 Sleef_lgammaf4_u10(__m128 a);
__m128 Sleef_lgammaf4_u10sse2(__m128 a);
__m128 Sleef_lgammaf4_u10sse4(__m128 a);
__m128 Sleef_lgammaf4_u10avx2128(__m128 a);
__m128 Sleef_cinz_lgammaf4_u10sse2(__m128 a);
__m128 Sleef_cinz_lgammaf4_u10sse4(__m128 a);
__m128 Sleef_finz_lgammaf4_u10avx2128(__m128 a);

__m256 Sleef_lgammaf8_u10(__m256 a);
__m256 Sleef_lgammaf8_u10avx(__m256 a);
__m256 Sleef_lgammaf8_u10avx2(__m256 a);
__m256 Sleef_lgammaf8_u10fma4(__m256 a);
__m256 Sleef_cinz_lgammaf8_u10avx(__m256 a);
__m256 Sleef_finz_lgammaf8_u10avx2(__m256 a);
__m256 Sleef_finz_lgammaf8_u10fma4(__m256 a);

__m512 Sleef_lgammaf16_u10(__m512 a);
__m512 Sleef_lgammaf16_u10avx512f(__m512 a);
__m512 Sleef_lgammaf16_u10avx512fnofma(__m512 a);
__m512 Sleef_cinz_lgammaf16_u10avx512fnofma(__m512 a);
__m512 Sleef_finz_lgammaf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_lgammaf_u10 with the same accuracy specification.

Nearest integer functions

Vectorized double precision functions for rounding to integer towards zero

Synopsis

#include <sleef.h>

__m128d Sleef_truncd2(__m128d a);
__m128d Sleef_truncd2_sse2(__m128d a);
__m128d Sleef_truncd2_sse4(__m128d a);
__m128d Sleef_truncd2_avx2128(__m128d a);

__m256d Sleef_truncd4(__m256d a);
__m256d Sleef_truncd4_avx(__m256d a);
__m256d Sleef_truncd4_fma4(__m256d a);
__m256d Sleef_truncd4_avx2(__m256d a);

__m512d Sleef_truncd8(__m512d a);
__m512d Sleef_truncd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_trunc with the same accuracy specification.


Vectorized single precision functions for rounding to integer towards zero

Synopsis

#include <sleef.h>

__m128 Sleef_truncf4(__m128 a);
__m128 Sleef_truncf4_sse2(__m128 a);
__m128 Sleef_truncf4_sse4(__m128 a);
__m128 Sleef_truncf4_avx2128(__m128 a);

__m256 Sleef_truncf8(__m256 a);
__m256 Sleef_truncf8_avx(__m256 a);
__m256 Sleef_truncf8_fma4(__m256 a);
__m256 Sleef_truncf8_avx2(__m256 a);

__m512 Sleef_truncf16(__m512 a);
__m512 Sleef_truncf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_truncf with the same accuracy specification.


Vectorized double precision functions for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

__m128d Sleef_floord2(__m128d a);
__m128d Sleef_floord2_sse2(__m128d a);
__m128d Sleef_floord2_sse4(__m128d a);
__m128d Sleef_floord2_avx2128(__m128d a);

__m256d Sleef_floord4(__m256d a);
__m256d Sleef_floord4_avx(__m256d a);
__m256d Sleef_floord4_fma4(__m256d a);
__m256d Sleef_floord4_avx2(__m256d a);

__m512d Sleef_floord8(__m512d a);
__m512d Sleef_floord8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_floor with the same accuracy specification.


Vectorized single precision functions for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

__m128 Sleef_floorf4(__m128 a);
__m128 Sleef_floorf4_sse2(__m128 a);
__m128 Sleef_floorf4_sse4(__m128 a);
__m128 Sleef_floorf4_avx2128(__m128 a);

__m256 Sleef_floorf8(__m256 a);
__m256 Sleef_floorf8_avx(__m256 a);
__m256 Sleef_floorf8_fma4(__m256 a);
__m256 Sleef_floorf8_avx2(__m256 a);

__m512 Sleef_floorf16(__m512 a);
__m512 Sleef_floorf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_floorf with the same accuracy specification.


Vectorized double precision functions for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

__m128d Sleef_ceild2(__m128d a);
__m128d Sleef_ceild2_sse2(__m128d a);
__m128d Sleef_ceild2_sse4(__m128d a);
__m128d Sleef_ceild2_avx2128(__m128d a);

__m256d Sleef_ceild4(__m256d a);
__m256d Sleef_ceild4_avx(__m256d a);
__m256d Sleef_ceild4_fma4(__m256d a);
__m256d Sleef_ceild4_avx2(__m256d a);

__m512d Sleef_ceild8(__m512d a);
__m512d Sleef_ceild8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_ceil with the same accuracy specification.


Vectorized single precision functions for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

__m128 Sleef_ceilf4(__m128 a);
__m128 Sleef_ceilf4_sse2(__m128 a);
__m128 Sleef_ceilf4_sse4(__m128 a);
__m128 Sleef_ceilf4_avx2128(__m128 a);

__m256 Sleef_ceilf8(__m256 a);
__m256 Sleef_ceilf8_avx(__m256 a);
__m256 Sleef_ceilf8_fma4(__m256 a);
__m256 Sleef_ceilf8_avx2(__m256 a);

__m512 Sleef_ceilf16(__m512 a);
__m512 Sleef_ceilf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_ceilf with the same accuracy specification.


Vectorized double precision functions for rounding to nearest integer

Synopsis

#include <sleef.h>

__m128d Sleef_roundd2(__m128d a);
__m128d Sleef_roundd2_sse2(__m128d a);
__m128d Sleef_roundd2_sse4(__m128d a);
__m128d Sleef_roundd2_avx2128(__m128d a);

__m256d Sleef_roundd4(__m256d a);
__m256d Sleef_roundd4_avx(__m256d a);
__m256d Sleef_roundd4_fma4(__m256d a);
__m256d Sleef_roundd4_avx2(__m256d a);

__m512d Sleef_roundd8(__m512d a);
__m512d Sleef_roundd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_round with the same accuracy specification.


Vectorized single precision functions for rounding to nearest integer

Synopsis

#include <sleef.h>

__m128 Sleef_roundf4(__m128 a);
__m128 Sleef_roundf4_sse2(__m128 a);
__m128 Sleef_roundf4_sse4(__m128 a);
__m128 Sleef_roundf4_avx2128(__m128 a);

__m256 Sleef_roundf8(__m256 a);
__m256 Sleef_roundf8_avx(__m256 a);
__m256 Sleef_roundf8_fma4(__m256 a);
__m256 Sleef_roundf8_avx2(__m256 a);

__m512 Sleef_roundf16(__m512 a);
__m512 Sleef_roundf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_roundf with the same accuracy specification.


Vectorized double precision functions for rounding to nearest integer, with halfway cases rounded to even

Synopsis

#include <sleef.h>

__m128d Sleef_rintd2(__m128d a);
__m128d Sleef_rintd2_sse2(__m128d a);
__m128d Sleef_rintd2_sse4(__m128d a);
__m128d Sleef_rintd2_avx2128(__m128d a);

__m256d Sleef_rintd4(__m256d a);
__m256d Sleef_rintd4_avx(__m256d a);
__m256d Sleef_rintd4_fma4(__m256d a);
__m256d Sleef_rintd4_avx2(__m256d a);

__m512d Sleef_rintd8(__m512d a);
__m512d Sleef_rintd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_rint with the same accuracy specification.


Vectorized single precision functions for rounding to nearest integer, with halfway cases rounded to even

Synopsis

#include <sleef.h>

__m128 Sleef_rintf4(__m128 a);
__m128 Sleef_rintf4_sse2(__m128 a);
__m128 Sleef_rintf4_sse4(__m128 a);
__m128 Sleef_rintf4_avx2128(__m128 a);

__m256 Sleef_rintf8(__m256 a);
__m256 Sleef_rintf8_avx(__m256 a);
__m256 Sleef_rintf8_fma4(__m256 a);
__m256 Sleef_rintf8_avx2(__m256 a);

__m512 Sleef_rintf16(__m512 a);
__m512 Sleef_rintf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_rintf with the same accuracy specification.

Other functions

Vectorized double precision functions for fused multiply-accumulation

Synopsis

#include <sleef.h>

__m128d Sleef_fmad2(__m128d a, __m128d b, __m128d c);
__m128d Sleef_fmad2_sse2(__m128d a, __m128d b, __m128d c);
__m128d Sleef_fmad2_sse4(__m128d a, __m128d b, __m128d c);
__m128d Sleef_fmad2_avx2128(__m128d a, __m128d b, __m128d c);

__m256d Sleef_fmad4(__m256d a, __m256d b, __m256d c);
__m256d Sleef_fmad4_avx(__m256d a, __m256d b, __m256d c);
__m256d Sleef_fmad4_fma4(__m256d a, __m256d b, __m256d c);
__m256d Sleef_fmad4_avx2(__m256d a, __m256d b, __m256d c);

__m512d Sleef_fmad8(__m512d a, __m512d b, __m512d c);
__m512d Sleef_fmad8_avx512f(__m512d a, __m512d b, __m512d c);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fma with the same accuracy specification.


Vectorized single precision functions for fused multiply-accumulation

Synopsis

#include <sleef.h>

__m128 Sleef_fmaf4(__m128 a, __m128 b, __m128 c);
__m128 Sleef_fmaf4_sse2(__m128 a, __m128 b, __m128 c);
__m128 Sleef_fmaf4_sse4(__m128 a, __m128 b, __m128 c);
__m128 Sleef_fmaf4_avx2128(__m128 a, __m128 b, __m128 c);

__m256 Sleef_fmaf8(__m256 a, __m256 b, __m256 c);
__m256 Sleef_fmaf8_avx(__m256 a, __m256 b, __m256 c);
__m256 Sleef_fmaf8_fma4(__m256 a, __m256 b, __m256 c);
__m256 Sleef_fmaf8_avx2(__m256 a, __m256 b, __m256 c);

__m512 Sleef_fmaf16(__m512 a, __m512 b, __m512 c);
__m512 Sleef_fmaf16_avx512f(__m512 a, __m512 b, __m512 c);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fmaf with the same accuracy specification.


Vectorized double precision FP remainder

Synopsis

#include <sleef.h>

__m128d Sleef_fmodd2(__m128d a, __m128d b);
__m128d Sleef_fmodd2_sse2(__m128d a, __m128d b);
__m128d Sleef_fmodd2_sse4(__m128d a, __m128d b);
__m128d Sleef_fmodd2_avx2128(__m128d a, __m128d b);

__m256d Sleef_fmodd4(__m256d a, __m256d b);
__m256d Sleef_fmodd4_avx(__m256d a, __m256d b);
__m256d Sleef_fmodd4_fma4(__m256d a, __m256d b);
__m256d Sleef_fmodd4_avx2(__m256d a, __m256d b);

__m512d Sleef_fmodd8(__m512d a, __m512d b);
__m512d Sleef_fmodd8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fmod with the same accuracy specification.


Vectorized single precision FP remainder

Synopsis

#include <sleef.h>

__m128 Sleef_fmodf4(__m128 a, __m128 b);
__m128 Sleef_fmodf4_sse2(__m128 a, __m128 b);
__m128 Sleef_fmodf4_sse4(__m128 a, __m128 b);
__m128 Sleef_fmodf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_fmodf8(__m256 a, __m256 b);
__m256 Sleef_fmodf8_avx(__m256 a, __m256 b);
__m256 Sleef_fmodf8_fma4(__m256 a, __m256 b);
__m256 Sleef_fmodf8_avx2(__m256 a, __m256 b);

__m512 Sleef_fmodf16(__m512 a, __m512 b);
__m512 Sleef_fmodf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fmodf with the same accuracy specification.


Vectorized double precision FP remainder (IEEE remainder)

Synopsis

#include <sleef.h>

__m128d Sleef_remainderd2(__m128d a, __m128d b);
__m128d Sleef_remainderd2_sse2(__m128d a, __m128d b);
__m128d Sleef_remainderd2_sse4(__m128d a, __m128d b);
__m128d Sleef_remainderd2_avx2128(__m128d a, __m128d b);

__m256d Sleef_remainderd4(__m256d a, __m256d b);
__m256d Sleef_remainderd4_avx(__m256d a, __m256d b);
__m256d Sleef_remainderd4_fma4(__m256d a, __m256d b);
__m256d Sleef_remainderd4_avx2(__m256d a, __m256d b);

__m512d Sleef_remainderd8(__m512d a, __m512d b);
__m512d Sleef_remainderd8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_remainder with the same accuracy specification.


Vectorized single precision FP remainder (IEEE remainder)

Synopsis

#include <sleef.h>

__m128 Sleef_remainderf4(__m128 a, __m128 b);
__m128 Sleef_remainderf4_sse2(__m128 a, __m128 b);
__m128 Sleef_remainderf4_sse4(__m128 a, __m128 b);
__m128 Sleef_remainderf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_remainderf8(__m256 a, __m256 b);
__m256 Sleef_remainderf8_avx(__m256 a, __m256 b);
__m256 Sleef_remainderf8_fma4(__m256 a, __m256 b);
__m256 Sleef_remainderf8_avx2(__m256 a, __m256 b);

__m512 Sleef_remainderf16(__m512 a, __m512 b);
__m512 Sleef_remainderf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_remainderf with the same accuracy specification.


Vectorized double precision functions for multiplying by integral power of 2

Synopsis

#include <sleef.h>

__m128d Sleef_ldexpd2(__m128d a, __m128i b);
__m128d Sleef_ldexpd2_sse2(__m128d a, __m128i b);
__m128d Sleef_ldexpd2_sse4(__m128d a, __m128i b);
__m128d Sleef_ldexpd2_avx2128(__m128d a, __m128i b);

__m256d Sleef_ldexpd4(__m256d a, __m128i b);
__m256d Sleef_ldexpd4_avx(__m256d a, __m128i b);
__m256d Sleef_ldexpd4_fma4(__m256d a, __m128i b);
__m256d Sleef_ldexpd4_avx2(__m256d a, __m128i b);

__m512d Sleef_ldexpd8(__m512d a, __m256i b);
__m512d Sleef_ldexpd8_avx512f(__m512d a, __m256i b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_ldexp with the same accuracy specification.


Vectorized double precision functions for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

__m128d Sleef_frfrexpd2(__m128d a);
__m128d Sleef_frfrexpd2_sse2(__m128d a);
__m128d Sleef_frfrexpd2_sse4(__m128d a);
__m128d Sleef_frfrexpd2_avx2128(__m128d a);

__m256d Sleef_frfrexpd4(__m256d a);
__m256d Sleef_frfrexpd4_avx(__m256d a);
__m256d Sleef_frfrexpd4_fma4(__m256d a);
__m256d Sleef_frfrexpd4_avx2(__m256d a);

__m512d Sleef_frfrexpd8(__m512d a);
__m512d Sleef_frfrexpd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_frfrexp with the same accuracy specification.


Vectorized single precision functions for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

__m128 Sleef_frfrexpf4(__m128 a);
__m128 Sleef_frfrexpf4_sse2(__m128 a);
__m128 Sleef_frfrexpf4_sse4(__m128 a);
__m128 Sleef_frfrexpf4_avx2128(__m128 a);

__m256 Sleef_frfrexpf8(__m256 a);
__m256 Sleef_frfrexpf8_avx(__m256 a);
__m256 Sleef_frfrexpf8_fma4(__m256 a);
__m256 Sleef_frfrexpf8_avx2(__m256 a);

__m512 Sleef_frfrexpf16(__m512 a);
__m512 Sleef_frfrexpf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_frfrexpf with the same accuracy specification.


Vectorized double precision functions for obtaining integral component of an FP number

Synopsis

#include <sleef.h>

__m128i Sleef_expfrexpd2(__m128d a);
__m128i Sleef_expfrexpd2_sse2(__m128d a);
__m128i Sleef_expfrexpd2_sse4(__m128d a);
__m128i Sleef_expfrexpd2_avx2128(__m128d a);

__m128i Sleef_expfrexpd4(__m256d a);
__m128i Sleef_expfrexpd4_avx(__m256d a);
__m128i Sleef_expfrexpd4_fma4(__m256d a);
__m128i Sleef_expfrexpd4_avx2(__m256d a);

__m256i Sleef_expfrexpd8(__m512d a);
__m256i Sleef_expfrexpd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_expfrexp with the same accuracy specification.


Vectorized double precision functions for getting integer exponent

Synopsis

#include <sleef.h>

__m128i Sleef_ilogbd2(__m128d a);
__m128i Sleef_ilogbd2_sse2(__m128d a);
__m128i Sleef_ilogbd2_sse4(__m128d a);
__m128i Sleef_ilogbd2_avx2128(__m128d a);

__m128i Sleef_ilogbd4(__m256d a);
__m128i Sleef_ilogbd4_avx(__m256d a);
__m128i Sleef_ilogbd4_fma4(__m256d a);
__m128i Sleef_ilogbd4_avx2(__m256d a);

__m256i Sleef_ilogbd8(__m512d a);
__m256i Sleef_ilogbd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_ilogb with the same accuracy specification.


Vectorized double precision functions for obtaining signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef___m128d_2 Sleef_modfd2(__m128d a);
Sleef___m128d_2 Sleef_modfd2_sse2(__m128d a);
Sleef___m128d_2 Sleef_modfd2_sse4(__m128d a);
Sleef___m128d_2 Sleef_modfd2_avx2128(__m128d a);

Sleef___m256d_2 Sleef_modfd4(__m256d a);
Sleef___m256d_2 Sleef_modfd4_avx(__m256d a);
Sleef___m256d_2 Sleef_modfd4_fma4(__m256d a);
Sleef___m256d_2 Sleef_modfd4_avx2(__m256d a);

Sleef___m512d_2 Sleef_modfd8(__m512d a);
Sleef___m512d_2 Sleef_modfd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_modf with the same accuracy specification.


Vectorized single precision functions for obtaining signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef___m128_2 Sleef_modff4(__m128 a);
Sleef___m128_2 Sleef_modff4_sse2(__m128 a);
Sleef___m128_2 Sleef_modff4_sse4(__m128 a);
Sleef___m128_2 Sleef_modff4_avx2128(__m128 a);

Sleef___m256_2 Sleef_modff8(__m256 a);
Sleef___m256_2 Sleef_modff8_avx(__m256 a);
Sleef___m256_2 Sleef_modff8_fma4(__m256 a);
Sleef___m256_2 Sleef_modff8_avx2(__m256 a);

Sleef___m512_2 Sleef_modff16(__m512 a);
Sleef___m512_2 Sleef_modff16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_modff with the same accuracy specification.


Vectorized double precision functions for calculating the absolute value

Synopsis

#include <sleef.h>

__m128d Sleef_fabsd2(__m128d a);
__m128d Sleef_fabsd2_sse2(__m128d a);
__m128d Sleef_fabsd2_sse4(__m128d a);
__m128d Sleef_fabsd2_avx2128(__m128d a);

__m256d Sleef_fabsd4(__m256d a);
__m256d Sleef_fabsd4_avx(__m256d a);
__m256d Sleef_fabsd4_fma4(__m256d a);
__m256d Sleef_fabsd4_avx2(__m256d a);

__m512d Sleef_fabsd8(__m512d a);
__m512d Sleef_fabsd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fabs with the same accuracy specification.


Vectorized single precision functions for calculating the absolute value

Synopsis

#include <sleef.h>

__m128 Sleef_fabsf4(__m128 a);
__m128 Sleef_fabsf4_sse2(__m128 a);
__m128 Sleef_fabsf4_sse4(__m128 a);
__m128 Sleef_fabsf4_avx2128(__m128 a);

__m256 Sleef_fabsf8(__m256 a);
__m256 Sleef_fabsf8_avx(__m256 a);
__m256 Sleef_fabsf8_fma4(__m256 a);
__m256 Sleef_fabsf8_avx2(__m256 a);

__m512 Sleef_fabsf16(__m512 a);
__m512 Sleef_fabsf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fabsf with the same accuracy specification.


Vectorized double precision functions for copying signs

Synopsis

#include <sleef.h>

__m128d Sleef_copysignd2(__m128d a, __m128d b);
__m128d Sleef_copysignd2_sse2(__m128d a, __m128d b);
__m128d Sleef_copysignd2_sse4(__m128d a, __m128d b);
__m128d Sleef_copysignd2_avx2128(__m128d a, __m128d b);

__m256d Sleef_copysignd4(__m256d a, __m256d b);
__m256d Sleef_copysignd4_avx(__m256d a, __m256d b);
__m256d Sleef_copysignd4_fma4(__m256d a, __m256d b);
__m256d Sleef_copysignd4_avx2(__m256d a, __m256d b);

__m512d Sleef_copysignd8(__m512d a, __m512d b);
__m512d Sleef_copysignd8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_copysign with the same accuracy specification.


Vectorized single precision functions for copying signs

Synopsis

#include <sleef.h>

__m128 Sleef_copysignf4(__m128 a, __m128 b);
__m128 Sleef_copysignf4_sse2(__m128 a, __m128 b);
__m128 Sleef_copysignf4_sse4(__m128 a, __m128 b);
__m128 Sleef_copysignf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_copysignf8(__m256 a, __m256 b);
__m256 Sleef_copysignf8_avx(__m256 a, __m256 b);
__m256 Sleef_copysignf8_fma4(__m256 a, __m256 b);
__m256 Sleef_copysignf8_avx2(__m256 a, __m256 b);

__m512 Sleef_copysignf16(__m512 a, __m512 b);
__m512 Sleef_copysignf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_copysignf with the same accuracy specification.


Vectorized double precision functions for determining maximum of two values

Synopsis

#include <sleef.h>

__m128d Sleef_fmaxd2(__m128d a, __m128d b);
__m128d Sleef_fmaxd2_sse2(__m128d a, __m128d b);
__m128d Sleef_fmaxd2_sse4(__m128d a, __m128d b);
__m128d Sleef_fmaxd2_avx2128(__m128d a, __m128d b);

__m256d Sleef_fmaxd4(__m256d a, __m256d b);
__m256d Sleef_fmaxd4_avx(__m256d a, __m256d b);
__m256d Sleef_fmaxd4_fma4(__m256d a, __m256d b);
__m256d Sleef_fmaxd4_avx2(__m256d a, __m256d b);

__m512d Sleef_fmaxd8(__m512d a, __m512d b);
__m512d Sleef_fmaxd8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fmax with the same accuracy specification.


Vectorized single precision functions for determining maximum of two values

Synopsis

#include <sleef.h>

__m128 Sleef_fmaxf4(__m128 a, __m128 b);
__m128 Sleef_fmaxf4_sse2(__m128 a, __m128 b);
__m128 Sleef_fmaxf4_sse4(__m128 a, __m128 b);
__m128 Sleef_fmaxf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_fmaxf8(__m256 a, __m256 b);
__m256 Sleef_fmaxf8_avx(__m256 a, __m256 b);
__m256 Sleef_fmaxf8_fma4(__m256 a, __m256 b);
__m256 Sleef_fmaxf8_avx2(__m256 a, __m256 b);

__m512 Sleef_fmaxf16(__m512 a, __m512 b);
__m512 Sleef_fmaxf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fmaxf with the same accuracy specification.


Vectorized double precision functions for determining minimum of two values

Synopsis

#include <sleef.h>

__m128d Sleef_fmind2(__m128d a, __m128d b);
__m128d Sleef_fmind2_sse2(__m128d a, __m128d b);
__m128d Sleef_fmind2_sse4(__m128d a, __m128d b);
__m128d Sleef_fmind2_avx2128(__m128d a, __m128d b);

__m256d Sleef_fmind4(__m256d a, __m256d b);
__m256d Sleef_fmind4_avx(__m256d a, __m256d b);
__m256d Sleef_fmind4_fma4(__m256d a, __m256d b);
__m256d Sleef_fmind4_avx2(__m256d a, __m256d b);

__m512d Sleef_fmind8(__m512d a, __m512d b);
__m512d Sleef_fmind8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fmin with the same accuracy specification.


Vectorized single precision functions for determining minimum of two values

Synopsis

#include <sleef.h>

__m128 Sleef_fminf4(__m128 a, __m128 b);
__m128 Sleef_fminf4_sse2(__m128 a, __m128 b);
__m128 Sleef_fminf4_sse4(__m128 a, __m128 b);
__m128 Sleef_fminf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_fminf8(__m256 a, __m256 b);
__m256 Sleef_fminf8_avx(__m256 a, __m256 b);
__m256 Sleef_fminf8_fma4(__m256 a, __m256 b);
__m256 Sleef_fminf8_avx2(__m256 a, __m256 b);

__m512 Sleef_fminf16(__m512 a, __m512 b);
__m512 Sleef_fminf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fminf with the same accuracy specification.


Vectorized double precision functions to calculate positive difference of two values

Synopsis

#include <sleef.h>

__m128d Sleef_fdimd2(__m128d a, __m128d b);
__m128d Sleef_fdimd2_sse2(__m128d a, __m128d b);
__m128d Sleef_fdimd2_sse4(__m128d a, __m128d b);
__m128d Sleef_fdimd2_avx2128(__m128d a, __m128d b);

__m256d Sleef_fdimd4(__m256d a, __m256d b);
__m256d Sleef_fdimd4_avx(__m256d a, __m256d b);
__m256d Sleef_fdimd4_fma4(__m256d a, __m256d b);
__m256d Sleef_fdimd4_avx2(__m256d a, __m256d b);

__m512d Sleef_fdimd8(__m512d a, __m512d b);
__m512d Sleef_fdimd8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fdim with the same accuracy specification.


Vectorized single precision functions to calculate positive difference of two values

Synopsis

#include <sleef.h>

__m128 Sleef_fdimf4(__m128 a, __m128 b);
__m128 Sleef_fdimf4_sse2(__m128 a, __m128 b);
__m128 Sleef_fdimf4_sse4(__m128 a, __m128 b);
__m128 Sleef_fdimf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_fdimf8(__m256 a, __m256 b);
__m256 Sleef_fdimf8_avx(__m256 a, __m256 b);
__m256 Sleef_fdimf8_fma4(__m256 a, __m256 b);
__m256 Sleef_fdimf8_avx2(__m256 a, __m256 b);

__m512 Sleef_fdimf16(__m512 a, __m512 b);
__m512 Sleef_fdimf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fdimf with the same accuracy specification.


Vectorized double precision functions for obtaining the next representable FP value

Synopsis

#include <sleef.h>

__m128d Sleef_nextafterd2(__m128d a, __m128d b);
__m128d Sleef_nextafterd2_sse2(__m128d a, __m128d b);
__m128d Sleef_nextafterd2_sse4(__m128d a, __m128d b);
__m128d Sleef_nextafterd2_avx2128(__m128d a, __m128d b);

__m256d Sleef_nextafterd4(__m256d a, __m256d b);
__m256d Sleef_nextafterd4_avx(__m256d a, __m256d b);
__m256d Sleef_nextafterd4_fma4(__m256d a, __m256d b);
__m256d Sleef_nextafterd4_avx2(__m256d a, __m256d b);

__m512d Sleef_nextafterd8(__m512d a, __m512d b);
__m512d Sleef_nextafterd8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_nextafter with the same accuracy specification.


Vectorized single precision functions for obtaining the next representable FP value

Synopsis

#include <sleef.h>

__m128 Sleef_nextafterf4(__m128 a, __m128 b);
__m128 Sleef_nextafterf4_sse2(__m128 a, __m128 b);
__m128 Sleef_nextafterf4_sse4(__m128 a, __m128 b);
__m128 Sleef_nextafterf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_nextafterf8(__m256 a, __m256 b);
__m256 Sleef_nextafterf8_avx(__m256 a, __m256 b);
__m256 Sleef_nextafterf8_fma4(__m256 a, __m256 b);
__m256 Sleef_nextafterf8_avx2(__m256 a, __m256 b);

__m512 Sleef_nextafterf16(__m512 a, __m512 b);
__m512 Sleef_nextafterf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_nextafterf with the same accuracy specification.

Supplementary Functions

CPU feature identification function

Synopsis

#include <sleef.h>

void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx);

Link with -lsleef.

Description

This function obtains CPU feature information using the cpuid instruction available on the x86 architecture. It executes the cpuid instruction with the values eax and ecx passed in the corresponding registers. The information returned in the EAX, EBX, ECX, and EDX registers is stored in the array out.

sleef-3.5.1/doc/ladle.svg000066400000000000000000000574451373003144100152000ustar00rootroot00000000000000 sleef-3.5.1/doc/sleeflogo.pptx000066400000000000000000002640711373003144100162650ustar00rootroot00000000000000PK!kÀ†î2[Content_Types].xml ¢( Ì—ÛnÛ0 †ïì ݶ’nëº!N/v¸Ú¡@»Ðl:Ѧ$&KÞ~”“fnà.Kc#¾1 Käÿ‘’iqr½Ò*Y‚Òšœ³KÀ¶”f–³ïwŸÒ+–¦ÊÈÙ»ž>6¹[; Y›³9¢{Çy(æ EȬC3•õZ ýŒ;Qü3à£Ñ%/¬A0˜bôÁ¦“P‰…ÂäãŠ^oHœ™±äýf]”Ê™ÔÑ>¾ç­ «V‹UgÚm~:h—©'Úm<¨°'$œS²H9äKSîÅŸncÏȲ^æÒ…” G¨âÌÃØ›[»o´i^–Ü_…¦ qç;r[ eÿöÔ‚j«JPÚb¡ÉIÖt¦Õƒa¦…4÷A<~é€ñÆ`Ü5YÃ÷1miúá8†à¢—LCðòì¯ÎBÏö·.t­¾s|h–~÷B°s|ˆ©^¯Ÿ§ µ›ƒŠâ‡‚[\+è<ïø×õ!Šº\|k»Àm%Ø NOÂ^Ål=•©Ÿ ±‰÷©LýԌӘú©"§1½îº¶tpž.Èôf€LWdz;@¦ñhˆPçªätu®éÔãx8>1÷­B´NÝNÀ£„]³ÐvÏÞ)R“p¼àÞ…bVBÙ¢ÍëŽoúÿÿPK!høt¡â _rels/.rels ¢( ¬’ÛJ1†ïß!Ì}7Û*"ÒloDèÈúc2»ÝH¦Ò¾½¡àaa-‚½œÓ?_òÏz³w£x§”mð –U ‚¼Æú^Ásû°¸‘½Á1xRp  ›æòbýD#rʃYŸ ÌñNʬr˜«É—J’C.aêeDý†=ÉU]ßÈôSš‰¦Øik®@´‡X6ÿG[:b4È(uH´ˆ©%¶å-¢ÅÔ+0A?–t>vT…ä<Ðê¼@<ìÜ‹G;Π|Õª×Hýo@Ë¿…®³šîƒÞ9ò'홼!sÚ4Œñ“HN.³ùÿÿPK!“¿EšÙ¾ ppt/slides/_rels/slide1.xml.rels¬ÍjÃ0„ï…¾ƒØ{µv¥”ȹ”B §’>Àb­më­ê·¯J/6zéqfÙo†Ù¾ü¬nœÅÅ` Õ (}´.Œ>ÏïO/ ¤P°4ÇÀ8tûž©Ô'™\U)A L¥¤WDé'ö$:&õ2Äì©T™GLÔ_hdÜ5Í3æ5º S­|´;Pç%Õä¿Ùq\Ïo±¿zåN:_³+òÈÅ€ÖèÙ:úõ[Í~¼_£ýÏ2;Ë'Zâµlʬ|Á•hu]ñ§nVï¾ÿÿPK!c\#´Á7 ppt/slides/_rels/slide2.xml.rels„ÁjÃ0Dï…üƒØ{$;‡RŠe_B Sq>`‘Ö¶ˆ- ­꿯Ž6zœæÍNÓý.³xQb¼†ZV È›`5ÜûËñ gôçàIÃJ ]{øh~hÆ\B<¹È¢P`‘Ö¶ˆ- ­꿯Ž6zœæÍNÓý.³xQb¼†ZV È›`5ÜûËñ gôçàIÃJ ]{øh~hÆ\B<¹È¢P&ó„Åâ ÿÿPK! 
'ÿÔppt/presentation.xmlì—ßoÚ0Çß'íˆü:ÑŸ„¨¡‚uL›: ­Ýà&Ü:vd›:íßÙ `@“*í5O89ß×çgçîúf[3ï…HE/Pp5Dᥨ(_è×Ã|!OiÌ+Ì'Ú…n&?\7y#‰"\c ®Èp•ã­µnrßWåšÔX]‰†p°-…¬±†G¹ò+‰_A¾f~8¦~)G¿|¿X.iInE¹©aùVDfãPkÚ¨½Zó5w'!M`‹ŠU?°ÒD~«î”>{ãѪ@aâ,Jcà$sóæÈŸ\ûÿpw¥¾U­H2v¼Cãmæ‘cŽ.ÍîÚñ¥9u¼“chn ÷o^¹-Ð8ˆãár Ü(Í’Ì>è]ÿ¼*%!<Þvës¡‰êÜ3Û^Ãn¢"K¼aúlõ½Þ12¹Æ9¼[,d7ú¹Ã&ÙžðàûÂFçNa/,h`Nå] 2ÌV¨ y ó€ïßö+Â&5³S¾ã3ù éŒ@[SÞ=‚÷–‚Ü[lx©Ý‰âySÓZØ]Ö×°7‡ËùÕìöxÜ]^\·Ë»ùáGÛÝrƒ¿Ýl÷wó#¾îŸ_\ïç_cÔ»õEÇåÅÝ|µ™Yü½{s³Z,º]¼¸[nŽ<È~¹žñä‡ÛÕîÐŽ¶ÓŒ¶Û/Æ`;ô!f¶xº¾¦ÏÃî‹ýrI?m^þl¿{º{²7þåË'ûhuõšE›ù–evaÿ`ÁÌ× ÀðÃEýy;ÒüòÕÍþîÃæ—˜[ôêj†ÅMÿÒürùê-ø—‹Óo·¿ºvqûÉ=Ð-O’*W°Dâ eY ÙM‰3X`Ŭ$R^ÆMž¥ŠYI‘ÏI÷)(98e“7‚Ñ)òšùHø¼ª‹Zõ–¤¸cáj…ÍH$NÞU¬Qådí¤<‘¥ ®ƒT$Mšª&%å=ƒªHI¤ [@ÞÃ\.¾Ì“Rµ€©(³˜Ø(LJJ| áѱ‰D*H!iT_*Õ0 R.Á—å`°¡%,[pZ©ù$KÈ5PÐ’XÖ]QÐ’2Ÿ@½¨ä7•XE'“ ã2Jã9Ô‡ñYÒc4|ôð=VCþmzÙ$„Þc7$¾¡÷.ÆqIåð}õÓ!é4òFò½ãû ô×!¡4½ÇuÈö Bïq8ƒÐ{\—㺴Çuø>ˆzëo„Þã:¤P¡÷¸.Æu”ØpØf×QÖA¢ãû‡§D‚ƒ>Œë(7à ã: ÷ôa\G¼ƒ>Œë((wЇq]Öã:|´ò=®Ë†qÅÃÎÃã:Šqôa\g{:'߇̂QIß¡÷¸Qç ô×!„Þã:ćƒÐ{\‡˜ozëý Bïqb³Aè=®C”6½Çuù0®£0H² ¾¡N‘ƒîpûò6„Ù/Çh}5[Ï"tÆgšŸö³èÙÕ쑜_îæGŠ|Ú£¯ÑÿÒ6ÃÜžzaèïwÛ—Ë/¶òHÄÌ³ØøÊNã¹Þ8Üœb0lωÅháÚÏSà†çÎ/¼maxä*N]<í¸í'oÛ= ¼íâðŽ :5m0<÷bøá¹õ‚á¹£ÂÏ  Ï}~xnƒ°ð¦»ÁÏÍ Þô(xá¹#Á¹ÏÀ Î]Üô xÁ9#Éà\ï÷‚›ê¾…6¹!/4K˜­»{!M•ÇåÚ¹:ÉsäÊ ¸­{Á¹ÚÍà\Ãö‚sÅšÁ¹íçª3ƒs-Ù n+Ç Ïõ`?Û^óïê"Ž[6êÀ g¤õ†ÜǤA£Âùc‡ÇC_µN«€Âc®‘ü`Ì×ϱ£Zn®ŸÌ÷óÏñ—ß¾¸[Ým¿Z™½kS ùjþèO0ôúø©ù¾Ü<úõSû€ï®}"ó©ùÿá-!yGÃý[L@Š ¿|yþ´¿ õÜPŠ6“†óɧhûÉÑÃ[꣪Ûx¸Ý7!Ehðª'dyØ{úøæªl‹§¸ü7ˆ»B=úyKSì 8] 8@IŠŽŽÑËÔz #qàep'[€²]lh5ÒqpP†4Ýy:xÇ&†¶RБ8UEÛCÂïÌÝÑI°ØˆNÃ’H5ÚdP—22)'BE™Pçtpå$Rgº<ƒ„ùžÕIÄÔ `‰Dñ³fé §'B,4aB©jJÞ’àDr'BÈä‰BN%R“4\Ãršö4ð&)µÖÎÅŠÓ„;QB´¤”ÃIÐ¥ å`± ³„Ó럠#<7MP¡yIý€Ù ‡zQBó’Ò®•\*Cv¯¬Ä ¡!JRÞ!PMN½q¡II$â%³W"DI |GKÀNÇ¿ÉP£pˆ’”xNc(æ$‘r<œjNRäò„êèé) <›î*†wÁÃë$FŸÌêE„He2«Øœ0™Õ‹h2«íŽÞɬ 59™U»Ïe°ª¦>È« Í*v£íp×L†‰¸ðÁñ];ÞŽîx—S´:™U:a2«æxˆÉ¬NfÕòå?T´Ú ¤È¹Ñj–`«ÂØI 9™U“Šž’ÀT±øÎI;_Ê Ï­Nfµ5&“YmWbŠVÏV›8¥sB‘›NèTplä$'³ý]%›´(4éÁbiÞ¤™¢D(Ë SiùÅ©´Jæ{*­Rr`*­ ò¾–V'«Ê„þàrÀ“URÀS ¸ëc˜bÕ)V}w)à÷ЪiQ*ÚJ3œ+°^R8È?»²Ê½Æ¦ û»ùÌ3*«'«ê¥ ‹±gƪ^:2¾=³ ØKçf€½tdrçÜXÕKH&„Îmö’e–sÛ€½„¤ÈŒk6Û œ—Óœ¢õœN ´VÈŽ^œ Ûµ{ I¤³cU/¥wÜì§%uƒ]¶ð>Øì§%õh×ì'%UÄÈ~¥Îªú)I%QÇçe€ý”¤–(GVOVÕOIª‰3¬ªŸˆT½ 0v†MGµµ»gu‡‡ 
å!íf&28K"·[+uÈ`‰<ì`0Dv¬ïïêNsiw³êú\Rv¤©h‰ï¼iOG›.ZpÐÛ=Jt(R}ØA2ÓQm§cÝ£ÚÎ>L¥ÛVˆÃTZ'„ºNG¤ð>lfŸ–iNuwk÷Þ¼jöÚ-µ@í§ÝNO箲`ßÛ Í;îÌØÜCá…æÝ7šý`/´ÝýfÀÙ›õƒs³½gŸÔÎå;:m0óƒ·=!0•ñýàìòØÍ^~pÞÚeÀÞ°å‡çíY^î:m_fûÉ/ÕÖZxò˜¼ã‹Å^ºÄæm»I–=/tf*ÕæQØûðB£ï ¼f íÊC»ÍMë­âb‡:mD7›ÜSœcvko¶´‘žuý´y½šÀû¼ynŽoóºñ™ÞÑæuœðÝ”ìVQ3îÝ á›×³”®âáÍë¸Â¬îNÍ|'{×éÜs›ƒ½†‰ûWYZ&5åÞ8³ýг‰Ö/É ‘ªíš‘qMhBsZu€ŽD¢£° 4DGF™Øá {A·*èH¤´®yã²ÙdŒiGÓ‘H)~=2„/KÞZn†z é áÌÓï럌/kì”7Gç–Mâ༳ƒØOE˜yR©˜MâPPê§ “Oö4ÿàë—88Îìø÷S‘™'³Öÿ>$xR8/8'ãÄ‹œ‡ƒƒlí:­—sÁÜiS²>’9™‰R|þ%sTìd¶ßI €óøè‡ !)ÍERÓå|ᥓH<‘0!)θ….† ’HtŽ”iO ,”gàf‡[`餧vƒãïè.ŒðŒ¤°èƒïH µÖ¸9E"¥u“•-‰£$# âdD/¢ÉˆF“ )izF‚1¢8}ÊÿP›RúSV‚žóÐDs¨µÕRehmµ´†j[-‘´¶ZZCµ­–HZ[-­¡ÚVK$­­–ÖPm«%’ÖVKk¨¶Õ’ÒVKª¶ÕIk«¥{¬µ¡tF×Z¥4¢Rò*\8^°ß–¦¸NþèÕ9±MIF .jÇ-ÛÆÇñ’‘8cÞgÙUìïzéHY%=58à­p]$k9/)ª£lµ=¼nRRG¼H 7fëò+6‚ŠÃ¢¼t¥‚Û¤˜ŽxÓFÅlRFÃ>»4”º‘ ¿‰3<àEX­Y¬síµF,¥Êà6ÌaÆW¿Æ¦§ÍÏ^Že¬‘¸ÑÈ¥$1Ö8A³©8:ô Œ³Í}”±Ft¨ÒœRŒ2ÖuZg •ænrWÖR K´*jt´$cŒ5ò)uã]>™.èŠJÑÖÑÉXK»›LÆü˜ìôd¬Câ)¥s2֯Ɨ½ff§yÀ+Z#§>WÅ„¤ç=k³õnLd=kÞµ¨«ñJ7YëHœ)²NÒ)²Ž~À‘5®+HlúØWIŸeˆ(‚ñ¨&+ãw‰£‹ßÒ°2€wq”ek™)ÓDð^ÁK»«àei\ÚêDëH$­S cdµS ‘´¼Ì“Ú^"i#xXçÚ^"i™,S;IÁKÉ.µ¼DÒ¦Û¥pk5.˜þ¤Û« É3 y*ô?¡wûËF¥Û«)à)ß##x´‹qºÕ7)Þ#D t§ˆÙ¤t'‡„óº µËJñz¤pcF¤Û!@æ¤%ÿ|¤l×cÒí`ÁU“‚H·' ÎÓ ½)Ô(ž¤™"ß*qÂÓÒ¬MHœ¢i2 Íõﺤ¾‹ƒm5%êõNö|¸OP' ±tçã|‚²Œ¹Ôï'o>Ò'hê≮ë¤ÑGúy¦PœNÞ|¤O@+f©Fúeª!$µÀHŸî~X8yó‘>A™¤ BR¸Gú*c-ƒ~-‰“å®y+Ô¸a©6cî9/ö|Ÿ ÀUË´l~2çûûP Ñ9Û'Èâ «¤#UÂ8Ÿ Ï ±ƒtÎ÷ JôÐSï±ÿýH…0Æ'Èi#FŠÔyÜ á#ìHIÒÕþyHã>Æ!ÀEÍIxRŒq ÀbƃòÏÅ5ðcœ:»Â‚øv Š2VÈåùNé3r¤sqÀ˜DA‘`g˜ÞSPÂ] kÎó^±ðÒI0Ê)¨’·n‡ I‘ç`ÇaJvÌ µÁ(§ óØDTBR!hµ›(ÐYkiàµt$ÎäŒJLNA49!m#uÀäLNÁ¸LÁäà´ëÉ)à–‚É)Ó¬?e ÆU†g h|j7¦zkñ2´ÀNŠÂd]ý>µ ´Î‡ÄÑe$d„ ÍHHœpFBºÚŒ„ÄÑe$Ü\²wÁARæ$d®@Û¼àà(s2a¨ÉIHxp¥Êý¹m¡ÂÁQæ$œd¡6'!‘´9 ™-T»©RºRžÕ… ‰¤ÍI8­m^HÚœ„jµû!‘ eNBʶ:'!‘”9‰TÆýÚ\ƒ¤LŒ¡#q”d¤µWp*T¼o…ŠÉýøA»9º]$§çÇ(å¹"é.݉ÉýP–D¤'1¹&„ŸÜº×æ{t?*{ÈŸøÀ¡lç»túÇä%#]–l¤ûaóÒ‘Ê*—ýHÊv—øÀ²9±Ï¸> Œ^7ú¤#û$pr\ðýH}…Ú'aÜDÁ2ð•ý@ó:ŸgæeöŒÊ~ í.¼h2æì~à° 0ìŒË~+LæÜì,œf½Þ™ûáL§»"Mcôi†‹ãŽ4× ¦ ŒÓr™Ë~$|sQ@sJ02û[3’:`dö‹+I%02ûA‡¡†ß‘TcÝÕqZ.Gf?t‚*ÂÈì‡Ê-xÙÉýº‡:2&÷cr?p¨÷ä~ø{µe@ñ>¹ik6¦Mª¬n°­(ìæHO»D&÷ãÂvM†—NzhÜ‹hr?"_¦ìÇ”ý˜²SöCa²¥ËrNñ寙ý˜Ü“¤×û1e?.¢)ûÁEž3²Ý=MŠMªg¸^2²Žb®Ð¤Á$Ò©øâ¥#Ø3³^:Nèà 
װƖH§â‹—ŽÌ¡šÖÓñÅ/™B=ÓýðÒ‘qOË%G_¼Td!­§ã²^ 2oŠÖÓñÅ/§Ž‚/ºœ®Ìv­§~:Ò•8/ûá§#%Z‘‘z øâ§!UÀyî‡ŸŽ”æ3³~BRàåœS|ñ’J'dh%×D$_ HÙ?!©ÎÌ~ø IE@­§«ó@ñÅOHê´žêU"²^BN½*·ÀA:õ„z 9JDIGâ(ÉHÅs^öÃ;©w&÷åä~`Ûíä~ô´èÉýh{?¼šÆéãxïÜ øt·¢øòFÒg©òº4wÀ¸Mz“ûaz\OÙ?ÓIOâÎý¸ ¼Ä§‡#Ý4=¿Å¦Grñjc‡Ÿ¢ùæùÕŒoBßmÑ««Ùb»ÙVÇåo ¤^Ÿ¾â®, cH`Ñ€døÙœï©F†W!‘ÓA”¦Häl2b‰œB†lJdÓ“ªž3„T"—ƒ(Ãï—Èæä45eøò¹D"‘›AȆKl|Äc}&Æe;Ô‡ñ$}§ÑaLú0^£#–ôaÜF·9èÃøîrЇqE‚ú0ž£Sˆôa\GQ›DÇwÁu,6VCî—‹c´¾š­gÑñjvœEû«Ù~=»š=#œùån~$ÅÚþ}}5kì£Û«™5Ìôç»íËå[x$5Ë1¦yéìCœàÖ ÏAÛ¾| ßBµŸ;3ºÅÜ´×ûÁ¹ŸÁMØn;ai9àñƒÓÁÀ) +÷¬÷À/ž­-'狤,nÕãW•”ÀˆVœç—âÚŠe÷ äe~í_ËÔ 7¯[‚.!wÉ8l1K`:ÑÏXÖe‘³$eq®iñ hcmbVigö0K~zNRï9Kà>çA 8ò]›¶@í§]“Í4À&Gé6)IØl2óó»àVm/ ñÎͨÜðäæ Ÿæ¦é´ûÊxޏUoßà”Y“¥Îâ¦X"œJ¼Y–UR™nù†ýlóGöž-5—Š»¢ì2œ“£ýàúsÚ)˜³åzóqÇæÜ™f—Ôm`šÓZ^è‡4Lû‹õö°dV"5fxªÓg¤_Ž?[nïHg¶ëÕõãÕzm¾,n—wË×ûèåÊñø*™Ñ¯×/î>Û^óï Üo¿þÕÍMï×þÐŽb;ÖR¤ n£4o¶D¹ez€ï.»'û?Àçñõziˆo>_ÞD«khÖÔ uÃ3åùb±ÜùA·óëeèÖ4 |ÚÝØv€öÑݱù -<¡.on`@:dv³x0Fî0 åíæ„|·Úl÷÷ÍlYYÊ Ï Ä ³»<¾úh{ýš†{†Ï'ûh\¼Åkƒ8Ï7‹Û-LÚ⸧!ð ǧ„h¾ìè7Xçh¾~¾@ËÍõ“ù~þ9þòÛw«»íW+X½Y´6±ÄWóG¿x‚¡×ÇOÍ÷åæÑ¯ŸÚĈxwí™×h^"~¿»Ü¼|j_êâ—/1öêösmæwË«ÙÛ7xûͽýæÞ¾ù÷èí›ÿxûæÍÛoþß##û»KàzDƒƒ̒ИOö´6rtfùå«›½aí-˜“¸¦ ÃÍžE’ã»Øî)[¾:F à.ï¼Ï¢Y½ê¾j·g‰‰èx xùæÍ_ÂG`ЄÖöÄ×'†n§{]_ï绫Ùf»YÂûè^¡v?yq„hØ‘ùõÞÿ!+óËýý/Œ_Ðé…ÙxøV1­!ÅÑõjÄœñÓáîøñz970/¯çÇÕ&:¾Þ-oæ ¼&¨‹ýóÕ|íVÇÅíãùÝj Dr÷·óýa‰Ei ž†êøáÓO?ùä1Ù3xQø¿ùí‹î{b³ãòôæÙó/Ø)Ä¿Æõ}ü sVÅìWãgŸmœbè ˾UÖàfc8$ľp®4Á¼çì›}ÜþõxØ's÷+IÃÁßÇC¿ýýïñ߀çõèˆïãyŸþü³ŸFŸ®žíçû×ÑÍvï<9t[*£ãÆ)ºïcŸÀYzAªèyôÉþÓæH³yüb³8®¶›ƒ3'ˆí#{ÿü exXþïþpñÿù->"£™:•»6ÔÑ~ Û‘æ nV`§Ðšm˜ê8‡&#w?ÃRœOMZí¤÷PlO)Óc¬vÖàb(¿Ö[®×«Ýa9£1°ÛޝÚsçž=ÿ®3Ü7l©vàÆ 8#­M^õäP„g|—ÉÙíb÷ÙÙ“~GT¬k½ÛsE¥Æéwœ†Éj\WÓFh­‹â >%ã!”¸¡MË=àß¾Qa§x%’ʹùÁÇ|bµNó·oßüù훿¼}óíÛoþd~øcÄE‚1æ#Ë*\—ÃéÎ|T~Ë ˜ w‘õƒ>ˆDEÉr’‰¢.Ьl“Í­PÈ4Ik0Dáíùµ¬«ÝÜ­çö¸\d9ͦÿÂR2Æë;H¨¾îs¢th†ã"n#ñD¡ddÉ*äÑx „Åè/“Ao˜tW‡”éÞ0%‰Äº‡N¹ Ì LG Ú*‡ Sr‘b\â¢X=hÂŽRÒ õWÑ™ Õ“H9½RœœòØ'JhhRÅ{’Hy‚fNàä¥ é sÇ­JÔw¦"Å WÓA1¹u®áÉ\è©Qådí:Ö£“UK'‘ФA&U3))ïÄPEJ"`Øò^?)ð%Þ¡Y@‰T”t¥ŠÂn8—äÔ0ë ¤4ª/•j)— _8Xlh Ë\AŠN|#3¯ %±¬ÒSÐ’®Aõ¢’ßTb5j¥ÎH¥Ô'uŒÔ¾B«;X\D8—ŠyI÷>äDã'I,¸ ¸X󾤯 $Iœ§Š÷%±pæ=.3P(öTŠ§Æ„ùPb@Sé[T{ª×PbxB¸fŠ÷%@Òà¤2̤ÞH›2Í5òu–B¾2©Èúç½á`¥ '7“Z­!ûäy+ÅB%*øPj¤q V˜–Ä*ó 
Ž¤‚”T­3&%±p™c¬Ñ†™”ÿ")Ц­°(V†ÆnM<—Iñ‡Ê€¥T©VÊ~xPgäRgXÏ3Ìâ WäRc)šx4”ƒ¥ðÛs)õ(þS‹šb>–J r©(PƒÅýÝ Åî`!o«²¹Ô`»Rå£9X¦Bas©&à\à€…Gí`]kV§»Î2æ6. Š®‹•Õ*ØKy‡BÇñš¦X%92 [•K-‘'úö¶ÊÁBŸREaÑ•Zy¬öCÁê+VM«j"Ãi)œ„àËr°PÓ«( œW!E–+S©Ö½¹T””§ms“n¿2ämŽû˶µD‡ U"‘Û ­ÊA"·Ý±:d®DÖÆ–ÈmµLGÂ%‘Ûª‚™2}›k"`[%:äÆAo;•è°ú´qB½ò=†K†qÜßóƉà–0[zît¾sùWÇu”Ý‘\‡ïƒÐá…9èÃô[ÚSpø>ˆzOÅ¡³yzOÉ!ß2½ÇuÜ0¯ViOÑáû ê=U‡DÇtÊ:ȇïƒÐ{ºŽûƒÔs§Ü€C}×Q¸ï ã:Šàôa\GA¹ƒ>Œë²×áû •ïqïѯ|ë¢Þã:ļCÐ)n•K‡ïƒÐ{\‡àtzë«BïqbÉAè=®C|8½Çuˆù¡÷¸qÜ ô×!¢„Þã:Ä[ƒÐ{\—ã: ƒ$×áûêÙ8èױ䱳±-7ÒÎÆ¶~M=K§-‹¼=ËFbæYl|e§q‚l7!Y nN1¶çÄb´pí§…ç†7]k-‡·pí§Ý0Æ" \Å©‹§…k?Þ¶{xÛÅá}dЩiƒá¹ÃÏ­ Ï~xn `xî‹ðÃs„…7Ý ~xnf°ð¦GÁ Ï Î}^pî*°à¦WÀ ÎIçz¿ÜT÷-´©Ù{¡YÂlÝÝ iªì<.×νÐ\)7à¶þíçj7ƒs Û Îkç:´œ«Î εd/¸­3<׃ýð\ýµð²)®£öÓŠWp-¼©ËúÇç*¬}~S[õÃsá¹>ê…·ÕP†çgÞT4žë”^x[Ùex®5àMeÑÀÛz¡ÞVžk~~x®ð1<×íüð\¥3ð¶öæ‡çJÃsýÌÏÕ2Ï•3¿ú¶/†ç:–w|[µbx®EyámåÉÀs=ÉnÕÙZ® yÁmÈŒÎu?8WqøáMm&n;Tñ0¦¾â‡æjмõ-ZYm?YfmEÄ@RáÐ;¬­j`®UøÁ¹2ÁS¤}—­Ö>BûÉb« œkþѹB`Às“÷÷ƒs–߀sîÞΙzÎùw/¸Í¶p›C÷ÃsÆœá9î‡ç¬7Ãs_p~È6ïnó í=1‡Yà†p»m9°œýTÑÉwsÚ¿rÚåÎ[Åßçý+:}}û&„Ö¶OÁ…mԯ뺲G> Ñ‚šÙ‰ÍOÛ¼àsÁRÙ½+Mœàð+ï¤O?AcMbŠøLÇÐöwéS :9=I¨?ÛÔ²0x]%%k:æ9H¹„Çž½SÐ9Ö´¢6ŽÀ<ðæ; 8Ï£ ÏCbT0/8û.HY‰Ž ©xÓèà‹ÄHbtaÄT, LÙ‹Ž F¯ÑØŒò¥—Žƒ’çXêÏ ÐAš££ƒMЏE+HG¢ Ÿ»Y/QiG‡§¤#Qì,Üù@·OÒÖgÕ•/ú95«tȽŒÚ°„Z/Ÿ6,Ö˦µŽšyìóJ,U”Oj…å¾|+_ãâ´>ëC‰$£EÙ2Ú±ç ¹~¥M%ç4:ÍŒÊºÊ lT?«/4kgÍágEÁ଼à­UDlÐ.0ûi¼xÁ“¾ãvÇò_OœæÃ~ 4y„?„ã{Ì)>_ì—8…iG‡˜ü²ù)z±_]Íþõ£í\ôè£$ü(ÿiS=úÉã²xôf?ÿø£ú'gŸü¶Ûï’ür±_â„„íæç×Ñ«»õæp‰_^ÍnÇÝåÅ…Ù:??üènµØoÛ›ãÛ» ø‡«Åòb·ýz¹ßmW›ã%/îæ«ÍŒy ´©òÂV̳™}õíÓb ‹§ëkšÀb½ÿl¾ûÕKsxÇÝüp\î±÷¿ÚáÜbh„æ¼ÿÿÿPK!1—Êâ½­ ppt/slides/slide1.xmlÜWÝnÛ6¾°w t¯˜’eI6â¶l’6¨Û %Úf+‰I;ñ†Ý$À°‡Ø+ »ÞóøEvH‰vdX–b½˜/BŠ?‡çœï;?9u[•hK¥b¼zÁö­s^°z5ô>~˜ù©‡”&uAJ^Ó¡·£Ê{uñýwçb ÊÁíZ ÈÐ[k-ŽÊ×´"êŒ ZÃÞ’ËŠhø”«N!É H­ÊNˆqÜ©«½ö¾|Î}¾\²œNx¾©h­!’–DƒæjÍ„rÒÄs¤ Iˆ±·OTºËòyY˜Q‰’R3«·¯¥˜‹ki·ßn¯%bøËC5©À-^§ÝhÙÏŽÁ¤óàúÊI"ƒÛ¥¬.ÎÉlC·Cœ¿3áÐ[òf1?®æëwOœÍ×Ó'NwÜ ÁáQ1,oLºfù#ƒp;›`[o$EáÁ¸ö1".yþY¡šgkR¯èH šëÆ!nIJ~³¦¤Pf¹ñCžs~éÃâ¢dbÆÊÒ8ÀÌ‘ÐjAÁÕòMz­_.•vÚH6ô~ ÓÆýpìg=œùN¦þ¨%~‚§I„£4È‚ìgs;ˆEAwRNsô ¢G”©X.¹âK}–óªÓpÏQ¸à–À[R¶ˆ·2€ÛŽVE˜#Œ®JKªóµ™.Á¾÷à-ã’/6¤£õ†{À±ÅÍ/€^d£¹5ÞIžq&8ˆìrËŸ^?ꦉ%Q:°{B¥$Á¸E2„Š¢(í§ 2Ž(€¬Tú5å2ð:èi_ [°¨ÑØ1¶ÔÜ åy„K÷§é4ü(Œ§€ËdâfYädz 
éMº“,›—5+ Zq_‹õ8/YḤäj‘•Y¸fög) þ?ëzÕpPº±´AÖ¤`âÓäÎ&=ìïÙßý¾¿ûsÿ+Úßÿ¶¿¿ßßýß(9—¹Žôí˜CÌ[TŒÌcqÒ /ŒyRG˜t»i¿×`ßK’(NO±ï…a Œh°q¸ÛºÀ‰rÀþ[ìÁVb`ô/vF¿Œ`Ð$k¨’ºÌ¸ ‹«g¬eUsÞl”JÏõ®¤–U6x¬Á¤}ÞT¬âŸ äÁÏУµÿqî!RêKûý‰ø?\CÍú΄)†”Z0©mEªÒYI :lImŠGôNÐ%É!Ì€ørň‡ƒX‘Š•p1ó5‘ŠB@Øä a Ú]õÅür:™@nB߮Һ¸&’¼ZãVÃƭOilÝñr%zXƒ/*‘EÍ0÷ïùÛÿgþZ–AÅ|kéÿrþ Nz<ïöã°ÛÀß8ŒÒ4„äfrW€“$\š„ôåo¥ÿš¾èø›¿¹š K¶Dîtn'¤‚ÛPüšxüöL!ÃoLĬ䤦S4ÖÌ6unûÄ› P¿4P˜ò âÔ†«kG¡Ñ°ÕHØ.±éÆc›,ûã šùѤŸ@­{þ¬%?§£¬;5µV@‘Ë%µï×ÁÃâóZ Áo¨œÙü´ ꆽ qœØ®ômÚ!;ºÚé:ë¼”WD¼ÛÚt ÿ0h*¡<Ã’äLTCJ91¶CGþÿÿPK!o@Ç—“ppt/slides/slide4.xmlìXÛnÛ6¾°w t¯Z’u°Œ:E,ÛE‡¤ 껦%Úb+‰Iç°a7 0ì!ö î÷<~‘ý¤D«I“.Kzás€H¢Hêÿ¿OÏ_\Ö:'BRÖLÿ™ç Ò䬠Ífâ¼·pG’ 7®XC&Α΋£ï¿{ÎDz*´näOœR)> d^’ËgŒ“Þ­™¨±‚G±_@¯u5Ï!¡¶ª­¢Si˜& Êûœ[á¶´¤^:ÍG¡ñh™ÍÜãEºñÂO¢Ùp–e3ßÒRÒ¢ îîé¬ÀYE +%)6«¬Ȱµ0?£H€¿¯6ÐêèðLÚkËhK ”í}$_íð/G ¢v׿î>ý±ûô×îú7´»þ}w}½ûô'<£áÞ[º9R—S–7vÑ}öã‡í]ËB§g 븠ŠÒvüP<èÀÒbF‘`ÒÐ4÷¾ï%iwØ®,±à~½¡t®d[EIJ,.Ъڊ·¼D¡Ÿ.¨ÖT<ŒÚõý‘§ÂÕ¦U9ZÜ?RU.KÌAøžQŸŽlOߪÂùÇV”/qË)ôÝô…Ú†·}0-‹}œÀ_ %kЋ+ ê ®ÀÂ…À0‡40ßA8ªÊ˜±¸?'.hg…¶¾~QIµTW1˜Ã–ôöq[Óš}€a œWÁ`9qHã¾_ê¤Õ‰yþ€ÝÎ`žý Љ;pðÀÈ2'Ài¡Ðó]ƒÔ'kœ@K¶9AKÜHt&‚y@àº)•ƒ8…gkZi¿ƒó I€ ħ£WGË“ù|¡‡£v3¥¤)ΰÀoïΡ‹yŸC—Ó9|¾iØ}d†VÀè´4fµ%ï7fôÏÆ4ö•ÀkãëÇÓC?Œ3(£dèùÆví(©§÷a’†q¶ÆŒÃx8l'ËÞâõå!J¼ âÂÎ`u¶Wè%S%ÍA̸aR7õ"øßÿ²ÿ…ÔÑg¿[Š„gWÆÖÖòé¹@¯Ngèé‰ )'OÏåH¡+Á‰°™»‘Ìf¦»9Ý}u†;bæ72Áã½ó¿þp+Ø­žK7÷Úä«üOÀS§Šèãí‡GIè`¸½ÚûÏ8`ÛäæÈçüýšð®÷=«Õ½¡ïZ£~ëHáý«V³f³‡Npœ`öTÜœµ[êé4ƒl4u§~¸pÃYšÀ–:ŽÜE4 Ãl::Άs½¥æ°—Í1ç[¯ì9>ì ƒ³ "8£æœíæY‡)¬jãÔl·Llfým£…ìùY^‰SÌßœmÁ± l acE0½Yƒª};œ»ý ÿÿPK!‹©‹[÷&‡ppt/slides/slide3.xmlì]ÍŽGr¾ð;úh€šÊú¯F ‘"kH»ÂR ì‹==œ^÷ª‹äІ/"`ø|óÁW0|6à·á‹ø‹ŒÊîÈæ03ª%- £y`OÏÄWQ™‘ñ›Y_þêa½JÞ.úýr»¹™™/ÒY²ØÌ··ËÍë›Ù~xñ¤™%û¡ÛÜv«ífq3{¿ØÏ~õÕ_ÿÕ—»ëýê6z³¿înf÷𻾺ÚÏïënÿÅv·ØàowÛ~Ý øÚ¿¾ºí»w¸êzu•¥iuµî–›Ùˆï5øíÝÝr¾øf;³^l¾H¿Xuî|¿ÜíÝÕvš«íúÅ—±hï–¾ÂÈæ/W·ô¹ßýÐ/ôÓæí¯ûÝËÝ÷½ýóoß~ß'Ë[Ì×,ÙtkLËìjüÃHf¿n@†®Nà¯Ý•ºë‡»~ýÕ—Ý5Æ–<ÜÌ0ùï逺ëÅÃÌù—óãoç÷¿{„v~ÿüê+Çwp`J£â}:œÂ çã‡ùøá?>~øúáÇ·?ü[’FIÐO†è®··Óļ“~;ÜÌò¼6YžÍÄPMQµyÁ.ë"oZÔe^fµ©f ½lÊ2¯,ÅaLÝõüÍ~øõbk'°{ûí~°Óöú?£×·ã³™o7›ýrXüÜîÖ+ëß\%yQ•i[&ïÇiDŸ€þ$A¦ªš¼n“ûDÜÑcœ GNY]·yç$AYjÒʘ8§Lr¦ÉLœ“e˜è¦ÈãœrÁ ˆ¢IÓ8'”ÖxŠqNÃÃì™¶É›ºˆs’ ‚„§lâœJÉ©©«6S<' *ʼ(5c‚$ÇTcp{QÙ“ ¢ÂäaÊ£²WKNeÓÜ^”“PGcª8'˜†ã˜Š¢5)— 
¢.[šˆè˜ZÉ)k‹´Êãc’ ¢ÎL‡åd¤¾×iZ¦Uœ“*Ê&«°²ÄYI…/Œ© …HTVUU*t×xŸæ°ÀŠQIPQ¥m‘gŠQI•/híSpò0U[´­BÐTyÍx$}Q7e£zJRÝ1qÂf‰)Ú²N5K9Y;©Odi¢ŠëJÓf™jPRßs¨¡Š••Øú—r©ðUa*ÕJPYå)‰Qœ•Ôø†C³Æ *iAÒ,}™\&€(iq‰>,U†V°lÑaeRåMnÈ5Pð’¨Ñ]Qð’:o°¼¨ô7“¨²1Ö߉Kj½iÒLµªgUÂE„s©—Ô}øÐŸ$QpA[òDâã’ÚŸ¥p”ŠLñ¼$ª,ÒTå”erÍÈÒÌšƒ¸JT ˜j½Í¤þëçP¢JÜ!\3ÅÊÀ´ø­ŒK¢ÔnL&W€,-óÖ'ÎK¢ ¬À•\®™iêTãÛz¨"ƒ‡¡±'¹\jk͸<LÉTã’+@Vç™ÊoBHx´xyžêÖÃ\®Y[e…F¿A)§\®dý ͺᡲ’‚…“›Ë«ZKö5*‡*ÃbITÈ¡\ X"BÅyITUÔp$¬äàœ¡8+‰*³"U…¨RÿKS&NÈ%*¯šVÏåRý±dÀR*‚T•±]3(Krð ÇlEüYy(Ä#©(äŠQfU­ŠR=”Âo/¤Ö—&oT9 ¥R¨B.E[7°ªŠ‰“(Z‹r€ØU*ÍCe˜ŠLa ¹LÀ¹¨iŽê“‡‚¸6šU½úŽ„Ëâ¬iæO¤.Ÿ&u{7?Mê(ÆõàÓ¤nì 88qø>eìŒJîø> ~"uˆ:'ÁO¤è$ø‰Ô!>œ?‘:Ä|“à'R‡èoüDê›M‚ŸH¢´Ið©+¦I…ARlð} wŠl<¸'uìË!L¿˜Éêf¶š%èŒf šŸúYòêföŠXv×»n ÈÇý˜¼Cÿ‹k†¹?öÂÐß×Û·‹¶–r @hŒÄ콌ñÕ8Œ#åjã!¸9Å"Æž“áèÜçŽ9p‹ ÓsçH~lazä*Ž]<îºî“¯?¶{Xú±‹#x}dЩiƒé¹#LÏ­LÏazn `zî‹ÓsÄHo»ÂôÜÌ0ÒÛ… =w$09÷ɹ«`$·½ArÎH29×ûƒä¶º?RÛÜPš5l¬»)m•¯Ëµó µ) äÊ-ùXÿ’sµ›É¹†$çŠ5“s:HÎUg&çZr|¬3=׃Ãô\ýéeSœS#÷9ªWpGz[— _Ÿ«°ãýÛÚj˜ž+©LÏõÑ ýX ez®qFèmE“é¹N¤«’Lϵƽ­,Zú±^¤«ƒLÏ5¿0=Wø˜žëvaz®ÒYú±ö¦çJÓsý,LÏÕ2KÏ•3ç 8¹qŸãrÏ/¦ç:VðúcՊ鹤+O–žëIaòq9 [Ë5¡ ùX²WçºN˜œ«8|ó¶6!;Tq3¶¾¦æjн¸ó-Ü\»Ožó±"b)©p¼ìXÕ°Ä\«“se‚‡hë r[]°ä\3“s…À’££yÿ09gù-9çîÃ䜩·äœ’ÙvK>æÐÃôœ1gz΃‡é¹˜é¹/ø„~¾ÚîìE’OgSÑçŽ|BÑ¿¼ß®–·/–«9s¶‰~ñlÕ'o;xН^›í^½Y·½åß5eš:1:[Þ•VrM·Á^ÂûãÇ—œÓ*¨p›+$ÏÑ@Î]ÝøÞ¯ÐO¿ÿýâ]ðð;¹³ûäzÝ|Ž{¾õý}w»`6tãß¹½ ]ùóp¸öxÚWpœwmžÝ‘ž ‹»;ø×ð8êø€°œ·›#x½Ül{ž6ÿ+ŒjäÌô±S V϶xð€ºÍü~ >ôt Ìâ~xI@ûùðÿèV¯±ã@´ØÜ~ßõÝïñ—¿{³^®·^Ú½+[ ùs÷äo¿Ç¥W÷öûbóä/ÇÄñìÜÙÇHÍÿŸß€’w4<¾À¤Ø ñÛ·?} @–Áá€9kZt§ØY=n|0d¬„Ä´ÀP¾ð¼»}R…&o Z°ãa§ÿ“^ó?U¶Å]ÄÚþÑÏ[Ùb_„"ÇCqÿPŽðA’‱ë2µDøH ¼ îd‹ðÁÃ9ð©ÐņVã(ƒ2¤í΋ðÁ3>ðiÓ-Xq>S×´=$þ| Ü>“è4ÎH‚´É .dRŽŒÊÊPçttæ$¨Mó]žQFˆ|ŒÓ˜”z"¢ A?k¦Ù™##VÌ8# ªÛŠ·dDd‘Ü‘2…Q詵¦åv„‘×´oÐÀk2jˆÌJ3Ã(1^RËáÇôA)xy(^ã"áõút„¶ *6.¹>`4èá£^”ظ¤¶k5—ʇG Qâ†Ð'©ïP¨¶ Þ¸Ø $ˆdÉq’ ŸÃÑÒ ×ñosÔ(ã$5žÓŠ1IP›SIª¼BŸP=>¢Ã—ðÒ£áÂf]×Ä.̱¤1z1:0ÄQ‚]L¥CX$xZFB ÁÓòù'é|Æênû$™?-—OK´¼ïK (¸Àô‡¨hó¦nš´Ñf$>MÞhÕñàžÄñ ~BÅ‹U·2Óœk#œ€añq1êñ¯~š†ý{{Ãì¶Jí¨Üç˜H£ŽK^ØùR³¯m¯¬\è 5Û]KÍŽpzô{-9»³arö^-9;¥aröAÇ«“k&w!;;ˆaröùf¬›&g§Î’vÕÂôì˜ô2ÞtÓ}òC+GO.SðúâA±Û¤f'Ç^œ]— 5;*–šÝ µ¨™° ¤öÂÍÁ/™ÞÂÞüÏ$‰DnŠRP—ôÖ'éB?7vIoÑy‡;` Bé-kî~¦ôVU©KoaŸâ¸Š‹ôVže8ƒÓ[5v'àçŸ1»E•!»ßkÜ1J×~$g奸ò¬2 
åQŽkñ#?ʰ´ÅN/ŽeÇÒägØH̘²Š±ñ#R¬˜¶Ÿ=2 ¢;B˜ã#ÃQä`Whßu„eMéð´É`ô¸7ÂG‚2ª ÆŒ_aí«Š“OáçãPUB.$6o2mK³›k"ÑTÄlŽ!ÑÇ/1h±9Á0™¢²xøyHrS–蜉ŽÃKMñäFÇáar옳hÂ#ñŽ 8¦-Âãñ@¶vK[#Œ#©ÔZãæå±”Ö Þî!;©å#1J6Ò ^ŒèUr1¢ÉňÆiëŒ$?Lj60TŠÕLj?ŠÚYF›ý#ë³T­­–­­–ÖPm«%Hk«¥5̵¶Z‚´¶ZZCµ­– ­­–&Tm«%Hk«¥5TÛj¤´ÕÒ„ªmµimµtµ6”¶Ø ¯ÒˆJÍ«q$aÉ~ÖŸ‰D%FÉF*ŽrÄ9|vU²‘˜s^ƒn—šýÝ ©«äOxk(ë\TÕ³lµ½z|Þ¤¦žð"ÑÞÚÚð˜_ùŒHEE9yrÀK'Ü(¤MªéoÖª„MêhÜg—Šf°‰C£53=àEX­™¬Ÿj¯5j)Õù7œ—3ÍXãp¨Ô6ó„ÅË‹wÏ2ÖHÜhôÒk÷8ÇX)’¯mã,cèPµrÊ%à,cÝdM®XÒ¼x÷cóXUk´$kk©Óò-õã]î]‰º¢RµµFôb¬¥Ý5cþœìôÅXÇÔSjçÅXÿ_5Ö8*§Ó›q„—-ê}ÆéôŒu&ìL1 éy_Œµ=½ÿb¬éc\"k·äÄéV_Œõ/Yg8I|Lm‚4ð87—+½0Y¿KŒ.~÷JÃÊÞÇ(ËÖ2S¦‰à%½6‚—Þº6‚÷0ÊÒ¸´ÕFëHÖ)ʬv $HÁ˃óms$Ï(äqínÑþ²³Òí5¶“FÙHý>3‚G»§[CÃ‘ê­Øç¤Û)ЮÃð´Iíã©NZlâG*7&àŒt;(ç>¶Ð¼IÝnÎI·ã/ñ§#»8Ã)À«œ¸5+4©Ô(žd¹"ß*1ñaHmÖ¦ $¦lñ:¶¨ˆùö]—Ô÷1ØNÐpkah¾¼ìùtŸï¼ŠÏ˜‹ŸçTUÊ¥þàX<ý?Ï'hqtyüáÈàLŸ È §—7?Ó' ‹H.gú^#g$WuV_‚ЭY¼¼ù™>A…w)ÆG$•[k«½w!=b¬/[?‡ÅŸ.[?c»a¹ä6¾ËÖOݦ×ËñÜÇ-Ò(d"…®ß·z"rXf'Á±XJ‰õçþéOÝNl<]øÇöò*mï„×Þq ŸÛÊHLÃâZ7enƒžût›mC“#3U¹ ®ŽÌ}Žä¨XrœÎsÜœäÈÜçHiD $ˆñæ™ûtW·aƒ½º±Ñ@œ½¾rêƒÄìÄó¥a’ƒ´ì‹[ÚðEÙ¥¶„ÖS^Õ&Ëø¬»$F|Ls–“Ï$´.ª%Îq¶kê$ÜM¬û'Øú™–/m÷¼4;‹–B0HÍSâx(‰»÷ÉwÂn›¥Æv`8cÁk³ïe©Ù£ SÛ¶K7ä©?§_î~® ­té·<¯ípê™ý5½ÂÛívø5¦ïp|š]÷ÙçÚ¢SërŠ›;ï²Í5pŠÌ¡ÛæúOüÏ?þ÷Çÿœ|üð¯?|øøãá{buü°Õ5¡#âp’ŸÕNz_ýñÍõ½ÖÄ{|g}†7dáÝV qŠR]Tve:îsE?<Þdޤ=ã†w|f&uzêŽqÛõü&÷„~ÀqØ8ÂÏ º;Ò ÚáHˆófK'%’?K莸ëÜ¡{ïúnw3Ûl7 ±}8€àûÝ×o\‚Þ+ó!}Va?9‹¯§_÷»ÇÇìÝáÛÿ=f‘6CÎ’Ûe?`Ìøi¿ž­Îôã WݰÜ$ÃûÝâ®›/nfx}ÿzÙÍ’Ýr˜ß¿èÖË€,Îï»~¿À¤¸wC75|õòÛçÏ_ÐpT8þ·¿=ç Àä±;¶ÓqþMïÃÞÛäSáÅä×JÙÏ ¿¦Âk1$0á9 ^òJVä(¿taCGŒó1„5 s¶÷ÿ«üÒûÁiñMŽÿ^þæ»o’o—¯ú®ŸÜm{O¨I÷éqØÿÎÔÇ¿ÄxžÃ ¿!y<_á¬ÒÍ@£yñf3–ÛÍÞ„ëŒ=­¢ž å/ §v‘ý¡_àÐ×Ýõâa°'ðØŸ’7ýòföOŸBmž5OŸ<5Å‹'Å7mýäëUùäN/.ž=m¾~–?ÿÇЦ¸ž÷ ÌÑvó›Ûäa½Úì¯ñË›Ùý0쮯®¬Ôí¿X/çýv¿½¾˜o×W°3Ëùâj·}·èwÛåf¸¢\­»åfÆ^T^—8Ho4d½´÷fƒ»[ aþruK˜¯úïºÝïÞZIZwûaÑã ]üj‡'Ç&EÐØû_ÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout7.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout4.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  
Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout6.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout9.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout8.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7-ppt/slideLayouts/_rels/slideLayout10.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!i¢_!Ç,ppt/slideMasters/_rels/slideMaster1.xml.relsÄÕÝjà ðûÁÞAÎýb’¶é5½ƒÂ®F÷O>X¢¢v,o?) (ŽBÀ›€Šçüø+æxúzòÆvJ2È’ÊJ‰N6 >/o/; Öq)x¯$2Ñ©|~:~`ÏßdÛN[â«HË uN(µU‹·‰Ò(ýJ­ÌÀš†j^}ñiž¦5ÓPÎj’³``ÎÂ÷¿ŒÚwþ¿¶ªë®ÂWU]”îN jûNà;ÕÕù²Ü4è$ÉtÞN»ÄóÞ—­bÊV!Ù6¦l’eù’4ç¯Îò6Coß,äX”ñè­ÊC²lÉ€•3+bÊŠ`fqC ¦¶‰™Ú&˜šëã=­Y²­cÒÖ!Ù>¦lÿ'£³ßoù ÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout1.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout2.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout5.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7-ppt/slideLayouts/_rels/slideLayout11.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  
Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!·cÔ·Y"ppt/slideLayouts/slideLayout11.xmlÔX[oÜD~Gâ?X®Ä›ãËŽ½^“MµW IDRÞ{6ëÖ7ƳÛ]ªJI,!x@}ª Tâ P´¯´P‰ÿR“†ŸÁ™±½IšMIÈ¥ðâ]{fÎÌùÎ÷Íœ3‹×'a Mý8jÊú‚&K$rcÏ6›ò¾bËRÊpäá ŽHSž’T¾¾ôö[‹‰“Þ2žÆ#&(upS2–8ªšºCât!NHmƒ˜†˜Á+ÝT=Šoƒí0P M³Ôû‘\ާ§¾Kº±; IÄ #”˜ÁúÓ¡Ÿ¤•µä4ÖJR0#F]›&à-Ã6|VämLdIô§chÑå%€À]<)Â!|ØúãˇOóí¯ò?òïóì‹<û%ß~ôεIë݃Æìó|çq¾ó Ú……4Ù „p[Ñø=š¬'kT^¯QÉ÷øDå²Z6”ÝÄkÝàúÊðÍÊv&.-b€“&Mâ;åO„2a’[|t¾ºÃÕ9}ÝaoNoµšV0›¨‘wǨÜ9€ä^’>ó²ŠÁÔrìÞJ¥(¿9…»îʸ2Î1àÓ%C©ˆã“¥˜úÛ"ˆå¨¢«@« Ä+7f8Y–Ñ@Z–QGVÍ> ™¡™uÑÎ3mS7 SLRY‚I Ó‰Ã&íØ›rÀ?_ˆ+§US&øcø-Ì)[gÓ€ˆ°xØ—àoB?Œoú‚ 溼‰•Öd lY¼“H¹±: Y' t\Æ—-åÙwœlqö\:ÊÌ'@×½ç÷þzôxïÉ·‹H<*ç%‘·†)þè,Óó¸`ÖžW xlN&Dí8!iDʳy¶ «šyÈ5•mqg (°âÁ¿b Ç+DÑ"*šž0ȬÎ&Ô5/–¦7lÞ~Y|KÁ8˜Åþ2øs(0ùöÉü)h ¸j]Ò‹âõþî®ô g_çp)™`JÍŒq³gÑ!pø`ï‘EÏJ0û?TíÍU+öÎ×þôº4F¡7ú_e^%Pü\݇ª3îåƒþüýëÏ4AèóžiƒLà3È~q0ËÓLä"íá©’øs8ÿ'iufWYŠHEŽç%Èxy^z§Óí÷=­£´»¦¦ V­­´kv_ÑíZ£«ÛÈè7Œ»r™’y˜懤ïoŽ(Y1ùBÒCÓ몡õƒmÖÇM_atÍ*ºyv?ϲ*Ù:)qb%p…Zâ]ºpß«+‘¥¿ÿÿPK!XYJg­!ppt/slideLayouts/slideLayout3.xmlÌXÉnÛF¾è;ì™7qƒå@k¸¶Q'0&GnŽ©AGÚ=ä¸h€žº·I=´Y€¾KX§yŒþ3$%9±SÕv_$ÎöÏ÷o3ß?ëWÆa Œ0Iý8jˆÊš, 8rcÏö⵫=É…”¢ÈCAá†8Á©xeãí·Ö' ¼M4‰‡TQê †8 4qjµÔà¥kq‚#ëÇ$Dšd¿ætd‡AM•e£"?Ëõd™õq¿ï»¸»ÃG´Bp€(àO~’VÒ’e¤%§ †¯>‰NÐ6Åî{y¢À'’t)âèîîž¡:òéÓ|úk>ý=Ͼϳß^~÷ÅÑgOò;‡|^š\%³Ñè]’ì&;„/ßíÁ÷˜¸RŒX+Êi¼Á4ø¨½²|¿’„œqŸ„ëÈ»ã†î›°_X„<¦‚[tºó^w°}Â\wÐ=av­ÚÌ6Ï'…F¯«£VêäÓ?óé7yv7Ï~”™rÅ 6c÷F*D1¨Ë¬Phén*™Lu¶K2 _PŸ¸œW r³TóSnÚ ïÌ ¦ªjŠÆ­¢ë²aË¯ØÆ4MU‡NYHÑ U6ë|“JlRˆN:nÅÞ„YvþÁ(r1„-e+¤t—Np7| (؇¼ ãáþGЕ~ÒaKØsûßE`å¶åJðúq‰`sä€Ià„܆~_÷¹„±l½Ž¤vDØ‘nò6ޤk»½!mŠRuº‘g_çÓÇÌEÙ3aÑSùG/üqôìÞË=úŠA ßGÞ"ˆé°ôö̯Èì`ÇÊ~ܤ̷§Çx¬Ì®ìÓ|ú¡Íî yv˜g¿hÞ~¡•gL•Ù²² Ÿ3sªI SÅã™âLµeĘã1Qåàñ8«Ë²b™¥Ã‹^&Îö ™'ÅYˆÈ&O?òàDcŸ,Tö†[pls$ ÑGo1œÆïõü `sù©ÛF(€ ³£ÀhÑclCàÑÙdîß90VìÄfÁÌ3BeQ Õë& s/W±V—adjrm×VàôX®±B¸ c WŸÃU4Sa(–3/ÓŒÀ ¢,ñÖðZªÅœ|ùð2%^cŽWU-0ïeÄË@–xͼ¦®-Ÿn«Œ²ÄkÍñ2°ËçÛ*ñ2%^{¯Q7/g¾1ÅI¼@N8•`èá›Ñ®ÖS‹…ËûMÔâ, A¯‹Ãoÿzúå©Ô€ßÃ祴m€‚~EŠ›1snaö±Ë]FNH*ÆS1F~W×7opWô¡˜`ÅÀ­v§×³»r[ju겤7µ–ÔÒ¬ž¤XšÝQ,]íÙêm±$Ä¢˜ú!îùûC‚·‡Tdynª²bÖÔšjÎÙàc¢WÈÿê•wóì~žeU=ÿéÁÿú”^þxˆÅÐ(¸à¿ÁÿâèZИYqçy1öùß÷:º…ê)4šW>çÍx؆'Z’Ót º™2†iBÒ4©m¨MIoé©©ºÔÒíf·ÞíZzÏš¥L tG€îB2åùÁãwž<ù¿ó„WÕ3Ô›)Ôd ¯î‡ À[­–m¨m 
Î EïIzÇ6¥fϨK½º¦ëí–ÕlkÝÛ r¢èŽK0íxß+_] óµ—’ÐwIœÆ}ºæÆa­xr©%ñML’ªxuQäò醗 ªl›²­ÕUž‡€PòНB ]ìÅ„×hù%Û#~ÝÀ#$”ЕÀ³ä›:ŸÂt¯ž™6þÿÿPK!Oùªƒ?4!ppt/slideLayouts/slideLayout2.xmlÔWÝnÔF¾¯Ôw°ÜkÇk¯½?VvÑþ¹j•&Q0سYÛ3Ï.»EHa½â Ñ©W­DÛÀM/ZZ¤¾ n Ñ3c{C`ƒY•"ggCWÁvêf¥RÓ#Äj±Ÿ½É~2îoá˜çFÿ“Q@“Ò}k”áÌÈÝÇ]â3 Ñ’Ë{ª"± µ q{;¡¯Ä(‚‰lþw6ÿ1Kogé¯ÙÙü·,…¿[òyS®NèE†±ØO>et‡n3ids²Í”ÀF cª^¼(–Éa Ëà‡þÒöÝÒr¦Cµ×‘È(Ó– ÎÄ6!O¹âå“ÞѬ7ÚZ²Ö –¬ÖËÀƒÅ¡À=Í#z5³ çElc\¾… â]I”˜@¸…¹ó‡¾_$9Yœ‹c1ôåiŽ!|‡°ËágÎÅÉŒT9–žJ–ÞËÒð[Þ9œî‹0̳±ør%¡+ xUÂI¸ý]5;(q¸ý:ârü% ð(ý’±®*¡ž(/%Ëë.2s‘©Eæ.ÉTó4fOSBßAòsz9P2±>x ªï¨ª¨À(ëýeý/€²Ï(¸öÏïr±ÊËåÙ½ŸþùëÛ¯™Ð Êëý­€Ïá þ„ ‡*ˆ'qÍÈ‹Xê¡Î$† ë„"»Öë»nsPéiݾ]ѬNµ«u« W3ÕfßhX¦Û4¯«…*ñÇ<ˆ°ìŽÞsu%ºÂ¬uÝÔÍúÑgü¦Ï‘]»d7KïfiZªœ“tƒ,Ƴ’<ä,gù«1bà 'z…‚â¬-ëgÙ-|óüî/‡w~?±Vì³É¯\,Cw¶9Ž–")oá—L­^‡¢éZ¯fv4«kõµŽU³´®Õì ìÁ a¹EÉ$aàã¼[I¥<ÝüÉÓý?ßuH¥]örÐXm$ ç©l±Æ z‡kÝn³föð­0,W³úͺÖqk¶æÚUËêu^upB¦†åx ˦ó3¿h~aò•†5 ‰Ísð–E,&²$¦Ñ) èò6xÄ¡”⪓߫“¯ªòÓªü^ªNžUå·âþ˜X䇔¾$¾Oóƒ|ŸŠõ»Ó}*E!·×Ø‘ÕæA3Mܦ0 .ÔW–µ–°7Ñd{{i¶%{sþ ‹°GfL êÁ`9Œ÷ΙŒ‡çÌVÛ‚ÅKø¼öèuwŒÖ3aÑÎÕ+0XØÉ‚;…”fà.Bíe°;mmr×ù[ò±TS0z(ØhŸ‹È´K Ýò"&¶k¹ZC75dXgÃã8Žø$9šVÏXu¾6{lÖËÂ9î-øä`/.Ø›ÇDBƒ=@?@ñI%ÙíHc^S·±r}_–pÌvÄ=I•›Pc ëÇC 6ì±íªü’'$XùRk™gÕ'O_>ùåôùÿ¾ùñô雀”A,š÷’4ÜÇßø7¯ç1Å`‡€¶>ÂeMôÅt›KºªÊÏV1JÆU^LnÕ¤C±@&·yR'ÇeÈ×MG·öM×µaÓ9˾ Ô‹ôì;–Ág×á¨kK„¡ÎÅ62ç²Ï)§±I(%˜îˆbŒÒ¶q‰ã# ²Xé3Ù…íTdLHFÀ,²8 ý(ŽÅ ßCI?¦ÒǰÍøÖDG)«GK[@.Ÿ,x\±¬¶öá²ÁÇíÀ¥±„Š,‡GFZ?¼dƒ×\âíèH”ìúáå ¼h‰w‘†ë˜£l[+€]Ãe±~€9ʰ½l.TîZ¦0GÙvV;È\Óšã(Àî0G»¦EÇQ6€;+€mË{ÿúå0G)¶êV;pôoN:¼úq^Zñòñ×üöyU>®Ê@î!ý$uUsõc^…˜Ȭ¡-Àñ>uBPÔßz®¯!üâ@pÃÅÐwKAtîg)úFÐpI¯?ðýÎPë+½¥)¨kö”žéúŠîšî"Ãï÷åFÖ†˜%ÄŽ&”ìM˜ÌSákGCÓÕP g)7ýå¡Õ²[•ª²l•ìE$£« yK°üÑSFà¦&ú¤#Ðxi¢ßbíEW:Ë?}wúðç kE´7У¶½Ôê¶ ·ß$çFR{¡½¯®dlÇ¢éêJß6º ê¡ÒE6Rz¨ÓZá‹|wQ2¨d’º+©”ÇÏÞ{qüë›®ÑF´‡Йïвå¢GŸPèïõzÛè»°WèÈWРã(]ß¶ß2ê÷ÜnßÞ—sy%âÈ⃰9:Á׎;’( Y‘ØF%j}n¢æÙ]Bó š8:ѵæüEt   h– ½%PІ°E .ðƒ;ˆé‡8ß›ÂG {pÒ…Í åp¶Ã»§3S¸ïíYÑößÿÿPK!A+,]¿z6!ppt/slideMasters/slideMaster1.xmlì[[oãÆ~/ÿ@°WâV$ÙJ·p6F¼ùc’²¸æ­äȱج€ }ÊC¤M€< ¶›¼¦ièOYe“þŒž3J´M_`{ã] kHäÌápæ|çœ9çíÝ×’XÙ‹2ÊÒžªßi«J˜úY¥»=õí#ÍS•’’4 q–†=õ0,Õ×Ö^ùÝݼ[ÆÁ¤¤a¡ÀiÙ%=uBiÞmµJ&¤¼“åa }ã¬H…Ûb·ä;‰[F»í´¥ªx¾8ÏóÙxùázæO“0¥|"Œ …ù—“(/åhùyFË‹°„aØÓµ)­Áúüí8Àï]þùV8V¢à´ÔnëêÚ]Òeë ‡q¡ì“¸§îìêjkín 
aq…—ùƒ" ñ*ݽȷó­oüûû[Œ CªJJÐ/À:„»MAŒ\{|WŽDºã"Áz˜! xˆŸðé†Tñy£¿hõ'ož ëO6NnÉÀÒª—âªøŠŽ/ÇË™?þÏüñWóÙGóÙ?•ùìÓùìÉ|öÓüñóÙçÐ4Ÿ=‚[…©­ššÍ73¯TÒ ô€êáË…É—¡NðõùD¡‡9hŽF4…ï„Ù¦•|Ét.Riʲ]0D¦.õÓ«ëÌ3ŒŽƒý¨9]·Ì6Üà\åEI_³DÁ‹žZ„>eÆAö7KÊE¥³>‘¼KYpˆíÀ7Øx!Lµ··!B$t‡"ˆ°9º6Ÿ}0þð²Ì?øî—Ïÿõì§ÿ÷ͷϾû UI™BÙ{Ã4Ø"yë"¯JfÚ•Zì¹y6©YéìÃùão™Q~Ôl¤â~Y#E½«"Š\ÆVu0J´[†štðš±Z¶awóæ+ZÛ…ìY‰÷™¡³å_‡½.YÄiö ¶Ë\†Èy±huU~ôë“'Ê9mÁÂ!+{‚ƒö"~þoÛk“>YQÌ¥n¼¢Ìß^QÌo¼¢¬ß^Qˆ§yÈùMùÚ\Ï~žŠ‚ðùí©–ÜSùôëŸükcÆÇ ú²›i@!óxR&ŦÊ" Ëû0;¼`è˜6üÙT Ý4« ÐtlݰoþžZKÙ&)Ó<–òíÇ:n¢$Þ…Œ-V±-ǘs¡:uL,°­Ìâ(EqÌn°¦[Ô:ô€—@4J)¯~\{‘W…Kì–Æ}€¿‰uÀ.Žá×"Åw±ít¬4zo¸>u6ÚCm°n·5«o´é4Ý3;ëºg£Žñ>dɬ  i”„£hwZ„oNy.~élÖhënËhîbWƒùá\Ÿ£sÙÒ¹æ³O泙̭›ª* /ëcc¨ ˜UüiJ ¨õ…Ÿñ¼ë«óú™©–¬´Nv4¯c¿ÔŽ&‹±›çjÏцʆ‘ø;#þüë'ÿxöñ÷› ¸—5d`«îO““l™ùÉ…ö ǶÍÓmùeß48­pó,¹Ú4×…m£¯kCÇèkÖÀZ×ú–ci«Óß°76T·]Çö€S“ôÌ%er¶°I²úqñÉ P!#¡@‡B‚ÑSƒ=¸ÚÙ5° 8EzWÁ\ßÞ$Ä…l~ÞRɘ²èÞ ²Å–-°Ãò.G¶@¼šÄQºÊÀ/Ugñxƒ¼Ât±á›ä0›Ò{@"Žla‰Œ¡[®å™ŽÕ¯‹¬oq/Ôg“,d£ YÁ@5Ê‚®ªqE‚Ý( ú©dE¢Ð( š«dE,n”… ’uŽi¦¦´]ɺgÈ•,ãhk¯ë.ÉvÎŽ7ªqu–úŸ2p 8iÔKªÀÓƨ–hŒ•f·[DÂ+2oLˆ¡ÈÎ6äÝŒíE¼)'qC²™ °<À8Rq " _áekšú@‹ÃˆÜà¡’çþ–/²r¶$Ⱥ¡MôîLïÃI+ Dü®ñ¼Xë /Ù <:o5#â{–k6k–˜‰ý«ÉC-¦ˆäÍäHGHx‡_éðKìhªê*†£`Xé;!ÅfO5-£ƒ ‹Rˆö 7M6ÈBèºÁUrÎö £ Š(¬_¸šúEDbUÉIš• 5 zm§mÁ·ü^–GÔŸŒHÅp2aBƒ?!E²D»3B kî©O}ɵÛ9Ïg®ò´ òTk€<ÕN…œ¹‘,‡ÕX1NV°ž Å(„rQ߾̰þåtX ïº<ù aE,Et4°ÊÓ¶%\ ˆ·×3ÜÕ¸¶}…¸"˜Wk Wqtu;q=Ë_qƒ¸–÷ qE0®öW£m»Ì*qøÖøë<# ¿°"–Vg V[·XÔ½…°ž™5aBwãÝÁ¸ºK¸v\%+\ë?tÁè…ÀÁ¸z \yMSK‡oM~9üÁ¸v–põ<KñÛ™6•¿qÁdôçE‘w3: ‹Š°€ò|‹£/ õåŸÝU,ˆ‘l/ƒÁ4®¾0j¢ øÖñBÒòŸ+e±#Ƭ%\„KNr¥¬š²*[ÓÅße®üðÈoH.$Æ" Òu£„K‹æ‘Å81l÷i•È kÉ^¸ykW.îÊ*®éö¶çòaA²{O²{ϲÃß²þÍ>¿b°$ºcL7ýãh;ÚŠÙîþV,¸•VHåâ‹Æn€Á…<¶}‡KBÖ~'ö×V^öWEÞ€~Â&dá}"Øù¢}²jw7'`ínkZæ †…¸G¹EgÍѸ9ÙáßÙá“,½Ÿ¥¿ êи| ë¡};‚Ì¥^È­´7ú\&5>%ê E$\âá—ÉÜÂñ ¸–ùŒì×Cg@í¿Ù"²¼„l“‡™_@{dpø€(Üîù®îº,¢Y¿‹¤O·DydÝã@º¹ Uà“†‡TIá`²–¥?d‡Ï©±é aÔfȇWÿ8~ñàߟž?û~ÌÒ”ÏΦM£Êo7ʃÜ!qå/z(&nòÙÔŒáÔ"¾BV†¤“»ŸÙœõ›×9~ðûÔZ1.Â0ïßèù=9ƒ/âÉaÉ• MM•†V“ôºÞ”jº¡Ku½Zk•[-So›Ã’I<×Áhw!•òòàùG/þ¼ì:a“J~BóÍõˆGÄ÷½&Òwêõª¡5LèªÞ–ôfµ"ÕÚFYj—KºÞ¨›µF©uLŽTݲcÌŽ1>qŠãX‚Á›®)&ü«6beº±·-®-˜@ÏB¨Ú¶†¢Í>cLpú…Ö`Kœ÷@Pè „ÚÎÏÖþÿÿPK!Õy„‡‘¹$!ppt/slideLayouts/slideLayout5.xmlìZKoÛF¾è Ø3#ñ!’,–l-\Û¨°¦(‹ _%)Ùn 5‹ =åÒ môÔ}$顇6m€þ—°yüŒÎ¹m™6-;N 
0dŠ~;3»»puϱ¹±„–ç¶yñJçL×ðú–»Óæ¯mõ爸}b{®Ùæ÷Í¿ºøþ{ ~+´û«dßE`¸a‹´ùaù­Z-4†¦CÂ+žoºðÛÀ Á×`§ÖÈ.`;vMª×ÕšC,—Ïžª<ï –a.{ÆÈ1Ý( L›D 8´ü¡ùUÐüÀ Ÿ>¬R´ïƒµÑ®·µ·µë­o_ç9Æp[äÁ~cÓîs.qàÆËß¾~ý,Æ»¡¿˜&ýÝø›þF€Âk〳úôáì!¾–ý‰áWÄà¢väñ†DZ{ƒÀY\ -ð·×æ!`ûô"-s/âŒô¦1¹k ב5†+ÇHר A>(ÄÚO-š6Gbæ$ÿ$?$ñÝ$þ•sãÒ' ¬zÆs=0—z!µÒX3Lj:År™÷­È63¹ôGt “Áµè³h¯ãõ÷©ýÛðo’–F›Ñ¾ ‘€ë±-bH«o>M=\¸ FÅÁVÒUàbvcäXŽwÝÂÈÙ„æÅu"|¼ÁsÄŽVñ»é ×6!Oœ¨k›ò( G´˜Äß'O©kâg\ÑCÉO^>üóų{¯züâÉw  BaÏÆ5Ýþ hZ}xêOÒÝÁIÌ#p™F®<~r¿øNrð˜jßå’øA?¥ñûCiߦ6HX&’f*›%ñ¥A82ᕆÔ³^lÈ Q”©J“¹¯Ô•º¨mÑ P妦¢Îà›}’Î4æ&6q8âCxh;…,NŠlq V1ë,·ÔA/éèÛ£5àGT$b\øy›—ªé63³0åðR‚I™2«*¡Ö§Q)ÕÔ”'¨MQA ª Šú4*…ÊP• ª(k¢J…+Á¢äaP¬ ¶Q€Õ%u˜–be°êV’tPáÚR¬ V+ÀjŠŒópVm)V«O`)fõã[Š•Á6 °jC;WÈ(L1'(é 0ërÒÃÑ/˜8 ÔtqÎÂJο'1üÝÁÏ/Ë)ù^ÿìÍuÆwå…!±¦äD_÷è`zQ|ÁÑP– $jŠ®5N @¹Ù!­¨DD+†xêÕ9áµ² —Œ†ŠH“/—e ËÈ¥ ‹”Ë2eŒQ”¥ó9—e Ëh T– €,ËíRY&²,aKe™È²,,•e ›¦+MпH¯¹mÿ£ÜÃ>'àëý¢Š©WqG ¥“È&«Êò*-«ÚŽ©Ò¤³Àž¥øƒ ê[¨)’ÔQG! ¼óŽ’ß¾£²jï¤ÀWЛQÊÛw”’¾ÞqG5.ÓQÐ$\^cÕÈ ‹BõR^U`¸f¯*ÒŽ9m¬h…ñÙˆ‘dE& 6Ï•‹ UiÔ%°š¨²6KÔ ô˜·Ym~ÞfA«;o³Ú¼FQ¸Ùé4U©«WˆJOP–›š°ÔSB¯!+J·£/uå•[`²/*-#0ñäËGýìÜœ:5ãXFà…Þ ºbxN-=~Só½]3ð= OàˆõìϘÀ. $É2Œ¥7Yª€–x&i &Г3HÄvð ñ×ǸF †`Áª‹·|8"3€ŠND¨íìÈÑâÿÿPK!oK„J¨Ñ !ppt/slideLayouts/slideLayout6.xml¼VÍŽÛ6¾è;êY+K–ÿ„µ˶ŠÛÝE7yV¢×J(’%iÇn`³º´§œ‚ ÐS ô7×6m€¼KÔMûRÒfÛì)êæ"‰£™áÌ|ó‘³{mk……ÌÚÞN˶0MXšÑã¡}óFìômK*DSDÅC{ƒ¥}môî;»<”$ÝC¶Tø 2DC{¡]W& œ#¹Ã8¦ðoÎDŽ,ű› t|çÄõ[­®›£ŒÚµ½x{6Ÿg ž²d™cª*'¤ ~¹È¸l¼ñ7ñÆ–àÆXÿ=$µá­ÊÁ”ll˨Š={Ù'G$µ(ÊAPž>/O¿.‹ÏÊâ‡òþ“òþs£!ù ±Ö¥«÷?â‡Â•¥ÚQíÀv뵚YRPƒ÷æÇ'®ç"í¢jb­‡6@·ÑO0B!^++©„É+i²8¸D7YÌ.Ñv› ‚óMu^eôz:~“ÎÅzXÞyr•{,¹--Ê ]]…*ËdÕøÔ©ë]øÂº€C­Wý4eiô%”ÖÔL­#–ntþÃÛQH¤:R‚M] z‚sx ·—y–³[™Á‚ Ýó·sýжQ{f©só8« Á8RXÊâ«òô©N¶xf]Ìzàåã_Ξ=øó۟Ξ|¹ •Td½/¦é!裳½® !vH»É>+,®F¤Ý òòÑ7¿ÿöEY<*‹!ZvñXwkq¢ƒ÷·Qª JŸë™ÛÐßÐ|ž©•JúŸ0›Ý4iîN¦q<˜µ&N4í´œ`ÜŽœ¨Ý¯ßL½~àÇÿž]7NŠVYŽãìx)ðÁRÙ[Þoy=×wýÞ+l!>íú-¢4è–Åò(šF¼ äö6@ž+Q¡üÉ …aQÝx ä|‹ìœWPâ;sˆþÇÃïÏü|%W‚m”®Îýe~i% ·L™n¯¤{Î¤ë ¦Î8èN ƳÎlÖâþ9e$ÉRL!º­0åÅÉÓ÷^œüúóĆÍu wßž„—›[p)àx¿Eƒ®?éÃYá±L=gw;NÜiÁ$ê'íÙ=H™{A˜l&‚Òz2ákÓDž%‚I6W; ËÝj,q9»ƒg™™L¼V=Þ¬³°çy¾×éµzº ^ˆ²y›hA¤g vBćˆ¬àÌD! 
R@´‰q*ë *:÷fýÿÿPK!Û›X¼dp"ppt/slideLayouts/slideLayout10.xmlÔX]oÜD}Gâ?X®Ä›ãµ=ëÝ5Ù­öË’ˆ¤¼ölÖ­¿Ïnw©*%±„àõ©*P‰'@Ò¾ÒB%þKM~wÆö6iw£¤Ý¤ðb¯Ç3×÷žsï̹»zuøÒ˜Ðċ¦¬­Td‰„NäzáNS¾¶m+uYJ]ìG!iÊS’ÈW[ï¾³[‰ï®ái4bØ 7å!c±¥ª‰3$NV¢˜„ðnÑ3x¤;ªKñM°øª^©˜j€½P.ÖÓ³¬Ï!½È$d¹J|ÌÀÿdèÅIi->‹µ˜’̈Õ']bÓ¢`ØöD–Ä<:†MnAèΖïJ!` Ûÿ+Ûÿ1K¿ÊÒß²½ï]™´ß?züóóû³½o²ôËlÿa¶ÿÞ‹…I¼M á&Âñ4ÞŠ7©°·>Þ¤’çrû…]Y-^ÓÄcÓà‡úÒòÒ¶&´V±8I“¦ tNùa‹L˜ääƒÎ‹Qg¸1g®3ìÏ™­–f…Lˆóˆ^ G/Ã9“¤Í‚ËW`°°97)Œ \ŽB¥³>.mòÐùWâ¡”sÃ<æ“b^þRÀRÎOZ›t"wÊãÿ îb[~¶ØÔ'ð[`.ÀÂQàÑuOpác^×±òѦ,aŸ­‰g*×¶ .Öõ †º)f­,ý³ 9‘>•ŽÇœí=‚”8|zçŸ}¿ H2 ²ø. ÝMLñ'çùOêt—£/ƒ3΀,EÔƒ¢Ë«K†¬‡”, ?‘|Û+ wîÝ…¡äóº¡[sͧóÁ×E°4©!à–Þø®ÞqYÜx3ô‘>>aºËÞ@º}ëÓ–çØ;חoÓ³è˜ÉKaÞ?¯¯Ï¿xùøßž]<ÿE L¶¯Ø$&(ùÊÛ£7I dëq«1C¢G—GO™G¤O:yšN~QQæÉ9$W:9EÉU46d •µ‚ ÒZ„üƒäà¡·^HéŠj•Ìý<Ý ¥) †Ýª!µ4Ž|ï²ôvø 1(Ž{@ÊlÕ\t•!¼òó8Ĺð¨bHfPZÅÀYÂ*xêLƒ$Ç+ÏðªŠÆÒh%<œ™Åà!HާÍ𔲡`²®& ¦ÓQrÀÊ  <° ¢ä€ú x\KBDÉ9@Ccž[CeDÉÍ ¢­î”‚ %¬ÎêcM§ Êbv»A‚Ñ8Á¤“/Ò³gŒZ K©¦Œ1u]ªÁZ| ¤ß'^/gFb¬~1«`}?dâÕ†—Ÿ……¬R†2•Õ©Y•/ÐŽY‚²–m‘ÞRÈ,ª^Wb¥ÕXýòZ“u”‹!HŽ·&ë(…ßëT7L:¼ pNo”SÀÛãð6@8¼å|$@Ù™vF,¬6Ü]ÍÑËÛº«uš¤ ç°×O¿ÇóƲ>IÛyÙôu)Y¥EÊZÈ]Œ2yÓÇ[åðÖØöà…§ û­v·[í”ZR³])IZ£Ü”še³+)f¹ÚVLMíVÕb~° u¨ë;]÷d;û*"1\ÛƒjI1dUVY ò!ô V({7þãøùƒzrüô;ªaаï:½…cL5_øó4˜¸ ºC„¸×à6K›Ó“ÂÀkl¢’„tø(ÞÔ€ác¨¼tx@Í—K,׆ê๷xN)š¡Ñ<¡¥6+©4„ ²¤ÒLMAa™O²re¾ÈÒ›»‡'+Üñ È3i*šÔ™È1ÜÊyŒ'›9ŽåÀ*3°ê8–«ÎÀÒ$éÀ€Õæa9°ú<,Ö˜‡åÀšó°ØÒO /уÆÈƒt2ð3›Ì}õêá/Ç~?µVô7áF˜…ntý™žœ³ÝŸÇ“£’Ñ ЦФº.W%µ¦6¤ªª«RM-U›Z³iª-sT2‰çÂÚÑt^ºR^<»öâàÏ·]'¬òa)L.×BEl†Ùaâu§V+érÝ„^Ô–¤6J†TméšÔÒU­×Ìj]iÞ“#¤–­Øa#Þì|Ô /_û®‡IØ&+Vè²9s! 
o;qºlÔŒŠù¼º‡áˆ¯£’\4d™Ç”dlŽ+ Ð91+Q/þG›=FŠ`0uVg¯"…CPè „šÎGëkÿÿÿPK!ŠK¶e/!ppt/slideLayouts/slideLayout7.xml¼UÍnÓ@¾#ñ–9»ŽçÏj‚â$F ÒV´}€­½iLíÝew“&T•ªúĉSU*®pà爄øãuÜ ÚJ —Ä;»3;ß÷íÌ,Þœ&±6Á\D”´uk¡¢k˜4ŒÈV[ßX÷¦® ‰HˆbJp[Ÿa¡ßì\¿¶È\‡KhFÇRƒD¸¨­¤d®iŠ`„$(Ãö†”'HÂ’o™!G;;‰M»R©› Šˆ>÷ç¿ãO‡Ã(À}ŒLd„ãIÈ_Œ"&Êhìw¢1Ž„QÞ?§$g Ðnƈlëš:Æ'`°ô ÖâP#(ÃÉñ§“7ÇÊ*Ø:Ç8ß'“[œ­±U®/OV¹…¹óÜI7çócjIà|˜¿¸o•‘;ò¤³ˆ\à@›¶uj–ÿ‚rñTjAa άÁh傳ÁhpÁi³¼28½Tf¢ópìÎ÷£ç_?<ÉÒ£,}¥³ƒwYú4K_eé>,5ënAÌ%l P ç¥À,OÊ[r2ò{ÙH+”%¼Ã‡ )Ї:° P-]ñ•V¥¿ò«rêÑp–3´ ÿʈÜXÈ59‹±bð!w‚æíöú¾ßTz†×¯U §[õ ¯Úô «Ymõ­¦cû-{O/“BË(Á~´5æxe,á —ƒÜÛã$JèýH‰Šé>2î¬êŠå’Zcbl¬¨DöbŒ øæJÊŽ]±¦mÚEÐD2•ŸR™„«ˆ£{rCÎ-r%T²Ÿ…®—«[-ÕÍÒÃ,M³ƒÏ¹–—Šl_…ÈCÉ •Œ—…Ðå)^Å_ ÿƒÎ)ƒyA¼Èže飓×ß¿½”ÆêUÐyyœ\Ȥ’Ixu%So4 hº–Ñ«Û]Ãñœ¾ÑuêŽá9­î 64¿yZ2"ŽBL »+©”/ûïn|Ùÿ¯ëD•KÙÜ¡Ó. ¨J¦zî˜Cïz^«n÷šÐ+,Ç7œ~«atýzÍðkUÇéyÍn¯:ØÈÌrÜ€c5on‡ó¹Æs³*‰NÊ…€&f1ôLFw0g4RsϪ̇çÅÐeìj«åXvM= ȲT_f ¦|jåi1¿‹ØÊz&raLC¡õ”‰Á`žwâ³#9örÐw~ÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout3.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!À¥ºppt/theme/theme1.xmlìYOoE¿#ñF{oc'vGuªØ±hÓF±[Ôãxw¼;ÍìÎjfœÔ·ª‘¸TBBÄÄ*µ—ðeA‘úx3³»Þ‰×JZ"@P+н3¿yÿß›7³W¯=ˆ: BRž´½ú嚇Hâó€&aÛ»3ì_ZóT8 0ã i{S"½kï¾s¯«ˆÄÁúD®ã¶)•®/-I†±¼ÌS’Àܘ‹+xáR ð!ÐÙÒr­¶ºcšx(Á1½=SŸ Ÿ>†¿“£OŽŽO޾ô6r6=¼%õ€ÏÄ@3!ÎZƒ öë!§²Ë:À¬íÇ€Éå!†¥‚‰¶W3oiãê^Ï1µ`mi]ß|²uÙ‚`Ùðá¨`Zï7ZW¶ úÀÔ<®×ëu{õ‚ž`ßM­,ešþZ½“Ó,ìÏyÚÝZ³Öpñ%ú+s2·:N³•Éb‰ý٘ïÕV›ËÞ€,¾9‡ot6»ÝUo@¿:‡ï_i­6\¼EŒ&ûshíÐ~?£^@ÆœmWÂ×¾VËà3DC]šÅ˜'jQ¬Åø>}h Ê&HMS2Æ>Äs3:T3Àë—fì/ç†4/$}ASÕöÞO1äÆŒÞ«ão_?C¯ŽŸž˜‘@ÛÝú(w‹ñÂEºHF8 ™´Þó>ª'å±2§ˆÖÃ>Džaµ·–&û¸ÇIevìrïý/å<óP;Ž,)''KÐaÛk5—›òqÚöÆpf†Ÿq ^—º¡Ä,„+(_ ög&³Éò™7[¹bnÔáÄÚ}Na§¤Bª-,#f* –hNVþå&˜õ¢¨¨Fç“be ‚á“ì躖ŒÇÄWeg—F´íìcVJùD1ˆ‚C4b±‡Áý:TAŸ€J¸ú0A?À=¶¶™r‹s–tåÛ1ƒ³ã˜¥ÎÊ­NÑ<“-ܤBóTt«”Ý(÷úª˜”¿ UÊaü?SEï'p ±høpa,0Ò™Òö¸P‡*”FÔï hLí€h»^˜† ‚kkó-Èþ¶9gi˜´†Ó¤Ú£!ö# Bv¡,™è;ƒX=Û»,I–2UW¦Vì9 l¨kàªÞÛ=A¨›j’•ƒ;îs–A£P79å|s*Y±÷Úø»;›Ì ”[‡MC“Û¿±hf»ª]o–ç{oY=1k³yV³ÒVÐÊÒþ ExÍ­ÖV¬9—›¹pàÅya°hˆR¸LBúìTøŒ˜0ÖêïAmEð"Cƒ°¨¾d¤ ¤Aãdm0iRÖ´Y뤭–oÖÜé|O[Kv¿¦±‹æÌeçäâE;³°ck;¶ÐÔàÙÓ) Cãü 
cc^ž•ßjñÑ}pô¼?˜0%M0Á;+¡‡˜<€ä·ÍÒ?ÿÿPK!ân"øEÈŽppt/media/image1.emfÜœXUUÖÿ×ý£CEuMJ&¯uML,22*4TÔ«¢^ óZX˜¨h¨˜hT·¢Â"#£D£B£¢bŠ)š¨Ð¨(¯Ê$5”7£ÂÄ¢bfh¢†Þ¨aêý,ö=å3O¯Ó<Ïûû=¿Ÿ×gû={Ÿ½×^kíµ×^ûœ}°‰H;Éú%÷)sX9‘Ô"{rD<“fL±IÑ¿ÐÈN•1¤l«ª“ Ú%s¯Ó*³ðc§äÿÎ!“åjYÄ¿™$©â‘)äHŽdÉRY(«e¤x)õRºTÖRïjÚ¯seup½Œÿ×’FP{­,'ÉrY,s{Ûäö¶ùWª†‰~@$)‚t4ÉE²~Gqá"Y²k=Í[¿c¸p‘¬û'q­u¢ÃI¯õן¤×ª›!$éÍFŸª©÷wèu %šDòÇõ¿\+ IÊï@’‹dýæqge@‹/¹AS|Šˆ&3FJ#‰¤8‘¤<ö!ý;O×µƒIÚÞâûÍÆÆCäê½ÎçvþáøÆ´zùz‡òýãÁ^îEñÀÛJE~B½ÖÎ~IåÍâépºü5u†A+š¤4õ·hÑ¢^Êÿª3-ÿ×1ÉÙ%â^fëÕgºa»·%[ 9W­Hó›D=Ã|Yd“´‡Eü6I¸[¤|žMJ׋$̵Ib@Äë³I\.c:Ù&yKEj.4Ø>Ê` ØI¹7Ñ&e"Åؤä*‘ØómRv5íFÚÄ©íÁÊ5Û׉޶Iìí"Î$›ŠÔ±IL±H囤Þc°!Œþ"mR_‚/ðÚ$¢§À磠ò÷”HÆLèm)½z{ñið÷™H ù¿ˆ´ÍÂW|#Ò˜Jùø„‹lRÐÏÖÛ®g¨MZ¦A÷\[/Ýxw"o¼é¯pºëà­>Z¹×¤×)6‰ãá!z<ṵ̈Irõ¾æ¨¯´‹iSÏI\wQ' >:“éoIu7º#mÒƒÞý# w)ù³ ÂXv&m)¯n“îËlR3Œúéèë4h]n“¢A6ñ2Ž-Ñô½€Ô™Âÿ±ÈÆ8çe“”Å6ɱÛzÇ¿ýŸ"EYèà;Æe)4¿f—QÿKô¦¢¿²lh†D| e[–íû±%w¯u™ÿ,Ó\)‚4–M²~V7×ch éæu/)5m«Îáæ™5?Çë@h¹HmŒcúíÃ5¦ÿÓÏâ9’’Ìi6ÉœG3ü¶Þ6:Wr±¿ìKìJ±„q¤\¯«©çSi—Ƹ»»<­¶ÏÀ~@«ï#U·Nì»N·Í³±wêÕQ/ȵo¾M\`6]u1c“i˜-7¡ßÂ%èT. <†¼¶«¥^-tZI:FVßGªnÕ¿Ä ·ê–iúÓϲÛ3(I¥N:êUwê/Õ÷´`‡êó¹NÕOªoôAOÀ"üJòtl”1Pô,0èºÒÐòyÐ,£@õ7ÕS¿QT£¨c•Áöhë˜êw<`ö2ƒ¥Wì@º)Ð^Nß`Ü |ädl ŒÙ+{Êr˜g´ï&¯ôÜÒ¶+‚Ÿè(ß9ÐU¬£ņ,lì??ó¾þCÌÅt’Ú¡®{ŠZG1,-]©¶”{>±ÜvÕýBü†-yÎc]¨I;kD. 
nxL¤dëð"‰8Ý:Öìxœ̬ÝãD¢Xë “EÒne-knÁ炱`.˜{3×ãi ¶€=ù˜È‘œ0F†±î&¼ýDÖ­DB^Ö÷ëi7™8âZhO¡l-íÀt°ì]|ôµœ>.’Þ&3>³X».¦Î"bÙȳ2„Ë_os˜/W°&^"Òq9uÀ\°d.ýÍgãÓÙ§øM \†hSG™¶)£nu’2‘[Ë‹4Ñ—ÿ*‘ª™ð»À‹ÆKUô /NÚùÀZPyˇ–¢3C„í‡dB«ôC+![¤yÒÁ0º×Þ³¯CßðØz#k94ÚЗþÚÑ_ßè¿ ÞÒ׉TÐWÞíÐBnÏzt /¾;i¯EE´›Í Ȇ¾Jï¦  ÊAÇ÷"'2hl&”в%n¢°ÔòÔÍÐ%_ éÔÛŠ§RçQäžDý§0¹¾½ôÿ<Å®¶‘’ ƆQí*m;mG£'0ì~ {¹€x´lw ,F÷ˆŠOî,üë:nűþÝj0æfƒ:wÚÏÀ'°åáøÁkÑÙéøµ«£¡øÝlìu~;sƲöf¡g0´$ÜŽ|&tÕ.‹éGíU±‡ûŠjç9£ð¿ó°Õ Y³ýô3†xu.û•±Ü5>ÖûE“XsÒ™øð¢+˜·øîŽ…Ø¾[礮WÚ¿úê(ø)˜Aý«¨ïc=Áæu­Iu ¨‚o¥ÛµR¤ž~2W3îÚïì4‰ú× º{‡¯P>s,D/þD|}sí<ú¹9FâãAK×Gª-u»X+ÑsL#H²~–-HAuÔ·äô#vÍDG' 3Æ(-б\À<<‰=Æ|æÞɬ“ø’À)ÄÁÌùüS)gÎë¾*bõ±Ôéø‹xÖשøÅsÈOf,Àæ¼×Md¬@õí ì&`ÉØè,ƒ…iÕgùÆa# —1n¦¼/¼ÆY€– v®“ç8Yh£ò"ÊO?«îQ”è5›­†ÏflT±óRÃw=rÖ"G"ò7Ÿië%RÇj˜ê'~1¾Èetz¤ÚŒg-c.U‡¨ÀúY:<‹‚Æ\Æ›1ë#±]‡>|è'ãmxŠBÂ~úÑ/Øy<ý‚E`B÷ÁØq ß1ئRØktU‚žÖh}PÛ7ïCŸ.¦û{Ø׌ìôSFžÀúñ>‰¼ëCC'z?¼QÏûeýw0þ\­´?QDÂ#cÑú1¶võ¨Cy¯Þ©§öRf{˜ ë4xå~ÎéôyYQ@ýÇŽDÖwÓD#_ýhæE#´ÆW¢ÃÆ ÌWô¯óÅƈ0¦¡k»ZÌüê+ÈF²~–®íDQ§.…‹èìÃešÍªù³ý0ÌRó 60›Ã^»À¬íŒéTláô9=ÖИû:.RÝKð{Ž‘;“¼õ³ä>Š‚šmÌäÔ}i7z.xÑ`ëó{Èaj 硇—±‹Dè‚ÍçÿÜÇÂW\]?¼?­ ŽŽsüNäн^°ìŠÁ>¸ßª_m£^a<¸Y‡@žîzƒ:÷4Ÿ Æ€Vß¿†ß$ôAKŠ&Y?KC(ð}tè5l‰¦Ã=‹ýß aõƒeŽ£»ðïçk;%¿¦N,õ"I6ªï•Y¯õW[[Û‹¨ø§:r­úñ“Ü$ëgéGó–-VÕ:¥f—½w¬Š UWû¤NóN»hÝ‚ ]j_rJ°Þ.¡—â~Å.¯R¾Ý.YõNi¬µ‹û5§ø^°KäëNIxÞ.i;â|Î.¥NI{Ú.ž·’ü;»äíuІÒï-v‰:Õ)7¡“qƒ]ZOrJõuôkìÒé”Ä5Ð?Ê)þÕv©s8%7Ç.ÍB+ìÒõO‡$/‡Î·É_j—²¯R¸>:R¼Ð.mñ_a—îV‡Ä·Ÿý‰¼Ì.ÍIòÛ%bŸC\síR½×!Msà«É!qivÉü“CRgÛ¥¤Ñ!³ì·Ë!Í3h÷*÷§Ù¥¸Î!e)vÉÙæºÉè§Ö!]“Ï ¦¾à<¯]:ž7XÆòg’>=þÞ! 
ãì’ôˆC|£ &‚ÞL€îf‡TŽDî{èçl»m€ïðU@?Ãì’¾Ú!§Ø%°¹ w|õG~‡T‡œ—8DŽEžÙÉ9†z3‘7y¦A§¯]ò“/Ü?Ï!5ß±ï>×!¹Ýì·Áз<áÎ.öf±Ôÿšýòúù’½Ð@‡”ÿ…½ÙÊÛÙWã´ƒì mÈó!íÓ¯üÑ.±”ÿOP¿Ÿg½ßØ¥>Äëlü öˆ‹?òìÿCÊwRºÜAù;vé©g¯¸¾Ìs7:y‰ç½A»8·Qïetÿ<û÷môóÏ=ž‚~{²JÚ?É~løÏA*ÐÙ6ÉÏĶnCžKË|öœÓÁë©ÏXøÖBw º_Ã;š íÒ–Ës…Dtºš½Ýv‰; }îìtÚ{ß—¶vš÷qÝŸ³ïõóÜAeÃs‡·ÐÅl›dì¢ÏY6IÞaÞût±ñ¼9hÞi¹>‡Ïø¼øàó1öÓy>²žú`Å èŠòÜUæ¹tNŽA¨åúÌ;l\mžÁçÑïTÆçvtí¥ß"è§ìndý{¸7º î½‹î„ÿ³xWUÈ{É8ä/@ÖáÈq3|ƒ§)Šç Ðn|^o°éZƒ^0ó4›x®£ÁìùÁ|Qî>•ý6üD ‚:­ø- >c t[zï!ÀÐI½bî—A¿†úQôßJm·òŒétÚ!Güoæþý÷!÷¹èèAƒyÌ­d~4ž£m"2¿€MŒ¢Ý6t8=.0b;ïn)OÉ`u)¯Ü‹îÇQ׎=úà'»º>‡à£/AW̽ҹä™ó- ±Ë%Ø+cáÃgµ"CÜmØ :K]'F†ö»˜c· óƒ©w¬ù_ö8rtBïYôõ½]ÚáÕÓÇ!õÊS{˜s¼Cº_eLOvHÂNÆ*?ò ãó¶C²¢YW>Ã_ f~ã³Ï2þ7Im:ÿ™Â|÷³ÎLƒç•N©š…ÿ)pJá\»TVâ0WXŸ%â5î/#ÏúÓ™Íc-i¼Æ.ûœRvô?€ÞMv)ßoüy'XW„ùÈ)Ý›¡û>ùÇàçmÚ=m—ø&§ÔWÛ%÷MøªAæ=¬ /°~ìfbîêz×õ:þúøÙ‰®·³4àß·±N‚ÖZûkbk}N|š¿ ¯{Ö¢ZëÞ‰\×TÓ?u"ŸsJy%zªaÝkèß ÿ©¬Ï ‡zäU/º–†à·»?tŠlÛÅ:õ|²NG?€ÿFÞÆMȇüå%¬«ï°Ž‚ÅèQ±¡Ù`zŠÙ}°ý)¦a;®÷X_óÍ:_8ü:_JŒ‰bÎFÞt Â?KÞòZ§êVÖÃ?Á×:|*ú÷².£÷¸;‘›õ>6<…¼wp¿¾ng¼uJ>ؽÕ)éÔ/¿õ™ûUÅØØYH¼B¹w©¸É)ÅÈ@`ö*ê`wNÉC®ìùÄ=ðš{)zB¾´ô¾Ö.)ɬ站C£°»Uè+z+íâ‰f½Ç>ãtOƒ¯o’ŸJý³Žßbßî¨xìë!êÆxmb]scG7:$±ö»‚||^ôó:—ôüÄÙéigžžå0¦ƒÍ¬¾?³ų~SÏ3™uô[ækëÝÌÿóôD»$—8¤49î'>8ƒùÄúˆcÜË 7‚qÜÂz fmuHÆÙðQð˺ßxzÂ!EçÁ×nÖýä{øåbèþÝ!™Wßüyóèæú)‚m£·àd§$ÝMŒ7|1t|Œ×=ô7‹8s#÷çPŽ/`>>ˆïZ‹}U`+Œ‹çqÆãVì°’¸é.3W"7¡ÿ'±û2§”>E<‚h\gÍ¥ÿdþ•w™µ­vWe#hÙã j_ãÿg ç¿¡Kü¬·ƒ5¬Fçú.6ð>{:kЧ¬ÍÓÐ?¾®y*u?þ(×¼Öóµ±nÑÎÿ¹¡„N ôÛ¿À÷²¾:ñë ‡1ÔwÜÝŒ-hñúkäK‚oÝ#üŸÜCýš½Í²WéétH+ö¢cáþ…=@=åE_`»ø‘¸?;ÄǘΜbNú?uHõ|ÞAlÛË:@̺ö3®Æ¶> &\…xŸ5‡5à]b¾ll¸ôcK¥‰Á¦«îamò’'vÍO‡\‡íŸ­Î%f= ßrs-[ÌvØ%ÿ{üö߉¿Âr¿ÀßbM%Îeœ£þŒ_ÀÒ?gž3þŸbÏ­œø^?bN ý~òû‰ÙZˆáöâ×ö±–¿ÁËfï7€ùÆE/ÐVþ–õ ̺±ÝÄ+°ƒAÐÛNa¶3¯c3?W¢gPÏîèzÝVa}[ÖÓÞy4Ü ûRø¼ ;ÀU— ÷ú«¤üEÁרïLG/ï]óY«á±q~ZÝ×à_†Üåös‰-e¯>?ô,kô4bƒçY“g@‹ØÁŸŠox ?F¾BÌp1~l‡Cª.Þ.‡$Ï#¶Ø„—ööæ`îé”7h–ƒm` §DïÁ¦£k0u>ýƒQ`[#1åoá›/‡Þ›ÄÄ`ÙèRÞr%õÞ&ÆX„ï ‘_ÂX¾Çž{íšá L!毸úÝÈw#:›oBŽoé/ÿ –ƒ™`˜Æì.ø£¾µ6©k_î—,nט3q™\Z?+ÑgƉÔ)[˼{Ö`›"õ`ãW"°æb˜0j=Í×@3ŠzÑy¦öq¤ê°r Ïœ™Û??ý,ò˜½÷¼W$~"pñþ% T?S¾™÷`ô]è¿UxïðÑ7òÜÿ™µ š³ðWòÌ{ëÁ帎)øîK‰'âŸçñѳ•ö`éïyyø÷@m/ÁÇQô½Ç`¶ƒÿÂ?k\“wî7u¢ 
Úäu"+ý–ôE—¨ÃoòY¬!ÈÜ}.ëÎXèfÝ™>7úÇ0è¾k²‹æ™¬È®çjõ{2c©çš.#6º•1`àRÆèfô2z71s©-º™Ê<»‚òI¬~da¬“Щž±^D?I¬‹>ôw>÷é·Ôƒ­Œ@7±Û™¼»Èš‚áùØ žƒG±ÅFú 1æU‘¬…lìhÖ–S™×¿!¦9>ûÀ¿›&N┌ ùËûÓé;;ùšñ‹b >1ã’þ}‡žÞ3X2Xô<Ë{›½´a¼ZÁÆcÈ¿e0¸›y@yñvlGñyx¢¾ûhAß»:ô[D¹›þ °Ë!غR›,½Ùâ«<êžM»5\DßW3F Ì%Pß_¹ÀPÏByFs?‹ü8è,ä½¥ùÒ±+õæ!c*icJêšEÿ”¥R–|ï#g¢»ù葺i´Ší,B†ø] ÝIè‡>"¡™¼š˜×9ð<žùŠQ« 6€únÔ“ŒÂ& ÐO"4×1/˜+åEÐM |#²ÇCg õ±ûìesïIòg ßhŸß0÷ÉïÂFRis6þe6ìÃV¡­ñY4»3ˆÝ ¥qQÁVƒQåkÃÖˆK’À,â² úX‰ï£åãÆ¼‘ü$7ÉúYsJóI$­3–M²~V!xH‡¾;;ô:†{š÷ͪƒÇ@rë÷óµ"TüoyH©]!®clüþY<&ï­Ç޲ÙcÔp½Œùú㽘XïaìŸÝSÂØ]ÎÞ…q‹™¾'<ßïe|™ç!îk,.ø˜M?_#:i aÓØ„¥£#ÕZ÷ãÛÎ16à ¿‚e}¹~Èü£ŽkcÁ<«jBÇqøô·Ìº‰n[cÑiº>Ý&Ú&Ždý,ZjŸ ;ð ¬Ñ;ñ7§Þö vakéï]üLcÛÂ|Æ7—`¾ýhüq×?ðYŸ2fßÃÃçø$ƳçÏŒñ·\ÿ…²ÿâœÇ_±‘.—Ä\w@+—{yý‰Ú±cdK$ß8{ìÀYïÜ_`o ¬ ®7ŠÙ‰Gcäõ½AÊßð‰Iì½:‘möÒ:Ä¡“XC ŸíE²Gq£?ú­òÐ÷ø´XÖòïÒû\&‹qÒu! TzN0tcc‘Ôkøß³ :=Ø)X‰`6ÚÃýVaìÉ·Ù{ÐcÇ?!{;¨ïSˆCô¹Qùô~çQÐ½Ç #åqÇã÷r)Çž¼ ¾³tw¤Ú†ÆÎNÆAm#è°,ãg¿´†"÷UÌåOñßKðC­ø¨LâƲn÷Þ`ÌÀú=ÄSK‰ê˜ÿËh¿Ý`4Ìb‚•‹™;/‘À¼mÔ…^É Ì7è·>…­¬dÍy”WƒmÅoå2ׄnõ7c£×á;îÅǰ—»±©›¨Sˆ­ÝÂu¶´û¸\O½kIw‘ {íÒx¥/ý>"ì+–Ѷû‚—è2Ú/f K'=}_‚ VC#ÕÄ‘£é@ÏÍ&¼FÛÒNê‚黬i`-ŸŸ¬}3ÐX< 9ß@>?²p_¿iI ~þdŽ~+^EW`ù+ÐÈ4¨çÃ5¯çú#_DÏÙЂŸ’æÚ“ð†|•›õ5Ó|÷úÞ‚o.¦þÃôC>ó1hq_ÏùÉWWÐ'úL5TÝkÜÐÜ.(WÔ“>D`-´ê!äÚ ÆA+ô¢+E=[§1KïÙ¾rú­¢ýVä{Úð£ºT¾ôl ¢ûYXÿÚÑWl 1'c®g ƒ dòz–LËÛÁxÓ3dú=‹žÉM…çZt—RB›×ÑÍF³þµÞ˸÷»Î5Þí¡¬~ÿDÿØé^ìîV®C¬o7£¯}ÐÉGOï!çMãÃX|#vó>õÁÐè,j¡ÿÐùGø¤|àÞõÈÛÊ5(M^Q¿åV¬§¼lºŽ±øÛ¿=}=° Í`(Œ‰ŸB?vŸÁÛ5ðêwFÏÑQ.íAk¡ZÉOr“¬_r_lÐarI€ÖKŠ6E½ÿ[u†ó™½Žáž¦ÿ›ñs4k@Ñ›‡?ã»ß߈œ¬_E;oòêóÏDlÂ˺RC¾N±Á¬'º‡o"Ÿm}жêºó–Y·:É[}AfÕ›Ÿt$é6ĺ^Pgt[‚lÖϲ‡ã)¨¢ŽúõP¿KÔ˜@Ïrj|ßÊ<Õw!eOcßKØGê”Q'êó/õõú=i ™¹ ƿϼ¿Ä`у鳉>a\¦³Nu°¾LÆ|ßžH[0Œû/ö*“X»Íý°…z5ßÓ)ØÔÃúåe&î,OâNÅÈHƒõ`yױħÉð †ÆÁ'˜FÇ|ÓŽÇ~@éÏû8y ˜Ì€ê7aåI¬ì‹ô{M_«Ì‹ft!2€®0ê¹ûD7¶skï bùóÑXv‚úÌH±‡ûŠ¥ þ }¦õõ{ý@üÀ¹ðG¾s$ú8Õ œf0z{îwǘöº'Ó~õ»Æ |¸N'mÜWy‹©ßŒüqa¬ŽæýÐMž€\ô×¾›‘«}{ú±GHa,Ñ_ÎTÖI>܇¯ïa-ŸÉø1>ú=­çoØÿlƬç —OÑÙ%øüñíŒ{ÝGÔ÷£ŸÖ¸yè»h¸¹@ýͲ­#uWÏéfîdb^ÖÏš;GQPŒå{¸èÏþpþ[ê&ïÆ2‡[1f%g ;Æ,áLt 怌]ÜpæF¸#U‡Ýo0¿ÑCT$Y?K‡'RPG}îߺ=¬«{ð nì¶‘²~èÔï™ô{®–ã™Ï;±Çã vG²ÞR®Ïµµ]6¨qO÷±"õõ;,7í š˜+'PÌíOÊc£ 04€µú%ñ—´9y ½ÖæiX†#uŒªþ‰üèEÇè²sßwø¾®ß®©//c š)wúL¡õ 
¾kª“Ü|¾´úøE‡˜›xH‡Æò‡^3ìÿ+q½EMŒƒdø÷óµ’Xæ*6Ò0RtøM›6)ÈP’UçB®#H~’›dý¬9¥y̸÷Y{`çìÍy«$§U[ïËø9˜Eßáì=ÏÍYÉÆÝœYº†³ŽAÎNä’ó+9ùg+Vpv²žsW  ù2ïó'p¦òiÎdœCù#œSˆ#ÿg@s&³:œ•Ì,áÌÄÑ|[QÄù†>ÐYÏÙÄ¿s>áîÿ•ó[yœÇjqHÉ sÖP²¡ÿ.g9O ç²3íÒÔÈù­+9Sv,à\Ö¾‘5ÊàìXjyÛ會LâÜCÅåôϹˆšùõ¼ƒæ3†\` ˜Ò`Î?äïæÌ#ù.ÎWèý 0sY ž§èÜÉ7œ»(cjÓw˜o7Ò^ƒÎgè7­iœ5©CÁ³©ÿô/6ç:B³8 ù<çÙfræ’smÓù–¢ ¹R8×ñ(ýN¤þÎGŒ£ý:êÅ£‡87‡žòâÎyŠ5œ±†³ÑË`Ε,4gKs}I>޳1S¨ÇY›èó8"èç eM<Ó»Þ*͹•äÍÄœåðlàyz2gån`½¢¯Žµ¬_±œ YMŒ1”3`ö»$ƒÕtÆ€šÏõù¡–§äðÜòTÎç®$öÄ™½UÄ×nιqß5saÔçžš×癵ќYÎþtý‘×s=z?#ŒmQœC%_ÜŸ3%ÜOµ]¨í²¹¯ïNJ@Ý+yAÝ+i=}Ï_~|‘ouOÕ껕—é¯]é{(ՙ‘ŒõJÑO±/:§}Àiètþ@\ݺðü˜=^Õ·æ|›ž‡ªf¯§ç£bÀ¦¯h»˜=]§ÁÌ/ æ|A¹´Ñ®ã!|6XöÏó–Â/®¿žþof}M·õ~§7{_ÌZ<ùæò,ôo†@o,k‚Ëì-"zˆcâØË´²VŸI;ˆ5ÁÌgˆÃ(ï¨ Il´‘8(ø¿ˆXéúYGŒ6±0¤ñäÃS2|¯6¨ïÕ5¯ïÙcÇÓ6‡¾'Hï{ø¶‰ì –Cg2i)Ï¢§ÿ.¢ÿ©ðµ™|Äó‰."Þ»”Xëb³¿Ô=G>׺×ÐóIÜsÏ N£®~§­Ï†‹¡ÕD_ºÏÒ¿ß ïè#ó¹>²;X«A4~ëý;@ÑÈr?kÛ‰Ð{Œ=O?tñ}Ë>k;üC ø2²MŒ÷Gt7Kï>ÍÚ>ÈWÈøD"/X@»°¬?À=bžØÏL<šñüœD{ÁÎpmc®Ñ¿Ëmþ–zNìòdúìà D¿a¸ã±£AfŸVt òôÃÖ3×¢°«!Ľ°7äÓó%iÃÑ›<ƒú|s:JÄyám,cîCÖñÌ/ô'“èw{¼)ðãg¦ã®€ï¹ØûZúÁ–|7#ãBæýFdÄvÝUèIm¶9V`[;¡›Ã<ú–Øz%t¿£?0úÄaì¤~5åÙÆÆ›Á¨ïMy ºÑwb¥È¬kCÄ1ø¨U\»ÐsCÿ.¤ÿZÊ­ý:æñ)ø¼óõ/•µúzÒ`d·ÎPæízæÈ™6ñà¢Î¡ Ô¿×¢çtõª¶»Ð÷xx/d.£‹èäN=¼˼†6è)á&èL3~¬À‰.ÿä¾&^º‹|Òèz•Ã#uõ[9ý[l±µø °õdò1o_ƒ0j7s|€ú·ÜôsõLxÝIÙEæ¾~BÞ‹“ýÆ'Et “¹ŒU7t.£¿£Y×2¸ÏÚŽNõ@Õ¥~˜z56r!1ÁjúcÖoý†Pý‹~S¨ßÇé7†M7B+“˜¢>+ˆ9£ÿJÖëJx›Aýf1ð$~e›ùL¿iÔ5C¿qÌ}‰õû5ýR¿ Óo"³_Å÷Ñ™õ›Éø 剑Y¿©Ôo)õËø½ÈÞý÷¨ÿüý7wçVU•õñ\}¯ÅÔÍÈ(©H±0Ñ(­PÑn†JI%&Ö-™¢Ä"Ãò7»&ÍP’¡R¢QR’’‘‘ab¢ƒŠŠE…I†Š 5T4Có>T4Îç{÷=Ôï8¯ïó>óÌÓ}žÃ—}Î>{¯½öÚû¬³÷Zë4Ò®ãÔó1ôYعB.‚ ʇSiùtÖE6ÎÆNösÆF_ô¿/Èß=ñkd:šóí<+† ?}ËýCC¬„ÿf>[@ù”F‰‡nìOÿŽì]O~4LÄn•g¼|R›{ñÌ¿Åø¦Êg5ìWƇµè òÝM=gÁ»Yø Ó$.„.t#ùÀÊ·&av¾ô‰|gë铚—ÑG G–¢§DGB¿K¤OœÐG££¼ôMóFôÊkÐ1*¸>^m¢Ük±™®Ä×&!ØŠÜŒ-X fŒ…çUØÛŽöë—òõuìB?M2úo󯱫}ûÜI´§»Ý±|”äSì½™ûý°|Žs¦ k5Ó®©Æ'¹v:b[ˆeÝI»¿Áî8Ýø0·ßM;~Änx}k9ü¾Íòyn¹úzá£÷2ê°J„~ï³iW‡9×øPÏ™O9àËót à½áQô×+ñ1ø4ŸŸEðýr‡ß§K>Úòé’ÏvûÊŸI}%ø0Í7¾qòñn(öû|gòWZ>áI¯Sÿ›ø éz5ï/ë)¿¾µÐ½ßøÉ÷ÍYÌûÊGø§ÁƒÜ· ¿’øt½ˆls=ÿ®×q¿ÊÙ‰Scìøª½‰î&„·ƒÛ)gs°åËÞF¶á³´Þê°<ÕôÓóÞÔ¶™ôNê­]ðÌÙM=Ô»—1¸Ÿú:ί×|?í, >Ð~gÅ\ãä` ÿ—ïyY-¬Ï0óV^07~ö¼5‘t+ydwßñ'Ëo“ýφ{˜“þÌT *±ü5ò™ãe»”Ä^È]Ì7½CéÌÑ¿âÿéÌ·g]Js·“ùH럷1ôc.œÆÿ3Ö5_ 
dÌÞjæjŧUÌàÜTæ*¦°?}9ùo¡¬8ÆìMèNnÊ`^TLbùHÕ3ó·bvÆð¿ü`Ó®5ÀÎ1Ì_×YJç’OöºŠáÙfqè™w5mÛF sSŽë tµñÌWÔïÿ ÝŠ±9Œö9’iëqø4Éð%íFÊý¾ÜÄùÏyƒ²·TÝxGw€níÿ·C_‘êI¥üå<ãéí/kEvŠm` iéÕ²1?‰âÊÈo¦½ýêNÊyÝå.úûê¿ÙØÇý™Ô ôæü£å×mY<ù­‚ÏñSŒü®îŸ-¿§sFq³•Gñ`ÛhG}$ù‘ß\˜‹¬¸@ÅåKë& ?lå'9t‘O6ÀñôUX *Ƹâ_Ë?'òÙ¦édÚ!~6ÁëЛÞÝ «{½¥—Ú‘FýäÑ»–0…¾Ð=yÔ©XÃò rƒ¥\êH¨w']·ë8ºdë•Àx;]² +ÄXCgk»ˆzœè…0†Bƒ¬È¾è g"{}dzàGÁ¹Œ©ÞÈU,ãév%ãèÛÛ‹§ §Ü¿ o“W(zP=»v÷æLÃ0“§ãrÆs”lÓ:iGá¥ô×W”=IŒIÙÕ9Ú¸n1ƸžÉaý =<-è¯åä)ëdme¯æ{Æ ãWö{-ÈkÖÈôQtTîÍ9L¾`úþ rL™aµ–¿Ý¾]ä?ÿÄú£ôeÙº÷PÆ€ó9kº7sMÝ»èâ—ÀÓzæf°ëãr0ýü m§²Ã‹ƒ·aÇ_ ìôZ9o÷å(šéäHåˆà°6•îËáâPì¹RÚ+~G„ð'ð³ózHkÞÈù”9 TÂÜfƒâ‘â¸Cç3±üv^PvÅM̉`õ!øþüldÎ-xé\À¼ù|ñÒw´Qñžä¡¸tÅ ü”½…6?Ë­!%ßJÿrȦ!Ÿsò7‘?‰P´ } Ú§˜ÚZJ¥-ŠÛš4²¡!ÿ7ÛZiÙf¶À7ɪbt—ƒV²²},~™L¤?"¨˜ßŠ9¨95}õΧޱ†oêçRø(”ͦ0ÍkäÀõ(í! 挡ðß¶0ç1“vÒ?ånhϥ߯æÜ“Ð?Þ¯0r'›Àˆáðe-e\Åù7É%y*á#iÙ›:â̸ËÁÿoÿ$Ó£hš“#•#‚ÃþÙ²­t_‡¾­’M=ø¦ûg罘3ÒM‹hâÞdy§#xw‡ÒÃeçøÇòoÓ÷VÊgÁK7zrä_ãœ/®ã:X2Ý”^—/c‹ó’1Åi–_tyõS&X‹¬è~ɾ—¢y­‰rS‘…ÑÔ;~S_¨ïÇhͱ úä¤xË]wÒgæù5ýI;“‘¯ ¨LýóËyú"ÐÅhgm¸á‹ëlÞyçY†_'¢3ýLÞ§µÊó‘Ýé†ÎaÝTþÄÿÎi_ÀCžBäGòe÷ÙÉô³žWòùT?W ööÏæÇPNÈžGsºì{dç#{ŸŠdÚßðGö@Ù×#'¡†·É´ÁÇø¢ s®Ð)û£¢‹à“ˆCž³GÒ¯ð5'Îð5‚F&]Hš 3ágÔ•È÷kAëáä“=”ö´n]–õ¥>Î{ÏEŸÁœsýÏ<¿+©Ç×Ç Þ/”Î ã :Ü`&(:ÿ4öTäçZÊwdÄ‚÷co?@í}Y>2#…ÌDè×UöhÎ7v0’/ñ[æî«‚?BùmÈ^­©‰º‘ï¨OÈ Ë!7ó¼øÞL†§ ž)Ô‘ OÁV0ûô’Ý”ÆMó§Œyî³ûìdúYó£t·°ÿŸí§ÉÎZr/;lÅ9*î©°*‘¶/@¥½>/…ÐÞ¢G‘½hZH;àŸP:ˆ0l_L{GrîIÚsí^Âó›þõ¯W\FYKy¶]ʽÏÀŸ!< è“ÁŒ·z–™´ì*ãbàÁ ò_-…ä_W2PîjžIý9ÿ"eŸßKàÑyŒõ— *þ­Ò ¥äãºçæ °iuG"_`z44r]ñk#Aíq9¹Oq²3(§:ç¼D?_Nÿ¯¡,ÚcCëxµš¾ePr&ý_˜Çuat)®®ÖqdhQŽâsËÿ!ï*Ú=~;@èÐ÷F²^¥Î¡ŒË ÐK}îrÚ ] ¹op/ôè&”N«¸·9 øçE>(~enBÆA½)–k+¨ø¼y›é_òlAö¹¯d[€Žw ]a s¸¹®ñ'ÝÚwmç~Ù#ÖSnLí´wýÈõDò‚Ê/ÝTöºO¾³ºOüý¡WCs²4–¾sÆC箣m䓯nÊÛÈçµ—4g}]ožtèÜI_‚q»«”ޤ´äTö\Ò‰¥Ó7º©o?üÍ}ï!Ð#ôPzxÜûð‰öv|hÚa1寬wRß ò1ò¢wVùÚgåZúL»Ð üžËH+yv3užÃ=Ǩ'œ>³ÎFv¸.Œ  ÊíCûÓŽ0òS_do3?ä»àë!äçLîÓ¹^×hò„ogÁP6RŠçÌu¡‡üÂ4ÊÑ;sçÜ€²HG‚Y`lƒ©Gën^®‡}@{NGþÞ§<ê-›(¯LƒÞÖzî§}¡ â¦ç¼‹œ @6÷rr¿ÓAùHÓAÙ(&ƒ²QŒ‹ú! 
»¡ ŒÜŹè?¿3j(²:« ö°© þ~îž”B9J¿còänE^¸GrSHý-[kêIÙFûs~;ù@­ÄDñ“vú¼Õð#’ûÀÆóL¾&ú±~'sMƒ¥`>´ÉF-™ûeWj8†z„ª_hmæ>лÉô‡âàë>½›å©ÜUÈ |³’2ÎGöž…ÿðWþSÂÒ–/ƒ&èJYÊýðO¾HЯ8ýqsí èÇ_í_ŒÌ^B?>NßÇ80¾„.â¾!Ì>Æø¥Ô»>ÅBç#ÜgPvÄJ·€òÛ—=±ÖqôõÉ¢‡yι‘I0ïh›À9s˜{ÇÀò%\ OæÑÞd€ûÃÆ1†À<°€óz§Ñrx|™ 7šr„y”#ÔýaÉÌ Ü§gUÿ¯ß':Þ¥¾Þæ¹WÀ}öÏ–—Ó9¡ñ_J¿h~í¡|î„•{ &®!ÅuÅÍo¬£ý.Ú¹R¶|gjèG¡âßW€Î>ôuk—€\- ä·i¥º9R9èöîŸM›NÄs(ÏhŽpûgçéωHŽŸÛþüÿ(®éøwú)Öž§,“-„Ê?›fiÅåÓžšâôU¾bâö•¯5qü¯Yqý´—¦8¾ìÅMbÏm{}I&®¤âú–r}{cùì©õ'ý[öƒ{;¬®löÿLœÁ;Øsû–xÂÓ-`x {|»Ùû¼ž=¾µØ ^e⦠5q .§ÞÕ!–ã2èZ…Í\,ögÏaƒ7„ü…ìéÆ3ˆ=4ˆ½ÛeØÒ]Ğݣ!VÚ¹ÁVõöR™RF°‡úWÞo.¥ž¯xoLÌ¢/ÐcØmEï-’óŠå§xŒ%Ÿ£—^ÆÞ2iÝïmc{<{Ìßò^ôùO öÇxRäTêõôe/ú£@×&C—l‹†±ç÷{²£Œ d‡›øSc/y¢‰/©=\Å›”M£âOvÌdou8}•Å´Ûa¹g±ßŸáŸâWÆÏ‡_ÓÈ—?< &Å» e¯Uñ/s}ì-.pXÖ"ö¬±Ç *^fÉãÔŸO>ö^OSq8_³ŽókL>ÅßtV<ÎÒÅôë›Äú|‚þ|‡=_Pñ;g3æ=âuR³Þìñ ;1ite’Ž›Ç*Ø:»ƒ¨¬fO6yp|L|È…Ô{ÈÔ× úé«ò  ,z†=kòU"_ 6RŽÒŠ+ê]Ž=à~öhW²÷¼úW÷Ä}~U“^Cûß"þg)ö‰c¹:^cOx=v´¯¨Œ½t”îèWáÛ236~©cß»Ÿ6—œx¿"e7ýöûk;±;)æ]nüyžw?η­fL|ˆÜPFn{ü/±r€ñ¹–}8ÎG‚v¿T¦W1΋ CƒìÙÓê^{9ŸS¹[Á•ì_mfÌ=Ç{p%|kßbÿÿYx·‰1TÈ{4Ø ÊÞ Ì~¹q9{BÈdMëUk™3—²þU›Ï<ôs¨ª­Oñþ¾‘rž!åÄ®`^ Àbê©Ôë3@]—C&˜ ¶€ l:]Ð+ûÌ9´­ Ýfè®´Õûs* *Ž¢{«IÛ¼ø¥öµb*;òN<^üóÏïØo^ïsáóP׬…0Év¬ãyS†¾‰ÐAYµœï|XA»Ž_*«"óÓ ;~.Ýã¥/çZ¢y&¤±NäBNoa=òæž›Y§ Ao‚·<³ó“YŸk…oáñ§f¿ò²z{è ¾+ߌLçPßlõ´[pžáZ?Nú‚ùŒ|ù<ûe?Òð5üŸÀÜEùúŽ·l»eœþkšÔ“ÚƒgÇ$®C—lU:û£Ë¤2®b s*e bÜBw¨ýzÏ%ôùm¬SÚüKíSëÃÇô]úÿЧ½8w^&2÷|À$9–ã#ƒu 6…ÏôA([̲#ô}@»Žÿþ»ty½è°þ>DÇ÷ß=€¿¡pÛÉñ¯Þe4]úÉν'˜æO™?öûÊéþ=ÀóH¿Oݨ{•n~Ÿk\÷#WmÔu»Oí:}ÐW÷šw’º@Q*Юs9ÿ;ÑÝ|{wèrÒQB·¢ÃÕ ƒ ã•ìÀW¯™çRÌ^tUžƒ-ûLÌÿâwÍ7ü±ñ7`'yÐ|# ­É|3ÀJ×tÒsÐ5üܮY;]çCtÕ¹¦üÌûxnG÷Î$½;¾{ÐéKVÆô`ËíÃj ºøý\¿‰çn*öIØ;&SÎuèÒC)ßÍ3"„w !ÌŸónÐgÁû¼Cœ‹N~;všÌSmçòN²‹9ätì`·3†z`Ÿºý+{Ò*æ± â­náY~œ²Þæ™ý#úñfæ² µ’¹ó{æÌ·¸ÀÊïLZö¸UoòŽÓÎõãí0zý+ŒÁ%¼_øÐEž2¨oF(­oHä/dn…'Ù^Æé•²ï'Ý Ú³¨Ë í\Oû/“O{Ê7,˜2Éç b[ºXG¸º¿`-l=Êz¨¸Aµ3ÙªgíÌ®3XÂZ†b†ƒ9ä‹®e ’Ψam"™ØN 悲Ásoc­bíÜÂ+/(;Å@­¨`Ýæ^îå»TôtÜCýYã"­oÝe‚Y`*Xõ:ã ”oJ ùdW#ÛÐä×P3Á¸ Ðv¬ç>úº¤ú§Sßs¬Iƒ²ÙÑw×óW°– 3Pe;X´Œu£;¨¯€õ50 ¬µ‡ ”‚b1ÈFR>N1KY¿» :ž œið{1ùÒèÐʶQÏY¡âñ e'§}Tù6eòshñ±V4•þ~Ì #€Õ^øœý÷³.6‘òÓY#M¤Ré—xø:–öŒ4X4 ⲹÜða0t§ü(Ö¸zÂßsX_jgNsÁc¬ÇÊZòaÚp ôî3ØÓb‘ÃþÙsY0'´Ö©¼ ¯ÒžÓÌúWZ;çOó^)§¢ž7ëÂÅ+ikoÖºá}ý”QHߺ8·„¾î 
YßãåCv¢ÍCèÏ©ôùhê™ÀúW õÐFÅ‘m¿†>›Î½qðh6ô¢’§¹g V:žCyFs„sØ?;ONDrü|íþçÿ3%Z:N´ŽÿÿQF4u„r íV¿ÀÿèûÇOãÿ0ý\úÿÿÿPK !ý1*¢¢ppt/media/image2.png‰PNG  IHDRÛÛ¦oèbKGDÿ‡Ì¿ pHYs  šœtIMEá ×y´53IDATxÚíÝÝy9€QÅë !¹¶¡i$jˆF’ olÀcƒ4ôÃy/öI¼Žíèðif€À¯?A“öb ØŠ­ØŠ­ØŠ-[±[±[±e+¶b+¶b+¶bËVlÅVlÅVlÙŠ­ØŠ­ØŠ­Ø²[±[±[¶b+¶b+¶b+¶lÅVlÅVlÅ–­ØŠ­ØŠ­ØŠ-[±[±[±e+¶b+¶b+¶lÅVlÅVlÅVlÙŠ­ØŠ­ØŠ-[±[±[±[¶b+¶b+¶bËVlÅVlÅVlÅ–­ØŠ­ØŠ­Ø²[±[±[±e+¶b+¶b+¶lÅVlÅVlÅ–­%`+¶b+¶b+¶lÅVlÅVlÅ–­ØŠ­ØŠ­ØŠ-[±[±[±e+¶b+¶b+¶bËV÷zï'¾ÿæ·… _r]C!ÄŽo|ÛUÙÞÑm¿Ê~ÐÒÚvEöœ–î¸çÉ·iÃb GœÛ;dÍî˜sûv÷1»ƒÍí*müî³î@s›Gkt²Í¤…;Žm6-ÜQŽ·´ºƒ]ßæÑÝlßÊháöoûVþåàöm[x°…;îñîø¶›Æn϶[iáλ'ÃíÖvûØÂynÕ¥m•±5¸SÏ-Üîl¸óÎm¬²%ÃíÏ–‡ã­ÁÄ6ÂÀv !™\{²†²­?fwâ¹…Ûm² SÚ.•O“ îìçRp'«m2¸ÃÛ.{JÁµ'k@[ƒknµ“íbpŸpnÜ lÝãèxkp§±Mp‡¶µöOº'܉·p'>—JN¡'>ONÉB_mÔ5ú]_ßnÞ´å¥TÒûkþ-Y“K§Ö[Ï<´×2§˜«l?µbì›ÁÀvm ãí…;ŽmöÎ ·­£âH¾|ä°ãÜÞ•Á­ zù?MmáV7½ø¼CK[í`zö'-m î¦;Ÿ'ßÛâ-+v$=Û–[ìÉpwÓ/ÝŸü°k èÍfvý<[ns.õÔ“{ÚýÚÙ¦ø„¸§ÇË{rñIÞìÔèûŠç6Ö¸!,3Ꞻúi2çöò±ž´epÃØ³{êù‡;dÛ®=Z›6á£{ì–wÈÙ“ã/à¥/ßÑ+ìÉñƇS!þW4¸QÕžÛ;lcæþœ¶íuNß¼kÛoPbÖòeøF mcþ:¦;y#æçR±lõÓ¿ÿšÉv½”,êí¥Ž±& Ú¢Þçö¸T_Óø>» l—s+ D´=ÚÖa‰ÔJ=à1¾˜um•÷9©Ö8eï×@kA±ÂZüpµKýáOÏ6·Ùá˜Ê¾DlEúý)ŽCû½mÕ¿Ä5n,\Ùö¥¬ÿß“¿Þ£«ÌmZ_“*+RóÑ…J/Æ{Û²[‹vãìöpà±wêAúæó“kÿ\1u2¡¾éR.Ð6yžcì\õîf)èøÚãíõ{QÄJ8éë/ãxªû³ã޶¯û~ËT´-ù¹c‰sª¼è‡ðÍÜ~~§XéÖ–¾þ:ÎèZ} ‹œÓåâ½v{ÓŸ§’Î:@§°z×îÇÜ^pc›U sûtÿŒdÙy4^ë]©d]û==lé:c K_;,nç|rÙÊÎ+K¸çñ6­í lÓR>ŸÇÛ‹îψʟڴú‡Ðn9Lÿ´zW¶/ÔXçqs²ûuf{>¸ÛŸ‘À¶î¥\æ§ÏIhÛw}.•s”â–í˜ì#çö˜Ë“Êgmƒ=9ëŽÌ´ëªÇs©‹Så»cþ]h›ooLË®uÐvqž¼ÿó?Ð6°}Ì¢£mw}»ñõIÐöi{4µsÏí®ƒ‹¶™íÑÔÎ;·Ç]më=y÷Ó)µ±Ýs´Œms»Ïà¢mmK`â¹Ýëtʦ=y‡»•Ñö`{üÄMh'›Û3‡„v¾=¹î!m7¶Çʸh;šÛº¸h»Ú“kâ¢íìx[{[VGçRÙOW6¶ãœ'Ÿ .Ú»|~òu%ÏWF;†mî¿ÛD;О|ö°AÙ1m·s{õºSíL¶×¯¾ÑÎc›÷RhDz½Æ½“—ì¶Eºh± Kîû2 Æv]÷[_²CÙfÌ.ÙálCXîy²CÚ†Þn -ÙqmÏ…½î挶ÿ®‹ÀÎi{Ìu>[õÛ‹%`+¶b+¶b+¶lÅVlÅVlÅ–­ØŠ­ØŠ­ØŠ-[±[±[±e+¶b+¶b+¶bËVlÅVlÅVlÙŠ­ØŠ­ØŠ­Ø²[±[±[¶b+¶b+¶b+¶lÅVlÅVlÅ–­ØŠ­ØŠ­Ø²[±[±[±e+¶b+¶b+¶lÅVlÅVlÅVlÙŠ­ØŠ­ØŠ-[±[±[±[¶b+¶b+¶bËVlÅVlÅVlÅ–­ØŠ­ØŠ­Ø²[±[±[¶–€­ØŠ­ØŠ­Ø²[±[±[¶b+¶b+¶b+¶lÅVlÅVlÅ–­ØŠ­ØŠ­ØŠ-[ Ü_ŸŽO^˜BIEND®B`‚PK !—t‰TTdocProps/thumbnail.jpegÿØÿàJFIF``ÿÛCÿÛCÿÀÀ"ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ 
%&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÚ ?þþ(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(®Æß¾ü6[vøãÿø0Þ+5œ>$ñ—¤\Þ*pígk{sÍÚÇÿ-Þ)0 r¢¼ßLý­¿f]\¨µøëðÊ"áÈþÓñ^™¢±Ê6ã¬Í`–RP9S"bH÷FÊÄèz*†¡aªÚA¨iwÖz•…Ôk-µí…Ì7–—0ÊÉÍ»É Ñ° «Æì¤ƒW(¢Š(¢¿Ÿø)÷ü×ãfûhþÌÿðHØ#Zðw„mÚÇM¿ñŠ~<øóÃçÅþý—~Ùh¾,ñ>«ãÁ7‘ âÅøC~øóÄžð®¿pÞ„èšD~"µ{oé÷½¯‰¿à´Ù:¯ÄoÁo?à¨úOí7†®,4­sÆ^8ø/âßÙ®•{›]JûöPÓþøgÀÍ¥M-š{KjÏ[þË[./g%@?vh¯â·öOÿ‚½Á[uïø.ì£ÿ§ý³¬¾ü6À7ß´ï‡þ=ß|7ð7‡àðÿío¡é_³×ʼnÿþ.i·>$ÒüE¯xF1x;Þ*Óaøg⟠A¬Üêš®‘ã=MšÊïÁš7îÇü @ý³|û)þÑ?µoìûxüDý—üOû6þÍÿ>-Íð¾?‚Ÿ³Åß„ßSàç‡|Wñ3]mKþ§Á|Pм_âßióxFßTÒ~%¤;úœ~’ö?ÜxˆõêŠþ¿àÝÛkþ Ëÿˆñ7íO?Çïø)çŇ> ý›ì~ ˆt¿ƒÿ³_ì¦ø›ÅZ—Æ þ+lyuÿ~Ê~6Ò´»ÛáuÀh“Ãw“ÞO¬BÂh#´uŸû¥ ŠóÏŠþ ñGÄ/‡þ!ðƒ>-øóàW‰µˆôñ¥|Vøg¤ü.×|máIluk NwÑôŸ~,ü5½X´²ŸÃú´~#ð¸Ë¢êÚŒš$º'ˆSIñ•þ}ð]¯ø*ü³þ ûi蟳—€à§^!øµà_üð—ÇO k/ý“?b­'žÒ¼Mã_ˆþ—¾#¸Ñ~G£ëú…–«ðÏRÕ#×´Í+Ãö×:~³cfÚ,6ÜÝ€¢Õøÿðÿ‚ëÿÒÅ_ùÈߨóÿ›jþ{?à´?´ïü§ÿwo†þ5×?à¥úíû?üQÕfð†…ñ@ýŽÿdÞè¿m´ëÝlø#Æ>½ø=âÏì{Í_AÓu=gÂú†›âvÏZ³Ñ<@·ÙWZZÛÝ ó'í+ðOãOÆM#C‡à§íñ“öD×´(uõ›Rø[ðóögø‘¤x¾}^-,irx×Dý¡~üY½x|-6q&‘€üAðùï"×u¨¼Cq­:ø~oÿ²ÿü×þ ÁûEÁbu¿ø$­ïüj^h?j¿·maßÙÅvÒÜþÌøÅ­Ïâ;†’ü8ð¼­%øJöqi’xò)4õå»{Ýa´Ãk~þTWóõsÿòÿ‚ð,µ§üK÷! ‚Ÿø$¯ìk’vYn"ñmä¡îék;ѵ~<þÆß¶÷üWà¯üðþ …ÿýª´‹? üg¦üañqÿ„kà‡ìéá/ üdøm§þΟ¼mðßÇ>ñƒ~ x?Ǻ´~7øjš\:Æ›ucâO ëžÔ$Õôˆ§“Tþâè¯Åïø+Wn†ÿ³gíiû^~ÉðR?‹¿uOÿ1Âiÿ ‹þçýK? 
?áÿ„sþgýGµÿ·æý—ÿïòŠþpþ(~Àÿðqƒà/k¿à¾~ø‘ñMÑïï<+àoÁ3?dŸ…þñ>¯mi4ÖZ5÷Žì#ø‘?‡Q¹Hl×RoêÖö¦>â!lOÍðlGü3þ 5ûkøëþ QðËþ -ñVûÇÞ=ý”¼[û?øKðƧðÇàÏÃKá¿‹µ}cö–ðÿÅ}P_„ðT:½ñÖ~h:}ØÖ¥Ö£Ó§Ð¤<–‹{¨5èõ¡EPEPEP_‹ÿ·GüwÄ>ñ^«ðöu}_ˆI§Ý¦±ã-Wuݾ$/­äÚ}¬>nÛ}>öêËJ}FKk¹µ bêM;H‚Ò+CÄÚOß?¶ÆåøðSÄ> ³¹þ%Ö£—@ðÖÙLSÅ}yž}ô,™–9--ÁK[”I×SºÓ¤ ëù,Ó4ëé|aãÿk+pÚŽ¿ªiV¶“K1t:&Ÿ¡ØÜ,Ö,V×6§P×5MrçP†XRî8–cl·|×â?ü@ñ÷‚ ‡âV­¯x‡âWÅÿÛk^ñ½{?ÛUo XZë7÷RüðÙÝÅ&¬–’cÌ– BMIV{{׸?liz§¯Ü\G£ØË"ü.ñ׆ü¯hQêZ—Ù¼5¨I ü¼ýšþ0ü)ðÖ±¯‡5¨< *xÎÒÏQ‚hŸZñ¹ÖUo,å¾·´þÂ?`?ø-ü³þ C£è«û=|ðÕ§ÄýJÎÖ]Göø›sið÷㎉¨¾›¥ê:ž—‚u»´ÿ„Ö-MZßMÔ|OðÏPñ¿‚_SŽæÒÃÄׯo.ߨ>$øö-ÿ‚…ü1ø¥ðSâŸÀŸÚ¿áŸ…|}®|,ø³àû}wÂß-~ü[ðD–âxŠãÚæ©ðÛâׂg¼±“PÓ>ÛáïøR{»Y$]:[ˆ™ÿ/Û§þ °øcâg×|iÿöý¢u_†Z“Ŭj_?hn¼eà›‹Ö†Ñô}ß4bñŸ„ô¨&†ú%ñg†~'êW?n³yu‹eÓ§m@úý£?à˜~"ø¡ÿšÿ‚xÿÁQ<¨|<Ñ´ÏÙ»áíð³ö‡°×uŸYxëÆzŠþ|LðŸÁà MðÞ¯á^ïÃÞ*ø¿ãi|câwÁº„^“KŽÒëÅMÓt3éÏø+ü¢Ëþ Yÿfûdë:üF¯ãëþcÿaý¿cø(§üsþ s­x§Æóßø¶÷á§‚õÿˆþ)µñÏÄ?„¿uþOèãâeα¨^ø÷ágÄ}&ëMµðl7ú®½w£ŸxB/ Kcá³&mý‚ÿÁX¿å_ðRÏû0Û#ÿY×â5 _ðcüåþì›ÿ~â¿¿ÊþÿàÆ?ùÊ/ýÙ7þýÅ”Wù‚Áêßò”߀ö` ?õ¢¿jªÿOºÿ0Oø=[þR›ðþÌágþ´WíU@é±âïxSÀ>Ö¼eãèžðŸ‡4ë½__ñ'ˆõ;=DÑô»$º¼¿Ôu+ù ´´µ¶·ŠI¥–iUU‰(|øEáß‹Ÿ /|]ð3á%§Á¯‹?|kã fßÇö^?Ò¾'ÝÿÂ]mwâ½CÂÚ—Ãý7P¶Oì9¼¥i÷ZÞ—¨Gâ¿ø5gþyâk:6‹û"ë~Õµ-:òÏOñw‡?hïÚvÿ\ðõÝżÛêÚe—Œ~1ø§ÃW–:]ÛÁ­øUÓ¥–ŽòÊæÝ¤…ùø6ö:øÉûþÈŸ¶_ìãñ§Á¾:ðÎ­àŸø)í¥ø;Ä>6ðˆ¾[ü\ø{ῆ?< áÓ¼CþÛøsãëŸj·Þñ.…¨ëÞÔ’Þî 3_Õ>Å<ôý&×ù‚Á4ÿårOˆßöÿðV/ýA¿lŠÿOºÿ(_Ùßö\øûiÁÕÿ´ìÑûKøþOÁ/‰_·ÿüïþ_ÂMãmÂoûTxÿßñQøÄ>ñnýâß è:·üJuë¶}ƒìÿjÓ.¯,î?Ñ£þ ¡û{x?þ Åûü}ý¦µ}{ÁVž=ð€5Ë‚>ñªêZ†ãߚ͜ºGÂï ÝxsÃúŽ•â}{C¼ñÞ•7‹bе&{ [kš¥Î½áë­nÃöbøkð“öæøkÿÃÿ‚œütøUái¿k þÊ~ø³ðóÆ>Õ¼á½áƵû[~Ïþ—ã&‡ øY$Õ4 éÿ?á=¾ð¥œò^hº¥¶½u¬ÝþÿÁSÿàÖ?ø'd_°ßíãOØ7ösñ¯ÃÚ{ᇀ5¿‰¿ l|ñ ã÷ƽKâ]ïm%ñ§ðªÛáç¾#øæ]wZø…¢Øjðœ^µ‡Äpø¾û@žÂÛ[T¸ðî±ýÿÁ2¼'â¯Á6ÿàŸ>ñ׆|Aà¿x/ö ý”<'ãx³FÔ|9⯠ø«ÃŸ¼£ø‡Ã^%ðö±mg«è^ е{;½/YѵKK]GKÔmnlo­ ¹‚X”Ÿÿ‚±Ê,¿à¥Ÿö`¶Gþ³¯Äjþ@¿àÆ?ùÊ/ýÙ7þýÅ_¿ðV/ùE—ü³þÌöÈÿÖuø_Àü•ûÿÃqÿÃyÿÆj~ßÿ±ÿü*ÿøeÏù1Ú;þÿþü&¿ðÑ_òTâñoü%¿ð‰Â%ÿOüƒÿ°á&ñwü}ÿm£y_ðPoø)Gƒÿ`߈_°G»¿éŸ~ ~Ý¿¶oÂoÙgÃÞ“â¯õŸø;Ǻݟ‡4Ð&kHõ©~~É_³×ÁOÿ´wíð»áÍ·„¾2þÖ׿ 5ÚÅÖž ñmì_¯þøXð¿ÃûÇðÆ­¯ßøCÂ÷:>¯ë0ÞÍà½Ã’x’óPŸWñCkZ¹[åþ&ÿà£ßðG/þßðRø"×í[eûaþյπ,]|aø±àŸj<#ã¯Aáߦƒ¢Y_xSÄZ„¼pÚ­”ö:UÞ“«hZ|ÖË­A­Ýûð Š( Š( Š( ç+þ •ñ£Uð×ü"¾Ñõ,æ±Òm 
Œ°"<–zþ»¨=Í÷˜].¼3§…ä_2«h'¶d•CÍß =ljíxÿHø©ðóÁ¿t, 3ÆÓµÈ`ó’áì&»Mö•q4@FךEø¹Ó/B€òÒtÀ+ü~êÚ>¥¡^ VÛì—‹JöæX%tIA1ùžD²¬lÊ7ˆÜ¬›nÇFoÜ¿ø%ÅèõxËྩ¨Ô¼#©¿‹|/g3Ÿ7þuá‡Y·´@¡~Ë¥ø‹Ó’Í'Ú|OÏÉ´(òåÿÿ‚®x'ö7ÿ‚êÿÁZc_ÚÅö^ø#ûWÁDj½Wá÷‹µýBÏNð¿€ÿh-öƒø¢X[kWsÚÆº~™ñoÃÒXxbã\¿ÕRÃKñ…<fö‘Zkz®­§ÿ‘È’Æ’Äé$R"É‘°xäÀdtu%]Hee%XA ׿—Š?àŽ_ðLÏøGö„ðOŒÿd/†¾/ÑÿjO^;ý¢þ3Þx²ãÅ~(ñN±ñ»âD…¿Š¾"xKÆÞ ñ¥ã…ZÔ©«êçDƒáF»à­'ÂRjú¼¾Ó´I5]A®~Wð×üà'€õØ|6ý¹à­? ~xgLƒDÒeo‡ÿðP¯‹žýŸmtKM.]*ËF´Ò­boˆú^•c²kM?Eø—¦ZÁý•§Ú¬_Ù¢îÆìñûöÇý™´ŸÛÛþÉý“¯>Ãiâo~¿¾|_ý¬¼YáË úoÃþ+|aø›ðÓž)Õ´ó=´~5×µ-SàÅ…¿…/'·Õï<:5{†³m/úĶßÒ·üšX¡ÿ‚XÁJÞi#‰ì ûaÄGTS,ÿ³×Ä8 Œ3 “M$pÄ€î’WHÐeÝ¿fOÙ#örýüsðÛöløSá¿…þÕ5y|Iây´±}©ø£Çž-¹¶·´¿ñ·Äk÷š·Œþ#xãT‚ÖÝu_øß^×¼I©yHo5)¶Œywímÿìý•ÿnx¦ÓÿiÝã<1wá›Oj4Ú³ö®øMð›]Ñlu}C\€ø—áÁßžø_âmeµFF¹ñ?ˆ<#©xšúÎÏDÒïu{3úžšübÿÁþ&ðþŸ®ÿÁK|1}­i¶~!ñì}©èz5ÍÜ0ê:µ†qûOÚëWZ}´Ž²]Å¥Üx‡CŠðÂÂú¥žàÊkûÝø“ñGá¿ÁßxƒâÅxKáǼ+¢ê¾"ñ‹×õïý¯ZñŸ‹õ-_Ä:¥¤úÜ>·¿ÔnDðÍ® Ùtý2ÖÞ/àÇþ íã_øCþøâx—DÐ<7sÿÿ‚žikÚ®¥kg¤K©xÏEý¬ü-á;4Ô&‘mZoø]Ñ´m$y¸¼Ô5;+xK<éŸôB×?àŸ²Þ³ð[à·ìïi¥üoð7ÁŸÙûÁ¯ðûᇾþןµïÀû+/;FÒaѼ_¨üøéà]sâšiúv…ek£Þ|SÕø›Eñ¾ |T×®m,•m®g•|£x·NÖ¬­müY£\Íö¯üBãÿ(ÿ£ÿÍ™ý°ÿú «¯ø{ÿ×ÁþøûÀÿüûÿaxçáÇ‹ü5ãßkŸðÑµŽ©ýâÏëV^!ðî«ý™¬üvÔt}Gû;XÓ¬ï>êé÷Úmß“ö{ë;›Y%À>Ìÿ‚³K?ðKø)[Í$q!ý?l8ƒHêŠeŸözø‡†b’i¤ŽÒJéì ÿÿðc-Õ²\ÿÁOìžâ¼¸ƒö.º‚Ñ¥nf¶´“ö®Šîâ( d‚Ö[Û(î%Dhá’îÕ$ekˆƒÿ`Ÿ´÷üöDý²¯|c7í!¤þÐô?A¦Zø«áÌ?¶¿í±àŸ‚Ú…®“¦é]­µ¿ÀŸ‡¿´/…> èðM…¦ßjpèžÓ£Öõøî|O¬­ÿˆõGU»ù3áGü·ÿ‹ø â–ñÏÀßÙÏâgÁ¾›>ŒÞ0øQûjþÝŸ¼RÚEÕÍíÖ”Þ ð‡í/£êÇM¹¼Ó´û¹ìMÙµšæÆÎy"im`dý²¢¹ÿ økNð_…|5àíçÄšG„ü?£xgK¼ñg‹åÒl.uIö;#ZÆya%Ó£À Ç3‡Ya¾¾ž]š,Óˆí´Í+L·y|¸Qb‰Uy  ËÍ4„"( ,Ó:€ØgÀ>øçÂÐnm5;›«mFçP’æá¾Ã$ÈñùQÅnËl²³EFX¬â6Y¦›ËR‡qæ¾$üGoÉ›¥‰íô;YL‡Ìù%Ô§RV9åŒR×-oÛŸÍ™VQBçšî¯s¯êú†±vOŸró•-¼C º¶ÕÝ´ ¼d¨>\K‘šØð7Ò]ë7×S¾Ê’À„>Þs#G>¯k£”]#AI#Vñ¡q‹iº°JòûOƒWº¶’{;´Gcê÷Á¿ø)—Å-kIÖ ñ炼¨M¤Û[ˆ\’‰ h.¼C§ê3DaÎT̉$‹3*Ú´×íkypÏ$Ÿdhç_i ’ ¶ÚN›kÆ8‘ $’\äŠýç¶´µ³ŒCgmoié´1ÁÉ$á"TQÉ'Ô“Ôš±@…þÕ_´îŽæÕÞÝàð³hÔO?Š<â-C*É;¥ö…lK3J-Þ5Lƒˆ„¯Dðßü?ÄvñIá¯x€†A4ÞÖ/´¸[1ldHî.üFÐH×'z‰%”¤_èì'úE}ñªüø+¬ÄÑ^|2ðŒ*ÝN•¤Ã Ëü?vmû:eû£•÷›>5âŸØKövñD"ðÖ££¨;”éú¡¿*ù$ºÿÂMoâŒÀ•-GÚNÖV;¨…Ò?oŸ ÌOö÷ÃÍwMþÈÖ´ýlã-’E姇ð@ “’Ì26ýŸíÑð‚á‚\èþ=°àfIôhA,´ñó)Þsà0m¡¼{]ÿ‚oxe®RëÂ?5Ý-Ñ’ÚÂîÒêXHþ”éZÞ•§RÙÐåÜêŽ׌k°Oǽ+é´xwÄûŸuœóÛ›„ Í•Xï4 
Ç…Ý÷'×eÛ YYËîûö×ö¿øp¹—ÆvGŸ–ëÂþ(véÿZEÚüÝFôù¶œ ìtßÚ+à†ªµø•á˜b£ûJê]ä “%ux,X.ÒrÌaŒ€¨ü]ñ7ìåûKxePjµ›Ã30¼¾ð¼÷—«l)Y ƒN‡ÆztJÂk«x `&F#Ä5¼g¡ý®ÓR‡Ä]ÞŸ6ÉÛ]𵦱$ª¬û—ìž½³½·p6ù¯q§Â± ù’72@?£ëŠ_ µGX´ßˆž¿•š4X¬üY ÜL^bDIåE~Ò $ „B¡Ø‚$]µõ•ꇳ¼µ»F Uí®"X+lb'pB·ÊÄå<ñ_ËÕÏ5k‘„ð¦«pÑÅ$vÛW>ÕnD›Wjé:壽¬›‰q ×!@cUiT­C'ÅÛ-§XðÇŠ4ÄóLÜIe –±É¸¶vž™v‘!1Ì„–PÊHõ+E3:?Ç} . þÏñ®¯¤ÜF˜‚VŸXÓ|…hÌm]&È ÄdÄÃÎD(J)e$Wwiñ«T¿*Ïâ¶©q/?ºÆ·ÆR2V3¨‡eeR¹8Îhú+¢¿Ÿø‹ãW SÇ^)pƒ.WÄú³d"øírqÇ=+÷X¾¿ q¨ê—w«$†á¦½½šä<³Lí$ò¸i%g$ÊX³—?1-ÈôEq¨XZ+½Õí¥²G·Ìk‹˜aX÷«½¤u ¸²…ÜFK9"–ÒúÊý ¶7–·±«mi-."¸En»KÂΠÍ4ºÏŽ|) «ÿhëVk2`KyÝæXàfÚÛÍ•SºEEd°¯>ãçŠmõ­2/„Wšÿ‡|HÚ¯Ù5ûk£atŸfº‚çgÙí¤ž;6à@ÃSƒRYͦ‰¡½²–eDþ©h®#ៈïsý£©\ùlä“@?±üßQµkOütmOÖ·Oö¹t딎ß∡™ ›iáw»Oi>`ðO%™¯­oLÙ—–ßп|࿆~²ð—€|5¤øWúz¨·Ó4‹U·äÇÞ^Mó\ê:ÂĆóSÔ&ºÔ/d_:îæyIsØQ@ÿÿjOÛ[Jý¯ÛàÇ‚.f¿…‹âïÀß ü;øwû@ü#ø§qâoÚçàçˆÂÚïÇ¿~ϵ6ñïáçÁm?â÷à IñfK?Ù~o…þ)ivÄnð7Äo7ÈõßÛ?öþð–—ñÓâú¿ì£ãŸ~Ï_·‰~øÇÁšìññƒÂž6×fÏ‡ß t|CáÏÝþÖž0Ò—ã™à»ëÕÑL¾ ¼ð浨è$~ŠmvÒÃNû ]ÿ‚vh¾$ø±ñÇZÇíSûYÞ|,ø§ñ«À³‹§Nxv©º˜ð÷àðuhººQÅÔͱ¼/ŠtêQU–‹­Ìêaq8ÌGŸ|Mý°iÝWö¤øûð»àv½û=è_þ~Å~*øåáxûáÄ?‰Þ$ñ—ÅŸë×ú%ãÿjxWöƒøU ÿ®ÑïVßC½Ñítqâ}OÄ/Š-“ÅZ-¼63H¿²ŸíƒûT\øËö9ð¯íc/Àhß·§ìõsñ‹àç¾|5ø‰ðno†ß<1ð÷Âß¼[ðoâ¾ üiý dñ†›©xÅWׄ~.h>4ð| ¨ø+^ðÞ¿ðæÊmoÃz­Çi¤Á-< á^úóáçí;ûU|?Ðfý•ïd]#ÁºU÷ìéâÃõ;«cRñ-޽ñ?öpñ÷Ä]sâ6£âÍCXñ•ïˆ|qãŸé×:öµº h §èv>—û2ÿÁ?üû9ëþñn¹ñ³ã×í'⿃¢ø ð+]øýwðn/øR_ä‡ÃPkžððàÇÀÏÏ}âÈ|à¨4Ò~:|[ÿ„_ƾ ÓüUñ¢/i¼%ã_ I=®Ÿ¬ü!øgðÊïÂ,ÓBÐ>1x»]_iú«ãßì7«ükøù¡~Ñ>ý²¿jïÙÛÆ~øAâ_‚ZN•ðBÃöIºðݯƒ|kâOx«ÆSÇÿ ËöRø×âxüAâ=cÁþ–}^ÄÚ2xbÅ|-‚÷þ “Yñ{Ïø$ÇÃËχßþŸÚ—ö·‡Ãÿþ~ÊÒé5OÙÆoxOýŽuÍ/Ä¿ |_àý~÷ök»Ô¯|s«köš¾­ã­SâÞ<ҵ˿kÇNÐô8íü5‡¢ σ§QÔ¥ŒÃTÅNµX:nXš‹3\Ë QÊ^Ò”èQáÇ–e±ÂW¢éâkÊT±Jž WûCjŠÚ§MÒ– S/J=¬a‡Âà ÊpÚ\´”+Ƶlñæy•LNª«G I¼4ªâñXzXN§\ÿ‚ø+Â^ñŠ~ÍÖµ?ß³¿íðjŒÐjRãüKñ‡ìQàψ¾ ›Ãßo¾øºÖÿàŸˆ?l;ŽWÂßMñö¯ªþÍ_ ~;xz÷á·‰|;â k:ûÞϧÙún¯ÿØðþ¹àM{@Ô¿jŸÚ¢çâg¾>ü"ý¢>#þÑ“7ìÉqñâŠ~O¡]|"ðž­£Ý~ÌÓü ðÏïÝxS—vøkð[À²_^è“_k:ž¥{â¿Üx²ßíKÿÛðGíKâωÞ(Ô?hOÚCàí¿Æïƒþ ø+ñ—Ãî¾â?…¾ë^=ñËíJûâÏÀ?‹/ðÖ¯àß|Fñ­iÃßx7Ã~!•à³ñ¿‡|U¥µõ…öÔå,<ëAV¨³:3ÄR£ÍË©pþXçET«8V¤ñ9üóuZ´UjôéÑÁ,=:xiUœæ¬g(b~ÎÙ•a†–!&ÖcS<®£Z¯²R‹ú¾J°òÃÑŒU ·«M_­Î2¡ÏkÿðS‹-'Åßü¡þÃß·—Ä 
ÙÇÆ?|ñ]ð/ÃoƒŸ…¤Ö~|,Ò~4éá½{Søÿ£Zë6¾k6ZÇ×f¹:ÕΙà/Úøâ'ˆü)á={οi¿ø)'Žô/ÙãÇÆo€¿³?Ç‹w¾ýžµ/øÊúûöXÔ¼3w{ûPøwú¾‹®ø|ŸÚ?PÒuíSàůŒ|<þ6ðþ¾4í7Vñ±áfðœÞ8øu¬ÝxÒ×놳wŽÿfë_ÚsÆß>)xïö•ø™ñçÆ|T‡Áß´¯Š>ü1ø}iñOðo‡ü†Ÿã_ÙûöVµñ'„|7«xOÂ>Ñõ[½[Àÿ®íÃ:†›¤SRñÞµæÓÿÁ:<ªþð~Å>9ñÏÁ 꺎âoëŸ/<+{©iZÌ~;¶ø“7„ü 7Çÿ|k°Òþèݦ›á/hžƒ}{áO†>ð¿‚ô[Í/MÒ …9êA¼%J.u*âOÔl;tsyÓÄKˆj©Tö.xZ؉*xz˜^ZÙ~öyãs<>.–ª_¾H¨S„ó æT©Wƒ©åêXYå\’¥*²†# 2¥:xØU§™O5ÄÕö˜jyB¥WšøGûuhñ>üCð·íâ©aøƒáïÙëÅ?µoÆß ~Ì–>‹öŸñw„mþ"éß> Oû=øƒÂz;øòÂÃ\Ò¼ /Ä?„ÿ%ý™&ñïöû?Œš¿Ä ‰í/=öeý´ü-ûgj𗇡ý“¿iÿ‡ž°´øm{ãOŸ >Y|6üø±wðsâÂèu¿ üOø‰÷ŒôŸÚO©i—v¶3øÆ>·Õux×Äw^ñ^¡Mcÿüð%¯Æ{‹w¿ÿh]kD·ø›£|{Õ>j>&øyÁ~ÑÚ/€m~ÃñßXÓôo…úOÄ]iV6šõçÃüEÐÿg—ñõ¼_ ø/oã!ý´}—öYýœ!ý–¾j ->/üWøÑg©|Eøñ/þ_Œ0|$‡ÄÖ:¿ÅOêž?ñv—gÿ oá?Áï kþ#ñ”WÞ½Õlf×nôØõc¡YèºV•ÙNQšö˜¥Oë•*W§†Œ©`ªW©€ÀÒœ0z:Ô¨ÑÏkg˜¼ *´¡í8k ñuégO>¡Šçœ%:xYTT•l)TÅ¸ÖÆÇ KšÔ©*Î2ö5j×ÊipöWžr†‰â Øu2••Ô£­âoÙƒà/‹þÕøk Bv²ìeºÐ#þömt[‹)f¶•:|¹™üOÿÔø#©-̾ÕüUà»É÷yb¹‚M:%99,ôÄÐnï#L“¼Ôæ‘ 6%Ú̧ôNŠÀØü-ñïüëã%¥ãOà?ˆñŽž8 ãM>]'^dÚ–×k¢kñ܃óyu;Wˆ£™2çäo~Å´Ç‚#’Mgà&³ªX¬æ½øÿ …ÜêŽqsž…«ø¢úÚÞ`¹gÔ4 FFô·“‘ýEÑ@Lj|+}á{±§x«Ã4ð]þý—Ä:TÑ]ɱ™\ -FËóD¶«>é°Á@Hž6ú[ü°êWýw_iÞDA{+ÍJ}Äcn-Š“Ì€n?Ù啞£m-ž¡ik}i:”šÖòÞ+«i‚ ËèñH¤ ºA Šð­söTý›}7Â>‡w#\Êo4Km:ì\1‘œ\ „é)$‹*«€å6ßKÑÈg¼ñ5”h#.#²Óµ‹»¦qŒD±ÜÙiÖ¹n~v½T8$høàmsâ§Ä-á¯Ã=P¹¼ñݬ~"ñâ·Ÿ¥x^ mröi,ÅáÝ)!óRiþ×quw4¶v6÷_l¸µ·èCþ?öJÿ¢1¢àëÅßüÐ×Ðþ øwà?†ÚYÑ|àï ø;Kvç´ðæc¥%ÜÑÆ"[›÷´†)oîü°¯/džêAþ²f$šßÑt‹èÚNƒ¥Ãö}3DÓ,4: ³ym¬VVîbY¼»xcMÌI;rI5§EQEQEQEøAûn|!ø•ñgþ CðïÀ~ñÆ[x—ö6ñ¿Œ¾*[øoþ oûkþÄžøooጼ!ƇÞýŸìükð÷Çü'áÏkf×Áþ4ð÷üâéÞãÇ+¸]6ÌCíjÚ3Aø¿ðÇáŸÁŠŸ³µÏ€j¿Ù×Â^(ý†|añ7à¯ÄŸk<ø© ø¯áÝÇÄë/ˆšþûL|/içà?‹ãƒ£xoÁ~ ñ+ø~ÓÇÞ0¼Ôõ;„:ׇüeö_ÆØGöý¡ü`~!üýe?Ž^?m.ÇDoüaýž>|Lñƒhºa´Ý ø›Æ¾Öõ£¥éísrll ïÙ- Äæ£2É»èM7Àþ Ѭ<¥i𾕥ü;³·Óþéºo‡ô›XZhrøbÒÇÁÖv¶‘[øfÎ×ÃSÏáë{m;(aÐæ—IÂG·/l6BµêO˜æXª‘‹ýÖ>–c˜q>*Ø®k/kÃç9m<•ܵò´ñ“Ç`#€Ë°+ÍSŠÄA¥ øl(9¤êeøŒ†¨AP„ÕjUp¸Ü^I‹«šPŸ³†#˜VÃáiàqØŒfk_ñÛã/íõûMøö×ð·Ãßé>øû,EûRü ý“¾,ë·í|}àŠ?ü)áMFïÁÚWÇ_öë“Æ>6ñÎ…Ž<'ñ0ZxcöÔ¾Â«Üøûã§Ž<=¯ê¶^gû øïÆ¿¾<øƒörø+¤üø}ðƒãWíËÿ5ÑŽ—7ÂÝX·„|ðÿà ø…àKï Gá?ˆžðÄ~žêoj1ð/ü#cZñU¥µÝöãO Ée¨Ý\~²xçö*ýþ'üC¾ø»ñ+öJý™¾!üXÕ!ðý¾§ñ?Ç?¾x³â£oá;íTð¬Þ5×ü+¨x’î j^Ð5Åq©Èš5ö‡£ÝéËmq¦YI­û~Áúÿ†uï뿱7ì­x;Å_æø»â 
êß³wÁ½KÃ>$ø¯qgy§\|N×´ÏM¥ëçÓõBÆoj·$–ÎúòÕõ&‚êt~|5)S‡=Ysâç•gEJД¢ã†Íñ¼ŒšŒà©Î¬0Õøkˆ1X(ÕÿlÂÿlåØOíõò˜ç%{j”£Jnކ3$ÇS£*qªÝ|£Æ8IJjrÖ®.Ž}’᱕¹Ý<_ö^7S æSË¡ø›â¿ø+wí¡ið7áŸÆ¿‡ºGÁ/‰³ø/àï‚~+þ×>ðïìíy§øš?о:|Dømá­kMøÍã¿ø(—€¼Aà;_‹úÿYü=ð§ÃïÙÿöÍñ7„¯ñDðç‹|+k7¡è_n;/‰Ÿu|Yø1ñGàå—ü'áìú|+okûM|!ø“ð‹Ãþ!¾ý¬|3yáx?öžñ6‰ñÁ©aâ{ þÏÞ#ðïÃÿ‡^"×µïüMñ¡âê-ðCUýY¼ÿ‚pÿÁ<µ}ÏPýƒcûOéÞ(Ñü?kyû.üº·Ð´ŸÜj×~4Òôh'ð3ǦiÞ/»×õÛŸYY,ÚýƵ«MªÅw&£xÓXñOü³þ ùãŸÅñÆ¿°·ìsãˆ'‡cƒÇ^)ý˜þ xƒÆ0Çá 3MÑ|&‘xŸVðEÞ·x_GÑ´'ê—ÁtM3JÓl4Ñmkck]9:¼&*ºUÕ9W¥u VöÜOÃö&1¯J£FÁpþ+'ËÕ,4'–lTÌp•!_F^|m˜œ6+ F³Ãª”)à %iìjÑáŒÿ"…JœÏš´ªãóÊYÎ.RŸ..9U ³J¼1˜œd?3¼-ûzÁD4‡^ø£ñÃÿ±ÿÄã?þßZ'…¾ü8ð·ÄŸ„š•ßÅØâÿâ4ñV©ñoâ‡Ç_øCBð?ÄøþêxƒÀz§„íçø~÷öšŠühñ-¤wPAêþ+~Ù_´‡ìëûnë_´f³ƒð³Høs©èßüaðëöSý²àš_uÍZÛá,>2ñ—‹Æ•ñö—ñ_Ç/ÚxOÅÚˆðO‡µ¯éþ‹\½ðÞ¥â?xÇ\Ò5Óôÿ®¼kÿÔýˆ|Oðꇾýšþü›ÃÚ_m>|@øðWà×€¾"| Ö>%ÚGkã?|Ö“áö£¦øÄþ"ò-_ÄsC¢^i1ŠÕ4Ïèþ%Ðæ»Òî|ƒö[ÿ‚C~É_²õ·ÅÍ>ß@Ñ~.h_|ið÷⃼oû=~Ä¿ þø“Áv÷òjsh^-øQû$~Ê¿³?Âÿ‰âæõ•­µoŒ>ø‰¯ønѵ=+Áš·†´xÂÃÄ$ð׋ÝOÂVWE—‡|I«}‹RšÛîŒðP¿Û¹¿hÚ[ÂßþøMø û:xÛUøâ_‰_|7ðŸTÒtˆrüÑþ%øoâ6·¬Kÿ ø7ñPÒµøßÁ&…ðÃ?²6¡©üDðä§]ð¿íc¨x…4_ þƒi¿ðLø&ΣøÃÚ?üÛöÒ´ÛiÖ^.ÐôßÙ;à-Žâ›=T¶×4‹Oé–¾ŠË\¶Òõ«+=cNƒS‚ê+RÒÛP¶H®àŠeôÏþÆ?±ïį'Ž>#~ʳWükÃ럄±ø¿Æ¿¾ø«Åü*¼ÒõMóášxƒ]ðµþ¬Ÿ®ôMsZÑî|·cÓézÆ©§Ë¦µ¦¡w ݹ¬§ŽÄãqXY¼kÑÌMFXJ¬V/ÚáñRåTå%K,¦²J”(,4)¼RÎ2ÙeÿØÙ>QK‡,£õ*8z8«c§Me¯œ§Uz84)T¦¹åZ1u±ŽY’«^X©TXìüÂ9ö®i˜Tü§ðgísÿ?ñ·†¼+©¿‰?`¿ ÜüEý|;ûti—«ðö„ñ¤L:~›&¹ðvûD?´ÿdø²<]'ˆ4ÝCIø¥oâ/ƒ_ð­ÓK¾Ñ/>|UmZßÄ:EÙŸþ iûgþÓ|wáŸÙ»ÃšìɳðWá§ÅïjçàÕŒžñ¿ÆÙóáïÅÛ_iß]ø[ö~øMáû¯†Z¿Ž’¼oªü>¸Ò|%i/ƒ5/ÅooН¼8úmψRSW–ñb@¼ _ðL_ø&ÄÛÃü×ö†+;}zÒÒ(¿d߀±Çkiâ«9´ïZÛ¢x,þ$Óî.,5èb ±g<ÖºŠÜA+£c…”°õ±5ä•g^Y$áJjÆ*|CW«ÒääÄÃ4Äg´ªÕ8á#ƒ§–ÑÁa2ú”°˜=±4§^:Q¬éN<Ú•:ñ¦”ÿáR\? 
u`¹ù©K/Ãduéaá:¸Ÿo[5©ŒÅÔ«‰ÂN¦að‡ÅÿÚ_þ ½e/íŸãƒ7°­ïÃ?ØçãÏŠôcFñÀßÚ?ÆŸ|yð?Mø)ðÃã¶ÿ ø¿´/…ô[ŸŒ>Ð*øƒA×>*øçöSøgðOáo>%A¢ÝÝËu£ø»Ä2ü.ñ]®¹a©k:5ÇŠì­¬>$xrÓYÔu‡þ8ðg‰^ß^¶ô‰ÿ³'ìÝñ»Â~ðƯÙÿà§Æ/|>¿Ó5oxGâ·ÂßüFð׃5mM—FÒ5o hž2Ðõ­;AÕtÍ"yôËKL·µ½µ°žkHgH%‘£pþÍU9«SËñ ë*´èÎxÿìœVI˜q6.´kGNRÎ+eJŽ]J´çG ny‡Åa*á1xfkZU&±þÍû˜ê ²Ó¬?jÁûLëž3ðÿÆ?ß´ßíùsð{â—Ãß„ÞÕ>hŸ?i?¾.øOñCÅŸ n ðÖ·ã_ü8ñÏ‚íµëýkYñzø/ž ÐtØ–¾oð_íÛÿø¹àŸ„Ú÷Ãí{ö#Ñ&ñ·ì1ñãö™ñˆ ~Ϭ<¥xoš'í?¢µ×~)hž#п³E÷W[øg¨XjšäÚŸÅKMFÏÃzí?ÆÙßöý£ü3¦x+ö‡øðwãσt]fßÄz?„¾3ü2ðWÅ é>!´³¼Ó­uÝ3AñƉ®iVͶŸ¨êú¥­¬WÐÙß^ZÇ:Áu:?‘éŸðOØE7£~IJ’Ú‡ƒüSðòý´ÏÙ«àžðŽou=GÆÞ¼6¾ ˆÜø?Æ:†µ¬_ø§Ã3oÑ|C{«jwZµ•Ü÷÷O/.*8Êóœéâ^“ ˜RÂFÏ£Wû2ÁäS«,K©R¥,³8ÇÐÌ*Ò¯7üNøñö.ø)à?ƒzG‚¼ ãŸügðÅO|XoüQý˜< ñƒGñ߆üqàïŽÿ to ½Ïþ0ø/Á >ë¾Õ>(AáíCVµøÉá _ÅšV…á¯øSûa~×_þ |3ø¯ñÓÄ~;ÿgÁ"dý¥>|EÓ¼gûDü*ÓµýZ GörÓ—íàŸü_øÓðÊê_ IãX¼Kã¿Ú‚Ükÿ|]¦_x¥´o |Ñ4­cHøûi þÅ¿±Ï…|càˆžý“f ü@øSàøþ|.ñÖƒð#ánã†Þ‹OÖ´ˆ¼ àéþ·Öüàøô¯x‹LÃ>¾Ó´TÓõíjÉl…¶©}ü÷Ã_ø'÷ìðcÄ·^3ø=ûþÈß ‡Vÿ~!øÛá7Åß…:‡ŠtjúÇÄ?‰¾ñ÷€.¼'©YYø:ïâq:çíùûqø/Ä~ð§‹&ýœïtŸ†?´/ÄO…´ÏÆ_~Îý³¾økâ$0j-þü7ÓuÜ~¨ZþÃÿ±eÃÏ |"²ýeÛ?…ñÌ<ðÆ×àÂ{‡žø—å|Cð·‚âð’xoÃþ9í_‹t6Ó_O>mº€ó_v?ðOOØÛǶŸmÿaÏØúŠ;Ÿâ•Ĉf‚ñxöËâmÖ¯kâ ŸˆÖž0O/ˆm¼wq¯XÙksø¾Ep=ðÉhQƒáÁÍîújk+ë„:ð¥w.‰ƒ´¯x N!Ø !ߟ„â~e¬Ð±7§xˆWwDƒãOñ5!Šq—\ôî+z3޲­éÏ*,Cœ˜f’Ö¿N³_™öÞǤ]4)žÃ­—*9;YƒßݺØweÆ`Ó=ÌHFaSv ,Z’®1&˜Ñõ5$«é{E7ÈÐòÀ/Vcû<%{g¼ê7 ->‘5OÂY#g«_òzäS 0@»-š1?2¶)a¸  ®Ë ƒYJKÈš¶…MÃ6yQPœüÆ(F~žÂÌØZùx”~ xhóîÀX q·ï`–§,7)YÑдéâ‘f `^õ'î½ãýCü5?ÅØp/†³o»&ùÿÄI?s‰uÞ~,ßÿ’;·û ÿÿPK!Øý¬¶ppt/tableStyles.xml ÌI‚0@Ὁwhþ}-CQ$ +wê*”!é@h£ãÝeùò’/Í?J¢—Xìd4ÿàº5ݤ{ƒc@ÖqÝqi´`° y¶ß¥ß½w猧ûF“­ô ¬©hÑÍ)‘†[¡L]ÑÏåGgH fÓÖÈŠ$ÐéäùiìÊ­’»¹'` d]‡àÊ,¾– ƒ®uÒ`ne}ú¾Î„g;ntÖËóAÖ0eè©ß?ÒoW+Åå»å›Fš@¼Ô, yX+g4÷šó¦í¾¥¤„/TWQÐb¹Þ4߆)#t‚ÂM”Ôºs}Ä ÖK1“«@àˆk|ôrš]ç–Öµ©Q0hSÙÐJÈ8%Áò…W^2É–ùgOQ´d :“1+aOâ{/”üæí î„qø©Ï•Ö«Z²¯h§(ú”Ð û‘>–ñ ƒzƒôf¢¬Ö&ØŠ Ä][¤ÄY¨h¯HòÎ%)8ž5_@"ø•ÂHéV¿±AÂRîÛSœVraóWwRYÿj¾„"xÚÓµ^|ë(öÌìW)ß]{%Žq|¿„ã²Þð ã¿Á!™i_Ût¾ÿÿPK!Û!| O6docProps/app.xml ¢( ÔTÏ‹Ó@¾ þ!'=l§­uYÊté²ô຅f×󘼴ƒéL˜ë®'7YÙƒ¢DÖ›ÿÿ _’þt‹ 7CßûÞ7/o¾¼º}4N¼ h#”ìøZÝ÷@†*rØñ‚Ý-ß3–ˈ'JBÇ?ão³Ë—h_«´`<,!MÇY›¶ 1áÆÜÔ0-1+=æC=$*ŽE;*¼7iI³^ß$pdAFm¤ó‚~U±=±ÿZ4RaÑŸ9 ŽSl˜Ñ@Yžb ¬±EÉ"¢·•Ž knRR!z#Mr‹~°=jeTl½ý²s¯¯îƒî+!-%ËBt 
n©\¶[î˜ýxöùçË×îäüûÓS—=q'/\væ]iµ¯]¥džö¹æCÍÓ‘a&J!$"ÃZ”L½¥,uJ*@{"Š@N³H¯Äto¯›ˆ´ÔÏ „<.ºÄbžÀÒs‚ö€ÐçBF'¶=Ð*íñg å{w¸ÂÛŽ?áZpiÑãBV%NRc5sù#—qù+J0_q%\–.cÑbR€àÂiýì“Ëß¹ìË{.ûZ¢S—ŸÿÅ÷ÐVìíÂ÷ ²Ú76²êH lf?Æd×t}Ù ²ÏÊžªåé,}{x†ïZæN-æÍsùs—¿wzùaº(ûèò·.oõ¿[ºâòo¾Þò®9HµÃ-ÌÆt•¤ƒ×á2Ë/Úà ÕIQ¤;ârÑLs1QœûÃêdV­ŽOyÄg\qdgûÿÿPK! ÂÙ¹docProps/core.xml ¢( Œ’±nÛ0†÷}»LRnb—e  2Õ€ºHÐ%Ï6‰H6ŠW è+ô²dë%o£ %Ûª‚vÈxüÿûøß‘éü¡È£{°N=CtDPZ©ôf†¾­®ã)ŠœçZòÜh˜¡84Ï>œ¥¢dÂXXZS‚õ \HÚ1QÎÐÖû’aìÄ îFÁ¡ƒ¸6¶à>”vƒK.îøpBÈ.ÀsÉ=Ç-0.{":"¥è‘åO›w)0äP€öÓŽláþÛÐ)g¡ü® 3ãÙRÄÞýàTo¬ªjT»!?Å·‹/_»Qc¥Û] @Y*óÊç-Mvi”öQSÿnê§fÿÒÔšúWS¿4ûç¦~ eŠû†¶UXàÞØLÇNý«éäÓa»ûœ;¿Ï´V ?ï¾µÖná^µOœÑ‹ëpY·–à £0(;¬å¤ÜŒ/¯V×(KÄ$‰)Y‘)#çŒÒïm®7ýíà‡ƒâ˜îÄd²"–|bÏÄ ë¿ýlÙ+ÿÿPK-!kÀ†î2[Content_Types].xmlPK-!høt¡â '_rels/.relsPK-!“¿EšÙ¾ ]ppt/slides/_rels/slide1.xml.relsPK-!c\#´Á7 tppt/slides/_rels/slide2.xml.relsPK-!c\#´Á7 s ppt/slides/_rels/slide3.xml.relsPK-!¯ÚD Ù¾ r ppt/slides/_rels/slide4.xml.relsPK-!Ýó¶+_‰ ppt/_rels/presentation.xml.relsPK-! 'ÿÔù ppt/presentation.xmlPK-!‘ŒÏ6'A?ppt/slides/slide2.xmlPK-!1—Êâ½­ 8ppt/slides/slide1.xmlPK-!o@Ç—“q=ppt/slides/slide4.xmlPK-!‹©‹[÷&‡;Cppt/slides/slide3.xmlPK-!ÕÑ’ñ¾7,eXppt/slideLayouts/_rels/slideLayout7.xml.relsPK-!ÕÑ’ñ¾7,mYppt/slideLayouts/_rels/slideLayout4.xml.relsPK-!ÕÑ’ñ¾7,uZppt/slideLayouts/_rels/slideLayout6.xml.relsPK-!ÕÑ’ñ¾7,}[ppt/slideLayouts/_rels/slideLayout9.xml.relsPK-!ÕÑ’ñ¾7,…\ppt/slideLayouts/_rels/slideLayout8.xml.relsPK-!ÕÑ’ñ¾7-]ppt/slideLayouts/_rels/slideLayout10.xml.relsPK-!i¢_!Ç,–^ppt/slideMasters/_rels/slideMaster1.xml.relsPK-!ÕÑ’ñ¾7,þ_ppt/slideLayouts/_rels/slideLayout1.xml.relsPK-!ÕÑ’ñ¾7,appt/slideLayouts/_rels/slideLayout2.xml.relsPK-!ÕÑ’ñ¾7,bppt/slideLayouts/_rels/slideLayout5.xml.relsPK-!ÕÑ’ñ¾7-cppt/slideLayouts/_rels/slideLayout11.xml.relsPK-!·cÔ·Y"dppt/slideLayouts/slideLayout11.xmlPK-!XYJg­!ippt/slideLayouts/slideLayout3.xmlPK-!Oùªƒ?4!¼nppt/slideLayouts/slideLayout2.xmlPK-!èŸhà 
r!:sppt/slideLayouts/slideLayout1.xmlPK-!A+,]¿z6!…xppt/slideMasters/slideMaster1.xmlPK-!p¡ù&†!ƒppt/slideLayouts/slideLayout4.xmlPK-!Õy„‡‘¹$!è†ppt/slideLayouts/slideLayout5.xmlPK-!oK„J¨Ñ !¸ppt/slideLayouts/slideLayout6.xmlPK-!Û›X¼dp"Ÿ‘ppt/slideLayouts/slideLayout10.xmlPK-!Ëg_)†¤!C–ppt/slideLayouts/slideLayout9.xmlPK-!ì#?ð¡!œppt/slideLayouts/slideLayout8.xmlPK-!ŠK¶e/!7¢ppt/slideLayouts/slideLayout7.xmlPK-!ÕÑ’ñ¾7,Û¥ppt/slideLayouts/_rels/slideLayout3.xml.relsPK-!À¥ºã¦ppt/theme/theme1.xmlPK-!ân"øEÈŽº­ppt/media/image1.emfPK- !ý1*¢¢äóppt/media/image2.pngPK- !—t‰TT¸ûdocProps/thumbnail.jpegPK-!95ì9íOppt/presProps.xmlPK-!Øý¬¶¬Qppt/tableStyles.xmlPK-!·DTœX‰Rppt/viewProps.xmlPK-!Û!| O6TTdocProps/app.xmlPK-! ÂÙ¹ÙWdocProps/core.xmlPK--„ ŸZsleef-3.5.1/include/000077500000000000000000000000001373003144100142355ustar00rootroot00000000000000sleef-3.5.1/include/sleefdft.h000066400000000000000000000064111373003144100162040ustar00rootroot00000000000000#ifndef __SLEEFDFT_H__ #define __SLEEFDFT_H__ #ifdef __cplusplus extern "C" { #endif #include #include #define SLEEF_MODE_FORWARD (0 << 0) #define SLEEF_MODE_BACKWARD (1 << 0) #define SLEEF_MODE_COMPLEX (0 << 1) #define SLEEF_MODE_REAL (1 << 1) #define SLEEF_MODE_ALT (1 << 2) #define SLEEF_MODE_FFTWCOMPAT (1 << 3) #define SLEEF_MODE_DEBUG (1 << 10) #define SLEEF_MODE_VERBOSE (1 << 11) #define SLEEF_MODE_NO_MT (1 << 12) #define SLEEF_MODE_ESTIMATE (1 << 20) #define SLEEF_MODE_MEASURE (2 << 20) #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) #ifdef IMPORT_IS_EXPORT #define IMPORT __declspec(dllexport) #else // #ifdef IMPORT_IS_EXPORT #define IMPORT __declspec(dllimport) #if (defined(_MSC_VER)) #pragma comment(lib,"sleefdft.lib") #endif // #if (defined(_MSC_VER)) #endif // #ifdef IMPORT_IS_EXPORT #else // #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) #define IMPORT #endif // #if 
(defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) IMPORT struct SleefDFT *SleefDFT_double_init1d(uint32_t n, const double *in, double *out, uint64_t mode); IMPORT struct SleefDFT *SleefDFT_double_init2d(uint32_t n, uint32_t m, const double *in, double *out, uint64_t mode); IMPORT void SleefDFT_double_execute(struct SleefDFT *ptr, const double *in, double *out); IMPORT struct SleefDFT *SleefDFT_float_init1d(uint32_t n, const float *in, float *out, uint64_t mode); IMPORT struct SleefDFT *SleefDFT_float_init2d(uint32_t n, uint32_t m, const float *in, float *out, uint64_t mode); IMPORT void SleefDFT_float_execute(struct SleefDFT *ptr, const float *in, float *out); IMPORT struct SleefDFT *SleefDFT_longdouble_init1d(uint32_t n, const long double *in, long double *out, uint64_t mode); IMPORT struct SleefDFT *SleefDFT_longdouble_init2d(uint32_t n, uint32_t m, const long double *in, long double *out, uint64_t mode); IMPORT void SleefDFT_longdouble_execute(struct SleefDFT *ptr, const long double *in, long double *out); #if defined(ENABLEFLOAT128) && !defined(Sleef_quad2_DEFINED) #define Sleef_quad2_DEFINED typedef __float128 Sleef_quad; typedef struct { Sleef_quad x, y; } Sleef_quad2; #endif #if defined(Sleef_quad2_DEFINED) IMPORT struct SleefDFT *SleefDFT_quad_init1d(uint32_t n, const Sleef_quad *in, Sleef_quad *out, uint64_t mode); IMPORT struct SleefDFT *SleefDFT_quad_init2d(uint32_t n, uint32_t m, const Sleef_quad *in, Sleef_quad *out, uint64_t mode); IMPORT void SleefDFT_quad_execute(struct SleefDFT *ptr, const Sleef_quad *in, Sleef_quad *out); #endif IMPORT void SleefDFT_dispose(struct SleefDFT *ptr); IMPORT void SleefDFT_setPath(struct SleefDFT *ptr, char *pathStr); // IMPORT void SleefDFT_setPlanFilePath(const char *path, const char *arch, uint64_t mode); #define SLEEF_PLAN_AUTOMATIC 0 #define SLEEF_PLAN_READONLY (1 << 0) #define SLEEF_PLAN_RESET (1 << 1) #define SLEEF_PLAN_BUILDALLPLAN (1 << 2) 
#define SLEEF_PLAN_NOLOCK (1 << 3) #define SLEEF_PLAN_MEASURE (1 << 29) #define SLEEF_PLAN_REFERTOENVVAR (1 << 30) #undef IMPORT #ifdef __cplusplus } #endif #endif // #ifndef __SLEEFDFT_H__ sleef-3.5.1/sleef-config.h.in000066400000000000000000000004371373003144100157350ustar00rootroot00000000000000// Configuration of @PROJECT_NAME@ ///////////////////////////////////////////// #ifndef SLEEF_CONFIG_H #define SLEEF_CONFIG_H #define SLEEF_VERSION_MAJOR @SLEEF_VERSION_MAJOR@ #define SLEEF_VERSION_MINOR @SLEEF_VERSION_MINOR@ #cmakedefine SLEEF_STATIC_LIBS #endif // SLEEF_CONFIG_H sleef-3.5.1/src/000077500000000000000000000000001373003144100134015ustar00rootroot00000000000000sleef-3.5.1/src/CMakeLists.txt000066400000000000000000000006371373003144100161470ustar00rootroot00000000000000include_directories("common") include_directories("arch") add_subdirectory("libm") if (BUILD_TESTS) add_subdirectory("libm-tester") endif() add_subdirectory("common") if (BUILD_DFT AND NOT MINGW) add_subdirectory("dft") if (BUILD_TESTS) add_subdirectory("dft-tester") endif() endif() if (BUILD_QUAD) add_subdirectory("quad") if (BUILD_TESTS) add_subdirectory("quad-tester") endif() endif() sleef-3.5.1/src/arch/000077500000000000000000000000001373003144100143165ustar00rootroot00000000000000sleef-3.5.1/src/arch/helperadvsimd.h000066400000000000000000001005031373003144100173150ustar00rootroot00000000000000/*********************************************************************/ /* Copyright ARM Ltd. 2010 - 2019. */ /* Distributed under the Boost Software License, Version 1.0. */ /* (See accompanying file LICENSE.txt or copy at */ /* http://www.boost.org/LICENSE_1_0.txt) */ /*********************************************************************/ #ifndef __ARM_NEON #error Please specify advsimd flags. 
#endif

// The intrinsics header, the fixed-width integer types and the project's
// misc.h are only included when SLEEF_GENHEADER is not defined.
// arm_neon.h supplies the float32x4_t / uint32x4_t / ... types and every
// v*q_* intrinsic used below; stdint.h supplies int32_t.
#if !defined(SLEEF_GENHEADER)
#include <arm_neon.h>
#include <stdint.h>
#include "misc.h"
#endif // #if !defined(SLEEF_GENHEADER)

// Configuration macros. Every "//@#define" line repeats the directive written
// directly above it; keep each pair identical when editing.
// NOTE(review): the "//@" copies are presumably picked up by the inline-header
// generator (SLEEF_GENHEADER) -- confirm before reformatting them.
#define ENABLE_DP
//@#define ENABLE_DP
#define LOG2VECTLENDP 1
//@#define LOG2VECTLENDP 1
#define VECTLENDP (1 << LOG2VECTLENDP)
//@#define VECTLENDP (1 << LOG2VECTLENDP)

#define ENABLE_SP
//@#define ENABLE_SP
#define LOG2VECTLENSP 2
//@#define LOG2VECTLENSP 2
#define VECTLENSP (1 << LOG2VECTLENSP)
//@#define VECTLENSP (1 << LOG2VECTLENSP)

// CONFIG == 1 selects the fused multiply-add variants of vmla* below.
#if CONFIG == 1
#define ENABLE_FMA_DP
//@#define ENABLE_FMA_DP
#define ENABLE_FMA_SP
//@#define ENABLE_FMA_SP
#endif

#define FULL_FP_ROUNDING
//@#define FULL_FP_ROUNDING
#define ACCURATE_SQRT
//@#define ACCURATE_SQRT

#define ISANAME "AArch64 AdvSIMD"

// Mask definition: both mask flavours are four 32-bit unsigned lanes.
typedef uint32x4_t vmask;
typedef uint32x4_t vopmask;

// Single precision definitions: 4 lanes.
typedef float32x4_t vfloat;
typedef int32x4_t vint2;

// Double precision definitions: 2 lanes, with a 2 x int32 companion type.
typedef float64x2_t vdouble;
typedef int32x2_t vint;

// A pair of masks.
typedef struct {
  vmask x, y;
} vmask2;

#define DFTPRIORITY 10

// Always reports 3, whatever `name` is. The meaning of the value is defined by
// the callers, which are not part of this header.
static INLINE int vavailability_i(int name) { return 3; }

// Prefetch hint: intentionally a no-op in this helper.
static INLINE void vprefetch_v_p(const void *ptr) { }

// Folds the four 32-bit words of g into one: AND the low and high halves, then
// take the pairwise minimum of the two remaining words. For a mask whose lanes
// are each 0 or all-ones the result is non-zero exactly when all lanes are set.
static INLINE VECTOR_CC int vtestallones_i_vo32(vopmask g) {
  uint32x2_t halves = vand_u32(vget_low_u32(g), vget_high_u32(g));
  uint32x2_t folded = vpmin_u32(halves, halves);
  return vget_lane_u32(folded, 0);
}

// Same reduction as vtestallones_i_vo32; a 64-bit lane is simply two 32-bit
// words of the same uint32x4_t representation.
static INLINE VECTOR_CC int vtestallones_i_vo64(vopmask g) {
  uint32x2_t halves = vand_u32(vget_low_u32(g), vget_high_u32(g));
  uint32x2_t folded = vpmin_u32(halves, halves);
  return vget_lane_u32(folded, 0);
}

// Vector load / store
// The "u" (unaligned) and plain variants map to the same vld1/vst1 intrinsic.
static INLINE VECTOR_CC vdouble vload_vd_p(const double *ptr) {
  return vld1q_f64(ptr);
}
static INLINE VECTOR_CC vdouble vloadu_vd_p(const double *ptr) {
  return vld1q_f64(ptr);
}
static INLINE VECTOR_CC void vstore_v_p_vd(double *ptr, vdouble v) {
  vst1q_f64(ptr, v);
}
static INLINE VECTOR_CC void vstoreu_v_p_vd(double *ptr, vdouble v) {
  vst1q_f64(ptr, v);
}

static INLINE VECTOR_CC vfloat vload_vf_p(const float *ptr) {
  return vld1q_f32(ptr);
}
static INLINE VECTOR_CC vfloat vloadu_vf_p(const float *ptr) {
  return vld1q_f32(ptr);
}
static INLINE VECTOR_CC void vstore_v_p_vf(float *ptr, vfloat v) {
  vst1q_f32(ptr, v);
}
static INLINE VECTOR_CC void vstoreu_v_p_vf(float *ptr, vfloat v) {
  vst1q_f32(ptr, v);
}

// Integer vectors: four lanes (vint2) and two lanes (vint).
static INLINE VECTOR_CC vint2 vloadu_vi2_p(int32_t *p) {
  return vld1q_s32(p);
}
static INLINE VECTOR_CC void vstoreu_v_p_vi2(int32_t *p, vint2 v) {
  vst1q_s32(p, v);
}
static INLINE VECTOR_CC vint vloadu_vi_p(int32_t *p) {
  return vld1_s32(p);
}
static INLINE VECTOR_CC void vstoreu_v_p_vi(int32_t *p, vint v) {
  vst1_s32(p, v);
}

// Gathers: each result lane i is ptr[index lane i], read with scalar loads.
static INLINE VECTOR_CC vdouble vgather_vd_p_vi(const double *ptr, vint vi) {
  return ((vdouble) { ptr[vget_lane_s32(vi, 0)], ptr[vget_lane_s32(vi, 1)]} );
}

static INLINE VECTOR_CC vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) {
  return ((vfloat) {
      ptr[vgetq_lane_s32(vi2, 0)],
      ptr[vgetq_lane_s32(vi2, 1)],
      ptr[vgetq_lane_s32(vi2, 2)],
      ptr[vgetq_lane_s32(vi2, 3)]
    });
}

// Basic logical operations for mask
static INLINE VECTOR_CC vmask vand_vm_vm_vm(vmask x, vmask y) {
  return vandq_u32(x, y);
}
// and-not clears the bits of x from y: the result is (~x) & y.
static INLINE VECTOR_CC vmask vandnot_vm_vm_vm(vmask x, vmask y) {
  return vbicq_u32(y, x);
}
static INLINE VECTOR_CC vmask vor_vm_vm_vm(vmask x, vmask y) {
  return vorrq_u32(x, y);
}
static INLINE VECTOR_CC vmask vxor_vm_vm_vm(vmask x, vmask y) {
  return veorq_u32(x, y);
}

// Mask <--> single precision reinterpret (bit patterns are unchanged)
static INLINE VECTOR_CC vmask vreinterpret_vm_vf(vfloat vf) {
  return vreinterpretq_u32_f32(vf);
}
static INLINE VECTOR_CC vfloat vreinterpret_vf_vm(vmask vm) {
  return vreinterpretq_f32_u32(vm);
}
static INLINE VECTOR_CC vint2 vcast_vi2_vm(vmask vm) {
  return vreinterpretq_s32_u32(vm);
}
static INLINE VECTOR_CC vmask vcast_vm_vi2(vint2 vi) {
  return vreinterpretq_u32_s32(vi);
}

// Mask <--> double precision reinterpret (bit patterns are unchanged)
static INLINE VECTOR_CC vmask vreinterpret_vm_vd(vdouble vd) {
  return vreinterpretq_u32_f64(vd);
}
static INLINE VECTOR_CC vdouble vreinterpret_vd_vm(vmask vm) {
  return vreinterpretq_f64_u32(vm);
}
// Bit-pattern reinterprets between float / double vectors and vint2.
static INLINE VECTOR_CC vfloat vreinterpret_vf_vi2(vint2 vm) {
  return vreinterpretq_f32_s32(vm);
}
static INLINE VECTOR_CC vint2 vreinterpret_vi2_vf(vfloat vf) {
  return vreinterpretq_s32_f32(vf);
}
static INLINE VECTOR_CC vint2 vreinterpret_vi2_vd(vdouble vd) {
  return vreinterpretq_s32_f64(vd);
}

/****************************************/
/* Single precision FP operations */
/****************************************/
// Broadcast
static INLINE VECTOR_CC vfloat vcast_vf_f(float f) {
  return vdupq_n_f32(f);
}

// Add, Sub, Mul
static INLINE VECTOR_CC vfloat vadd_vf_vf_vf(vfloat x, vfloat y) {
  return vaddq_f32(x, y);
}
static INLINE VECTOR_CC vfloat vsub_vf_vf_vf(vfloat x, vfloat y) {
  return vsubq_f32(x, y);
}
static INLINE VECTOR_CC vfloat vmul_vf_vf_vf(vfloat x, vfloat y) {
  return vmulq_f32(x, y);
}

// |x|, -x
static INLINE VECTOR_CC vfloat vabs_vf_vf(vfloat f) {
  return vabsq_f32(f);
}
static INLINE VECTOR_CC vfloat vneg_vf_vf(vfloat f) {
  return vnegq_f32(f);
}

// vmla* family: fused when CONFIG == 1, separate multiply and add/sub otherwise.
#if CONFIG == 1
// z + x * y
static INLINE VECTOR_CC vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  return vfmaq_f32(z, x, y);
}
// z - x * y
static INLINE VECTOR_CC vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  return vfmsq_f32(z, x, y);
}
// x * y - z, obtained by negating (z - x * y)
static INLINE VECTOR_CC vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  vfloat z_minus_xy = vfmsq_f32(z, x, y);
  return vneg_vf_vf(z_minus_xy);
}
#else
// z + x * y
static INLINE VECTOR_CC vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  vfloat xy = vmul_vf_vf_vf(x, y);
  return vadd_vf_vf_vf(xy, z);
}
// z - x * y
static INLINE VECTOR_CC vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  vfloat xy = vmul_vf_vf_vf(x, y);
  return vsub_vf_vf_vf(z, xy);
}
// x * y - z
static INLINE VECTOR_CC vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  vfloat xy = vmul_vf_vf_vf(x, y);
  return vsub_vf_vf_vf(xy, z);
}
#endif

// vfma* family: always fused, independent of CONFIG.
// z + x * y
static INLINE VECTOR_CC vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  return vfmaq_f32(z, x, y);
}
// z - x * y
static INLINE VECTOR_CC vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  return vfmsq_f32(z, x, y);
}
// x * y - z
static INLINE VECTOR_CC vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  vfloat z_minus_xy = vfmanp_vf_vf_vf_vf(x, y, z);
  return vneg_vf_vf(z_minus_xy);
}

// Reciprocal 1/x, Division, Square root
// n / d. With ENABLE_ALTDIV the quotient is built from the reciprocal estimate
// instead of the divide instruction.
static INLINE VECTOR_CC vfloat vdiv_vf_vf_vf(vfloat n, vfloat d) {
#ifndef ENABLE_ALTDIV
  return vdivq_f32(n, d);
#else
  // Finite numbers (including denormal) only, gives mostly correctly rounded result
  float32x4_t scale, num, den, rcp, quot;
  uint32x4_t nbits, dbits, sbits;

  // Derive a scale factor from the masked exponent bits of n and d, and apply
  // it to both operands before estimating the reciprocal.
  nbits = vandq_u32(vreinterpretq_u32_f32(n), vdupq_n_u32(0x7c000000));
  dbits = vandq_u32(vreinterpretq_u32_f32(d), vdupq_n_u32(0x7c000000));
  sbits = vsubq_u32(vdupq_n_u32(0x7d000000), vshrq_n_u32(vaddq_u32(nbits, dbits), 1));
  scale = vreinterpretq_f32_u32(sbits);
  den = vmulq_f32(d, scale);
  num = vmulq_f32(n, scale);

  // Reciprocal estimate followed by two vrecps refinement steps.
  rcp = vrecpeq_f32(den);
  rcp = vmulq_f32(rcp, vrecpsq_f32(den, rcp));
  rcp = vmulq_f32(rcp, vrecpsq_f32(den, rcp));

  // Quotient, then one fused correction using the residual num - den * quot.
  quot = vmulq_f32(num, rcp);
  quot = vfmaq_f32(quot, vfmsq_f32(num, den, quot), rcp);
  return quot;
#endif
}

// 1 / d. The ENABLE_ALTDIV path forces the result to 0 where |d| is infinity.
static INLINE VECTOR_CC vfloat vrec_vf_vf(vfloat d) {
#ifndef ENABLE_ALTDIV
  return vdiv_vf_vf_vf(vcast_vf_f(1.0f), d);
#else
  uint32x4_t is_inf = vceqq_f32(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf));
  return vbslq_f32(is_inf, vcast_vf_f(0), vdiv_vf_vf_vf(vcast_vf_f(1.0f), d));
#endif
}

// sqrt(d). With ENABLE_ALTSQRT the root is refined from the reciprocal
// square-root estimate; the statement order below is significant.
static INLINE VECTOR_CC vfloat vsqrt_vf_vf(vfloat d) {
#ifndef ENABLE_ALTSQRT
  return vsqrtq_f32(d);
#else
  // Gives correctly rounded result for all input range
  vfloat w, x, y, z;
  uint32x4_t passthrough;

  y = vrsqrteq_f32(d);
  x = vmul_vf_vf_vf(d, y);
  w = vmul_vf_vf_vf(vcast_vf_f(0.5), y);
  y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(0.5));
  x = vfma_vf_vf_vf_vf(x, y, x);
  w = vfma_vf_vf_vf_vf(w, y, w);

  y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(1.5));
  w = vadd_vf_vf_vf(w, w);
  w = vmul_vf_vf_vf(w, y);
  x = vmul_vf_vf_vf(w, d);
  y = vfmapn_vf_vf_vf_vf(w, d, x);
  z = vfmanp_vf_vf_vf_vf(w, x, vcast_vf_f(1));
  z = vfmanp_vf_vf_vf_vf(w, y, z);
  w = vmul_vf_vf_vf(vcast_vf_f(0.5), x);
  w = vfma_vf_vf_vf_vf(w, z, y);
  w = vadd_vf_vf_vf(w, x);

  // Inputs equal to 0 or +infinity are returned unchanged.
  passthrough = vorrq_u32(vceqq_f32(d, vcast_vf_f(0)),
                          vceqq_f32(d, vcast_vf_f(SLEEF_INFINITYf)));
  return vbslq_f32(passthrough, d, w);
#endif
}

// max, min
static INLINE VECTOR_CC vfloat vmax_vf_vf_vf(vfloat x, vfloat y) {
  return vmaxq_f32(x, y);
}
static INLINE VECTOR_CC vfloat vmin_vf_vf_vf(vfloat x, vfloat y) {
  return vminq_f32(x, y);
}

// Comparisons
static INLINE VECTOR_CC vmask veq_vm_vf_vf(vfloat x, vfloat y) {
  return vceqq_f32(x, y);
}
// "not equal" is the bitwise complement of the equality mask.
static INLINE VECTOR_CC vmask vneq_vm_vf_vf(vfloat x, vfloat y) {
  vmask equal = vceqq_f32(x, y);
  return vmvnq_u32(equal);
}
static INLINE VECTOR_CC vmask vlt_vm_vf_vf(vfloat x, vfloat y) {
  return vcltq_f32(x, y);
}
static INLINE VECTOR_CC vmask vle_vm_vf_vf(vfloat x, vfloat y) {
  return vcleq_f32(x, y);
}
static INLINE VECTOR_CC vmask vgt_vm_vf_vf(vfloat x, vfloat y) {
  return vcgtq_f32(x, y);
}
static INLINE VECTOR_CC vmask vge_vm_vf_vf(vfloat x, vfloat y) {
  return vcgeq_f32(x, y);
}

// Conditional select: bits of x where mask is set, bits of y elsewhere.
static INLINE VECTOR_CC vfloat vsel_vf_vm_vf_vf(vmask mask, vfloat x, vfloat y) {
  return vbslq_f32(mask, x, y);
}

// int <--> float conversions
static INLINE VECTOR_CC vint2 vtruncate_vi2_vf(vfloat vf) {
  return vcvtq_s32_f32(vf);
}
static INLINE VECTOR_CC vfloat vcast_vf_vi2(vint2 vi) {
  return vcvtq_f32_s32(vi);
}
static INLINE VECTOR_CC vint2 vcast_vi2_i(int i) {
  return vdupq_n_s32(i);
}
// Round with vrndnq_f32 first, then convert to int.
static INLINE VECTOR_CC vint2 vrint_vi2_vf(vfloat d) {
  vfloat rounded = vrndnq_f32(d);
  return vcvtq_s32_f32(rounded);
}

/***************************************/
/* Single precision integer operations */
/***************************************/

// Add, Sub, Neg (-x)
static INLINE VECTOR_CC vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) {
  return vaddq_s32(x, y);
}
static INLINE VECTOR_CC vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) {
  return vsubq_s32(x, y);
}
static INLINE VECTOR_CC vint2 vneg_vi2_vi2(vint2 e) {
  return vnegq_s32(e);
}

// Logical operations
static INLINE VECTOR_CC vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) {
  return vandq_s32(x, y);
}
// (~x) & y
static INLINE VECTOR_CC vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) {
  return vbicq_s32(y, x);
}
static INLINE VECTOR_CC vint2
vor_vi2_vi2_vi2(vint2 x, vint2 y) {
  return vorrq_s32(x, y);
}
static INLINE VECTOR_CC vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) {
  return veorq_s32(x, y);
}

// Shifts
// These are macros because the shift count `c` is passed straight to the
// immediate-shift intrinsics. Every "//@#define" line repeats the macro written
// above it on a single line; keep each pair identical when editing.
// sll = shift left, srl = logical (zero-filling) shift right, performed on the
// unsigned reinterpretation, sra = arithmetic shift right.
#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c)
//@#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c)
#define vsrl_vi2_vi2_i(x, c) \
  vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c))
//@#define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c))
#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c)
//@#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c)
#define vsra_vi_vi_i(x, c) vshr_n_s32(x, c)
//@#define vsra_vi_vi_i(x, c) vshr_n_s32(x, c)
#define vsll_vi_vi_i(x, c) vshl_n_s32(x, c)
//@#define vsll_vi_vi_i(x, c) vshl_n_s32(x, c)
#define vsrl_vi_vi_i(x, c) \
  vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(x), c))
//@#define vsrl_vi_vi_i(x, c) vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(x), c))

// Comparison returning masks
// Each lane of the result is all-ones where the comparison holds, 0 otherwise.
static INLINE VECTOR_CC vmask veq_vm_vi2_vi2(vint2 x, vint2 y) {
  return vceqq_s32(x, y);
}
// Strict signed greater-than (x > y). This must be vcgtq, not vcgeq: with
// vcgeq, lanes where x == y would be reported as "greater".
static INLINE VECTOR_CC vmask vgt_vm_vi2_vi2(vint2 x, vint2 y) {
  return vcgtq_s32(x, y);
}

// Comparison returning integers
// Same comparisons as above, with the mask reinterpreted as signed lanes
// (-1 where the comparison holds, 0 otherwise).
static INLINE VECTOR_CC vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) {
  return vreinterpretq_s32_u32(vcgtq_s32(x, y));
}
static INLINE VECTOR_CC vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) {
  return vreinterpretq_s32_u32(vceqq_s32(x, y));
}

// Conditional select: bits of x where m is set, bits of y elsewhere.
static INLINE VECTOR_CC vint2 vsel_vi2_vm_vi2_vi2(vmask m, vint2 x, vint2 y) {
  return vbslq_s32(m, x, y);
}

/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */

/****************************************/
/* Double precision FP operations */
/****************************************/
// Broadcast
static INLINE
VECTOR_CC vdouble vcast_vd_d(double f) {
  return vdupq_n_f64(f);
}

// Add, Sub, Mul
static INLINE VECTOR_CC vdouble vadd_vd_vd_vd(vdouble x, vdouble y) {
  return vaddq_f64(x, y);
}
static INLINE VECTOR_CC vdouble vsub_vd_vd_vd(vdouble x, vdouble y) {
  return vsubq_f64(x, y);
}
static INLINE VECTOR_CC vdouble vmul_vd_vd_vd(vdouble x, vdouble y) {
  return vmulq_f64(x, y);
}

// |x|, -x
static INLINE VECTOR_CC vdouble vabs_vd_vd(vdouble f) {
  return vabsq_f64(f);
}
static INLINE VECTOR_CC vdouble vneg_vd_vd(vdouble f) {
  return vnegq_f64(f);
}

// max, min
static INLINE VECTOR_CC vdouble vmax_vd_vd_vd(vdouble x, vdouble y) {
  return vmaxq_f64(x, y);
}
static INLINE VECTOR_CC vdouble vmin_vd_vd_vd(vdouble x, vdouble y) {
  return vminq_f64(x, y);
}

// vmla* family: fused when CONFIG == 1, separate multiply and add/sub
// otherwise. Both branches provide the same three functions, mirroring the
// single-precision helpers.
#if CONFIG == 1
// z + x * y
static INLINE VECTOR_CC vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
  return vfmaq_f64(z, x, y);
}
// z - x * y
static INLINE VECTOR_CC vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
  return vfmsq_f64(z, x, y);
}
// x * y - z, obtained by negating (z - x * y)
static INLINE VECTOR_CC vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
  return vneg_vd_vd(vfmsq_f64(z, x, y));
}
#else
// z + x * y
static INLINE VECTOR_CC vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
  return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z);
}
// z - x * y (previously missing from this branch)
static INLINE VECTOR_CC vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
  return vsub_vd_vd_vd(z, vmul_vd_vd_vd(x, y));
}
// x * y - z
static INLINE VECTOR_CC vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
  return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z);
}
#endif

// vfma* family: always fused, independent of CONFIG.
// z + x * y
static INLINE VECTOR_CC vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
  return vfmaq_f64(z, x, y);
}
// z - x * y
static INLINE VECTOR_CC vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
  return vfmsq_f64(z, x, y);
}
// x * y - z
static INLINE VECTOR_CC vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
  return vneg_vd_vd(vfmanp_vd_vd_vd_vd(x, y, z));
}

// Reciprocal 1/x, Division, Square root
// n / d. With ENABLE_ALTDIV the quotient is built from the reciprocal estimate
// instead of the divide instruction.
static INLINE VECTOR_CC vdouble vdiv_vd_vd_vd(vdouble n, vdouble d) {
#ifndef ENABLE_ALTDIV
  return vdivq_f64(n, d);
#else
  // Finite numbers (including denormal) only, gives mostly correctly rounded result
  float64x2_t t, u, x, y;
  uint64x2_t i0, i1;

  // Derive a scale factor from the masked exponent bits of n and d, and apply
  // it to both operands before estimating the reciprocal.
  i0 = vandq_u64(vreinterpretq_u64_f64(n), vdupq_n_u64(0x7fc0000000000000L));
  i1 = vandq_u64(vreinterpretq_u64_f64(d), vdupq_n_u64(0x7fc0000000000000L));
  i0 = vsubq_u64(vdupq_n_u64(0x7fd0000000000000L), vshrq_n_u64(vaddq_u64(i0, i1), 1));
  t = vreinterpretq_f64_u64(i0);
  y = vmulq_f64(d, t);
  x = vmulq_f64(n, t);

  // Reciprocal estimate followed by three vrecps refinement steps
  // (one more than the single-precision version).
  t = vrecpeq_f64(y);
  t = vmulq_f64(t, vrecpsq_f64(y, t));
  t = vmulq_f64(t, vrecpsq_f64(y, t));
  t = vmulq_f64(t, vrecpsq_f64(y, t));

  // Quotient, then one fused correction using the residual x - y * u.
  u = vmulq_f64(x, t);
  u = vfmaq_f64(u, vfmsq_f64(x, y, u), t);
  return u;
#endif
}

// 1 / d. The ENABLE_ALTDIV path forces the result to 0 where |d| is infinity.
// The constant is written as a double literal to match vcast_vd_d's parameter.
static INLINE VECTOR_CC vdouble vrec_vd_vd(vdouble d) {
#ifndef ENABLE_ALTDIV
  return vdiv_vd_vd_vd(vcast_vd_d(1.0), d);
#else
  return vbslq_f64(vceqq_f64(vabs_vd_vd(d), vcast_vd_d(SLEEF_INFINITY)),
                   vcast_vd_d(0),
                   vdiv_vd_vd_vd(vcast_vd_d(1.0), d));
#endif
}

// sqrt(d). With ENABLE_ALTSQRT the root is refined from the reciprocal
// square-root estimate; the statement order below is significant.
static INLINE VECTOR_CC vdouble vsqrt_vd_vd(vdouble d) {
#ifndef ENABLE_ALTSQRT
  return vsqrtq_f64(d);
#else
  // Gives correctly rounded result for all input range
  vdouble w, x, y, z;

  y = vrsqrteq_f64(d);
  x = vmul_vd_vd_vd(d, y);
  w = vmul_vd_vd_vd(vcast_vd_d(0.5), y);
  y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5));
  x = vfma_vd_vd_vd_vd(x, y, x);
  w = vfma_vd_vd_vd_vd(w, y, w);
  // Second refinement round (the single-precision version has only one).
  y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5));
  x = vfma_vd_vd_vd_vd(x, y, x);
  w = vfma_vd_vd_vd_vd(w, y, w);

  y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(1.5));
  w = vadd_vd_vd_vd(w, w);
  w = vmul_vd_vd_vd(w, y);
  x = vmul_vd_vd_vd(w, d);
  y = vfmapn_vd_vd_vd_vd(w, d, x);
  z = vfmanp_vd_vd_vd_vd(w, x, vcast_vd_d(1));
  z = vfmanp_vd_vd_vd_vd(w, y, z);
  w = vmul_vd_vd_vd(vcast_vd_d(0.5), x);
  w = vfma_vd_vd_vd_vd(w, z, y);
  w = vadd_vd_vd_vd(w, x);

  // Inputs equal to 0 or +infinity are returned unchanged.
  return vbslq_f64(vorrq_u64(vceqq_f64(d, vcast_vd_d(0)),
                             vceqq_f64(d, vcast_vd_d(SLEEF_INFINITY))),
                   d, w);
#endif
}

/* Comparisons */
// The 64-bit comparison result is reinterpreted as four 32-bit words because
// vopmask is a uint32x4_t.
static INLINE VECTOR_CC vopmask veq_vo_vd_vd(vdouble x, vdouble y) {
  return vreinterpretq_u32_u64(vceqq_f64(x, y));
}
static INLINE VECTOR_CC vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(x, y))); } static INLINE VECTOR_CC vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpretq_u32_u64(vcltq_f64(x, y)); } static INLINE VECTOR_CC vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpretq_u32_u64(vcgtq_f64(x, y)); } static INLINE VECTOR_CC vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return vreinterpretq_u32_u64(vcleq_f64(x, y)); } static INLINE VECTOR_CC vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return vreinterpretq_u32_u64(vcgeq_f64(x, y)); } // Conditional select static INLINE VECTOR_CC vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) { return vbslq_f64(vreinterpretq_u64_u32(mask), x, y); } #if 1 static INLINE CONST VECTOR_CC vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); } static INLINE VECTOR_CC vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); } static INLINE VECTOR_CC vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); } #else // This implementation is slower on the current CPU models (as of May 2017.) // I(Naoki Shibata) expect that on future CPU models with hardware similar to Super Shuffle Engine, this implementation will be faster. 
// Table-lookup variant: the mask chooses byte indices 0-7 (d0) or 8-15 (d1),
// and vqtbl1q_u8 fetches those bytes from a table holding { d0, d1 }.
static INLINE CONST VECTOR_CC vdouble vsel_vd_vo_d_d(vopmask o, double d0, double d1) {
  uint8x16_t idx = vbslq_u8(vreinterpretq_u8_u32(o), (uint8x16_t) { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 },
			    (uint8x16_t) { 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15 });

  uint8x16_t tab = (uint8x16_t) (float64x2_t) { d0, d1 };
  return (vdouble) vqtbl1q_u8(tab, idx);
}

// Same idea with a 32-byte table { d0, d1, d2, d3 }: the nested selects pick
// the byte range of the constant belonging to the first mask that is set.
static INLINE VECTOR_CC vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
  uint8x16_t idx = vbslq_u8(vreinterpretq_u8_u32(o0), (uint8x16_t) { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 },
			    vbslq_u8(vreinterpretq_u8_u32(o1), (uint8x16_t) { 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15 },
				     vbslq_u8(vreinterpretq_u8_u32(o2), (uint8x16_t) { 16, 17, 18, 19, 20, 21, 22, 23, 16, 17, 18, 19, 20, 21, 22, 23 },
					      (uint8x16_t) { 24, 25, 26, 27, 28, 29, 30, 31, 24, 25, 26, 27, 28, 29, 30, 31 })));

  uint8x16x2_t tab = { { (uint8x16_t) (float64x2_t) { d0, d1 }, (uint8x16_t) (float64x2_t) { d2, d3 } } };
  return (vdouble) vqtbl2q_u8(tab, idx);
}

// Three-constant select expressed through the four-constant one (o1 and d2 are passed twice).
static INLINE VECTOR_CC vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
  return vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o1, d0, d1, d2, d2);
}
#endif

// Round to nearest integral value (vrndn*: ties to even), result stays floating point.
static INLINE VECTOR_CC vdouble vrint_vd_vd(vdouble d) {
  return vrndnq_f64(d);
}

static INLINE VECTOR_CC vfloat vrint_vf_vf(vfloat d) {
  return vrndnq_f32(d);
}

/****************************************/
/* int <--> float conversions           */
/****************************************/
// double -> int32 via a 64-bit intermediate, narrowed with vmovn (no saturation).
static INLINE VECTOR_CC vint vtruncate_vi_vd(vdouble vf) {
  return vmovn_s64(vcvtq_s64_f64(vf));
}

// int32 -> double: widen to 64-bit, then convert.
static INLINE VECTOR_CC vdouble vcast_vd_vi(vint vi) {
  return vcvtq_f64_s64(vmovl_s32(vi));
}

// Broadcast a scalar int to every lane.
static INLINE VECTOR_CC vint vcast_vi_i(int i) { return vdup_n_s32(i); }

// Round to nearest, convert, then narrow with saturation (vqmovn).
static INLINE VECTOR_CC vint vrint_vi_vd(vdouble d) {
  return vqmovn_s64(vcvtq_s64_f64(vrndnq_f64(d)));
}

/***************************************/
/* Integer operations */
/***************************************/ // Add, Sub, Neg (-x) static INLINE VECTOR_CC vint vadd_vi_vi_vi(vint x, vint y) { return vadd_s32(x, y); } static INLINE VECTOR_CC vint vsub_vi_vi_vi(vint x, vint y) { return vsub_s32(x, y); } static INLINE VECTOR_CC vint vneg_vi_vi(vint e) { return vneg_s32(e); } // Logical operations static INLINE VECTOR_CC vint vand_vi_vi_vi(vint x, vint y) { return vand_s32(x, y); } static INLINE VECTOR_CC vint vandnot_vi_vi_vi(vint x, vint y) { return vbic_s32(y, x); } static INLINE VECTOR_CC vint vor_vi_vi_vi(vint x, vint y) { return vorr_s32(x, y); } static INLINE VECTOR_CC vint vxor_vi_vi_vi(vint x, vint y) { return veor_s32(x, y); } // Comparison returning masks static INLINE VECTOR_CC vopmask veq_vo_vi_vi(vint x, vint y) { return vcombine_u32(vceq_s32(x, y), vdup_n_u32(0)); } // Conditional select static INLINE VECTOR_CC vint vsel_vi_vm_vi_vi(vmask m, vint x, vint y) { return vbsl_s32(vget_low_u32(m), x, y); } /***************************************/ /* Predicates */ /***************************************/ static INLINE VECTOR_CC vopmask visinf_vo_vd(vdouble d) { const float64x2_t inf = vdupq_n_f64(SLEEF_INFINITY); const float64x2_t neg_inf = vdupq_n_f64(-SLEEF_INFINITY); uint64x2_t cmp = vorrq_u64(vceqq_f64(d, inf), vceqq_f64(d, neg_inf)); return vreinterpretq_u32_u64(cmp); } static INLINE VECTOR_CC vopmask visnan_vo_vd(vdouble d) { return vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(d, d))); } static INLINE VECTOR_CC vopmask vispinf_vo_vd(vdouble d) { return vreinterpretq_u32_u64(vceqq_f64(d, vdupq_n_f64(SLEEF_INFINITY))); } static INLINE VECTOR_CC vopmask visminf_vo_vd(vdouble d) { return vreinterpretq_u32_u64(vceqq_f64(d, vdupq_n_f64(-SLEEF_INFINITY))); } static INLINE VECTOR_CC vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { return vbslq_f32(mask, x, y); } static INLINE CONST VECTOR_CC vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static 
INLINE VECTOR_CC vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) {
  return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2));
}

// Three-level select: d0 / d1 / d2 for the first of o0, o1, o2 that is set, else d3.
static INLINE VECTOR_CC vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) {
  return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3)));
}

// Single-precision comparisons; the NEON result is used directly as the mask.
static INLINE VECTOR_CC vopmask veq_vo_vf_vf(vfloat x, vfloat y) {
  return vceqq_f32(x, y);
}
static INLINE VECTOR_CC vopmask vneq_vo_vf_vf(vfloat x, vfloat y) {
  return vmvnq_u32(vceqq_f32(x, y));
}
static INLINE VECTOR_CC vopmask vlt_vo_vf_vf(vfloat x, vfloat y) {
  return vcltq_f32(x, y);
}
static INLINE VECTOR_CC vopmask vle_vo_vf_vf(vfloat x, vfloat y) {
  return vcleq_f32(x, y);
}
static INLINE VECTOR_CC vopmask vgt_vo_vf_vf(vfloat x, vfloat y) {
  return vcgtq_f32(x, y);
}
static INLINE VECTOR_CC vopmask vge_vo_vf_vf(vfloat x, vfloat y) {
  return vcgeq_f32(x, y);
}

// 32-bit integer comparisons on full-width (vint2) vectors.
static INLINE VECTOR_CC vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) {
  return vceqq_s32(x, y);
}
static INLINE VECTOR_CC vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) {
  return vcgtq_s32(x, y);
}
// Half-width compare: result in the low half of the mask, high half zero.
static INLINE VECTOR_CC vopmask vgt_vo_vi_vi(vint x, vint y) {
  return vcombine_u32(vcgt_s32(x, y), vdup_n_u32(0));
}

// Single-precision predicates.
static INLINE VECTOR_CC vopmask visinf_vo_vf(vfloat d) {
  return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf));
}
static INLINE VECTOR_CC vopmask vispinf_vo_vf(vfloat d) {
  return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf));
}
static INLINE VECTOR_CC vopmask visminf_vo_vf(vfloat d) {
  return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf));
}
// A lane is NaN when it is not equal to itself.
static INLINE VECTOR_CC vopmask visnan_vo_vf(vfloat d) {
  return vneq_vo_vf_vf(d, d);
}

// Mask width conversions: unzip takes the even-indexed 32-bit elements of m,
// zip duplicates each of the low 32-bit elements of m.
static INLINE VECTOR_CC vopmask vcast_vo32_vo64(vopmask m) {
  return vuzpq_u32(m, m).val[0];
}
static INLINE VECTOR_CC vopmask vcast_vo64_vo32(vopmask m) {
  return vzipq_u32(m, m).val[0];
}

static INLINE VECTOR_CC vopmask vand_vo_vo_vo(vopmask x, vopmask y) {
  return
vandq_u32(x, y);
}
// All *andnot* helpers compute (~x) & y; vbic*(a, b) is a & ~b, hence the swapped arguments.
static INLINE VECTOR_CC vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) {
  return vbicq_u32(y, x);
}
static INLINE VECTOR_CC vopmask vor_vo_vo_vo(vopmask x, vopmask y) {
  return vorrq_u32(x, y);
}
static INLINE VECTOR_CC vopmask vxor_vo_vo_vo(vopmask x, vopmask y) {
  return veorq_u32(x, y);
}

// Integer select: x where m is set, y elsewhere.
static INLINE VECTOR_CC vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) {
  return vbslq_s32(m, x, y);
}
// Mask applied to integer vectors.
static INLINE VECTOR_CC vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) {
  return vandq_s32(vreinterpretq_s32_u32(x), y);
}
static INLINE VECTOR_CC vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) {
  return vbicq_s32(y, vreinterpretq_s32_u32(x));
}
// Half-width variant: only the low half of the mask is used.
static INLINE VECTOR_CC vint vandnot_vi_vo_vi(vopmask x, vint y) {
  return vbic_s32(y, vget_low_s32(vreinterpretq_s32_u32(x)));
}

// Mask applied to vmask values; the vo32 and vo64 flavours share one implementation.
static INLINE VECTOR_CC vmask vand_vm_vo32_vm(vopmask x, vmask y) {
  return vandq_u32(x, y);
}
static INLINE VECTOR_CC vmask vand_vm_vo64_vm(vopmask x, vmask y) {
  return vandq_u32(x, y);
}
static INLINE VECTOR_CC vmask vandnot_vm_vo32_vm(vopmask x, vmask y) {
  return vbicq_u32(y, x);
}
static INLINE VECTOR_CC vmask vandnot_vm_vo64_vm(vopmask x, vmask y) {
  return vbicq_u32(y, x);
}
static INLINE VECTOR_CC vmask vor_vm_vo32_vm(vopmask x, vmask y) {
  return vorrq_u32(x, y);
}
static INLINE VECTOR_CC vmask vor_vm_vo64_vm(vopmask x, vmask y) {
  return vorrq_u32(x, y);
}
static INLINE VECTOR_CC vmask vxor_vm_vo32_vm(vopmask x, vmask y) {
  return veorq_u32(x, y);
}

// Round toward zero, result stays floating point.
static INLINE VECTOR_CC vfloat vtruncate_vf_vf(vfloat vd) {
  return vrndq_f32(vd);
}

// Broadcast a 64-bit pattern whose high 32 bits are i0 and low 32 bits are i1.
static INLINE VECTOR_CC vmask vcast_vm_i_i(int i0, int i1) {
  return vreinterpretq_u32_u64(vdupq_n_u64((0xffffffff & (uint64_t)i1) | (((uint64_t)i0) << 32)));
}

// 64-bit lane equality.
static INLINE VECTOR_CC vopmask veq64_vo_vm_vm(vmask x, vmask y) {
  return vreinterpretq_u32_u64(vceqq_s64(vreinterpretq_s64_u32(x), vreinterpretq_s64_u32(y)));
}

static INLINE VECTOR_CC vmask vadd64_vm_vm_vm(vmask x, vmask y) {
  return vreinterpretq_u32_s64(vaddq_s64(vreinterpretq_s64_u32(x),
vreinterpretq_s64_u32(y)));
}

// Half-width integer select; only the low half of the mask is used.
static INLINE VECTOR_CC vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) {
  return vbsl_s32(vget_low_u32(m), x, y);
}

// Logical operations
static INLINE VECTOR_CC vint vand_vi_vo_vi(vopmask x, vint y) {
  return vand_s32(vreinterpret_s32_u32(vget_low_u32(x)), y);
}

// Widen each 32-bit element to 64 bits (zero-extended), then swap the two
// 32-bit halves of every 64-bit lane so the value sits in the upper half.
static INLINE VECTOR_CC vint2 vcastu_vi2_vi(vint vi) {
  return vreinterpretq_s32_u32(vrev64q_u32(vreinterpretq_u32_u64(vmovl_u32(vreinterpret_u32_s32(vi)))));
}
// Inverse direction: swap the halves of every 64-bit lane, then narrow to 32 bits.
static INLINE VECTOR_CC vint vcastu_vi_vi2(vint2 vi2) {
  return vreinterpret_s32_u32(vmovn_u64(vreinterpretq_u64_u32(vrev64q_u32(vreinterpretq_u32_s32(vi2)))));
}
static INLINE VECTOR_CC vdouble vreinterpret_vd_vi2(vint2 vi) {
  return vreinterpretq_f64_s32(vi);
}
// Round toward zero, result stays floating point.
static INLINE VECTOR_CC vdouble vtruncate_vd_vd(vdouble vd) {
  return vrndq_f64(vd);
}

//

// Sign masks: XOR-ing with -0.0 flips the sign of a lane, +0.0 leaves it alone.
// PN* flips the odd-indexed lanes, NP* flips the even-indexed lanes.
#define PNMASK ((vdouble) { +0.0, -0.0 })
#define NPMASK ((vdouble) { -0.0, +0.0 })
#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f })
#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f })

static INLINE VECTOR_CC vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); }
static INLINE VECTOR_CC vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); }
static INLINE VECTOR_CC vfloat vposneg_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)d, (vmask)PNMASKf); }
static INLINE VECTOR_CC vfloat vnegpos_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)d, (vmask)NPMASKf); }

// x + negpos(y): subtracts y in even-indexed lanes and adds it in odd-indexed lanes.
static INLINE VECTOR_CC vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); }
static INLINE VECTOR_CC vfloat vsubadd_vf_vf_vf(vfloat d0, vfloat d1) { return vadd_vf_vf_vf(d0, vnegpos_vf_vf(d1)); }
// Same alternating sub/add applied to the product x * y.
static INLINE VECTOR_CC vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsubadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
static INLINE VECTOR_CC vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat
z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); }

// Swap the two double lanes.
static INLINE VECTOR_CC vdouble vrev21_vd_vd(vdouble d0) { return (float64x2_t)vcombine_u64(vget_high_u64((uint64x2_t)d0), vget_low_u64((uint64x2_t)d0)); }
// Identity for the double vector.
static INLINE VECTOR_CC vdouble vreva2_vd_vd(vdouble vd) { return vd; }

// Streaming store is implemented as a plain vstore_v_p_vd here.
static INLINE VECTOR_CC void vstream_v_p_vd(double *ptr, vdouble v) { vstore_v_p_vd(ptr, v); }
// Stores the whole vector at element index 2*offset; step is not used.
static INLINE VECTOR_CC void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); }
static INLINE VECTOR_CC void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); }

// Swap the two floats inside each 64-bit pair.
static INLINE VECTOR_CC vfloat vrev21_vf_vf(vfloat d0) { return vrev64q_f32(d0); }
// Swap the low and high 64-bit halves.
static INLINE VECTOR_CC vfloat vreva2_vf_vf(vfloat d0) { return vcombine_f32(vget_high_f32(d0), vget_low_f32(d0)); }
static INLINE VECTOR_CC vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); }

static INLINE VECTOR_CC void vstream_v_p_vf(float *ptr, vfloat v) { vstore_v_p_vf(ptr, v); }

// Writes the low float pair at element index (offset)*2 and the high pair at (offset + step)*2.
static INLINE VECTOR_CC void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) {
  vst1_f32((float *)(ptr+(offset + step * 0)*2), vget_low_f32(v));
  vst1_f32((float *)(ptr+(offset + step * 1)*2), vget_high_f32(v));
}

static INLINE VECTOR_CC void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) {
  vst1_f32((float *)(ptr+(offset + step * 0)*2), vget_low_f32(v));
  vst1_f32((float *)(ptr+(offset + step * 1)*2), vget_high_f32(v));
}

//

// 2x2 transpose of the 64-bit lanes of v.x and v.y:
// result.x = { v.x[0], v.y[0] }, result.y = { v.x[1], v.y[1] }.
static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) {
  return (vmask2) {
    vreinterpretq_u32_u64(vtrn1q_u64(vreinterpretq_u64_u32(v.x), vreinterpretq_u64_u32(v.y))),
    vreinterpretq_u32_u64(vtrn2q_u64(vreinterpretq_u64_u32(v.x), vreinterpretq_u64_u32(v.y))) };
}

static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) {
  return (vmask2) {
    vreinterpretq_u32_u64(vtrn1q_u64(vreinterpretq_u64_u32(v.x), vreinterpretq_u64_u32(v.y))),
vreinterpretq_u32_u64(vtrn2q_u64(vreinterpretq_u64_u32(v.x), vreinterpretq_u64_u32(v.y))) };
}

// These (un)interleave operations are identities in this helper.
static INLINE vint vuninterleave_vi_vi(vint v) { return v; }
static INLINE vdouble vinterleave_vd_vd(vdouble vd) { return vd; }
static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { return vd; }
static INLINE vmask vinterleave_vm_vm(vmask vm) { return vm; }
static INLINE vmask vuninterleave_vm_vm(vmask vm) { return vm; }

// Unaligned load of a vmask2: copies VECTLENDP * 16 bytes from p.
static vmask2 vloadu_vm2_p(void *p) {
  vmask2 vm2;
  memcpy(&vm2, p, VECTLENDP * 16);
  return vm2;
}

#if !defined(SLEEF_GENHEADER)
typedef Sleef_quad2 vargquad;

// Argument-quad -> vmask2: byte copy followed by interleave.
static INLINE vmask2 vcast_vm2_aq(vargquad aq) {
  return vinterleave_vm2_vm2(vloadu_vm2_p(&aq));
}

// vmask2 -> argument-quad: uninterleave followed by byte copy.
static INLINE vargquad vcast_aq_vm2(vmask2 vm2) {
  vm2 = vuninterleave_vm2_vm2(vm2);
  vargquad aq;
  memcpy(&aq, &vm2, VECTLENDP * 16);
  return aq;
}
#endif // #if !defined(SLEEF_GENHEADER)

// ORs the two halves of g, takes the pairwise maximum, and returns the bitwise
// complement of that 32-bit value: non-zero when every bit of g is clear, and 0
// when the reduced value is all-ones.
// NOTE(review): a reduced value that is neither 0 nor all-ones also yields a
// non-zero result -- this presumably relies on g being a canonical mask; confirm.
static INLINE int vtestallzeros_i_vo64(vopmask g) {
  uint32x2_t x0 = vorr_u32(vget_low_u32(g), vget_high_u32(g));
  uint32x2_t x1 = vpmax_u32(x0, x0);
  return ~vget_lane_u32(x1, 0);
}

// Select: x where m is set, y elsewhere.
static INLINE vmask vsel_vm_vo64_vm_vm(vopmask m, vmask x, vmask y) {
  return vbslq_u32(m, x, y);
}

// 64-bit lane arithmetic on vmask values.
static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) {
  return vreinterpretq_u32_s64(vsubq_s64(vreinterpretq_s64_u32(x), vreinterpretq_s64_u32(y)));
}

static INLINE vmask vneg64_vm_vm(vmask x) {
  return vreinterpretq_u32_s64(vnegq_s64(vreinterpretq_s64_u32(x)));
}

// Signed 64-bit greater-than.
static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) {
  return vreinterpretq_u32_u64(vcgtq_s64(vreinterpretq_s64_u32(x), vreinterpretq_s64_u32(y)));
}

// 64-bit logical shifts by an immediate; macros because the shift count is
// passed straight to the _n_ (immediate) intrinsics.
#define vsll64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x), c))
//@#define vsll64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x), c))
#define vsrl64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x), c))
//@#define vsrl64_vm_vm_i(x, c) vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x), c))

static INLINE vmask vcast_vm_vi(vint vi) {
  vmask m =
vreinterpretq_u32_u64(vmovl_u32(vreinterpret_u32_s32(vi))); return vor_vm_vm_vm(vcast_vm_vi2(vcastu_vi2_vi(vreinterpret_s32_u32(vget_low_u32(vgt_vo_vi_vi(vcast_vi_i(0), vi))))), m); } static INLINE vint vcast_vi_vm(vmask vm) { return vreinterpret_s32_u32(vmovn_u64(vreinterpretq_u64_u32(vm))); } sleef-3.5.1/src/arch/helperavx.h000066400000000000000000000753771373003144100165100ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if CONFIG == 1 #if !defined(__AVX__) && !defined(SLEEF_GENHEADER) #error Please specify -mavx. #endif #elif CONFIG == 4 #if (!defined(__AVX__) || !defined(__FMA4__)) && !defined(SLEEF_GENHEADER) #error Please specify -mavx and -mfma4. #endif #else #error CONFIG macro invalid or not defined #endif #define ENABLE_DP //@#define ENABLE_DP #define LOG2VECTLENDP 2 //@#define LOG2VECTLENDP 2 #define VECTLENDP (1 << LOG2VECTLENDP) //@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP //@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) //@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) //@#define VECTLENSP (1 << LOG2VECTLENSP) #define FULL_FP_ROUNDING //@#define FULL_FP_ROUNDING #define ACCURATE_SQRT //@#define ACCURATE_SQRT #if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include #else #include #endif #include #include "misc.h" #endif // #if !defined(SLEEF_GENHEADER) typedef __m256i vmask; typedef __m256i vopmask; typedef __m256d vdouble; typedef __m128i vint; typedef __m256 vfloat; typedef struct { __m128i x, y; } vint2; typedef struct { vmask x, y; } vmask2; // #if !defined(SLEEF_GENHEADER) #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif static INLINE int cpuSupportsAVX() { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); return (reg[2] & (1 << 28)) != 0; } static INLINE int 
cpuSupportsFMA4() {
    int32_t reg[4];
    // Extended leaf 0x80000001; bit 16 of the third returned register is tested.
    Sleef_x86CpuID(reg, 0x80000001, 0);
    return (reg[2] & (1 << 16)) != 0;
}

#if CONFIG == 4 && defined(__AVX__) && defined(__FMA4__)
// Returns 3 when the CPU reports both AVX and FMA4, 0 otherwise. name is unused.
static INLINE int vavailability_i(int name) {
  //int d = __builtin_cpu_supports("avx") && __builtin_cpu_supports("fma4");
  int d = cpuSupportsAVX() && cpuSupportsFMA4();
  return d ? 3 : 0;
}

//typedef vint2 vint2_fma4;

#define ENABLE_FMA_DP
#define ENABLE_FMA_SP

#define ISANAME "AVX + AMD FMA4"
#define DFTPRIORITY 21
#else
// Returns 3 when the CPU reports AVX, 0 otherwise. name is unused.
static INLINE int vavailability_i(int name) {
  int d = cpuSupportsAVX();
  return d ? 3 : 0;
}
//typedef vint2 vint2_avx;

#define ISANAME "AVX"
#define DFTPRIORITY 20
#endif

#endif // #if !defined(SLEEF_GENHEADER)

static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); }

// All-ones test: AND the two 128-bit halves, then check every bit is set.
// The vo32 and vo64 flavours share one implementation.
static INLINE int vtestallones_i_vo32(vopmask g) {
  return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1)));
}

static INLINE int vtestallones_i_vo64(vopmask g) {
  return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1)));
}

//

// Broadcast a scalar double to every lane.
static INLINE vdouble vcast_vd_d(double d) { return _mm256_set1_pd(d); }
// Bit-preserving casts between the double and mask views.
static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm256_castpd_si256(vd); }
static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm256_castsi256_pd(vm); }

// Split the 256-bit value into the two 128-bit halves of a vint2 (x = low, y = high).
static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) {
  vint2 r;
  r.x = _mm256_castsi256_si128(vreinterpret_vm_vd(vd));
  r.y = _mm256_extractf128_si256(vreinterpret_vm_vd(vd), 1);
  return r;
}

// Reassemble a vint2 into one 256-bit value (x = low half, y = high half).
static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) {
  vmask m = _mm256_castsi128_si256(vi.x);
  m = _mm256_insertf128_si256(m, vi.y, 1);
  return vreinterpret_vd_vm(m);
}

//

// Unaligned load/store of eight int32 values as two 128-bit halves.
static vint2 vloadu_vi2_p(int32_t *p) {
  vint2 r;
  r.x = _mm_loadu_si128((__m128i *) p     );
  r.y = _mm_loadu_si128((__m128i *)(p + 4));
  return r;
}

static void vstoreu_v_p_vi2(int32_t *p, vint2 v) {
  _mm_storeu_si128((__m128i *) p     , v.x);
  _mm_storeu_si128((__m128i *)(p + 4), v.y);
}

static vint
vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
// Unaligned store of four int32 values.
static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); }

//

// Bitwise logic on 256-bit masks, routed through the packed-double logical
// instructions. andnot computes (~x) & y.
static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }

// Same operations between two operation masks.
static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }

// Operation mask (64-bit lane flavour) applied to a vmask.
static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }

static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return
vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }

// 64-bit lane mask -> 32-bit lane mask: AND-ing with the bits of -1.0 turns a
// set lane into -1.0 and a clear lane into 0.0; the double->int32 conversion
// then yields -1 (all ones) or 0. _mm256_castsi128_si256 leaves the upper
// 128 bits of the result undefined.
static INLINE vopmask vcast_vo32_vo64(vopmask o) {
  return _mm256_castsi128_si256(_mm256_cvtpd_epi32(_mm256_and_pd(vreinterpret_vd_vm(o), _mm256_set1_pd(-1.0))));
}

// 32-bit lane mask -> 64-bit lane mask: convert the low four int32 lanes to
// double and compare against -1.0.
static INLINE vopmask vcast_vo64_vo32(vopmask o) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(_mm256_cvtepi32_pd(_mm256_castsi256_si128(o)), _mm256_set1_pd(-1.0), _CMP_EQ_OQ));
}

//

// double -> int32 conversions: cvtpd uses the current rounding mode, cvttpd truncates.
static INLINE vint vrint_vi_vd(vdouble vd) { return _mm256_cvtpd_epi32(vd); }
static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm256_cvttpd_epi32(vd); }
// Rounding to an integral floating-point value, with exceptions suppressed.
static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm256_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); }
static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm256_round_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); }
static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm256_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); }
static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm256_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); }
// int32 -> double.
static INLINE vdouble vcast_vd_vi(vint vi) { return _mm256_cvtepi32_pd(vi); }
// Broadcast a scalar int to every lane.
static INLINE vint vcast_vi_i(int i) { return _mm_set1_epi32(i); }

// Place each of the four int32 inputs in the upper 32 bits of a 64-bit lane
// (lower 32 bits zero): r.x holds inputs 0 and 1, r.y holds inputs 2 and 3.
static INLINE vint2 vcastu_vi2_vi(vint vi) {
  vint2 r;
  r.x = _mm_and_si128(_mm_shuffle_epi32(vi, 0x40), _mm_set_epi32(-1, 0, -1, 0));
  r.y = _mm_and_si128(_mm_shuffle_epi32(vi, 0xc8), _mm_set_epi32(-1, 0, -1, 0));
  return r;
}

static INLINE vint vcastu_vi_vi2(vint2 vi) {
  return _mm_or_si128(_mm_and_si128(_mm_shuffle_epi32(vi.x, 0x0d),
_mm_set_epi32( 0,  0, -1, -1)),
		      _mm_and_si128(_mm_shuffle_epi32(vi.y, 0xd0), _mm_set_epi32(-1, -1,  0,  0)));
}

// Every 64-bit lane gets i0 in its upper 32 bits and i1 in its lower 32 bits.
static INLINE vmask vcast_vm_i_i(int i0, int i1) {
  return _mm256_set_epi32(i0, i1, i0, i1, i0, i1, i0, i1);
}

// 64-bit lane equality done with a floating-point compare: (x ^ y) ^ bits(1.0)
// has the bit pattern of 1.0 exactly when x ^ y is zero.
static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(vreinterpret_vd_vm(vxor_vm_vm_vm(vxor_vm_vm_vm(x, y), vreinterpret_vm_vd(_mm256_set1_pd(1.0)))), _mm256_set1_pd(1.0), _CMP_EQ_OQ));
}

//

// Double-precision arithmetic.
static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_add_pd(x, y); }
static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm256_sub_pd(x, y); }
static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm256_mul_pd(x, y); }
static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm256_div_pd(x, y); }
static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm256_div_pd(_mm256_set1_pd(1), x); }
static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm256_sqrt_pd(x); }
// -0.0 has only the sign bit set: andnot clears the sign, xor flips it.
static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm256_andnot_pd(_mm256_set1_pd(-0.0), d); }
static INLINE vdouble vneg_vd_vd(vdouble d) { return _mm256_xor_pd(_mm256_set1_pd(-0.0), d); }
static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm256_max_pd(x, y); }
static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm256_min_pd(x, y); }

// Multiply-add family: mla = x*y + z, mlapn = x*y - z, mlanp = z - x*y.
// CONFIG == 1 uses separate multiply and add/sub; the other configuration
// maps them to the FMA4 intrinsics.
#if CONFIG == 1
static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(z, vmul_vd_vd_vd(x, y)); }
#else
static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); }
static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_msub_pd(x, y, z); }
static INLINE vdouble
vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_nmacc_pd(x, y, z); }
// Fused variants (FMA4 only); the suffix names the intrinsic used:
// pp -> macc, pn -> msub, np -> nmacc, nn -> nmsub.
static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); }
static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); }
static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_msub_pd(x, y, z); }
static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_nmacc_pd(x, y, z); }
static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_nmsub_pd(x, y, z); }
#endif

// Double comparisons. All use ordered, quiet predicates except "not equal",
// which uses the unordered predicate _CMP_NEQ_UQ.
static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_EQ_OQ)); }
static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_NEQ_UQ)); }
static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_LT_OQ)); }
static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_LE_OQ)); }
static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_GT_OQ)); }
static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_GE_OQ)); }

//

// 32-bit integer arithmetic and logic on the 128-bit vint.
static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); }
static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); }
// Negation as 0 - e.
static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); }

static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); }
// (~x) & y
static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); }
static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); }
static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); }

static INLINE vint vandnot_vi_vo_vi(vopmask m,
vint y) { return _mm_andnot_si128(_mm256_castsi256_si128(m), y); }
// Only the low 128 bits of the mask are used with the 128-bit vint.
static INLINE vint vand_vi_vo_vi(vopmask m, vint y) { return _mm_and_si128(_mm256_castsi256_si128(m), y); }

// Shifts by a scalar count: logical left, logical right, arithmetic right.
static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); }
static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); }
static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); }

// Integer comparisons returning an integer vector ...
static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); }
static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); }

// ... and returning an operation mask (result in the low 128 bits; the cast
// leaves the upper 128 bits undefined).
static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm256_castsi128_si256(_mm_cmpeq_epi32(x, y)); }
static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm256_castsi128_si256(_mm_cmpgt_epi32(x, y)); }

// Selects: x where the mask is set, y elsewhere (note the blendv argument order).
static INLINE vint vsel_vi_vo_vi_vi(vopmask o, vint x, vint y) { return _mm_blendv_epi8(y, x, _mm256_castsi256_si128(o)); }

static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return _mm256_blendv_pd(y, x, _mm256_castsi256_pd(o)); }

// Select between two scalar constants: v1 where o is set, v0 elsewhere.
static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) {
  return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0));
}

// d0 where o0 is set, otherwise d1 where o1 is set, otherwise d2.
static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
  return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2));
}

// d0 / d1 / d2 for the first of o0, o1, o2 that is set, else d3.
static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
  return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3)));
}

// Double predicates.
static INLINE vopmask visinf_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(vabs_vd_vd(d), _mm256_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ));
}

static INLINE vopmask vispinf_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(d, _mm256_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ));
}

static INLINE vopmask visminf_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(d,
_mm256_set1_pd(-SLEEF_INFINITY), _CMP_EQ_OQ));
}

// NaN test: d != d under the unordered predicate.
static INLINE vopmask visnan_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(d, d, _CMP_NEQ_UQ));
}

// Aligned (load/store) and unaligned (loadu/storeu) memory access for doubles.
static INLINE vdouble vload_vd_p(const double *ptr) { return _mm256_load_pd(ptr); }
static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm256_loadu_pd(ptr); }

static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm256_store_pd(ptr, v); }
static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm256_storeu_pd(ptr, v); }

// Gather emulation: spill the indices to an array and load each element with
// scalar accesses (_mm256_set_pd takes its arguments highest lane first).
static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) {
  int a[VECTLENDP];
  vstoreu_v_p_vi(a, vi);
  return _mm256_set_pd(ptr[a[3]], ptr[a[2]], ptr[a[1]], ptr[a[0]]);
}

#if defined(_MSC_VER)
// This function is needed when debugging on MSVC.
static INLINE double vcast_d_vd(vdouble v) {
  double a[VECTLENDP];
  vstoreu_v_p_vd(a, v);
  return a[0];
}
#endif

//

// vmask <-> vint2: x is the low 128 bits, y the high 128 bits.
static INLINE vint2 vcast_vi2_vm(vmask vm) {
  vint2 r;
  r.x = _mm256_castsi256_si128(vm);
  r.y = _mm256_extractf128_si256(vm, 1);
  return r;
}

static INLINE vmask vcast_vm_vi2(vint2 vi) {
  vmask m = _mm256_castsi128_si256(vi.x);
  m = _mm256_insertf128_si256(m, vi.y, 1);
  return m;
}

// float <-> int32 conversions: cvtps uses the current rounding mode, cvttps truncates.
static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm256_cvtps_epi32(vf)); }
static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm256_cvttps_epi32(vf)); }
static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm256_cvtepi32_ps(vcast_vm_vi2(vi)); }
// Scalar broadcasts.
static INLINE vfloat vcast_vf_f(float f) { return _mm256_set1_ps(f); }
static INLINE vint2 vcast_vi2_i(int i) { vint2 r; r.x = r.y = _mm_set1_epi32(i); return r; }
// Bit-preserving casts between float, mask and vint2 views.
static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm256_castps_si256(vf); }
static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm256_castsi256_ps(vm); }

static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return vreinterpret_vf_vm(vcast_vm_vi2(vi)); }
static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return vcast_vi2_vm(vreinterpret_vm_vf(vf)); }

static INLINE vfloat
vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_add_ps(x, y); }
// Single-precision arithmetic.
static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm256_sub_ps(x, y); }
static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm256_mul_ps(x, y); }
static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm256_div_ps(x, y); }
static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); }
static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm256_sqrt_ps(x); }
// -0.0f has only the sign bit set: andnot clears the sign, xor flips it.
static INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); }
static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); }
static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm256_max_ps(x, y); }
static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm256_min_ps(x, y); }

// Multiply-add family: mla = x*y + z, mlanp = z - x*y, mlapn = x*y - z.
// CONFIG == 1 uses separate multiply and add/sub; the other configuration
// maps them to the FMA4 intrinsics.
#if CONFIG == 1
static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); }
static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); }
static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); }
#else
static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_macc_ps(x, y, z); }
static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_nmacc_ps(x, y, z); }
static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_msub_ps(x, y, z); }
static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_macc_ps(x, y, z); }
static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_macc_ps(x, y, z); }
static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_msub_ps(x, y, z); }
static INLINE vfloat
vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_nmacc_ps(x, y, z); }
static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_nmsub_ps(x, y, z); }
#endif

// Float comparisons. All use ordered, quiet predicates except "not equal",
// which uses the unordered predicate _CMP_NEQ_UQ.
static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_EQ_OQ)); }
static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_NEQ_UQ)); }
static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_LT_OQ)); }
static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_LE_OQ)); }
static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_GT_OQ)); }
static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_GE_OQ)); }

// vint2 arithmetic and logic: each operation is applied separately to the two
// 128-bit halves using SSE integer instructions.
static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) {
  vint2 vi = { _mm_add_epi32(x.x, y.x), _mm_add_epi32(x.y, y.y) };
  return vi;
}

static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) {
  vint2 vi = { _mm_sub_epi32(x.x, y.x), _mm_sub_epi32(x.y, y.y) };
  return vi;
}

// Negation as 0 - e.
static INLINE vint2 vneg_vi2_vi2(vint2 e) {
  vint2 vi = { _mm_sub_epi32(_mm_set1_epi32(0), e.x), _mm_sub_epi32(_mm_set1_epi32(0), e.y) };
  return vi;
}

static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) {
  vint2 vi = { _mm_and_si128(x.x, y.x), _mm_and_si128(x.y, y.y) };
  return vi;
}

// (~x) & y
static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) {
  vint2 vi = { _mm_andnot_si128(x.x, y.x), _mm_andnot_si128(x.y, y.y) };
  return vi;
}

static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) {
  vint2 vi = { _mm_or_si128(x.x, y.x), _mm_or_si128(x.y, y.y) };
  return vi;
}

static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) {
  vint2 vi = { _mm_xor_si128(x.x, y.x), _mm_xor_si128(x.y, y.y) };
  return vi;
}

// Operation mask applied to a vint2 (the mask is split into halves first).
static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return vand_vi2_vi2_vi2(vcast_vi2_vm(x), y); }
static
INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vandnot_vi2_vi2_vi2(vcast_vi2_vm(x), y); } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { vint2 vi = { _mm_slli_epi32(x.x, c), _mm_slli_epi32(x.y, c) }; return vi; } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { vint2 vi = { _mm_srli_epi32(x.x, c), _mm_srli_epi32(x.y, c) }; return vi; } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { vint2 vi = { _mm_srai_epi32(x.x, c), _mm_srai_epi32(x.y, c) }; return vi; } static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = _mm_cmpeq_epi32(x.x, y.x); r.y = _mm_cmpeq_epi32(x.y, y.y); return vcast_vm_vi2(r); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = _mm_cmpgt_epi32(x.x, y.x); r.y = _mm_cmpgt_epi32(x.y, y.y); return vcast_vm_vi2(r); } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = _mm_cmpeq_epi32(x.x, y.x); r.y = _mm_cmpeq_epi32(x.y, y.y); return r; } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = _mm_cmpgt_epi32(x.x, y.x); r.y = _mm_cmpgt_epi32(x.y, y.y); return r; } static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { vint2 n = vcast_vi2_vm(m); vint2 r = { _mm_blendv_epi8(y.x, x.x, n.x), _mm_blendv_epi8(y.y, x.y, n.y) }; return r; } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { vint2 ix = vcast_vi2_vm(x), iy = vcast_vi2_vm(y), iz; iz.x = _mm_add_epi64(ix.x, iy.x); iz.y = _mm_add_epi64(ix.y, iy.y); return vcast_vm_vi2(iz); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) { return _mm256_blendv_ps(y, x, _mm256_castsi256_ps(o)); } static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, 
vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } // static INLINE vfloat vload_vf_p(const float *ptr) { return _mm256_load_ps(ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm256_loadu_ps(ptr); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm256_store_ps(ptr, v); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm256_storeu_ps(ptr, v); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { int a[VECTLENSP]; vstoreu_v_p_vi2(a, vi2); return _mm256_set_ps(ptr[a[7]], ptr[a[6]], ptr[a[5]], ptr[a[4]], ptr[a[3]], ptr[a[2]], ptr[a[1]], ptr[a[0]]); } #ifdef _MSC_VER // This function is needed when debugging on MSVC. 
static INLINE float vcast_f_vf(vfloat v) { float a[VECTLENSP]; vstoreu_v_p_vf(a, v); return a[0]; } #endif // #define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0 }) #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_addsub_pd(x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_addsub_ps(x, y); } #if CONFIG == 1 static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsubadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } #else static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vmla_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmla_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } #endif static INLINE vdouble vrev21_vd_vd(vdouble d0) { return _mm256_shuffle_pd(d0, d0, (0 << 3) | (1 << 2) | (0 << 1) | (1 << 0)); } static INLINE vdouble vreva2_vd_vd(vdouble d0) { d0 = _mm256_permute2f128_pd(d0, d0, 1); return _mm256_shuffle_pd(d0, d0, (1 << 3) | (0 << 2) | (1 << 1) | (0 << 0)); } static 
INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm256_stream_pd(ptr, v); } static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_store_pd(&ptr[(offset + step * 0)*2], _mm256_extractf128_pd(v, 0)); _mm_store_pd(&ptr[(offset + step * 1)*2], _mm256_extractf128_pd(v, 1)); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_stream_pd(&ptr[(offset + step * 0)*2], _mm256_extractf128_pd(v, 0)); _mm_stream_pd(&ptr[(offset + step * 1)*2], _mm256_extractf128_pd(v, 1)); } // static INLINE vfloat vrev21_vf_vf(vfloat d0) { return _mm256_shuffle_ps(d0, d0, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); } static INLINE vfloat vreva2_vf_vf(vfloat d0) { d0 = _mm256_permute2f128_ps(d0, d0, 1); return _mm256_shuffle_ps(d0, d0, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); } static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm256_stream_ps(ptr, v); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 0)))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 0)))); _mm_storel_pd((double *)(ptr+(offset + step * 2)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 1)))); _mm_storeh_pd((double *)(ptr+(offset + step * 3)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 1)))); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } // static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { vreinterpret_vm_vd(_mm256_unpacklo_pd(vreinterpret_vd_vm(v.x), vreinterpret_vd_vm(v.y))), vreinterpret_vm_vd(_mm256_unpackhi_pd(vreinterpret_vd_vm(v.x), vreinterpret_vd_vm(v.y))) }; 
} static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { return (vmask2) { vreinterpret_vm_vd(_mm256_unpacklo_pd(vreinterpret_vd_vm(v.x), vreinterpret_vd_vm(v.y))), vreinterpret_vm_vd(_mm256_unpackhi_pd(vreinterpret_vd_vm(v.x), vreinterpret_vd_vm(v.y))) }; } static INLINE vint vuninterleave_vi_vi(vint v) { return _mm_shuffle_epi32(v, (0 << 0) | (2 << 2) | (1 << 4) | (3 << 6)); } static INLINE vdouble vinterleave_vd_vd(vdouble vd) { double tmp[4]; vstoreu_v_p_vd(tmp, vd); double t = tmp[1]; tmp[1] = tmp[2]; tmp[2] = t; return vloadu_vd_p(tmp); } static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { double tmp[4]; vstoreu_v_p_vd(tmp, vd); double t = tmp[1]; tmp[1] = tmp[2]; tmp[2] = t; return vloadu_vd_p(tmp); } static INLINE vmask vinterleave_vm_vm(vmask vm) { double tmp[4]; vstoreu_v_p_vd(tmp, vreinterpret_vd_vm(vm)); double t = tmp[1]; tmp[1] = tmp[2]; tmp[2] = t; return vreinterpret_vm_vd(vloadu_vd_p(tmp)); } static INLINE vmask vuninterleave_vm_vm(vmask vm) { double tmp[4]; vstoreu_v_p_vd(tmp, vreinterpret_vd_vm(vm)); double t = tmp[1]; tmp[1] = tmp[2]; tmp[2] = t; return vreinterpret_vm_vd(vloadu_vd_p(tmp)); } static vmask2 vloadu_vm2_p(void *p) { vmask2 vm2; memcpy(&vm2, p, VECTLENDP * 16); return vm2; } #if !defined(SLEEF_GENHEADER) typedef Sleef_quad4 vargquad; static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { vm2 = vuninterleave_vm2_vm2(vm2); vargquad aq; memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } #endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(_mm_or_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))) == 0; } static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_blendv_pd(vreinterpret_vd_vm(y), vreinterpret_vd_vm(x), vreinterpret_vd_vm(o))); } static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { __m128i xh = 
_mm256_extractf128_si256(x, 1), xl = _mm256_extractf128_si256(x, 0); __m128i yh = _mm256_extractf128_si256(y, 1), yl = _mm256_extractf128_si256(y, 0); vmask r = _mm256_castsi128_si256(_mm_sub_epi64(xl, yl)); return _mm256_insertf128_si256(r, _mm_sub_epi64(xh, yh), 1); } static INLINE vmask vneg64_vm_vm(vmask x) { return vsub64_vm_vm_vm(vcast_vm_i_i(0, 0), x); } static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { __m128i xh = _mm256_extractf128_si256(x, 1), xl = _mm256_extractf128_si256(x, 0); __m128i yh = _mm256_extractf128_si256(y, 1), yl = _mm256_extractf128_si256(y, 0); vmask r = _mm256_castsi128_si256(_mm_cmpgt_epi64(xl, yl)); return _mm256_insertf128_si256(r, _mm_cmpgt_epi64(xh, yh), 1); } #define vsll64_vm_vm_i(x, c) \ _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_slli_epi64(_mm256_extractf128_si256(x, 0), c)), \ _mm_slli_epi64(_mm256_extractf128_si256(x, 1), c), 1) #define vsrl64_vm_vm_i(x, c) \ _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_srli_epi64(_mm256_extractf128_si256(x, 0), c)), \ _mm_srli_epi64(_mm256_extractf128_si256(x, 1), c), 1) //@#define vsll64_vm_vm_i(x, c) _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_slli_epi64(_mm256_extractf128_si256(x, 0), c)), _mm_slli_epi64(_mm256_extractf128_si256(x, 1), c), 1) //@#define vsrl64_vm_vm_i(x, c) _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_srli_epi64(_mm256_extractf128_si256(x, 0), c)), _mm_srli_epi64(_mm256_extractf128_si256(x, 1), c), 1) static INLINE vmask vcast_vm_vi(vint vi) { vint vi0 = _mm_and_si128(_mm_shuffle_epi32(vi, (1 << 4) | (1 << 6)), _mm_set_epi32(0, -1, 0, -1)); vint vi1 = _mm_and_si128(_mm_shuffle_epi32(vi, (2 << 0) | (2 << 2) | (3 << 4) | (3 << 6)), _mm_set_epi32(0, -1, 0, -1)); vmask m = _mm256_insertf128_si256(_mm256_castsi128_si256(vi0), vi1, 1); return vor_vm_vm_vm(vcast_vm_vi2(vcastu_vi2_vi(vand_vi_vo_vi(vgt_vo_vi_vi(vcast_vi_i(0), vi), vcast_vi_i(-1)))), m); } static INLINE vint vcast_vi_vm(vmask vm) { return 
_mm_or_si128(_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm256_castsi256_si128(vm)), _mm_set1_ps(0), 0x08)), _mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vm, 1)), 0x80))); } sleef-3.5.1/src/arch/helperavx2.h000066400000000000000000000637641373003144100165670ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if CONFIG == 1 #if !defined(__AVX2__) && !defined(SLEEF_GENHEADER) #error Please specify -mavx2. #endif #else #error CONFIG macro invalid or not defined #endif #define ENABLE_DP //@#define ENABLE_DP #define LOG2VECTLENDP 2 //@#define LOG2VECTLENDP 2 #define VECTLENDP (1 << LOG2VECTLENDP) //@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_FMA_DP //@#define ENABLE_FMA_DP #define ENABLE_SP //@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) //@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) //@#define VECTLENSP (1 << LOG2VECTLENSP) #define ENABLE_FMA_SP //@#define ENABLE_FMA_SP #define FULL_FP_ROUNDING //@#define FULL_FP_ROUNDING #define ACCURATE_SQRT //@#define ACCURATE_SQRT #if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include #else #include #endif #include #include "misc.h" #endif // #if !defined(SLEEF_GENHEADER) typedef __m256i vmask; typedef __m256i vopmask; typedef __m256d vdouble; typedef __m128i vint; typedef __m256 vfloat; typedef __m256i vint2; typedef struct { vmask x, y; } vmask2; // #if !defined(SLEEF_GENHEADER) #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif static INLINE int cpuSupportsAVX2() { int32_t reg[4]; Sleef_x86CpuID(reg, 7, 0); return (reg[1] & (1 << 5)) != 0; } static INLINE int cpuSupportsFMA() { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); return (reg[2] & (1 << 12)) != 0; } #if CONFIG == 1 && defined(__AVX2__) 
// Runtime availability probe for this backend: evaluates to 3 when the CPU
// reports both AVX2 and FMA, and to 0 otherwise. The `name` argument is unused.
// NOTE(review): the meaning of the value 3 is defined by callers outside this chunk.
static INLINE int vavailability_i(int name) {
  int d = cpuSupportsAVX2() && cpuSupportsFMA();
  return d ? 3 : 0;
}

// ISA name and DFT priority of this backend (their consumers are outside this chunk).
#define ISANAME "AVX2"
#define DFTPRIORITY 25
#endif

#endif // #if !defined(SLEEF_GENHEADER)

// Issues a prefetch with the _MM_HINT_T0 locality hint for the address ptr.
static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); }

// Nonzero when every bit of g is set: the two 128-bit halves are ANDed together
// and the result is tested for all ones. The 32- and 64-bit variants are identical.
static INLINE int vtestallones_i_vo32(vopmask g) {
  return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1)));
}

static INLINE int vtestallones_i_vo64(vopmask g) {
  return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1)));
}

//

// Broadcast and bit-pattern reinterpretation helpers (no value conversion).
static INLINE vdouble vcast_vd_d(double d) { return _mm256_set1_pd(d); }
static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm256_castpd_si256(vd); }
static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm256_castsi256_pd(vm); }
static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return _mm256_castpd_si256(vd); }
static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return _mm256_castsi256_pd(vi); }

//

// Unaligned loads/stores of 8 (vint2) or 4 (vint) 32-bit integers.
static vint2 vloadu_vi2_p(int32_t *p) { return _mm256_loadu_si256((__m256i const *)p); }
static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm256_storeu_si256((__m256i *)p, v); }
static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); }

//

// Bitwise operations on 256-bit masks, routed through the double-precision
// logical intrinsics. Note the andnot operand order: the result is (~x) & y.
static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }

// Same operations between two operation masks.
static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }

// Operation mask combined with a data mask. The vo64 and vo32 variants are
// implemented identically here because both are plain 256-bit bitwise operations.
static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }

static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }
static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); }

// Pack a 64-bit-lane mask into the low four 32-bit lanes: 32-bit elements
// 0, 2, 4, 6 move to positions 0..3; positions 4..7 all receive element 0.
static INLINE vopmask vcast_vo32_vo64(vopmask o) {
  return _mm256_permutevar8x32_epi32(o, _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0));
}

// Expand the low four 32-bit mask lanes to 64-bit lanes by duplicating each
// into both halves of the corresponding 64-bit lane.
static INLINE vopmask vcast_vo64_vo32(vopmask o) {
  return _mm256_permutevar8x32_epi32(o, _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0));
}

//

// Conversions and rounding. vrint_* pass _MM_FROUND_TO_NEAREST_INT and
// vtruncate_* pass _MM_FROUND_TO_ZERO, both with exceptions suppressed
// (_MM_FROUND_NO_EXC). vrint_vi_vd uses the plain convert intrinsic and
// vtruncate_vi_vd its truncating counterpart.
static INLINE vint vrint_vi_vd(vdouble vd) { return _mm256_cvtpd_epi32(vd); }
static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm256_cvttpd_epi32(vd); }
static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm256_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); }
static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm256_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); }
static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm256_round_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); }
static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm256_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); }
static INLINE vdouble vcast_vd_vi(vint vi) { return _mm256_cvtepi32_pd(vi); }
static INLINE vint vcast_vi_i(int i) { return _mm_set1_epi32(i); }

// Place each of the four ints in the upper 32 bits of a 64-bit lane (lower
// 32 bits become zero): widen to 64 bits, then shift left by 32.
static INLINE vint2 vcastu_vi2_vi(vint vi) {
  return _mm256_slli_epi64(_mm256_cvtepi32_epi64(vi), 32);
}

// Inverse direction: collect the upper 32 bits of each 64-bit lane
// (32-bit elements 1 and 3 of each 128-bit half) into a 128-bit vector.
static INLINE vint vcastu_vi_vi2(vint2 vi) {
  return _mm_or_si128(_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm256_castsi256_si128(vi)), _mm_set1_ps(0), 0x0d)),
                      _mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vi, 1)), 0xd0)));
}

// Broadcast a 64-bit pattern whose upper 32 bits are i0 and lower 32 bits are i1.
static INLINE vmask vcast_vm_i_i(int i0, int i1) {
  return _mm256_set_epi32(i0, i1, i0, i1, i0, i1, i0, i1);
}

// 64-bit integer lane compare-equal and add.
static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return _mm256_cmpeq_epi64(x, y); }
static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return _mm256_add_epi64(x, y); }

//

// Double-precision arithmetic.
static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_add_pd(x, y); }
static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm256_sub_pd(x, y); }
static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm256_mul_pd(x, y); }
static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm256_div_pd(x, y); }
// Reciprocal computed as a full-precision division 1 / x.
static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm256_div_pd(_mm256_set1_pd(1), x); }
static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm256_sqrt_pd(x); }
// -0.0 has only the sign bit set: andnot clears the sign bit, xor flips it.
static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm256_andnot_pd(_mm256_set1_pd(-0.0), d); }
static INLINE vdouble vneg_vd_vd(vdouble d) { return _mm256_xor_pd(_mm256_set1_pd(-0.0), d); }
// Multiply-add family, all mapped to FMA intrinsics:
//   vmla   =  x*y + z      vmlapn =  x*y - z      vmlanp = -(x*y) + z
static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmadd_pd(x, y, z); }
static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmsub_pd(x, y, z); }
static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fnmadd_pd(x, y, z); }
static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm256_max_pd(x, y); }
static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm256_min_pd(x, y); }

// Explicitly fused variants; the two suffix letters give the signs of the
// product and of z (p = plus, n = minus), e.g. vfmann = -(x*y) - z.
static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmadd_pd(x, y, z); }
static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmadd_pd(x, y, z); }
static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmsub_pd(x, y, z); }
static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fnmadd_pd(x, y, z); }
static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fnmsub_pd(x, y, z); }

// Double-precision comparisons producing lane masks. All predicates are the
// ordered, quiet forms except not-equal, which is unordered (true for NaN).
static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_EQ_OQ)); }
static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_NEQ_UQ)); }
static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_LT_OQ)); }
static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_LE_OQ)); }
static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_GT_OQ)); }
static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_GE_OQ)); }

//

// 128-bit (4 x int32) integer arithmetic and logic.
static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); }
static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); }
static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); }

static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); }
static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); }
static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); }
static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); }

// Mask forms use only the low 128 bits of the 256-bit operation mask.
static INLINE vint vandnot_vi_vo_vi(vopmask m, vint y) { return _mm_andnot_si128(_mm256_castsi256_si128(m), y); }
static INLINE vint vand_vi_vo_vi(vopmask m, vint y) { return _mm_and_si128(_mm256_castsi256_si128(m), y); }

static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); }
static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); }
static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); }

static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); }
static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); }

// The compare result occupies the low 128 bits of the returned mask; the
// 128 -> 256 cast does not define the upper half.
static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm256_castsi128_si256(_mm_cmpeq_epi32(x, y)); }
static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm256_castsi128_si256(_mm_cmpgt_epi32(x, y)); }

// Select x where the mask is set, y elsewhere (blendv takes the "else" operand first).
static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return _mm_blendv_epi8(y, x, _mm256_castsi256_si128(m)); }

static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return _mm256_blendv_pd(y, x, _mm256_castsi256_pd(o)); }

// Select between two scalar constants without a blend: each 128-bit pair of the
// constant vector holds {v0, v1}, and permutevar_pd uses a control bit of each
// lane of o to pick the low (v0) or high (v1) element.
// NOTE(review): relies on o lanes being all-zeros or all-ones - confirm.
static INLINE vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return _mm256_permutevar_pd(_mm256_set_pd(v1, v0, v1, v0), o); }

// Three-mask select among four scalar constants: d0 if o0, else d1 if o1,
// else d2 if o2, else d3. `v` holds, per 64-bit lane, the pair of 32-bit
// indices (2k, 2k+1) of the chosen constant d_k; the final permute then fetches
// both 32-bit halves of that double from the vector {d0, d1, d2, d3}.
static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
  __m256i v = _mm256_castpd_si256(vsel_vd_vo_vd_vd(o0, _mm256_castsi256_pd(_mm256_set_epi32(1, 0, 1, 0, 1, 0, 1, 0)),
                                                   vsel_vd_vo_vd_vd(o1, _mm256_castsi256_pd(_mm256_set_epi32(3, 2, 3, 2, 3, 2, 3, 2)),
                                                                    vsel_vd_vo_vd_vd(o2, _mm256_castsi256_pd(_mm256_set_epi32(5, 4, 5, 4, 5, 4, 5, 4)),
                                                                                     _mm256_castsi256_pd(_mm256_set_epi32(7, 6, 7, 6, 7, 6, 7, 6))))));
  return _mm256_castsi256_pd(_mm256_permutevar8x32_epi32(_mm256_castpd_si256(_mm256_set_pd(d3, d2, d1, d0)), v));
}

// Two-mask select among three constants, expressed through the four-way
// version by repeating o1 and d2.
static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
  return vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o1, d0, d1, d2, d2);
}

// Classification masks: +/-infinity, +infinity only, -infinity only, NaN.
// NaN detection compares d with itself using the unordered not-equal predicate.
static INLINE vopmask visinf_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(vabs_vd_vd(d), _mm256_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ));
}

static INLINE vopmask vispinf_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(d, _mm256_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ));
}

static INLINE vopmask visminf_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(d, _mm256_set1_pd(-SLEEF_INFINITY), _CMP_EQ_OQ));
}

static INLINE vopmask visnan_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(d, d, _CMP_NEQ_UQ));
}

#if defined(_MSC_VER)
// This function is needed when debugging on MSVC.
// Returns lane 0 of a double vector by spilling it to a temporary array.
// This definition sits inside the MSVC-only conditional opened just before it.
static INLINE double vcast_d_vd(vdouble v) {
  double s[4];
  _mm256_storeu_pd(s, v);
  return s[0];
}
#endif

// Double vector loads/stores. vload/vstore use the aligned intrinsics (the
// pointer must be 32-byte aligned); the *u variants accept any alignment.
static INLINE vdouble vload_vd_p(const double *ptr) { return _mm256_load_pd(ptr); }
static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm256_loadu_pd(ptr); }

static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm256_store_pd(ptr, v); }
static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm256_storeu_pd(ptr, v); }

// Gather four doubles ptr[vi[k]]; the scale of 8 is the size of a double in bytes.
static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return _mm256_i32gather_pd(ptr, vi, 8); }

//

// vint2 and vmask are the same underlying type in this backend, so these casts
// are identities.
static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; }
static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; }

// float <-> int32 conversions: vrint uses the plain convert intrinsic,
// vtruncate its truncating counterpart.
static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm256_cvtps_epi32(vf)); }
static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm256_cvttps_epi32(vf)); }
static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm256_cvtepi32_ps(vcast_vm_vi2(vi)); }
// Broadcasts of a scalar to all eight lanes.
static INLINE vfloat vcast_vf_f(float f) { return _mm256_set1_ps(f); }
static INLINE vint2 vcast_vi2_i(int i) { return _mm256_set1_epi32(i); }
// Bit-pattern reinterpretation (no value conversion).
static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm256_castps_si256(vf); }
static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm256_castsi256_ps(vm); }

static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return vreinterpret_vf_vm(vcast_vm_vi2(vi)); }
static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return vcast_vi2_vm(vreinterpret_vm_vf(vf)); }

// Single-precision arithmetic.
static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_add_ps(x, y); }
static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm256_sub_ps(x, y); }
static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm256_mul_ps(x, y); }
static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm256_div_ps(x, y); }
// Reciprocal computed as a full-precision division 1.0f / x.
static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); }
static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm256_sqrt_ps(x); }
// -0.0f has only the sign bit set: andnot clears the sign bit, xor flips it.
static INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); }
static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); }
// Multiply-add family, all mapped to FMA intrinsics:
//   vmla   =  x*y + z      vmlapn =  x*y - z      vmlanp = -(x*y) + z
static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmadd_ps(x, y, z); }
static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmsub_ps(x, y, z); }
static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fnmadd_ps(x, y, z); }
static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm256_max_ps(x, y); }
static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm256_min_ps(x, y); }

// Explicitly fused variants; the two suffix letters give the signs of the
// product and of z (p = plus, n = minus), e.g. vfmann = -(x*y) - z.
static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmadd_ps(x, y, z); }
static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmadd_ps(x, y, z); }
static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmsub_ps(x, y, z); }
static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fnmadd_ps(x, y, z); }
static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fnmsub_ps(x, y, z); }

// Single-precision comparisons producing lane masks. All predicates are the
// ordered, quiet forms except not-equal, which is unordered (true for NaN).
static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_EQ_OQ)); }
static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_NEQ_UQ)); }
static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_LT_OQ)); }
static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_LE_OQ)); }
static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_GT_OQ)); }
static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_GE_OQ)); }

// 256-bit (8 x int32) integer arithmetic and logic.
static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_add_epi32(x, y); }
static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_sub_epi32(x, y); }
// Negation computed as 0 - e.
static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); }

// Note the andnot operand order: the result is (~x) & y.
static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_and_si256(x, y); }
static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_andnot_si256(x, y); }
static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_or_si256(x, y); }
static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_xor_si256(x, y); }

// Operation-mask forms reuse the plain integer versions.
static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return vand_vi2_vi2_vi2(vcast_vi2_vm(x), y); }
static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vandnot_vi2_vi2_vi2(vcast_vi2_vm(x), y); }

// Shifts by a count c: logical left, logical right, arithmetic right.
static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return _mm256_slli_epi32(x, c); }
static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return _mm256_srli_epi32(x, c); }
static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return _mm256_srai_epi32(x, c); }

// 32-bit integer compares; the _vo_ and _vi2_ flavours return the same bits
// because vopmask and vint2 share one underlying type here.
static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpeq_epi32(x, y); }
static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpgt_epi32(x, y); }
static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpeq_epi32(x, y); }
static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpgt_epi32(x, y); }

// Select x where the mask is set, y elsewhere (blendv takes the "else" operand first).
static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) {
  return _mm256_blendv_epi8(y, x, m);
}

static INLINE vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) { return _mm256_blendv_ps(y, x, _mm256_castsi256_ps(o)); }

// At this point, the following three functions are implemented in a generic way,
// but I will try target-specific optimization later on.
static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } #ifdef _MSC_VER // This function is needed when debugging on MSVC. 
static INLINE float vcast_f_vf(vfloat v) { float s[8]; _mm256_storeu_ps(s, v); return s[0]; } #endif static INLINE vfloat vload_vf_p(const float *ptr) { return _mm256_load_ps(ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm256_loadu_ps(ptr); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm256_store_ps(ptr, v); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm256_storeu_ps(ptr, v); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return _mm256_i32gather_ps(ptr, vi2, 4); } // #define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0 }) #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_addsub_pd(x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_addsub_ps(x, y); } static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vmla_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmla_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } static INLINE vdouble vrev21_vd_vd(vdouble d0) { return _mm256_shuffle_pd(d0, d0, (0 << 3) | (1 << 2) | (0 << 1) | (1 << 0)); } static INLINE 
vdouble vreva2_vd_vd(vdouble d0) { d0 = _mm256_permute2f128_pd(d0, d0, 1); return _mm256_shuffle_pd(d0, d0, (1 << 3) | (0 << 2) | (1 << 1) | (0 << 0)); } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm256_stream_pd(ptr, v); } static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_store_pd(&ptr[(offset + step * 0)*2], _mm256_extractf128_pd(v, 0)); _mm_store_pd(&ptr[(offset + step * 1)*2], _mm256_extractf128_pd(v, 1)); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_stream_pd(&ptr[(offset + step * 0)*2], _mm256_extractf128_pd(v, 0)); _mm_stream_pd(&ptr[(offset + step * 1)*2], _mm256_extractf128_pd(v, 1)); } // static INLINE vfloat vrev21_vf_vf(vfloat d0) { return _mm256_shuffle_ps(d0, d0, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); } static INLINE vfloat vreva2_vf_vf(vfloat d0) { d0 = _mm256_permute2f128_ps(d0, d0, 1); return _mm256_shuffle_ps(d0, d0, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); } static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm256_stream_ps(ptr, v); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 0)))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 0)))); _mm_storel_pd((double *)(ptr+(offset + step * 2)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 1)))); _mm_storeh_pd((double *)(ptr+(offset + step * 3)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 1)))); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } // static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { 
_mm256_unpacklo_epi64(v.x, v.y), _mm256_unpackhi_epi64(v.x, v.y) }; } static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm256_unpacklo_epi64(v.x, v.y), _mm256_unpackhi_epi64(v.x, v.y) }; } static INLINE vint vuninterleave_vi_vi(vint v) { return _mm_shuffle_epi32(v, (0 << 0) | (2 << 2) | (1 << 4) | (3 << 6)); } static INLINE vdouble vinterleave_vd_vd(vdouble vd) { return vreinterpret_vd_vm(_mm256_permute4x64_epi64(vreinterpret_vm_vd(vd), (3 << 6) | (1 << 4) | (2 << 2) | (0 << 0))); } static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { return vreinterpret_vd_vm(_mm256_permute4x64_epi64(vreinterpret_vm_vd(vd), (3 << 6) | (1 << 4) | (2 << 2) | (0 << 0))); } static INLINE vmask vinterleave_vm_vm(vmask vm) { return _mm256_permute4x64_epi64(vm, (3 << 6) | (1 << 4) | (2 << 2) | (0 << 0)); } static INLINE vmask vuninterleave_vm_vm(vmask vm) { return _mm256_permute4x64_epi64(vm, (3 << 6) | (1 << 4) | (2 << 2) | (0 << 0)); } static vmask2 vloadu_vm2_p(void *p) { vmask2 vm2; memcpy(&vm2, p, VECTLENDP * 16); return vm2; } #if !defined(SLEEF_GENHEADER) typedef Sleef_quad4 vargquad; static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { vm2 = vuninterleave_vm2_vm2(vm2); vargquad aq; memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } #endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(_mm_or_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))) == 0; } static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { return _mm256_blendv_epi8(y, x, o); } static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { return _mm256_sub_epi64(x, y); } static INLINE vmask vneg64_vm_vm(vmask x) { return _mm256_sub_epi64(vcast_vm_i_i(0, 0), x); } static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return _mm256_cmpgt_epi64(x, y); } // signed compare #define vsll64_vm_vm_i(x, c) 
_mm256_slli_epi64(x, c) #define vsrl64_vm_vm_i(x, c) _mm256_srli_epi64(x, c) //@#define vsll64_vm_vm_i(x, c) _mm256_slli_epi64(x, c) //@#define vsrl64_vm_vm_i(x, c) _mm256_srli_epi64(x, c) static INLINE vmask vcast_vm_vi(vint vi) { return _mm256_cvtepi32_epi64(vi); } static INLINE vint vcast_vi_vm(vmask vm) { return _mm_or_si128(_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm256_castsi256_si128(vm)), _mm_set1_ps(0), 0x08)), _mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vm, 1)), 0x80))); } sleef-3.5.1/src/arch/helperavx2_128.h000066400000000000000000000560031373003144100171450ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if CONFIG == 1 #if !defined(__AVX2__) && !defined(SLEEF_GENHEADER) #error Please specify -mavx2. #endif #else #error CONFIG macro invalid or not defined #endif #define ENABLE_DP //@#define ENABLE_DP #define LOG2VECTLENDP 1 //@#define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) //@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_FMA_DP //@#define ENABLE_FMA_DP #define ENABLE_SP //@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) //@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) //@#define VECTLENSP (1 << LOG2VECTLENSP) #define ENABLE_FMA_SP //@#define ENABLE_FMA_SP #define FULL_FP_ROUNDING //@#define FULL_FP_ROUNDING #define ACCURATE_SQRT //@#define ACCURATE_SQRT #if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include #else #include #endif #include #include "misc.h" #endif // #if !defined(SLEEF_GENHEADER) typedef __m128i vmask; typedef __m128i vopmask; typedef __m128d vdouble; typedef __m128i vint; typedef __m128 vfloat; typedef __m128i vint2; typedef struct { vmask x, y; } vmask2; // #if !defined(SLEEF_GENHEADER) #ifndef __SLEEF_H__ void 
Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif static INLINE int cpuSupportsAVX2() { int32_t reg[4]; Sleef_x86CpuID(reg, 7, 0); return (reg[1] & (1 << 5)) != 0; } static INLINE int cpuSupportsFMA() { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); return (reg[2] & (1 << 12)) != 0; } #if CONFIG == 1 && defined(__AVX2__) static INLINE int vavailability_i(int name) { int d = cpuSupportsAVX2() && cpuSupportsFMA(); return d ? 3 : 0; } #define ISANAME "AVX2" #define DFTPRIORITY 25 #endif #endif // #if !defined(SLEEF_GENHEADER) static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } static INLINE int vtestallones_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } // static INLINE vdouble vcast_vd_d(double d) { return _mm_set1_pd(d); } static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm_castpd_si128(vd); } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm_castsi128_pd(vm); } static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return _mm_castpd_si128(vd); } static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return _mm_castsi128_pd(vi); } // static vint2 vloadu_vi2_p(int32_t *p) { return _mm_loadu_si128((__m128i const *)p); } static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm_storeu_si128((__m128i *)p, v); } static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); } static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); } // static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } 
static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vcast_vo32_vo64(vopmask m) { return _mm_shuffle_epi32(m, 0x08); } static 
INLINE vopmask vcast_vo64_vo32(vopmask m) { return _mm_shuffle_epi32(m, 0x50); } // static INLINE vint vrint_vi_vd(vdouble vd) { return _mm_cvtpd_epi32(vd); } static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm_cvttpd_epi32(vd); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vdouble vcast_vd_vi(vint vi) { return _mm_cvtepi32_pd(vi); } static INLINE vint vcast_vi_i(int i) { return _mm_set1_epi32(i); } static INLINE vint2 vcastu_vi2_vi(vint vi) { return _mm_and_si128(_mm_shuffle_epi32(vi, 0x73), _mm_set_epi32(-1, 0, -1, 0)); } static INLINE vint vcastu_vi_vi2(vint2 vi) { return _mm_shuffle_epi32(vi, 0x0d); } static INLINE vmask vcast_vm_i_i(int i0, int i1) { return _mm_set_epi32(i0, i1, i0, i1); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return _mm_cmpeq_epi64(x, y); } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return _mm_add_epi64(x, y); } // static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_add_pd(x, y); } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm_sub_pd(x, y); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm_mul_pd(x, y); } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm_div_pd(x, y); } static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm_div_pd(_mm_set1_pd(1), x); } static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm_sqrt_pd(x); } static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm_andnot_pd(_mm_set1_pd(-0.0), d); } static INLINE vdouble vneg_vd_vd(vdouble d) { return 
_mm_xor_pd(_mm_set1_pd(-0.0), d); } static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fmadd_pd(x, y, z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fmsub_pd(x, y, z); } static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fnmadd_pd(x, y, z); } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm_max_pd(x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm_min_pd(x, y); } static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fmadd_pd(x, y, z); } static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fmadd_pd(x, y, z); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fmsub_pd(x, y, z); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fnmadd_pd(x, y, z); } static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fnmsub_pd(x, y, z); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm_cmp_pd(x, y, _CMP_EQ_OQ)); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm_cmp_pd(x, y, _CMP_NEQ_UQ)); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm_cmp_pd(x, y, _CMP_LT_OQ)); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm_cmp_pd(x, y, _CMP_LE_OQ)); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm_cmp_pd(x, y, _CMP_GT_OQ)); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm_cmp_pd(x, y, _CMP_GE_OQ)); } // static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); } static INLINE vint vneg_vi_vi(vint e) { return 
vsub_vi_vi_vi(vcast_vi_i(0), e); } static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); } static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); } static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return _mm_and_si128(x, y); } static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return _mm_andnot_si128(x, y); } static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); } static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); } static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); } static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return _mm_blendv_epi8(y, x, m); } static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return _mm_blendv_pd(y, x, _mm_castsi128_pd(o)); } static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); } static INLINE vopmask visinf_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm_cmp_pd(vabs_vd_vd(d), 
_mm_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ)); } static INLINE vopmask vispinf_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm_cmp_pd(d, _mm_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ)); } static INLINE vopmask visminf_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm_cmp_pd(d, _mm_set1_pd(-SLEEF_INFINITY), _CMP_EQ_OQ)); } static INLINE vopmask visnan_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm_cmp_pd(d, d, _CMP_NEQ_UQ)); } static INLINE vdouble vload_vd_p(const double *ptr) { return _mm_load_pd(ptr); } static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm_loadu_pd(ptr); } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm_store_pd(ptr, v); } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm_storeu_pd(ptr, v); } static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return _mm_i32gather_pd(ptr, vi, 8); } #if defined(_MSC_VER) // This function is needed when debugging on MSVC. static INLINE double vcast_d_vd(vdouble v) { double a[VECTLENDP]; vstoreu_v_p_vd(a, v); return a[0]; } #endif // static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; } static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; } static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm_cvtps_epi32(vf)); } static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm_cvttps_epi32(vf)); } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm_cvtepi32_ps(vcast_vm_vi2(vi)); } static INLINE vfloat vcast_vf_f(float f) { return _mm_set1_ps(f); } static INLINE vint2 vcast_vi2_i(int i) { return _mm_set1_epi32(i); } static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm_castps_si128(vf); } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm_castsi128_ps(vm); } static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return vreinterpret_vf_vm(vcast_vm_vi2(vi)); } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return vcast_vi2_vm(vreinterpret_vm_vf(vf)); } static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { 
return _mm_add_ps(x, y); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm_sub_ps(x, y); } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm_mul_ps(x, y); } static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm_div_ps(x, y); } static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); } static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm_sqrt_ps(x); } static INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); } static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); } static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fmadd_ps(x, y, z); } static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fmsub_ps(x, y, z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fnmadd_ps(x, y, z); } static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm_max_ps(x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm_min_ps(x, y); } static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fmadd_ps(x, y, z); } static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fmadd_ps(x, y, z); } static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fmsub_ps(x, y, z); } static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fnmadd_ps(x, y, z); } static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fnmsub_ps(x, y, z); } static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmp_ps(x, y, _CMP_EQ_OQ)); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmp_ps(x, y, _CMP_NEQ_UQ)); } static INLINE vopmask 
vlt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmp_ps(x, y, _CMP_LT_OQ)); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmp_ps(x, y, _CMP_LE_OQ)); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmp_ps(x, y, _CMP_GT_OQ)); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmp_ps(x, y, _CMP_GE_OQ)); } static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_add_epi32(x, y); } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_sub_epi32(x, y); } static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_and_si128(x, y); } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_andnot_si128(x, y); } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_or_si128(x, y); } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_xor_si128(x, y); } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return vand_vi2_vi2_vi2(vcast_vi2_vm(x), y); } static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vandnot_vi2_vi2_vi2(vcast_vi2_vm(x), y); } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return _mm_slli_epi32(x, c); } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return _mm_srli_epi32(x, c); } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return _mm_srai_epi32(x, c); } static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); } static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return _mm_blendv_epi8(y, x, m); } static INLINE 
vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) { return _mm_blendv_ps(y, x, _mm_castsi128_ps(o)); } static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } static INLINE vfloat vload_vf_p(const float *ptr) { return _mm_load_ps(ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm_loadu_ps(ptr); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm_store_ps(ptr, v); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm_storeu_ps(ptr, v); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return _mm_i32gather_ps(ptr, vi2, 4); } #ifdef _MSC_VER // This function is needed when debugging on MSVC. 
static INLINE float vcast_f_vf(vfloat v) { float a[VECTLENSP]; vstoreu_v_p_vf(a, v); return a[0]; } #endif // #define PNMASK ((vdouble) { +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0 }) #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_addsub_pd(x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return _mm_addsub_ps(x, y); } static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vmla_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmla_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } static INLINE vdouble vrev21_vd_vd(vdouble d0) { return _mm_shuffle_pd(d0, d0, 1); } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return vd; } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm_stream_pd(ptr, v); } static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_stream_pd((double *)(&ptr[2*offset]), v); } // static INLINE vfloat vrev21_vf_vf(vfloat d0) { return _mm_shuffle_ps(d0, d0, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); } static INLINE vfloat vreva2_vf_vf(vfloat d0) 
{ return _mm_shuffle_ps(d0, d0, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); } static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm_stream_ps(ptr, v); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); } // static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm_unpacklo_epi64(v.x, v.y), _mm_unpackhi_epi64(v.x, v.y) }; } static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm_unpacklo_epi64(v.x, v.y), _mm_unpackhi_epi64(v.x, v.y) }; } static vmask2 vloadu_vm2_p(void *p) { vmask2 vm2 = { vloadu_vi2_p((int32_t *)p), vloadu_vi2_p((int32_t *)((uint8_t *)p + sizeof(vmask))) }; return vm2; } static void vstoreu_v_p_vm2(void *p, vmask2 vm2) { vstoreu_v_p_vi2((int32_t *)p, vcast_vi2_vm(vm2.x)); vstoreu_v_p_vi2((int32_t *)((uint8_t *)p + sizeof(vmask)), vcast_vi2_vm(vm2.y)); } #if !defined(SLEEF_GENHEADER) typedef Sleef_quad2 vargquad; static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { vargquad a; vstoreu_v_p_vm2(&a, vuninterleave_vm2_vm2(vm2)); return a; } #endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0; } static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { return _mm_blendv_epi8(y, x, o); } static INLINE vmask 
vsub64_vm_vm_vm(vmask x, vmask y) { return _mm_sub_epi64(x, y); } static INLINE vmask vneg64_vm_vm(vmask x) { return _mm_sub_epi64(vcast_vm_i_i(0, 0), x); } static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return _mm_cmpgt_epi64(x, y); } // signed compare #define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) #define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) //@#define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) //@#define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) sleef-3.5.1/src/arch/helperavx512f.h000066400000000000000000000723131373003144100170710ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if CONFIG == 1 || CONFIG == 2 #if !defined(__AVX512F__) && !defined(SLEEF_GENHEADER) #error Please specify -mavx512f. #endif #else #error CONFIG macro invalid or not defined #endif #define ENABLE_DP //@#define ENABLE_DP #define LOG2VECTLENDP 3 //@#define LOG2VECTLENDP 3 #define VECTLENDP (1 << LOG2VECTLENDP) //@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP //@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) //@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) //@#define VECTLENSP (1 << LOG2VECTLENSP) #if CONFIG == 1 #define ENABLE_FMA_DP //@#define ENABLE_FMA_DP #define ENABLE_FMA_SP //@#define ENABLE_FMA_SP #endif #define FULL_FP_ROUNDING //@#define FULL_FP_ROUNDING #define ACCURATE_SQRT //@#define ACCURATE_SQRT #if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include #else #include #endif #include #include "misc.h" #endif // #if !defined(SLEEF_GENHEADER) typedef __m512i vmask; typedef __mmask16 vopmask; typedef __m512d vdouble; typedef __m256i vint; typedef __m512 vfloat; typedef __m512i vint2; typedef struct { vmask x, y; } vmask2; // #if !defined(SLEEF_GENHEADER) #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t 
eax, uint32_t ecx); #endif static INLINE int cpuSupportsAVX512F() { int32_t reg[4]; Sleef_x86CpuID(reg, 7, 0); return (reg[1] & (1 << 16)) != 0; } #if CONFIG == 1 && defined(__AVX512F__) static INLINE int vavailability_i(int name) { int d = cpuSupportsAVX512F(); return d ? 3 : 0; } #define ISANAME "AVX512F" #define DFTPRIORITY 30 #endif #if CONFIG == 2 && defined(__AVX512F__) static INLINE int vavailability_i(int name) { int d = cpuSupportsAVX512F(); return d ? 3 : 0; } #define ISANAME "AVX512FNOFMA" #define DFTPRIORITY 0 #endif #endif // #if !defined(SLEEF_GENHEADER) static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } #ifdef __INTEL_COMPILER static INLINE int vtestallones_i_vo64(vopmask g) { return _mm512_mask2int(g) == 0xff; } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm512_mask2int(g) == 0xffff; } #else static INLINE int vtestallones_i_vo64(vopmask g) { return g == 0xff; } static INLINE int vtestallones_i_vo32(vopmask g) { return g == 0xffff; } #endif // static vint2 vloadu_vi2_p(int32_t *p) { return _mm512_loadu_si512((__m512i const *)p); } static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm512_storeu_si512((__m512i *)p, v); } static vint vloadu_vi_p(int32_t *p) { return _mm256_loadu_si256((__m256i const *)p); } static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm256_storeu_si256((__m256i *)p, v); } // static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return _mm512_and_si512(x, y); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return _mm512_andnot_si512(x, y); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return _mm512_or_si512(x, y); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return _mm512_xor_si512(x, y); } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return _mm512_kand(x, y); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return _mm512_kandn(x, y); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return _mm512_kor(x, y); } 
static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return _mm512_kxor(x, y); } static INLINE vmask vand_vm_vo64_vm(vopmask o, vmask m) { return _mm512_mask_and_epi64(_mm512_set1_epi32(0), o, m, m); } static INLINE vmask vandnot_vm_vo64_vm(vopmask o, vmask m) { return _mm512_mask_and_epi64(m, o, _mm512_set1_epi32(0), _mm512_set1_epi32(0)); } static INLINE vmask vor_vm_vo64_vm(vopmask o, vmask m) { return _mm512_mask_or_epi64(m, o, _mm512_set1_epi32(-1), _mm512_set1_epi32(-1)); } static INLINE vmask vand_vm_vo32_vm(vopmask o, vmask m) { return _mm512_mask_and_epi32(_mm512_set1_epi32(0), o, m, m); } static INLINE vmask vandnot_vm_vo32_vm(vopmask o, vmask m) { return _mm512_mask_and_epi32(m, o, _mm512_set1_epi32(0), _mm512_set1_epi32(0)); } static INLINE vmask vor_vm_vo32_vm(vopmask o, vmask m) { return _mm512_mask_or_epi32(m, o, _mm512_set1_epi32(-1), _mm512_set1_epi32(-1)); } static INLINE vopmask vcast_vo32_vo64(vopmask o) { return o; } static INLINE vopmask vcast_vo64_vo32(vopmask o) { return o; } // static INLINE vint vrint_vi_vd(vdouble vd) { return _mm512_cvt_roundpd_epi32(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm512_cvt_roundpd_epi32(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vdouble vcast_vd_vi(vint vi) { return _mm512_cvtepi32_pd(vi); } static INLINE vint vcast_vi_i(int i) { return _mm256_set1_epi32(i); } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm512_roundscale_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm512_roundscale_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vint2 vcastu_vi2_vi(vint vi) { return _mm512_maskz_permutexvar_epi32(0xaaaa, _mm512_set_epi32(7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0), _mm512_castsi256_si512(vi)); } static INLINE vint vcastu_vi_vi2(vint2 vi) { return _mm512_castsi512_si256(_mm512_maskz_permutexvar_epi32(0x00ff, 
_mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 15, 13, 11, 9, 7, 5, 3, 1), vi)); } static INLINE vmask vcast_vm_i_i(int i0, int i1) { return _mm512_set_epi32(i0, i1, i0, i1, i0, i1, i0, i1, i0, i1, i0, i1, i0, i1, i0, i1); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return _mm512_cmp_epi64_mask(x, y, _MM_CMPINT_EQ); } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return _mm512_add_epi64(x, y); } // static INLINE vdouble vcast_vd_d(double d) { return _mm512_set1_pd(d); } static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm512_castpd_si512(vd); } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm512_castsi512_pd(vm); } static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return _mm512_castpd_si512(vd); } static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return _mm512_castsi512_pd(vi); } static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm512_add_pd(x, y); } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm512_sub_pd(x, y); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm512_mul_pd(x, y); } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm512_div_pd(x, y); } static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm512_div_pd(_mm512_set1_pd(1), x); } static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm512_sqrt_pd(x); } static INLINE vdouble vabs_vd_vd(vdouble d) { return vreinterpret_vd_vm(_mm512_andnot_si512(vreinterpret_vm_vd(_mm512_set1_pd(-0.0)), vreinterpret_vm_vd(d))); } static INLINE vdouble vneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(_mm512_xor_si512(vreinterpret_vm_vd(_mm512_set1_pd(-0.0)), vreinterpret_vm_vd(d))); } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm512_max_pd(x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm512_min_pd(x, y); } #if CONFIG == 1 static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmadd_pd(x, y, z); } 
static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmsub_pd(x, y, z); } static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fnmadd_pd(x, y, z); } #else static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } #endif static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmadd_pd(x, y, z); } static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmadd_pd(x, y, z); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmsub_pd(x, y, z); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fnmadd_pd(x, y, z); } static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fnmsub_pd(x, y, z); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_EQ_OQ); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_NEQ_UQ); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_LT_OQ); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_LE_OQ); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_GT_OQ); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_GE_OQ); } // static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm256_add_epi32(x, y); } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm256_sub_epi32(x, y); } static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); } static INLINE vint vand_vi_vi_vi(vint x, vint y) { return 
_mm256_and_si256(x, y); } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm256_andnot_si256(x, y); } static INLINE vint vandnot_vi_vo_vi(vopmask o, vint y) { return _mm512_castsi512_si256(_mm512_mask_and_epi32(_mm512_castsi256_si512(y), o, _mm512_set1_epi32(0), _mm512_set1_epi32(0))); } static INLINE vint vand_vi_vo_vi(vopmask o, vint y) { return _mm512_castsi512_si256(_mm512_mask_and_epi32(_mm512_set1_epi32(0), o, _mm512_castsi256_si512(y), _mm512_castsi256_si512(y))); } static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm256_or_si256(x, y); } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm256_xor_si256(x, y); } #define vsll_vi_vi_i(x, c) _mm256_slli_epi32(x, c) #define vsrl_vi_vi_i(x, c) _mm256_srli_epi32(x, c) #define vsra_vi_vi_i(x, c) _mm256_srai_epi32(x, c) //@#define vsll_vi_vi_i(x, c) _mm256_slli_epi32(x, c) //@#define vsrl_vi_vi_i(x, c) _mm256_srli_epi32(x, c) //@#define vsra_vi_vi_i(x, c) _mm256_srai_epi32(x, c) static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm256_cmpeq_epi32(x, y); } static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm256_cmpgt_epi32(x, y); } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm512_cmp_epi32_mask(_mm512_castsi256_si512(x), _mm512_castsi256_si512(y), _MM_CMPINT_EQ); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm512_cmp_epi32_mask(_mm512_castsi256_si512(y), _mm512_castsi256_si512(x), _MM_CMPINT_LT); } static INLINE vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) { return _mm512_mask_blend_pd(mask, y, x); } static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); } #if 1 // Probably this is faster static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { __m512i v = _mm512_castpd_si512(vsel_vd_vo_vd_vd(o0, _mm512_castsi512_pd(_mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0)), 
vsel_vd_vo_vd_vd(o1, _mm512_castsi512_pd(_mm512_set_epi64(1, 1, 1, 1, 1, 1, 1, 1)), vsel_vd_vo_vd_vd(o2, _mm512_castsi512_pd(_mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2)), _mm512_castsi512_pd(_mm512_set_epi64(3, 3, 3, 3, 3, 3, 3, 3)))))); return _mm512_permutexvar_pd(v, _mm512_castpd256_pd512(_mm256_set_pd(d3, d2, d1, d0))); } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o1, d0, d1, d2, d2); } #else static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); } #endif static INLINE vopmask visinf_vo_vd(vdouble d) { return _mm512_cmp_pd_mask(vabs_vd_vd(d), _mm512_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ); } static INLINE vopmask vispinf_vo_vd(vdouble d) { return _mm512_cmp_pd_mask(d, _mm512_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ); } static INLINE vopmask visminf_vo_vd(vdouble d) { return _mm512_cmp_pd_mask(d, _mm512_set1_pd(-SLEEF_INFINITY), _CMP_EQ_OQ); } static INLINE vopmask visnan_vo_vd(vdouble d) { return _mm512_cmp_pd_mask(d, d, _CMP_NEQ_UQ); } static INLINE vint vilogbk_vi_vd(vdouble d) { return vrint_vi_vd(_mm512_getexp_pd(d)); } // vilogb2k_vi_vd is similar to vilogbk_vi_vd, but the argument has to // be a normalized FP value. 
static INLINE vint vilogb2k_vi_vd(vdouble d) { return vrint_vi_vd(_mm512_getexp_pd(d)); } static INLINE vdouble vgetexp_vd_vd(vdouble d) { return _mm512_getexp_pd(d); } static INLINE vfloat vgetexp_vf_vf(vfloat d) { return _mm512_getexp_ps(d); } static INLINE vdouble vgetmant_vd_vd(vdouble d) { return _mm512_getmant_pd(d, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_nan); } static INLINE vfloat vgetmant_vf_vf(vfloat d) { return _mm512_getmant_ps(d, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_nan); } #define vfixup_vd_vd_vd_vi2_i(a, b, c, imm) _mm512_fixupimm_pd((a), (b), (c), (imm)) #define vfixup_vf_vf_vf_vi2_i(a, b, c, imm) _mm512_fixupimm_ps((a), (b), (c), (imm)) //@#define vfixup_vd_vd_vd_vi2_i(a, b, c, imm) _mm512_fixupimm_pd((a), (b), (c), (imm)) //@#define vfixup_vf_vf_vf_vi2_i(a, b, c, imm) _mm512_fixupimm_ps((a), (b), (c), (imm)) #if defined(_MSC_VER) // This function is needed when debugging on MSVC. static INLINE double vcast_d_vd(vdouble v) { double s[VECTLENDP]; _mm512_storeu_pd(s, v); return s[0]; } #endif static INLINE vdouble vload_vd_p(const double *ptr) { return _mm512_load_pd(ptr); } static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm512_loadu_pd(ptr); } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm512_store_pd(ptr, v); } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm512_storeu_pd(ptr, v); } static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return _mm512_i32gather_pd(vi, ptr, 8); } // static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return _mm512_castsi512_si256(_mm512_mask_blend_epi32(m, _mm512_castsi256_si512(y), _mm512_castsi256_si512(x))); } // static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm512_castps_si512(vf); } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm512_castsi512_ps(vm); } static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return _mm512_castsi512_ps(vi); } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return 
_mm512_castps_si512(vf); } static INLINE vdouble vreinterpret_vd_vf(vfloat vf) { return _mm512_castps_pd(vf); } static INLINE vfloat vreinterpret_vf_vd(vdouble vd) { return _mm512_castpd_ps(vd); } static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; } static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm512_cvtepi32_ps(vcast_vm_vi2(vi)); } static INLINE vfloat vcast_vf_f(float f) { return _mm512_set1_ps(f); } static INLINE vint2 vcast_vi2_i(int i) { return _mm512_set1_epi32(i); } static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm512_cvtps_epi32(vf)); } static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm512_cvttps_epi32(vf)); } static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return _mm512_roundscale_ps(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm512_roundscale_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm512_add_ps(x, y); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm512_sub_ps(x, y); } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm512_mul_ps(x, y); } static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm512_div_ps(x, y); } static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); } static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm512_sqrt_ps(x); } static INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); } static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); } static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm512_max_ps(x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm512_min_ps(x, y); } #if CONFIG == 1 
static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmadd_ps(x, y, z); } static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmsub_ps(x, y, z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fnmadd_ps(x, y, z); } #else static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } #endif static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmadd_ps(x, y, z); } static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmadd_ps(x, y, z); } static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmsub_ps(x, y, z); } static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fnmadd_ps(x, y, z); } static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fnmsub_ps(x, y, z); } static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_EQ_OQ); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_NEQ_UQ); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_LT_OQ); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_LE_OQ); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_GT_OQ); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_GE_OQ); } static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_add_epi32(x, y); } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { 
return _mm512_sub_epi32(x, y); } static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_and_si512(x, y); } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_andnot_si512(x, y); } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_or_si512(x, y); } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_xor_si512(x, y); } static INLINE vint2 vand_vi2_vo_vi2(vopmask o, vint2 m) { return _mm512_mask_and_epi32(_mm512_set1_epi32(0), o, m, m); } static INLINE vint2 vandnot_vi2_vo_vi2(vopmask o, vint2 m) { return _mm512_mask_and_epi32(m, o, _mm512_set1_epi32(0), _mm512_set1_epi32(0)); } #define vsll_vi2_vi2_i(x, c) _mm512_slli_epi32(x, c) #define vsrl_vi2_vi2_i(x, c) _mm512_srli_epi32(x, c) #define vsra_vi2_vi2_i(x, c) _mm512_srai_epi32(x, c) //@#define vsll_vi2_vi2_i(x, c) _mm512_slli_epi32(x, c) //@#define vsrl_vi2_vi2_i(x, c) _mm512_srli_epi32(x, c) //@#define vsra_vi2_vi2_i(x, c) _mm512_srai_epi32(x, c) static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm512_cmpeq_epi32_mask(x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm512_cmpgt_epi32_mask(x, y); } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { __mmask16 m = _mm512_cmp_epi32_mask(x, y, _MM_CMPINT_EQ); return _mm512_mask_and_epi32(_mm512_set1_epi32(0), m, _mm512_set1_epi32(-1), _mm512_set1_epi32(-1)); } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { __mmask16 m = _mm512_cmp_epi32_mask(y, x, _MM_CMPINT_LT); return _mm512_mask_and_epi32(_mm512_set1_epi32(0), m, _mm512_set1_epi32(-1), _mm512_set1_epi32(-1)); } static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return _mm512_mask_blend_epi32(m, y, x); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask m, vfloat x, vfloat y) { return _mm512_mask_blend_ps(m, y, x); } // At this point, the following three functions are implemented in a 
generic way, // but I will try target-specific optimization later on. static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } static INLINE vint2 vilogbk_vi2_vf(vfloat d) { return vrint_vi2_vf(_mm512_getexp_ps(d)); } static INLINE vint2 vilogb2k_vi2_vf(vfloat d) { return vrint_vi2_vf(_mm512_getexp_ps(d)); } #ifdef _MSC_VER // This function is needed when debugging on MSVC. 
static INLINE float vcast_f_vf(vfloat v) { float s[VECTLENSP]; _mm512_storeu_ps(s, v); return s[0]; } #endif static INLINE vfloat vload_vf_p(const float *ptr) { return _mm512_load_ps(ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm512_loadu_ps(ptr); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm512_store_ps(ptr, v); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm512_storeu_ps(ptr, v); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return _mm512_i32gather_ps(vi2, ptr, 4); } // static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(_mm512_mask_xor_epi32(vreinterpret_vm_vd(d), 0xcccc, vreinterpret_vm_vd(d), vreinterpret_vm_vd(_mm512_set1_pd(-0.0)))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(_mm512_mask_xor_epi32(vreinterpret_vm_vd(d), 0x3333, vreinterpret_vm_vd(d), vreinterpret_vm_vd(_mm512_set1_pd(-0.0)))); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(_mm512_mask_xor_epi32(vreinterpret_vm_vf(d), 0xaaaa, vreinterpret_vm_vf(d), vreinterpret_vm_vf(_mm512_set1_ps(-0.0f)))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(_mm512_mask_xor_epi32(vreinterpret_vm_vf(d), 0x5555, vreinterpret_vm_vf(d), vreinterpret_vm_vf(_mm512_set1_ps(-0.0f)))); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return vadd_vf_vf_vf(x, vnegpos_vf_vf(y)); } static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmaddsub_pd(x, y, z); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmaddsub_ps(x, y, z); } static INLINE vdouble vrev21_vd_vd(vdouble vd) { return _mm512_permute_pd(vd, 0x55); } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return vreinterpret_vd_vm(_mm512_permutexvar_epi32(_mm512_set_epi32(3, 2, 
1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12), vreinterpret_vm_vd(vd))); } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm512_stream_pd(ptr, v); } static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_store_pd(&ptr[(offset + step * 0)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 0))); _mm_store_pd(&ptr[(offset + step * 1)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 1))); _mm_store_pd(&ptr[(offset + step * 2)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 2))); _mm_store_pd(&ptr[(offset + step * 3)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 3))); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_stream_pd(&ptr[(offset + step * 0)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 0))); _mm_stream_pd(&ptr[(offset + step * 1)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 1))); _mm_stream_pd(&ptr[(offset + step * 2)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 2))); _mm_stream_pd(&ptr[(offset + step * 3)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 3))); } // static INLINE vfloat vrev21_vf_vf(vfloat vf) { return _mm512_permute_ps(vf, 0xb1); } static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } static INLINE vfloat vreva2_vf_vf(vfloat vf) { return vreinterpret_vf_vm(_mm512_permutexvar_epi32(_mm512_set_epi32(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14), vreinterpret_vm_vf(vf))); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm512_stream_ps(ptr, v); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 0))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 0))); 
_mm_storel_pd((double *)(ptr+(offset + step * 2)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 1))); _mm_storeh_pd((double *)(ptr+(offset + step * 3)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 1))); _mm_storel_pd((double *)(ptr+(offset + step * 4)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 2))); _mm_storeh_pd((double *)(ptr+(offset + step * 5)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 2))); _mm_storel_pd((double *)(ptr+(offset + step * 6)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 3))); _mm_storeh_pd((double *)(ptr+(offset + step * 7)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 3))); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } // static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm512_unpacklo_epi64(v.x, v.y), _mm512_unpackhi_epi64(v.x, v.y) }; } static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm512_unpacklo_epi64(v.x, v.y), _mm512_unpackhi_epi64(v.x, v.y) }; } static INLINE vint vuninterleave_vi_vi(vint v) { return _mm256_permutevar8x32_epi32(v, _mm256_set_epi32(7, 5, 3, 1, 6, 4, 2, 0)); } static INLINE vdouble vinterleave_vd_vd(vdouble vd) { return vreinterpret_vd_vm(_mm512_permutexvar_epi32(_mm512_set_epi32(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0), vreinterpret_vm_vd(vd))); } static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { return vreinterpret_vd_vm(_mm512_permutexvar_epi32(_mm512_set_epi32(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0), vreinterpret_vm_vd(vd))); } static INLINE vmask vinterleave_vm_vm(vmask vm) { return _mm512_permutexvar_epi32(_mm512_set_epi32(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0), vm); } static INLINE vmask vuninterleave_vm_vm(vmask vm) { return _mm512_permutexvar_epi32(_mm512_set_epi32(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0), vm); } static vmask2 vloadu_vm2_p(void *p) { vmask2 vm2; memcpy(&vm2, p, VECTLENDP * 16); return vm2; } #if 
!defined(SLEEF_GENHEADER) typedef Sleef_quad8 vargquad; static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { vm2 = vuninterleave_vm2_vm2(vm2); vargquad aq; memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } #endif // #if !defined(SLEEF_GENHEADER) #ifdef __INTEL_COMPILER static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm512_mask2int(g) == 0; } #else static INLINE int vtestallzeros_i_vo64(vopmask g) { return g == 0; } #endif static INLINE vmask vsel_vm_vo64_vm_vm(vopmask m, vmask x, vmask y) { return _mm512_mask_blend_epi64(m, y, x); } static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { return _mm512_sub_epi64(x, y); } static INLINE vmask vneg64_vm_vm(vmask x) { return _mm512_sub_epi64(vcast_vm_i_i(0, 0), x); } static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return _mm512_cmp_epi64_mask(y, x, _MM_CMPINT_LT); } // signed compare #define vsll64_vm_vm_i(x, c) _mm512_slli_epi64(x, c) #define vsrl64_vm_vm_i(x, c) _mm512_srli_epi64(x, c) //@#define vsll64_vm_vm_i(x, c) _mm512_slli_epi64(x, c) //@#define vsrl64_vm_vm_i(x, c) _mm512_srli_epi64(x, c) static INLINE vmask vcast_vm_vi(vint vi) { return _mm512_cvtepi32_epi64(vi); } static INLINE vint vcast_vi_vm(vmask vm) { return _mm512_cvtepi64_epi32(vm); } sleef-3.5.1/src/arch/helperneon32.h000066400000000000000000000323771373003144100170070ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #ifndef __ARM_NEON #error Please specify -mfpu=neon. #endif #ifdef __aarch64__ #warning This implementation is for AARCH32. 
#endif #define ENABLE_SP //@#define ENABLE_SP #define LOG2VECTLENSP 2 //@#define LOG2VECTLENSP 2 #define VECTLENSP (1 << LOG2VECTLENSP) //@#define VECTLENSP (1 << LOG2VECTLENSP) #if CONFIG == 4 #define ISANAME "AARCH32 NEON-VFPV4" #define ENABLE_FMA_SP //@#define ENABLE_FMA_SP #else #define ISANAME "AARCH32 NEON" #endif #define DFTPRIORITY 10 #define ENABLE_RECSQRT_SP //@#define ENABLE_RECSQRT_SP #include #include #include "misc.h" typedef uint32x4_t vmask; typedef uint32x4_t vopmask; //typedef int32x4_t vint; typedef float32x4_t vfloat; typedef int32x4_t vint2; // static INLINE void vprefetch_v_p(const void *ptr) { } static INLINE int vtestallones_i_vo32(vopmask g) { uint32x2_t x0 = vand_u32(vget_low_u32(g), vget_high_u32(g)); uint32x2_t x1 = vpmin_u32(x0, x0); return vget_lane_u32(x1, 0); } static vfloat vloaduf(float *p) { return vld1q_f32(p); } static void vstoreuf(float *p, vfloat v) { vst1q_f32(p, v); } static vint2 vloadu_vi2_p(int32_t *p) { return vld1q_s32(p); } static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { vst1q_s32(p, v); } // static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vandq_u32(x, y); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vbicq_u32(y, x); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vorrq_u32(x, y); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return veorq_u32(x, y); } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vandq_u32(x, y); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vbicq_u32(y, x); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return vorrq_u32(x, y); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return veorq_u32(x, y); } static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vandq_u32(x, y); } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vbicq_u32(y, x); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vorrq_u32(x, y); } static INLINE 
vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return veorq_u32(x, y); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vandq_u32(x, y); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vbicq_u32(y, x); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vorrq_u32(x, y); } static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return veorq_u32(x, y); } static INLINE vopmask vcast_vo32_vo64(vopmask m) { return vuzpq_u32(m, m).val[0]; } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return vzipq_u32(m, m).val[0]; } // static INLINE vmask vcast_vm_i_i(int i0, int i1) { return (vmask)vdupq_n_u64((uint64_t)i0 | (((uint64_t)i1) << 32)); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { uint32x4_t t = vceqq_u32(x, y); return vandq_u32(t, vrev64q_u32(t)); } // static INLINE vint2 vcast_vi2_vm(vmask vm) { return (vint2)vm; } static INLINE vmask vcast_vm_vi2(vint2 vi) { return (vmask)vi; } static INLINE vint2 vrint_vi2_vf(vfloat d) { return vcvtq_s32_f32(vaddq_f32(d, (float32x4_t)vorrq_u32(vandq_u32((uint32x4_t)d, (uint32x4_t)vdupq_n_f32(-0.0f)), (uint32x4_t)vdupq_n_f32(0.5f)))); } static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcvtq_s32_f32(vf); } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return vcvtq_f32_s32(vi); } static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return vcast_vf_vi2(vtruncate_vi2_vf(vd)); } static INLINE vfloat vrint_vf_vf(vfloat vd) { return vcast_vf_vi2(vrint_vi2_vf(vd)); } static INLINE vfloat vcast_vf_f(float f) { return vdupq_n_f32(f); } static INLINE vint2 vcast_vi2_i(int i) { return vdupq_n_s32(i); } static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return (vmask)vf; } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return (vfloat)vm; } static INLINE vfloat vreinterpret_vf_vi2(vint2 vm) { return (vfloat)vm; } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return (vint2)vf; } static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return vaddq_f32(x, y); } 
// Element-wise float arithmetic.
static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return vsubq_f32(x, y); }
static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return vmulq_f32(x, y); }

static INLINE vfloat vabs_vf_vf(vfloat f) { return vabsq_f32(f); }
static INLINE vfloat vneg_vf_vf(vfloat f) { return vnegq_f32(f); }

#if CONFIG == 4
// Fused multiply-add variants (vfmaq_f32 / vfmsq_f32).
// Naming of the suffixes as implemented below:
//   vmla / vfma     : x*y + z
//   vmlanp / vfmanp : z - x*y
//   vmlapn / vfmapn : x*y - z   (negation of the "np" form)
static INLINE vfloat vmla_vf_vf_vf_vf (vfloat x, vfloat y, vfloat z) { return vfmaq_f32(z, x, y); }
static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vfmsq_f32(z, x, y); }
static INLINE vfloat vfma_vf_vf_vf_vf (vfloat x, vfloat y, vfloat z) { return vfmaq_f32(z, x, y); }
static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vfmsq_f32(z, x, y); }
static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vneg_vf_vf(vfmanp_vf_vf_vf_vf(x, y, z)); }
static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vneg_vf_vf(vfmanp_vf_vf_vf_vf(x, y, z)); }

// x / y without a divide instruction: reciprocal estimate of y, one vrecps
// refinement step, one FMA refinement t += t*(1 - y*t), then the quotient
// u = x*t corrected by the residual: u + t*(x - y*u).
static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) {
  float32x4_t t = vrecpeq_f32(y), u;
  t = vmulq_f32(t, vrecpsq_f32(y, t));
  t = vfmaq_f32(t, vfmsq_f32(vdupq_n_f32(1.0f), y, t), t);
  u = vmulq_f32(x, t);
  return vfmaq_f32(u, vfmsq_f32(x, y, u), t);
}

// sqrt(d) from a reciprocal-square-root estimate: two vrsqrts refinement steps,
// u = x*d, then one correction u + (d - u*u) * (x/2). The final vbicq_u32
// clears every bit of the lanes where d compares equal to 0.0f.
static INLINE vfloat vsqrt_vf_vf(vfloat d) {
  float32x4_t x = vrsqrteq_f32(d);
  x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x)));
  x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x)));
  float32x4_t u = vmulq_f32(x, d);
  u = vfmaq_f32(u, vfmsq_f32(d, u, u), vmulq_f32(x, vdupq_n_f32(0.5)));
  return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(u), vceqq_f32(d, vdupq_n_f32(0.0f))));
}

// 1 / y: reciprocal estimate, one vrecps step, then two FMA refinements of the
// form t += t*(1 - y*t).
static INLINE vfloat vrec_vf_vf(vfloat y) {
  float32x4_t t = vrecpeq_f32(y), u;
  t = vmulq_f32(t, vrecpsq_f32(y, t));
  t = vfmaq_f32(t, vfmsq_f32(vdupq_n_f32(1.0f), y, t), t);
  return vfmaq_f32(t, vfmsq_f32(vdupq_n_f32(1.0f), y, t), t);
}

// 1 / sqrt(d): estimate, one vrsqrts step, then x + (1 - x*(x*d)) * (x/2).
static INLINE vfloat vrecsqrt_vf_vf(vfloat d) {
  float32x4_t x = vrsqrteq_f32(d);
  x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x)));
  return vfmaq_f32(x, vfmsq_f32(vdupq_n_f32(1), x, vmulq_f32(x, d)), vmulq_f32(x, vdupq_n_f32(0.5)));
}
#else // #if CONFIG == 4
// Variants built on vmlaq_f32 / vmlsq_f32 (multiply-accumulate / multiply-subtract).
//   vmla   : x*y + z
//   vmlanp : z - x*y
//   vmlapn : x*y - z
static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmlaq_f32(z, x, y); }
static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmlsq_f32(z, x, y); }
static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vneg_vf_vf(vmlsq_f32(z, x, y)); }

// n / d: reciprocal estimate of d with one vrecps step, t = n*x, then
// one correction computed as 2t - (t*x)*d.
static INLINE vfloat vdiv_vf_vf_vf(vfloat n, vfloat d) {
  float32x4_t x = vrecpeq_f32(d);
  x = vmulq_f32(x, vrecpsq_f32(d, x));
  float32x4_t t = vmulq_f32(n, x);
  return vmlsq_f32(vaddq_f32(t, t), vmulq_f32(t, x), d);
}

// sqrt(d): reciprocal-square-root estimate with one vrsqrts step, u = x*d,
// one correction u + (d - u*u) * (x/2); lanes where d == 0.0f are cleared to
// all-zero bits.
static INLINE vfloat vsqrt_vf_vf(vfloat d) {
  float32x4_t x = vrsqrteq_f32(d);
  x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x)));
  float32x4_t u = vmulq_f32(x, d);
  u = vmlaq_f32(u, vmlsq_f32(d, u, u), vmulq_f32(x, vdupq_n_f32(0.5)));
  return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(u), vceqq_f32(d, vdupq_n_f32(0.0f))));
}

// 1 / d: estimate, one vrecps step, then 2x - (x*x)*d.
static INLINE vfloat vrec_vf_vf(vfloat d) {
  float32x4_t x = vrecpeq_f32(d);
  x = vmulq_f32(x, vrecpsq_f32(d, x));
  return vmlsq_f32(vaddq_f32(x, x), vmulq_f32(x, x), d);
}

// 1 / sqrt(d): estimate, one vrsqrts step, then x + (1 - x*(x*d)) * (x/2).
static INLINE vfloat vrecsqrt_vf_vf(vfloat d) {
  float32x4_t x = vrsqrteq_f32(d);
  x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x)));
  return vmlaq_f32(x, vmlsq_f32(vdupq_n_f32(1), x, vmulq_f32(x, d)), vmulq_f32(x, vdupq_n_f32(0.5)));
}
#endif // #if CONFIG == 4

// Element-wise maximum / minimum.
static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return vmaxq_f32(x, y); }
static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return vminq_f32(x, y); }

// Float comparisons producing a per-lane mask. "Not equal" is the bitwise
// complement of the equality mask.
static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vceqq_f32(x, y); }
static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vmvnq_u32(vceqq_f32(x, y)); }
static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vcltq_f32(x, y); }
static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vcleq_f32(x, y); }
static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vcgtq_f32(x, y); }
static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vcgeq_f32(x, y); }

// Element-wise 32-bit integer arithmetic.
static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return vaddq_s32(x, y); }
static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return vsubq_s32(x, y); }
static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vnegq_s32(e); }

// Bitwise integer operations; "andnot" is (~x) & y, hence the swapped vbicq operands.
static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return vandq_s32(x, y); }
static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return vbicq_s32(y, x); }
static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return vorrq_s32(x, y); }
static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return veorq_s32(x, y); }

// Mask (op) integer vector -> integer vector.
static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)vandq_u32(x, (vopmask)y); }
static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)vbicq_u32((vopmask)y, x); }

// Shifts by a count c. These are macros rather than functions; the *_n_*
// intrinsics they expand to take the count as an immediate.
// vsrl is a logical right shift (performed on the unsigned reinterpretation),
// vsra an arithmetic one.
#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c)
#define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c))
#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c)
//@#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c)
//@#define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c))
//@#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c)

// Integer comparisons, returned either as a mask (vo) or as an integer vector (vi2).
static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return vceqq_s32(x, y); }
static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return vcgtq_s32(x, y); }
static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return (vint2)vceqq_s32(x, y); }
static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return (vint2)vcgtq_s32(x, y); }

// Bitwise select: bits of x where the mask is set, bits of y elsewhere.
static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return (vint2)vbslq_u32(m, (vmask)x, (vmask)y); }

static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) {
  return (vfloat)vbslq_u32(mask, (vmask)x, (vmask)y);
}

// Select between broadcast scalar constants: v1 where o is set, v0 elsewhere.
static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) {
  return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0));
}

// Two-level select: d0 where o0 is set, otherwise d1 where o1 is set, otherwise d2.
static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) {
  return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2));
}

// Three-level select: d0 / d1 / d2 by the first mask that is set, otherwise d3.
static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) {
  return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3)));
}

// Classification masks: |d| == inf, d == +inf, d == -inf, and d != d (NaN).
static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); }
static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); }
static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); }
static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); }

// This function is needed when debugging on MSVC.
// Stores the vector to a temporary array and returns element 0.
static INLINE float vcast_f_vf(vfloat v) {
  float p[4];
  vst1q_f32 (p, v);
  return p[0];
}

// Returns 0 for every name other than 2; for name == 2 it performs one vector
// addition and reports whether the result is non-zero.
static INLINE int vavailability_i(int name) {
  if (name != 2) return 0;
  return vcast_f_vf(vadd_vf_vf_vf(vcast_vf_f(name), vcast_vf_f(name))) != 0.0;
}

// Loads and stores. The non-"u" variants tell the compiler that ptr is
// 16-byte aligned; the "u" variants make no alignment promise.
static INLINE vfloat vload_vf_p(const float *ptr) { return vld1q_f32(__builtin_assume_aligned(ptr, 16)); }
static INLINE vfloat vloadu_vf_p(const float *ptr) { return vld1q_f32(ptr); }

static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vst1q_f32(__builtin_assume_aligned(ptr, 16), v); }
static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { vst1q_f32(ptr, v); }

// Gather: lane k of the result is ptr[lane k of vi2].
static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) {
  return ((vfloat) {
      ptr[vgetq_lane_s32(vi2, 0)],
      ptr[vgetq_lane_s32(vi2, 1)],
      ptr[vgetq_lane_s32(vi2, 2)],
      ptr[vgetq_lane_s32(vi2, 3)]
    });
}

// Sign-bit masks: XOR with PNMASKf flips the sign of lanes 1 and 3,
// XOR with NPMASKf flips the sign of lanes 0 and 2.
#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f })
#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f })

static INLINE vfloat vposneg_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)d, (vmask)PNMASKf); }
static INLINE vfloat vnegpos_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)d, (vmask)NPMASKf); }

// d0 + (d1 with lanes 0 and 2 negated): subtract in even lanes, add in odd lanes.
static INLINE vfloat vsubadd_vf_vf_vf(vfloat d0, vfloat d1) { return vadd_vf_vf_vf(d0, vnegpos_vf_vf(d1)); }
// Same pattern applied to the product x*y and z (separate multiply, not fused).
static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); }

// vrev21 swaps the two elements inside each 64-bit half;
// vreva2 swaps the low and high 64-bit halves.
static INLINE vfloat vrev21_vf_vf(vfloat d0) { return vrev64q_f32(d0); }
static INLINE vfloat vreva2_vf_vf(vfloat d0) { return vcombine_f32(vget_high_f32(d0), vget_low_f32(d0)); }
static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); }

// "Streaming" store; implemented as the ordinary aligned store.
static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { vstore_v_p_vf(ptr, v); }

// Scatter two float pairs: the low half of v goes to ptr + offset*2 and the
// high half to ptr + (offset + step)*2 (indices counted in floats).
static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) {
  vst1_f32((float *)(ptr+(offset + step * 0)*2), vget_low_f32(v));
  vst1_f32((float *)(ptr+(offset + step * 1)*2), vget_high_f32(v));
}

// Same stores as vscatter2_v_p_i_i_vf.
static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) {
  vst1_f32((float *)(ptr+(offset + step * 0)*2), vget_low_f32(v));
  vst1_f32((float *)(ptr+(offset + step * 1)*2), vget_high_f32(v));
}
sleef-3.5.1/src/arch/helperpower_128.h000066400000000000000000000633751373003144100174330ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if CONFIG == 1 || CONFIG == 2 #ifndef __VSX__ #error Please specify -mcpu=power8 or -mcpu=power9 #endif #else #error CONFIG macro invalid or not defined #endif #define ENABLE_DP //@#define ENABLE_DP #define LOG2VECTLENDP 1 //@#define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) //@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP //@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) //@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) //@#define VECTLENSP (1 << LOG2VECTLENSP) #if CONFIG == 1 #define ENABLE_FMA_DP //@#define ENABLE_FMA_DP #define ENABLE_FMA_SP //@#define ENABLE_FMA_SP #endif #define ACCURATE_SQRT //@#define ACCURATE_SQRT #define FULL_FP_ROUNDING //@#define FULL_FP_ROUNDING #if !defined(SLEEF_GENHEADER) #include // undef altivec types since CPP and C99 use them as compiler tokens // use __vector and __bool instead #undef vector #undef bool #include #include "misc.h" #endif // #if !defined(SLEEF_GENHEADER) #define ISANAME "VSX" #define DFTPRIORITY 25 static INLINE int vavailability_i(int name) { return 3; } static INLINE void vprefetch_v_p(const void *ptr) { } /********************************************** ** Types ***********************************************/ typedef __vector unsigned int vmask; // using __bool with typedef may cause ambiguous errors #define vopmask __vector __bool int //@#define vopmask __vector __bool int typedef __vector signed int vint; typedef __vector signed int vint2; typedef __vector float vfloat; typedef __vector double vdouble; // internal use types typedef __vector unsigned int v__u32; typedef __vector unsigned char v__u8; typedef __vector signed long long v__i64; typedef __vector unsigned long long v__u64; #define v__b64 __vector __bool long long /********************************************** ** Utilities ***********************************************/ #define vset__vi(v0, 
v1) ((vint) {v0, v1, v0, v1}) #define vset__vi2(...) ((vint2) {__VA_ARGS__}) #define vset__vm(...) ((vmask) {__VA_ARGS__}) #define vset__vo(...) ((vopmask) {__VA_ARGS__}) #define vset__vf(...) ((vfloat) {__VA_ARGS__}) #define vset__vd(...) ((vdouble) {__VA_ARGS__}) #define vset__u8(...) ((v__u8) {__VA_ARGS__}) #define vset__u32(...) ((v__u32) {__VA_ARGS__}) #define vset__s64(...) ((v__i64) {__VA_ARGS__}) #define vset__u64(...) ((v__u64) {__VA_ARGS__}) #define vsetall__vi(v) vset__vi(v, v) #define vsetall__vi2(v) vset__vi2(v, v, v, v) #define vsetall__vm(v) vset__vm(v, v, v, v) #define vsetall__vo(v) vset__vo(v, v, v, v) #define vsetall__vf(v) vset__vf(v, v, v, v) #define vsetall__vd(v) vset__vd(v, v) #define vsetall__u8(v) vset__u8(v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v) #define vsetall__u32(v) vset__u32(v, v, v, v) #define vsetall__s64(v) vset__s64(v, v) #define vsetall__u64(v) vset__u64(v, v) #define vzero__vi() vsetall__vi(0) #define vzero__vi2() vsetall__vi2(0) #define vzero__vm() vsetall__vm(0) #define vzero__vo() vsetall__vo(0) #define vzero__vf() vsetall__vf(0) #define vzero__vd() vsetall__vd(0) #define vzero__u8() vsetall__u8(0) #define vzero__u32() vsetall__u32(0) #define vzero__s64() vsetall__s64(0) #define vzero__u64() vsetall__u64(0) //// Swap doubleword elements #ifdef __clang__ static INLINE v__u64 v__swapd_u64(v__u64 v) { return vec_xxpermdi(v, v, 2); } #else static INLINE v__u64 v__swapd_u64(v__u64 v) { __asm__ __volatile__("xxswapd %x0,%x1" : "=wa" (v) : "wa" (v)); return v; } #endif /********************************************** ** Memory ***********************************************/ ////////////// Unaligned memory access ////////////// /** * It's not safe to use vector assignment via (cast & dereference) for unaligned memory access * with almost all clang versions and gcc8 when VSX3 isn't enabled, * these compilers tends to generate instructions 'lvx/stvx' instead of 'lxvd2x/lxvw4x/stxvd2x/stxvw4x' * for more information check 
https://github.com/seiko2plus/vsx_mem_test * * TODO: check GCC(9, 10) */ //// load #if defined(__POWER9_VECTOR__) || (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8) static vint vloadu_vi_p(const int32_t *ptr) { return *((vint*)ptr); } static INLINE vint2 vloadu_vi2_p(const int32_t *ptr) { return *((vint2*)ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return *((vfloat*)ptr); } static INLINE vdouble vloadu_vd_p(const double *ptr) { return *((vdouble*)ptr); } #else static vint vloadu_vi_p(const int32_t *ptr) { return vec_vsx_ld(0, ptr); } static INLINE vint2 vloadu_vi2_p(const int32_t *ptr) { return vec_vsx_ld(0, ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return vec_vsx_ld(0, ptr); } static INLINE vdouble vloadu_vd_p(const double *ptr) { return vec_vsx_ld(0, ptr); } #endif //// store #if defined(__POWER9_VECTOR__) || (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8) static void vstoreu_v_p_vi(int32_t *ptr, vint v) { *((vint*)ptr) = v; } static void vstoreu_v_p_vi2(int32_t *ptr, vint2 v) { *((vint2*)ptr) = v; } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { *((vfloat*)ptr) = v; } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { *((vdouble*)ptr) = v; } #else static void vstoreu_v_p_vi(int32_t *ptr, vint v) { vec_vsx_st(v, 0, ptr); } static void vstoreu_v_p_vi2(int32_t *ptr, vint2 v) { vec_vsx_st(v, 0, ptr); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { vec_vsx_st(v, 0, ptr); } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { vec_vsx_st(v, 0, ptr); } #endif ////////////// aligned memory access ////////////// //// load static INLINE vfloat vload_vf_p(const float *ptr) { return vec_ld(0, ptr); } static INLINE vdouble vload_vd_p(const double *ptr) { return *((vdouble*)ptr); } //// store static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vec_st(v, 0, ptr); } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { *((vdouble*)ptr) = v; } ////////////// non-temporal 
memory access ////////////// //// store static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { vstore_v_p_vf(ptr, v); } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { vstore_v_p_vd(ptr, v); } ////////////// LUT ////////////// //// load static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return vset__vd(ptr[vec_extract(vi, 0)], ptr[vec_extract(vi, 1)]); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return vset__vf( ptr[vec_extract(vi2, 0)], ptr[vec_extract(vi2, 1)], ptr[vec_extract(vi2, 2)], ptr[vec_extract(vi2, 3)] ); } //// store static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { const v__u64 vll = (v__u64)v; float *ptr_low = ptr + offset*2; float *ptr_high = ptr + (offset + step)*2; *((uint64_t*)ptr_low) = vec_extract(vll, 0); *((uint64_t*)ptr_high) = vec_extract(vll, 1); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vscatter2_v_p_i_i_vd(ptr, offset, step, v); } /********************************************** ** Misc **********************************************/ // vector with a specific value set to all lanes (Vector Splat) static INLINE vint vcast_vi_i(int i) { return vsetall__vi(i); } static INLINE vint2 vcast_vi2_i(int i) { return vsetall__vi2(i); } static INLINE vfloat vcast_vf_f(float f) { return vsetall__vf(f); } static INLINE vdouble vcast_vd_d(double d) { return vsetall__vd(d); } // cast static INLINE vint2 vcast_vi2_vm(vmask vm) { return (vint2)vm; } static INLINE vmask vcast_vm_vi2(vint2 vi) { return (vmask)vi; } // get the first element static INLINE float vcast_f_vf(vfloat v) { return vec_extract(v, 0); } static INLINE double vcast_d_vd(vdouble v) { 
return vec_extract(v, 0); } static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return (vmask)vd; } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return (vdouble)vm; } static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return (vint2)vd; } static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return (vdouble)vi; } static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return (vmask)vf; } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return (vfloat)vm; } static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return (vfloat)vi; } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return (vint2)vf; } // per element select via mask (blend) static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return vec_sel(y, x, (v__b64)o); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) { return vec_sel(y, x, o); } static INLINE vint vsel_vi_vo_vi_vi(vopmask o, vint x, vint y) { return vec_sel(y, x, o); } static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask o, vint2 x, vint2 y) { return vec_sel(y, x, o); } static INLINE vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vsetall__vf(v1), vsetall__vf(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vsetall__vf(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vsetall__vf(d0), vsel_vf_vo_vf_vf(o1, vsetall__vf(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return vsel_vd_vo_vd_vd(o, vsetall__vd(v1), vsetall__vd(v0)); } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vd_vd(o0, vsetall__vd(d0), vsel_vd_vo_d_d(o1, d1, d2)); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, 
double d0, double d1, double d2, double d3) { return vsel_vd_vo_vd_vd(o0, vsetall__vd(d0), vsel_vd_vo_vd_vd(o1, vsetall__vd(d1), vsel_vd_vo_d_d(o2, d2, d3))); } static INLINE int vtestallones_i_vo32(vopmask g) { return vec_all_ne((vint2)g, vzero__vi2()); } static INLINE int vtestallones_i_vo64(vopmask g) { return vec_all_ne((v__i64)g, vzero__s64()); } /********************************************** ** Conversions **********************************************/ ////////////// Numeric ////////////// // pack 64-bit mask to 32-bit static INLINE vopmask vcast_vo32_vo64(vopmask m) { return (vopmask)vec_pack((v__u64)m, (v__u64)m); } // clip 64-bit lanes to lower 32-bit static INLINE vint vcastu_vi_vi2(vint2 vi2) { return vec_mergeo(vi2, vec_splat(vi2, 3)); } // expand lower 32-bit mask static INLINE vopmask vcast_vo64_vo32(vopmask m) { return vec_mergeh(m, m); } // unsigned expand lower 32-bit integer static INLINE vint2 vcastu_vi2_vi(vint vi) { return vec_mergeh(vzero__vi(), vi); } // signed int to single-precision static INLINE vfloat vcast_vf_vi2(vint2 vi) { vfloat ret; #ifdef __clang__ ret = __builtin_convertvector(vi, vfloat); #else __asm__ __volatile__("xvcvsxwsp %x0,%x1" : "=wa" (ret) : "wa" (vi)); #endif return ret; } // lower signed int to double-precision static INLINE vdouble vcast_vd_vi(vint vi) { vdouble ret; vint swap = vec_mergeh(vi, vi); #ifdef __clang__ ret = __builtin_vsx_xvcvsxwdp(swap); #else __asm__ __volatile__("xvcvsxwdp %x0,%x1" : "=wa" (ret) : "wa" (swap)); #endif return ret; } // zip two scalars static INLINE vmask vcast_vm_i_i(int l, int h) { return (vmask)vec_mergeh(vsetall__vi2(h), vsetall__vi2(l)); } ////////////// Truncation ////////////// static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { vint2 ret; #ifdef __clang__ ret = __builtin_convertvector(vf, vint2); #else __asm__ __volatile__("xvcvspsxws %x0,%x1" : "=wa" (ret) : "wa" (vf)); #endif return ret; } static INLINE vint vtruncate_vi_vd(vdouble vd) { vint ret; #ifdef __clang__ ret = 
__builtin_vsx_xvcvdpsxws(vd); #else __asm__ __volatile__("xvcvdpsxws %x0,%x1" : "=wa" (ret) : "wa" (vd)); #endif return vec_mergeo(ret, vec_splat(ret, 3)); } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vec_trunc(vd); } static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return vec_trunc(vf); } ////////////// Rounding ////////////// // towards the nearest even static INLINE vint vrint_vi_vd(vdouble vd) { return vtruncate_vi_vd(vec_rint(vd)); } static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vtruncate_vi2_vf(vec_rint(vf)); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return vec_rint(vd); } static INLINE vfloat vrint_vf_vf(vfloat vf) { return vec_rint(vf); } /********************************************** ** Logical **********************************************/ ////////////// And ////////////// static INLINE vint vand_vi_vi_vi(vint x, vint y) { return vec_and(x, y); } static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return vec_and((vint)x, y); } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_and(x, y); } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)vec_and((vint2)x, y); } static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vec_and(x, y); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vec_and((vmask)x, y); } static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vec_and((vmask)x, y); } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vec_and(x, y); } ////////////// Or ////////////// static INLINE vint vor_vi_vi_vi(vint x, vint y) { return vec_or(x, y); } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_or(x, y); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vec_or(x, y); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vec_or((vmask)x, y); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vec_or((vmask)x, y); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { 
return vec_or(x, y); } ////////////// Xor ////////////// static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return vec_xor(x, y); } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_xor(x, y); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return vec_xor(x, y); } static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return vec_xor((vmask)x, y); } static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return vec_xor((vmask)x, y); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return vec_xor(x, y); } ////////////// Not ////////////// static INLINE vopmask vnot_vo_vo(vopmask o) { return vec_nor(o, o); } ////////////// And Not ((~x) & y) ////////////// static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return vec_andc(y, x); } static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return vec_andc(y, (vint)x); } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_andc(y, x); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vec_andc(y, x); } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vec_andc(y, x); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vec_andc(y, x); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vec_andc(y, x); } static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vec_andc(y, (vint2)x); } /********************************************** ** Comparison **********************************************/ ////////////// Equal ////////////// static INLINE vint veq_vi_vi_vi(vint x, vint y) { return (vint)vec_cmpeq(x, y); } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return vec_cmpeq(x, y); } static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return vec_cmpeq(x, y); } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return (vint2)vec_cmpeq(x, y); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return (vopmask)vec_cmpeq((v__u64)x, (v__u64)y); } static INLINE 
vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vec_cmpeq(x, y); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmpeq(x, y); } ////////////// Not Equal ////////////// static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vnot_vo_vo(vec_cmpeq(x, y)); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return vnot_vo_vo((vopmask)vec_cmpeq(x, y)); } ////////////// Less Than ////////////// static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vec_cmplt(x, y); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmplt(x, y); } ////////////// Greater Than ////////////// static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return (vint)vec_cmpgt(x, y); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return vec_cmpgt(x, y);} static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return (vint2)vec_cmpgt(x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return vec_cmpgt(x, y); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vec_cmpgt(x, y); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmpgt(x, y); } ////////////// Less Than Or Equal ////////////// static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vec_cmple(x, y); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmple(x, y); } ////////////// Greater Than Or Equal ////////////// static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vec_cmpge(x, y); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmpge(x, y); } ////////////// Special Cases ////////////// static INLINE vopmask visinf_vo_vf(vfloat d) { return vec_cmpeq(vec_abs(d), vsetall__vf(SLEEF_INFINITYf)); } static INLINE vopmask visinf_vo_vd(vdouble d) { return (vopmask)vec_cmpeq(vec_abs(d), vsetall__vd(SLEEF_INFINITY)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return vec_cmpeq(d, vsetall__vf(SLEEF_INFINITYf)); 
} static INLINE vopmask vispinf_vo_vd(vdouble d) { return (vopmask)vec_cmpeq(d, vsetall__vd(SLEEF_INFINITY)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return vec_cmpeq(d, vsetall__vf(-SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vd(vdouble d) { return (vopmask)vec_cmpeq(d, vsetall__vd(-SLEEF_INFINITY)); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vnot_vo_vo(vec_cmpeq(d, d)); } static INLINE vopmask visnan_vo_vd(vdouble d) { return vnot_vo_vo((vopmask)vec_cmpeq(d, d)); } /********************************************** ** Shift **********************************************/ ////////////// Left ////////////// static INLINE vint vsll_vi_vi_i(vint x, int c) { return vec_sl (x, vsetall__u32(c)); } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return vec_sl(x, vsetall__u32(c)); } ////////////// Right ////////////// static INLINE vint vsrl_vi_vi_i(vint x, int c) { return vec_sr(x, vsetall__u32(c)); } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return vec_sr(x, vsetall__u32(c)); } ////////////// Algebraic Right ////////////// static INLINE vint vsra_vi_vi_i(vint x, int c) { return vec_sra(x, vsetall__u32(c)); } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return vec_sra(x, vsetall__u32(c)); } /********************************************** ** Reorder **********************************************/ ////////////// Reverse ////////////// // Reverse elements order inside the lower and higher parts static INLINE vint2 vrev21_vi2_vi2(vint2 vi) { return vec_mergee(vec_mergeo(vi, vi), vi); } static INLINE vfloat vrev21_vf_vf(vfloat vf) { return (vfloat)vrev21_vi2_vi2((vint2)vf); } // Swap the lower and higher parts static INLINE vfloat vreva2_vf_vf(vfloat vf) { return (vfloat)v__swapd_u64((v__u64)vf); } static INLINE vdouble vrev21_vd_vd(vdouble vd) { return (vdouble)v__swapd_u64((v__u64)vd); } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return vd; } /********************************************** ** Arithmetic 
**********************************************/ ////////////// Negation ////////////// static INLINE vint vneg_vi_vi(vint e) { #ifdef __clang__ return vec_neg(e); #else return vec_sub(vzero__vi(), e); #endif } static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vneg_vi_vi(e); } static INLINE vfloat vneg_vf_vf(vfloat d) { vfloat ret; #ifdef __clang__ ret = vec_neg(d); #else __asm__ __volatile__("xvnegsp %x0,%x1" : "=wa" (ret) : "wa" (d)); #endif return ret; } static INLINE vdouble vneg_vd_vd(vdouble d) { vdouble ret; #ifdef __clang__ ret = vec_neg(d); #else __asm__ __volatile__("xvnegdp %x0,%x1" : "=wa" (ret) : "wa" (d)); #endif return ret; } static INLINE vfloat vposneg_vf_vf(vfloat d) { return vec_xor(d, vset__vf(+0.0f, -0.0f, +0.0f, -0.0f)); } static INLINE vdouble vposneg_vd_vd(vdouble d) { return vec_xor(d, vset__vd(+0.0, -0.0)); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vec_xor(d, vset__vf(-0.0f, +0.0f, -0.0f, +0.0f)); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vec_xor(d, vset__vd(-0.0, +0.0)); } ////////////// Addition ////////////// static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return vec_add(x, y); } static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_add(x, y); } static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return vec_add(x, y); } static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return vec_add(x, y); } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return (vmask)vec_add((v__i64)x, (v__i64)y); } ////////////// Subtraction ////////////// static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return vec_sub(x, y); } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_sub(x, y); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return vec_sub(x, y); } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return vec_sub(x, y); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vec_add(x, vnegpos_vd_vd(y)); } static INLINE vfloat 
vsubadd_vf_vf_vf(vfloat x, vfloat y) { return vec_add(x, vnegpos_vf_vf(y)); } ////////////// Multiplication ////////////// static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return vec_mul(x, y); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return vec_mul(x, y); } static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return vec_div(x, y); } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return vec_div(x, y); } static INLINE vfloat vrec_vf_vf(vfloat x) { return vec_div(vsetall__vf(1.0f), x); } static INLINE vdouble vrec_vd_vd(vdouble x) { return vec_div(vsetall__vd(1.0), x); } /********************************************** ** Math **********************************************/ static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return vec_max(x, y); } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return vec_max(x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return vec_min(x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return vec_min(x, y); } static INLINE vfloat vabs_vf_vf(vfloat f) { return vec_abs(f); } static INLINE vdouble vabs_vd_vd(vdouble d) { return vec_abs(d); } static INLINE vfloat vsqrt_vf_vf(vfloat f) { return vec_sqrt(f); } static INLINE vdouble vsqrt_vd_vd(vdouble d) { return vec_sqrt(d); } /********************************************** ** FMA3 **********************************************/ #if CONFIG == 1 static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_madd(x, y, z); } static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_madd(x, y, z); } static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_msub(x, y, z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_msub(x, y, z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_nmsub(x, y, z); } static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble 
x, vdouble y, vdouble z) { return vec_nmsub(x, y, z); } #else static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_add(vec_mul(x, y), z); } static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_add(vec_mul(x, y), z); } static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_sub(vec_mul(x, y), z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_sub(vec_mul(x, y), z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_sub(z, vec_mul(x, y)); } static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_sub(z, vec_mul(x, y)); } #endif static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_madd(x, y, z); } static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_madd(x, y, z); } static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_madd(x, y, z); } static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_madd(x, y, z); } static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_msub(x, y, z); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_msub(x, y, z); } static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_nmsub(x, y, z); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_nmsub(x, y, z); } static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_nmadd(x, y, z); } static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_nmadd(x, y, z); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmla_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vmla_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); 
} sleef-3.5.1/src/arch/helperpurec.h000066400000000000000000000752711373003144100170210ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include "misc.h" #ifndef CONFIG #error CONFIG macro not defined #endif #define ENABLE_DP //@#define ENABLE_DP #define ENABLE_SP //@#define ENABLE_SP #define LOG2VECTLENDP CONFIG //@#define LOG2VECTLENDP CONFIG #define VECTLENDP (1 << LOG2VECTLENDP) //@#define VECTLENDP (1 << LOG2VECTLENDP) #define LOG2VECTLENSP (LOG2VECTLENDP+1) //@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) //@#define VECTLENSP (1 << LOG2VECTLENSP) #define ACCURATE_SQRT //@#define ACCURATE_SQRT #define DFTPRIORITY LOG2VECTLENDP #define ISANAME "Pure C Array" typedef union { uint32_t u[VECTLENDP*2]; uint64_t x[VECTLENDP]; double d[VECTLENDP]; float f[VECTLENDP*2]; int32_t i[VECTLENDP*2]; } versatileVector; typedef versatileVector vmask; typedef versatileVector vopmask; typedef versatileVector vdouble; typedef versatileVector vint; typedef versatileVector vfloat; typedef versatileVector vint2; typedef union { uint8_t u[sizeof(long double)*VECTLENDP]; long double ld[VECTLENDP]; } longdoubleVector; typedef longdoubleVector vmaskl; typedef longdoubleVector vlongdouble; #if defined(Sleef_quad2_DEFINED) && defined(ENABLEFLOAT128) typedef union { uint8_t u[sizeof(Sleef_quad)*VECTLENDP]; Sleef_quad q[VECTLENDP]; } quadVector; typedef quadVector vmaskq; typedef quadVector vquad; #endif // static INLINE int vavailability_i(int name) { return -1; } static INLINE void vprefetch_v_p(const void *ptr) { } static INLINE int vtestallones_i_vo64(vopmask g) { int ret = 1; for(int i=0;i 0 ? 
(int)(vd.d[i] + 0.5) : (int)(vd.d[i] - 0.5); return ret; } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vcast_vd_vi(vtruncate_vi_vd(vd)); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return vcast_vd_vi(vrint_vi_vd(vd)); } static INLINE vint vcast_vi_i(int j) { vint ret; for(int i=0;i y.d[i] ? x.d[i] : y.d[i]; return ret; } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { vdouble ret; for(int i=0;i y.d[i] ? -1 : 0; return ret; } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { vopmask ret; for(int i=0;i= y.d[i] ? -1 : 0; return ret; } static INLINE vint vadd_vi_vi_vi(vint x, vint y) { vint ret; for(int i=0;i> c; return ret; } static INLINE vint vsra_vi_vi_i(vint x, int c) { vint ret; for(int i=0;i> c; return ret; } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { vopmask ret; for(int i=0;i y.i[i] ? -1 : 0; return ret; } static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { union { vopmask vo; vint2 vi2; } cnv; cnv.vo = m; return vor_vi_vi_vi(vand_vi_vi_vi(vreinterpretFirstHalf_vi_vi2(cnv.vi2), x), vandnot_vi_vi_vi(vreinterpretFirstHalf_vi_vi2(cnv.vi2), y)); } static INLINE vopmask visinf_vo_vd(vdouble d) { vopmask ret; for(int i=0;i 0 ? (int)(vf.f[i] + 0.5) : (int)(vf.f[i] - 0.5); return ret; } static INLINE vint2 vcast_vi2_i(int j) { vint2 ret; for(int i=0;i y.f[i] ? x.f[i] : y.f[i]; return ret; } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { vfloat ret; for(int i=0;i y.f[i]) ? -1 : 0); return ret; } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { vopmask ret; for(int i=0;i= y.f[i]) ? -1 : 0); return ret; } static INLINE vint vadd_vi2_vi2_vi2(vint x, vint y) { vint ret; for(int i=0;i> c; return ret; } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { vint2 ret; for(int i=0;i> c; return ret; } static INLINE vopmask visinf_vo_vf (vfloat d) { vopmask ret; for(int i=0;i y.i[i] ? -1 : 0; return ret; } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { vopmask ret; for(int i=0;i y.i[i] ? 
-1 : 0; return ret; } static INLINE vfloat vsqrt_vf_vf(vfloat x) { vfloat ret; for(int i=0;i #endif #ifndef ENABLE_BUILTIN_MATH #if !defined(SLEEF_GENHEADER) #include #endif #define SQRT sqrt #define SQRTF sqrtf #define FMA fma #define FMAF fmaf #define RINT rint #define RINTF rintf #define TRUNC trunc #define TRUNCF truncf #else #define SQRT __builtin_sqrt #define SQRTF __builtin_sqrtf #define FMA __builtin_fma #define FMAF __builtin_fmaf #define RINT __builtin_rint #define RINTF __builtin_rintf #define TRUNC __builtin_trunc #define TRUNCF __builtin_truncf #endif #if !defined(SLEEF_GENHEADER) #include "misc.h" #endif #ifndef CONFIG #error CONFIG macro not defined #endif #define ENABLE_DP //@#define ENABLE_DP #define ENABLE_SP //@#define ENABLE_SP #if CONFIG == 2 #define ENABLE_FMA_DP //@#define ENABLE_FMA_DP #define ENABLE_FMA_SP //@#define ENABLE_FMA_SP #if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) #ifndef FP_FAST_FMA #define FP_FAST_FMA //@#define FP_FAST_FMA #endif #ifndef FP_FAST_FMAF #define FP_FAST_FMAF //@#define FP_FAST_FMAF #endif #endif #if (!defined(FP_FAST_FMA) || !defined(FP_FAST_FMAF)) && !defined(SLEEF_GENHEADER) #error FP_FAST_FMA or FP_FAST_FMAF not defined #endif #define ISANAME "Pure C scalar with FMA" #else // #if CONFIG == 2 #define ISANAME "Pure C scalar" #endif // #if CONFIG == 2 #define LOG2VECTLENDP 0 //@#define LOG2VECTLENDP 0 #define VECTLENDP (1 << LOG2VECTLENDP) //@#define VECTLENDP (1 << LOG2VECTLENDP) #define LOG2VECTLENSP 0 //@#define LOG2VECTLENSP 0 #define VECTLENSP (1 << LOG2VECTLENSP) //@#define VECTLENSP (1 << LOG2VECTLENSP) #define ACCURATE_SQRT //@#define ACCURATE_SQRT #if defined(__SSE4_1__) || defined(__aarch64__) #define FULL_FP_ROUNDING //@#define FULL_FP_ROUNDING #endif #define DFTPRIORITY LOG2VECTLENDP typedef union { uint32_t u[2]; int32_t i[2]; uint64_t x; double d; float f; int64_t i2; } versatileVector; typedef uint64_t vmask; typedef uint32_t 
vopmask; typedef double vdouble; typedef int32_t vint; typedef float vfloat; typedef int64_t vint2; typedef struct { vmask x, y; } vmask2; // static INLINE int vavailability_i(int name) { return -1; } static INLINE void vprefetch_v_p(const void *ptr) {} static INLINE int vtestallones_i_vo64(vopmask g) { return g; } static INLINE int vtestallones_i_vo32(vopmask g) { return g; } // static vint2 vloadu_vi2_p(int32_t *p) { return *p; } static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { *p = v; } static vint vloadu_vi_p(int32_t *p) { return *p; } static void vstoreu_v_p_vi(int32_t *p, vint v) { *p = v; } // static INLINE vopmask vcast_vo32_vo64(vopmask m) { return m; } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return m; } static INLINE vmask vcast_vm_i_i(int h, int l) { return (((uint64_t)h) << 32) | (uint32_t)l; } static INLINE vint2 vcastu_vi2_vi(vint vi) { return ((int64_t)vi) << 32; } static INLINE vint vcastu_vi_vi2(vint2 vi2) { return vi2 >> 32; } static INLINE vint2 vrev21_vi2_vi2(vint2 vi2) { return (((uint64_t)vi2) << 32) | (((uint64_t)vi2) >> 32); } static INLINE vdouble vcast_vd_d(double d) { return d; } // static INLINE vopmask vand_vo_vo_vo (vopmask x, vopmask y) { return x & y; } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return y & ~x; } static INLINE vopmask vor_vo_vo_vo (vopmask x, vopmask y) { return x | y; } static INLINE vopmask vxor_vo_vo_vo (vopmask x, vopmask y) { return x ^ y; } static INLINE vmask vand_vm_vm_vm (vmask x, vmask y) { return x & y; } static INLINE vmask vandnot_vm_vm_vm (vmask x, vmask y) { return y & ~x; } static INLINE vmask vor_vm_vm_vm (vmask x, vmask y) { return x | y; } static INLINE vmask vxor_vm_vm_vm (vmask x, vmask y) { return x ^ y; } static INLINE vmask vcast_vm_vo(vopmask o) { return (vmask)o | (((vmask)o) << 32); } static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vcast_vm_vo(x) & y; } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return y & ~vcast_vm_vo(x); } 
static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vcast_vm_vo(x) | y; } static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return vcast_vm_vo(x) ^ y; } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vcast_vm_vo(x) & y; } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return y & ~vcast_vm_vo(x); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vcast_vm_vo(x) | y; } static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return vcast_vm_vo(x) ^ y; } // static INLINE vdouble vsel_vd_vo_vd_vd (vopmask o, vdouble x, vdouble y) { return o ? x : y; } static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask o, vint2 x, vint2 y) { return o ? x : y; } static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return o ? v1 : v0; } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); } static INLINE vdouble vcast_vd_vi(vint vi) { return vi; } static INLINE vint vcast_vi_i(int j) { return j; } #ifdef FULL_FP_ROUNDING static INLINE vint vrint_vi_vd(vdouble d) { return (int32_t)RINT(d); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return RINT(vd); } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return TRUNC(vd); } static INLINE vint vtruncate_vi_vd(vdouble vd) { return (int32_t)TRUNC(vd); } #else static INLINE vint vrint_vi_vd(vdouble a) { a += a > 0 ? 
0.5 : -0.5; versatileVector v = { .d = a }; v.x -= 1 & (int)a; return (int32_t)v.d; } static INLINE vdouble vrint_vd_vd(vdouble vd) { return vcast_vd_vi(vrint_vi_vd(vd)); } static INLINE vint vtruncate_vi_vd(vdouble vd) { return vd; } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vcast_vd_vi(vtruncate_vi_vd(vd)); } #endif static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return x == y ? ~(uint32_t)0 : 0; } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return x + y; } // static INLINE vmask vreinterpret_vm_vd(vdouble vd) { union { vdouble vd; vmask vm; } cnv; cnv.vd = vd; return cnv.vm; } static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { union { vdouble vd; vint2 vi2; } cnv; cnv.vd = vd; return cnv.vi2; } static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { union { vint2 vi2; vdouble vd; } cnv; cnv.vi2 = vi; return cnv.vd; } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { union { vmask vm; vdouble vd; } cnv; cnv.vm = vm; return cnv.vd; } static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return x + y; } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return x - y; } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return x * y; } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return x / y; } static INLINE vdouble vrec_vd_vd(vdouble x) { return 1 / x; } static INLINE vdouble vabs_vd_vd(vdouble d) { versatileVector v = { .d = d }; v.x &= 0x7fffffffffffffffULL; return v.d; } static INLINE vdouble vneg_vd_vd(vdouble d) { return -d; } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return x > y ? x : y; } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return x < y ? 
x : y; } #ifndef ENABLE_FMA_DP static INLINE vdouble vmla_vd_vd_vd_vd (vdouble x, vdouble y, vdouble z) { return x * y + z; } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return x * y - z; } static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return -x * y + z; } #else static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return FMA(x, y, z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return FMA(x, y, -z); } static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return FMA(-x, y, z); } static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return FMA(x, y, z); } static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return FMA(x, y, z); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return FMA(x, y, -z); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return FMA(-x, y, z); } static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return FMA(-x, y, -z); } #endif static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return x == y ? ~(uint32_t)0 : 0; } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return x != y ? ~(uint32_t)0 : 0; } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return x < y ? ~(uint32_t)0 : 0; } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return x <= y ? ~(uint32_t)0 : 0; } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return x > y ? ~(uint32_t)0 : 0; } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return x >= y ? 
~(uint32_t)0 : 0; } static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return x + y; } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return x - y; } static INLINE vint vneg_vi_vi (vint x) { return - x; } static INLINE vint vand_vi_vi_vi(vint x, vint y) { return x & y; } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return y & ~x; } static INLINE vint vor_vi_vi_vi(vint x, vint y) { return x | y; } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return x ^ y; } static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return x & y; } static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return y & ~x; } static INLINE vint vsll_vi_vi_i(vint x, int c) { return (uint32_t)x << c; } static INLINE vint vsrl_vi_vi_i(vint x, int c) { return (uint32_t)x >> c; } static INLINE vint vsra_vi_vi_i(vint x, int c) { return x >> c; } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return x == y ? ~(uint32_t)0 : 0; } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return x > y ? ~(uint32_t)0 : 0; } static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return m ? x : y; } static INLINE vopmask visinf_vo_vd(vdouble d) { return (d == SLEEF_INFINITY || d == -SLEEF_INFINITY) ? ~(uint32_t)0 : 0; } static INLINE vopmask vispinf_vo_vd(vdouble d) { return d == SLEEF_INFINITY ? ~(uint32_t)0 : 0; } static INLINE vopmask visminf_vo_vd(vdouble d) { return d == -SLEEF_INFINITY ? ~(uint32_t)0 : 0; } static INLINE vopmask visnan_vo_vd(vdouble d) { return d != d ? 
~(uint32_t)0 : 0; } static INLINE vdouble vsqrt_vd_vd(vdouble d) { return SQRT(d); } static INLINE vfloat vsqrt_vf_vf(vfloat x) { return SQRTF(x); } static INLINE double vcast_d_vd(vdouble v) { return v; } static INLINE vdouble vload_vd_p(const double *ptr) { return *ptr; } static INLINE vdouble vloadu_vd_p(const double *ptr) { return *ptr; } static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return ptr[vi]; } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { *ptr = v; } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { *ptr = v; } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { *ptr = v; } // static INLINE vint2 vcast_vi2_vm(vmask vm) { union { vint2 vi2; vmask vm; } cnv; cnv.vm = vm; return cnv.vi2; } static INLINE vmask vcast_vm_vi2(vint2 vi) { union { vint2 vi2; vmask vm; } cnv; cnv.vi2 = vi; return cnv.vm; } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return (int32_t)vi; } static INLINE vint2 vcast_vi2_i(int j) { return j; } #ifdef FULL_FP_ROUNDING static INLINE vint2 vrint_vi2_vf(vfloat d) { return (int)RINTF(d); } static INLINE vfloat vrint_vf_vf(vfloat vd) { return RINTF(vd); } static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return TRUNCF(vd); } static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return (int32_t)TRUNCF(vf); } #else static INLINE vint2 vrint_vi2_vf(vfloat a) { a += a > 0 ? 
0.5f : -0.5f; versatileVector v = { .f = a }; v.u[0] -= 1 & (int)a; return (int32_t)v.f; } static INLINE vfloat vrint_vf_vf(vfloat vd) { return vcast_vf_vi2(vrint_vi2_vf(vd)); } static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vf; } static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return vcast_vf_vi2(vtruncate_vi2_vf(vd)); } #endif static INLINE vfloat vcast_vf_f(float f) { return f; } static INLINE vmask vreinterpret_vm_vf(vfloat vf) { union { vfloat vf; vmask vm; } cnv; cnv.vf = vf; return cnv.vm; } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { union { vfloat vf; vmask vm; } cnv; cnv.vm = vm; return cnv.vf; } #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { union { vfloat vf[2]; vint2 vi2; } cnv; cnv.vi2 = vi; return cnv.vf[1]; } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { union { vfloat vf[2]; vint2 vi2; } cnv; cnv.vi2 = 0; cnv.vf[1] = vf; return cnv.vi2; } #else static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { union { vfloat vf; vint2 vi2; } cnv; cnv.vi2 = vi; return cnv.vf; } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { union { vfloat vf; vint2 vi2; } cnv; cnv.vi2 = 0; cnv.vf = vf; return cnv.vi2; } #endif static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return x + y; } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return x - y; } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return x * y; } static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return x / y; } static INLINE vfloat vrec_vf_vf (vfloat x) { return 1 / x; } static INLINE vfloat vabs_vf_vf(vfloat x) { versatileVector v = { .f = x }; v.i[0] &= 0x7fffffff; return v.f; } static INLINE vfloat vneg_vf_vf(vfloat x) { return -x; } static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return x > y ? x : y; } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return x < y ? 
x : y; } #ifndef ENABLE_FMA_SP static INLINE vfloat vmla_vf_vf_vf_vf (vfloat x, vfloat y, vfloat z) { return x * y + z; } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return - x * y + z; } static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return x * y - z; } #else static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return FMAF(x, y, z); } static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return FMAF(x, y, -z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return FMAF(-x, y, z); } static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return FMAF(x, y, z); } static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return FMAF(x, y, z); } static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return FMAF(x, y, -z); } static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return FMAF(-x, y, z); } static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return FMAF(-x, y, -z); } #endif static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return x == y ? ~(uint32_t)0 : 0; } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return x != y ? ~(uint32_t)0 : 0; } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return x < y ? ~(uint32_t)0 : 0; } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return x <= y ? ~(uint32_t)0 : 0; } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return x > y ? ~(uint32_t)0 : 0; } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return x >= y ? 
~(uint32_t)0 : 0; } static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { versatileVector v = { .i2 = x }, w = { .i2 = y }; v.i[0] += w.i[0]; v.i[1] += w.i[1]; return v.i2; } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { versatileVector v = { .i2 = x }, w = { .i2 = y }; v.i[0] -= w.i[0]; v.i[1] -= w.i[1]; return v.i2; } static INLINE vint2 vneg_vi2_vi2(vint2 x) { versatileVector v = { .i2 = x }; v.i[0] = -v.i[0]; v.i[1] = -v.i[1]; return v.i2; } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return x & y; } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return y & ~x; } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return x | y; } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return x ^ y; } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) { return o ? x : y; } static INLINE vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return o ? v1 : v0; } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return vcast_vm_vo(x) & y; } static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return y & ~vcast_vm_vo(x); } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { versatileVector v = { .i2 = x }; v.u[0] <<= c; v.u[1] <<= c; return v.i2; } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { versatileVector v = { .i2 = x }; v.u[0] >>= c; v.u[1] >>= c; return v.i2; } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { versatileVector v = { .i2 = x }; v.i[0] >>= c; v.i[1] >>= c; return v.i2; } static INLINE vopmask visinf_vo_vf (vfloat d) { return (d == SLEEF_INFINITYf || d == 
-SLEEF_INFINITYf) ? ~(uint32_t)0 : 0; } static INLINE vopmask vispinf_vo_vf(vfloat d) { return d == SLEEF_INFINITYf ? ~(uint32_t)0 : 0; } static INLINE vopmask visminf_vo_vf(vfloat d) { return d == -SLEEF_INFINITYf ? ~(uint32_t)0 : 0; } static INLINE vopmask visnan_vo_vf (vfloat d) { return d != d ? ~(uint32_t)0 : 0; } static INLINE vopmask veq_vo_vi2_vi2 (vint2 x, vint2 y) { return (int32_t)x == (int32_t)y ? ~(uint32_t)0 : 0; } static INLINE vopmask vgt_vo_vi2_vi2 (vint2 x, vint2 y) { return (int32_t)x > (int32_t)y ? ~(uint32_t)0 : 0; } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return (int32_t)x == (int32_t)y ? ~(uint32_t)0 : 0; } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return (int32_t)x > (int32_t)y ? ~(uint32_t)0 : 0; } static INLINE float vcast_f_vf(vfloat v) { return v; } static INLINE vfloat vload_vf_p(const float *ptr) { return *ptr; } static INLINE vfloat vloadu_vf_p(const float *ptr) { return *ptr; } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi) { return ptr[vi]; } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { *ptr = v; } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { *ptr = v; } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { *ptr = v; } // static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return v; } static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { return v; } static INLINE vint vuninterleave_vi_vi(vint v) { return v; } static INLINE vdouble vinterleave_vd_vd(vdouble vd) { return vd; } static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { return vd; } static INLINE vmask vinterleave_vm_vm(vmask vm) { return vm; } static INLINE vmask vuninterleave_vm_vm(vmask vm) { return vm; } #if !defined(SLEEF_GENHEADER) typedef Sleef_quad1 vargquad; static vmask2 vloadu_vm2_p(void *p) { vmask2 vm2; memcpy(&vm2, p, VECTLENDP * 16); return vm2; } static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } static INLINE vargquad 
vcast_aq_vm2(vmask2 vm2) { vm2 = vuninterleave_vm2_vm2(vm2); vargquad aq; memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } #endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return !g ? ~(uint32_t)0 : 0; } static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { return o ? x : y; } static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { return (int64_t)x - (int64_t)y; } static INLINE vmask vneg64_vm_vm(vmask x) { return -(int64_t)x; } #define vsll64_vm_vm_i(x, c) ((uint64_t)(x) << (c)) #define vsrl64_vm_vm_i(x, c) ((uint64_t)(x) >> (c)) //@#define vsll64_vm_vm_i(x, c) ((uint64_t)(x) << (c)) //@#define vsrl64_vm_vm_i(x, c) ((uint64_t)(x) >> (c)) static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return (int64_t)x > (int64_t)y ? ~(uint32_t)0 : 0; } static INLINE vmask vcast_vm_vi(vint vi) { return vi; } static INLINE vint vcast_vi_vm(vmask vm) { return vm; } sleef-3.5.1/src/arch/helpers390x_128.h000066400000000000000000000503301373003144100171500ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2019. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if CONFIG == 140 || CONFIG == 141 #if !defined(__VX__) && !defined(SLEEF_GENHEADER) #error This helper is for IBM s390x. #endif #if __ARCH__ < 12 && !defined(SLEEF_GENHEADER) #error Please specify -march=z14. 
#endif #else #error CONFIG macro invalid or not defined #endif #define ENABLE_DP //@#define ENABLE_DP #define LOG2VECTLENDP 1 //@#define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) //@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP //@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) //@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) //@#define VECTLENSP (1 << LOG2VECTLENSP) #if CONFIG == 140 #define ENABLE_FMA_DP //@#define ENABLE_FMA_DP #define ENABLE_FMA_SP //@#define ENABLE_FMA_SP #endif #define ACCURATE_SQRT //@#define ACCURATE_SQRT #define FULL_FP_ROUNDING //@#define FULL_FP_ROUNDING #if !defined(SLEEF_GENHEADER) #ifndef SLEEF_VECINTRIN_H_INCLUDED #include #define SLEEF_VECINTRIN_H_INCLUDED #endif #include #include #include "misc.h" #endif // #if !defined(SLEEF_GENHEADER) typedef __vector unsigned long long vmask; typedef __vector unsigned long long vopmask; typedef __vector double vdouble; typedef __vector int vint; typedef __vector float vfloat; typedef __vector int vint2; // #if !defined(SLEEF_GENHEADER) static INLINE int vavailability_i(int n) { if (n == 1 || n == 2) { return vec_max((vdouble) {n, n}, (vdouble) {n, n})[0] != 0; } return 0; } #define ISANAME "ZVECTOR2" #define DFTPRIORITY 14 #endif // #if !defined(SLEEF_GENHEADER) static INLINE void vprefetch_v_p(const void *ptr) { } static vint2 vloadu_vi2_p(int32_t *p) { return (vint2) { p[0], p[1], p[2], p[3] }; } static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { p[0] = v[0]; p[1] = v[1]; p[2] = v[2]; p[3] = v[3]; } static vint vloadu_vi_p(int32_t *p) { return (vint) { p[0], p[1] }; } static void vstoreu_v_p_vi(int32_t *p, vint v) { p[0] = v[0]; p[1] = v[1]; } static INLINE vdouble vload_vd_p(const double *p) { return (vdouble) { p[0], p[1] }; } static INLINE void vstore_v_p_vd(double *p, vdouble v) { p[0] = v[0]; p[1] = v[1]; } static INLINE vdouble vloadu_vd_p(const double *p) { return (vdouble) { p[0], p[1] }; } static INLINE void 
vstoreu_v_p_vd(double *p, vdouble v) { p[0] = v[0]; p[1] = v[1]; } static INLINE vfloat vload_vf_p(const float *p) { return (vfloat) { p[0], p[1], p[2], p[3] }; } static INLINE void vstore_v_p_vf(float *p, vfloat v) { p[0] = v[0]; p[1] = v[1]; p[2] = v[2]; p[3] = v[3]; } static INLINE void vscatter2_v_p_i_i_vf(float *p, int offset, int step, vfloat v) { *(p+(offset + step * 0)*2 + 0) = v[0]; *(p+(offset + step * 0)*2 + 1) = v[1]; *(p+(offset + step * 1)*2 + 0) = v[2]; *(p+(offset + step * 1)*2 + 1) = v[3]; } static INLINE vfloat vloadu_vf_p(const float *p) { return (vfloat) { p[0], p[1], p[2], p[3] }; } static INLINE void vstoreu_v_p_vf(float *p, vfloat v) { p[0] = v[0]; p[1] = v[1]; p[2] = v[2]; p[3] = v[3]; } static INLINE void vscatter2_v_p_i_i_vd(double *p, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&p[2*offset]), v); } static INLINE vdouble vgather_vd_p_vi(const double *p, vint vi) { int a[VECTLENDP]; return ((vdouble) { p[vi[0]], p[vi[1]] }); } static INLINE vfloat vgather_vf_p_vi2(const float *p, vint2 vi2) { int a[VECTLENSP]; return ((vfloat) { p[vi2[0]], p[vi2[1]], p[vi2[2]], p[vi2[3]] }); } static INLINE vint vcast_vi_i(int i) { return (vint) { i, i }; } static INLINE vint2 vcast_vi2_i(int i) { return (vint2) { i, i, i, i }; } static INLINE vfloat vcast_vf_f(float f) { return (vfloat) { f, f, f, f }; } static INLINE vdouble vcast_vd_d(double d) { return (vdouble) { d, d }; } static INLINE vdouble vcast_vd_vi(vint vi) { return (vdouble) { vi[0], vi[1] }; } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return (vfloat) { vi[0], vi[1], vi[2], vi[3] }; } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return __builtin_s390_vfidb(vd, 4, 5); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return __builtin_s390_vfidb(vd, 4, 4); } static INLINE vint vrint_vi_vd(vdouble vd) { vd = vrint_vd_vd(vd); return (vint) { vd[0], vd[1] }; } static INLINE vint vtruncate_vi_vd(vdouble vd) { return (vint) { vd[0], vd[1] }; } static INLINE vint2 
vtruncate_vi2_vf(vfloat vf) { return (vint) { vf[0], vf[1], vf[2], vf[3] }; } static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return (vmask)vd; } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return (vdouble)vm; } static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return (vint2)vd; } static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return (vdouble)vi; } static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return (vmask)vf; } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return (vfloat)vm; } static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return (vfloat)vi; } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return (vint2)vf; } static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return x + y; } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return x - y; } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return x * y; } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return x / y; } static INLINE vdouble vrec_vd_vd(vdouble x) { return 1 / x; } static INLINE vdouble vneg_vd_vd(vdouble d) { return -d; } static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return x + y; } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return x - y; } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return x * y; } static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return x / y; } static INLINE vfloat vrec_vf_vf(vfloat x) { return 1 / x; } static INLINE vfloat vneg_vf_vf(vfloat d) { return -d; } static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return x & y; } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return y & ~x; } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return x | y; } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return x ^ y; } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return x & y; } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return y & ~x; } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { 
return x | y; }
static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return x ^ y; }

// Opmask-vs-mask logic. The vo64 and vo32 variants are identical here because
// vopmask and vmask are the same vector type. vandnot computes y & ~x.
static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return x & y; }
static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return y & ~x; }
static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return x | y; }
static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return x ^ y; }

static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return x & y; }
static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return y & ~x; }
static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return x | y; }
static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return x ^ y; }

// Selects via vec_sel(y, x, o), i.e. with the operands swapped relative to the
// wrapper's own (o, x, y) order.
// NOTE(review): vec_sel presumably takes bits of its second operand where the
// mask is set, giving "x where o, else y" - confirm against vecintrin.h docs.
static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return vec_sel(y, x, o); }
static INLINE vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) { return vec_sel(y, x, (__vector unsigned int)o); }
static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask o, vint2 x, vint2 y) { return vec_sel(y, x, (__vector unsigned int)o); }

// True when both 64-bit lanes are all ones; the 32-bit variant runs the same
// test.
static INLINE int vtestallones_i_vo64(vopmask g) { return g[0] == 0xffffffffffffffffLL && g[1] == 0xffffffffffffffffLL; }
static INLINE int vtestallones_i_vo32(vopmask g) { return g[0] == 0xffffffffffffffffLL && g[1] == 0xffffffffffffffffLL; }

// Narrows a 64-bit-lane mask: 32-bit lanes 0 and 1 become -1 or 0 according to
// the two 64-bit lanes; lanes 2 and 3 are zero.
static INLINE vopmask vcast_vo32_vo64(vopmask g) { return (vopmask)(vint) { g[0] != 0 ? -1 : 0, g[1] != 0 ? -1 : 0, 0, 0 }; }
// Widens: each 64-bit lane becomes all ones when the matching 32-bit lane
// (0 or 1) is non-zero.
static INLINE vopmask vcast_vo64_vo32(vopmask g) { return (vopmask) { ((vint)g)[0] != 0 ? 0xffffffffffffffffLL : 0, ((vint)g)[1] != 0 ?
0xffffffffffffffffLL : 0 }; }

// Builds a mask whose four 32-bit lanes are h, l, h, l.
static INLINE vmask vcast_vm_i_i(int h, int l) { return (vmask)(vint){ h, l, h, l }; }

// vcastu_vi2_vi spreads the two ints to 32-bit lanes 0 and 2 with zeros in
// lanes 1 and 3; vcastu_vi_vi2 reads lanes 0 and 2 back.
static INLINE vint2 vcastu_vi2_vi(vint vi) { return (vint2){ vi[0], 0, vi[1], 0 }; }
static INLINE vint vcastu_vi_vi2(vint2 vi2) { return (vint){ vi2[0], vi2[2] }; }

// Conversions between the two-lane view (vint) and the first two lanes of the
// four-lane view (vint2). The widening direction zero-fills lanes 2 and 3.
static INLINE vint vreinterpretFirstHalf_vi_vi2(vint2 vi2) { return (vint){ vi2[0], vi2[1] }; }
static INLINE vint2 vreinterpretFirstHalf_vi2_vi(vint vi) { return (vint2){ vi[0], vi[1], 0, 0 }; }

// Lane permutations. rev21 swaps the two elements of each adjacent pair;
// reva2 reverses the order of the pairs (identity for the single double pair).
static INLINE vdouble vrev21_vd_vd(vdouble vd) { return (vdouble) { vd[1], vd[0] }; }
static INLINE vdouble vreva2_vd_vd(vdouble vd) { return vd; }
static INLINE vfloat vrev21_vf_vf(vfloat vd) { return (vfloat) { vd[1], vd[0], vd[3], vd[2] }; }
static INLINE vfloat vreva2_vf_vf(vfloat vd) { return (vfloat) { vd[2], vd[3], vd[0], vd[1] }; }
static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); }

// 64-bit lane equality; each lane becomes all ones or zero.
static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return (vopmask) { x[0] == y[0] ? 0xffffffffffffffffLL : 0, x[1] == y[1] ?
0xffffffffffffffffLL : 0 }; } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return (vmask)((__vector long long)x + (__vector long long)y); } // #define PNMASK ((vdouble) { +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0 }) #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } // static INLINE vdouble vabs_vd_vd(vdouble d) { return vec_abs(d); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } #if CONFIG == 140 static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_madd(x, y, z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_msub(x, y, z); } static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_nmsub(x, y, z); } #else static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } #endif static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vmla_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_madd(x, y, z); } static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) 
{ return vec_madd(x, y, z); }
// Remaining explicitly fused double variants. The two-letter suffix gives the
// signs of the product and of z: pn = x*y - z, np = -x*y + z, nn = -x*y - z.
// NOTE(review): sign conventions of vec_msub / vec_nmsub / vec_nmadd are
// inferred from the naming scheme - confirm against the vecintrin.h docs.
static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_msub(x, y, z); }
static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_nmsub(x, y, z); }
static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_nmadd(x, y, z); }

// x - y in even lanes, x + y in odd lanes.
static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return vadd_vf_vf_vf(x, vnegpos_vf_vf(y)); }

// Float multiply-add family. CONFIG 140 uses fused builtins and intrinsics and
// also provides the vfma* names; otherwise only vmla/vmlanp/vmlapn exist,
// built from a separate multiply and add/subtract.
#if CONFIG == 140
static INLINE vfloat vmla_vf_vf_vf_vf (vfloat x, vfloat y, vfloat z) { return __builtin_s390_vfmasb(x, y, z); }
static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_nmsub(x, y, z); }
static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return __builtin_s390_vfmssb(x, y, z); }
static INLINE vfloat vfma_vf_vf_vf_vf (vfloat x, vfloat y, vfloat z) { return __builtin_s390_vfmasb(x, y, z); }
static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return __builtin_s390_vfmasb(x, y, z); }
static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return __builtin_s390_vfmssb(x, y, z); }
static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_nmsub(x, y, z); }
static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_nmadd(x, y, z); }
#else
static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); }
static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); }
static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); }
#endif

// x*y - z in even lanes, x*y + z in odd lanes.
static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmla_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); }

//

// Select between two broadcast scalars: v1 where o is set, v0 elsewhere.
static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) {
  return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0));
}

static INLINE vdouble
vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
  // Priority chain: d0 where o0, else d1 where o1, else d2.
  return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2));
}

// Three-mask priority chain: d0 where o0, else d1 where o1, else d2 where o2,
// else d3.
static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
  return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3)));
}

//

static INLINE vopmask vnot_vo_vo(vopmask o) { return ~o; }

// Double comparisons; the intrinsic's result is cast to vopmask. vneq is the
// complement of vec_cmpeq.
static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmpeq(x, y); }
static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vnot_vo_vo(vec_cmpeq(x, y)); }
static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmplt(x, y); }
static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmple(x, y); }
static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmpgt(x, y); }
static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmpge(x, y); }

// vint arithmetic and logic. vandnot computes y & ~x.
static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return x + y; }
static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return x - y; }
static INLINE vint vneg_vi_vi(vint e) { return -e; }

static INLINE vint vand_vi_vi_vi(vint x, vint y) { return x & y; }
static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return y & ~x; }
static INLINE vint vor_vi_vi_vi(vint x, vint y) { return x | y; }
static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return x ^ y; }

// Opmask variants use only the first two 32-bit lanes of the mask.
static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return vreinterpretFirstHalf_vi_vi2((vint2)x) & y; }
static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return vec_andc(y, vreinterpretFirstHalf_vi_vi2((vint2)x)); }

// Shifts by a uniform count c. The casts to unsigned make vsll/vsrl operate on
// unsigned lanes.
static INLINE vint vsll_vi_vi_i(vint x, int c) { return (vint)(((__vector unsigned int)x) << (__vector unsigned int){c, c, c, c}); }
static INLINE vint vsrl_vi_vi_i(vint x, int c) { return (vint)(((__vector unsigned int)x) >> (__vector unsigned int){c, c, c, c}); }
static INLINE
vint vsra_vi_vi_i(vint x, int c) { return x >> (__vector int){c, c, c, c}; } // shift on signed lanes

// vint comparisons. The _vi_ forms return the intrinsic result as vint; the
// _vo_ forms keep the first two result lanes and zero the rest before
// reinterpreting as vopmask.
static INLINE vint veq_vi_vi_vi(vint x, vint y) { return vec_cmpeq(x, y); }
static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return vec_cmpgt(x, y); }
static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return (vopmask)vreinterpretFirstHalf_vi2_vi(vec_cmpeq(x, y)); }
static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return (vopmask)vreinterpretFirstHalf_vi2_vi(vec_cmpgt(x, y));}

// Bitwise select on the first two lanes: (m & x) | (~m & y).
static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) {
  return vor_vi_vi_vi(vand_vi_vi_vi(vreinterpretFirstHalf_vi_vi2((vint2)m), x),
                      vandnot_vi_vi_vi(vreinterpretFirstHalf_vi_vi2((vint2)m), y));
}

// Double classification. visnan complements d == d, which is false only for
// NaN.
static INLINE vopmask visinf_vo_vd(vdouble d) { return (vopmask)(vec_cmpeq(vabs_vd_vd(d), vcast_vd_d(SLEEF_INFINITY))); }
static INLINE vopmask vispinf_vo_vd(vdouble d) { return (vopmask)(vec_cmpeq(d, vcast_vd_d(SLEEF_INFINITY))); }
static INLINE vopmask visminf_vo_vd(vdouble d) { return (vopmask)(vec_cmpeq(d, vcast_vd_d(-SLEEF_INFINITY))); }
static INLINE vopmask visnan_vo_vd(vdouble d) { return (vopmask)(vnot_vo_vo(vec_cmpeq(d, d))); }

// Extract lane 0 as a scalar.
static INLINE double vcast_d_vd(vdouble v) { return v[0]; }
static INLINE float vcast_f_vf(vfloat v) { return v[0]; }

// Streaming store and strided scatter forward to the plain versions.
static INLINE void vstream_v_p_vd(double *p, vdouble v) { vstore_v_p_vd(p, v); }
static INLINE void vsscatter2_v_p_i_i_vd(double *p, int offset, int step, vdouble v) { vscatter2_v_p_i_i_vd(p, offset, step, v); }

//

// Float selects between broadcast scalars, as priority chains: the first mask
// that is set picks its value, and the last value is the fallback.
static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) {
  return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0));
}

static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) {
  return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2));
}

static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) {
  return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3)));
}

static
INLINE vint2 vcast_vi2_vm(vmask vm) { return (vint2)vm; } // bit-pattern cast
static INLINE vmask vcast_vm_vi2(vint2 vi) { return (vmask)vi; }

// vint2 arithmetic and logic on all four 32-bit lanes. vandnot is y & ~x.
static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return x + y; }
static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return x - y; }
static INLINE vint2 vneg_vi2_vi2(vint2 e) { return -e; }

static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return x & y; }
static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return y & ~x; }
static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return x | y; }
static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return x ^ y; }

// Opmask variants reinterpret the whole mask as four 32-bit lanes.
static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)x & y; }
static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return y & ~(vint2)x; }

// Shifts by a uniform count c; vsll/vsrl operate on unsigned lanes, vsra on
// signed lanes.
static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return (vint2)(((__vector unsigned int)x) << (__vector unsigned int){c, c, c, c}); }
static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return (vint2)(((__vector unsigned int)x) >> (__vector unsigned int){c, c, c, c}); }
static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return x >> (__vector int){c, c, c, c}; }

// vint2 comparisons, returned either as vopmask or as vint2.
static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return (vopmask)vec_cmpeq(x, y); }
static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return (vopmask)vec_cmpgt(x, y); }
static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_cmpeq(x, y); }
static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_cmpgt(x, y); }

// Strided scatter and streaming store forward to the plain versions.
static INLINE void vsscatter2_v_p_i_i_vf(float *p, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(p, offset, step, v); }
static INLINE void vstream_v_p_vf(float *p, vfloat v) { vstore_v_p_vf(p, v); }

//

static INLINE vdouble vsqrt_vd_vd(vdouble d) { return vec_sqrt(d); }
static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return vec_max(x, y); }
static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return vec_min(x, y); }

static INLINE vopmask veq_vo_vf_vf(vfloat x,
vfloat y) { return (vopmask)vec_cmpeq(x, y); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)vnot_vo_vo(vec_cmpeq(x, y)); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)vec_cmplt(x, y); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)vec_cmple(x, y); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)vec_cmpgt(x, y); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)vec_cmpge(x, y); } static INLINE vfloat vabs_vf_vf(vfloat f) { return vec_abs(f); } static INLINE vfloat vrint_vf_vf(vfloat vf) { return __builtin_s390_vfisb(vf, 4, 4); } static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return __builtin_s390_vfisb(vf, 4, 5); } static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return vec_max(x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return vec_min(x, y); } static INLINE vfloat vsqrt_vf_vf(vfloat d) { return vec_sqrt(d); } static INLINE vopmask visinf_vo_vf (vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } static INLINE vopmask visnan_vo_vf (vfloat d) { return vneq_vo_vf_vf(d, d); } static INLINE vint2 vrint_vi2_vf(vfloat vf) { vf = vrint_vf_vf(vf); return (vint) { vf[0], vf[1], vf[2], vf[3] }; } sleef-3.5.1/src/arch/helpersse2.h000066400000000000000000000576651373003144100165660ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if CONFIG == 2 #if !defined(__SSE2__) && !defined(SLEEF_GENHEADER) #error Please specify -msse2. 
#endif #elif CONFIG == 3 #if (!defined(__SSE2__) || !defined(__SSE3__)) && !defined(SLEEF_GENHEADER) #error Please specify -msse2 and -msse3 #endif #elif CONFIG == 4 #if (!defined(__SSE2__) || !defined(__SSE3__) || !defined(__SSE4_1__)) && !defined(SLEEF_GENHEADER) #error Please specify -msse2, -msse3 and -msse4.1 #endif #else #error CONFIG macro invalid or not defined #endif #define ENABLE_DP //@#define ENABLE_DP #define LOG2VECTLENDP 1 //@#define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) //@#define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP //@#define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) //@#define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) //@#define VECTLENSP (1 << LOG2VECTLENSP) #define ACCURATE_SQRT //@#define ACCURATE_SQRT #if !defined(SLEEF_GENHEADER) #if defined(_MSC_VER) #include #else #include #endif #include #include "misc.h" #endif // #if !defined(SLEEF_GENHEADER) typedef __m128i vmask; typedef __m128i vopmask; typedef __m128d vdouble; typedef __m128i vint; typedef __m128 vfloat; typedef __m128i vint2; typedef struct { vmask x, y; } vmask2; // #if !defined(SLEEF_GENHEADER) #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif static INLINE int cpuSupportsSSE2() { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); return (reg[3] & (1 << 26)) != 0; } static INLINE int cpuSupportsSSE3() { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); return (reg[2] & (1 << 0)) != 0; } static INLINE int cpuSupportsSSE4_1() { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); return (reg[2] & (1 << 19)) != 0; } #if defined(__SSE2__) && defined(__SSE3__) && defined(__SSE4_1__) static INLINE int vavailability_i(int name) { //int d = __builtin_cpu_supports("sse2") && __builtin_cpu_supports("sse3") && __builtin_cpu_supports("sse4.1"); int d = cpuSupportsSSE2() && cpuSupportsSSE3() && cpuSupportsSSE4_1(); return d ? 
3 : 0; }
#define ISANAME "SSE4.1"
#define DFTPRIORITY 12
#elif defined(__SSE2__) && defined(__SSE3__)
// SSE3 build: requires SSE2 and SSE3 at run time.
static INLINE int vavailability_i(int name) {
  //int d = __builtin_cpu_supports("sse2") && __builtin_cpu_supports("sse3");
  int d = cpuSupportsSSE2() && cpuSupportsSSE3();
  return d ? 3 : 0;
}
#define ISANAME "SSE3"
#define DFTPRIORITY 11
#else
// Baseline build: requires only SSE2 at run time.
static INLINE int vavailability_i(int name) {
  int d = cpuSupportsSSE2();
  return d ? 3 : 0;
}
#define ISANAME "SSE2"
#define DFTPRIORITY 10
#endif

#endif // #if !defined(SLEEF_GENHEADER)

static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); }

// All 16 byte sign bits set (movemask == 0xFFFF); the 32- and 64-bit variants
// run the same test.
static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; }
static INLINE int vtestallones_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; }

//

// Unaligned 128-bit integer load/store. The vint versions also move a full
// 128 bits, so p must address at least four int32_t.
static vint2 vloadu_vi2_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm_storeu_si128((__m128i *)p, v); }
static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); }
static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); }

//

// Bitwise logic. _mm_andnot_si128(x, y) complements its FIRST operand:
// (~x) & y.
static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return _mm_and_si128(x, y); }
static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return _mm_andnot_si128(x, y); }
static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return _mm_or_si128(x, y); }
static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return _mm_xor_si128(x, y); }

static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return _mm_and_si128(x, y); }
static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return _mm_andnot_si128(x, y); }
static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return _mm_or_si128(x, y); }
static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return _mm_xor_si128(x, y); }

static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return _mm_and_si128(x, y); }
static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return
_mm_or_si128(x, y); } static INLINE vmask vandnot_vm_vo64_vm(vmask x, vmask y) { return _mm_andnot_si128(x, y); } static INLINE vmask vxor_vm_vo64_vm(vmask x, vmask y) { return _mm_xor_si128(x, y); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return _mm_and_si128(x, y); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return _mm_or_si128(x, y); } static INLINE vmask vandnot_vm_vo32_vm(vmask x, vmask y) { return _mm_andnot_si128(x, y); } static INLINE vmask vxor_vm_vo32_vm(vmask x, vmask y) { return _mm_xor_si128(x, y); } static INLINE vopmask vcast_vo32_vo64(vopmask m) { return _mm_shuffle_epi32(m, 0x08); } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return _mm_shuffle_epi32(m, 0x50); } // static INLINE vint vrint_vi_vd(vdouble vd) { return _mm_cvtpd_epi32(vd); } static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm_cvttpd_epi32(vd); } static INLINE vdouble vcast_vd_vi(vint vi) { return _mm_cvtepi32_pd(vi); } static INLINE vint vcast_vi_i(int i) { return _mm_set_epi32(0, 0, i, i); } static INLINE vint2 vcastu_vi2_vi(vint vi) { return _mm_and_si128(_mm_shuffle_epi32(vi, 0x73), _mm_set_epi32(-1, 0, -1, 0)); } static INLINE vint vcastu_vi_vi2(vint2 vi) { return _mm_shuffle_epi32(vi, 0x0d); } #if CONFIG == 4 static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return _mm_cmpeq_epi64(x, y); } #define FULL_FP_ROUNDING //@#define FULL_FP_ROUNDING #else static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vcast_vd_vi(vtruncate_vi_vd(vd)); } static INLINE 
vdouble vrint_vd_vd(vdouble vd) { return vcast_vd_vi(vrint_vi_vd(vd)); } // emulated through an int32 round trip
// 64-bit equality from 32-bit compares: a 64-bit lane is equal only when both
// of its 32-bit halves are, so AND the result with its pair-swapped copy
// (0xb1 swaps adjacent 32-bit lanes).
static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) {
  vmask t = _mm_cmpeq_epi32(x, y);
  return vand_vm_vm_vm(t, _mm_shuffle_epi32(t, 0xb1));
}
#endif

static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return _mm_add_epi64(x, y); }

// _mm_set_epi32 lists lanes from highest to lowest, so i1 lands in lanes 0 and
// 2 and i0 in lanes 1 and 3.
static INLINE vmask vcast_vm_i_i(int i0, int i1) { return _mm_set_epi32(i0, i1, i0, i1); }

//

// Double broadcast and bit-pattern reinterpretation.
static INLINE vdouble vcast_vd_d(double d) { return _mm_set1_pd(d); }
static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm_castpd_si128(vd); }
static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return _mm_castpd_si128(vd); }
static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return _mm_castsi128_pd(vi); }
static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm_castsi128_pd(vm); }

// Double arithmetic.
static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_add_pd(x, y); }
static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm_sub_pd(x, y); }
static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm_mul_pd(x, y); }
static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm_div_pd(x, y); }
static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm_div_pd(_mm_set1_pd(1), x); }
static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm_sqrt_pd(x); }
// -0.0 has only the sign bit set: andnot clears the sign, xor flips it.
static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm_andnot_pd(_mm_set1_pd(-0.0), d); }
static INLINE vdouble vneg_vd_vd(vdouble d) { return _mm_xor_pd(_mm_set1_pd(-0.0), d); }
// Unfused multiply-add family: x*y + z, x*y - z, z - x*y.
static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); }
static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(z, vmul_vd_vd_vd(x, y)); }
static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm_max_pd(x, y); }
static INLINE vdouble
vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm_min_pd(x, y); }

// Double comparisons; the result is reinterpreted as an integer opmask.
static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmpeq_pd(x, y)); }
static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmpneq_pd(x, y)); }
static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmplt_pd(x, y)); }
static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmple_pd(x, y)); }
static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmpgt_pd(x, y)); }
static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmpge_pd(x, y)); }

// vint arithmetic and logic. The intrinsics operate on all four 32-bit lanes.
static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); }
static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); }
// Negation as 0 - e. vcast_vi_i(0) is all zeros, so every lane is negated.
static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); }

// vandnot computes (~x) & y.
static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); }
static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); }
static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); }
static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); }

static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return _mm_and_si128(x, y); }
static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return _mm_andnot_si128(x, y); }

// Shifts by count c: logical left, logical right, arithmetic right.
static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); }
static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); }
static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); }

// 32-bit comparisons; the _vi_ and _vo_ forms return the same bits.
static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); }
static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); }

static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); }
static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return
_mm_cmpgt_epi32(x, y); }

// Select: x where the mask is set, y elsewhere. CONFIG 4 uses the SSE4.1
// blend instructions; other configurations compute (m & x) | (~m & y).
#if CONFIG == 4
static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return _mm_blendv_epi8(y, x, m); }

static INLINE vdouble vsel_vd_vo_vd_vd(vopmask m, vdouble x, vdouble y) { return _mm_blendv_pd(y, x, _mm_castsi128_pd(m)); }
#else
static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return vor_vm_vm_vm(vand_vm_vm_vm(m, x), vandnot_vm_vm_vm(m, y)); }

static INLINE vdouble vsel_vd_vo_vd_vd(vopmask opmask, vdouble x, vdouble y) {
  return _mm_or_pd(_mm_and_pd(_mm_castsi128_pd(opmask), x), _mm_andnot_pd(_mm_castsi128_pd(opmask), y));
}
#endif

// Selects between broadcast scalars, as priority chains: the first mask that
// is set picks its value, and the last value is the fallback.
static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) {
  return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0));
}

static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
  return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2));
}

static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
  return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3)));
}

// Double classification. visnan uses d != d, which holds only for NaN.
static INLINE vopmask visinf_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm_cmpeq_pd(vabs_vd_vd(d), _mm_set1_pd(SLEEF_INFINITY)));
}

static INLINE vopmask vispinf_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm_cmpeq_pd(d, _mm_set1_pd(SLEEF_INFINITY)));
}

static INLINE vopmask visminf_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm_cmpeq_pd(d, _mm_set1_pd(-SLEEF_INFINITY)));
}

static INLINE vopmask visnan_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm_cmpneq_pd(d, d));
}

//

// Double load/store; the u variants accept unaligned pointers.
static INLINE vdouble vload_vd_p(const double *ptr) { return _mm_load_pd(ptr); }
static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm_loadu_pd(ptr); }

static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm_store_pd(ptr, v); }
static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm_storeu_pd(ptr, v); }

static INLINE vdouble
vgather_vd_p_vi(const double *ptr, vint vi) { int a[sizeof(vint)/sizeof(int)]; vstoreu_v_p_vi(a, vi); return _mm_set_pd(ptr[a[1]], ptr[a[0]]); } // This function is for debugging static INLINE double vcast_d_vd(vdouble v) { double a[VECTLENDP]; vstoreu_v_p_vd(a, v); return a[0]; } // static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; } static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; } static INLINE vint2 vrint_vi2_vf(vfloat vf) { return _mm_cvtps_epi32(vf); } static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return _mm_cvttps_epi32(vf); } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm_cvtepi32_ps(vcast_vm_vi2(vi)); } static INLINE vfloat vcast_vf_f(float f) { return _mm_set1_ps(f); } static INLINE vint2 vcast_vi2_i(int i) { return _mm_set1_epi32(i); } static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm_castps_si128(vf); } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm_castsi128_ps(vm); } static INLINE vfloat vreinterpret_vf_vi2(vint2 vm) { return _mm_castsi128_ps(vm); } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return _mm_castps_si128(vf); } #if CONFIG != 4 static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return vcast_vf_vi2(vtruncate_vi2_vf(vd)); } static INLINE vfloat vrint_vf_vf(vfloat vf) { return vcast_vf_vi2(vrint_vi2_vf(vf)); } #endif static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm_add_ps(x, y); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm_sub_ps(x, y); } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm_mul_ps(x, y); } static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm_div_ps(x, y); } static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); } static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm_sqrt_ps(x); } static INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); } static INLINE vfloat 
vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); } static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm_max_ps(x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm_min_ps(x, y); } static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmpeq_ps(x, y)); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmpneq_ps(x, y)); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmplt_ps(x, y)); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmple_ps(x, y)); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmpgt_ps(x, y)); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmpge_ps(x, y)); } static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return vadd_vi_vi_vi(x, y); } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return vsub_vi_vi_vi(x, y); } static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return vand_vi_vi_vi(x, y); } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return vandnot_vi_vi_vi(x, y); } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return vor_vi_vi_vi(x, y); } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return vxor_vi_vi_vi(x, y); } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return vand_vi_vo_vi(x, y); } 
static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vandnot_vi_vo_vi(x, y); } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return vsll_vi_vi_i(x, c); } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return vsrl_vi_vi_i(x, c); } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return vsra_vi_vi_i(x, c); } static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); } #if CONFIG == 4 static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return _mm_blendv_epi8(y, x, m); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask m, vfloat x, vfloat y) { return _mm_blendv_ps(y, x, _mm_castsi128_ps(m)); } #else static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return vor_vi2_vi2_vi2(vand_vi2_vi2_vi2(m, x), vandnot_vi2_vi2_vi2(m, y)); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask opmask, vfloat x, vfloat y) { return _mm_or_ps(_mm_and_ps(_mm_castsi128_ps(opmask), x), _mm_andnot_ps(_mm_castsi128_ps(opmask), y)); } #endif static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return 
veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } static INLINE vfloat vload_vf_p(const float *ptr) { return _mm_load_ps(ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm_loadu_ps(ptr); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm_store_ps(ptr, v); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm_storeu_ps(ptr, v); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi) { int a[VECTLENSP]; vstoreu_v_p_vi2(a, vi); return _mm_set_ps(ptr[a[3]], ptr[a[2]], ptr[a[1]], ptr[a[0]]); } // This function is for debugging static INLINE float vcast_f_vf(vfloat v) { float a[VECTLENSP]; vstoreu_v_p_vf(a, v); return a[0]; } // #define PNMASK ((vdouble) { +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0 }) #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } #if CONFIG >= 3 static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_addsub_pd(x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return _mm_addsub_ps(x, y); } #else static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, 
vfloat y) { return vadd_vf_vf_vf(x, vnegpos_vf_vf(y)); } #endif static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsubadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } static INLINE vdouble vrev21_vd_vd(vdouble d0) { return _mm_shuffle_pd(d0, d0, 1); } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return vd; } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm_stream_pd(ptr, v); } static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_stream_pd((double *)(&ptr[2*offset]), v); } // static INLINE vfloat vrev21_vf_vf(vfloat d0) { return _mm_shuffle_ps(d0, d0, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); } static INLINE vfloat vreva2_vf_vf(vfloat d0) { return _mm_shuffle_ps(d0, d0, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); } static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm_stream_ps(ptr, v); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); } // static INLINE vmask2 vinterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm_unpacklo_epi64(v.x, v.y), _mm_unpackhi_epi64(v.x, v.y) }; } static INLINE vmask2 
vuninterleave_vm2_vm2(vmask2 v) { return (vmask2) { _mm_unpacklo_epi64(v.x, v.y), _mm_unpackhi_epi64(v.x, v.y) }; } static INLINE vint vuninterleave_vi_vi(vint v) { return v; } static INLINE vdouble vinterleave_vd_vd(vdouble vd) { return vd; } static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { return vd; } static INLINE vmask vinterleave_vm_vm(vmask vm) { return vm; } static INLINE vmask vuninterleave_vm_vm(vmask vm) { return vm; } static vmask2 vloadu_vm2_p(void *p) { vmask2 vm2; memcpy(&vm2, p, VECTLENDP * 16); return vm2; } #if !defined(SLEEF_GENHEADER) typedef Sleef_quad2 vargquad; static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { vm2 = vuninterleave_vm2_vm2(vm2); vargquad aq; memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } #endif // #if !defined(SLEEF_GENHEADER) static INLINE int vtestallzeros_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0; } static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { return vor_vm_vm_vm(vand_vm_vm_vm(o, x), vandnot_vm_vm_vm(o, y)); } static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { return _mm_sub_epi64(x, y); } static INLINE vmask vneg64_vm_vm(vmask x) { return _mm_sub_epi64(vcast_vm_i_i(0, 0), x); } #define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) #define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) //@#define vsll64_vm_vm_i(x, c) _mm_slli_epi64(x, c) //@#define vsrl64_vm_vm_i(x, c) _mm_srli_epi64(x, c) static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { int64_t ax[2], ay[2]; _mm_storeu_si128((__m128i *)ax, x); _mm_storeu_si128((__m128i *)ay, y); return _mm_set_epi64x(ax[1] > ay[1] ? -1 : 0, ax[0] > ay[0] ? 
-1 : 0); } static INLINE vmask vcast_vm_vi(vint vi) { vmask m = _mm_and_si128(_mm_shuffle_epi32(vi, (0 << 6) | (1 << 4) | (0 << 2) | (0 << 0)), _mm_set_epi32(0, -1, 0, -1)); return vor_vm_vm_vm(vcastu_vi2_vi(vgt_vo_vi_vi(vcast_vi_i(0), vi)), m); } static INLINE vint vcast_vi_vm(vmask vm) { return _mm_shuffle_epi32(vm, 0x08); } sleef-3.5.1/src/arch/helpersve.h000066400000000000000000001144451373003144100164750ustar00rootroot00000000000000/*********************************************************************/ /* Copyright ARM Ltd. 2010 - 2019. */ /* Distributed under the Boost Software License, Version 1.0. */ /* (See accompanying file LICENSE.txt or copy at */ /* http://www.boost.org/LICENSE_1_0.txt) */ /*********************************************************************/ #if !defined(__ARM_FEATURE_SVE) && !defined(SLEEF_GENHEADER) #error Please specify SVE flags. #endif #if !defined(SLEEF_GENHEADER) #include #include #include "misc.h" #endif // #if !defined(SLEEF_GENHEADER) #if defined(VECTLENDP) || defined(VECTLENSP) #error VECTLENDP or VECTLENSP already defined #endif #if CONFIG == 1 || CONFIG == 2 // Vector length agnostic #define VECTLENSP (svcntw()) //@#define VECTLENSP (svcntw()) #define VECTLENDP (svcntd()) //@#define VECTLENDP (svcntd()) #define ISANAME "AArch64 SVE" #define ptrue svptrue_b8() //@#define ptrue svptrue_b8() #elif CONFIG == 8 // 256-bit vector length #define ISANAME "AArch64 SVE 256-bit" #define LOG2VECTLENDP 2 #define ptrue svptrue_pat_b8(SV_VL32) #define DFTPRIORITY 20 #elif CONFIG == 9 // 512-bit vector length #define ISANAME "AArch64 SVE 512-bit" #define LOG2VECTLENDP 3 #define ptrue svptrue_pat_b8(SV_VL64) #define DFTPRIORITY 21 #elif CONFIG == 10 // 1024-bit vector length #define ISANAME "AArch64 SVE 1024-bit" #define LOG2VECTLENDP 4 #define ptrue svptrue_pat_b8(SV_VL128) #define DFTPRIORITY 22 #elif CONFIG == 11 // 2048-bit vector length #define ISANAME "AArch64 SVE 2048-bit" #define LOG2VECTLENDP 5 #define ptrue 
svptrue_pat_b8(SV_VL256) #define DFTPRIORITY 23 #else #error CONFIG macro invalid or not defined #endif #ifdef LOG2VECTLENDP // For DFT, VECTLENDP and VECTLENSP are not the size of the available // vector length, but the size of the partial vectors utilized in the // computation. The appropriate VECTLENDP and VECTLENSP are chosen by // the dispatcher according to the value of svcntd(). #define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENDP (1 << LOG2VECTLENDP) #define VECTLENSP (1 << LOG2VECTLENSP) static INLINE int vavailability_i(int name) { return svcntd() >= VECTLENDP ? 3 : 0; } #else static INLINE int vavailability_i(int name) { return 3; } #endif #define ENABLE_SP //@#define ENABLE_SP #define ENABLE_DP //@#define ENABLE_DP #if CONFIG != 2 #define ENABLE_FMA_SP //@#define ENABLE_FMA_SP #define ENABLE_FMA_DP //@#define ENABLE_FMA_DP //#define SPLIT_KERNEL // Benchmark comparison is needed to determine whether this option should be enabled. #endif #define FULL_FP_ROUNDING //@#define FULL_FP_ROUNDING #define ACCURATE_SQRT //@#define ACCURATE_SQRT // Type definitions // Mask definition typedef svint32_t vmask; typedef svbool_t vopmask; // Single precision definitions typedef svfloat32_t vfloat; typedef svint32_t vint2; // Double precision definitions typedef svfloat64_t vdouble; typedef svint32_t vint; // Double-double data type with setter/getter functions typedef svfloat64x2_t vdouble2; static INLINE vdouble vd2getx_vd_vd2(vdouble2 v) { return svget2_f64(v, 0); } static INLINE vdouble vd2gety_vd_vd2(vdouble2 v) { return svget2_f64(v, 1); } static INLINE vdouble2 vd2setxy_vd2_vd_vd(vdouble x, vdouble y) { return svcreate2_f64(x, y); } static INLINE vdouble2 vd2setx_vd2_vd2_vd(vdouble2 v, vdouble d) { return svset2_f64(v, 0, d); } static INLINE vdouble2 vd2sety_vd2_vd2_vd(vdouble2 v, vdouble d) { return svset2_f64(v, 1, d); } // Double-float data type with setter/getter functions typedef svfloat32x2_t vfloat2; static INLINE vfloat vf2getx_vf_vf2(vfloat2 v) { 
return svget2_f32(v, 0); } static INLINE vfloat vf2gety_vf_vf2(vfloat2 v) { return svget2_f32(v, 1); } static INLINE vfloat2 vf2setxy_vf2_vf_vf(vfloat x, vfloat y) { return svcreate2_f32(x, y); } static INLINE vfloat2 vf2setx_vf2_vf2_vf(vfloat2 v, vfloat d) { return svset2_f32(v, 0, d); } static INLINE vfloat2 vf2sety_vf2_vf2_vf(vfloat2 v, vfloat d) { return svset2_f32(v, 1, d); } // vmask2 is mainly used for quad-precision functions typedef svint32x2_t vmask2; static INLINE vmask vm2getx_vm_vm2(vmask2 v) { return svget2_s32(v, 0); } static INLINE vmask vm2gety_vm_vm2(vmask2 v) { return svget2_s32(v, 1); } static INLINE vmask2 vm2setxy_vm2_vm_vm(vmask x, vmask y) { return svcreate2_s32(x, y); } static INLINE vmask2 vm2setx_vm2_vm2_vm(vmask2 v, vmask x) { return svset2_s32(v, 0, x); } static INLINE vmask2 vm2sety_vm2_vm2_vm(vmask2 v, vmask y) { return svset2_s32(v, 1, y); } // Auxiliary data types typedef svfloat64x2_t di_t; static INLINE vdouble digetd_vd_di(di_t d) { return svget2_f64(d, 0); } static INLINE vint digeti_vi_di(di_t d) { return svreinterpret_s32_f64(svget2_f64(d, 1)); } static INLINE di_t disetdi_di_vd_vi(vdouble d, vint i) { return svcreate2_f64(d, svreinterpret_f64_s32(i)); } // typedef svfloat32x2_t fi_t; static INLINE vfloat figetd_vf_di(fi_t d) { return svget2_f32(d, 0); } static INLINE vint2 figeti_vi2_di(fi_t d) { return svreinterpret_s32_f32(svget2_f32(d, 1)); } static INLINE fi_t fisetdi_fi_vf_vi2(vfloat d, vint2 i) { return svcreate2_f32(d, svreinterpret_f32_s32(i)); } // typedef svfloat64x3_t ddi_t; static INLINE vdouble2 ddigetdd_vd2_ddi(ddi_t d) { return svcreate2_f64(svget3_f64(d, 0), svget3_f64(d, 1)); } static INLINE vint ddigeti_vi_ddi(ddi_t d) { return svreinterpret_s32_f64(svget3_f64(d, 2)); } static INLINE ddi_t ddisetddi_ddi_vd2_vi(vdouble2 v, vint i) { return svcreate3_f64(svget2_f64(v, 0), svget2_f64(v, 1), svreinterpret_f64_s32(i)); } static INLINE ddi_t ddisetdd_ddi_ddi_vd2(ddi_t ddi, vdouble2 v) { return 
svcreate3_f64(svget2_f64(v, 0), svget2_f64(v, 1), svget3_f64(ddi, 2)); } // typedef svfloat32x3_t dfi_t; static INLINE vfloat2 dfigetdf_vf2_dfi(dfi_t d) { return svcreate2_f32(svget3_f32(d, 0), svget3_f32(d, 1)); } static INLINE vint2 dfigeti_vi2_dfi(dfi_t d) { return svreinterpret_s32_f32(svget3_f32(d, 2)); } static INLINE dfi_t dfisetdfi_dfi_vf2_vi2(vfloat2 v, vint2 i) { return svcreate3_f32(svget2_f32(v, 0), svget2_f32(v, 1), svreinterpret_f32_s32(i)); } static INLINE dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) { return svcreate3_f32(svget2_f32(v, 0), svget2_f32(v, 1), svget3_f32(dfi, 2)); } // typedef svfloat64x4_t dd2; static INLINE dd2 dd2setab_dd2_vd2_vd2(vdouble2 a, vdouble2 b) { return svcreate4_f64(svget2_f64(a, 0), svget2_f64(a, 1), svget2_f64(b, 0), svget2_f64(b, 1)); } static INLINE vdouble2 dd2geta_vd2_dd2(dd2 d) { return svcreate2_f64(svget4_f64(d, 0), svget4_f64(d, 1)); } static INLINE vdouble2 dd2getb_vd2_dd2(dd2 d) { return svcreate2_f64(svget4_f64(d, 2), svget4_f64(d, 3)); } // typedef svfloat32x4_t df2; static INLINE df2 df2setab_df2_vf2_vf2(vfloat2 a, vfloat2 b) { return svcreate4_f32(svget2_f32(a, 0), svget2_f32(a, 1), svget2_f32(b, 0), svget2_f32(b, 1)); } static INLINE vfloat2 df2geta_vf2_df2(df2 d) { return svcreate2_f32(svget4_f32(d, 0), svget4_f32(d, 1)); } static INLINE vfloat2 df2getb_vf2_df2(df2 d) { return svcreate2_f32(svget4_f32(d, 2), svget4_f32(d, 3)); } // typedef svfloat64x3_t vdouble3; static INLINE vdouble vd3getx_vd_vd3(vdouble3 v) { return svget3_f64(v, 0); } static INLINE vdouble vd3gety_vd_vd3(vdouble3 v) { return svget3_f64(v, 1); } static INLINE vdouble vd3getz_vd_vd3(vdouble3 v) { return svget3_f64(v, 2); } static INLINE vdouble3 vd3setxyz_vd3_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return svcreate3_f64(x, y, z); } static INLINE vdouble3 vd3setx_vd3_vd3_vd(vdouble3 v, vdouble d) { return svset3_f64(v, 0, d); } static INLINE vdouble3 vd3sety_vd3_vd3_vd(vdouble3 v, vdouble d) { return svset3_f64(v, 1, d); } 
static INLINE vdouble3 vd3setz_vd3_vd3_vd(vdouble3 v, vdouble d) { return svset3_f64(v, 2, d); } // typedef svfloat64x4_t tdx; static INLINE vmask tdxgete_vm_tdx(tdx t) { return svreinterpret_s32_f64(svget4_f64(t, 0)); } static INLINE vdouble3 tdxgetd3_vd3_tdx(tdx t) { return svcreate3_f64(svget4_f64(t, 1), svget4_f64(t, 2), svget4_f64(t, 3)); } static INLINE vdouble tdxgetd3x_vd_tdx(tdx t) { return svget4_f64(t, 1); } static INLINE vdouble tdxgetd3y_vd_tdx(tdx t) { return svget4_f64(t, 2); } static INLINE vdouble tdxgetd3z_vd_tdx(tdx t) { return svget4_f64(t, 3); } static INLINE tdx tdxsete_tdx_tdx_vm(tdx t, vmask e) { return svset4_f64(t, 0, svreinterpret_f64_s32(e)); } static INLINE tdx tdxsetd3_tdx_tdx_vd3(tdx t, vdouble3 d3) { return svcreate4_f64(svget4_f64(t, 0), svget3_f64(d3, 0), svget3_f64(d3, 1), svget3_f64(d3, 2)); } static INLINE tdx tdxsetx_tdx_tdx_vd(tdx t, vdouble x) { return svset4_f64(t, 1, x); } static INLINE tdx tdxsety_tdx_tdx_vd(tdx t, vdouble y) { return svset4_f64(t, 2, y); } static INLINE tdx tdxsetz_tdx_tdx_vd(tdx t, vdouble z) { return svset4_f64(t, 3, z); } static INLINE tdx tdxsetxyz_tdx_tdx_vd_vd_vd(tdx t, vdouble x, vdouble y, vdouble z) { return svcreate4_f64(svget4_f64(t, 0), x, y, z); } static INLINE tdx tdxseted3_tdx_vm_vd3(vmask e, vdouble3 d3) { return svcreate4_f64(svreinterpret_f64_s32(e), svget3_f64(d3, 0), svget3_f64(d3, 1), svget3_f64(d3, 2)); } static INLINE tdx tdxsetexyz_tdx_vm_vd_vd_vd(vmask e, vdouble x, vdouble y, vdouble z) { return svcreate4_f64(svreinterpret_f64_s32(e), x, y, z); } // typedef svfloat64x4_t tdi_t; static INLINE vdouble3 tdigettd_vd3_tdi(tdi_t d) { return svcreate3_f64(svget4_f64(d, 0), svget4_f64(d, 1), svget4_f64(d, 2)); } static INLINE vdouble tdigetx_vd_tdi(tdi_t d) { return svget4_f64(d, 0); } static INLINE vint tdigeti_vi_tdi(tdi_t d) { return svreinterpret_s32_f64(svget4_f64(d, 3)); } static INLINE tdi_t tdisettdi_tdi_vd3_vi(vdouble3 v, vint i) { return svcreate4_f64(svget3_f64(v, 0), 
svget3_f64(v, 1), svget3_f64(v, 2), svreinterpret_f64_s32(i)); } static INLINE tdi_t tdisettd_tdi_tdi_vd3(tdi_t tdi, vdouble3 v) { return svcreate4_f64(svget3_f64(v, 0), svget3_f64(v, 1), svget3_f64(v, 2), svget4_f64(tdi, 3)); } // // masking predicates #define ALL_TRUE_MASK svdup_n_s32(0xffffffff) #define ALL_FALSE_MASK svdup_n_s32(0x0) //@#define ALL_TRUE_MASK svdup_n_s32(0xffffffff) //@#define ALL_FALSE_MASK svdup_n_s32(0x0) static INLINE void vprefetch_v_p(const void *ptr) {} // // // // Test if all lanes are active // // // static INLINE int vtestallones_i_vo32(vopmask g) { svbool_t pg = svptrue_b32(); return (svcntp_b32(pg, g) == svcntw()); } static INLINE int vtestallones_i_vo64(vopmask g) { svbool_t pg = svptrue_b64(); return (svcntp_b64(pg, g) == svcntd()); } // // // // // // // Vector load / store static INLINE void vstoreu_v_p_vi2(int32_t *p, vint2 v) { svst1_s32(ptrue, p, v); } static INLINE vfloat vload_vf_p(const float *ptr) { return svld1_f32(ptrue, ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return svld1_f32(ptrue, ptr); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { svst1_f32(ptrue, ptr, v); } // Basic logical operations for mask static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return svand_s32_x(ptrue, x, y); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return svbic_s32_x(ptrue, y, x); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return svorr_s32_x(ptrue, x, y); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return sveor_s32_x(ptrue, x, y); } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return svreinterpret_s32_s64( svadd_s64_x(ptrue, svreinterpret_s64_s32(x), svreinterpret_s64_s32(y))); } // Mask <--> single precision reinterpret static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return svreinterpret_s32_f32(vf); } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return svreinterpret_f32_s32(vm); } static INLINE vfloat vreinterpret_vf_vi2(vint2 vm) { return 
svreinterpret_f32_s32(vm); } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return svreinterpret_s32_f32(vf); } static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; } static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; } // Conditional select static INLINE vint2 vsel_vi2_vm_vi2_vi2(vmask m, vint2 x, vint2 y) { return svsel_s32(svcmpeq_s32(ptrue, m, ALL_TRUE_MASK), x, y); } /****************************************/ /* Single precision FP operations */ /****************************************/ // Broadcast static INLINE vfloat vcast_vf_f(float f) { return svdup_n_f32(f); } // Add, Sub, Mul static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return svadd_f32_x(ptrue, x, y); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return svsub_f32_x(ptrue, x, y); } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return svmul_f32_x(ptrue, x, y); } // |x|, -x static INLINE vfloat vabs_vf_vf(vfloat f) { return svabs_f32_x(ptrue, f); } static INLINE vfloat vneg_vf_vf(vfloat f) { return svneg_f32_x(ptrue, f); } // max, min static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return svmax_f32_x(ptrue, x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return svmin_f32_x(ptrue, x, y); } // int <--> float conversions static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return svcvt_s32_f32_x(ptrue, vf); } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return svcvt_f32_s32_x(ptrue, vi); } static INLINE vint2 vcast_vi2_i(int i) { return svdup_n_s32(i); } static INLINE vint2 vrint_vi2_vf(vfloat d) { return svcvt_s32_f32_x(ptrue, svrintn_f32_x(ptrue, d)); } #if CONFIG == 1 // Multiply accumulate: z = z + x * y static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return svmad_f32_x(ptrue, x, y, z); } // Multiply subtract: z = z - x * y static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return svmsb_f32_x(ptrue, x, y, z); } static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { 
return svnmsb_f32_x(ptrue, x, y, z); } #else static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } #endif // fused multiply add / sub static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // z + x * y return svmad_f32_x(ptrue, x, y, z); } static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // z - x * y return svmsb_f32_x(ptrue, x, y, z); } static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // x * y - z return svnmsb_f32_x(ptrue, x, y, z); } // conditional select static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { return svsel_f32(mask, x, y); } // Reciprocal 1/x, Division, Square root static INLINE vfloat vdiv_vf_vf_vf(vfloat n, vfloat d) { #ifndef ENABLE_ALTDIV return svdiv_f32_x(ptrue, n, d); #else // Finite numbers (including denormal) only, gives mostly correctly rounded result vfloat t, u, x, y; svuint32_t i0, i1; i0 = svand_u32_x(ptrue, svreinterpret_u32_f32(n), svdup_n_u32(0x7c000000)); i1 = svand_u32_x(ptrue, svreinterpret_u32_f32(d), svdup_n_u32(0x7c000000)); i0 = svsub_u32_x(ptrue, svdup_n_u32(0x7d000000), svlsr_n_u32_x(ptrue, svadd_u32_x(ptrue, i0, i1), 1)); t = svreinterpret_f32_u32(i0); y = svmul_f32_x(ptrue, d, t); x = svmul_f32_x(ptrue, n, t); t = svrecpe_f32(y); t = svmul_f32_x(ptrue, t, svrecps_f32(y, t)); t = svmul_f32_x(ptrue, t, svrecps_f32(y, t)); u = svmul_f32_x(ptrue, x, t); u = svmad_f32_x(ptrue, svmsb_f32_x(ptrue, y, u, x), t, u); return u; #endif } static INLINE vfloat vrec_vf_vf(vfloat d) { #ifndef ENABLE_ALTDIV return svdivr_n_f32_x(ptrue, d, 1.0f); #else return vsel_vf_vo_vf_vf(svcmpeq_f32(ptrue, vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)), vcast_vf_f(0), 
vdiv_vf_vf_vf(vcast_vf_f(1.0f), d)); #endif } static INLINE vfloat vsqrt_vf_vf(vfloat d) { #ifndef ENABLE_ALTSQRT return svsqrt_f32_x(ptrue, d); #else // Gives correctly rounded result for all input range vfloat w, x, y, z; y = svrsqrte_f32(d); x = vmul_vf_vf_vf(d, y); w = vmul_vf_vf_vf(vcast_vf_f(0.5), y); y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(0.5)); x = vfma_vf_vf_vf_vf(x, y, x); w = vfma_vf_vf_vf_vf(w, y, w); y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(1.5)); w = vadd_vf_vf_vf(w, w); w = vmul_vf_vf_vf(w, y); x = vmul_vf_vf_vf(w, d); y = vfmapn_vf_vf_vf_vf(w, d, x); z = vfmanp_vf_vf_vf_vf(w, x, vcast_vf_f(1)); z = vfmanp_vf_vf_vf_vf(w, y, z); w = vmul_vf_vf_vf(vcast_vf_f(0.5), x); w = vfma_vf_vf_vf_vf(w, z, y); w = vadd_vf_vf_vf(w, x); return svsel_f32(svorr_b_z(ptrue, svcmpeq_f32(ptrue, d, vcast_vf_f(0)), svcmpeq_f32(ptrue, d, vcast_vf_f(SLEEF_INFINITYf))), d, w); #endif } // // // // // // static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } // // // // // // // truncate static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return svrintz_f32_x(ptrue, vd); } // // // // Round float // // // static INLINE vfloat vrint_vf_vf(vfloat vf) { return svrintn_f32_x(svptrue_b32(), vf); } // // // // // // /***************************************/ /* Single precision integer operations */ /***************************************/ // Add, Sub, Neg (-x) static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return svadd_s32_x(ptrue, x, y); } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 
y) { return svsub_s32_x(ptrue, x, y); } static INLINE vint2 vneg_vi2_vi2(vint2 e) { return svneg_s32_x(ptrue, e); } // Logical operations static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return svand_s32_x(ptrue, x, y); } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return svbic_s32_x(ptrue, y, x); } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return svorr_s32_x(ptrue, x, y); } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return sveor_s32_x(ptrue, x, y); } // Shifts #define vsll_vi2_vi2_i(x, c) svlsl_n_s32_x(ptrue, x, c) //@#define vsll_vi2_vi2_i(x, c) svlsl_n_s32_x(ptrue, x, c) #define vsrl_vi2_vi2_i(x, c) \ svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c)) //@#define vsrl_vi2_vi2_i(x, c) svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c)) #define vsra_vi2_vi2_i(x, c) svasr_n_s32_x(ptrue, x, c) //@#define vsra_vi2_vi2_i(x, c) svasr_n_s32_x(ptrue, x, c) // Comparison returning integers static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return svsel_s32(svcmpgt_s32(ptrue, x, y), ALL_TRUE_MASK, ALL_FALSE_MASK); } // conditional select static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return svsel_s32(m, x, y); } /****************************************/ /* opmask operations */ /****************************************/ // single precision FP static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return svcmpeq_f32(ptrue, x, y); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return svcmpne_f32(ptrue, x, y); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return svcmplt_f32(ptrue, x, y); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return svcmple_f32(ptrue, x, y); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return svcmpgt_f32(ptrue, x, y); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return svcmpge_f32(ptrue, x, y); } static INLINE vopmask visinf_vo_vf(vfloat d) { return 
svcmpeq_n_f32(ptrue, vabs_vf_vf(d), SLEEF_INFINITYf); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return svcmpeq_n_f32(ptrue, d, SLEEF_INFINITYf); } static INLINE vopmask visminf_vo_vf(vfloat d) { return svcmpeq_n_f32(ptrue, d, -SLEEF_INFINITYf); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } // integers static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return svcmpeq_s32(ptrue, x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return svcmpgt_s32(ptrue, x, y); } // logical opmask static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return svand_b_z(ptrue, x, y); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return svbic_b_z(ptrue, y, x); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return svorr_b_z(ptrue, x, y); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return sveor_b_z(ptrue, x, y); } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { // This needs to be zeroing to prevent asinf and atanf denormal test // failing. 
return svand_s32_z(x, y, y); } // bitmask logical operations static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return svsel_s32(x, y, ALL_FALSE_MASK); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return svsel_s32(x, ALL_FALSE_MASK, y); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return svsel_s32(x, ALL_TRUE_MASK, y); } // broadcast bitmask static INLINE vmask vcast_vm_i_i(int i0, int i1) { return svreinterpret_s32_u64( svdup_n_u64((0xffffffff & (uint64_t)i1) | (((uint64_t)i0) << 32))); } /*********************************/ /* SVE for double precision math */ /*********************************/ // Vector load/store static INLINE vdouble vload_vd_p(const double *ptr) { return svld1_f64(ptrue, ptr); } static INLINE vdouble vloadu_vd_p(const double *ptr) { return svld1_f64(ptrue, ptr); } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { svst1_f64(ptrue, ptr, v); } static INLINE void vstoreu_v_p_vi(int *ptr, vint v) { svst1w_s64(ptrue, ptr, svreinterpret_s64_s32(v)); } static vint vloadu_vi_p(int32_t *p) { return svreinterpret_s32_s64(svld1uw_s64(ptrue, (uint32_t *)p)); } // Reinterpret static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return svreinterpret_f64_s32(vm); } static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return svreinterpret_s32_f64(vd); } static INLINE vdouble vreinterpret_vd_vi2(vint2 x) { return svreinterpret_f64_s32(x); } static INLINE vint2 vreinterpret_vi2_vd(vdouble x) { return svreinterpret_s32_f64(x); } static INLINE vint2 vcastu_vi2_vi(vint x) { return svreinterpret_s32_s64( svlsl_n_s64_x(ptrue, svreinterpret_s64_s32(x), 32)); } static INLINE vint vcastu_vi_vi2(vint2 x) { return svreinterpret_s32_u64( svlsr_n_u64_x(ptrue, svreinterpret_u64_s32(x), 32)); } static INLINE vdouble vcast_vd_vi(vint vi) { return svcvt_f64_s32_x(ptrue, vi); } // Splat static INLINE vdouble vcast_vd_d(double d) { return svdup_n_f64(d); } // Conditional select static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, 
vdouble x, vdouble y) { return svsel_f64(o, x, y); } static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); } static INLINE vint vsel_vi_vo_vi_vi(vopmask o, vint x, vint y) { return svsel_s32(o, x, y); } // truncate static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return svrintz_f64_x(ptrue, vd); } static INLINE vint vtruncate_vi_vd(vdouble vd) { return svcvt_s32_f64_x(ptrue, vd); } static INLINE vint vrint_vi_vd(vdouble vd) { return svcvt_s32_f64_x(ptrue, svrintn_f64_x(ptrue, vd)); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return svrintn_f64_x(ptrue, vd); } // FP math operations static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return svadd_f64_x(ptrue, x, y); } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return svsub_f64_x(ptrue, x, y); } static INLINE vdouble vneg_vd_vd(vdouble x) { return svneg_f64_x(ptrue, x); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return svmul_f64_x(ptrue, x, y); } static INLINE vdouble vabs_vd_vd(vdouble x) { return svabs_f64_x(ptrue, x); } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return svmax_f64_x(ptrue, x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return svmin_f64_x(ptrue, x, y); } #if CONFIG == 1 // Multiply accumulate / subtract static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z = x*y + z return svmad_f64_x(ptrue, x, y, z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z = x * y - z 
return svnmsb_f64_x(ptrue, x, y, z); } static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return svmsb_f64_x(ptrue, x, y, z); } #else static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } #endif static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z + x * y return svmad_f64_x(ptrue, x, y, z); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z - x * y return svmsb_f64_x(ptrue, x, y, z); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // x * y - z return svnmsb_f64_x(ptrue, x, y, z); } // Reciprocal 1/x, Division, Square root static INLINE vdouble vdiv_vd_vd_vd(vdouble n, vdouble d) { #ifndef ENABLE_ALTDIV return svdiv_f64_x(ptrue, n, d); #else // Finite numbers (including denormal) only, gives mostly correctly rounded result vdouble t, u, x, y; svuint64_t i0, i1; i0 = svand_u64_x(ptrue, svreinterpret_u64_f64(n), svdup_n_u64(0x7fc0000000000000L)); i1 = svand_u64_x(ptrue, svreinterpret_u64_f64(d), svdup_n_u64(0x7fc0000000000000L)); i0 = svsub_u64_x(ptrue, svdup_n_u64(0x7fd0000000000000L), svlsr_n_u64_x(ptrue, svadd_u64_x(ptrue, i0, i1), 1)); t = svreinterpret_f64_u64(i0); y = svmul_f64_x(ptrue, d, t); x = svmul_f64_x(ptrue, n, t); t = svrecpe_f64(y); t = svmul_f64_x(ptrue, t, svrecps_f64(y, t)); t = svmul_f64_x(ptrue, t, svrecps_f64(y, t)); t = svmul_f64_x(ptrue, t, svrecps_f64(y, t)); u = svmul_f64_x(ptrue, x, t); u = svmad_f64_x(ptrue, svmsb_f64_x(ptrue, y, u, x), t, u); return u; #endif } static INLINE vdouble vrec_vd_vd(vdouble d) { #ifndef ENABLE_ALTDIV return svdivr_n_f64_x(ptrue, d, 1.0); #else return vsel_vd_vo_vd_vd(svcmpeq_f64(ptrue, vabs_vd_vd(d), vcast_vd_d(SLEEF_INFINITY)), vcast_vd_d(0), vdiv_vd_vd_vd(vcast_vd_d(1.0f), d)); #endif } static INLINE vdouble 
vsqrt_vd_vd(vdouble d) { #ifndef ENABLE_ALTSQRT return svsqrt_f64_x(ptrue, d); #else // Gives correctly rounded result for all input range vdouble w, x, y, z; y = svrsqrte_f64(d); x = vmul_vd_vd_vd(d, y); w = vmul_vd_vd_vd(vcast_vd_d(0.5), y); y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5)); x = vfma_vd_vd_vd_vd(x, y, x); w = vfma_vd_vd_vd_vd(w, y, w); y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5)); x = vfma_vd_vd_vd_vd(x, y, x); w = vfma_vd_vd_vd_vd(w, y, w); y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(1.5)); w = vadd_vd_vd_vd(w, w); w = vmul_vd_vd_vd(w, y); x = vmul_vd_vd_vd(w, d); y = vfmapn_vd_vd_vd_vd(w, d, x); z = vfmanp_vd_vd_vd_vd(w, x, vcast_vd_d(1)); z = vfmanp_vd_vd_vd_vd(w, y, z); w = vmul_vd_vd_vd(vcast_vd_d(0.5), x); w = vfma_vd_vd_vd_vd(w, z, y); w = vadd_vd_vd_vd(w, x); return svsel_f64(svorr_b_z(ptrue, svcmpeq_f64(ptrue, d, vcast_vd_d(0)), svcmpeq_f64(ptrue, d, vcast_vd_d(SLEEF_INFINITY))), d, w); #endif } // Float comparison static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return svcmplt_f64(ptrue, x, y); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return svcmpeq_f64(ptrue, x, y); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return svcmpgt_f64(ptrue, x, y); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return svcmpge_f64(ptrue, x, y); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return svcmpne_f64(ptrue, x, y); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return svcmple_f64(ptrue, x, y); } // predicates static INLINE vopmask visnan_vo_vd(vdouble vd) { return svcmpne_f64(ptrue, vd, vd); } static INLINE vopmask visinf_vo_vd(vdouble vd) { return svcmpeq_n_f64(ptrue, svabs_f64_x(ptrue, vd), SLEEF_INFINITY); } static INLINE vopmask vispinf_vo_vd(vdouble vd) { return svcmpeq_n_f64(ptrue, vd, SLEEF_INFINITY); } static INLINE vopmask visminf_vo_vd(vdouble vd) { return svcmpeq_n_f64(ptrue, vd, -SLEEF_INFINITY); } // Comparing bit masks static INLINE vopmask 
veq64_vo_vm_vm(vmask x, vmask y) { return svcmpeq_s64(ptrue, svreinterpret_s64_s32(x), svreinterpret_s64_s32(y)); } // pure predicate operations static INLINE vopmask vcast_vo32_vo64(vopmask o) { return o; } static INLINE vopmask vcast_vo64_vo32(vopmask o) { return o; } // logical integer operations static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { // This needs to be a zeroing instruction because we need to make // sure that the inactive elements for the unpacked integers vector // are zero. return svand_s32_z(x, y, y); } static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return svsel_s32(x, ALL_FALSE_MASK, y); } #define vsra_vi_vi_i(x, c) svasr_n_s32_x(ptrue, x, c) //@#define vsra_vi_vi_i(x, c) svasr_n_s32_x(ptrue, x, c) #define vsll_vi_vi_i(x, c) svlsl_n_s32_x(ptrue, x, c) //@#define vsll_vi_vi_i(x, c) svlsl_n_s32_x(ptrue, x, c) static INLINE vint vsrl_vi_vi_i(vint x, int c) { return svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c)); } static INLINE vint vand_vi_vi_vi(vint x, vint y) { return svand_s32_x(ptrue, x, y); } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return svbic_s32_x(ptrue, y, x); } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return sveor_s32_x(ptrue, x, y); } // integer math static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return svadd_s32_x(ptrue, x, y); } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return svsub_s32_x(ptrue, x, y); } static INLINE vint vneg_vi_vi(vint x) { return svneg_s32_x(ptrue, x); } // integer comparison static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return svcmpgt_s32(ptrue, x, y); } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return svcmpeq_s32(ptrue, x, y); } // Splat static INLINE vint vcast_vi_i(int i) { return svdup_n_s32(i); } // bitmask logical operations static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { // This needs to be a zeroing instruction because we need to make // sure that the inactive elements for the unpacked integers vector 
// are zero. return svreinterpret_s32_s64( svand_s64_z(x, svreinterpret_s64_s32(y), svreinterpret_s64_s32(y))); } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return svreinterpret_s32_s64(svsel_s64( x, svreinterpret_s64_s32(ALL_FALSE_MASK), svreinterpret_s64_s32(y))); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return svreinterpret_s32_s64(svsel_s64( x, svreinterpret_s64_s32(ALL_TRUE_MASK), svreinterpret_s64_s32(y))); } static INLINE vfloat vrev21_vf_vf(vfloat vf) { return svreinterpret_f32_u64(svrevw_u64_x(ptrue, svreinterpret_u64_f32(vf))); } static INLINE vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } // Comparison returning integer static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return svsel_s32(svcmpeq_s32(ptrue, x, y), ALL_TRUE_MASK, ALL_FALSE_MASK); } // Gather static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return svld1_gather_s64index_f64(ptrue, ptr, svreinterpret_s64_s32(vi)); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return svld1_gather_s32index_f32(ptrue, ptr, vi2); } // Operations for DFT static INLINE vdouble vposneg_vd_vd(vdouble d) { return svneg_f64_m(d, svdupq_n_b64(0, 1), d); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return svneg_f64_m(d, svdupq_n_b64(1, 0), d); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return svneg_f32_m(d, svdupq_n_b32(0, 1, 0, 1), d); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return svneg_f32_m(d, svdupq_n_b32(1, 0, 1, 0), d); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat d0, vfloat d1) { return vadd_vf_vf_vf(d0, vnegpos_vf_vf(d1)); } static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vfma_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return 
vfma_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } // static INLINE vdouble vrev21_vd_vd(vdouble x) { return svzip1_f64(svuzp2_f64(x, x), svuzp1_f64(x, x)); } static INLINE vdouble vreva2_vd_vd(vdouble vd) { svint64_t x = svindex_s64((VECTLENDP-1), -1); x = svzip1_s64(svuzp2_s64(x, x), svuzp1_s64(x, x)); return svtbl_f64(vd, svreinterpret_u64_s64(x)); } static INLINE vfloat vreva2_vf_vf(vfloat vf) { svint32_t x = svindex_s32((VECTLENSP-1), -1); x = svzip1_s32(svuzp2_s32(x, x), svuzp1_s32(x, x)); return svtbl_f32(vf, svreinterpret_u32_s32(x)); } // static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { svst1_scatter_u64index_f64(ptrue, ptr + offset*2, svzip1_u64(svindex_u64(0, step*2), svindex_u64(1, step*2)), v); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { svst1_scatter_u32index_f32(ptrue, ptr + offset*2, svzip1_u32(svindex_u32(0, step*2), svindex_u32(1, step*2)), v); } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { vstoreu_v_p_vd(ptr, v); } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { vstore_v_p_vd(ptr, v); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vstoreu_v_p_vf(ptr, v); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { vstore_v_p_vf(ptr, v); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vscatter2_v_p_i_i_vd(ptr, offset, step, v); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } // These functions are for debugging static double vcast_d_vd(vdouble v) { double a[svcntd()]; vstoreu_v_p_vd(a, v); return a[0]; } static float vcast_f_vf(vfloat v) { float a[svcntw()]; vstoreu_v_p_vf(a, v); return a[0]; } static int vcast_i_vi(vint v) { int a[svcntw()]; vstoreu_v_p_vi(a, v); return a[0]; } static int vcast_i_vi2(vint2 v) { int a[svcntw()]; vstoreu_v_p_vi2(a, v); return a[0]; } // static INLINE vmask2 
vinterleave_vm2_vm2(vmask2 v) { return vm2setxy_vm2_vm_vm(svreinterpret_s32_u64(svtrn1_u64(svreinterpret_u64_s32(vm2getx_vm_vm2(v)), svreinterpret_u64_s32(vm2gety_vm_vm2(v)))), svreinterpret_s32_u64(svtrn2_u64(svreinterpret_u64_s32(vm2getx_vm_vm2(v)), svreinterpret_u64_s32(vm2gety_vm_vm2(v))))); } static INLINE vmask2 vuninterleave_vm2_vm2(vmask2 v) { return vm2setxy_vm2_vm_vm(svreinterpret_s32_u64(svtrn1_u64(svreinterpret_u64_s32(vm2getx_vm_vm2(v)), svreinterpret_u64_s32(vm2gety_vm_vm2(v)))), svreinterpret_s32_u64(svtrn2_u64(svreinterpret_u64_s32(vm2getx_vm_vm2(v)), svreinterpret_u64_s32(vm2gety_vm_vm2(v))))); } static INLINE vint vuninterleave_vi_vi(vint v) { return svreinterpret_s32_u64(svuzp1_u64(svtrn1_u64(svreinterpret_u64_s32(v), svreinterpret_u64_s32(v)), svtrn2_u64(svreinterpret_u64_s32(v), svreinterpret_u64_s32(v)))); } static INLINE vdouble vinterleave_vd_vd(vdouble vd) { return svtrn1_f64(svzip1_f64(vd, vd), svzip2_f64(vd, vd)); } static INLINE vdouble vuninterleave_vd_vd(vdouble vd) { return svuzp1_f64(svtrn1_f64(vd, vd), svtrn2_f64(vd, vd)); } static INLINE vmask vinterleave_vm_vm(vmask vm) { return svreinterpret_s32_u64(svtrn1_u64(svzip1_u64(svreinterpret_u64_s32(vm), svreinterpret_u64_s32(vm)), svzip2_u64(svreinterpret_u64_s32(vm), svreinterpret_u64_s32(vm)))); } static INLINE vmask vuninterleave_vm_vm(vmask vm) { return svreinterpret_s32_u64(svuzp1_u64(svtrn1_u64(svreinterpret_u64_s32(vm), svreinterpret_u64_s32(vm)), svtrn2_u64(svreinterpret_u64_s32(vm), svreinterpret_u64_s32(vm)))); } static vmask2 vloadu_vm2_p(void *p) { vmask2 vm2; memcpy(&vm2, p, VECTLENDP * 16); return vm2; } #if !defined(SLEEF_GENHEADER) typedef Sleef_quadx vargquad; static INLINE vmask2 vcast_vm2_aq(vargquad aq) { return vinterleave_vm2_vm2(vloadu_vm2_p(&aq)); } static INLINE vargquad vcast_aq_vm2(vmask2 vm2) { vm2 = vuninterleave_vm2_vm2(vm2); vargquad aq; memcpy(&aq, &vm2, VECTLENDP * 16); return aq; } #endif // #if !defined(SLEEF_GENHEADER) static INLINE int 
vtestallzeros_i_vo64(vopmask g) { return svcntp_b64(svptrue_b64(), g) == 0; } static INLINE vmask vsel_vm_vo64_vm_vm(vopmask o, vmask x, vmask y) { return svreinterpret_s32_s64(svsel_s64(o, svreinterpret_s64_s32(x), svreinterpret_s64_s32(y))); } static INLINE vmask vsub64_vm_vm_vm(vmask x, vmask y) { return svreinterpret_s32_s64( svsub_s64_x(ptrue, svreinterpret_s64_s32(x), svreinterpret_s64_s32(y))); } static INLINE vmask vneg64_vm_vm(vmask x) { return svreinterpret_s32_s64(svneg_s64_x(ptrue, svreinterpret_s64_s32(x))); } static INLINE vopmask vgt64_vo_vm_vm(vmask x, vmask y) { return svcmpgt_s64(ptrue, svreinterpret_s64_s32(x), svreinterpret_s64_s32(y)); } #define vsll64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsl_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) //@#define vsll64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsl_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) #define vsrl64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsr_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) //@#define vsrl64_vm_vm_i(x, c) svreinterpret_s32_u64(svlsr_n_u64_x(ptrue, svreinterpret_u64_s32(x), c)) static INLINE vmask vcast_vm_vi(vint vi) { return svreinterpret_s32_s64(svextw_s64_z(ptrue, svreinterpret_s64_s32(vi))); } static INLINE vint vcast_vi_vm(vmask vm) { return vand_vm_vm_vm(vm, vcast_vm_i_i(0, 0xffffffff)); } sleef-3.5.1/src/arch/helpervecext.h000066400000000000000000001067151373003144100171770ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include "misc.h" #ifndef CONFIG #error CONFIG macro not defined #endif #define ENABLE_DP #define ENABLE_SP #define LOG2VECTLENDP CONFIG #define VECTLENDP (1 << LOG2VECTLENDP) #define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) #define DFTPRIORITY LOG2VECTLENDP #if defined(__clang__) #define ISANAME "Clang Vector Extension" typedef uint32_t vmask __attribute__((ext_vector_type(VECTLENDP*2))); typedef uint32_t vopmask __attribute__((ext_vector_type(VECTLENDP*2))); typedef double vdouble __attribute__((ext_vector_type(VECTLENDP))); typedef int32_t vint __attribute__((ext_vector_type(VECTLENDP))); typedef float vfloat __attribute__((ext_vector_type(VECTLENDP*2))); typedef int32_t vint2 __attribute__((ext_vector_type(VECTLENDP*2))); #ifdef ENABLE_LONGDOUBLE typedef uint8_t vmaskl __attribute__((ext_vector_type(sizeof(long double)*VECTLENDP))); typedef long double vlongdouble __attribute__((ext_vector_type(VECTLENDP))); #endif #if defined(Sleef_quad2_DEFINED) && defined(ENABLEFLOAT128) typedef uint8_t vmaskq __attribute__((ext_vector_type(sizeof(Sleef_quad)*VECTLENDP))); #ifdef ENABLE_LONGDOUBLE typedef Sleef_quad vquad __attribute__((ext_vector_type(VECTLENDP))); #endif #endif #elif defined(__GNUC__) #define ISANAME "GCC Vector Extension" typedef uint32_t vmask __attribute__((vector_size(sizeof(uint32_t)*VECTLENDP*2))); typedef uint32_t vopmask __attribute__((vector_size(sizeof(uint32_t)*VECTLENDP*2))); typedef double vdouble __attribute__((vector_size(sizeof(double)*VECTLENDP))); typedef int32_t vint __attribute__((vector_size(sizeof(int32_t)*VECTLENDP))); typedef float vfloat __attribute__((vector_size(sizeof(float)*VECTLENDP*2))); typedef int32_t vint2 __attribute__((vector_size(sizeof(int32_t)*VECTLENDP*2))); #ifdef ENABLE_LONGDOUBLE typedef uint8_t vmaskl __attribute__((vector_size(sizeof(long double)*VECTLENDP))); typedef long double 
vlongdouble __attribute__((vector_size(sizeof(long double)*VECTLENDP))); #endif #if defined(Sleef_quad2_DEFINED) && defined(ENABLEFLOAT128) typedef uint8_t vmaskq __attribute__((vector_size(sizeof(Sleef_quad)*VECTLENDP))); typedef Sleef_quad vquad __attribute__((vector_size(sizeof(Sleef_quad)*VECTLENDP))); #endif #endif // #if VECTLENDP == 2 static INLINE vopmask vcast_vo32_vo64(vopmask m) { return (vopmask){ m[1], m[3], 0, 0 }; } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return (vopmask){ m[0], m[0], m[1], m[1] }; } static INLINE vint vcast_vi_i(int i) { return (vint) { i, i }; } static INLINE vint2 vcast_vi2_i(int i) { return (vint2) { i, i, i, i }; } static INLINE vfloat vcast_vf_f(float f) { return (vfloat) { f, f, f, f }; } static INLINE vdouble vcast_vd_d(double d) { return (vdouble) { d, d }; } #ifdef ENABLE_LONGDOUBLE static INLINE vlongdouble vcast_vl_l(long double d) { return (vlongdouble) { d, d }; } #endif #if defined(Sleef_quad2_DEFINED) && defined(ENABLEFLOAT128) static INLINE vquad vcast_vq_q(Sleef_quad d) { return (vquad) { d, d }; } #endif static INLINE vmask vcast_vm_i_i(int h, int l) { return (vmask){ l, h, l, h }; } static INLINE vint2 vcastu_vi2_vi(vint vi) { return (vint2){ 0, vi[0], 0, vi[1] }; } static INLINE vint vcastu_vi_vi2(vint2 vi2) { return (vint){ vi2[1], vi2[3] }; } static INLINE vint vreinterpretFirstHalf_vi_vi2(vint2 vi2) { return (vint){ vi2[0], vi2[1] }; } static INLINE vint2 vreinterpretFirstHalf_vi2_vi(vint vi) { return (vint2){ vi[0], vi[1], 0, 0 }; } static INLINE vdouble vrev21_vd_vd(vdouble vd) { return (vdouble) { vd[1], vd[0] }; } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return vd; } static INLINE vfloat vrev21_vf_vf(vfloat vd) { return (vfloat) { vd[1], vd[0], vd[3], vd[2] }; } static INLINE vfloat vreva2_vf_vf(vfloat vd) { return (vfloat) { vd[2], vd[3], vd[0], vd[1] }; } #ifdef ENABLE_LONGDOUBLE static INLINE vlongdouble vrev21_vl_vl(vlongdouble vd) { return (vlongdouble) { vd[1], vd[0] }; } static 
INLINE vlongdouble vreva2_vl_vl(vlongdouble vd) { return vd; } static INLINE vlongdouble vposneg_vl_vl(vlongdouble vd) { return (vlongdouble) { +vd[0], -vd[1] }; } static INLINE vlongdouble vnegpos_vl_vl(vlongdouble vd) { return (vlongdouble) { -vd[0], +vd[1] }; } #endif #if defined(Sleef_quad2_DEFINED) && defined(ENABLEFLOAT128) static INLINE vquad vrev21_vq_vq(vquad vd) { return (vquad) { vd[1], vd[0] }; } static INLINE vquad vreva2_vq_vq(vquad vd) { return vd; } static INLINE vquad vposneg_vq_vq(vquad vd) { return (vquad) { +vd[0], -vd[1] }; } static INLINE vquad vnegpos_vq_vq(vquad vd) { return (vquad) { -vd[0], +vd[1] }; } #endif #define PNMASK ((vdouble) { +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0 }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return (vdouble)((vmask)d ^ (vmask)PNMASK); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return (vdouble)((vmask)d ^ (vmask)NPMASK); } #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vfloat vposneg_vf_vf(vfloat d) { return (vfloat)((vmask)d ^ (vmask)PNMASKf); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return (vfloat)((vmask)d ^ (vmask)NPMASKf); } #elif VECTLENDP == 4 static INLINE vopmask vcast_vo32_vo64(vopmask m) { return (vopmask){ m[1], m[3], m[5], m[7], 0, 0, 0, 0 }; } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return (vopmask){ m[0], m[0], m[1], m[1], m[2], m[2], m[3], m[3] }; } static INLINE vint vcast_vi_i(int i) { return (vint) { i, i, i, i }; } static INLINE vint2 vcast_vi2_i(int i) { return (vint2) { i, i, i, i, i, i, i, i }; } static INLINE vfloat vcast_vf_f(float f) { return (vfloat) { f, f, f, f, f, f, f, f }; } static INLINE vdouble vcast_vd_d(double d) { return (vdouble) { d, d, d, d }; } #ifdef ENABLE_LONGDOUBLE static INLINE vlongdouble vcast_vl_l(long double d) { return (vlongdouble) { d, d, d, d }; } #endif static INLINE vmask vcast_vm_i_i(int h, int l) { return (vmask){ l, h, l, h, 
l, h, l, h }; } static INLINE vint2 vcastu_vi2_vi(vint vi) { return (vint2){ 0, vi[0], 0, vi[1], 0, vi[2], 0, vi[3] }; } static INLINE vint vcastu_vi_vi2(vint2 vi2) { return (vint){ vi2[1], vi2[3], vi2[5], vi2[7] }; } static INLINE vint vreinterpretFirstHalf_vi_vi2(vint2 vi2) { return (vint){ vi2[0], vi2[1], vi2[2], vi2[3] }; } static INLINE vint2 vreinterpretFirstHalf_vi2_vi(vint vi) { return (vint2){ vi[0], vi[1], vi[2], vi[3], 0, 0, 0, 0 }; } #define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0 }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return (vdouble)((vmask)d ^ (vmask)PNMASK); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return (vdouble)((vmask)d ^ (vmask)NPMASK); } #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vfloat vposneg_vf_vf(vfloat d) { return (vfloat)((vmask)d ^ (vmask)PNMASKf); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return (vfloat)((vmask)d ^ (vmask)NPMASKf); } static INLINE vdouble vrev21_vd_vd(vdouble vd) { return (vdouble) { vd[1], vd[0], vd[3], vd[2] }; } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return (vdouble) { vd[2], vd[3], vd[0], vd[1] }; } static INLINE vfloat vrev21_vf_vf(vfloat vd) { return (vfloat) { vd[1], vd[0], vd[3], vd[2], vd[5], vd[4], vd[7], vd[6] }; } static INLINE vfloat vreva2_vf_vf(vfloat vd) { return (vfloat) { vd[6], vd[7], vd[4], vd[5], vd[2], vd[3], vd[0], vd[1] }; } #ifdef ENABLE_LONGDOUBLE static INLINE vlongdouble vrev21_vl_vl(vlongdouble vd) { return (vlongdouble) { vd[1], vd[0], vd[3], vd[2] }; } static INLINE vlongdouble vreva2_vl_vl(vlongdouble vd) { return (vlongdouble) { vd[2], vd[3], vd[0], vd[1] }; } static INLINE vlongdouble vposneg_vl_vl(vlongdouble vd) { return (vlongdouble) { +vd[0], -vd[1], +vd[2], -vd[3] }; } static INLINE vlongdouble vnegpos_vl_vl(vlongdouble vd) { return 
(vlongdouble) { -vd[0], +vd[1], -vd[2], +vd[3] }; } #endif #elif VECTLENDP == 8 static INLINE vopmask vcast_vo32_vo64(vopmask m) { return (vopmask){ m[1], m[3], m[5], m[7], m[9], m[11], m[13], m[15], 0, 0, 0, 0, 0, 0, 0, 0 }; } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return (vopmask){ m[0], m[0], m[1], m[1], m[2], m[2], m[3], m[3], m[4], m[4], m[5], m[5], m[6], m[6], m[7], m[7] }; } static INLINE vint vcast_vi_i(int i) { return (vint) { i, i, i, i, i, i, i, i }; } static INLINE vint2 vcast_vi2_i(int i) { return (vint2) { i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i }; } static INLINE vfloat vcast_vf_f(float f) { return (vfloat) { f, f, f, f, f, f, f, f, f, f, f, f, f, f, f, f }; } static INLINE vdouble vcast_vd_d(double d) { return (vdouble) { d, d, d, d, d, d, d, d }; } #ifdef ENABLE_LONGDOUBLE static INLINE vlongdouble vcast_vl_l(long double d) { return (vlongdouble) { d, d, d, d, d, d, d, d }; } #endif static INLINE vmask vcast_vm_i_i(int h, int l) { return (vmask){ l, h, l, h, l, h, l, h, l, h, l, h, l, h, l, h }; } static INLINE vint2 vcastu_vi2_vi(vint vi) { return (vint2){ 0, vi[0], 0, vi[1], 0, vi[2], 0, vi[3], 0, vi[4], 0, vi[5], 0, vi[6], 0, vi[7] }; } static INLINE vint vcastu_vi_vi2(vint2 vi2) { return (vint){ vi2[1], vi2[3], vi2[5], vi2[7], vi2[9], vi2[11], vi2[13], vi2[15] }; } static INLINE vint vreinterpretFirstHalf_vi_vi2(vint2 vi2) { return (vint){ vi2[0], vi2[1], vi2[2], vi2[3], vi2[4], vi2[5], vi2[6], vi2[7] }; } static INLINE vint2 vreinterpretFirstHalf_vi2_vi(vint vi) { return (vint2){ vi[0], vi[1], vi[2], vi[3], vi[4], vi[5], vi[6], vi[7], 0, 0, 0, 0, 0, 0, 0, 0 }; } #define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0, +0.0, -0.0, +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0, -0.0, +0.0, -0.0, +0.0 }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return (vdouble)((vmask)d ^ (vmask)PNMASK); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return (vdouble)((vmask)d ^ (vmask)NPMASK); } #define PNMASKf 
((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vfloat vposneg_vf_vf(vfloat d) { return (vfloat)((vmask)d ^ (vmask)PNMASKf); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return (vfloat)((vmask)d ^ (vmask)NPMASKf); } static INLINE vdouble vrev21_vd_vd(vdouble vd) { return (vdouble) { vd[1], vd[0], vd[3], vd[2], vd[5], vd[4], vd[7], vd[6] }; } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return (vdouble) { vd[6], vd[7], vd[4], vd[5], vd[2], vd[3], vd[0], vd[1] }; } static INLINE vfloat vrev21_vf_vf(vfloat vd) { return (vfloat) { vd[1], vd[0], vd[3], vd[2], vd[5], vd[4], vd[7], vd[6], vd[9], vd[8], vd[11], vd[10], vd[13], vd[12], vd[15], vd[14] }; } static INLINE vfloat vreva2_vf_vf(vfloat vd) { return (vfloat) { vd[14], vd[15], vd[12], vd[13], vd[10], vd[11], vd[8], vd[9], vd[6], vd[7], vd[4], vd[5], vd[2], vd[3], vd[0], vd[1]}; } #ifdef ENABLE_LONGDOUBLE static INLINE vlongdouble vrev21_vl_vl(vlongdouble vd) { return (vlongdouble) { vd[1], vd[0], vd[3], vd[2], vd[5], vd[4], vd[7], vd[6] }; } static INLINE vlongdouble vreva2_vl_vl(vlongdouble vd) { return (vlongdouble) { vd[6], vd[7], vd[4], vd[5], vd[2], vd[3], vd[0], vd[1] }; } static INLINE vlongdouble vposneg_vl_vl(vlongdouble vd) { return (vlongdouble) { +vd[0], -vd[1], +vd[2], -vd[3], +vd[4], -vd[5], +vd[6], -vd[7] }; } static INLINE vlongdouble vnegpos_vl_vl(vlongdouble vd) { return (vlongdouble) { -vd[0], +vd[1], -vd[2], +vd[3], -vd[4], +vd[5], -vd[6], +vd[7] }; } #endif #else static INLINE vint vcast_vi_i(int k) { vint ret; for(int i=0;i y), x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return vsel_vd_vo_vd_vd((vopmask)(x < y), x, y); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } 
static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsubadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)(x == y); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)(x != y); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)(x < y); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)(x <= y); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)(x > y); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)(x >= y); } static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return x + y; } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return x - y; } static INLINE vint vneg_vi_vi(vint e) { return -e; } static INLINE vint vand_vi_vi_vi(vint x, vint y) { return x & y; } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return y & ~x; } static INLINE vint vor_vi_vi_vi(vint x, vint y) { return x | y; } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return x ^ y; } static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return vreinterpretFirstHalf_vi_vi2((vint2)x) & y; } static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return y & ~vreinterpretFirstHalf_vi_vi2((vint2)x); } static INLINE vint vsll_vi_vi_i(vint x, int c) { #if defined(__clang__) typedef uint32_t vu __attribute__((ext_vector_type(VECTLENDP))); #else typedef uint32_t vu __attribute__((vector_size(sizeof(uint32_t)*VECTLENDP))); #endif return (vint)(((vu)x) << c); } static INLINE vint vsrl_vi_vi_i(vint x, int c) { #if defined(__clang__) typedef uint32_t vu __attribute__((ext_vector_type(VECTLENDP))); #else typedef uint32_t vu __attribute__((vector_size(sizeof(uint32_t)*VECTLENDP))); #endif return (vint)(((vu)x) >> c); } static INLINE vint vsra_vi_vi_i(vint x, int c) { return x >> c; } static INLINE vint veq_vi_vi_vi(vint x, vint y) { return x == y; } static INLINE vint 
vgt_vi_vi_vi(vint x, vint y) { return x > y; } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return (vopmask)vreinterpretFirstHalf_vi2_vi(x == y); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return (vopmask)vreinterpretFirstHalf_vi2_vi(x > y);} static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return vor_vi_vi_vi(vand_vi_vi_vi(vreinterpretFirstHalf_vi_vi2((vint2)m), x), vandnot_vi_vi_vi(vreinterpretFirstHalf_vi_vi2((vint2)m), y)); } static INLINE vopmask visinf_vo_vd(vdouble d) { return (vopmask)(vabs_vd_vd(d) == SLEEF_INFINITY); } static INLINE vopmask vispinf_vo_vd(vdouble d) { return (vopmask)(d == SLEEF_INFINITY); } static INLINE vopmask visminf_vo_vd(vdouble d) { return (vopmask)(d == -SLEEF_INFINITY); } static INLINE vopmask visnan_vo_vd(vdouble d) { return (vopmask)(d != d); } static INLINE vdouble vsqrt_vd_vd(vdouble d) { #if defined(__clang__) typedef int64_t vi64 __attribute__((ext_vector_type(VECTLENDP))); #else typedef int64_t vi64 __attribute__((vector_size(sizeof(int64_t)*VECTLENDP))); #endif vdouble q = vcast_vd_d(1); vopmask o = (vopmask)(d < 8.636168555094445E-78); d = (vdouble)((o & (vmask)(d * 1.157920892373162E77)) | (~o & (vmask)d)); q = (vdouble)((o & (vmask)vcast_vd_d(2.9387358770557188E-39)) | (~o & (vmask)vcast_vd_d(1))); q = (vdouble)vor_vm_vm_vm(vlt_vo_vd_vd(d, vcast_vd_d(0)), (vmask)q); vdouble x = (vdouble)(0x5fe6ec85e7de30daLL - ((vi64)(d + 1e-320) >> 1)); x = x * ( 3 - d * x * x); x = x * ( 12 - d * x * x); x = x * (768 - d * x * x); x *= 1.0 / (1 << 13); x = (d - (d * x) * (d * x)) * (x * 0.5) + d * x; return x * q; } static INLINE double vcast_d_vd(vdouble v) { return v[0]; } static INLINE float vcast_f_vf(vfloat v) { return v[0]; } static INLINE vdouble vload_vd_p(const double *ptr) { return *(vdouble *)ptr; } static INLINE vdouble vloadu_vd_p(const double *ptr) { vdouble vd; for(int i=0;i y), x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return vsel_vf_vo_vf_vf((vopmask)(x < y), 
x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return vadd_vf_vf_vf(x, vnegpos_vf_vf(y)); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)(x == y); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)(x != y); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)(x < y); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)(x <= y); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)(x > y); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)(x >= y); } static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return x + y; } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return x - y; } static INLINE vint2 vneg_vi2_vi2(vint2 e) { return -e; } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return x & y; } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return y & ~x; } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return x | y; } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return x ^ y; } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)x & y; } static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return y & ~(vint2)x; } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { #if defined(__clang__) typedef uint32_t vu __attribute__((ext_vector_type(VECTLENDP*2))); #else typedef uint32_t vu __attribute__((vector_size(sizeof(uint32_t)*VECTLENDP*2))); #endif return (vint2)(((vu)x) << c); } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { #if defined(__clang__) typedef uint32_t vu __attribute__((ext_vector_type(VECTLENDP*2))); #else typedef uint32_t vu __attribute__((vector_size(sizeof(uint32_t)*VECTLENDP*2))); #endif return (vint2)(((vu)x) >> c); } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) 
{ return x >> c; } static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return (vopmask)(x == y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return (vopmask)(x > y); } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return x == y; } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return x > y; } static INLINE vopmask visinf_vo_vf(vfloat d) { return (vopmask)(vabs_vf_vf(d) == SLEEF_INFINITYf); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return (vopmask)(d == SLEEF_INFINITYf); } static INLINE vopmask visminf_vo_vf(vfloat d) { return (vopmask)(d == -SLEEF_INFINITYf); } static INLINE vopmask visnan_vo_vf(vfloat d) { return (vopmask)(d != d); } static INLINE vfloat vsqrt_vf_vf(vfloat d) { vfloat q = vcast_vf_f(1); vopmask o = (vopmask)(d < 5.4210108624275221700372640043497e-20f); // 2^-64 d = (vfloat)((o & (vmask)(d * vcast_vf_f(18446744073709551616.0f))) | (~o & (vmask)d)); // 2^64 q = (vfloat)((o & (vmask)vcast_vf_f(0.00000000023283064365386962890625f)) | (~o & (vmask)vcast_vf_f(1))); // 2^-32 q = (vfloat)vor_vm_vm_vm(vlt_vo_vf_vf(d, vcast_vf_f(0)), (vmask)q); vfloat x = (vfloat)(0x5f330de2 - (((vint2)d) >> 1)); x = x * ( 3.0f - d * x * x); x = x * (12.0f - d * x * x); x *= 0.0625f; x = (d - (d * x) * (d * x)) * (x * 0.5) + d * x; return x * q; } static INLINE vfloat vload_vf_p(const float *ptr) { return *(vfloat *)ptr; } static INLINE vfloat vloadu_vf_p(const float *ptr) { vfloat vf; for(int i=0;i #include #include #include #include #include #include // #if !(defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER)) #include #include #include static void FLOCK(FILE *fp) { flock(fileno(fp), LOCK_EX); } static void FUNLOCK(FILE *fp) { flock(fileno(fp), LOCK_UN); } static void FTRUNCATE(FILE *fp, off_t z) { if (ftruncate(fileno(fp), z)) ; } static FILE *OPENTMPFILE() { return tmpfile(); } static void CLOSETMPFILE(FILE *fp) { fclose(fp); } #else #include #include static void FLOCK(FILE *fp) { } static void FUNLOCK(FILE 
*fp) { } static void FTRUNCATE(FILE *fp, long z) { fseek(fp, 0, SEEK_SET); SetEndOfFile((HANDLE)_get_osfhandle(_fileno(fp))); } static FILE *OPENTMPFILE() { return fopen("tmpfile.txt", "w+"); } static void CLOSETMPFILE(FILE *fp) { fclose(fp); remove("tmpfile.txt"); } #endif // #define MAGIC_ARRAYMAPNODE 0xf73130fa #define MAGIC_ARRAYMAP 0x8693bd21 #define LOGNBUCKETS 8 #define NBUCKETS (1 << LOGNBUCKETS) static int hash(uint64_t key) { return (key ^ (key >> LOGNBUCKETS) ^ (key >> (LOGNBUCKETS*2)) ^ (key >> (LOGNBUCKETS*3))) & (NBUCKETS-1); } static void String_trim(char *str) { char *dst = str, *src = str, *pterm = src; while(*src != '\0' && isspace(*src)) src++; for(;*src != '\0';src++) { *dst++ = *src; if (!isspace(*src)) pterm = dst; } *pterm = '\0'; } typedef struct ArrayMapNode { uint32_t magic; uint64_t key; void *value; } ArrayMapNode; typedef struct ArrayMap { uint32_t magic; ArrayMapNode *array[NBUCKETS]; int size[NBUCKETS], capacity[NBUCKETS], totalSize; } ArrayMap; ArrayMap *initArrayMap() { ArrayMap *thiz = (ArrayMap *)calloc(1, sizeof(ArrayMap)); thiz->magic = MAGIC_ARRAYMAP; for(int i=0;icapacity[i] = 8; thiz->array[i] = (ArrayMapNode *)malloc(thiz->capacity[i] * sizeof(ArrayMapNode)); thiz->size[i] = 0; } thiz->totalSize = 0; return thiz; } void ArrayMap_dispose(ArrayMap *thiz) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); for(int j=0;jsize[j];i++) { assert(thiz->array[j][i].magic == MAGIC_ARRAYMAPNODE); thiz->array[j][i].magic = 0; } free(thiz->array[j]); } thiz->magic = 0; free(thiz); } int ArrayMap_size(ArrayMap *thiz) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); return thiz->totalSize; } uint64_t *ArrayMap_keyArray(ArrayMap *thiz) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); uint64_t *a = (uint64_t *)malloc(sizeof(uint64_t) * thiz->totalSize); int p = 0; for(int j=0;jsize[j];i++) { assert(thiz->array[j][i].magic == MAGIC_ARRAYMAPNODE); a[p++] = thiz->array[j][i].key; } } return a; } void 
**ArrayMap_valueArray(ArrayMap *thiz) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); void **a = (void **)malloc(sizeof(void *) * thiz->totalSize); int p = 0; for(int j=0;jsize[j];i++) { assert(thiz->array[j][i].magic == MAGIC_ARRAYMAPNODE); a[p++] = thiz->array[j][i].value; } } return a; } void *ArrayMap_remove(ArrayMap *thiz, uint64_t key) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); int h = hash(key); for(int i=0;isize[h];i++) { assert(thiz->array[h][i].magic == MAGIC_ARRAYMAPNODE); if (thiz->array[h][i].key == key) { void *old = thiz->array[h][i].value; thiz->array[h][i].key = thiz->array[h][thiz->size[h]-1].key; thiz->array[h][i].value = thiz->array[h][thiz->size[h]-1].value; thiz->array[h][thiz->size[h]-1].magic = 0; thiz->size[h]--; thiz->totalSize--; return old; } } return NULL; } void *ArrayMap_put(ArrayMap *thiz, uint64_t key, void *value) { if (value == NULL) return ArrayMap_remove(thiz, key); assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); int h = hash(key); for(int i=0;isize[h];i++) { assert(thiz->array[h][i].magic == MAGIC_ARRAYMAPNODE); if (thiz->array[h][i].key == key) { void *old = thiz->array[h][i].value; thiz->array[h][i].value = value; return old; } } if (thiz->size[h] >= thiz->capacity[h]) { thiz->capacity[h] *= 2; thiz->array[h] = (ArrayMapNode *)realloc(thiz->array[h], thiz->capacity[h] * sizeof(ArrayMapNode)); } ArrayMapNode *n = &(thiz->array[h][thiz->size[h]++]); n->magic = MAGIC_ARRAYMAPNODE; n->key = key; n->value = value; thiz->totalSize++; return NULL; } void *ArrayMap_get(ArrayMap *thiz, uint64_t key) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); int h = hash(key); for(int i=0;isize[h];i++) { assert(thiz->array[h][i].magic == MAGIC_ARRAYMAPNODE); if (thiz->array[h][i].key == key) { return thiz->array[h][i].value; } } return NULL; } #define LINELEN (1024*1024) ArrayMap *ArrayMap_load(const char *fn, const char *prefix, const char *idstr, int doLock) { const int idstrlen = strlen(idstr); int 
prefixLen = strlen(prefix) + 3; if (prefixLen >= LINELEN-10 || idstrlen >= LINELEN-10) return NULL; FILE *fp = fopen(fn, "r"); if (fp == NULL) return NULL; if (doLock) FLOCK(fp); ArrayMap *thiz = initArrayMap(); char *prefix2 = malloc(prefixLen+10); strcpy(prefix2, prefix); String_trim(prefix2); for(char *p = prefix2;*p != '\0';p++) { if (*p == ':') *p = ';'; if (*p == ' ') *p = '_'; } strcat(prefix2, " : "); prefixLen = strlen(prefix2); char *line = malloc(sizeof(char) * (LINELEN+10)); line[idstrlen] = '\0'; if (fread(line, sizeof(char), idstrlen, fp) != idstrlen || strcmp(idstr, line) != 0) { if (doLock) FUNLOCK(fp); fclose(fp); free(prefix2); free(line); return NULL; } int found = 0; for(;;) { line[LINELEN] = '\0'; if (fgets(line, LINELEN, fp) == NULL) break; if (strncmp(line, prefix2, prefixLen) != 0) continue; uint64_t key; char *value = malloc(sizeof(char) * LINELEN); if (sscanf(line + prefixLen, "%" SCNx64 " : %s\n", &key, value) == 2) { found = 1; ArrayMap_put(thiz, (uint64_t)key, (void *)value); } else { free(value); } } if (doLock) FUNLOCK(fp); fclose(fp); free(prefix2); free(line); return thiz; } int ArrayMap_save(ArrayMap *thiz, const char *fn, const char *prefix, const char *idstr) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); const int idstrlen = strlen(idstr); int prefixLen = strlen(prefix) + 3; if (prefixLen >= LINELEN-10 || idstrlen >= LINELEN-10) return -1; // Generate prefix2 char *prefix2 = malloc(prefixLen+10); strcpy(prefix2, prefix); String_trim(prefix2); for(char *p = prefix2;*p != '\0';p++) { if (*p == ':') *p = ';'; if (*p == ' ') *p = '_'; } strcat(prefix2, " : "); prefixLen = strlen(prefix2); // FILE *fp = fopen(fn, "a+"); if (fp == NULL) return -1; FLOCK(fp); fseek(fp, 0, SEEK_SET); // Copy the file specified by fn to tmpfile FILE *tmpfp = OPENTMPFILE(); if (tmpfp == NULL) { FUNLOCK(fp); fclose(fp); return -1; } char *line = malloc(sizeof(char) * (LINELEN+10)); line[idstrlen] = '\0'; if (fread(line, sizeof(char), idstrlen, 
fp) == idstrlen && strcmp(idstr, line) == 0) { for(;;) { line[LINELEN] = '\0'; if (fgets(line, LINELEN, fp) == NULL) break; if (strncmp(line, prefix2, prefixLen) != 0) fputs(line, tmpfp); } } // Write the contents in the map into tmpfile uint64_t *keys = ArrayMap_keyArray(thiz); int s = ArrayMap_size(thiz); for(int i=0;i= LINELEN-10) continue; fprintf(tmpfp, "%s %" PRIx64 " : %s\n", prefix2, keys[i], value); } free(keys); fseek(fp, 0, SEEK_SET); FTRUNCATE(fp, 0); fwrite(idstr, sizeof(char), strlen(idstr), fp); fseek(tmpfp, 0, SEEK_SET); for(;;) { size_t s = fread(line, 1, LINELEN, tmpfp); if (s == 0) break; fwrite(line, 1, s, fp); } FUNLOCK(fp); fclose(fp); CLOSETMPFILE(tmpfp); free(prefix2); free(line); return 0; } sleef-3.5.1/src/common/arraymap.h000066400000000000000000000015221373003144100166560ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #ifndef __ARRAYMAP_H__ #define __ARRAYMAP_H__ typedef struct ArrayMap ArrayMap; ArrayMap *initArrayMap(); void ArrayMap_dispose(ArrayMap *thiz); int ArrayMap_size(ArrayMap *thiz); void *ArrayMap_remove(ArrayMap *thiz, uint64_t key); void *ArrayMap_put(ArrayMap *thiz, uint64_t key, void *value); void *ArrayMap_get(ArrayMap *thiz, uint64_t key); uint64_t *ArrayMap_keyArray(ArrayMap *thiz); void **ArrayMap_valueArray(ArrayMap *thiz); int ArrayMap_save(ArrayMap *thiz, const char *fn, const char *prefix, const char *idstr); ArrayMap *ArrayMap_load(const char *fn, const char *prefix, const char *idstr, int doLock); #endif sleef-3.5.1/src/common/common.c000066400000000000000000000051161373003144100163300ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include "misc.h" #if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER) #include EXPORT void *Sleef_malloc(size_t z) { return _aligned_malloc(z, 256); } EXPORT void Sleef_free(void *ptr) { _aligned_free(ptr); } EXPORT uint64_t Sleef_currentTimeMicros() { struct __timeb64 t; _ftime64(&t); return t.time * INT64_C(1000000) + t.millitm*1000; } #elif defined(__APPLE__) #include EXPORT void *Sleef_malloc(size_t z) { void *ptr = NULL; posix_memalign(&ptr, 256, z); return ptr; } EXPORT void Sleef_free(void *ptr) { free(ptr); } EXPORT uint64_t Sleef_currentTimeMicros() { struct timeval time; gettimeofday(&time, NULL); return (uint64_t)((time.tv_sec * INT64_C(1000000)) + time.tv_usec); } #else // #if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER) #include #include #if defined(__FreeBSD__) || defined(__OpenBSD__) #include #else #include #endif EXPORT void *Sleef_malloc(size_t z) { void *ptr = NULL; posix_memalign(&ptr, 4096, z); return ptr; } EXPORT void Sleef_free(void *ptr) { free(ptr); } EXPORT uint64_t Sleef_currentTimeMicros() { struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return (uint64_t)tp.tv_sec * INT64_C(1000000) + ((uint64_t)tp.tv_nsec/1000); } #endif // #if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER) #ifdef _MSC_VER #include EXPORT void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) { __cpuidex(out, eax, ecx); } #else #if defined(__x86_64__) || defined(__i386__) EXPORT void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) { uint32_t a, b, c, d; __asm__ __volatile__ ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (eax), "c"(ecx)); out[0] = a; out[1] = b; out[2] = c; out[3] = d; } #endif #endif #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) static char x86BrandString[256]; EXPORT char *Sleef_getCpuIdString() { union { int32_t 
info[4]; uint8_t str[16]; } u; int i,j; char *p; p = x86BrandString; for(i=0;i<3;i++) { Sleef_x86CpuID(u.info, i + 0x80000002, 0); for(j=0;j<16;j++) { *p++ = u.str[j]; } } *p++ = '\n'; return x86BrandString; } #else EXPORT char *Sleef_getCpuIdString() { return "Unknown architecture"; } #endif sleef-3.5.1/src/common/common.h000066400000000000000000000004571373003144100163400ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #ifndef __COMMON_H__ #define __COMMON_H__ char *Sleef_getCpuIdString(); #endif sleef-3.5.1/src/common/f128util.h000066400000000000000000000045471373003144100164320ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include static __float128 mpfr_get_f128(mpfr_t m, mpfr_rnd_t rnd) { if (isnan(mpfr_get_d(m, GMP_RNDN))) return __builtin_nan(""); mpfr_t frr, frd; mpfr_inits(frr, frd, NULL); mpfr_exp_t e; mpfr_frexp(&e, frr, m, GMP_RNDN); double d0 = mpfr_get_d(frr, GMP_RNDN); mpfr_set_d(frd, d0, GMP_RNDN); mpfr_sub(frr, frr, frd, GMP_RNDN); double d1 = mpfr_get_d(frr, GMP_RNDN); mpfr_set_d(frd, d1, GMP_RNDN); mpfr_sub(frr, frr, frd, GMP_RNDN); double d2 = mpfr_get_d(frr, GMP_RNDN); mpfr_clears(frr, frd, NULL); return ldexpq((__float128)d2 + (__float128)d1 + (__float128)d0, e); } static void mpfr_set_f128(mpfr_t frx, __float128 f, mpfr_rnd_t rnd) { char s[128]; quadmath_snprintf(s, 120, "%.50Qg", f); mpfr_set_str(frx, s, 10, rnd); } static void printf128(__float128 f) { char s[128]; quadmath_snprintf(s, 120, "%.50Qg", f); printf("%s", s); } static char frstr[16][1000]; static int frstrcnt = 0; static char *toBC(double d) { union { double d; uint64_t u64; int64_t i64; } cnv; 
cnv.d = d; int64_t l = cnv.i64; int e = (int)((l >> 52) & ~(-1L << 11)); int s = (int)(l >> 63); l = d == 0 ? 0 : ((l & ~((-1L) << 52)) | (1L << 52)); char *ptr = frstr[(frstrcnt++) & 15]; sprintf(ptr, "%s%lld*2^%d", s != 0 ? "-" : "", (long long int)l, (e-0x3ff-52)); return ptr; } static char *toBCq(__float128 d) { union { __float128 d; __uint128_t u128; } cnv; cnv.d = d; __uint128_t m = cnv.u128; int e = (int)((m >> 112) & ~(-1L << 15)); int s = (int)(m >> 127); m = d == 0 ? 0 : ((m & ((((__uint128_t)1) << 112)-1)) | ((__uint128_t)1 << 112)); uint64_t h = m / UINT64_C(10000000000000000000); uint64_t l = m % UINT64_C(10000000000000000000); char *ptr = frstr[(frstrcnt++) & 15]; sprintf(ptr, "%s%" PRIu64 "%019" PRIu64 "*2^%d", s != 0 ? "-" : "", h, l, (e-0x3fff-112)); return ptr; } static int xisnanq(Sleef_quad x) { return x != x; } static int xisinfq(Sleef_quad x) { return x == (Sleef_quad)__builtin_inf() || x == -(Sleef_quad)__builtin_inf(); } static int xisfiniteq(Sleef_quad x) { return !xisnanq(x) && !isinfq(x); } sleef-3.5.1/src/common/misc.h000066400000000000000000000213251373003144100160000ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // #ifndef __MISC_H__ #define __MISC_H__ #if !defined(SLEEF_GENHEADER) #include #include #endif #ifndef M_PI #define M_PI 3.141592653589793238462643383279502884 #endif #ifndef M_PIl #define M_PIl 3.141592653589793238462643383279502884L #endif #ifndef M_1_PI #define M_1_PI 0.318309886183790671537767526745028724 #endif #ifndef M_1_PIl #define M_1_PIl 0.318309886183790671537767526745028724L #endif #ifndef M_2_PI #define M_2_PI 0.636619772367581343075535053490057448 #endif #ifndef M_2_PIl #define M_2_PIl 0.636619772367581343075535053490057448L #endif #ifndef SLEEF_FP_ILOGB0 #define SLEEF_FP_ILOGB0 ((int)-2147483648) #endif #ifndef SLEEF_FP_ILOGBNAN #define SLEEF_FP_ILOGBNAN ((int)2147483647) #endif #define SLEEF_SNAN (((union { long long int i; double d; }) { .i = INT64_C(0x7ff0000000000001) }).d) #define SLEEF_SNANf (((union { long int i; float f; }) { .i = 0xff800001 }).f) // /* PI_A to PI_D are constants that satisfy the following two conditions. * For PI_A, PI_B and PI_C, the last 28 bits are zero. * PI_A + PI_B + PI_C + PI_D is close to PI as much as possible. The argument of a trig function is multiplied by 1/PI, and the integral part is divided into two parts, each has at most 28 bits. So, the maximum argument that could be correctly reduced should be 2^(28*2-1) PI = 1.1e+17. However, due to internal double precision calculation, the actual maximum argument that can be correctly reduced is around 2^47. */ #define PI_A 3.1415926218032836914 #define PI_B 3.1786509424591713469e-08 #define PI_C 1.2246467864107188502e-16 #define PI_D 1.2736634327021899816e-24 #define TRIGRANGEMAX 1e+14 /* PI_A2 and PI_B2 are constants that satisfy the following two conditions. * The last 3 bits of PI_A2 are zero. * PI_A2 + PI_B2 is close to PI as much as possible. The argument of a trig function is multiplied by 1/PI, and the integral part is multiplied by PI_A2. 
So, the maximum argument that could be correctly reduced should be 2^(3-1) PI = 12.6. By testing, we confirmed that it correctly reduces the argument up to around 15. */ #define PI_A2 3.141592653589793116 #define PI_B2 1.2246467991473532072e-16 #define TRIGRANGEMAX2 15 #define M_2_PI_H 0.63661977236758138243 #define M_2_PI_L -3.9357353350364971764e-17 #define SQRT_DBL_MAX 1.3407807929942596355e+154 #define TRIGRANGEMAX3 1e+9 #define M_4_PI 1.273239544735162542821171882678754627704620361328125 #define L2U .69314718055966295651160180568695068359375 #define L2L .28235290563031577122588448175013436025525412068e-12 #define R_LN2 1.442695040888963407359924681001892137426645954152985934135449406931 #define L10U 0.30102999566383914498 // log 2 / log 10 #define L10L 1.4205023227266099418e-13 #define LOG10_2 3.3219280948873623478703194294893901758648313930 #define L10Uf 0.3010253906f #define L10Lf 4.605038981e-06f // #define PI_Af 3.140625f #define PI_Bf 0.0009670257568359375f #define PI_Cf 6.2771141529083251953e-07f #define PI_Df 1.2154201256553420762e-10f #define TRIGRANGEMAXf 39000 #define PI_A2f 3.1414794921875f #define PI_B2f 0.00011315941810607910156f #define PI_C2f 1.9841872589410058936e-09f #define TRIGRANGEMAX2f 125.0f #define TRIGRANGEMAX4f 8e+6f #define SQRT_FLT_MAX 18446743523953729536.0 #define L2Uf 0.693145751953125f #define L2Lf 1.428606765330187045e-06f #define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f #define M_PIf ((float)M_PI) // #ifndef MIN #define MIN(x, y) ((x) < (y) ? (x) : (y)) #endif #ifndef MAX #define MAX(x, y) ((x) > (y) ? (x) : (y)) #endif #ifndef ABS #define ABS(x) ((x) < 0 ? 
-(x) : (x)) #endif #define stringify(s) stringify_(s) #define stringify_(s) #s #if !defined(SLEEF_GENHEADER) typedef long double longdouble; #endif #if !defined(Sleef_double2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_double2_DEFINED typedef struct { double x, y; } Sleef_double2; #endif #if !defined(Sleef_float2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_float2_DEFINED typedef struct { float x, y; } Sleef_float2; #endif #if !defined(Sleef_longdouble2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_longdouble2_DEFINED typedef struct { long double x, y; } Sleef_longdouble2; #endif #if !defined(Sleef_quad_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad_DEFINED #if defined(ENABLEFLOAT128) typedef __float128 Sleef_quad; #else typedef struct { double x, y; } Sleef_quad; #endif #endif #if !defined(Sleef_quad1_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad1_DEFINED typedef union { struct { Sleef_quad x; }; Sleef_quad s[1]; } Sleef_quad1; #endif #if !defined(Sleef_quad2_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad2_DEFINED typedef union { struct { Sleef_quad x, y; }; Sleef_quad s[2]; } Sleef_quad2; #endif #if !defined(Sleef_quad4_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad4_DEFINED typedef union { struct { Sleef_quad x, y, z, w; }; Sleef_quad s[4]; } Sleef_quad4; #endif #if !defined(Sleef_quad8_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quad8_DEFINED typedef union { Sleef_quad s[8]; } Sleef_quad8; #endif #if defined(__ARM_FEATURE_SVE) && !defined(Sleef_quadx_DEFINED) && !defined(SLEEF_GENHEADER) #define Sleef_quadx_DEFINED typedef union { Sleef_quad s[32]; } Sleef_quadx; #endif // #if (defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)) && !defined(_MSC_VER) #define LIKELY(condition) __builtin_expect(!!(condition), 1) #define UNLIKELY(condition) __builtin_expect(!!(condition), 0) #define RESTRICT __restrict__ #ifndef __arm__ #define ALIGNED(x) __attribute__((aligned(x))) #else 
#define ALIGNED(x) #endif #if defined(SLEEF_GENHEADER) #define INLINE SLEEF_ALWAYS_INLINE #define EXPORT SLEEF_INLINE #define CONST SLEEF_CONST #define NOEXPORT #else // #if defined(SLEEF_GENHEADER) #ifndef __INTEL_COMPILER #define CONST const #else #define CONST #endif #define INLINE __attribute__((always_inline)) #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) #ifndef SLEEF_STATIC_LIBS #define EXPORT __stdcall __declspec(dllexport) #define NOEXPORT #else // #ifndef SLEEF_STATIC_LIBS #define EXPORT #define NOEXPORT #endif // #ifndef SLEEF_STATIC_LIBS #else // #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) #define EXPORT __attribute__((visibility("default"))) #define NOEXPORT __attribute__ ((visibility ("hidden"))) #endif // #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) #endif // #if defined(SLEEF_GENHEADER) #define SLEEF_NAN __builtin_nan("") #define SLEEF_NANf __builtin_nanf("") #define SLEEF_NANl __builtin_nanl("") #define SLEEF_INFINITY __builtin_inf() #define SLEEF_INFINITYf __builtin_inff() #define SLEEF_INFINITYl __builtin_infl() #if defined(__INTEL_COMPILER) || defined (__clang__) #define SLEEF_INFINITYq __builtin_inf() #define SLEEF_NANq __builtin_nan("") #else #define SLEEF_INFINITYq __builtin_infq() #define SLEEF_NANq (SLEEF_INFINITYq - SLEEF_INFINITYq) #endif #elif defined(_MSC_VER) // #if (defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)) && !defined(_MSC_VER) #define INLINE __forceinline #define CONST #define RESTRICT #define ALIGNED(x) #define LIKELY(condition) (condition) #define UNLIKELY(condition) (condition) #ifndef SLEEF_STATIC_LIBS #define EXPORT __declspec(dllexport) #define NOEXPORT #else #define EXPORT #define NOEXPORT #endif #if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) && !defined(SLEEF_GENHEADER) #include #endif #define SLEEF_INFINITY (1e+300 * 1e+300) #define SLEEF_NAN (SLEEF_INFINITY - 
SLEEF_INFINITY) #define SLEEF_INFINITYf ((float)SLEEF_INFINITY) #define SLEEF_NANf ((float)SLEEF_NAN) #define SLEEF_INFINITYl ((long double)SLEEF_INFINITY) #define SLEEF_NANl ((long double)SLEEF_NAN) #if (defined(_M_AMD64) || defined(_M_X64)) #ifndef __SSE2__ #define __SSE2__ #define __SSE3__ #define __SSE4_1__ #endif #elif _M_IX86_FP == 2 #ifndef __SSE2__ #define __SSE2__ #define __SSE3__ #define __SSE4_1__ #endif #elif _M_IX86_FP == 1 #ifndef __SSE__ #define __SSE__ #endif #endif #endif // #elif defined(_MSC_VER) // #if (defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER)) && !defined(_MSC_VER) #if !defined(__linux__) #define isinff(x) ((x) == SLEEF_INFINITYf || (x) == -SLEEF_INFINITYf) #define isinfl(x) ((x) == SLEEF_INFINITYl || (x) == -SLEEF_INFINITYl) #define isnanf(x) ((x) != (x)) #define isnanl(x) ((x) != (x)) #endif #endif // #ifndef __MISC_H__ #ifdef ENABLE_AAVPCS #define VECTOR_CC __attribute__((aarch64_vector_pcs)) #else #define VECTOR_CC #endif sleef-3.5.1/src/dft-tester/000077500000000000000000000000001373003144100154625ustar00rootroot00000000000000sleef-3.5.1/src/dft-tester/CMakeLists.txt000066400000000000000000000267651373003144100202420ustar00rootroot00000000000000# Compiler properties set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${DFT_C_FLAGS}") set(COMMON_TARGET_PROPERTIES C_STANDARD 99 # -std=gnu99 ) # function(add_test_dft TESTNAME) if (ARMIE_COMMAND) add_test(NAME ${TESTNAME} COMMAND ${ARMIE_COMMAND} -msve-vector-bits=${SVE_VECTOR_BITS} ${ARGN}) elseif (NOT EMULATOR AND NOT SDE_COMMAND) add_test(NAME ${TESTNAME} COMMAND ${ARGN}) elseif(NOT EMULATOR) add_test(NAME ${TESTNAME} COMMAND ${SDE_COMMAND} "--" ${ARGN}) else() add_test(NAME ${TESTNAME} COMMAND ${EMULATOR} ${ARGN}) endif() endfunction() # Include directories include_directories(${PROJECT_SOURCE_DIR}/include) # sleefdft.h include_directories(${sleef_BINARY_DIR}/include) # sleef.h if (FFTW3_INCLUDE_DIR) include_directories(${FFTW3_INCLUDE_DIR}) # fftw3.h endif() # Link 
directories link_directories(${sleef_BINARY_DIR}/lib) # libsleef, libsleefdft # Link libraries set(COMMON_LINK_LIBRARIES ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) if (COMPILER_SUPPORTS_OPENMP) set(COMMON_LINK_LIBRARIES ${COMMON_LINK_LIBRARIES} ${OpenMP_C_FLAGS}) endif() if((NOT MSVC) AND NOT SLEEF_CLANG_ON_WINDOWS) # Target executable naivetestdp set(TARGET_NAIVETESTDP "naivetestdp") add_executable(${TARGET_NAIVETESTDP} naivetest.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_NAIVETESTDP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_NAIVETESTDP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=1) target_link_libraries(${TARGET_NAIVETESTDP} ${COMMON_LINK_LIBRARIES}) set_target_properties(${TARGET_NAIVETESTDP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable naivetestsp set(TARGET_NAIVETESTSP "naivetestsp") add_executable(${TARGET_NAIVETESTSP} naivetest.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_NAIVETESTSP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_NAIVETESTSP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=2) target_link_libraries(${TARGET_NAIVETESTSP} ${COMMON_LINK_LIBRARIES}) set_target_properties(${TARGET_NAIVETESTSP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Test naivetestdp add_test_dft(${TARGET_NAIVETESTDP}_1 $ 1) add_test_dft(${TARGET_NAIVETESTDP}_2 $ 2) add_test_dft(${TARGET_NAIVETESTDP}_3 $ 3) add_test_dft(${TARGET_NAIVETESTDP}_4 $ 4) add_test_dft(${TARGET_NAIVETESTDP}_5 $ 5) add_test_dft(${TARGET_NAIVETESTDP}_10 $ 10) # Test naivetestsp add_test_dft(${TARGET_NAIVETESTSP}_1 $ 1) add_test_dft(${TARGET_NAIVETESTSP}_2 $ 2) add_test_dft(${TARGET_NAIVETESTSP}_3 $ 3) add_test_dft(${TARGET_NAIVETESTSP}_4 $ 4) add_test_dft(${TARGET_NAIVETESTSP}_5 $ 5) add_test_dft(${TARGET_NAIVETESTSP}_10 $ 10) endif() # Target executable roundtriptest1ddp set(TARGET_ROUNDTRIPTEST1DDP "roundtriptest1ddp") 
add_executable(${TARGET_ROUNDTRIPTEST1DDP} roundtriptest1d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_ROUNDTRIPTEST1DDP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_ROUNDTRIPTEST1DDP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=1) target_link_libraries(${TARGET_ROUNDTRIPTEST1DDP} ${COMMON_LINK_LIBRARIES}) set_target_properties(${TARGET_ROUNDTRIPTEST1DDP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable roundtriptest1dsp set(TARGET_ROUNDTRIPTEST1DSP "roundtriptest1dsp") add_executable(${TARGET_ROUNDTRIPTEST1DSP} roundtriptest1d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_ROUNDTRIPTEST1DSP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_ROUNDTRIPTEST1DSP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=2) target_link_libraries(${TARGET_ROUNDTRIPTEST1DSP} ${COMMON_LINK_LIBRARIES}) set_target_properties(${TARGET_ROUNDTRIPTEST1DSP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable roundtriptest2ddp set(TARGET_ROUNDTRIPTEST2DDP "roundtriptest2ddp") add_executable(${TARGET_ROUNDTRIPTEST2DDP} roundtriptest2d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_ROUNDTRIPTEST2DDP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_ROUNDTRIPTEST2DDP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=1) target_link_libraries(${TARGET_ROUNDTRIPTEST2DDP} ${COMMON_LINK_LIBRARIES}) set_target_properties(${TARGET_ROUNDTRIPTEST2DDP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable roundtriptest2dsp set(TARGET_ROUNDTRIPTEST2DSP "roundtriptest2dsp") add_executable(${TARGET_ROUNDTRIPTEST2DSP} roundtriptest2d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_ROUNDTRIPTEST2DSP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_ROUNDTRIPTEST2DSP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=2) 
target_link_libraries(${TARGET_ROUNDTRIPTEST2DSP} ${COMMON_LINK_LIBRARIES}) set_target_properties(${TARGET_ROUNDTRIPTEST2DSP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) if (LIBFFTW3 AND NOT DISABLE_FFTW) # Target executable fftwtest1ddp set(TARGET_FFTWTEST1DDP "fftwtest1ddp") add_executable(${TARGET_FFTWTEST1DDP} fftwtest1d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_FFTWTEST1DDP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_FFTWTEST1DDP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=1) target_link_libraries(${TARGET_FFTWTEST1DDP} ${COMMON_LINK_LIBRARIES} ${LIBFFTW3}) set_target_properties(${TARGET_FFTWTEST1DDP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable fftwtest1dsp set(TARGET_FFTWTEST1DSP "fftwtest1dsp") add_executable(${TARGET_FFTWTEST1DSP} fftwtest1d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_FFTWTEST1DSP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_FFTWTEST1DSP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=2) target_link_libraries(${TARGET_FFTWTEST1DSP} ${COMMON_LINK_LIBRARIES} ${LIBFFTW3}) set_target_properties(${TARGET_FFTWTEST1DSP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable fftwtest2ddp set(TARGET_FFTWTEST2DDP "fftwtest2ddp") add_executable(${TARGET_FFTWTEST2DDP} fftwtest2d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_FFTWTEST2DDP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_FFTWTEST2DDP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=1) target_link_libraries(${TARGET_FFTWTEST2DDP} ${COMMON_LINK_LIBRARIES} ${LIBFFTW3}) set_target_properties(${TARGET_FFTWTEST2DDP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable fftwtest2dsp set(TARGET_FFTWTEST2DSP "fftwtest2dsp") add_executable(${TARGET_FFTWTEST2DSP} fftwtest2d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_FFTWTEST2DSP} 
${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_FFTWTEST2DSP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=2) target_link_libraries(${TARGET_FFTWTEST2DSP} ${COMMON_LINK_LIBRARIES} ${LIBFFTW3}) set_target_properties(${TARGET_FFTWTEST2DSP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Test fftwtest1ddp add_test_dft(${TARGET_FFTWTEST1DDP}_12 $ 12) add_test_dft(${TARGET_FFTWTEST1DDP}_16 $ 16) # Test fftwtest1dsp add_test_dft(${TARGET_FFTWTEST1DSP}_12 $ 12) add_test_dft(${TARGET_FFTWTEST1DSP}_16 $ 16) # Test fftwtest2ddp add_test_dft(${TARGET_FFTWTEST2DDP}_2_2 $ 2 2) add_test_dft(${TARGET_FFTWTEST2DDP}_4_4 $ 4 4) add_test_dft(${TARGET_FFTWTEST2DDP}_8_8 $ 8 8) add_test_dft(${TARGET_FFTWTEST2DDP}_10_10 $ 10 10) add_test_dft(${TARGET_FFTWTEST2DDP}_5_15 $ 5 15) # Test fftwtest2dsp add_test_dft(${TARGET_FFTWTEST2DSP}_2_2 $ 2 2) add_test_dft(${TARGET_FFTWTEST2DSP}_4_4 $ 4 4) add_test_dft(${TARGET_FFTWTEST2DSP}_8_8 $ 8 8) add_test_dft(${TARGET_FFTWTEST2DSP}_10_10 $ 10 10) add_test_dft(${TARGET_FFTWTEST2DSP}_5_15 $ 5 15) else(LIBFFTW3 AND NOT DISABLE_FFTW) if(MSVC OR SLEEF_CLANG_ON_WINDOWS) # Test roundtriptestdp add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_1 $ 1 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_2 $ 2 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_3 $ 3 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_4 $ 4 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_5 $ 5 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_10 $ 10 10) # Test roundtriptestsp add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_1 $ 1 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_2 $ 2 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_3 $ 3 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_4 $ 4 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_5 $ 5 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_10 $ 10 10) endif() add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_12 $ 12 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_16 $ 16 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_12 $ 12 10) 
add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_16 $ 16 10) # Test roundtriptest2ddp add_test_dft(${TARGET_ROUNDTRIPTEST2DDP}_2_2 $ 2 2 10) add_test_dft(${TARGET_ROUNDTRIPTEST2DDP}_4_4 $ 4 4 10) add_test_dft(${TARGET_ROUNDTRIPTEST2DDP}_8_8 $ 8 8 10) add_test_dft(${TARGET_ROUNDTRIPTEST2DDP}_10_10 $ 10 10 2) add_test_dft(${TARGET_ROUNDTRIPTEST2DDP}_5_15 $ 5 15 2) # Test roundtriptest2dsp add_test_dft(${TARGET_ROUNDTRIPTEST2DSP}_2_2 $ 2 2 10) add_test_dft(${TARGET_ROUNDTRIPTEST2DSP}_4_4 $ 4 4 10) add_test_dft(${TARGET_ROUNDTRIPTEST2DSP}_8_8 $ 8 8 10) add_test_dft(${TARGET_ROUNDTRIPTEST2DSP}_10_10 $ 10 10 2) add_test_dft(${TARGET_ROUNDTRIPTEST2DSP}_5_15 $ 5 15 2) endif(LIBFFTW3 AND NOT DISABLE_FFTW) sleef-3.5.1/src/dft-tester/bench1d.c000066400000000000000000000055401373003144100171360ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #define _DEFAULT_SOURCE #define _XOPEN_SOURCE 700 #include #include #include #include #include #include #include #include #include #ifdef USEFFTW #include #include #else #include "sleef.h" #include "sleefdft.h" #endif typedef double real; static uint64_t gettime() { struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return (uint64_t)tp.tv_sec * 1000000000 + ((uint64_t)tp.tv_nsec); } #define REPEAT 8 int main(int argc, char **argv) { if (argc == 1) { fprintf(stderr, "%s \n", argv[0]); exit(-1); } int backward = 0; int log2n = atoi(argv[1]); if (log2n < 0) { backward = 1; log2n = -log2n; } const int n = 1 << log2n; const int64_t niter = (int)(100000000000.0 / n / log2n); printf("Number of iterations = %lld\n", (long long int)niter); #ifdef USEFFTW fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n); fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n); #if 0 int fftw_init_threads(void); 
fftw_plan_with_nthreads(omp_get_max_threads()); #endif fftw_plan w = fftw_plan_dft_1d(n, in, out, backward ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_MEASURE); //fftw_plan w = fftw_plan_dft_1d(n, in, out, backward ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_PATIENT); for(int i=0;i= 3) mode = SLEEF_MODE_VERBOSE | SLEEF_MODE_ESTIMATE; if (backward) mode |= SLEEF_MODE_BACKWARD; struct SleefDFT *p = SleefDFT_double_init1d(n, in, out, mode); if (argc >= 3) SleefDFT_setPath(p, argv[2]); for(int i=0;i #include #include #include #include #include #include "sleef.h" #include "sleefdft.h" #include #ifndef MODE #define MODE SLEEF_MODE_DEBUG #endif #if BASETYPEID == 1 #define THRES 1e-30 #define SleefDFT_init1d SleefDFT_double_init1d #define SleefDFT_execute SleefDFT_double_execute typedef double real; #elif BASETYPEID == 2 #define THRES 1e-13 #define SleefDFT_init1d SleefDFT_float_init1d #define SleefDFT_execute SleefDFT_float_execute typedef float real; #else #error BASETYPEID not set #endif static double squ(double x) { return x * x; } // complex forward double check_cf(int n) { fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n); fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n); fftw_plan w = fftw_plan_dft_1d(n, in, out, FFTW_FORWARD, FFTW_ESTIMATE); real *sx = (real *)Sleef_malloc(n*2*sizeof(real)); real *sy = (real *)Sleef_malloc(n*2*sizeof(real)); struct SleefDFT *p = SleefDFT_init1d(n, sx, sy, MODE); for(int i=0;i\n", argv[0]); exit(-1); } const int n = 1 << atoi(argv[1]); srand(time(NULL)); SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET | SLEEF_PLAN_READONLY); // int success = 1; double e; e = check_cf(n); success = success && e < THRES; printf("complex forward : %s (%g)\n", e < THRES ? "OK" : "NG", e); e = check_cb(n); success = success && e < THRES; printf("complex backward : %s (%g)\n", e < THRES ? "OK" : "NG", e); e = check_rf(n); success = success && e < THRES; printf("real forward : %s (%g)\n", e < THRES ? 
"OK" : "NG", e); e = check_rb(n); success = success && e < THRES; printf("real backward : %s (%g)\n", e < THRES ? "OK" : "NG", e); exit(success ? 0 : -1); } sleef-3.5.1/src/dft-tester/fftwtest2d.c000066400000000000000000000071761373003144100177350ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include "sleef.h" #include "sleefdft.h" #include #ifndef MODE #define MODE SLEEF_MODE_DEBUG #endif #if BASETYPEID == 1 #define THRES 1e-30 #define SleefDFT_init2d SleefDFT_double_init2d #define SleefDFT_execute SleefDFT_double_execute typedef double real; #elif BASETYPEID == 2 #define THRES 1e-13 #define SleefDFT_init2d SleefDFT_float_init2d #define SleefDFT_execute SleefDFT_float_execute typedef float real; #else #error BASETYPEID not set #endif static double squ(double x) { return x * x; } // complex forward double check_cf(int n, int m) { fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n * m); fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n * m); fftw_plan w = fftw_plan_dft_2d(n, m, in, out, FFTW_FORWARD, FFTW_ESTIMATE); real *sx = (real *)Sleef_malloc(n*m*2*sizeof(real)); real *sy = (real *)Sleef_malloc(n*m*2*sizeof(real)); struct SleefDFT *p = SleefDFT_init2d(n, m, sx, sy, MODE); for(int i=0;i \n", argv[0]); exit(-1); } const int n = 1 << atoi(argv[1]); const int m = 1 << atoi(argv[2]); srand(time(NULL)); SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET | SLEEF_PLAN_READONLY); // int success = 1; double e; e = check_cf(n, m); success = success && e < THRES; printf("complex forward : %s (%g)\n", e < THRES ? "OK" : "NG", e); e = check_cb(n, m); success = success && e < THRES; printf("complex backward : %s (%g)\n", e < THRES ? "OK" : "NG", e); exit(success ? 
0 : -1); } sleef-3.5.1/src/dft-tester/measuredft.c000066400000000000000000000075241373003144100177750ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #define _DEFAULT_SOURCE #define _XOPEN_SOURCE 700 #include #include #include #include #include #include #include #include #include #include "sleef.h" #include "sleefdft.h" static uint64_t gettime() { struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return (uint64_t)tp.tv_sec * 1000000000 + ((uint64_t)tp.tv_nsec); } int mode[] = { SLEEF_MODE_MEASURE | SLEEF_MODE_NO_MT, SLEEF_MODE_MEASURE}; #define ENABLE_SP //#define ROUNDTRIP #define REPEAT 2 //#define ENABLE_SLEEP //#define WARMUP int main(int argc, char **argv) { int start = 1, end = 18; if (argc > 1) start = atoi(argv[1]); if (argc > 2) end = atoi(argv[2]); double *din = (double *)Sleef_malloc((1 << 18)*2 * sizeof(double)); double *dout = (double *)Sleef_malloc((1 << 18)*2 * sizeof(double)); float *sin = (float *)Sleef_malloc((1 << 18)*2 * sizeof(float)); float *sout = (float *)Sleef_malloc((1 << 18)*2 * sizeof(float)); SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET | SLEEF_PLAN_READONLY); for(int log2n=start;log2n<=end;log2n++) { const int n = 1 << log2n; int64_t niter = (int64_t)(1000000000.0 / REPEAT / n / log2n); printf("%d ", n); for(int m=0;m<2;m++) { #ifdef ENABLE_SLEEP sleep(1); #endif struct SleefDFT *pf = SleefDFT_double_init1d(n, NULL, NULL, mode[m]); #ifdef ROUNDTRIP struct SleefDFT *pb = SleefDFT_double_init1d(n, NULL, NULL, mode[m] | SLEEF_MODE_BACKWARD); #endif for(int i=0;i #include #include #include #include #include #include #include "sleef.h" #include "sleefdft.h" #include "misc.h" #ifndef MODE #define MODE SLEEF_MODE_DEBUG #endif #define THRES 1e-4 #if BASETYPEID == 1 #define SleefDFT_init SleefDFT_double_init1d #define SleefDFT_execute 
SleefDFT_double_execute typedef double real; typedef double complex cmpl; cmpl omega(double n, double kn) { return cexp((-2 * M_PIl * _Complex_I / n) * kn); } #elif BASETYPEID == 2 #define SleefDFT_init SleefDFT_float_init1d #define SleefDFT_execute SleefDFT_float_execute typedef float real; typedef double complex cmpl; cmpl omega(double n, double kn) { return cexp((-2 * M_PIl * _Complex_I / n) * kn); } #elif BASETYPEID == 3 #define SleefDFT_init SleefDFT_longdouble_init1d #define SleefDFT_execute SleefDFT_longdouble_execute typedef double real; typedef double complex cmpl; cmpl omega(double n, double kn) { return cexp((-2 * M_PIl * _Complex_I / n) * kn); } #elif BASETYPEID == 4 #include #define SleefDFT_init SleefDFT_quad_init1d #define SleefDFT_execute SleefDFT_quad_execute typedef Sleef_quad real; typedef double complex cmpl; cmpl omega(double n, double kn) { return cexp((-2 * M_PIl * _Complex_I / n) * kn); } #else #error No BASETYPEID specified #endif void forward(cmpl *ts, cmpl *fs, int len) { int k, n; for(k=0;k THRES) || (fabs(sy[(i*2+1)] - cimag(fs[i])) > THRES)) { success = 0; } double t; t = (sy[(i*2+0)] - creal(fs[i])); rmsn += t*t; t = (sy[(i*2+1)] - cimag(fs[i])); rmsn += t*t; rmsd += creal(fs[i]) * creal(fs[i]) + cimag(fs[i]) * cimag(fs[i]); } // free(fs); free(ts); Sleef_free(sx); Sleef_free(sy); SleefDFT_dispose(p); // return success; } // complex backward int check_cb(int n) { int i; real *sx = (real *)Sleef_malloc(sizeof(real)*n*2); real *sy = (real *)Sleef_malloc(sizeof(real)*n*2); cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n); cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n); // for(i=0;i THRES) || (fabs(sy[(i*2+1)] - cimag(ts[i])) > THRES)) { success = 0; } } // free(fs); free(ts); Sleef_free(sx); Sleef_free(sy); SleefDFT_dispose(p); // return success; } // real forward int check_rf(int n) { int i; real *sx = (real *)Sleef_malloc(n * sizeof(real)); real *sy = (real *)Sleef_malloc((n/2+1)*sizeof(real)*2); cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n); cmpl *fs 
= (cmpl *)malloc(sizeof(cmpl)*n); // for(i=0;i THRES) success = 0; if (fabs(sy[(2*i+1)] - cimag(fs[i])) > THRES) success = 0; } // free(fs); free(ts); Sleef_free(sx); Sleef_free(sy); SleefDFT_dispose(p); // return success; } // real backward int check_rb(int n) { int i; cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n); cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n); // for(i=0;i THRES) { success = 0; } if ((fabs(sy[i] - creal(ts[i])) > THRES)) { success = 0; } } // free(fs); free(ts); Sleef_free(sx); Sleef_free(sy); SleefDFT_dispose(p); // return success; } int check_arf(int n) { int i; real *sx = (real *)Sleef_malloc(n * sizeof(real)); real *sy = (real *)Sleef_malloc(n * sizeof(real)); cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n); cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n); // for(i=0;i THRES) success = 0; if (fabs(sy[(2*0+1)] - creal(fs[n/2])) > THRES) success = 0; } else { if (fabs(sy[(2*i+0)] - creal(fs[i])) > THRES) success = 0; if (fabs(sy[(2*i+1)] - cimag(fs[i])) > THRES) success = 0; } } // Sleef_free(sx); Sleef_free(sy); SleefDFT_dispose(p); // return success; } int check_arb(int n) { int i; real *sx = (real *)Sleef_malloc(n * sizeof(real)); real *sy = (real *)Sleef_malloc(n * sizeof(real)); cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n); cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n); // for(i=0;i THRES) { success = 0; } if ((fabs(sy[i]*2 - creal(ts[i])) > THRES)) { success = 0; } } // free(fs); free(ts); Sleef_free(sx); Sleef_free(sy); SleefDFT_dispose(p); // return success; } int main(int argc, char **argv) { if (argc != 2) { fprintf(stderr, "%s \n", argv[0]); exit(-1); } const int n = 1 << atoi(argv[1]); srand(time(NULL)); SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET | SLEEF_PLAN_READONLY); // int success = 1; printf("complex forward : %s\n", (success &= check_cf(n)) ? "OK" : "NG"); printf("complex backward : %s\n", (success &= check_cb(n)) ? "OK" : "NG"); printf("real forward : %s\n", (success &= check_rf(n)) ? 
"OK" : "NG"); printf("real backward : %s\n", (success &= check_rb(n)) ? "OK" : "NG"); printf("real alt forward : %s\n", (success &= check_arf(n)) ? "OK" : "NG"); printf("real alt backward : %s\n", (success &= check_arb(n)) ? "OK" : "NG"); exit(!success); } sleef-3.5.1/src/dft-tester/roundtriptest1d.c000066400000000000000000000070511373003144100210040ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include "sleef.h" #include "sleefdft.h" #ifndef MODE #define MODE (SLEEF_MODE_DEBUG | SLEEF_MODE_VERBOSE) #endif #if BASETYPEID == 1 #define THRES 1e-30 #define SleefDFT_init SleefDFT_double_init1d #define SleefDFT_execute SleefDFT_double_execute typedef double real; #elif BASETYPEID == 2 #define THRES 1e-13 #define SleefDFT_init SleefDFT_float_init1d #define SleefDFT_execute SleefDFT_float_execute typedef float real; #else #error BASETYPEID not set #endif static double squ(double x) { return x * x; } // complex transforms double check_c(int n) { struct SleefDFT *p; real *sx = (real *)Sleef_malloc(n*2 * sizeof(real)); real *sy = (real *)Sleef_malloc(n*2 * sizeof(real)); real *sz = (real *)Sleef_malloc(n*2 * sizeof(real)); for(int i=0;i []\n", argv[0]); exit(-1); } const int n = 1 << atoi(argv[1]); const int nloop = argc >= 3 ? atoi(argv[2]) : 1; srand(time(NULL)); SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET | SLEEF_PLAN_READONLY); // int success = 1; double e; for(int i=0;(nloop < 0 || i < nloop) && success;i++) { e = check_c(n); success = success && e < THRES; printf("complex : %s (%g)\n", e < THRES ? "OK" : "NG", e); e = check_r(n); success = success && e < THRES; printf("real : %s (%g)\n", e < THRES ? 
"OK" : "NG", e); } exit(!success); } sleef-3.5.1/src/dft-tester/roundtriptest2d.c000066400000000000000000000050241373003144100210030ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include "sleef.h" #include "sleefdft.h" #ifndef MODE #define MODE (SLEEF_MODE_DEBUG | SLEEF_MODE_VERBOSE) #endif #if BASETYPEID == 1 #define THRES 1e-30 #define SleefDFT_init2d SleefDFT_double_init2d #define SleefDFT_execute SleefDFT_double_execute typedef double real; #elif BASETYPEID == 2 #define THRES 1e-13 #define SleefDFT_init2d SleefDFT_float_init2d #define SleefDFT_execute SleefDFT_float_execute typedef float real; #else #error BASETYPEID not set #endif static double squ(double x) { return x * x; } // complex transforms double check_c(int n, int m) { struct SleefDFT *p; real *sx = (real *)Sleef_malloc(n*m*2 * sizeof(real)); real *sy = (real *)Sleef_malloc(n*m*2 * sizeof(real)); real *sz = (real *)Sleef_malloc(n*m*2 * sizeof(real)); for(int i=0;i []\n", argv[0]); exit(-1); } const int n = 1 << atoi(argv[1]); const int m = 1 << atoi(argv[2]); const int nloop = argc >= 4 ? atoi(argv[3]) : 1; srand(time(NULL)); SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET | SLEEF_PLAN_READONLY); // int success = 1; double e; for(int i=0;(nloop < 0 || i < nloop) && success;i++) { e = check_c(n, m); success = success && e < THRES; printf("complex : %s (%g)\n", e < THRES ? "OK" : "NG", e); } exit(!success); } sleef-3.5.1/src/dft-tester/tutorial.c000066400000000000000000000034701373003144100174750ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // gcc tutorial.c -lsleef -lsleefdft -lm #include #include #include #include #include #include "sleef.h" #include "sleefdft.h" #define THRES 1e-4 typedef double complex cmpl; cmpl omega(double n, double kn) { return cexp((-2 * M_PI * _Complex_I / n) * kn); } void forward(cmpl *ts, cmpl *fs, int len) { for(int k=0;k THRES) || (fabs(sy[(i*2+1)] - cimag(fs[i])) > THRES)) { success = 0; } } printf("%s\n", success ? "OK" : "NG"); free(fs); free(ts); Sleef_free(sy); Sleef_free(sx); SleefDFT_dispose(p); exit(success); } sleef-3.5.1/src/dft/000077500000000000000000000000001373003144100141565ustar00rootroot00000000000000sleef-3.5.1/src/dft/CMakeLists.txt000066400000000000000000000341121373003144100167170ustar00rootroot00000000000000# Option MAXBUTWIDTH if (COMPILER_SUPPORTS_SVE) set(SLEEFDFT_MAXBUTWIDTH 6 CACHE STRING "Log_2 (Maximum butterfly length) of butterflies") else() set(SLEEFDFT_MAXBUTWIDTH 4 CACHE STRING "Log_2 (Maximum butterfly length) of butterflies") endif() if (SLEEFDFT_MAXBUTWIDTH GREATER 7) message(FATAL_ERROR "SLEEFDFT_MAXBUTWIDTH has to be smaller than 8." ) endif() # Option option(SLEEFDFT_ENABLE_STREAM "Streaming instructions are utilized in DFT." OFF) option(SLEEFDFT_ENABLE_LONGDOUBLE "Long double routines will be compiled in." OFF) option(SLEEFDFT_ENABLE_QUAD "Quad precision routines will be compiled in." 
OFF)

# ---------------------------------------------------------------------------
# Compiler properties
# ---------------------------------------------------------------------------

# Rebuild the C flags for this directory from ORG_CMAKE_C_FLAGS plus the
# DFT-specific flags (both variables are defined outside this file).
set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${DFT_C_FLAGS}")

# Property list handed to set_target_properties() for every target declared
# in this file.
set(COMMON_TARGET_PROPERTIES
  C_STANDARD 99   # -std=gnu99
  )

if(BUILD_SHARED_LIBS)
  list(APPEND COMMON_TARGET_PROPERTIES POSITION_INDEPENDENT_CODE ON)   # -fPIC
endif()

# Preprocessor definitions shared by the targets declared in this file.
# Entries already present in COMMON_TARGET_DEFINITIONS are kept.
set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} MAXBUTWIDTH=${SLEEFDFT_MAXBUTWIDTH})

# ENABLE_STREAM is always defined, as either 0 or 1; dft.c refuses to
# compile (#error) when the macro is missing.
if(SLEEFDFT_ENABLE_STREAM)
  set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLE_STREAM=1)
else()
  set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLE_STREAM=0)
endif()

if(COMPILER_SUPPORTS_FLOAT128)
  set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLEFLOAT128)
endif()

if(COMPILER_SUPPORTS_LONG_DOUBLE)
  set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} ENABLE_LONGDOUBLE)
endif()

if(COMPILER_SUPPORTS_OPENMP)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
endif()

# ---------------------------------------------------------------------------
# Include directories
# ---------------------------------------------------------------------------

# The current binary directory is included because the generated
# dispatch*.h / dispatchparam.h headers are written there.
include_directories(
  ${PROJECT_SOURCE_DIR}/include
  ${PROJECT_BINARY_DIR}/include
  ${CMAKE_CURRENT_BINARY_DIR}
  )

# ---------------------------------------------------------------------------
# Constants definition
# ---------------------------------------------------------------------------

# Three parallel lists describing the floating-point base types. They are
# indexed with the entries of LIST_SUPPORTED_FPTYPE via list(GET ...):
#   index 0 -> dp / double     / BASETYPEID 1
#   index 1 -> sp / float      / BASETYPEID 2
#   index 2 -> ld / longdouble / BASETYPEID 3
#   index 3 -> qp / Sleef_quad / BASETYPEID 4
set(LISTSHORTTYPENAME "dp" "sp" "ld" "qp")
set(LISTLONGTYPENAME "double" "float" "longdouble" "Sleef_quad")
set(LISTTYPEID "1" "2" "3" "4")

# For every kernel flavour <isa><type>:
#   MACRODEF_<isa><type>  compile definitions of the unroll_*_<isa><type> objects
#   CFLAGS_<isa><type>    compile options of the same objects

# Scalar kernels using compiler vector extensions
set(MACRODEF_vecextdp BASETYPEID=1 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextdp ${FLAGS_ENABLE_VECEXT})
set(MACRODEF_vecextsp BASETYPEID=2 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextsp ${FLAGS_ENABLE_VECEXT})
set(MACRODEF_vecextld BASETYPEID=3 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextld ${FLAGS_ENABLE_VECEXT})
set(MACRODEF_vecextqp BASETYPEID=4 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextqp ${FLAGS_ENABLE_VECEXT})

# Scalar kernels in pure C
set(MACRODEF_purecdp BASETYPEID=1 ENABLE_PUREC CONFIG=1)
set(CFLAGS_purecdp ${FLAGS_ENABLE_PUREC})
set(MACRODEF_purecsp BASETYPEID=2 ENABLE_PUREC CONFIG=1)
set(CFLAGS_purecsp ${FLAGS_ENABLE_PUREC})
set(MACRODEF_purecld BASETYPEID=3 ENABLE_PUREC CONFIG=1)
set(CFLAGS_purecld ${FLAGS_ENABLE_PUREC})
# Quad-precision flavour of the pure C kernels.
set(MACRODEF_purecqp BASETYPEID=4 ENABLE_PUREC CONFIG=1)
set(CFLAGS_purecqp ${FLAGS_ENABLE_PUREC})

# SSE2 kernels: compiled with the SSE4 flag set and CONFIG=4.
set(MACRODEF_sse2dp BASETYPEID=1 ENABLE_SSE2 CONFIG=4)
set(CFLAGS_sse2dp ${FLAGS_ENABLE_SSE4})
set(MACRODEF_sse2sp BASETYPEID=2 ENABLE_SSE2 CONFIG=4)
set(CFLAGS_sse2sp ${FLAGS_ENABLE_SSE4})

# ISAs whose macro and flag-variable names are simply the upper-cased ISA
# name and which all use CONFIG=1. BASETYPEID 1 is double ("dp"), 2 is
# float ("sp").
foreach(simd_isa avx avx2 avx512f advsimd)
  string(TOUPPER ${simd_isa} simd_isa_upper)
  set(MACRODEF_${simd_isa}dp BASETYPEID=1 ENABLE_${simd_isa_upper} CONFIG=1)
  set(CFLAGS_${simd_isa}dp ${FLAGS_ENABLE_${simd_isa_upper}})
  set(MACRODEF_${simd_isa}sp BASETYPEID=2 ENABLE_${simd_isa_upper} CONFIG=1)
  set(CFLAGS_${simd_isa}sp ${FLAGS_ENABLE_${simd_isa_upper}})
endforeach()
unset(simd_isa_upper)

# NEON32 only has a single-precision flavour.
set(MACRODEF_neon32sp BASETYPEID=2 ENABLE_NEON32 CONFIG=1)
set(CFLAGS_neon32sp ${FLAGS_ENABLE_NEON32})

# SVE: one flavour per vector length. All of them share ENABLE_SVE and the
# same compiler flags and differ only in CONFIG, which runs 8, 9, 10, 11 for
# 256, 512, 1024 and 2048 bits.
set(sve_config_id 8)
foreach(sve_bits 256 512 1024 2048)
  set(MACRODEF_sve${sve_bits}dp BASETYPEID=1 ENABLE_SVE CONFIG=${sve_config_id})
  set(CFLAGS_sve${sve_bits}dp ${FLAGS_ENABLE_SVE})
  set(MACRODEF_sve${sve_bits}sp BASETYPEID=2 ENABLE_SVE CONFIG=${sve_config_id})
  set(CFLAGS_sve${sve_bits}sp ${FLAGS_ENABLE_SVE})
  math(EXPR sve_config_id "${sve_config_id} + 1")
endforeach()
unset(sve_config_id)

set(MACRODEF_vsxdp
BASETYPEID=1 ENABLE_VSX CONFIG=1)
set(CFLAGS_vsxdp ${FLAGS_ENABLE_VSX})
set(MACRODEF_vsxsp BASETYPEID=2 ENABLE_VSX CONFIG=1)
set(CFLAGS_vsxsp ${FLAGS_ENABLE_VSX})

set(MACRODEF_zvector2dp BASETYPEID=1 ENABLE_ZVECTOR2 CONFIG=140)
set(CFLAGS_zvector2dp ${FLAGS_ENABLE_ZVECTOR2})
set(MACRODEF_zvector2sp BASETYPEID=2 ENABLE_ZVECTOR2 CONFIG=140)
set(CFLAGS_zvector2sp ${FLAGS_ENABLE_ZVECTOR2})

# ---------------------------------------------------------------------------
# List all available scalar data types
# ---------------------------------------------------------------------------
#
# ISALIST_<TYPE> (TYPE = SP, DP, LD, QP) collects the kernel flavours built
# for one base type. LIST_SUPPORTED_FPTYPE holds the indices, into
# LISTSHORTTYPENAME and its sibling lists, of the base types that are built.
# Each list gets exactly one scalar flavour: "purec" by default, replaced by
# "vecext" when the C compiler is GNU or Clang.

set(ISALIST_SP purecsp)
set(ISALIST_DP purecdp)
set(LIST_SUPPORTED_FPTYPE 0 1)

if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
  set(ISALIST_SP vecextsp)
  set(ISALIST_DP vecextdp)
endif()

if(COMPILER_SUPPORTS_LONG_DOUBLE AND SLEEFDFT_ENABLE_LONGDOUBLE)
  list(APPEND LIST_SUPPORTED_FPTYPE 2)
  # The pure C long double flavour belongs in ISALIST_LD. It used to be
  # stored in ISALIST_QP, which left ISALIST_LD empty for compilers other
  # than GNU/Clang (no long double kernels were generated although type
  # index 2 was enabled) and put a long double entry into the quad list.
  set(ISALIST_LD purecld)
  if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
    set(ISALIST_LD vecextld)
  endif()
endif()

if(COMPILER_SUPPORTS_FLOAT128 AND SLEEFDFT_ENABLE_QUAD)
  list(APPEND LIST_SUPPORTED_FPTYPE 3)
  set(ISALIST_QP purecqp)
  if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
    set(ISALIST_QP vecextqp)
  endif()
endif()

# ---------------------------------------------------------------------------
# List all available vector data types
# ---------------------------------------------------------------------------
#
# Every vector extension the compiler supports appends its single- and
# double-precision flavours after the scalar entry.

if(COMPILER_SUPPORTS_SSE4)
  # The "sse2" flavours are gated on SSE4 support; their CFLAGS_sse2*
  # entries also use the SSE4 flags.
  list(APPEND ISALIST_SP sse2sp)
  list(APPEND ISALIST_DP sse2dp)
endif()

if(COMPILER_SUPPORTS_AVX)
  list(APPEND ISALIST_SP avxsp)
  list(APPEND ISALIST_DP avxdp)
endif()

if(COMPILER_SUPPORTS_AVX2)
  list(APPEND ISALIST_SP avx2sp)
  list(APPEND ISALIST_DP avx2dp)
endif()

if(COMPILER_SUPPORTS_AVX512F)
  list(APPEND ISALIST_SP avx512fsp)
  list(APPEND ISALIST_DP avx512fdp)
endif()

# The endif() closing this block is the first command on the next line of
# the file.
if(COMPILER_SUPPORTS_ADVSIMD)
  list(APPEND ISALIST_SP advsimdsp)
  list(APPEND ISALIST_DP advsimddp)
endif()

if(COMPILER_SUPPORTS_SVE)
  list(APPEND ISALIST_SP sve256sp sve512sp sve1024sp sve2048sp)
  list(APPEND ISALIST_DP sve256dp sve512dp sve1024dp sve2048dp)
endif()

if(COMPILER_SUPPORTS_NEON32)
  # NEON32 provides single precision only.
  list(APPEND ISALIST_SP neon32sp)
endif()

if(COMPILER_SUPPORTS_VSX)
  list(APPEND ISALIST_SP vsxsp)
  list(APPEND ISALIST_DP vsxdp)
endif()

if(COMPILER_SUPPORTS_ZVECTOR2)
  list(APPEND ISALIST_SP zvector2sp)
  list(APPEND ISALIST_DP zvector2dp)
endif()

# NLIST holds the variant numbers N used in the generated file names
# unroll_<N>_<isa>.c. Variants 1 and 3 are only generated when streaming
# instructions are enabled.
if(SLEEFDFT_ENABLE_STREAM)
  set(NLIST 0 1 2 3)
else()
  set(NLIST 0 2)
endif()

# ---------------------------------------------------------------------------
# Target mkunroll
# ---------------------------------------------------------------------------

# Build-time generator for the unroll_*.c sources. add_host_executable() is
# a project helper defined outside this file.
set(TARGET_MKUNROLL "mkunroll")
add_host_executable(${TARGET_MKUNROLL} mkunroll.c)
set_target_properties(${TARGET_MKUNROLL} PROPERTIES ${COMMON_TARGET_PROPERTIES})
# NOTE(review): the definitions are skipped when cross-compiling, presumably
# because the host tool is then not a regular target of this build - confirm
# against add_host_executable().
if(NOT CMAKE_CROSSCOMPILING)
  target_compile_definitions(${TARGET_MKUNROLL} PRIVATE ${COMMON_TARGET_DEFINITIONS})
endif()

# ---------------------------------------------------------------------------
# Target mkdispatch
# ---------------------------------------------------------------------------

# Build-time generator for dispatchparam.h and the dispatch*.h headers.
set(TARGET_MKDISPATCH "mkdispatch")
add_host_executable(${TARGET_MKDISPATCH} mkdispatch.c)
set_target_properties(${TARGET_MKDISPATCH} PROPERTIES ${COMMON_TARGET_PROPERTIES})
if(NOT CMAKE_CROSSCOMPILING)
  target_compile_definitions(${TARGET_MKDISPATCH} PRIVATE ${COMMON_TARGET_DEFINITIONS})
endif()

# ---------------------------------------------------------------------------
# Target dispatchparam.h
# ---------------------------------------------------------------------------

# The executable is referenced through $<TARGET_FILE:...> of the tool listed
# in DEPENDS; a bare "$" is not a runnable command.
add_custom_command(OUTPUT dispatchparam.h
  COMMENT "Generating dispatchparam.h"
  COMMAND $<TARGET_FILE:${TARGET_MKDISPATCH}> paramonly ${SLEEFDFT_MAXBUTWIDTH} ${ISALIST_DP} > ${CMAKE_CURRENT_BINARY_DIR}/dispatchparam.h
  DEPENDS ${TARGET_MKDISPATCH}
  )
add_custom_target(dispatchparam.h_generated SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dispatchparam.h)

# ---------------------------------------------------------------------------
# Target dispatch*.h
# ---------------------------------------------------------------------------

# One generated header per enabled base type. The add_custom_command() call
# below is continued on the next line of the file.
foreach(T ${LIST_SUPPORTED_FPTYPE})
  list(GET LISTSHORTTYPENAME ${T} ST)       # ST is "dp", for example
  string(TOUPPER ${ST} CST)                 # CST is "DP"
  list(GET LISTLONGTYPENAME ${T} LT)        # LT is "double"
  list(GET LISTTYPEID ${T} ID)              # ID is 1
  string(CONCAT S "dispatch" ${ST} ".h")    # S is dispatchdp.h
  add_custom_command(OUTPUT ${S}
    COMMENT "Generating
${S}"
    # The generator is referenced through $<TARGET_FILE:...> of the tool
    # listed in DEPENDS; a bare "$" is not a runnable command.
    COMMAND $<TARGET_FILE:${TARGET_MKDISPATCH}> ${LT} ${SLEEFDFT_MAXBUTWIDTH} ${ISALIST_${CST}} > ${S}
    DEPENDS ${TARGET_MKDISPATCH}
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    )
  string(CONCAT G ${S} "_generated")        # G is dispatchdp.h_generated
  add_custom_target(${G} SOURCES ${S})
endforeach()

# ---------------------------------------------------------------------------
# Target dftcommon.o
# ---------------------------------------------------------------------------

# Object library for the type-independent part of the DFT. It lists the
# generated dispatchparam.h and sleef.h as sources and is ordered after the
# targets that generate them.
add_library(dftcommon_obj OBJECT
  dftcommon.c
  dftcommon.h
  ${CMAKE_CURRENT_BINARY_DIR}/dispatchparam.h
  ${sleef_BINARY_DIR}/include/sleef.h
  )
add_dependencies(dftcommon_obj ${TARGET_HEADERS} dispatchparam.h_generated)
# sleef.h is produced by a rule outside this directory, so it is marked
# GENERATED here.
set_source_files_properties(${sleef_BINARY_DIR}/include/sleef.h PROPERTIES GENERATED TRUE)
set_target_properties(dftcommon_obj PROPERTIES ${COMMON_TARGET_PROPERTIES})
target_compile_definitions(dftcommon_obj PRIVATE ${COMMON_TARGET_DEFINITIONS})

# ---------------------------------------------------------------------------
# Target dft*.o
# ---------------------------------------------------------------------------

# dft.c is compiled once per enabled base type; BASETYPEID selects the type
# and the matching dispatch<type>.h inside dft.c.
foreach(T ${LIST_SUPPORTED_FPTYPE})
  list(GET LISTSHORTTYPENAME ${T} ST)       # ST is "dp", for example
  string(CONCAT G "dft" ${ST} "_obj")       # G is "dftdp_obj"
  string(CONCAT S "dispatch" ${ST} ".h")    # S is "dispatchdp.h"
  add_library(${G} OBJECT dft.c dftcommon.h ${S})
  string(CONCAT SG ${S} "_generated")       # SG is "dispatchdp.h_generated"
  add_dependencies(${G} ${SG} ${TARGET_HEADERS})
  set_target_properties(${G} PROPERTIES ${COMMON_TARGET_PROPERTIES})
  list(GET LISTTYPEID ${T} ID)              # ID is 1
  target_compile_definitions(${G} PRIVATE BASETYPEID=${ID} ${COMMON_TARGET_DEFINITIONS})
endforeach()

# ---------------------------------------------------------------------------
# Copy unroll0.org to ${CMAKE_CURRENT_BINARY_DIR}
# ---------------------------------------------------------------------------

# The unroll_*.c rule runs mkunroll in the binary directory and depends on
# this copy.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/unroll0.org
  COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/unroll0.org ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/unroll0.org)
add_custom_target(unroll0.org.copied DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/unroll0.org)

# ---------------------------------------------------------------------------
# Target unroll*.c
# ---------------------------------------------------------------------------

# Collect, per base type, the names of all unroll_<N>_<isa>.c files to be
# generated. The string(CONCAT ...) call below is continued on the next line
# of the file.
foreach(T ${LIST_SUPPORTED_FPTYPE})
  list(GET LISTSHORTTYPENAME ${T} ST)       # ST is "dp", for example
  string(TOUPPER ${ST} CST)                 # CST is "DP"
  list(GET LISTLONGTYPENAME ${T} LT)        # LT is "double"
  foreach(E ${ISALIST_${CST}})              # E is "sse2dp"
    foreach(N ${NLIST})
      string(CONCAT UC unroll_ ${N}
_ ${E} ".c")                                # UC is "unroll_0_sse2dp.c"
      set(UNROLL_TARGET_${CST} ${UNROLL_TARGET_${CST}} ${UC})
    endforeach()
  endforeach()
  message(STATUS "Unroll target for ${CST} : ${UNROLL_TARGET_${CST}}")

  # A single mkunroll run produces every unroll_*.c file of this base type.
  # Nothing is declared when the base type has no kernel flavours.
  if(UNROLL_TARGET_${CST})
    add_custom_command(OUTPUT ${UNROLL_TARGET_${CST}}
      COMMENT "Generating ${UNROLL_TARGET_${CST}}"
      # The generator is referenced through $<TARGET_FILE:...> of the tool
      # listed in DEPENDS; a bare "$" is not a runnable command.
      COMMAND $<TARGET_FILE:${TARGET_MKUNROLL}> ${LT} ${ISALIST_${CST}}
      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
      DEPENDS ${TARGET_MKUNROLL} unroll0.org.copied
      )
    add_custom_target(unroll_target_${ST} DEPENDS ${UNROLL_TARGET_${CST}})
  endif()
endforeach()

# ---------------------------------------------------------------------------
# Target unroll*.o
# ---------------------------------------------------------------------------

# One object library per generated source. Each is compiled with the
# definitions and flags of its kernel flavour (MACRODEF_<E> / CFLAGS_<E>)
# and ordered after the target that generates the sources of its base type.
foreach(T ${LIST_SUPPORTED_FPTYPE})
  list(GET LISTSHORTTYPENAME ${T} ST)       # ST is "dp", for example
  string(TOUPPER ${ST} CST)                 # CST is "DP"
  list(GET LISTLONGTYPENAME ${T} LT)        # LT is "double"
  foreach(E ${ISALIST_${CST}})              # E is "sse2dp"
    foreach(N ${NLIST})
      string(CONCAT U unroll_ ${N} _ ${E})  # U is "unroll_0_sse2dp"
      string(CONCAT UG ${U} "_obj")         # UG is "unroll_0_sse2dp_obj"
      string(CONCAT UC ${U} ".c")           # UC is "unroll_0_sse2dp.c"
      add_library(${UG} OBJECT ${UC})
      set_target_properties(${UG} PROPERTIES ${COMMON_TARGET_PROPERTIES})
      target_include_directories(${UG} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
      target_compile_definitions(${UG} PRIVATE ${COMMON_TARGET_DEFINITIONS} ${MACRODEF_${E}})
      target_compile_options(${UG} PRIVATE ${CFLAGS_${E}})
      add_dependencies(${UG} ${TARGET_HEADERS} unroll_target_${ST})
    endforeach()
  endforeach()
endforeach()

# ---------------------------------------------------------------------------
# Target libdft
# ---------------------------------------------------------------------------

# NOTE(review): the two bare "$" operands look like generator expressions
# whose "<...>" part was lost (presumably $<TARGET_OBJECTS:...>, one of them
# for dftcommon_obj). They cannot be reconstructed from this file alone -
# restore them from the upstream sources before using this file.
add_library(${TARGET_LIBDFT} $ $)
target_link_libraries(${TARGET_LIBDFT} ${TARGET_LIBSLEEF} ${LIBM})

# Add the per-type dft*_obj objects to the library.
foreach(T ${LIST_SUPPORTED_FPTYPE})
  list(GET LISTSHORTTYPENAME ${T} ST)       # ST is "dp", for example
  string(CONCAT G "dft" ${ST} "_obj")       # G is "dftdp_obj"
  target_sources(${TARGET_LIBDFT} PRIVATE $<TARGET_OBJECTS:${G}>)
endforeach()

# Add the unroll_*_obj objects to the library. The loop body is continued on
# the next line of the file.
foreach(T ${LIST_SUPPORTED_FPTYPE})
  list(GET LISTSHORTTYPENAME ${T} ST)       # ST is "dp", for example
  string(TOUPPER ${ST} CST)                 # CST is "DP"
  foreach(E ${ISALIST_${CST}})              # E is "sse2dp"
    foreach(N ${NLIST})
      string(CONCAT UG unroll_ ${N} _ ${E} "_obj") # UG is "unroll_0_sse2dp_obj"
target_sources(${TARGET_LIBDFT} PRIVATE $) endforeach() endforeach() endforeach() set_target_properties(${TARGET_LIBDFT} PROPERTIES VERSION ${SLEEF_VERSION} SOVERSION ${SLEEF_SOVERSION} PUBLIC_HEADER ${PROJECT_SOURCE_DIR}/include/sleefdft.h ${COMMON_TARGET_PROPERTIES} ) # Install install(TARGETS ${TARGET_LIBDFT} PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") sleef-3.5.1/src/dft/dft.c000066400000000000000000001427101373003144100151040ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #include "sleef.h" #include "misc.h" #include "common.h" #include "arraymap.h" #include "dftcommon.h" #ifdef _OPENMP #include #endif #if BASETYPEID == 1 typedef double real; typedef Sleef_double2 sc_t; #define BASETYPESTRING "double" #define MAGIC 0x27182818 #define MAGIC2D 0x17320508 #define INIT SleefDFT_double_init1d #define EXECUTE SleefDFT_double_execute #define INIT2D SleefDFT_double_init2d #define CTBL ctbl_double #define REALSUB0 realSub0_double #define REALSUB1 realSub1_double #define GETINT getInt_double #define GETPTR getPtr_double #define DFTF dftf_double #define DFTB dftb_double #define TBUTF tbutf_double #define TBUTB tbutb_double #define BUTF butf_double #define BUTB butb_double #define SINCOSPI Sleef_sincospi_u05 #include "dispatchdp.h" #elif BASETYPEID == 2 typedef float real; typedef Sleef_float2 sc_t; #define BASETYPESTRING "float" #define MAGIC 0x31415926 #define MAGIC2D 0x22360679 #define INIT SleefDFT_float_init1d #define EXECUTE SleefDFT_float_execute #define INIT2D SleefDFT_float_init2d #define CTBL ctbl_float #define REALSUB0 realSub0_float #define REALSUB1 realSub1_float #define GETINT getInt_float #define 
GETPTR getPtr_float #define DFTF dftf_float #define DFTB dftb_float #define TBUTF tbutf_float #define TBUTB tbutb_float #define BUTF butf_float #define BUTB butb_float #define SINCOSPI Sleef_sincospif_u05 #include "dispatchsp.h" #elif BASETYPEID == 3 typedef long double real; typedef Sleef_longdouble2 sc_t; #define BASETYPESTRING "long double" #define MAGIC 0x14142135 #define MAGIC2D 0x26457513 #define INIT SleefDFT_longdouble_init1d #define EXECUTE SleefDFT_longdouble_execute #define INIT2D SleefDFT_longdouble_init2d #define CTBL ctbl_longdouble #define REALSUB0 realSub0_longdouble #define REALSUB1 realSub1_longdouble #define GETINT getInt_longdouble #define GETPTR getPtr_longdouble #define DFTF dftf_longdouble #define DFTB dftb_longdouble #define TBUTF tbutf_longdouble #define TBUTB tbutb_longdouble #define BUTF butf_longdouble #define BUTB butb_longdouble #define SINCOSPI Sleef_sincospil_u05 #include "dispatchld.h" #elif BASETYPEID == 4 typedef Sleef_quad real; typedef Sleef_quad2 sc_t; #define BASETYPESTRING "Sleef_quad" #define MAGIC 0x33166247 #define MAGIC2D 0x36055512 #define INIT SleefDFT_quad_init1d #define EXECUTE SleefDFT_quad_execute #define INIT2D SleefDFT_quad_init2d #define CTBL ctbl_Sleef_quad #define REALSUB0 realSub0_Sleef_quad #define REALSUB1 realSub1_Sleef_quad #define GETINT getInt_Sleef_quad #define GETPTR getPtr_Sleef_quad #define DFTF dftf_Sleef_quad #define DFTB dftb_Sleef_quad #define TBUTF tbutf_Sleef_quad #define TBUTB tbutb_Sleef_quad #define BUTF butf_Sleef_quad #define BUTB butb_Sleef_quad #define SINCOSPI Sleef_sincospiq_u05 #include "dispatchqp.h" #else #error No BASETYPEID specified #endif #define IMPORT_IS_EXPORT #include "sleefdft.h" // #if BASETYPEID == 4 real CTBL[] = { 0.7071067811865475243818940365159164684883Q, -0.7071067811865475243818940365159164684883Q, 0.9238795325112867561014214079495587839119Q, -0.382683432365089771723257530688933059082Q, 0.382683432365089771723257530688933059082Q, 
-0.9238795325112867561014214079495587839119Q, #if MAXBUTWIDTH >= 5 0.9807852804032304491190993878113602022495Q, -0.1950903220161282678433729148581576851029Q, 0.5555702330196022247573058028269343822103Q, -0.8314696123025452370808655033762590846891Q, 0.8314696123025452370808655033762590846891Q, -0.5555702330196022247573058028269343822103Q, 0.1950903220161282678433729148581576851029Q, -0.9807852804032304491190993878113602022495Q, #endif #if MAXBUTWIDTH >= 6 0.9951847266721968862310254699821143731242Q, -0.09801714032956060199569840382660679267701Q, 0.6343932841636454982026105398063009488396Q, -0.7730104533627369607965383602188325085081Q, 0.881921264348355029715105513066220055407Q, -0.4713967368259976485449225247492677226546Q, 0.2902846772544623676448431737195932100803Q, -0.9569403357322088649310892760624369657307Q, 0.9569403357322088649310892760624369657307Q, -0.2902846772544623676448431737195932100803Q, 0.4713967368259976485449225247492677226546Q, -0.881921264348355029715105513066220055407Q, 0.7730104533627369607965383602188325085081Q, -0.6343932841636454982026105398063009488396Q, 0.09801714032956060199569840382660679267701Q, -0.9951847266721968862310254699821143731242Q, #endif #if MAXBUTWIDTH >= 7 0.9987954562051723927007702841240899260811Q, -0.04906767432741801425355085940205324135377Q, 0.6715589548470184006194634573905233310143Q, -0.7409511253549590911932944126139233276263Q, 0.9039892931234433315823215138173907234886Q, -0.427555093430282094315230886905077056781Q, 0.336889853392220050702686798271834334173Q, -0.9415440651830207783906830087961026265475Q, 0.9700312531945439926159106824865574481009Q, -0.2429801799032638899447731489766866275204Q, 0.5141027441932217266072797923204262815489Q, -0.8577286100002720698929313536407192941624Q, 0.8032075314806449097991200569701675249235Q, -0.5956993044924333434615715265891822127742Q, 0.1467304744553617516588479505190711904561Q, -0.9891765099647809734561415551112872890371Q, 0.9891765099647809734561415551112872890371Q, 
-0.1467304744553617516588479505190711904561Q, 0.5956993044924333434615715265891822127742Q, -0.8032075314806449097991200569701675249235Q, 0.8577286100002720698929313536407192941624Q, -0.5141027441932217266072797923204262815489Q, 0.2429801799032638899447731489766866275204Q, -0.9700312531945439926159106824865574481009Q, 0.9415440651830207783906830087961026265475Q, -0.336889853392220050702686798271834334173Q, 0.427555093430282094315230886905077056781Q, -0.9039892931234433315823215138173907234886Q, 0.7409511253549590911932944126139233276263Q, -0.6715589548470184006194634573905233310143Q, 0.04906767432741801425355085940205324135377Q, -0.9987954562051723927007702841240899260811Q, #endif }; #else real CTBL[] = { 0.7071067811865475243818940365159164684883L, -0.7071067811865475243818940365159164684883L, 0.9238795325112867561014214079495587839119L, -0.382683432365089771723257530688933059082L, 0.382683432365089771723257530688933059082L, -0.9238795325112867561014214079495587839119L, #if MAXBUTWIDTH >= 5 0.9807852804032304491190993878113602022495L, -0.1950903220161282678433729148581576851029L, 0.5555702330196022247573058028269343822103L, -0.8314696123025452370808655033762590846891L, 0.8314696123025452370808655033762590846891L, -0.5555702330196022247573058028269343822103L, 0.1950903220161282678433729148581576851029L, -0.9807852804032304491190993878113602022495L, #endif #if MAXBUTWIDTH >= 6 0.9951847266721968862310254699821143731242L, -0.09801714032956060199569840382660679267701L, 0.6343932841636454982026105398063009488396L, -0.7730104533627369607965383602188325085081L, 0.881921264348355029715105513066220055407L, -0.4713967368259976485449225247492677226546L, 0.2902846772544623676448431737195932100803L, -0.9569403357322088649310892760624369657307L, 0.9569403357322088649310892760624369657307L, -0.2902846772544623676448431737195932100803L, 0.4713967368259976485449225247492677226546L, -0.881921264348355029715105513066220055407L, 0.7730104533627369607965383602188325085081L, 
-0.6343932841636454982026105398063009488396L, 0.09801714032956060199569840382660679267701L, -0.9951847266721968862310254699821143731242L, #endif #if MAXBUTWIDTH >= 7 0.9987954562051723927007702841240899260811L, -0.04906767432741801425355085940205324135377L, 0.6715589548470184006194634573905233310143L, -0.7409511253549590911932944126139233276263L, 0.9039892931234433315823215138173907234886L, -0.427555093430282094315230886905077056781L, 0.336889853392220050702686798271834334173L, -0.9415440651830207783906830087961026265475L, 0.9700312531945439926159106824865574481009L, -0.2429801799032638899447731489766866275204L, 0.5141027441932217266072797923204262815489L, -0.8577286100002720698929313536407192941624L, 0.8032075314806449097991200569701675249235L, -0.5956993044924333434615715265891822127742L, 0.1467304744553617516588479505190711904561L, -0.9891765099647809734561415551112872890371L, 0.9891765099647809734561415551112872890371L, -0.1467304744553617516588479505190711904561L, 0.5956993044924333434615715265891822127742L, -0.8032075314806449097991200569701675249235L, 0.8577286100002720698929313536407192941624L, -0.5141027441932217266072797923204262815489L, 0.2429801799032638899447731489766866275204L, -0.9700312531945439926159106824865574481009L, 0.9415440651830207783906830087961026265475L, -0.336889853392220050702686798271834334173L, 0.427555093430282094315230886905077056781L, -0.9039892931234433315823215138173907234886L, 0.7409511253549590911932944126139233276263L, -0.6715589548470184006194634573905233310143L, 0.04906767432741801425355085940205324135377L, -0.9987954562051723927007702841240899260811L, #endif }; #endif #ifndef ENABLE_STREAM #error ENABLE_STREAM not defined #endif static const int constK[] = { 0, 2, 6, 14, 38, 94, 230, 542, 1254 }; extern const char *configStr[]; extern int planFilePathSet; // Utility functions static jmp_buf sigjmp; static void sighandler(int signum) { longjmp(sigjmp, 1); } static int checkISAAvailability(int isa) { signal(SIGILL, 
sighandler); if (setjmp(sigjmp) == 0) { int ret = GETINT[isa] != NULL && (*GETINT[isa])(BASETYPEID); signal(SIGILL, SIG_DFL); return ret; } signal(SIGILL, SIG_DFL); return 0; } #ifdef _OPENMP static int omp_thread_count() { int n = 0; #pragma omp parallel reduction(+:n) n += 1; return n; } #endif static void startAllThreads(const int nth) { #ifdef _OPENMP volatile int8_t *state = calloc(nth, 1); int th=0; #pragma omp parallel for for(th=0;thlog2len; if (level == N) { if ((p->mode & SLEEF_MODE_BACKWARD) == 0) { void (*func)(real *, const real *, const int) = DFTF[config][p->isa][N]; (*func)(d, s, log2len-N); } else { void (*func)(real *, const real *, const int) = DFTB[config][p->isa][N]; (*func)(d, s, log2len-N); } } else if (level == log2len) { assert(p->vecwidth <= (1 << N)); if ((p->mode & SLEEF_MODE_BACKWARD) == 0) { void (*func)(real *, uint32_t *, const real *, const int, const real *, const int) = TBUTF[config][p->isa][N]; (*func)(d, p->perm[level], s, log2len-N, p->tbl[N][level], K); } else { void (*func)(real *, uint32_t *, const real *, const int, const real *, const int) = TBUTB[config][p->isa][N]; (*func)(d, p->perm[level], s, log2len-N, p->tbl[N][level], K); } } else { if ((p->mode & SLEEF_MODE_BACKWARD) == 0) { void (*func)(real *, uint32_t *, const int, const real *, const int, const real *, const int) = BUTF[config][p->isa][N]; (*func)(d, p->perm[level], log2len-level, s, log2len-N, p->tbl[N][level], K); } else { void (*func)(real *, uint32_t *, const int, const real *, const int, const real *, const int) = BUTB[config][p->isa][N]; (*func)(d, p->perm[level], log2len-level, s, log2len-N, p->tbl[N][level], K); } } } // Transposer #if defined(__GNUC__) && __GNUC__ < 5 // This is another workaround of a bug in gcc-4 #define LOG2BS 3 #else #define LOG2BS 4 #endif #define BS (1 << LOG2BS) #define TRANSPOSE_BLOCK(y2) do { \ for(int x2=y2+1;x2= N-1) return cnt; const int level = levelorg - levelinc; if (bot - top > 4) { const int bl = 1 << (N - levelinc); 
const int w = bl/4; for(int j=0;j<(bot-top)/bl;j++) { for(int i=0;i> 1) | ((k & 0x55555555) << 1)); r = (((r & 0xcccccccc) >> 2) | ((r & 0x33333333) << 2)); r = (((r & 0xf0f0f0f0) >> 4) | ((r & 0x0f0f0f0f) << 4)); r = (((r & 0xff00ff00) >> 8) | ((r & 0x00ff00ff) << 8)); r = ((r >> 16) | (r << 16)) >> (32-nbits); return (((r << s) | (k & ~(-1 << s))) & ~(-1 << d)) | ((((k >> s) | (r & (-1 << (nbits-s)))) << d) & ~(-1 << nbits)); } static real **makeTable(int sign, int vecwidth, int log2len, const int N, const int K) { if (log2len < N) return NULL; int *p = (int *)malloc(sizeof(int)*((N+1)<bestTime = tm; for(uint32_t j = 0;j < p->log2len+1;j++) { p->bestPathConfig[j] = pathConfig[j]; p->bestPath[j] = path[j]; } return nTrial; } if (level < 1) return nTrial-1; for(int i=0;i<10;i++) { int N; do { N = 1 + rand() % MAXBUTWIDTH; } while(p->tm[0][level*(MAXBUTWIDTH+1)+N] >= 1ULL << 60); if (p->vecwidth > (1 << N) || N == p->log2len) continue; path[level] = N; for(;;) { pathConfig[level] = rand() % CONFIGMAX; #if ENABLE_STREAM == 0 pathConfig[level] &= ~1; #endif if ((p->mode2 & SLEEF_MODE2_MT1D) == 0 && (pathConfig[level] & CONFIG_MT) != 0) continue; break; } for(int j = level-1;j >= 0;j--) path[j] = 0; nTrial = searchForRandomPathRecurse(p, level - N, path, pathConfig, 0, nTrial); if (nTrial <= 0) break; if (p->bestTime < 1ULL << 60) break; } return nTrial - 1; } // Planner #define NSHORTESTPATHS 15 #define MAXPATHLEN (MAXLOG2LEN+1) #define POSMAX (CONFIGMAX * MAXLOG2LEN * (MAXBUTWIDTH+1)) static int cln2pos(int config, int level, int N) { return (config * MAXLOG2LEN + level) * MAXBUTWIDTH + N; } static int pos2config(int pos) { return pos == -1 ? -1 : ((pos - 1) / (MAXBUTWIDTH * MAXLOG2LEN)); } static int pos2level(int pos) { return pos == -1 ? -1 : (((pos - 1) / MAXBUTWIDTH) % MAXLOG2LEN); } static int pos2N(int pos) { return pos == -1 ? 
-1 : ((pos - 1) % MAXBUTWIDTH + 1); } typedef struct { SleefDFT *p; int countu[POSMAX]; int path[NSHORTESTPATHS][MAXPATHLEN]; int pathLen[NSHORTESTPATHS]; uint64_t cost[NSHORTESTPATHS]; int nPaths; int *heap; int *heapLen; uint64_t *heapCost; int heapSize, nPathsInHeap; } ks_t; static ks_t *ksInit(SleefDFT *p) { ks_t *q = calloc(1, sizeof(ks_t)); q->p = p; q->heapSize = 10; q->heap = calloc(q->heapSize, sizeof(int)*MAXPATHLEN); q->heapCost = calloc(q->heapSize, sizeof(uint64_t)); q->heapLen = calloc(q->heapSize, sizeof(int)); return q; } static void ksDispose(ks_t *q) { free(q->heapCost); free(q->heapLen); free(q->heap); free(q); } // returns the number of paths in the heap static int ksSize(ks_t *q) { return q->nPathsInHeap; } // adds a path to the heap static void ksAddPath(ks_t *q, int *path, int pathLen, uint64_t cost) { assert(pathLen <= MAXPATHLEN); if (q->nPathsInHeap == q->heapSize) { q->heapSize *= 2; q->heap = realloc(q->heap, q->heapSize * sizeof(int)*MAXPATHLEN); q->heapCost = realloc(q->heapCost, q->heapSize * sizeof(uint64_t)); q->heapLen = realloc(q->heapLen, q->heapSize * sizeof(int)); } for(int i=0;iheap[q->nPathsInHeap * MAXPATHLEN + i] = path[i]; q->heapLen[q->nPathsInHeap] = pathLen; q->heapCost[q->nPathsInHeap] = cost; q->nPathsInHeap++; } // returns the cost of n-th paths in the heap static uint64_t ksCost(ks_t *q, int n) { assert(0 <= n && n < q->nPathsInHeap); return q->heapCost[n]; } // copies the n-th paths in the heap to path, returns its length static int ksGetPath(ks_t *q, int *path, int n) { assert(0 <= n && n < q->nPathsInHeap); int len = q->heapLen[n]; for(int i=0;iheap[n * MAXPATHLEN + i]; return len; } // removes the n-th paths in the heap static void ksRemove(ks_t *q, int n) { assert(0 <= n && n < q->nPathsInHeap); for(int i=n;inPathsInHeap-1;i++) { int len = q->heapLen[i+1]; assert(len < MAXPATHLEN); for(int j=0;jheap[i * MAXPATHLEN + j] = q->heap[(i+1) * MAXPATHLEN + j]; q->heapLen[i] = q->heapLen[i+1]; q->heapCost[i] = 
q->heapCost[i+1]; } q->nPathsInHeap--; } // returns the countu value at pos static int ksCountu(ks_t *q, int pos) { assert(0 <= pos && pos < POSMAX); return q->countu[pos]; } // set the countu value at pos to n static void ksSetCountu(ks_t *q, int pos, int n) { assert(0 <= pos && pos < POSMAX); q->countu[pos] = n; } // adds a path as one of the best k paths, returns the number best paths static int ksAddBestPath(ks_t *q, int *path, int pathLen, uint64_t cost) { assert(pathLen <= MAXPATHLEN); assert(q->nPaths < NSHORTESTPATHS); for(int i=0;ipath[q->nPaths][i] = path[i]; q->pathLen[q->nPaths] = pathLen; q->cost[q->nPaths] = cost; q->nPaths++; return q->nPaths; } // returns if pos is a destination static int ksIsDest(ks_t *q, int pos) { return pos2level(pos) == 0; } // returns n-th adjacent nodes at pos. static int ksAdjacent(ks_t *q, int pos, int n) { if (pos != -1 && pos2level(pos) == 0) return -1; int NMAX = MIN(MIN(q->p->log2len, MAXBUTWIDTH+1), q->p->log2len - q->p->log2vecwidth + 1); if (pos == -1) { int N = n / 2 + MAX(q->p->log2vecwidth, 1); if (N >= NMAX) return -1; return cln2pos((n & 1) * CONFIG_MT, q->p->log2len, N); } int config = (pos2config(pos) & CONFIG_MT); int N = n + 1; int level = pos2level(pos) - pos2N(pos); if (level < 0 || N >= NMAX) return -1; if (level == 0) return n == 0 ? 
cln2pos(0, 0, 0) : -1; return cln2pos(config, level, N); } static uint64_t ksAdjacentCost(ks_t *q, int pos, int n) { int nxpos = ksAdjacent(q, pos, n); if (nxpos == -1) return 0; int config = pos2config(nxpos), level = pos2level(nxpos), N = pos2N(nxpos); uint64_t ret0 = q->p->tm[config | 0][level*(MAXBUTWIDTH+1) + N]; uint64_t ret1 = q->p->tm[config | 1][level*(MAXBUTWIDTH+1) + N]; return MIN(ret0, ret1); } static void searchForBestPath(SleefDFT *p) { ks_t *q = ksInit(p); for(int i=0;;i++) { int v = ksAdjacent(q, -1, i); if (v == -1) break; uint64_t c = ksAdjacentCost(q, -1, i); int path[1] = { v }; ksAddPath(q, path, 1, c); } while(ksSize(q) != 0) { uint64_t bestCost = 1ULL << 60; int bestPathNum = -1; for(int i=0;i= NSHORTESTPATHS) continue; ksSetCountu(q, lastPos, ksCountu(q, lastPos)+1); if (ksIsDest(q, lastPos)) { if (ksAddBestPath(q, path, pathLen, cost) >= NSHORTESTPATHS) break; continue; } for(int i=0;;i++) { int v = ksAdjacent(q, lastPos, i); if (v == -1) break; assert(0 <= pos2N(v) && pos2N(v) <= q->p->log2len); uint64_t c = ksAdjacentCost(q, lastPos, i); path[pathLen] = v; ksAddPath(q, path, pathLen+1, cost + c); } } for(int j = p->log2len;j >= 0;j--) p->bestPath[j] = 0; if (((p->mode & SLEEF_MODE_MEASURE) != 0 || (planFilePathSet && (p->mode & SLEEF_MODE_MEASUREBITS) == 0))) { uint64_t besttm = 1ULL << 62; int bestPath = -1; const int niter = 1 + 5000000 / ((1 << p->log2len) + 1); real *s2 = NULL, *d2 = NULL; const real *s = p->in == NULL ? (s2 = (real *)memset(Sleef_malloc((2 << p->log2len) * sizeof(real)), 0, sizeof(real) * (2 << p->log2len))) : p->in; real *d = p->out == NULL ? 
(d2 = (real *)memset(Sleef_malloc((2 << p->log2len) * sizeof(real)), 0, sizeof(real) * (2 << p->log2len))) : p->out; #ifdef _OPENMP const int tn = omp_get_thread_num(); #else const int tn = 0; #endif real *t[] = { p->x1[tn], p->x0[tn], d }; for(int mt=0;mt<2;mt++) { for(int i=q->nPaths-1;i>=0;i--) { if (((pos2config(q->path[i][0]) & CONFIG_MT) != 0) != mt) continue; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) { for(int j=0;jpathLen[i];j++) { int N = pos2N(q->path[i][j]); int level = pos2level(q->path[i][j]); int config = pos2config(q->path[i][j]) & ~1; uint64_t t0 = q->p->tm[config | 0][level*(MAXBUTWIDTH+1) + N]; uint64_t t1 = q->p->tm[config | 1][level*(MAXBUTWIDTH+1) + N]; config = t0 < t1 ? config : (config | 1); if (N != 0) printf("%d(%s) ", N, configStr[config]); } } if (mt) startAllThreads(p->nThread); uint64_t tm0 = Sleef_currentTimeMicros(); for(int k=0;kpathLen & 1) == 1) nb = -1; for(int level = p->log2len, j=0;level >= 1;j++) { assert(pos2level(q->path[i][j]) == level); int N = pos2N(q->path[i][j]); int config = pos2config(q->path[i][j]) & ~1; uint64_t t0 = q->p->tm[config | 0][level*(MAXBUTWIDTH+1) + N]; uint64_t t1 = q->p->tm[config | 1][level*(MAXBUTWIDTH+1) + N]; config = t0 < t1 ? config : (config | 1); dispatch(p, N, t[nb+1], lb, level, config); level -= N; lb = t[nb+1]; nb = (nb + 1) & 1; } } uint64_t tm1 = Sleef_currentTimeMicros(); for(int k=0;kpathLen & 1) == 1) nb = -1; for(int level = p->log2len, j=0;level >= 1;j++) { assert(pos2level(q->path[i][j]) == level); int N = pos2N(q->path[i][j]); int config = pos2config(q->path[i][j]) & ~1; uint64_t t0 = q->p->tm[config | 0][level*(MAXBUTWIDTH+1) + N]; uint64_t t1 = q->p->tm[config | 1][level*(MAXBUTWIDTH+1) + N]; config = t0 < t1 ? 
config : (config | 1); dispatch(p, N, t[nb+1], lb, level, config); level -= N; lb = t[nb+1]; nb = (nb + 1) & 1; } } uint64_t tm2 = Sleef_currentTimeMicros(); if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf(" : %lld %lld\n", (long long int)(tm1 - tm0), (long long int)(tm2 - tm1)); if ((tm1 - tm0) < besttm) { bestPath = i; besttm = tm1 - tm0; } if ((tm2 - tm1) < besttm) { bestPath = i; besttm = tm2 - tm1; } } } for(int level = p->log2len, j=0;level >= 1;j++) { assert(pos2level(q->path[bestPath][j]) == level); int N = pos2N(q->path[bestPath][j]); int config = pos2config(q->path[bestPath][j]) & ~1; uint64_t t0 = q->p->tm[config | 0][level*(MAXBUTWIDTH+1) + N]; uint64_t t1 = q->p->tm[config | 1][level*(MAXBUTWIDTH+1) + N]; config = t0 < t1 ? config : (config | 1); p->bestPath[level] = N; p->bestPathConfig[level] = config; level -= N; } if (d2 != NULL) Sleef_free(d2); if (s2 != NULL) Sleef_free(s2); } else { for(int level = p->log2len, j=0;level >= 1;j++) { int bestPath = 0; assert(pos2level(q->path[bestPath][j]) == level); int N = pos2N(q->path[bestPath][j]); int config = pos2config(q->path[bestPath][j]); p->bestPath[level] = N; p->bestPathConfig[level] = config; level -= N; } } ksDispose(q); } // static uint64_t estimate(int log2len, int level, int N, int config) { uint64_t ret = N * 1000 + ABS(N-3) * 1000; if (log2len >= 14 && (config & CONFIG_MT) != 0) ret /= 2; return ret; } static void measureBut(SleefDFT *p) { if (p->x0 == NULL) return; // #ifdef _OPENMP const int tn = omp_get_thread_num(); #else const int tn = 0; #endif real *s = (real *)memset(p->x0[tn], 0, sizeof(real) * (2 << p->log2len)); real *d = (real *)memset(p->x1[tn], 0, sizeof(real) * (2 << p->log2len)); const int niter = 1 + 100000 / ((1 << p->log2len) + 1); #define MEASURE_REPEAT 4 for(int rep=1;rep<=MEASURE_REPEAT;rep++) { for(int config=0;configmode2 & SLEEF_MODE2_MT1D) == 0 && (config & CONFIG_MT) != 0) continue; for(uint32_t level = p->log2len;level >= 1;level--) { for(uint32_t 
N=1;N<=MAXBUTWIDTH;N++) { if (level < N || p->log2len <= N) continue; if (level == N) { if ((int)p->log2len - (int)level < p->log2vecwidth) continue; uint64_t tm = Sleef_currentTimeMicros(); for(int i=0;itm[config][level*(MAXBUTWIDTH+1)+N] = MIN(p->tm[config][level*(MAXBUTWIDTH+1)+N], tm); } else if (level == p->log2len) { if (p->tbl[N] == NULL || p->tbl[N][level] == NULL) continue; if (p->vecwidth > (1 << N)) continue; if ((config & CONFIG_MT) != 0) { int i1=0; #ifdef _OPENMP #pragma omp parallel for #endif for(i1=0;i1 < (1 << (p->log2len-N-p->log2vecwidth));i1++) { int i0 = i1 << p->log2vecwidth; p->perm[level][i1] = 2*perm(p->log2len, i0, p->log2len-level, p->log2len-(level-N)); } } else { for(int i0=0, i1=0;i0 < (1 << (p->log2len-N));i0+=p->vecwidth, i1++) { p->perm[level][i1] = 2*perm(p->log2len, i0, p->log2len-level, p->log2len-(level-N)); } } uint64_t tm = Sleef_currentTimeMicros(); for(int i=0;itm[config][level*(MAXBUTWIDTH+1)+N] = MIN(p->tm[config][level*(MAXBUTWIDTH+1)+N], tm); } else { if (p->tbl[N] == NULL || p->tbl[N][level] == NULL) continue; if (p->vecwidth > 2 && p->log2len <= N+2) continue; if ((int)p->log2len - (int)level < p->log2vecwidth) continue; if ((config & CONFIG_MT) != 0) { int i1=0; #ifdef _OPENMP #pragma omp parallel for #endif for(i1=0;i1 < (1 << (p->log2len-N-p->log2vecwidth));i1++) { int i0 = i1 << p->log2vecwidth; p->perm[level][i1] = 2*perm(p->log2len, i0, p->log2len-level, p->log2len-(level-N)); } } else { for(int i0=0, i1=0;i0 < (1 << (p->log2len-N));i0+=p->vecwidth, i1++) { p->perm[level][i1] = 2*perm(p->log2len, i0, p->log2len-level, p->log2len-(level-N)); } } uint64_t tm = Sleef_currentTimeMicros(); for(int i=0;itm[config][level*(MAXBUTWIDTH+1)+N] = MIN(p->tm[config][level*(MAXBUTWIDTH+1)+N], tm); } } } } } if ((p->mode & SLEEF_MODE_VERBOSE) != 0) { for(uint32_t level = p->log2len;level >= 1;level--) { for(uint32_t N=1;N<=MAXBUTWIDTH;N++) { if (level < N || p->log2len <= N) continue; if (level == N) { if ((int)p->log2len - 
(int)level < p->log2vecwidth) continue; printf("bot %d, %d, %d, ", p->log2len, level, N); for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] == 1ULL << 60) { printf("N/A, "); } else { printf("%lld, ", (long long int)p->tm[config][level*(MAXBUTWIDTH+1)+N]); } } printf("\n"); } else if (level == p->log2len) { if (p->tbl[N] == NULL || p->tbl[N][level] == NULL) continue; if (p->vecwidth > (1 << N)) continue; printf("top %d, %d, %d, ", p->log2len, level, N); for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] == 1ULL << 60) { printf("N/A, "); } else { printf("%lld, ", (long long int)p->tm[config][level*(MAXBUTWIDTH+1)+N]); } } printf("\n"); } else { if (p->tbl[N] == NULL || p->tbl[N][level] == NULL) continue; if (p->vecwidth > 2 && p->log2len <= N+2) continue; if ((int)p->log2len - (int)level < p->log2vecwidth) continue; printf("mid %d, %d, %d, ", p->log2len, level, N); for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] == 1ULL << 60) { printf("N/A, "); } else { printf("%lld, ", (long long int)p->tm[config][level*(MAXBUTWIDTH+1)+N]); } } printf("\n"); } } } } } static void estimateBut(SleefDFT *p) { for(uint32_t level = p->log2len;level >= 1;level--) { for(uint32_t N=1;N<=MAXBUTWIDTH;N++) { if (level < N || p->log2len <= N) continue; if (level == N) { if ((int)p->log2len - (int)level < p->log2vecwidth) continue; for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] = estimate(p->log2len, level, N, config); } } else if (level == p->log2len) { if (p->tbl[N] == NULL || p->tbl[N][level] == NULL) continue; if (p->vecwidth > (1 << N)) continue; for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] = estimate(p->log2len, level, N, config); } } else { if (p->tbl[N] == NULL || p->tbl[N][level] == NULL) continue; if (p->vecwidth > 2 && p->log2len <= N+2) continue; if ((int)p->log2len - (int)level < p->log2vecwidth) continue; for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] = estimate(p->log2len, level, N, config); } } } } } static int 
measure(SleefDFT *p, int randomize) { if (p->log2len == 1) { p->bestTime = 1ULL << 60; p->pathLen = 1; p->bestPath[1] = 1; return 1; } if (PlanManager_loadMeasurementResultsP(p, (p->mode & SLEEF_MODE_NO_MT) != 0 ? 1 : 0)) { if ((p->mode & SLEEF_MODE_VERBOSE) != 0) { printf("Path(loaded) : "); for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) printf("%d(%s) ", p->bestPath[j], configStr[p->bestPathConfig[j]]); printf("\n"); } return 1; } int toBeSaved = 0; for(uint32_t level = p->log2len;level >= 1;level--) { for(uint32_t N=1;N<=MAXBUTWIDTH;N++) { for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] = 1ULL << 60; } } } if (((p->mode & SLEEF_MODE_MEASURE) != 0 || (planFilePathSet && (p->mode & SLEEF_MODE_MEASUREBITS) == 0)) && !randomize) { measureBut(p); toBeSaved = 1; } else { estimateBut(p); } int executable = 0; for(int i=1;i<=MAXBUTWIDTH && !executable;i++) { if (p->tm[0][p->log2len*(MAXBUTWIDTH+1)+i] < (1ULL << 60)) executable = 1; } if (!executable) return 0; p->bestTime = 1ULL << 60; p->bestPath[p->log2len] = 0; if (!randomize) { searchForBestPath(p); } else { int path[MAXLOG2LEN+1]; int pathConfig[MAXLOG2LEN+1]; for(int j = p->log2len;j >= 0;j--) path[j] = pathConfig[j] = 0; int nTrial = 100000; do { nTrial = searchForRandomPathRecurse(p, p->log2len, path, pathConfig, 0, nTrial); } while(p->bestTime == 1ULL << 60 && nTrial >= 0); } if (p->bestPath[p->log2len] == 0) return 0; p->pathLen = 0; for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) p->pathLen++; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) { printf("Path"); if (randomize) printf("(random) :"); else if (toBeSaved) printf("(measured) :"); else printf("(estimated) :"); for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) printf("%d(%s) ", p->bestPath[j], configStr[p->bestPathConfig[j]]); printf("\n"); } if (toBeSaved) { PlanManager_saveMeasurementResultsP(p, (p->mode & SLEEF_MODE_NO_MT) != 0 ? 
1 : 0); } return 1; } static void measureTranspose(SleefDFT *p) { if (PlanManager_loadMeasurementResultsT(p)) { if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("transpose NoMT(loaded): %lld\n", (long long int)p->tmNoMT); if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("transpose MT(loaded): %lld\n", (long long int)p->tmMT); return; } if ((p->mode & SLEEF_MODE_MEASURE) == 0 && (!planFilePathSet || (p->mode & SLEEF_MODE_MEASUREBITS) != 0)) { if (p->log2hlen + p->log2vlen >= 14) { p->tmNoMT = 20; p->tmMT = 10; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("transpose : selected MT(estimated)\n"); } else { p->tmNoMT = 10; p->tmMT = 20; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("transpose : selected NoMT(estimated)\n"); } return; } real *tBuf2 = (real *)Sleef_malloc(sizeof(real)*2*p->hlen*p->vlen); const int niter = 1 + 5000000 / (p->hlen * p->vlen + 1); uint64_t tm; tm = Sleef_currentTimeMicros(); for(int i=0;itBuf, p->log2hlen, p->log2vlen); transpose(tBuf2, p->tBuf, p->log2vlen, p->log2hlen); } p->tmNoMT = Sleef_currentTimeMicros() - tm + 1; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("transpose NoMT(measured): %lld\n", (long long int)p->tmNoMT); #ifdef _OPENMP tm = Sleef_currentTimeMicros(); for(int i=0;itBuf, p->log2hlen, p->log2vlen); transposeMT(tBuf2, p->tBuf, p->log2vlen, p->log2hlen); } p->tmMT = Sleef_currentTimeMicros() - tm + 1; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("transpose MT(measured): %lld\n", (long long int)p->tmMT); #else p->tmMT = p->tmNoMT*2; #endif Sleef_free(tBuf2); PlanManager_saveMeasurementResultsT(p); } // Implementation of SleefDFT_*_init1d EXPORT SleefDFT *INIT(uint32_t n, const real *in, real *out, uint64_t mode) { SleefDFT *p = (SleefDFT *)calloc(1, sizeof(SleefDFT)); p->magic = MAGIC; p->baseTypeID = BASETYPEID; p->in = (const void *)in; p->out = (void *)out; // Mode p->mode = mode; if ((p->mode & SLEEF_MODE_NO_MT) == 0) { p->mode2 |= SLEEF_MODE2_MT1D; } if ((mode & SLEEF_MODE_REAL) != 0) n /= 2; p->log2len = 
ilog2(n); if (p->log2len <= 1) return p; if ((mode & SLEEF_MODE_ALT) != 0) p->mode = mode = mode ^ SLEEF_MODE_BACKWARD; #ifdef _OPENMP p->nThread = omp_thread_count(); #else p->nThread = 1; p->mode2 &= ~SLEEF_MODE2_MT1D; #endif // ISA availability int bestPriority = -1; p->isa = -1; for(int i=0;i= (*GETINT[i])(GETINT_VECWIDTH) * (*GETINT[i])(GETINT_VECWIDTH)) { bestPriority = (*GETINT[i])(GETINT_DFTPRIORITY); p->isa = i; } } if (p->isa == -1) { if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("ISA not available\n"); p->magic = 0; free(p); return NULL; } // Tables p->perm = (uint32_t **)calloc(sizeof(uint32_t *), p->log2len+1); for(int level = p->log2len;level >= 1;level--) { p->perm[level] = (uint32_t *)Sleef_malloc(sizeof(uint32_t) * ((1 << p->log2len) + 8)); } p->x0 = malloc(sizeof(real *) * p->nThread); p->x1 = malloc(sizeof(real *) * p->nThread); for(int i=0;inThread;i++) { p->x0[i] = (real *)Sleef_malloc(sizeof(real) * 2 * n); p->x1[i] = (real *)Sleef_malloc(sizeof(real) * 2 * n); } if ((mode & SLEEF_MODE_REAL) != 0) { p->rtCoef0 = (real *)Sleef_malloc(sizeof(real) * n); p->rtCoef1 = (real *)Sleef_malloc(sizeof(real) * n); if ((mode & SLEEF_MODE_BACKWARD) == 0) { for(uint32_t i=0;irtCoef0)[i*2+0] = ((real *)p->rtCoef0)[i*2+1] = (real)0.5 - (real)0.5 * sc.x; ((real *)p->rtCoef1)[i*2+0] = ((real *)p->rtCoef1)[i*2+1] = (real)0.5*sc.y; } } else { for(uint32_t i=0;irtCoef0)[i*2+0] = ((real *)p->rtCoef0)[i*2+1] = (real)0.5 + (real)0.5 * sc.x; ((real *)p->rtCoef1)[i*2+0] = ((real *)p->rtCoef1)[i*2+1] = (real)0.5*sc.y; } } } // Measure int sign = (mode & SLEEF_MODE_BACKWARD) != 0 ? 
-1 : 1; p->vecwidth = (*GETINT[p->isa])(GETINT_VECWIDTH); p->log2vecwidth = ilog2(p->vecwidth); for(int i=1;i<=MAXBUTWIDTH;i++) { ((real ***)p->tbl)[i] = makeTable(sign, p->vecwidth, p->log2len, i, constK[i]); } if (!measure(p, (mode & SLEEF_MODE_DEBUG))) { // Fall back to the first ISA freeTables(p); p->isa = 0; p->vecwidth = (*GETINT[p->isa])(GETINT_VECWIDTH); p->log2vecwidth = ilog2(p->vecwidth); for(int i=1;i<=MAXBUTWIDTH;i++) { ((real ***)p->tbl)[i] = makeTable(sign, p->vecwidth, p->log2len, i, constK[i]); } for(int level = p->log2len;level >= 1;) { int N = ABS(p->bestPath[level]); if (level == N) { level -= N; continue; } int i1 = 0; for(int i0=0;i0 < (1 << (p->log2len-N));i0+=p->vecwidth, i1++) { p->perm[level][i1] = 2*perm(p->log2len, i0, p->log2len-level, p->log2len-(level-N)); } for(;i1 < (1 << p->log2len) + 8;i1++) p->perm[level][i1] = 0; level -= N; } if (!measure(p, (mode & SLEEF_MODE_DEBUG))) { if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("Suitable ISA not found. This should not happen.\n"); return NULL; } } for(int level = p->log2len;level >= 1;) { int N = ABS(p->bestPath[level]); if (level == N) { level -= N; continue; } int i1 = 0; for(int i0=0;i0 < (1 << (p->log2len-N));i0+=p->vecwidth, i1++) { p->perm[level][i1] = 2*perm(p->log2len, i0, p->log2len-level, p->log2len-(level-N)); } for(;i1 < (1 << p->log2len) + 8;i1++) p->perm[level][i1] = 0; level -= N; } if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("ISA : %s %d bit %s\n", (char *)(*GETPTR[p->isa])(0), (int)(GETINT[p->isa](GETINT_VECWIDTH) * sizeof(real) * 16), BASETYPESTRING); return p; } // Implementation of SleefDFT_*_init2d EXPORT SleefDFT *INIT2D(uint32_t vlen, uint32_t hlen, const real *in, real *out, uint64_t mode) { SleefDFT *p = (SleefDFT *)calloc(1, sizeof(SleefDFT)); p->magic = MAGIC2D; p->mode = mode; p->baseTypeID = BASETYPEID; p->in = in; p->out = out; p->hlen = hlen; p->log2hlen = ilog2(hlen); p->vlen = vlen; p->log2vlen = ilog2(vlen); uint64_t mode1D = mode; mode1D |= 
SLEEF_MODE_NO_MT; if ((mode & SLEEF_MODE_NO_MT) == 0) p->mode3 |= SLEEF_MODE3_MT2D; p->instH = p->instV = INIT(hlen, NULL, NULL, mode1D); if (hlen != vlen) p->instV = INIT(vlen, NULL, NULL, mode1D); p->tBuf = (void *)Sleef_malloc(sizeof(real)*2*hlen*vlen); measureTranspose(p); return p; } // Implementation of SleefDFT_*_execute EXPORT void EXECUTE(SleefDFT *p, const real *s0, real *d0) { assert(p != NULL && (p->magic == MAGIC || p->magic == MAGIC2D)); const real *s = s0 == NULL ? p->in : s0; real *d = d0 == NULL ? p->out : d0; if (p->magic == MAGIC2D) { // S -> T -> D -> T -> D real *tBuf = (real *)(p->tBuf); #ifdef _OPENMP if ((p->mode3 & SLEEF_MODE3_MT2D) != 0 && (((p->mode & SLEEF_MODE_DEBUG) == 0 && p->tmMT < p->tmNoMT) || ((p->mode & SLEEF_MODE_DEBUG) != 0 && (rand() & 1)))) { int y=0; #pragma omp parallel for for(y=0;yvlen;y++) { EXECUTE(p->instH, &s[p->hlen*2*y], &tBuf[p->hlen*2*y]); } transposeMT(d, tBuf, p->log2vlen, p->log2hlen); #pragma omp parallel for for(y=0;yhlen;y++) { EXECUTE(p->instV, &d[p->vlen*2*y], &tBuf[p->vlen*2*y]); } transposeMT(d, tBuf, p->log2hlen, p->log2vlen); } else #endif { for(int y=0;yvlen;y++) { EXECUTE(p->instH, &s[p->hlen*2*y], &tBuf[p->hlen*2*y]); } transpose(d, tBuf, p->log2vlen, p->log2hlen); for(int y=0;yhlen;y++) { EXECUTE(p->instV, &d[p->vlen*2*y], &tBuf[p->vlen*2*y]); } transpose(d, tBuf, p->log2hlen, p->log2vlen); } return; } if (p->log2len <= 1) { if ((p->mode & SLEEF_MODE_REAL) == 0) { real r0 = s[0] + s[2]; real r1 = s[1] + s[3]; real r2 = s[0] - s[2]; real r3 = s[1] - s[3]; d[0] = r0; d[1] = r1; d[2] = r2; d[3] = r3; } else { if ((p->mode & SLEEF_MODE_ALT) == 0) { if (p->log2len == 1) { if ((p->mode & SLEEF_MODE_BACKWARD) == 0) { real r0 = s[0] + s[2] + (s[1] + s[3]); real r1 = s[0] + s[2] - (s[1] + s[3]); real r2 = s[0] - s[2]; real r3 = s[3] - s[1]; d[0] = r0; d[1] = 0; d[2] = r2; d[3] = r3; d[4] = r1; d[5] = 0; } else { real r0 = (s[0] + s[4])*(real)0.5 + s[2]; real r1 = (s[0] - s[4])*(real)0.5 - s[3]; real r2 = 
(s[0] + s[4])*(real)0.5 - s[2]; real r3 = (s[0] - s[4])*(real)0.5 + s[3]; d[0] = r0*2; d[1] = r1*2; d[2] = r2*2; d[3] = r3*2; } } else { if ((p->mode & SLEEF_MODE_BACKWARD) == 0) { real r0 = s[0] + s[1]; real r1 = s[0] - s[1]; d[0] = r0; d[1] = 0; d[2] = r1; d[3] = 0; } else { real r0 = s[0] + s[2]; real r1 = s[0] - s[2]; d[0] = r0; d[1] = r1; } } } else { if (p->log2len == 1) { if ((p->mode & SLEEF_MODE_BACKWARD) == 0) { real r0 = s[0] + s[2] + (s[1] + s[3]); real r1 = s[0] + s[2] - (s[1] + s[3]); real r2 = s[0] - s[2]; real r3 = s[1] - s[3]; d[0] = r0; d[1] = r1; d[2] = r2; d[3] = r3; } else { real r0 = (s[0] + s[1])*(real)0.5 + s[2]; real r1 = (s[0] - s[1])*(real)0.5 + s[3]; real r2 = (s[0] + s[1])*(real)0.5 - s[2]; real r3 = (s[0] - s[1])*(real)0.5 - s[3]; d[0] = r0; d[1] = r1; d[2] = r2; d[3] = r3; } } else { real c = ((p->mode & SLEEF_MODE_BACKWARD) != 0) ? (real)0.5 : (real)1.0; real r0 = s[0] + s[1]; real r1 = s[0] - s[1]; d[0] = r0 * c; d[1] = r1 * c; } } } return; } // #ifdef _OPENMP const int tn = omp_get_thread_num(); real *t[] = { p->x1[tn], p->x0[tn], d }; #else real *t[] = { p->x1[0], p->x0[0], d }; #endif const real *lb = s; int nb = 0; if ((p->mode & SLEEF_MODE_REAL) != 0 && (p->pathLen & 1) == 0 && ((p->mode & SLEEF_MODE_BACKWARD) != 0) != ((p->mode & SLEEF_MODE_ALT) != 0)) nb = -1; if ((p->mode & SLEEF_MODE_REAL) == 0 && (p->pathLen & 1) == 1) nb = -1; if ((p->mode & SLEEF_MODE_REAL) != 0 && ((p->mode & SLEEF_MODE_BACKWARD) != 0) != ((p->mode & SLEEF_MODE_ALT) != 0)) { (*REALSUB1[p->isa])(t[nb+1], s, p->log2len, p->rtCoef0, p->rtCoef1, (p->mode & SLEEF_MODE_ALT) == 0); if ((p-> mode & SLEEF_MODE_ALT) == 0) t[nb+1][(1 << p->log2len)+1] = -s[(1 << p->log2len)+1] * 2; lb = t[nb+1]; nb = (nb + 1) & 1; } for(int level = p->log2len;level >= 1;) { int N = ABS(p->bestPath[level]), config = p->bestPathConfig[level]; dispatch(p, N, t[nb+1], lb, level, config); level -= N; lb = t[nb+1]; nb = (nb + 1) & 1; } if ((p->mode & SLEEF_MODE_REAL) != 0 && ((p->mode 
& SLEEF_MODE_BACKWARD) == 0) != ((p->mode & SLEEF_MODE_ALT) != 0)) { (*REALSUB0[p->isa])(d, lb, p->log2len, p->rtCoef0, p->rtCoef1); if ((p->mode & SLEEF_MODE_ALT) == 0) { d[(1 << p->log2len)+1] = -d[(1 << p->log2len)+1]; d[(2 << p->log2len)+0] = d[1]; d[(2 << p->log2len)+1] = 0; d[1] = 0; } } } sleef-3.5.1/src/dft/dftcommon.c000066400000000000000000000271671373003144100163250ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #ifdef _OPENMP #include #endif #include "misc.h" #include "sleef.h" #define IMPORT_IS_EXPORT #include "sleefdft.h" #include "dispatchparam.h" #include "dftcommon.h" #include "common.h" #include "arraymap.h" #define MAGIC_FLOAT 0x31415926 #define MAGIC_DOUBLE 0x27182818 #define MAGIC_LONGDOUBLE 0x14142135 #define MAGIC_QUAD 0x33166247 #define MAGIC2D_FLOAT 0x22360679 #define MAGIC2D_DOUBLE 0x17320508 #define MAGIC2D_LONGDOUBLE 0x26457513 #define MAGIC2D_QUAD 0x36055512 const char *configStr[] = { "ST", "ST stream", "MT", "MT stream" }; static int parsePathStr(char *p, int *path, int *config, int pathLenMax, int log2len) { int pathLen = 0, l2l = 0; for(;;) { while(*p == ' ') p++; if (*p == '\0') break; if (!isdigit(*p)) return -1; pathLen++; if (pathLen >= pathLenMax) return -2; int n = 0; while(isdigit(*p)) n = n * 10 + *p++ - '0'; if (n > MAXBUTWIDTH) return -6; path[pathLen-1] = n; l2l += n; config[pathLen-1] = 0; if (*p != '(') continue; int c; for(c=3;c>=0;c--) if (strncmp(p+1, configStr[c], strlen(configStr[c])) == 0) break; if (c == -1) return -3; p += strlen(configStr[c]) + 1; if (*p != ')') return -4; p++; config[pathLen-1] = c; } if (l2l != log2len) return -5; return pathLen; } EXPORT void SleefDFT_setPath(SleefDFT *p, char *pathStr) { assert(p != NULL && (p->magic == MAGIC_FLOAT 
|| p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD)); int path[32], config[32]; int pathLen = parsePathStr(pathStr, path, config, 31, p->log2len); if (pathLen < 0) { if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("Error %d in parsing path string : %s\n", pathLen, pathStr); return; } for(uint32_t j = 0;j <= p->log2len;j++) p->bestPath[j] = 0; for(int level = p->log2len, j=0;level > 0 && j < pathLen;) { p->bestPath[level] = path[j]; p->bestPathConfig[level] = config[j]; level -= path[j]; j++; } p->pathLen = 0; for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) p->pathLen++; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) { printf("Set path : "); for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) printf("%d(%s) ", p->bestPath[j], configStr[p->bestPathConfig[j]]); printf("\n"); } } void freeTables(SleefDFT *p) { for(int N=1;N<=MAXBUTWIDTH;N++) { for(uint32_t level=N;level<=p->log2len;level++) { Sleef_free(p->tbl[N][level]); } free(p->tbl[N]); p->tbl[N] = NULL; } } EXPORT void SleefDFT_dispose(SleefDFT *p) { if (p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE || p->magic == MAGIC2D_LONGDOUBLE || p->magic == MAGIC2D_QUAD)) { Sleef_free(p->tBuf); SleefDFT_dispose(p->instH); if (p->hlen != p->vlen) SleefDFT_dispose(p->instV); p->magic = 0; free(p); return; } assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD)); if (p->log2len <= 1) { p->magic = 0; free(p); return; } if ((p->mode & SLEEF_MODE_REAL) != 0) { Sleef_free(p->rtCoef1); Sleef_free(p->rtCoef0); p->rtCoef0 = p->rtCoef1 = NULL; } for(int level = p->log2len;level >= 1;level--) { Sleef_free(p->perm[level]); } free(p->perm); p->perm = NULL; freeTables(p); p->magic = 0; free(p); } uint32_t ilog2(uint32_t q) { static const uint32_t tab[] = {0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4}; uint32_t r = 0,qq; if (q & 0xffff0000) r = 16; q >>= r; qq = q | (q >> 1); qq |= (qq >> 2); qq = ((qq & 
0x10) >> 4) | ((qq & 0x100) >> 7) | ((qq & 0x1000) >> 10); return r + tab[qq] * 4 + tab[q >> (tab[qq] * 4)] - 1; } // char *dftPlanFilePath = NULL; char *archID = NULL; uint64_t planMode = SLEEF_PLAN_REFERTOENVVAR; ArrayMap *planMap = NULL; int planFilePathSet = 0, planFileLoaded = 0; #ifdef _OPENMP omp_lock_t planMapLock; int planMapLockInitialized = 0; #endif static void initPlanMapLock() { #ifdef _OPENMP #pragma omp critical { if (!planMapLockInitialized) { planMapLockInitialized = 1; omp_init_lock(&planMapLock); } } #endif } static void planMap_clear() { if (planMap != NULL) ArrayMap_dispose(planMap); planMap = NULL; } EXPORT void SleefDFT_setPlanFilePath(const char *path, const char *arch, uint64_t mode) { initPlanMapLock(); if ((mode & SLEEF_PLAN_RESET) != 0) { planMap_clear(); planFileLoaded = 0; planFilePathSet = 0; } if (dftPlanFilePath != NULL) free(dftPlanFilePath); if (path != NULL) { dftPlanFilePath = malloc(strlen(path)+10); strcpy(dftPlanFilePath, path); } else { dftPlanFilePath = NULL; } if (archID != NULL) free(archID); if (arch == NULL) arch = Sleef_getCpuIdString(); archID = malloc(strlen(arch)+10); strcpy(archID, arch); planMode = mode; planFilePathSet = 1; } static void loadPlanFromFile() { if (planFilePathSet == 0 && (planMode & SLEEF_PLAN_REFERTOENVVAR) != 0) { char *s = getenv(ENVVAR); if (s != NULL) SleefDFT_setPlanFilePath(s, NULL, planMode); } if (planMap != NULL) ArrayMap_dispose(planMap); if (dftPlanFilePath != NULL && (planMode & SLEEF_PLAN_RESET) == 0) { planMap = ArrayMap_load(dftPlanFilePath, archID, PLANFILEID, (planMode & SLEEF_PLAN_NOLOCK) == 0); } if (planMap == NULL) planMap = initArrayMap(); planFileLoaded = 1; } static void savePlanToFile() { assert(planFileLoaded); if ((planMode & SLEEF_PLAN_READONLY) == 0 && dftPlanFilePath != NULL) { ArrayMap_save(planMap, dftPlanFilePath, archID, PLANFILEID); } } #define CATBIT 8 #define BASETYPEIDBIT 2 #define LOG2LENBIT 8 #define DIRBIT 1 #define BUTSTATBIT 16 static uint64_t 
keyButStat(int baseTypeID, int log2len, int dir, int butStat) { dir = (dir & SLEEF_MODE_BACKWARD) == 0; int cat = 0; uint64_t k = 0; k = (k << BUTSTATBIT) | (butStat & ~(~(uint64_t)0 << BUTSTATBIT)); k = (k << LOG2LENBIT) | (log2len & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << DIRBIT) | (dir & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << BASETYPEIDBIT) | (baseTypeID & ~(~(uint64_t)0 << BASETYPEIDBIT)); k = (k << CATBIT) | (cat & ~(~(uint64_t)0 << CATBIT)); return k; } #define LEVELBIT LOG2LENBIT #define BUTCONFIGBIT 8 #define TRANSCONFIGBIT 8 static uint64_t keyTrans(int baseTypeID, int hlen, int vlen, int transConfig) { int max = MAX(hlen, vlen), min = MIN(hlen, vlen); int cat = 2; uint64_t k = 0; k = (k << TRANSCONFIGBIT) | (transConfig & ~(~(uint64_t)0 << TRANSCONFIGBIT)); k = (k << LOG2LENBIT) | (max & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << LOG2LENBIT) | (min & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << BASETYPEIDBIT) | (baseTypeID & ~(~(uint64_t)0 << BASETYPEIDBIT)); k = (k << CATBIT) | (cat & ~(~(uint64_t)0 << CATBIT)); return k; } static uint64_t keyPath(int baseTypeID, int log2len, int dir, int level, int config) { dir = (dir & SLEEF_MODE_BACKWARD) == 0; int cat = 3; uint64_t k = 0; k = (k << BUTCONFIGBIT) | (config & ~(~(uint64_t)0 << BUTCONFIGBIT)); k = (k << LEVELBIT) | (level & ~(~(uint64_t)0 << LEVELBIT)); k = (k << LOG2LENBIT) | (log2len & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << DIRBIT) | (dir & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << BASETYPEIDBIT) | (baseTypeID & ~(~(uint64_t)0 << BASETYPEIDBIT)); k = (k << CATBIT) | (cat & ~(~(uint64_t)0 << CATBIT)); return k; } static uint64_t keyPathConfig(int baseTypeID, int log2len, int dir, int level, int config) { dir = (dir & SLEEF_MODE_BACKWARD) == 0; int cat = 4; uint64_t k = 0; k = (k << BUTCONFIGBIT) | (config & ~(~(uint64_t)0 << BUTCONFIGBIT)); k = (k << LEVELBIT) | (level & ~(~(uint64_t)0 << LEVELBIT)); k = (k << LOG2LENBIT) | (log2len & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << DIRBIT) | (dir & 
~(~(uint64_t)0 << LOG2LENBIT)); k = (k << BASETYPEIDBIT) | (baseTypeID & ~(~(uint64_t)0 << BASETYPEIDBIT)); k = (k << CATBIT) | (cat & ~(~(uint64_t)0 << CATBIT)); return k; } static uint64_t planMap_getU64(uint64_t key) { char *s = ArrayMap_get(planMap, key); if (s == NULL) return 0; uint64_t ret; if (sscanf(s, "%" SCNx64, &ret) != 1) return 0; return ret; } static void planMap_putU64(uint64_t key, uint64_t value) { char *s = malloc(100); sprintf(s, "%" PRIx64, value); s = ArrayMap_put(planMap, key, s); if (s != NULL) free(s); } int PlanManager_loadMeasurementResultsP(SleefDFT *p, int pathCat) { assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD)); initPlanMapLock(); #ifdef _OPENMP omp_set_lock(&planMapLock); #endif if (!planFileLoaded) loadPlanFromFile(); int stat = planMap_getU64(keyButStat(p->baseTypeID, p->log2len, p->mode, pathCat+10)); if (stat == 0) { #ifdef _OPENMP omp_unset_lock(&planMapLock); #endif return 0; } int ret = 1; for(int j = p->log2len;j >= 0;j--) { p->bestPath[j] = planMap_getU64(keyPath(p->baseTypeID, p->log2len, p->mode, j, pathCat)); p->bestPathConfig[j] = planMap_getU64(keyPathConfig(p->baseTypeID, p->log2len, p->mode, j, pathCat)); if (p->bestPath[j] > MAXBUTWIDTH) ret = 0; } p->pathLen = 0; for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) p->pathLen++; #ifdef _OPENMP omp_unset_lock(&planMapLock); #endif return ret; } void PlanManager_saveMeasurementResultsP(SleefDFT *p, int pathCat) { assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD)); initPlanMapLock(); #ifdef _OPENMP omp_set_lock(&planMapLock); #endif if (!planFileLoaded) loadPlanFromFile(); if (planMap_getU64(keyButStat(p->baseTypeID, p->log2len, p->mode, pathCat+10)) != 0) { #ifdef _OPENMP omp_unset_lock(&planMapLock); #endif return; } for(int j = p->log2len;j >= 0;j--) { planMap_putU64(keyPath(p->baseTypeID, 
p->log2len, p->mode, j, pathCat), p->bestPath[j]); planMap_putU64(keyPathConfig(p->baseTypeID, p->log2len, p->mode, j, pathCat), p->bestPathConfig[j]); } planMap_putU64(keyButStat(p->baseTypeID, p->log2len, p->mode, pathCat+10), 1); if ((planMode & SLEEF_PLAN_READONLY) == 0) savePlanToFile(); #ifdef _OPENMP omp_unset_lock(&planMapLock); #endif } int PlanManager_loadMeasurementResultsT(SleefDFT *p) { assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE || p->magic == MAGIC2D_LONGDOUBLE || p->magic == MAGIC2D_QUAD)); initPlanMapLock(); int ret = 0; #ifdef _OPENMP omp_set_lock(&planMapLock); #endif if (!planFileLoaded) loadPlanFromFile(); p->tmNoMT = planMap_getU64(keyTrans(p->baseTypeID, p->log2hlen, p->log2vlen, 0)); p->tmMT = planMap_getU64(keyTrans(p->baseTypeID, p->log2hlen, p->log2vlen, 1)); #ifdef _OPENMP omp_unset_lock(&planMapLock); #endif return p->tmNoMT != 0; } void PlanManager_saveMeasurementResultsT(SleefDFT *p) { assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE || p->magic == MAGIC2D_LONGDOUBLE || p->magic == MAGIC2D_QUAD)); initPlanMapLock(); int ret = 0; #ifdef _OPENMP omp_set_lock(&planMapLock); #endif if (!planFileLoaded) loadPlanFromFile(); planMap_putU64(keyTrans(p->baseTypeID, p->log2hlen, p->log2vlen, 0), p->tmNoMT); planMap_putU64(keyTrans(p->baseTypeID, p->log2hlen, p->log2vlen, 1), p->tmMT ); if ((planMode & SLEEF_PLAN_READONLY) == 0) savePlanToFile(); #ifdef _OPENMP omp_unset_lock(&planMapLock); #endif } sleef-3.5.1/src/dft/dftcommon.h000066400000000000000000000032321373003144100163150ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #define CONFIGMAX 4 #define CONFIG_STREAM 1 #define CONFIG_MT 2 #define MAXLOG2LEN 32 typedef struct SleefDFT { uint32_t magic; uint64_t mode, mode2, mode3; int baseTypeID; const void *in; void *out; union { struct { uint32_t log2len; void **tbl[MAXBUTWIDTH+1]; void *rtCoef0, *rtCoef1; uint32_t **perm; void **x0, **x1; int isa; int planMode; int vecwidth, log2vecwidth; int nThread; uint64_t tm[CONFIGMAX][(MAXBUTWIDTH+1)*32]; uint64_t bestTime; int16_t bestPath[32], bestPathConfig[32], pathLen; }; struct { int32_t hlen, vlen; int32_t log2hlen, log2vlen; uint64_t tmNoMT, tmMT; struct SleefDFT *instH, *instV; void *tBuf; }; }; } SleefDFT; #define SLEEF_MODE2_MT1D (1 << 0) #define SLEEF_MODE3_MT2D (1 << 0) #define PLANFILEID "SLEEFDFT0\n" #define ENVVAR "SLEEFDFTPLAN" #define SLEEF_MODE_MEASUREBITS (3 << 20) void freeTables(SleefDFT *p); uint32_t ilog2(uint32_t q); //int PlanManager_loadMeasurementResultsB(SleefDFT *p); //void PlanManager_saveMeasurementResultsB(SleefDFT *p, int butStat); int PlanManager_loadMeasurementResultsT(SleefDFT *p); void PlanManager_saveMeasurementResultsT(SleefDFT *p); int PlanManager_loadMeasurementResultsP(SleefDFT *p, int pathCat); void PlanManager_saveMeasurementResultsP(SleefDFT *p, int pathCat); #define GETINT_VECWIDTH 100 #define GETINT_DFTPRIORITY 101 sleef-3.5.1/src/dft/mkdispatch.c000066400000000000000000000142431373003144100164550ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #ifndef ENABLE_STREAM #error ENABLE_STREAM not defined #endif int main(int argc, char **argv) { if (argc < 3) { fprintf(stderr, "Usage : %s ...\n", argv[0]); exit(-1); } const char *basetype = argv[1]; const int maxbutwidth = atoi(argv[2]); const int isastart = 3; const int isamax = argc - isastart; #if ENABLE_STREAM == 1 const int enable_stream = 1; #else const int enable_stream = 0; #endif printf("#define MAXBUTWIDTH %d\n", maxbutwidth); printf("\n"); if (strcmp(basetype, "paramonly") == 0) exit(0); printf("#define ISAMAX %d\n", isamax); printf("#define CONFIGMAX 4\n"); for(int k=isastart;k #include #include #include #define CONFIGMAX 4 char *replaceAll(const char *in, const char *pat, const char *replace) { const int replaceLen = strlen(replace); const int patLen = strlen(pat); char *str = malloc(strlen(in)+1); strcpy(str, in); for(;;) { char *p = strstr(str, pat); if (p == NULL) return str; int replace_pos = p - str; int tail_len = strlen(p + patLen); char *newstr = malloc(strlen(str) + (replaceLen - patLen) + 1); memcpy(newstr, str, replace_pos); memcpy(newstr + replace_pos, replace, replaceLen); memcpy(newstr + replace_pos + replaceLen, str + replace_pos + patLen, tail_len+1); free(str); str = newstr; } return str; } #define LEN 1024 char line[LEN+10]; int main(int argc, char **argv) { if (argc < 2) { fprintf(stderr, "Usage : %s ...\n", argv[0]); exit(-1); } const char *baseType = argv[1]; const int isastart = 2; const int isamax = argc - isastart; const int maxbutwidth = 6; for(int config=0;config> outShift); store(out, (0 << outShift), plus(load(in, (0 << inShift)), load(in, (1 << inShift)))); real2 v4 = minus(load(in, (0 << inShift)), load(in, (1 << inShift))); store(out, (1 << outShift), ctimesminusplus(v4, tbl[0 + tbloffset], ctimes(reverse(v4), tbl[1 + tbloffset]))); } } ALIGNED(8192) void but2b_%CONFIG%_%ISA%(real *RESTRICT 
out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  // but2b : for each of the k = 1 << (inShift - LOG2VECWIDTH) iterations,
  // reads the two inputs at element offsets 0 and 1 << inShift, stores their
  // sum at output offset 0 and stores their difference, combined with
  // tbl[0 + tbloffset] and tbl[1 + tbloffset], at output offset 1 << outShift.
  // The output base of iteration i is out0 + q[i].
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    store(out, (0 << outShift), plus(load(in, (0 << inShift)), load(in, (1 << inShift))));
    real2 v4 = minus(load(in, (0 << inShift)), load(in, (1 << inShift)));
    store(out, (1 << outShift), ctimesminusplus(v4, tbl[0 + tbloffset], ctimes(reverse(v4), tbl[1 + tbloffset])));
  }
}

// tbut2f : same data flow as the but2 kernels, but results are written with
// scatter(out, n, 2, ...) instead of store(), the table values are fetched
// with load(tbl, ...) at multiples of VECWIDTH, and tbloffset is K * i0.
// The "f"/"b" suffix presumably means forward/backward - TODO confirm; the
// bodies of tbut2f and tbut2b are identical.
ALIGNED(8192) void tbut2f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * i0;
    scatter(out, 0, 2, plus(load(in, (0 << inShift)), load(in, (1 << inShift))));
    real2 v4 = minus(load(in, (0 << inShift)), load(in, (1 << inShift)));
    scatter(out, 1, 2, timesminusplus(v4, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v4), load(tbl, 1 * VECWIDTH + tbloffset))));
  }
}

// tbut2b : identical body to tbut2f.
ALIGNED(8192) void tbut2b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * i0;
    scatter(out, 0, 2, plus(load(in, (0 << inShift)), load(in, (1 << inShift))));
    real2 v4 = minus(load(in, (0 << inShift)), load(in, (1 << inShift)));
    scatter(out, 1, 2, timesminusplus(v4, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v4), load(tbl, 1 * VECWIDTH + tbloffset))));
  }
}

// dft4f : reads four inputs at offsets n << shift (n = 0..3) and writes four
// outputs at the same offsets, using only sums, differences and the
// reverse/uminusplus/uplusminus helpers (no table). Input and output bases
// of iteration i are in0 + i0*2 and out0 + i0*2.
ALIGNED(8192) void dft4f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
  const int k = 1 << (shift - LOG2VECWIDTH);
  int i=0;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + i0*2;
    const real *in = in0 + i0*2;
    real2 v3 = load(in, 1 << shift);
    real2 v5 = load(in, 3 << shift);
    real2 v7 = reverse(minus(v3, v5));
    real2 v13 = plus(v3, v5);
    real2 v4 = load(in, 2 << shift);
    real2 v2 = load(in, 0 << shift);
    real2 v8 = minus(v4, v2);
    real2 v12 = plus(v2, v4);
    store(out, 3 << shift, minus(uminusplus(v7), v8));
    store(out, 1 << shift, minus(uplusminus(v7), v8));
    store(out, 2 << shift, minus(v12, v13));
    store(out, 0 << shift, plus(v12, v13));
  }
}

// dft4b : same as dft4f except that v7 is built from minus(v5, v3) instead
// of minus(v3, v5).
ALIGNED(8192) void dft4b_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
  const int k = 1 << (shift - LOG2VECWIDTH);
  int i=0;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + i0*2;
    const real *in = in0 + i0*2;
    real2 v3 = load(in, 1 << shift);
    real2 v5 = load(in, 3 << shift);
    real2 v13 = plus(v3, v5);
    real2 v7 = reverse(minus(v5, v3));
    real2 v4 = load(in, 2 << shift);
    real2 v2 = load(in, 0 << shift);
    real2 v8 = minus(v4, v2);
    store(out, 3 << shift, minus(uminusplus(v7), v8));
    store(out, 1 << shift, minus(uplusminus(v7), v8));
    real2 v12 = plus(v2, v4);
    store(out, 2 << shift, minus(v12, v13));
    store(out, 0 << shift, plus(v12, v13));
  }
}

// but4f : four-input counterpart of the but2 kernels. Output 0 is the plain
// sum; outputs 1, 2 and 3 are each combined with a pair of entries
// tbl[0..5 + tbloffset] before being stored at n << outShift.
ALIGNED(8192) void but4f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v7 = reverse(minus(v3, v5));
    real2 v13 = plus(v3, v5);
    real2 v2 = load(in, 0 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v8 = minus(v4, v2);
    real2 v12 = plus(v2, v4);
    store(out, 0 << outShift, plus(v12, v13));
    real2 v26 = minus(v12, v13);
    store(out, 2 << outShift, ctimesminusplus(v26, tbl[0 + tbloffset], ctimes(reverse(v26), tbl[1 + tbloffset])));
    real2 v11 = minusplus(uminus(v7), v8);
    real2 v9 = minusplus(v7, v8);
    store(out, 1 << outShift, ctimesminusplus(reverse(v9), tbl[2 + tbloffset], ctimes(v9, tbl[3 + tbloffset])));
    store(out, 3 << outShift, ctimesminusplus(reverse(v11), tbl[4 + tbloffset], ctimes(v11, tbl[5 + tbloffset])));
  }
}

// but4b : same as but4f except that v7 is built from minus(v5, v3).
ALIGNED(8192) void but4b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v7 = reverse(minus(v5, v3));
    real2 v13 = plus(v3, v5);
    real2 v2 = load(in, 0 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v8 = minus(v4, v2);
    real2 v12 = plus(v2, v4);
    store(out, 0 << outShift, plus(v12, v13));
    real2 v26 = minus(v12, v13);
    store(out, 2 << outShift, ctimesminusplus(v26, tbl[0 + tbloffset], ctimes(reverse(v26), tbl[1 + tbloffset])));
    real2 v11 = minusplus(uminus(v7), v8);
    real2 v9 = minusplus(v7, v8);
    store(out, 1 << outShift, ctimesminusplus(reverse(v9), tbl[2 + tbloffset], ctimes(v9, tbl[3 + tbloffset])));
    store(out, 3 << outShift, ctimesminusplus(reverse(v11), tbl[4 + tbloffset], ctimes(v11, tbl[5 + tbloffset])));
  }
}

// tbut4f : scatter variant of but4f. Results go through
// scatter(out, n, 4, ...), table values are fetched with load(tbl, ...) at
// multiples of VECWIDTH, and tbloffset is K * i0.
ALIGNED(8192) void tbut4f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * i0;
    real2 v5 = load(in, 3 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v7 = reverse(minus(v3, v5));
    real2 v13 = plus(v3, v5);
    real2 v2 = load(in, 0 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v8 = minus(v4, v2);
    real2 v12 = plus(v2, v4);
    scatter(out, 0, 4, plus(v12, v13));
    real2 v26 = minus(v12, v13);
    scatter(out, 2, 4, timesminusplus(v26, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v26), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v11 = minusplus(uminus(v7), v8);
    real2 v9 = minusplus(v7, v8);
    scatter(out, 1, 4, timesminusplus(reverse(v9), load(tbl, 2 * VECWIDTH + tbloffset), times(v9, load(tbl, 3 * VECWIDTH + tbloffset))));
    scatter(out, 3, 4, timesminusplus(reverse(v11), load(tbl, 4 * VECWIDTH + tbloffset), times(v11, load(tbl, 5 * VECWIDTH + tbloffset))));
  }
}

// tbut4b : same as tbut4f except that v7 is built from minus(v5, v3).
ALIGNED(8192) void tbut4b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * i0;
    real2 v5 = load(in, 3 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v7 = reverse(minus(v5, v3));
    real2 v13 = plus(v3, v5);
    real2 v2 = load(in, 0 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v8 = minus(v4, v2);
    real2 v12 = plus(v2, v4);
    scatter(out, 0, 4, plus(v12, v13));
    real2 v26 = minus(v12, v13);
    scatter(out, 2, 4, timesminusplus(v26, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v26), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v11 = minusplus(uminus(v7), v8);
    real2 v9 = minusplus(v7, v8);
    scatter(out, 1, 4, timesminusplus(reverse(v9), load(tbl, 2 * VECWIDTH + tbloffset), times(v9, load(tbl, 3 * VECWIDTH + tbloffset))));
    scatter(out, 3, 4, timesminusplus(reverse(v11), load(tbl, 4 * VECWIDTH + tbloffset), times(v11, load(tbl, 5 * VECWIDTH + tbloffset))));
  }
}
// The 8-input kernels are compiled only when MAXBUTWIDTH is at least 3.
#if MAXBUTWIDTH >= 3
// dft8f: 8-input kernel that reads and writes with the same shift and uses the
// constants ctbl[0] and ctbl[1] instead of a caller-supplied table
// (the "f" suffix presumably means the forward transform direction - confirm).
// NOTE(review): %CONFIG% and %ISA% look like template placeholders that are
// substituted before compilation - confirm against the build scripts.
// NOTE(review): ctbl is not declared in this block; presumably a file-level
// constant table - confirm.
//
//   out0   output base; iteration i writes slot j at (out0 + i0*2) + (j << shift)
//   in0    input base; iteration i reads slot j from (in0 + i0*2) + (j << shift)
//   shift  slot stride exponent; also fixes the trip count 1 << (shift - LOG2VECWIDTH)
ALIGNED(8192) void dft8f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
  // The shift below is only defined in C when shift >= LOG2VECWIDTH.
  const int k = 1 << (shift - LOG2VECWIDTH);
  int i=0;
  // i is the loop variable of the parallel for, so OpenMP makes it private per thread.
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + i0*2;
    const real *in = in0 + i0*2;
    // Odd inputs 7, 3, 1, 5.
    real2 v9 = load(in, 7 << shift);
    real2 v5 = load(in, 3 << shift);
    real2 v33 = plus(v5, v9);
    real2 v27 = reverse(minus(v5, v9));
    real2 v3 = load(in, 1 << shift);
    real2 v7 = load(in, 5 << shift);
    real2 v32 = plus(v3, v7);
    real2 v28 = minus(v7, v3);
    real2 v45 = reverse(minus(v32, v33));
    real2 v51 = plus(v32, v33);
    real2 v29 = minusplus(v27, v28);
    real2 v31 = minusplus(uminus(v27), v28);
    real2 v43 = ctimesminusplus(reverse(v31), ctbl[1], ctimes(v31, ctbl[0]));
    // Even inputs 4, 0, 6, 2.
    real2 v6 = load(in, 4 << shift);
    real2 v2 = load(in, 0 << shift);
    real2 v12 = minus(v6, v2);
    real2 v16 = plus(v2, v6);
    real2 v8 = load(in, 6 << shift);
    real2 v4 = load(in, 2 << shift);
    real2 v17 = plus(v4, v8);
    real2 v11 = reverse(minus(v4, v8));
    real2 v46 = minus(v17, v16);
    store(out, 2 << shift, minus(uplusminus(v45), v46));
    store(out, 6 << shift, minus(uminusplus(v45), v46));
    real2 v50 = plus(v16, v17);
    // Outputs 0 and 4: sum and difference of the even-input total and the odd-input total.
    store(out, 4 << shift, minus(v50, v51));
    store(out, 0 << shift, plus(v50, v51));
    real2 v25 = minus(uminusplus(v11), v12);
    store(out, 3 << shift, plus(v25, v43));
    store(out, 7 << shift, minus(v25, v43));
    real2 v21 = minus(uplusminus(v11), v12);
    real2 v38 = ctimesminusplus(reverse(v29), ctbl[1], ctimes(v29, ctbl[1]));
    store(out, 1 << shift, plus(v21, v38));
    store(out, 5 << shift, minus(v21, v38));
  }
}

// dft8b: counterpart of dft8f with the same parameters and addressing
// (the "b" suffix presumably means the backward transform direction - confirm).
// Relative to dft8f the operand order inside each reverse(minus(...)) term is
// swapped and the ctbl index pairs differ: (0, 0) and (0, 1) here versus
// (1, 0) and (1, 1) there.
ALIGNED(8192) void dft8b_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
  // The shift below is only defined in C when shift >= LOG2VECWIDTH.
  const int k = 1 << (shift - LOG2VECWIDTH);
  int i=0;
  // i is the loop variable of the parallel for, so OpenMP makes it private per thread.
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + i0*2;
    const real *in = in0 + i0*2;
    // Even inputs 6, 2, 0, 4.
    real2 v8 = load(in, 6 << shift);
    real2 v4 = load(in, 2 << shift);
    real2 v17 = plus(v4, v8);
    real2 v11 = reverse(minus(v8, v4));
    real2 v2 = load(in, 0 << shift);
    real2 v6 = load(in, 4 << shift);
    real2 v16 = plus(v2, v6);
    real2 v12 = minus(v6, v2);
    real2 v50 = plus(v16, v17);
    real2 v46 = minus(v17, v16);
    real2 v21 = minus(uplusminus(v11), v12);
    real2 v25 = minus(uminusplus(v11), v12);
    // Odd inputs 1, 5, 3, 7.
    real2 v3 = load(in, 1 << shift);
    real2 v7 = load(in, 5 << shift);
    real2 v28 = minus(v7, v3);
    real2 v32 = plus(v3, v7);
    real2 v5 = load(in, 3 << shift);
    real2 v9 = load(in, 7 << shift);
    real2 v33 = plus(v5, v9);
    real2 v27 = reverse(minus(v9, v5));
    real2 v45 = reverse(minus(v33, v32));
    real2 v51 = plus(v32, v33);
    // Outputs 0 and 4: sum and difference of the even-input total and the odd-input total.
    store(out, 0 << shift, plus(v50, v51));
    store(out, 4 << shift, minus(v50, v51));
    store(out, 2 << shift, minus(uplusminus(v45), v46));
    store(out, 6 << shift, minus(uminusplus(v45), v46));
    real2 v31 = minusplus(uminus(v27), v28);
    real2 v29 = minusplus(v27, v28);
    real2 v43 = ctimesminusplus(reverse(v31), ctbl[0], ctimes(v31, ctbl[0]));
    store(out, 7 << shift, minus(v25, v43));
    store(out, 3 << shift, plus(v25, v43));
    real2 v39 = ctimesminusplus(reverse(v29), ctbl[0], ctimes(v29, ctbl[1]));
    store(out, 1 << shift, plus(v21, v39));
    store(out, 5 << shift, minus(v21, v39));
  }
}

// but8f: 8-input butterfly pass whose outputs 1..7 are multiplied by entries of
// a caller-supplied table (the "f" suffix presumably means forward - confirm).
//
//   out0      output base; iteration i writes relative to out0 + q[i]
//   q         per-iteration output offsets, indexed by the loop counter i
//   outShift  output slot j is stored at j << outShift; also scales the table offset
//   in0       input base; iteration i reads relative to in0 + i0*2
//   inShift   input slot j is loaded from j << inShift; also fixes the trip count
//   tbl       coefficient table, read at tbl[0 + tbloffset] .. tbl[13 + tbloffset]
//   K         table stride: tbloffset = K * (i0 >> outShift)
ALIGNED(8192) void but8f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  // The shift below is only defined in C when inShift >= LOG2VECWIDTH.
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
  // i is the loop variable of the parallel for, so OpenMP makes it private per thread.
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    // Odd inputs 7, 3, 5, 1.
    real2 v9 = load(in, 7 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v37 = plus(v5, v9);
    real2 v31 = reverse(minus(v5, v9));
    real2 v7 = load(in, 5 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v36 = plus(v3, v7);
    real2 v32 = minus(v7, v3);
    real2 v57 = plus(v36, v37);
    real2 v51 = reverse(minus(v36, v37));
    real2 v35 = minusplus(uminus(v31), v32);
    real2 v33 = minusplus(v31, v32);
    real2 v43 = ctimesminusplus(reverse(v33), tbl[6 + tbloffset], ctimes(v33, tbl[7 + tbloffset]));
    // Even inputs 4, 0, 6, 2.
    real2 v6 = load(in, 4 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v16 = plus(v2, v6);
    real2 v12 = minus(v6, v2);
    real2 v8 = load(in, 6 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v17 = plus(v4, v8);
    real2 v11 = reverse(minus(v4, v8));
    real2 v52 = minus(v17, v16);
    real2 v56 = plus(v16, v17);
    // Output 0 is the plain sum of all eight inputs; it is the only slot stored without a table multiply.
    store(out, 0 << outShift, plus(v56, v57));
    real2 v70 = minus(v56, v57);
    store(out, 4 << outShift, ctimesminusplus(v70, tbl[0 + tbloffset], ctimes(reverse(v70), tbl[1 + tbloffset])));
    real2 v53 = minusplus(v51, v52);
    store(out, 2 << outShift, ctimesminusplus(reverse(v53), tbl[10 + tbloffset], ctimes(v53, tbl[11 + tbloffset])));
    real2 v55 = minusplus(uminus(v51), v52);
    store(out, 6 << outShift, ctimesminusplus(reverse(v55), tbl[12 + tbloffset], ctimes(v55, tbl[13 + tbloffset])));
    real2 v15 = minusplus(uminus(v11), v12);
    real2 v13 = minusplus(v11, v12);
    real2 v23 = ctimesminusplus(reverse(v13), tbl[2 + tbloffset], ctimes(v13, tbl[3 + tbloffset]));
    store(out, 1 << outShift, plus(v23, v43));
    real2 v78 = minus(v23, v43);
    store(out, 5 << outShift, ctimesminusplus(v78, tbl[0 + tbloffset], ctimes(reverse(v78), tbl[1 + tbloffset])));
    real2 v49 = ctimesminusplus(reverse(v35), tbl[8 + tbloffset], ctimes(v35, tbl[9 + tbloffset]));
    real2 v29 = ctimesminusplus(reverse(v15), tbl[4 + tbloffset], ctimes(v15, tbl[5 + tbloffset]));
    store(out, 3 << outShift, plus(v29, v49));
    real2 v84 = minus(v29, v49);
    store(out, 7 << outShift, ctimesminusplus(v84, tbl[0 + tbloffset], ctimes(reverse(v84), tbl[1 + tbloffset])));
  }
}

// but8b: counterpart of but8f with the same parameters, the same table indices
// and the same store order. It differs from but8f in the operand order inside
// the reverse(minus(...)) terms (v31, v51 and v11).
ALIGNED(8192) void but8b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  // The shift below is only defined in C when inShift >= LOG2VECWIDTH.
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
  // i is the loop variable of the parallel for, so OpenMP makes it private per thread.
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    // Odd inputs 7, 3, 5, 1.
    real2 v9 = load(in, 7 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v37 = plus(v5, v9);
    real2 v31 = reverse(minus(v9, v5));
    real2 v7 = load(in, 5 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v36 = plus(v3, v7);
    real2 v32 = minus(v7, v3);
    real2 v57 = plus(v36, v37);
    real2 v51 = reverse(minus(v37, v36));
    real2 v35 = minusplus(uminus(v31), v32);
    real2 v33 = minusplus(v31, v32);
    real2 v43 = ctimesminusplus(reverse(v33), tbl[6 + tbloffset], ctimes(v33, tbl[7 + tbloffset]));
    // Even inputs 4, 0, 6, 2.
    real2 v6 = load(in, 4 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v16 = plus(v2, v6);
    real2 v12 = minus(v6, v2);
    real2 v8 = load(in, 6 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v17 = plus(v4, v8);
    real2 v11 = reverse(minus(v8, v4));
    real2 v52 = minus(v17, v16);
    real2 v56 = plus(v16, v17);
    // Output 0 is the plain sum of all eight inputs (no table multiply).
    store(out, 0 << outShift, plus(v56, v57));
    real2 v70 = minus(v56, v57);
    store(out, 4 << outShift, ctimesminusplus(v70, tbl[0 + tbloffset], ctimes(reverse(v70), tbl[1 + tbloffset])));
    real2 v53 = minusplus(v51, v52);
    store(out, 2 << outShift, ctimesminusplus(reverse(v53), tbl[10 + tbloffset], ctimes(v53, tbl[11 + tbloffset])));
    real2 v55 = minusplus(uminus(v51), v52);
    store(out, 6 << outShift, ctimesminusplus(reverse(v55), tbl[12 + tbloffset], ctimes(v55, tbl[13 + tbloffset])));
    real2 v15 = minusplus(uminus(v11), v12);
    real2 v13 = minusplus(v11, v12);
    real2 v23 = ctimesminusplus(reverse(v13), tbl[2 + tbloffset], ctimes(v13, tbl[3 + tbloffset]));
    store(out, 1 << outShift, plus(v23, v43));
    real2 v78 = minus(v23, v43);
    store(out, 5 << outShift, ctimesminusplus(v78, tbl[0 + tbloffset], ctimes(reverse(v78), tbl[1 + tbloffset])));
    real2 v49 = ctimesminusplus(reverse(v35), tbl[8 + tbloffset], ctimes(v35, tbl[9 + tbloffset]));
    real2 v29 = ctimesminusplus(reverse(v15), tbl[4 + tbloffset], ctimes(v15, tbl[5 + tbloffset]));
    store(out, 3 << outShift, plus(v29, v49));
    real2 v84 = minus(v29, v49);
    store(out, 7 << outShift, ctimesminusplus(v84, tbl[0 + tbloffset], ctimes(reverse(v84), tbl[1 + tbloffset])));
  }
}
// tbut8f: 8-input butterfly pass that writes its results through scatter()
// (the "f" suffix presumably means the forward transform direction - confirm).
// Results go to scatter(out, j, 8, ...) and table entries are fetched as
// vectors with load(tbl, n * VECWIDTH + tbloffset) for n = 0..13.
// NOTE(review): %CONFIG% and %ISA% look like template placeholders that are
// substituted before compilation - confirm against the build scripts.
//
//   out0      output base; iteration i writes relative to out0 + q[i]
//   q         per-iteration output offsets, indexed by the loop counter i
//   in0       input base; iteration i reads relative to in0 + i0*2
//   inShift   input slot j is loaded from j << inShift; also fixes the trip count
//   tbl       coefficient table
//   K         table stride: tbloffset = K * i0
ALIGNED(8192) void tbut8f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  // The shift below is only defined in C when inShift >= LOG2VECWIDTH.
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
  // i is the loop variable of the parallel for, so OpenMP makes it private per thread.
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * i0;
    // Odd inputs 7, 3, 5, 1.
    real2 v9 = load(in, 7 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v37 = plus(v5, v9);
    real2 v31 = reverse(minus(v5, v9));
    real2 v7 = load(in, 5 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v36 = plus(v3, v7);
    real2 v32 = minus(v7, v3);
    real2 v57 = plus(v36, v37);
    real2 v51 = reverse(minus(v36, v37));
    real2 v35 = minusplus(uminus(v31), v32);
    real2 v33 = minusplus(v31, v32);
    real2 v43 = timesminusplus(reverse(v33), load(tbl, 6 * VECWIDTH + tbloffset), times(v33, load(tbl, 7 * VECWIDTH + tbloffset)));
    // Even inputs 4, 0, 6, 2.
    real2 v6 = load(in, 4 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v16 = plus(v2, v6);
    real2 v12 = minus(v6, v2);
    real2 v8 = load(in, 6 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v17 = plus(v4, v8);
    real2 v11 = reverse(minus(v4, v8));
    real2 v52 = minus(v17, v16);
    real2 v56 = plus(v16, v17);
    // Output 0 is the plain sum of all eight inputs (no table multiply).
    scatter(out, 0, 8, plus(v56, v57));
    real2 v70 = minus(v56, v57);
    scatter(out, 4, 8, timesminusplus(v70, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v70), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v53 = minusplus(v51, v52);
    scatter(out, 2, 8, timesminusplus(reverse(v53), load(tbl, 10 * VECWIDTH + tbloffset), times(v53, load(tbl, 11 * VECWIDTH + tbloffset))));
    real2 v55 = minusplus(uminus(v51), v52);
    scatter(out, 6, 8, timesminusplus(reverse(v55), load(tbl, 12 * VECWIDTH + tbloffset), times(v55, load(tbl, 13 * VECWIDTH + tbloffset))));
    real2 v15 = minusplus(uminus(v11), v12);
    real2 v13 = minusplus(v11, v12);
    real2 v23 = timesminusplus(reverse(v13), load(tbl, 2 * VECWIDTH + tbloffset), times(v13, load(tbl, 3 * VECWIDTH + tbloffset)));
    scatter(out, 1, 8, plus(v23, v43));
    real2 v78 = minus(v23, v43);
    scatter(out, 5, 8, timesminusplus(v78, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v78), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v49 = timesminusplus(reverse(v35), load(tbl, 8 * VECWIDTH + tbloffset), times(v35, load(tbl, 9 * VECWIDTH + tbloffset)));
    real2 v29 = timesminusplus(reverse(v15), load(tbl, 4 * VECWIDTH + tbloffset), times(v15, load(tbl, 5 * VECWIDTH + tbloffset)));
    scatter(out, 3, 8, plus(v29, v49));
    real2 v84 = minus(v29, v49);
    scatter(out, 7, 8, timesminusplus(v84, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v84), load(tbl, 1 * VECWIDTH + tbloffset))));
  }
}

// tbut8b: counterpart of tbut8f with the same parameters, table reads and
// scatter order. It differs from tbut8f in the operand order inside the
// reverse(minus(...)) terms (v31, v51 and v11).
ALIGNED(8192) void tbut8b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  // The shift below is only defined in C when inShift >= LOG2VECWIDTH.
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
  // i is the loop variable of the parallel for, so OpenMP makes it private per thread.
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * i0;
    // Odd inputs 7, 3, 5, 1.
    real2 v9 = load(in, 7 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v37 = plus(v5, v9);
    real2 v31 = reverse(minus(v9, v5));
    real2 v7 = load(in, 5 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v36 = plus(v3, v7);
    real2 v32 = minus(v7, v3);
    real2 v57 = plus(v36, v37);
    real2 v51 = reverse(minus(v37, v36));
    real2 v35 = minusplus(uminus(v31), v32);
    real2 v33 = minusplus(v31, v32);
    real2 v43 = timesminusplus(reverse(v33), load(tbl, 6 * VECWIDTH + tbloffset), times(v33, load(tbl, 7 * VECWIDTH + tbloffset)));
    // Even inputs 4, 0, 6, 2.
    real2 v6 = load(in, 4 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v16 = plus(v2, v6);
    real2 v12 = minus(v6, v2);
    real2 v8 = load(in, 6 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v17 = plus(v4, v8);
    real2 v11 = reverse(minus(v8, v4));
    real2 v52 = minus(v17, v16);
    real2 v56 = plus(v16, v17);
    // Output 0 is the plain sum of all eight inputs (no table multiply).
    scatter(out, 0, 8, plus(v56, v57));
    real2 v70 = minus(v56, v57);
    scatter(out, 4, 8, timesminusplus(v70, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v70), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v53 = minusplus(v51, v52);
    scatter(out, 2, 8, timesminusplus(reverse(v53), load(tbl, 10 * VECWIDTH + tbloffset), times(v53, load(tbl, 11 * VECWIDTH + tbloffset))));
    real2 v55 = minusplus(uminus(v51), v52);
    scatter(out, 6, 8, timesminusplus(reverse(v55), load(tbl, 12 * VECWIDTH + tbloffset), times(v55, load(tbl, 13 * VECWIDTH + tbloffset))));
    real2 v15 = minusplus(uminus(v11), v12);
    real2 v13 = minusplus(v11, v12);
    real2 v23 = timesminusplus(reverse(v13), load(tbl, 2 * VECWIDTH + tbloffset), times(v13, load(tbl, 3 * VECWIDTH + tbloffset)));
    scatter(out, 1, 8, plus(v23, v43));
    real2 v78 = minus(v23, v43);
    scatter(out, 5, 8, timesminusplus(v78, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v78), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v49 = timesminusplus(reverse(v35), load(tbl, 8 * VECWIDTH + tbloffset), times(v35, load(tbl, 9 * VECWIDTH + tbloffset)));
    real2 v29 = timesminusplus(reverse(v15), load(tbl, 4 * VECWIDTH + tbloffset), times(v15, load(tbl, 5 * VECWIDTH + tbloffset)));
    scatter(out, 3, 8, plus(v29, v49));
    real2 v84 = minus(v29, v49);
    scatter(out, 7, 8, timesminusplus(v84, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v84), load(tbl, 1 * VECWIDTH + tbloffset))));
  }
}
#endif

// The 16-input kernels are compiled only when MAXBUTWIDTH is at least 4.
#if MAXBUTWIDTH >= 4
// dft16f: 16-input kernel that reads and writes with the same shift and uses
// the constants ctbl[0] .. ctbl[5] instead of a caller-supplied table
// (the "f" suffix presumably means the forward transform direction - confirm).
// NOTE(review): ctbl is not declared in this block; presumably a file-level
// constant table - confirm.
//
//   out0   output base; iteration i writes slot j at (out0 + i0*2) + (j << shift)
//   in0    input base; iteration i reads slot j from (in0 + i0*2) + (j << shift)
//   shift  slot stride exponent; also fixes the trip count 1 << (shift - LOG2VECWIDTH)
ALIGNED(8192) void dft16f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
  // The shift below is only defined in C when shift >= LOG2VECWIDTH.
  const int k = 1 << (shift - LOG2VECWIDTH);
  int i=0;
  // i is the loop variable of the parallel for, so OpenMP makes it private per thread.
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + i0*2;
    const real *in = in0 + i0*2;
    // Odd inputs 9, 1, 13, 5.
    real2 v11 = load(in, 9 << shift);
    real2 v3 = load(in, 1 << shift);
    real2 v40 = plus(v3, v11);
    real2 v36 = minus(v11, v3);
    real2 v15 = load(in, 13 << shift);
    real2 v7 = load(in, 5 << shift);
    real2 v35 = reverse(minus(v7, v15));
    real2 v41 = plus(v7, v15);
    real2 v106 = minus(v41, v40);
    real2 v110 = plus(v40, v41);
    real2 v37 = minusplus(v35, v36);
    real2 v39 = minusplus(uminus(v35), v36);
    real2 v51 = ctimesminusplus(reverse(v39), ctbl[5], ctimes(v39, ctbl[3]));
    real2 v47 = ctimesminusplus(reverse(v37), ctbl[3], ctimes(v37, ctbl[5]));
    // Odd inputs 11, 3, 15, 7.
    real2 v13 = load(in, 11 << shift);
    real2 v5 = load(in, 3 << shift);
    real2 v72 = minus(v13, v5);
    real2 v76 = plus(v5, v13);
    real2 v17 = load(in, 15 << shift);
    real2 v9 = load(in, 7 << shift);
    real2 v77 = plus(v9, v17);
    real2 v71 = reverse(minus(v9, v17));
    real2 v105 = reverse(minus(v76, v77));
    real2 v111 = plus(v76, v77);
    real2 v107 = minusplus(v105, v106);
    real2 v109 = minusplus(uminus(v105), v106);
    real2 v121 = reverse(minus(v110, v111));
    real2 v127 = plus(v110, v111);
    real2 v119 = ctimesminusplus(reverse(v109), ctbl[1], ctimes(v109, ctbl[0]));
    real2 v115 = ctimesminusplus(reverse(v107), ctbl[1], ctimes(v107, ctbl[1]));
    // Even inputs 6, 14, 2, 10.
    real2 v8 = load(in, 6 << shift);
    real2 v16 = load(in, 14 << shift);
    real2 v53 = reverse(minus(v8, v16));
    real2 v59 = plus(v8, v16);
    real2 v4 = load(in, 2 << shift);
    real2 v12 = load(in, 10 << shift);
    real2 v54 = minus(v12, v4);
    real2 v58 = plus(v4, v12);
    real2 v95 = plus(v58, v59);
    real2 v89 = reverse(minus(v58, v59));
    // Even inputs 0, 8, 4, 12.
    real2 v2 = load(in, 0 << shift);
    real2 v10 = load(in, 8 << shift);
    real2 v24 = plus(v2, v10);
    real2 v20 = minus(v10, v2);
    real2 v6 = load(in, 4 << shift);
    real2 v14 = load(in, 12 << shift);
    real2 v19 = reverse(minus(v6, v14));
    real2 v25 = plus(v6, v14);
    real2 v94 = plus(v24, v25);
    real2 v90 = minus(v25, v24);
    real2 v103 = minus(uminusplus(v89), v90);
    real2 v99 = minus(uplusminus(v89), v90);
    store(out, 2 << shift, plus(v99, v115));
    store(out, 10 << shift, minus(v99, v115));
    store(out, 6 << shift, plus(v103, v119));
    store(out, 14 << shift, minus(v103, v119));
    real2 v122 = minus(v95, v94);
    store(out, 12 << shift, minus(uminusplus(v121), v122));
    store(out, 4 << shift, minus(uplusminus(v121), v122));
    real2 v126 = plus(v94, v95);
    // Outputs 0 and 8: sum and difference of the even-input total and the odd-input total.
    store(out, 8 << shift, minus(v126, v127));
    store(out, 0 << shift, plus(v126, v127));
    real2 v57 = minusplus(uminus(v53), v54);
    real2 v55 = minusplus(v53, v54);
    real2 v64 = ctimesminusplus(reverse(v55), ctbl[1], ctimes(v55, ctbl[1]));
    real2 v75 = minusplus(uminus(v71), v72);
    real2 v73 = minusplus(v71, v72);
    real2 v81 = ctimesminusplus(reverse(v73), ctbl[5], ctimes(v73, ctbl[3]));
    real2 v29 = minus(uplusminus(v19), v20);
    real2 v33 = minus(uminusplus(v19), v20);
    real2 v151 = plus(v29, v64);
    real2 v147 = minus(v64, v29);
    real2 v152 = plus(v47, v81);
    real2 v146 = reverse(minus(v47, v81));
    store(out, 13 << shift, minus(uminusplus(v146), v147));
    store(out, 5 << shift, minus(uplusminus(v146), v147));
    store(out, 9 << shift, minus(v151, v152));
    store(out, 1 << shift, plus(v151, v152));
    real2 v69 = ctimesminusplus(reverse(v57), ctbl[1], ctimes(v57, ctbl[0]));
    real2 v87 = ctimesminusplus(reverse(v75), ctbl[4], ctimes(v75, ctbl[2]));
    real2 v171 = plus(v51, v87);
    real2 v165 = reverse(minus(v51, v87));
    real2 v170 = plus(v33, v69);
    real2 v166 = minus(v69, v33);
    store(out, 7 << shift, minus(uplusminus(v165), v166));
    store(out, 15 << shift, minus(uminusplus(v165), v166));
    store(out, 11 << shift, minus(v170, v171));
    store(out, 3 << shift, plus(v170, v171));
  }
}

// dft16b: counterpart of dft16f with the same parameters and addressing
// (the "b" suffix presumably means the backward transform direction - confirm).
// Relative to dft16f the operand order inside the reverse(minus(...)) terms is
// swapped and the ctbl index pairs passed to ctimesminusplus/ctimes differ.
ALIGNED(8192) void dft16b_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
  // The shift below is only defined in C when shift >= LOG2VECWIDTH.
  const int k = 1 << (shift - LOG2VECWIDTH);
  int i=0;
  // i is the loop variable of the parallel for, so OpenMP makes it private per thread.
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + i0*2;
    const real *in = in0 + i0*2;
    // Odd inputs 7, 15, 11, 3.
    real2 v9 = load(in, 7 << shift);
    real2 v17 = load(in, 15 << shift);
    real2 v79 = plus(v9, v17);
    real2 v73 = reverse(minus(v17, v9));
    real2 v13 = load(in, 11 << shift);
    real2 v5 = load(in, 3 << shift);
    real2 v78 = plus(v5, v13);
    real2 v74 = minus(v13, v5);
    real2 v105 = reverse(minus(v79, v78));
    real2 v111 = plus(v78, v79);
    real2 v75 = minusplus(v73, v74);
    real2 v77 = minusplus(uminus(v73), v74);
    real2 v83 = ctimesminusplus(reverse(v75), ctbl[2], ctimes(v75, ctbl[3]));
    // Odd inputs 5, 13, 9, 1.
    real2 v7 = load(in, 5 << shift);
    real2 v15 = load(in, 13 << shift);
    real2 v41 = plus(v7, v15);
    real2 v35 = reverse(minus(v15, v7));
    real2 v11 = load(in, 9 << shift);
    real2 v3 = load(in, 1 << shift);
    real2 v40 = plus(v3, v11);
    real2 v36 = minus(v11, v3);
    real2 v110 = plus(v40, v41);
    real2 v106 = minus(v41, v40);
    real2 v121 = reverse(minus(v111, v110));
    real2 v127 = plus(v110, v111);
    real2 v109 = minusplus(uminus(v105), v106);
    real2 v107 = minusplus(v105, v106);
    real2 v119 = ctimesminusplus(reverse(v109), ctbl[0], ctimes(v109, ctbl[0]));
    real2 v115 = ctimesminusplus(reverse(v107), ctbl[0], ctimes(v107, ctbl[1]));
    // Even inputs 14, 6, 10, 2.
    real2 v16 = load(in, 14 << shift);
    real2 v8 = load(in, 6 << shift);
    real2 v55 = reverse(minus(v16, v8));
    real2 v61 = plus(v8, v16);
    real2 v12 = load(in, 10 << shift);
    real2 v4 = load(in, 2 << shift);
    real2 v56 = minus(v12, v4);
    real2 v60 = plus(v4, v12);
    real2 v89 = reverse(minus(v61, v60));
    real2 v95 = plus(v60, v61);
    // Even inputs 12, 4, 0, 8.
    real2 v14 = load(in, 12 << shift);
    real2 v6 = load(in, 4 << shift);
    real2 v19 = reverse(minus(v14, v6));
    real2 v25 = plus(v6, v14);
    real2 v2 = load(in, 0 << shift);
    real2 v10 = load(in, 8 << shift);
    real2 v24 = plus(v2, v10);
    real2 v20 = minus(v10, v2);
    real2 v90 = minus(v25, v24);
    real2 v94 = plus(v24, v25);
    real2 v103 = minus(uminusplus(v89), v90);
    store(out, 6 << shift, plus(v103, v119));
    store(out, 14 << shift, minus(v103, v119));
    real2 v99 = minus(uplusminus(v89), v90);
    store(out, 10 << shift, minus(v99, v115));
    store(out, 2 << shift, plus(v99, v115));
    real2 v126 = plus(v94, v95);
    // Outputs 0 and 8: sum and difference of the even-input total and the odd-input total.
    store(out, 8 << shift, minus(v126, v127));
    store(out, 0 << shift, plus(v126, v127));
    real2 v122 = minus(v95, v94);
    store(out, 12 << shift, minus(uminusplus(v121), v122));
    store(out, 4 << shift, minus(uplusminus(v121), v122));
    real2 v33 = minus(uminusplus(v19), v20);
    real2 v29 = minus(uplusminus(v19), v20);
    real2 v59 = minusplus(uminus(v55), v56);
    real2 v57 = minusplus(v55, v56);
    real2 v67 = ctimesminusplus(reverse(v57), ctbl[0], ctimes(v57, ctbl[1]));
    real2 v39 = minusplus(uminus(v35), v36);
    real2 v37 = minusplus(v35, v36);
    real2 v47 = ctimesminusplus(reverse(v37), ctbl[4], ctimes(v37, ctbl[5]));
    real2 v146 = reverse(minus(v83, v47));
    real2 v152 = plus(v47, v83);
    real2 v147 = minus(v67, v29);
    real2 v151 = plus(v29, v67);
    store(out, 9 << shift, minus(v151, v152));
    store(out, 1 << shift, plus(v151, v152));
    store(out, 5 << shift, minus(uplusminus(v146), v147));
    store(out, 13 << shift, minus(uminusplus(v146), v147));
    real2 v53 = ctimesminusplus(reverse(v39), ctbl[2], ctimes(v39, ctbl[3]));
    real2 v71 = ctimesminusplus(reverse(v59), ctbl[0], ctimes(v59, ctbl[0]));
    real2 v166 = minus(v71, v33);
    real2 v170 = plus(v33, v71);
    real2 v87 = ctimesminusplus(reverse(v77), ctbl[3], ctimes(v77, ctbl[2]));
    real2 v165 = reverse(minus(v87, v53));
    store(out, 15 << shift, minus(uminusplus(v165), v166));
    store(out, 7 << shift, minus(uplusminus(v165), v166));
    real2 v171 = plus(v53, v87);
    store(out, 3 << shift, plus(v170, v171));
    store(out, 11 << shift, minus(v170, v171));
  }
}

// but16f: 16-input butterfly pass whose outputs 1..15 are multiplied by entries
// of a caller-supplied table (the "f" suffix presumably means forward - confirm).
//
//   out0      output base; iteration i writes relative to out0 + q[i]
//   q         per-iteration output offsets, indexed by the loop counter i
//   outShift  output slot j is stored at j << outShift; also scales the table offset
//   in0       input base; iteration i reads relative to in0 + i0*2
//   inShift   input slot j is loaded from j << inShift; also fixes the trip count
//   tbl       coefficient table, read at tbl[0 + tbloffset] .. tbl[37 + tbloffset]
//   K         table stride: tbloffset = K * (i0 >> outShift)
ALIGNED(8192) void but16f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  // The shift below is only defined in C when inShift >= LOG2VECWIDTH.
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
  // i is the loop variable of the parallel for, so OpenMP makes it private per thread.
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    // Odd inputs 13, 5, 1, 9.
    real2 v15 = load(in, 13 << inShift);
    real2 v7 = load(in, 5 << inShift);
    real2 v45 = plus(v7, v15);
    real2 v39 = reverse(minus(v7, v15));
    real2 v3 = load(in, 1 << inShift);
    real2 v11 = load(in, 9 << inShift);
    real2 v40 = minus(v11, v3);
    real2 v44 = plus(v3, v11);
    real2 v124 = plus(v44, v45);
    real2 v120 = minus(v45, v44);
    real2 v41 = minusplus(v39, v40);
    real2 v43 = minusplus(uminus(v39), v40);
    real2 v57 = ctimesminusplus(reverse(v43), tbl[8 + tbloffset], ctimes(v43, tbl[9 + tbloffset]));
    // Odd inputs 11, 3, 15, 7.
    real2 v13 = load(in, 11 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v84 = plus(v5, v13);
    real2 v80 = minus(v13, v5);
    real2 v17 = load(in, 15 << inShift);
    real2 v9 = load(in, 7 << inShift);
    real2 v85 = plus(v9, v17);
    real2 v79 = reverse(minus(v9, v17));
    real2 v119 = reverse(minus(v84, v85));
    real2 v125 = plus(v84, v85);
    real2 v145 = plus(v124, v125);
    real2 v139 = reverse(minus(v124, v125));
    real2 v121 = minusplus(v119, v120);
    real2 v123 = minusplus(uminus(v119), v120);
    real2 v137 = ctimesminusplus(reverse(v123), tbl[24 + tbloffset], ctimes(v123, tbl[25 + tbloffset]));
    real2 v131 = ctimesminusplus(reverse(v121), tbl[22 + tbloffset], ctimes(v121, tbl[23 + tbloffset]));
    // Even inputs 2, 10, 6, 14.
    real2 v4 = load(in, 2 << inShift);
    real2 v12 = load(in, 10 << inShift);
    real2 v64 = plus(v4, v12);
    real2 v60 = minus(v12, v4);
    real2 v8 = load(in, 6 << inShift);
    real2 v16 = load(in, 14 << inShift);
    real2 v65 = plus(v8, v16);
    real2 v59 = reverse(minus(v8, v16));
    real2 v99 = reverse(minus(v64, v65));
    real2 v105 = plus(v64, v65);
    // Even inputs 12, 4, 8, 0.
    real2 v14 = load(in, 12 << inShift);
    real2 v6 = load(in, 4 << inShift);
    real2 v25 = plus(v6, v14);
    real2 v19 = reverse(minus(v6, v14));
    real2 v10 = load(in, 8 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v20 = minus(v10, v2);
    real2 v24 = plus(v2, v10);
    real2 v104 = plus(v24, v25);
    real2 v100 = minus(v25, v24);
    real2 v140 = minus(v105, v104);
    real2 v144 = plus(v104, v105);
    // Output 0 is the plain sum of all sixteen inputs; it is the only slot stored without a table multiply.
    store(out, 0 << outShift, plus(v144, v145));
    real2 v158 = minus(v144, v145);
    store(out, 8 << outShift, ctimesminusplus(v158, tbl[0 + tbloffset], ctimes(reverse(v158), tbl[1 + tbloffset])));
    real2 v143 = minusplus(uminus(v139), v140);
    store(out, 12 << outShift, ctimesminusplus(reverse(v143), tbl[28 + tbloffset], ctimes(v143, tbl[29 + tbloffset])));
    real2 v141 = minusplus(v139, v140);
    store(out, 4 << outShift, ctimesminusplus(reverse(v141), tbl[26 + tbloffset], ctimes(v141, tbl[27 + tbloffset])));
    real2 v101 = minusplus(v99, v100);
    real2 v103 = minusplus(uminus(v99), v100);
    real2 v117 = ctimesminusplus(reverse(v103), tbl[20 + tbloffset], ctimes(v103, tbl[21 + tbloffset]));
    store(out, 6 << outShift, plus(v117, v137));
    real2 v172 = minus(v117, v137);
    store(out, 14 << outShift, ctimesminusplus(v172, tbl[0 + tbloffset], ctimes(reverse(v172), tbl[1 + tbloffset])));
    real2 v111 = ctimesminusplus(reverse(v101), tbl[18 + tbloffset], ctimes(v101, tbl[19 + tbloffset]));
    store(out, 2 << outShift, plus(v111, v131));
    real2 v166 = minus(v111, v131);
    store(out, 10 << outShift, ctimesminusplus(v166, tbl[0 + tbloffset], ctimes(reverse(v166), tbl[1 + tbloffset])));
    real2 v23 = minusplus(uminus(v19), v20);
    real2 v21 = minusplus(v19, v20);
    real2 v81 = minusplus(v79, v80);
    real2 v83 = minusplus(uminus(v79), v80);
    real2 v97 = ctimesminusplus(reverse(v83), tbl[16 + tbloffset], ctimes(v83, tbl[17 + tbloffset]));
    real2 v211 = plus(v57, v97);
    real2 v205 = reverse(minus(v57, v97));
    real2 v61 = minusplus(v59, v60);
    real2 v63 = minusplus(uminus(v59), v60);
    real2 v77 = ctimesminusplus(reverse(v63), tbl[12 + tbloffset], ctimes(v63, tbl[13 + tbloffset]));
    real2 v37 = ctimesminusplus(reverse(v23), tbl[4 + tbloffset], ctimes(v23, tbl[5 + tbloffset]));
    real2 v210 = plus(v37, v77);
    real2 v206 = minus(v77, v37);
    store(out, 3 << outShift, plus(v210, v211));
    real2 v224 = minus(v210, v211);
    store(out, 11 << outShift, ctimesminusplus(v224, tbl[0 + tbloffset], ctimes(reverse(v224), tbl[1 + tbloffset])));
    real2 v207 = minusplus(v205, v206);
    real2 v209 = minusplus(uminus(v205), v206);
    store(out, 15 << outShift, ctimesminusplus(reverse(v209), tbl[36 + tbloffset], ctimes(v209, tbl[37 + tbloffset])));
    store(out, 7 << outShift, ctimesminusplus(reverse(v207), tbl[34 + tbloffset], ctimes(v207, tbl[35 + tbloffset])));
    real2 v71 = ctimesminusplus(reverse(v61), tbl[10 + tbloffset], ctimes(v61, tbl[11 + tbloffset]));
    real2 v51 = ctimesminusplus(reverse(v41), tbl[6 + tbloffset], ctimes(v41, tbl[7 + tbloffset]));
    real2 v91 = ctimesminusplus(reverse(v81), tbl[14 + tbloffset], ctimes(v81, tbl[15 + tbloffset]));
    real2 v185 = plus(v51, v91);
    real2 v179 = reverse(minus(v51, v91));
    real2 v31 = ctimesminusplus(reverse(v21), tbl[2 + tbloffset], ctimes(v21, tbl[3 + tbloffset]));
    real2 v184 = plus(v31, v71);
    real2 v180 = minus(v71, v31);
    store(out, 1 << outShift, plus(v184, v185));
    real2 v198 = minus(v184, v185);
    store(out, 9 << outShift, ctimesminusplus(v198, tbl[0 + tbloffset], ctimes(reverse(v198), tbl[1 + tbloffset])));
    real2 v181 = minusplus(v179, v180);
    store(out, 5 << outShift, ctimesminusplus(reverse(v181), tbl[30 + tbloffset], ctimes(v181, tbl[31 + tbloffset])));
    real2 v183 = minusplus(uminus(v179), v180);
    store(out, 13 << outShift, ctimesminusplus(reverse(v183), tbl[32 + tbloffset], ctimes(v183, tbl[33 + tbloffset])));
  }
}

// but16b: counterpart of but16f with the same parameters, the same table
// indices and the same store order. It differs from but16f in the operand order
// inside the reverse(minus(...)) terms.
ALIGNED(8192) void but16b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  // The shift below is only defined in C when inShift >= LOG2VECWIDTH.
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
  // i is the loop variable of the parallel for, so OpenMP makes it private per thread.
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    // Odd inputs 13, 5, 1, 9.
    real2 v15 = load(in, 13 << inShift);
    real2 v7 = load(in, 5 << inShift);
    real2 v45 = plus(v7, v15);
    real2 v39 = reverse(minus(v15, v7));
    real2 v3 = load(in, 1 << inShift);
    real2 v11 = load(in, 9 << inShift);
    real2 v40 = minus(v11, v3);
    real2 v44 = plus(v3, v11);
    real2 v124 = plus(v44, v45);
    real2 v120 = minus(v45, v44);
    real2 v41 = minusplus(v39, v40);
    real2 v43 = minusplus(uminus(v39), v40);
    real2 v57 = ctimesminusplus(reverse(v43), tbl[8 + tbloffset], ctimes(v43, tbl[9 + tbloffset]));
    // Odd inputs 11, 3, 15, 7.
    real2 v13 = load(in, 11 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v84 = plus(v5, v13);
    real2 v80 = minus(v13, v5);
    real2 v17 = load(in, 15 << inShift);
    real2 v9 = load(in, 7 << inShift);
    real2 v85 = plus(v9, v17);
    real2 v79 = reverse(minus(v17, v9));
    real2 v119 = reverse(minus(v85, v84));
    real2 v125 = plus(v84, v85);
    real2 v145 = plus(v124, v125);
    real2 v139 = reverse(minus(v125, v124));
    real2 v121 = minusplus(v119, v120);
    real2 v123 = minusplus(uminus(v119), v120);
    real2 v137 = ctimesminusplus(reverse(v123), tbl[24 + tbloffset], ctimes(v123, tbl[25 + tbloffset]));
    real2 v131 = ctimesminusplus(reverse(v121), tbl[22 + tbloffset], ctimes(v121, tbl[23 + tbloffset]));
    // Even inputs 2, 10, 6, 14.
    real2 v4 = load(in, 2 << inShift);
    real2 v12 = load(in, 10 << inShift);
    real2 v64 = plus(v4, v12);
    real2 v60 = minus(v12, v4);
    real2 v8 = load(in, 6 << inShift);
    real2 v16 = load(in, 14 << inShift);
    real2 v65 = plus(v8, v16);
    real2 v59 = reverse(minus(v16, v8));
    real2 v99 = reverse(minus(v65, v64));
    real2 v105 = plus(v64, v65);
    // Even inputs 12, 4, 8, 0.
    real2 v14 = load(in, 12 << inShift);
    real2 v6 = load(in, 4 << inShift);
    real2 v25 = plus(v6, v14);
    real2 v19 = reverse(minus(v14, v6));
    real2 v10 = load(in, 8 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v20 = minus(v10, v2);
    real2 v24 = plus(v2, v10);
    real2 v104 = plus(v24, v25);
    real2 v100 = minus(v25, v24);
    real2 v140 = minus(v105, v104);
    real2 v144 = plus(v104, v105);
    // Output 0 is the plain sum of all sixteen inputs (no table multiply).
    store(out, 0 << outShift, plus(v144, v145));
    real2 v158 = minus(v144, v145);
    store(out, 8 << outShift, ctimesminusplus(v158, tbl[0 + tbloffset], ctimes(reverse(v158), tbl[1 + tbloffset])));
    real2 v143 = minusplus(uminus(v139), v140);
    store(out, 12 << outShift, ctimesminusplus(reverse(v143), tbl[28 + tbloffset], ctimes(v143, tbl[29 + tbloffset])));
    real2 v141 = minusplus(v139, v140);
    store(out, 4 << outShift, ctimesminusplus(reverse(v141), tbl[26 + tbloffset], ctimes(v141, tbl[27 + tbloffset])));
    real2 v101 = minusplus(v99, v100);
    real2 v103 = minusplus(uminus(v99), v100);
    real2 v117 = ctimesminusplus(reverse(v103), tbl[20 + tbloffset], ctimes(v103, tbl[21 + tbloffset]));
    store(out, 6 << outShift, plus(v117, v137));
    real2 v172 = minus(v117, v137);
    store(out, 14 << outShift, ctimesminusplus(v172, tbl[0 + tbloffset], ctimes(reverse(v172), tbl[1 + tbloffset])));
    real2 v111 = ctimesminusplus(reverse(v101), tbl[18 + tbloffset], ctimes(v101, tbl[19 + tbloffset]));
    store(out, 2 << outShift, plus(v111, v131));
    real2 v166 = minus(v111, v131);
    store(out, 10 << outShift, ctimesminusplus(v166, tbl[0 + tbloffset], ctimes(reverse(v166), tbl[1 + tbloffset])));
    real2 v23 = minusplus(uminus(v19), v20);
    real2 v21 = minusplus(v19, v20);
    real2 v81 = minusplus(v79, v80);
    real2 v83 = minusplus(uminus(v79), v80);
    real2 v97 = ctimesminusplus(reverse(v83), tbl[16 + tbloffset], ctimes(v83, tbl[17 + tbloffset]));
    real2 v211 = plus(v57, v97);
    real2 v205 = reverse(minus(v97, v57));
    real2 v61 = minusplus(v59, v60);
    real2 v63 = minusplus(uminus(v59), v60);
    real2 v77 = ctimesminusplus(reverse(v63), tbl[12 + tbloffset], ctimes(v63, tbl[13 + tbloffset]));
    real2 v37 = ctimesminusplus(reverse(v23), tbl[4 + tbloffset], ctimes(v23, tbl[5 + tbloffset]));
    real2 v210 = plus(v37, v77);
    real2 v206 = minus(v77, v37);
    store(out, 3 << outShift, plus(v210, v211));
    real2 v224 = minus(v210, v211);
    store(out, 11 << outShift, ctimesminusplus(v224, tbl[0 + tbloffset], ctimes(reverse(v224), tbl[1 + tbloffset])));
    real2 v207 = minusplus(v205, v206);
    real2 v209 = minusplus(uminus(v205), v206);
    store(out, 15 << outShift, ctimesminusplus(reverse(v209), tbl[36 + tbloffset], ctimes(v209, tbl[37 + tbloffset])));
    store(out, 7 << outShift, ctimesminusplus(reverse(v207), tbl[34 + tbloffset], ctimes(v207, tbl[35 + tbloffset])));
    real2 v71 = ctimesminusplus(reverse(v61), tbl[10 + tbloffset], ctimes(v61, tbl[11 + tbloffset]));
    real2 v51 = ctimesminusplus(reverse(v41), tbl[6 + tbloffset], ctimes(v41, tbl[7 + tbloffset]));
    real2 v91 = ctimesminusplus(reverse(v81), tbl[14 + tbloffset], ctimes(v81, tbl[15 + tbloffset]));
    real2 v185 = plus(v51, v91);
    real2 v179 = reverse(minus(v91, v51));
    real2 v31 = ctimesminusplus(reverse(v21), tbl[2 + tbloffset], ctimes(v21, tbl[3 + tbloffset]));
    real2 v184 = plus(v31, v71);
    real2 v180 = minus(v71, v31);
    store(out, 1 << outShift, plus(v184, v185));
    real2 v198 = minus(v184, v185);
    store(out, 9 << outShift, ctimesminusplus(v198, tbl[0 + tbloffset], ctimes(reverse(v198), tbl[1 + tbloffset])));
    real2 v181 = minusplus(v179, v180);
    store(out, 5 << outShift, ctimesminusplus(reverse(v181), tbl[30 + tbloffset], ctimes(v181, tbl[31 + tbloffset])));
    real2 v183 = minusplus(uminus(v179), v180);
    store(out, 13 << outShift, ctimesminusplus(reverse(v183), tbl[32 + tbloffset], ctimes(v183, tbl[33 + tbloffset])));
  }
}

ALIGNED(8192) void tbut16f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i=0;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * i0;
    real2 v15 = load(in, 13 << inShift);
    real2 v7 = load(in, 5 << inShift);
    real2 v45 = plus(v7, v15);
    real2 v39 = reverse(minus(v7, v15));
    real2 v3 = load(in, 1 << inShift);
    real2 v11 = load(in, 9 << inShift);
    real2 v40 = minus(v11, v3);
    real2 v44 = plus(v3, v11);
    real2 v124 = plus(v44, v45);
    real2 v120 = minus(v45, v44);
    real2 v41 = minusplus(v39, v40);
    real2 v43 = minusplus(uminus(v39), v40);
    real2 v57 = timesminusplus(reverse(v43), load(tbl, 8 * VECWIDTH + tbloffset), times(v43, load(tbl, 9 * VECWIDTH + tbloffset)));
    real2 v13 = load(in, 11 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v84 = plus(v5, v13);
    real2 v80 = minus(v13, v5);
    real2 v17 = load(in, 15 << inShift);
    real2 v9 = load(in, 7 << inShift);
    real2 v85 = plus(v9, v17);
    real2 v79 = reverse(minus(v9, v17));
    real2 v119 = reverse(minus(v84, v85));
    real2 v125 = plus(v84, v85);
    real2 v145 = plus(v124, v125);
    real2 v139 = reverse(minus(v124, v125));
    real2 v121 = minusplus(v119, v120);
    real2 v123 = minusplus(uminus(v119), v120);
    real2 v137 = timesminusplus(reverse(v123), load(tbl, 24 * VECWIDTH + tbloffset), times(v123, load(tbl, 25 * VECWIDTH + tbloffset)));
    real2 v131 = timesminusplus(reverse(v121), load(tbl, 22 * VECWIDTH + tbloffset), times(v121, load(tbl, 23 * VECWIDTH + tbloffset)));
    real2 v4 = load(in, 2 << inShift);
    real2 v12 = load(in, 10 << inShift);
    real2 v64 = plus(v4, v12);
    real2 v60 = minus(v12, v4);
    real2 v8 = load(in, 6 << inShift);
    real2 v16 = load(in, 14 << inShift);
    real2 v65 = plus(v8, v16);
    real2 v59 = reverse(minus(v8,
v16)); real2 v99 = reverse(minus(v64, v65)); real2 v105 = plus(v64, v65); real2 v14 = load(in, 12 << inShift); real2 v6 = load(in, 4 << inShift); real2 v25 = plus(v6, v14); real2 v19 = reverse(minus(v6, v14)); real2 v10 = load(in, 8 << inShift); real2 v2 = load(in, 0 << inShift); real2 v20 = minus(v10, v2); real2 v24 = plus(v2, v10); real2 v104 = plus(v24, v25); real2 v100 = minus(v25, v24); real2 v140 = minus(v105, v104); real2 v144 = plus(v104, v105); scatter(out, 0, 16, plus(v144, v145)); real2 v158 = minus(v144, v145); scatter(out, 8, 16, timesminusplus(v158, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v158), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v143 = minusplus(uminus(v139), v140); scatter(out, 12, 16, timesminusplus(reverse(v143), load(tbl, 28 * VECWIDTH + tbloffset), times(v143, load(tbl, 29 * VECWIDTH + tbloffset)))); real2 v141 = minusplus(v139, v140); scatter(out, 4, 16, timesminusplus(reverse(v141), load(tbl, 26 * VECWIDTH + tbloffset), times(v141, load(tbl, 27 * VECWIDTH + tbloffset)))); real2 v101 = minusplus(v99, v100); real2 v103 = minusplus(uminus(v99), v100); real2 v117 = timesminusplus(reverse(v103), load(tbl, 20 * VECWIDTH + tbloffset), times(v103, load(tbl, 21 * VECWIDTH + tbloffset))); scatter(out, 6, 16, plus(v117, v137)); real2 v172 = minus(v117, v137); scatter(out, 14, 16, timesminusplus(v172, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v172), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v111 = timesminusplus(reverse(v101), load(tbl, 18 * VECWIDTH + tbloffset), times(v101, load(tbl, 19 * VECWIDTH + tbloffset))); scatter(out, 2, 16, plus(v111, v131)); real2 v166 = minus(v111, v131); scatter(out, 10, 16, timesminusplus(v166, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v166), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v23 = minusplus(uminus(v19), v20); real2 v21 = minusplus(v19, v20); real2 v81 = minusplus(v79, v80); real2 v83 = minusplus(uminus(v79), v80); real2 v97 = timesminusplus(reverse(v83), load(tbl, 16 
* VECWIDTH + tbloffset), times(v83, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v211 = plus(v57, v97); real2 v205 = reverse(minus(v57, v97)); real2 v61 = minusplus(v59, v60); real2 v63 = minusplus(uminus(v59), v60); real2 v77 = timesminusplus(reverse(v63), load(tbl, 12 * VECWIDTH + tbloffset), times(v63, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v37 = timesminusplus(reverse(v23), load(tbl, 4 * VECWIDTH + tbloffset), times(v23, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v210 = plus(v37, v77); real2 v206 = minus(v77, v37); scatter(out, 3, 16, plus(v210, v211)); real2 v224 = minus(v210, v211); scatter(out, 11, 16, timesminusplus(v224, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v224), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v207 = minusplus(v205, v206); real2 v209 = minusplus(uminus(v205), v206); scatter(out, 15, 16, timesminusplus(reverse(v209), load(tbl, 36 * VECWIDTH + tbloffset), times(v209, load(tbl, 37 * VECWIDTH + tbloffset)))); scatter(out, 7, 16, timesminusplus(reverse(v207), load(tbl, 34 * VECWIDTH + tbloffset), times(v207, load(tbl, 35 * VECWIDTH + tbloffset)))); real2 v71 = timesminusplus(reverse(v61), load(tbl, 10 * VECWIDTH + tbloffset), times(v61, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v51 = timesminusplus(reverse(v41), load(tbl, 6 * VECWIDTH + tbloffset), times(v41, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v91 = timesminusplus(reverse(v81), load(tbl, 14 * VECWIDTH + tbloffset), times(v81, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v185 = plus(v51, v91); real2 v179 = reverse(minus(v51, v91)); real2 v31 = timesminusplus(reverse(v21), load(tbl, 2 * VECWIDTH + tbloffset), times(v21, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v184 = plus(v31, v71); real2 v180 = minus(v71, v31); scatter(out, 1, 16, plus(v184, v185)); real2 v198 = minus(v184, v185); scatter(out, 9, 16, timesminusplus(v198, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v198), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v181 = minusplus(v179, v180); 
scatter(out, 5, 16, timesminusplus(reverse(v181), load(tbl, 30 * VECWIDTH + tbloffset), times(v181, load(tbl, 31 * VECWIDTH + tbloffset)))); real2 v183 = minusplus(uminus(v179), v180); scatter(out, 13, 16, timesminusplus(reverse(v183), load(tbl, 32 * VECWIDTH + tbloffset), times(v183, load(tbl, 33 * VECWIDTH + tbloffset)))); } }
/*
 * tbut16b: fully unrolled 16-point butterfly kernel, counterpart of tbut16f.
 *
 * For every i in [0, k), k = 1 << (inShift - LOG2VECWIDTH), the loop body
 *   - reads 16 real2 values with load(in, j << inShift), j = 0..15,
 *     where in = in0 + (i << LOG2VECWIDTH) * 2;
 *   - combines them with plus/minus/minusplus/uminus/reverse;
 *   - multiplies intermediate values by coefficients fetched with
 *     load(tbl, n * VECWIDTH + tbloffset), tbloffset = K * (i << LOG2VECWIDTH);
 *   - writes 16 results with scatter(out, j, 16, ...), j = 0..15,
 *     where out = out0 + q[i].
 *
 * Parameters:
 *   out0    - output base pointer; q[i] is added to it per iteration.
 *   q       - per-iteration output offsets, indexed by the loop counter.
 *   in0     - input base pointer.
 *   inShift - shift applied to the input index and used to derive the
 *             trip count k.
 *   tbl     - coefficient table read through load().
 *   K       - multiplier used to form tbloffset.
 *
 * The only difference from tbut16f is the operand order inside each
 * reverse(minus(a, b)) term (for example v39 uses minus(v15, v7) here).
 * The table indices and the scatter targets are the same in both.
 *
 * NOTE(review): the 'b' suffix presumably denotes the backward (inverse)
 * transform - confirm against the generator that emits this text.
 */
ALIGNED(8192) void tbut16b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * i0; real2 v15 = load(in, 13 << inShift); real2 v7 = load(in, 5 << inShift); real2 v45 = plus(v7, v15); real2 v39 = reverse(minus(v15, v7)); real2 v3 = load(in, 1 << inShift); real2 v11 = load(in, 9 << inShift); real2 v40 = minus(v11, v3); real2 v44 = plus(v3, v11); real2 v124 = plus(v44, v45); real2 v120 = minus(v45, v44); real2 v41 = minusplus(v39, v40); real2 v43 = minusplus(uminus(v39), v40); real2 v57 = timesminusplus(reverse(v43), load(tbl, 8 * VECWIDTH + tbloffset), times(v43, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v13 = load(in, 11 << inShift); real2 v5 = load(in, 3 << inShift); real2 v84 = plus(v5, v13); real2 v80 = minus(v13, v5); real2 v17 = load(in, 15 << inShift); real2 v9 = load(in, 7 << inShift); real2 v85 = plus(v9, v17); real2 v79 = reverse(minus(v17, v9)); real2 v119 = reverse(minus(v85, v84)); real2 v125 = plus(v84, v85); real2 v145 = plus(v124, v125); real2 v139 = reverse(minus(v125, v124)); real2 v121 = minusplus(v119, v120); real2 v123 = minusplus(uminus(v119), v120); real2 v137 = timesminusplus(reverse(v123), load(tbl, 24 * VECWIDTH + tbloffset), times(v123, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v131 = timesminusplus(reverse(v121), load(tbl, 22 * VECWIDTH + tbloffset), times(v121, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v4 = load(in, 2
<< inShift); real2 v12 = load(in, 10 << inShift); real2 v64 = plus(v4, v12); real2 v60 = minus(v12, v4); real2 v8 = load(in, 6 << inShift); real2 v16 = load(in, 14 << inShift); real2 v65 = plus(v8, v16); real2 v59 = reverse(minus(v16, v8)); real2 v99 = reverse(minus(v65, v64)); real2 v105 = plus(v64, v65); real2 v14 = load(in, 12 << inShift); real2 v6 = load(in, 4 << inShift); real2 v25 = plus(v6, v14); real2 v19 = reverse(minus(v14, v6)); real2 v10 = load(in, 8 << inShift); real2 v2 = load(in, 0 << inShift); real2 v20 = minus(v10, v2); real2 v24 = plus(v2, v10); real2 v104 = plus(v24, v25); real2 v100 = minus(v25, v24); real2 v140 = minus(v105, v104); real2 v144 = plus(v104, v105); scatter(out, 0, 16, plus(v144, v145)); real2 v158 = minus(v144, v145); scatter(out, 8, 16, timesminusplus(v158, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v158), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v143 = minusplus(uminus(v139), v140); scatter(out, 12, 16, timesminusplus(reverse(v143), load(tbl, 28 * VECWIDTH + tbloffset), times(v143, load(tbl, 29 * VECWIDTH + tbloffset)))); real2 v141 = minusplus(v139, v140); scatter(out, 4, 16, timesminusplus(reverse(v141), load(tbl, 26 * VECWIDTH + tbloffset), times(v141, load(tbl, 27 * VECWIDTH + tbloffset)))); real2 v101 = minusplus(v99, v100); real2 v103 = minusplus(uminus(v99), v100); real2 v117 = timesminusplus(reverse(v103), load(tbl, 20 * VECWIDTH + tbloffset), times(v103, load(tbl, 21 * VECWIDTH + tbloffset))); scatter(out, 6, 16, plus(v117, v137)); real2 v172 = minus(v117, v137); scatter(out, 14, 16, timesminusplus(v172, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v172), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v111 = timesminusplus(reverse(v101), load(tbl, 18 * VECWIDTH + tbloffset), times(v101, load(tbl, 19 * VECWIDTH + tbloffset))); scatter(out, 2, 16, plus(v111, v131)); real2 v166 = minus(v111, v131); scatter(out, 10, 16, timesminusplus(v166, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v166), 
load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v23 = minusplus(uminus(v19), v20); real2 v21 = minusplus(v19, v20); real2 v81 = minusplus(v79, v80); real2 v83 = minusplus(uminus(v79), v80); real2 v97 = timesminusplus(reverse(v83), load(tbl, 16 * VECWIDTH + tbloffset), times(v83, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v211 = plus(v57, v97); real2 v205 = reverse(minus(v97, v57)); real2 v61 = minusplus(v59, v60); real2 v63 = minusplus(uminus(v59), v60); real2 v77 = timesminusplus(reverse(v63), load(tbl, 12 * VECWIDTH + tbloffset), times(v63, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v37 = timesminusplus(reverse(v23), load(tbl, 4 * VECWIDTH + tbloffset), times(v23, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v210 = plus(v37, v77); real2 v206 = minus(v77, v37); scatter(out, 3, 16, plus(v210, v211)); real2 v224 = minus(v210, v211); scatter(out, 11, 16, timesminusplus(v224, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v224), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v207 = minusplus(v205, v206); real2 v209 = minusplus(uminus(v205), v206); scatter(out, 15, 16, timesminusplus(reverse(v209), load(tbl, 36 * VECWIDTH + tbloffset), times(v209, load(tbl, 37 * VECWIDTH + tbloffset)))); scatter(out, 7, 16, timesminusplus(reverse(v207), load(tbl, 34 * VECWIDTH + tbloffset), times(v207, load(tbl, 35 * VECWIDTH + tbloffset)))); real2 v71 = timesminusplus(reverse(v61), load(tbl, 10 * VECWIDTH + tbloffset), times(v61, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v51 = timesminusplus(reverse(v41), load(tbl, 6 * VECWIDTH + tbloffset), times(v41, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v91 = timesminusplus(reverse(v81), load(tbl, 14 * VECWIDTH + tbloffset), times(v81, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v185 = plus(v51, v91); real2 v179 = reverse(minus(v91, v51)); real2 v31 = timesminusplus(reverse(v21), load(tbl, 2 * VECWIDTH + tbloffset), times(v21, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v184 = plus(v31, v71); real2 v180 = minus(v71, v31); 
scatter(out, 1, 16, plus(v184, v185)); real2 v198 = minus(v184, v185); scatter(out, 9, 16, timesminusplus(v198, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v198), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v181 = minusplus(v179, v180); scatter(out, 5, 16, timesminusplus(reverse(v181), load(tbl, 30 * VECWIDTH + tbloffset), times(v181, load(tbl, 31 * VECWIDTH + tbloffset)))); real2 v183 = minusplus(uminus(v179), v180); scatter(out, 13, 16, timesminusplus(reverse(v183), load(tbl, 32 * VECWIDTH + tbloffset), times(v183, load(tbl, 33 * VECWIDTH + tbloffset)))); } } #endif #if MAXBUTWIDTH >= 5
/*
 * dft32f: fully unrolled 32-point kernel, compiled only when
 * MAXBUTWIDTH >= 5.
 *
 * For every i in [0, k), k = 1 << (shift - LOG2VECWIDTH), the loop body
 *   - reads 32 real2 values with load(in, j << shift), j = 0..31,
 *     where in = in0 + (i << LOG2VECWIDTH) * 2;
 *   - combines them with plus/minus/minusplus/uminus/uplusminus/
 *     uminusplus/reverse;
 *   - scales intermediate values with ctimes/ctimesminusplus using
 *     entries of ctbl, which is not a parameter and must be declared
 *     elsewhere in the file;
 *   - writes 32 results with store(out, j << shift, ...), j = 0..31,
 *     where out = out0 + (i << LOG2VECWIDTH) * 2.
 *
 * Parameters:
 *   out0  - output base pointer.
 *   in0   - input base pointer.
 *   shift - shift applied to both the load and the store index, and used
 *           to derive the trip count k.
 *
 * Iterations are distributed by "#pragma omp parallel for".
 *
 * NOTE(review): the 'f' suffix presumably denotes the forward transform;
 * dft32b is the sibling with the opposite operand order in its
 * reverse(minus(a, b)) terms and different ctbl indices - confirm against
 * the generator that emits this text.
 */
ALIGNED(8192) void dft32f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) { const int k = 1 << (shift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + i0*2; const real *in = in0 + i0*2; real2 v15 = load(in, 13 << shift); real2 v31 = load(in, 29 << shift); real2 v124 = reverse(minus(v15, v31)); real2 v130 = plus(v15, v31); real2 v23 = load(in, 21 << shift); real2 v7 = load(in, 5 << shift); real2 v129 = plus(v7, v23); real2 v125 = minus(v23, v7); real2 v193 = reverse(minus(v129, v130)); real2 v199 = plus(v129, v130); real2 v126 = minusplus(v124, v125); real2 v128 = minusplus(uminus(v124), v125); real2 v139 = ctimesminusplus(reverse(v128), ctbl[7], ctimes(v128, ctbl[6])); real2 v134 = ctimesminusplus(reverse(v126), ctbl[9], ctimes(v126, ctbl[11])); real2 v19 = load(in, 17 << shift); real2 v3 = load(in, 1 << shift); real2 v52 = minus(v19, v3); real2 v56 = plus(v3, v19); real2 v27 = load(in, 25 << shift); real2 v11 = load(in, 9 << shift); real2 v51 = reverse(minus(v11, v27)); real2 v57 = plus(v11, v27); real2 v194 = minus(v57, v56); real2 v198 = plus(v56, v57); real2 v53 = minusplus(v51, v52); real2 v55 = minusplus(uminus(v51), v52); real2 v69 = ctimesminusplus(reverse(v55), ctbl[11], ctimes(v55, ctbl[9])); real2 v262 = plus(v198, v199); real2 v258 = minus(v199, v198); real2
v195 = minusplus(v193, v194); real2 v197 = minusplus(uminus(v193), v194); real2 v207 = ctimesminusplus(reverse(v197), ctbl[5], ctimes(v197, ctbl[3])); real2 v414 = plus(v69, v139); real2 v410 = minus(v139, v69); real2 v203 = ctimesminusplus(reverse(v195), ctbl[3], ctimes(v195, ctbl[5])); real2 v17 = load(in, 15 << shift); real2 v33 = load(in, 31 << shift); real2 v159 = reverse(minus(v17, v33)); real2 v165 = plus(v17, v33); real2 v25 = load(in, 23 << shift); real2 v9 = load(in, 7 << shift); real2 v164 = plus(v9, v25); real2 v160 = minus(v25, v9); real2 v231 = plus(v164, v165); real2 v225 = reverse(minus(v164, v165)); real2 v161 = minusplus(v159, v160); real2 v163 = minusplus(uminus(v159), v160); real2 v175 = ctimesminusplus(reverse(v163), ctbl[10], ctimes(v163, ctbl[8])); real2 v13 = load(in, 11 << shift); real2 v29 = load(in, 27 << shift); real2 v95 = plus(v13, v29); real2 v89 = reverse(minus(v13, v29)); real2 v21 = load(in, 19 << shift); real2 v5 = load(in, 3 << shift); real2 v90 = minus(v21, v5); real2 v94 = plus(v5, v21); real2 v226 = minus(v95, v94); real2 v230 = plus(v94, v95); real2 v229 = minusplus(uminus(v225), v226); real2 v227 = minusplus(v225, v226); real2 v239 = ctimesminusplus(reverse(v229), ctbl[4], ctimes(v229, ctbl[2])); real2 v257 = reverse(minus(v230, v231)); real2 v263 = plus(v230, v231); real2 v235 = ctimesminusplus(reverse(v227), ctbl[5], ctimes(v227, ctbl[3])); real2 v261 = minusplus(uminus(v257), v258); real2 v259 = minusplus(v257, v258); real2 v267 = ctimesminusplus(reverse(v259), ctbl[1], ctimes(v259, ctbl[1])); real2 v298 = reverse(minus(v203, v235)); real2 v304 = plus(v203, v235); real2 v271 = ctimesminusplus(reverse(v261), ctbl[1], ctimes(v261, ctbl[0])); real2 v279 = plus(v262, v263); real2 v273 = reverse(minus(v262, v263)); real2 v317 = reverse(minus(v207, v239)); real2 v323 = plus(v207, v239); real2 v8 = load(in, 6 << shift); real2 v24 = load(in, 22 << shift); real2 v146 = plus(v8, v24); real2 v142 = minus(v24, v8); real2 v28 = 
load(in, 26 << shift); real2 v12 = load(in, 10 << shift); real2 v77 = plus(v12, v28); real2 v71 = reverse(minus(v12, v28)); real2 v16 = load(in, 14 << shift); real2 v32 = load(in, 30 << shift); real2 v147 = plus(v16, v32); real2 v141 = reverse(minus(v16, v32)); real2 v209 = reverse(minus(v146, v147)); real2 v215 = plus(v146, v147); real2 v20 = load(in, 18 << shift); real2 v4 = load(in, 2 << shift); real2 v72 = minus(v20, v4); real2 v76 = plus(v4, v20); real2 v214 = plus(v76, v77); real2 v210 = minus(v77, v76); real2 v247 = plus(v214, v215); real2 v241 = reverse(minus(v214, v215)); real2 v213 = minusplus(uminus(v209), v210); real2 v211 = minusplus(v209, v210); real2 v223 = ctimesminusplus(reverse(v213), ctbl[1], ctimes(v213, ctbl[0])); real2 v219 = ctimesminusplus(reverse(v211), ctbl[1], ctimes(v211, ctbl[1])); real2 v26 = load(in, 24 << shift); real2 v10 = load(in, 8 << shift); real2 v35 = reverse(minus(v10, v26)); real2 v41 = plus(v10, v26); real2 v2 = load(in, 0 << shift); real2 v18 = load(in, 16 << shift); real2 v40 = plus(v2, v18); real2 v36 = minus(v18, v2); real2 v178 = minus(v41, v40); real2 v182 = plus(v40, v41); real2 v6 = load(in, 4 << shift); real2 v22 = load(in, 20 << shift); real2 v107 = minus(v22, v6); real2 v111 = plus(v6, v22); real2 v14 = load(in, 12 << shift); real2 v30 = load(in, 28 << shift); real2 v112 = plus(v14, v30); real2 v106 = reverse(minus(v14, v30)); real2 v177 = reverse(minus(v111, v112)); real2 v183 = plus(v111, v112); real2 v191 = minus(uminusplus(v177), v178); real2 v187 = minus(uplusminus(v177), v178); real2 v322 = plus(v191, v223); real2 v318 = minus(v223, v191); store(out, 22 << shift, minus(v322, v323)); store(out, 6 << shift, plus(v322, v323)); store(out, 14 << shift, minus(uplusminus(v317), v318)); store(out, 30 << shift, minus(uminusplus(v317), v318)); real2 v246 = plus(v182, v183); real2 v242 = minus(v183, v182); real2 v274 = minus(v247, v246); store(out, 24 << shift, minus(uminusplus(v273), v274)); store(out, 8 << shift, 
minus(uplusminus(v273), v274)); real2 v278 = plus(v246, v247); store(out, 16 << shift, minus(v278, v279)); store(out, 0 << shift, plus(v278, v279)); real2 v303 = plus(v187, v219); store(out, 2 << shift, plus(v303, v304)); store(out, 18 << shift, minus(v303, v304)); real2 v299 = minus(v219, v187); store(out, 26 << shift, minus(uminusplus(v298), v299)); store(out, 10 << shift, minus(uplusminus(v298), v299)); real2 v255 = minus(uminusplus(v241), v242); real2 v251 = minus(uplusminus(v241), v242); store(out, 20 << shift, minus(v251, v267)); store(out, 4 << shift, plus(v251, v267)); store(out, 28 << shift, minus(v255, v271)); store(out, 12 << shift, plus(v255, v271)); real2 v75 = minusplus(uminus(v71), v72); real2 v73 = minusplus(v71, v72); real2 v143 = minusplus(v141, v142); real2 v145 = minusplus(uminus(v141), v142); real2 v157 = ctimesminusplus(reverse(v145), ctbl[4], ctimes(v145, ctbl[2])); real2 v87 = ctimesminusplus(reverse(v75), ctbl[5], ctimes(v75, ctbl[3])); real2 v91 = minusplus(v89, v90); real2 v93 = minusplus(uminus(v89), v90); real2 v104 = ctimesminusplus(reverse(v93), ctbl[13], ctimes(v93, ctbl[12])); real2 v399 = plus(v87, v157); real2 v393 = reverse(minus(v87, v157)); real2 v110 = minusplus(uminus(v106), v107); real2 v108 = minusplus(v106, v107); real2 v415 = plus(v104, v175); real2 v409 = reverse(minus(v104, v175)); real2 v411 = minusplus(v409, v410); real2 v413 = minusplus(uminus(v409), v410); real2 v49 = minus(uminusplus(v35), v36); real2 v45 = minus(uplusminus(v35), v36); real2 v122 = ctimesminusplus(reverse(v110), ctbl[1], ctimes(v110, ctbl[0])); real2 v423 = ctimesminusplus(reverse(v413), ctbl[1], ctimes(v413, ctbl[0])); real2 v398 = plus(v49, v122); real2 v394 = minus(v122, v49); real2 v407 = minus(uminusplus(v393), v394); store(out, 15 << shift, plus(v407, v423)); store(out, 31 << shift, minus(v407, v423)); real2 v403 = minus(uplusminus(v393), v394); real2 v419 = ctimesminusplus(reverse(v411), ctbl[1], ctimes(v411, ctbl[1])); store(out, 7 << 
shift, plus(v403, v419)); store(out, 23 << shift, minus(v403, v419)); real2 v431 = plus(v414, v415); real2 v425 = reverse(minus(v414, v415)); real2 v430 = plus(v398, v399); store(out, 19 << shift, minus(v430, v431)); store(out, 3 << shift, plus(v430, v431)); real2 v426 = minus(v399, v398); store(out, 27 << shift, minus(uminusplus(v425), v426)); store(out, 11 << shift, minus(uplusminus(v425), v426)); real2 v63 = ctimesminusplus(reverse(v53), ctbl[7], ctimes(v53, ctbl[13])); real2 v151 = ctimesminusplus(reverse(v143), ctbl[5], ctimes(v143, ctbl[3])); real2 v99 = ctimesminusplus(reverse(v91), ctbl[11], ctimes(v91, ctbl[9])); real2 v169 = ctimesminusplus(reverse(v161), ctbl[13], ctimes(v161, ctbl[7])); real2 v352 = reverse(minus(v99, v169)); real2 v358 = plus(v99, v169); real2 v357 = plus(v63, v134); real2 v353 = minus(v134, v63); real2 v117 = ctimesminusplus(reverse(v108), ctbl[1], ctimes(v108, ctbl[1])); real2 v374 = plus(v357, v358); real2 v368 = reverse(minus(v357, v358)); real2 v83 = ctimesminusplus(reverse(v73), ctbl[3], ctimes(v73, ctbl[5])); real2 v336 = reverse(minus(v83, v151)); real2 v342 = plus(v83, v151); real2 v341 = plus(v45, v117); real2 v337 = minus(v117, v45); real2 v373 = plus(v341, v342); real2 v369 = minus(v342, v341); store(out, 9 << shift, minus(uplusminus(v368), v369)); store(out, 25 << shift, minus(uminusplus(v368), v369)); store(out, 17 << shift, minus(v373, v374)); store(out, 1 << shift, plus(v373, v374)); real2 v354 = minusplus(v352, v353); real2 v356 = minusplus(uminus(v352), v353); real2 v362 = ctimesminusplus(reverse(v354), ctbl[1], ctimes(v354, ctbl[1])); real2 v346 = minus(uplusminus(v336), v337); store(out, 21 << shift, minus(v346, v362)); store(out, 5 << shift, plus(v346, v362)); real2 v350 = minus(uminusplus(v336), v337); real2 v366 = ctimesminusplus(reverse(v356), ctbl[1], ctimes(v356, ctbl[0])); store(out, 29 << shift, minus(v350, v366)); store(out, 13 << shift, plus(v350, v366)); } }
/*
 * dft32b: fully unrolled 32-point kernel, counterpart of dft32f.
 *
 * For every i in [0, k), k = 1 << (shift - LOG2VECWIDTH), the loop body
 *   - reads 32 real2 values with load(in, j << shift), j = 0..31,
 *     where in = in0 + (i << LOG2VECWIDTH) * 2;
 *   - combines them with plus/minus/minusplus/uminus/uplusminus/
 *     uminusplus/reverse;
 *   - scales intermediate values with ctimes/ctimesminusplus using
 *     entries of ctbl, which is not a parameter and must be declared
 *     elsewhere in the file;
 *   - writes 32 results with store(out, j << shift, ...), j = 0..31,
 *     where out = out0 + (i << LOG2VECWIDTH) * 2.
 *
 * Parameters:
 *   out0  - output base pointer.
 *   in0   - input base pointer.
 *   shift - shift applied to both the load and the store index, and used
 *           to derive the trip count k.
 *
 * Iterations are distributed by "#pragma omp parallel for".
 *
 * NOTE(review): the 'b' suffix presumably denotes the backward (inverse)
 * transform. Compared with dft32f, the reverse(minus(a, b)) terms take
 * their operands in the opposite order and different ctbl entries are
 * selected - confirm against the generator that emits this text.
 */
ALIGNED(8192) void dft32b_%CONFIG%_%ISA%(real
*RESTRICT out0, const real *RESTRICT in0, const int shift) { const int k = 1 << (shift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + i0*2; const real *in = in0 + i0*2; real2 v6 = load(in, 4 << shift); real2 v22 = load(in, 20 << shift); real2 v109 = minus(v22, v6); real2 v113 = plus(v6, v22); real2 v14 = load(in, 12 << shift); real2 v30 = load(in, 28 << shift); real2 v108 = reverse(minus(v30, v14)); real2 v114 = plus(v14, v30); real2 v183 = plus(v113, v114); real2 v177 = reverse(minus(v114, v113)); real2 v110 = minusplus(v108, v109); real2 v112 = minusplus(uminus(v108), v109); real2 v120 = ctimesminusplus(reverse(v110), ctbl[0], ctimes(v110, ctbl[1])); real2 v124 = ctimesminusplus(reverse(v112), ctbl[0], ctimes(v112, ctbl[0])); real2 v10 = load(in, 8 << shift); real2 v26 = load(in, 24 << shift); real2 v35 = reverse(minus(v26, v10)); real2 v41 = plus(v10, v26); real2 v18 = load(in, 16 << shift); real2 v2 = load(in, 0 << shift); real2 v36 = minus(v18, v2); real2 v40 = plus(v2, v18); real2 v178 = minus(v41, v40); real2 v182 = plus(v40, v41); real2 v45 = minus(uplusminus(v35), v36); real2 v49 = minus(uminusplus(v35), v36); real2 v398 = plus(v49, v124); real2 v394 = minus(v124, v49); real2 v242 = minus(v183, v182); real2 v246 = plus(v182, v183); real2 v341 = plus(v45, v120); real2 v337 = minus(v120, v45); real2 v187 = minus(uplusminus(v177), v178); real2 v191 = minus(uminusplus(v177), v178); real2 v7 = load(in, 5 << shift); real2 v23 = load(in, 21 << shift); real2 v131 = plus(v7, v23); real2 v127 = minus(v23, v7); real2 v15 = load(in, 13 << shift); real2 v31 = load(in, 29 << shift); real2 v126 = reverse(minus(v31, v15)); real2 v132 = plus(v15, v31); real2 v199 = plus(v131, v132); real2 v193 = reverse(minus(v132, v131)); real2 v128 = minusplus(v126, v127); real2 v130 = minusplus(uminus(v126), v127); real2 v138 = ctimesminusplus(reverse(v128), ctbl[10], ctimes(v128, ctbl[11])); real2 v21 = load(in, 19 
<< shift); real2 v5 = load(in, 3 << shift); real2 v92 = minus(v21, v5); real2 v96 = plus(v5, v21); real2 v29 = load(in, 27 << shift); real2 v13 = load(in, 11 << shift); real2 v97 = plus(v13, v29); real2 v91 = reverse(minus(v29, v13)); real2 v95 = minusplus(uminus(v91), v92); real2 v93 = minusplus(v91, v92); real2 v230 = plus(v96, v97); real2 v226 = minus(v97, v96); real2 v17 = load(in, 15 << shift); real2 v33 = load(in, 31 << shift); real2 v166 = plus(v17, v33); real2 v160 = reverse(minus(v33, v17)); real2 v9 = load(in, 7 << shift); real2 v25 = load(in, 23 << shift); real2 v161 = minus(v25, v9); real2 v165 = plus(v9, v25); real2 v231 = plus(v165, v166); real2 v225 = reverse(minus(v166, v165)); real2 v263 = plus(v230, v231); real2 v257 = reverse(minus(v231, v230)); real2 v229 = minusplus(uminus(v225), v226); real2 v227 = minusplus(v225, v226); real2 v235 = ctimesminusplus(reverse(v227), ctbl[2], ctimes(v227, ctbl[3])); real2 v3 = load(in, 1 << shift); real2 v19 = load(in, 17 << shift); real2 v52 = minus(v19, v3); real2 v56 = plus(v3, v19); real2 v27 = load(in, 25 << shift); real2 v11 = load(in, 9 << shift); real2 v51 = reverse(minus(v27, v11)); real2 v57 = plus(v11, v27); real2 v198 = plus(v56, v57); real2 v194 = minus(v57, v56); real2 v258 = minus(v199, v198); real2 v262 = plus(v198, v199); real2 v273 = reverse(minus(v263, v262)); real2 v279 = plus(v262, v263); real2 v259 = minusplus(v257, v258); real2 v261 = minusplus(uminus(v257), v258); real2 v271 = ctimesminusplus(reverse(v261), ctbl[0], ctimes(v261, ctbl[0])); real2 v197 = minusplus(uminus(v193), v194); real2 v195 = minusplus(v193, v194); real2 v203 = ctimesminusplus(reverse(v195), ctbl[4], ctimes(v195, ctbl[5])); real2 v298 = reverse(minus(v235, v203)); real2 v304 = plus(v203, v235); real2 v267 = ctimesminusplus(reverse(v259), ctbl[0], ctimes(v259, ctbl[1])); real2 v4 = load(in, 2 << shift); real2 v20 = load(in, 18 << shift); real2 v72 = minus(v20, v4); real2 v76 = plus(v4, v20); real2 v28 = load(in, 26 << 
shift); real2 v12 = load(in, 10 << shift); real2 v71 = reverse(minus(v28, v12)); real2 v77 = plus(v12, v28); real2 v210 = minus(v77, v76); real2 v214 = plus(v76, v77); real2 v32 = load(in, 30 << shift); real2 v16 = load(in, 14 << shift); real2 v150 = plus(v16, v32); real2 v144 = reverse(minus(v32, v16)); real2 v8 = load(in, 6 << shift); real2 v24 = load(in, 22 << shift); real2 v149 = plus(v8, v24); real2 v145 = minus(v24, v8); real2 v215 = plus(v149, v150); real2 v209 = reverse(minus(v150, v149)); real2 v241 = reverse(minus(v215, v214)); real2 v247 = plus(v214, v215); real2 v251 = minus(uplusminus(v241), v242); real2 v255 = minus(uminusplus(v241), v242); store(out, 12 << shift, plus(v255, v271)); store(out, 28 << shift, minus(v255, v271)); store(out, 4 << shift, plus(v251, v267)); store(out, 20 << shift, minus(v251, v267)); real2 v278 = plus(v246, v247); real2 v274 = minus(v247, v246); store(out, 24 << shift, minus(uminusplus(v273), v274)); store(out, 8 << shift, minus(uplusminus(v273), v274)); store(out, 16 << shift, minus(v278, v279)); store(out, 0 << shift, plus(v278, v279)); real2 v211 = minusplus(v209, v210); real2 v213 = minusplus(uminus(v209), v210); real2 v219 = ctimesminusplus(reverse(v211), ctbl[0], ctimes(v211, ctbl[1])); real2 v299 = minus(v219, v187); real2 v303 = plus(v187, v219); store(out, 2 << shift, plus(v303, v304)); store(out, 18 << shift, minus(v303, v304)); store(out, 10 << shift, minus(uplusminus(v298), v299)); store(out, 26 << shift, minus(uminusplus(v298), v299)); real2 v223 = ctimesminusplus(reverse(v213), ctbl[0], ctimes(v213, ctbl[0])); real2 v322 = plus(v191, v223); real2 v318 = minus(v223, v191); real2 v239 = ctimesminusplus(reverse(v229), ctbl[3], ctimes(v229, ctbl[2])); real2 v207 = ctimesminusplus(reverse(v197), ctbl[2], ctimes(v197, ctbl[3])); real2 v317 = reverse(minus(v239, v207)); store(out, 30 << shift, minus(uminusplus(v317), v318)); store(out, 14 << shift, minus(uplusminus(v317), v318)); real2 v323 = plus(v207, v239); 
store(out, 6 << shift, plus(v322, v323)); store(out, 22 << shift, minus(v322, v323)); real2 v101 = ctimesminusplus(reverse(v93), ctbl[8], ctimes(v93, ctbl[9])); real2 v75 = minusplus(uminus(v71), v72); real2 v73 = minusplus(v71, v72); real2 v83 = ctimesminusplus(reverse(v73), ctbl[4], ctimes(v73, ctbl[5])); real2 v162 = minusplus(v160, v161); real2 v164 = minusplus(uminus(v160), v161); real2 v55 = minusplus(uminus(v51), v52); real2 v53 = minusplus(v51, v52); real2 v171 = ctimesminusplus(reverse(v162), ctbl[6], ctimes(v162, ctbl[7])); real2 v352 = reverse(minus(v171, v101)); real2 v358 = plus(v101, v171); real2 v63 = ctimesminusplus(reverse(v53), ctbl[12], ctimes(v53, ctbl[13])); real2 v146 = minusplus(v144, v145); real2 v148 = minusplus(uminus(v144), v145); real2 v154 = ctimesminusplus(reverse(v146), ctbl[2], ctimes(v146, ctbl[3])); real2 v342 = plus(v83, v154); real2 v336 = reverse(minus(v154, v83)); real2 v373 = plus(v341, v342); real2 v369 = minus(v342, v341); real2 v353 = minus(v138, v63); real2 v357 = plus(v63, v138); real2 v374 = plus(v357, v358); store(out, 1 << shift, plus(v373, v374)); store(out, 17 << shift, minus(v373, v374)); real2 v368 = reverse(minus(v358, v357)); store(out, 25 << shift, minus(uminusplus(v368), v369)); store(out, 9 << shift, minus(uplusminus(v368), v369)); real2 v346 = minus(uplusminus(v336), v337); real2 v350 = minus(uminusplus(v336), v337); real2 v356 = minusplus(uminus(v352), v353); real2 v354 = minusplus(v352, v353); real2 v362 = ctimesminusplus(reverse(v354), ctbl[0], ctimes(v354, ctbl[1])); store(out, 21 << shift, minus(v346, v362)); store(out, 5 << shift, plus(v346, v362)); real2 v366 = ctimesminusplus(reverse(v356), ctbl[0], ctimes(v356, ctbl[0])); store(out, 13 << shift, plus(v350, v366)); store(out, 29 << shift, minus(v350, v366)); real2 v89 = ctimesminusplus(reverse(v75), ctbl[2], ctimes(v75, ctbl[3])); real2 v106 = ctimesminusplus(reverse(v95), ctbl[6], ctimes(v95, ctbl[12])); real2 v142 = ctimesminusplus(reverse(v130), 
ctbl[12], ctimes(v130, ctbl[6])); real2 v158 = ctimesminusplus(reverse(v148), ctbl[3], ctimes(v148, ctbl[2])); real2 v393 = reverse(minus(v158, v89)); real2 v399 = plus(v89, v158); real2 v403 = minus(uplusminus(v393), v394); real2 v407 = minus(uminusplus(v393), v394); real2 v175 = ctimesminusplus(reverse(v164), ctbl[9], ctimes(v164, ctbl[8])); real2 v415 = plus(v106, v175); real2 v409 = reverse(minus(v175, v106)); real2 v69 = ctimesminusplus(reverse(v55), ctbl[8], ctimes(v55, ctbl[9])); real2 v414 = plus(v69, v142); real2 v410 = minus(v142, v69); real2 v411 = minusplus(v409, v410); real2 v413 = minusplus(uminus(v409), v410); real2 v419 = ctimesminusplus(reverse(v411), ctbl[0], ctimes(v411, ctbl[1])); store(out, 23 << shift, minus(v403, v419)); store(out, 7 << shift, plus(v403, v419)); real2 v423 = ctimesminusplus(reverse(v413), ctbl[0], ctimes(v413, ctbl[0])); store(out, 15 << shift, plus(v407, v423)); store(out, 31 << shift, minus(v407, v423)); real2 v431 = plus(v414, v415); real2 v425 = reverse(minus(v415, v414)); real2 v430 = plus(v398, v399); real2 v426 = minus(v399, v398); store(out, 27 << shift, minus(uminusplus(v425), v426)); store(out, 11 << shift, minus(uplusminus(v425), v426)); store(out, 19 << shift, minus(v430, v431)); store(out, 3 << shift, plus(v430, v431)); } } ALIGNED(8192) void but32f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * (i0 >> outShift); real2 v14 = load(in, 12 << inShift); real2 v30 = load(in, 28 << inShift); real2 v115 = reverse(minus(v14, v30)); real2 v121 = plus(v14, v30); real2 v6 = load(in, 4 << inShift); real2 v22 = load(in, 20 << inShift); real2 v120 = plus(v6, v22); real2 v116 = minus(v22, v6); real2 v201 = plus(v120, 
v121); real2 v195 = reverse(minus(v120, v121)); real2 v119 = minusplus(uminus(v115), v116); real2 v117 = minusplus(v115, v116); real2 v133 = ctimesminusplus(reverse(v119), tbl[20 + tbloffset], ctimes(v119, tbl[21 + tbloffset])); real2 v127 = ctimesminusplus(reverse(v117), tbl[18 + tbloffset], ctimes(v117, tbl[19 + tbloffset])); real2 v18 = load(in, 16 << inShift); real2 v2 = load(in, 0 << inShift); real2 v40 = plus(v2, v18); real2 v36 = minus(v18, v2); real2 v10 = load(in, 8 << inShift); real2 v26 = load(in, 24 << inShift); real2 v41 = plus(v10, v26); real2 v35 = reverse(minus(v10, v26)); real2 v200 = plus(v40, v41); real2 v196 = minus(v41, v40); real2 v37 = minusplus(v35, v36); real2 v39 = minusplus(uminus(v35), v36); real2 v53 = ctimesminusplus(reverse(v39), tbl[4 + tbloffset], ctimes(v39, tbl[5 + tbloffset])); real2 v276 = minus(v201, v200); real2 v280 = plus(v200, v201); real2 v47 = ctimesminusplus(reverse(v37), tbl[2 + tbloffset], ctimes(v37, tbl[3 + tbloffset])); real2 v199 = minusplus(uminus(v195), v196); real2 v197 = minusplus(v195, v196); real2 v486 = minus(v133, v53); real2 v490 = plus(v53, v133); real2 v213 = ctimesminusplus(reverse(v199), tbl[36 + tbloffset], ctimes(v199, tbl[37 + tbloffset])); real2 v207 = ctimesminusplus(reverse(v197), tbl[34 + tbloffset], ctimes(v197, tbl[35 + tbloffset])); real2 v28 = load(in, 26 << inShift); real2 v12 = load(in, 10 << inShift); real2 v81 = plus(v12, v28); real2 v75 = reverse(minus(v12, v28)); real2 v20 = load(in, 18 << inShift); real2 v4 = load(in, 2 << inShift); real2 v80 = plus(v4, v20); real2 v76 = minus(v20, v4); real2 v236 = minus(v81, v80); real2 v240 = plus(v80, v81); real2 v77 = minusplus(v75, v76); real2 v79 = minusplus(uminus(v75), v76); real2 v93 = ctimesminusplus(reverse(v79), tbl[12 + tbloffset], ctimes(v79, tbl[13 + tbloffset])); real2 v32 = load(in, 30 << inShift); real2 v16 = load(in, 14 << inShift); real2 v155 = reverse(minus(v16, v32)); real2 v161 = plus(v16, v32); real2 v24 = load(in, 22 << 
inShift); real2 v8 = load(in, 6 << inShift); real2 v160 = plus(v8, v24); real2 v156 = minus(v24, v8); real2 v235 = reverse(minus(v160, v161)); real2 v241 = plus(v160, v161); real2 v157 = minusplus(v155, v156); real2 v159 = minusplus(uminus(v155), v156); real2 v173 = ctimesminusplus(reverse(v159), tbl[28 + tbloffset], ctimes(v159, tbl[29 + tbloffset])); real2 v485 = reverse(minus(v93, v173)); real2 v491 = plus(v93, v173); real2 v489 = minusplus(uminus(v485), v486); real2 v487 = minusplus(v485, v486); real2 v239 = minusplus(uminus(v235), v236); real2 v237 = minusplus(v235, v236); real2 v253 = ctimesminusplus(reverse(v239), tbl[44 + tbloffset], ctimes(v239, tbl[45 + tbloffset])); real2 v497 = ctimesminusplus(reverse(v487), tbl[82 + tbloffset], ctimes(v487, tbl[83 + tbloffset])); real2 v530 = plus(v490, v491); real2 v526 = minus(v491, v490); real2 v503 = ctimesminusplus(reverse(v489), tbl[84 + tbloffset], ctimes(v489, tbl[85 + tbloffset])); real2 v247 = ctimesminusplus(reverse(v237), tbl[42 + tbloffset], ctimes(v237, tbl[43 + tbloffset])); real2 v356 = minus(v247, v207); real2 v360 = plus(v207, v247); real2 v386 = plus(v213, v253); real2 v382 = minus(v253, v213); real2 v17 = load(in, 15 << inShift); real2 v33 = load(in, 31 << inShift); real2 v175 = reverse(minus(v17, v33)); real2 v181 = plus(v17, v33); real2 v25 = load(in, 23 << inShift); real2 v9 = load(in, 7 << inShift); real2 v176 = minus(v25, v9); real2 v180 = plus(v9, v25); real2 v177 = minusplus(v175, v176); real2 v179 = minusplus(uminus(v175), v176); real2 v193 = ctimesminusplus(reverse(v179), tbl[32 + tbloffset], ctimes(v179, tbl[33 + tbloffset])); real2 v261 = plus(v180, v181); real2 v255 = reverse(minus(v180, v181)); real2 v29 = load(in, 27 << inShift); real2 v13 = load(in, 11 << inShift); real2 v101 = plus(v13, v29); real2 v95 = reverse(minus(v13, v29)); real2 v21 = load(in, 19 << inShift); real2 v5 = load(in, 3 << inShift); real2 v100 = plus(v5, v21); real2 v96 = minus(v21, v5); real2 v99 = 
minusplus(uminus(v95), v96); real2 v97 = minusplus(v95, v96); real2 v260 = plus(v100, v101); real2 v256 = minus(v101, v100); real2 v259 = minusplus(uminus(v255), v256); real2 v257 = minusplus(v255, v256); real2 v273 = ctimesminusplus(reverse(v259), tbl[48 + tbloffset], ctimes(v259, tbl[49 + tbloffset])); real2 v267 = ctimesminusplus(reverse(v257), tbl[46 + tbloffset], ctimes(v257, tbl[47 + tbloffset])); real2 v3 = load(in, 1 << inShift); real2 v19 = load(in, 17 << inShift); real2 v60 = plus(v3, v19); real2 v56 = minus(v19, v3); real2 v27 = load(in, 25 << inShift); real2 v11 = load(in, 9 << inShift); real2 v55 = reverse(minus(v11, v27)); real2 v61 = plus(v11, v27); real2 v220 = plus(v60, v61); real2 v216 = minus(v61, v60); real2 v7 = load(in, 5 << inShift); real2 v23 = load(in, 21 << inShift); real2 v136 = minus(v23, v7); real2 v140 = plus(v7, v23); real2 v15 = load(in, 13 << inShift); real2 v31 = load(in, 29 << inShift); real2 v135 = reverse(minus(v15, v31)); real2 v141 = plus(v15, v31); real2 v215 = reverse(minus(v140, v141)); real2 v221 = plus(v140, v141); real2 v219 = minusplus(uminus(v215), v216); real2 v217 = minusplus(v215, v216); real2 v227 = ctimesminusplus(reverse(v217), tbl[38 + tbloffset], ctimes(v217, tbl[39 + tbloffset])); real2 v355 = reverse(minus(v227, v267)); real2 v361 = plus(v227, v267); store(out, 2 << outShift, plus(v360, v361)); real2 v374 = minus(v360, v361); store(out, 18 << outShift, ctimesminusplus(v374, tbl[0 + tbloffset], ctimes(reverse(v374), tbl[1 + tbloffset]))); real2 v357 = minusplus(v355, v356); store(out, 10 << outShift, ctimesminusplus(reverse(v357), tbl[62 + tbloffset], ctimes(v357, tbl[63 + tbloffset]))); real2 v359 = minusplus(uminus(v355), v356); store(out, 26 << outShift, ctimesminusplus(reverse(v359), tbl[64 + tbloffset], ctimes(v359, tbl[65 + tbloffset]))); real2 v233 = ctimesminusplus(reverse(v219), tbl[40 + tbloffset], ctimes(v219, tbl[41 + tbloffset])); real2 v381 = reverse(minus(v233, v273)); real2 v387 = plus(v233, 
v273); store(out, 6 << outShift, plus(v386, v387)); real2 v400 = minus(v386, v387); store(out, 22 << outShift, ctimesminusplus(v400, tbl[0 + tbloffset], ctimes(reverse(v400), tbl[1 + tbloffset]))); real2 v383 = minusplus(v381, v382); real2 v385 = minusplus(uminus(v381), v382); store(out, 30 << outShift, ctimesminusplus(reverse(v385), tbl[68 + tbloffset], ctimes(v385, tbl[69 + tbloffset]))); store(out, 14 << outShift, ctimesminusplus(reverse(v383), tbl[66 + tbloffset], ctimes(v383, tbl[67 + tbloffset]))); real2 v137 = minusplus(v135, v136); real2 v139 = minusplus(uminus(v135), v136); real2 v153 = ctimesminusplus(reverse(v139), tbl[24 + tbloffset], ctimes(v139, tbl[25 + tbloffset])); real2 v113 = ctimesminusplus(reverse(v99), tbl[16 + tbloffset], ctimes(v99, tbl[17 + tbloffset])); real2 v511 = plus(v113, v193); real2 v505 = reverse(minus(v113, v193)); real2 v57 = minusplus(v55, v56); real2 v59 = minusplus(uminus(v55), v56); real2 v73 = ctimesminusplus(reverse(v59), tbl[8 + tbloffset], ctimes(v59, tbl[9 + tbloffset])); real2 v510 = plus(v73, v153); real2 v506 = minus(v153, v73); real2 v531 = plus(v510, v511); real2 v525 = reverse(minus(v510, v511)); store(out, 3 << outShift, plus(v530, v531)); real2 v544 = minus(v530, v531); store(out, 19 << outShift, ctimesminusplus(v544, tbl[0 + tbloffset], ctimes(reverse(v544), tbl[1 + tbloffset]))); real2 v527 = minusplus(v525, v526); store(out, 11 << outShift, ctimesminusplus(reverse(v527), tbl[90 + tbloffset], ctimes(v527, tbl[91 + tbloffset]))); real2 v529 = minusplus(uminus(v525), v526); store(out, 27 << outShift, ctimesminusplus(reverse(v529), tbl[92 + tbloffset], ctimes(v529, tbl[93 + tbloffset]))); real2 v509 = minusplus(uminus(v505), v506); real2 v507 = minusplus(v505, v506); real2 v523 = ctimesminusplus(reverse(v509), tbl[88 + tbloffset], ctimes(v509, tbl[89 + tbloffset])); store(out, 15 << outShift, plus(v503, v523)); real2 v556 = minus(v503, v523); store(out, 31 << outShift, ctimesminusplus(v556, tbl[0 + tbloffset], 
ctimes(reverse(v556), tbl[1 + tbloffset]))); real2 v517 = ctimesminusplus(reverse(v507), tbl[86 + tbloffset], ctimes(v507, tbl[87 + tbloffset])); store(out, 7 << outShift, plus(v497, v517)); real2 v550 = minus(v497, v517); store(out, 23 << outShift, ctimesminusplus(v550, tbl[0 + tbloffset], ctimes(reverse(v550), tbl[1 + tbloffset]))); real2 v275 = reverse(minus(v240, v241)); real2 v281 = plus(v240, v241); real2 v320 = plus(v280, v281); real2 v316 = minus(v281, v280); real2 v301 = plus(v260, v261); real2 v295 = reverse(minus(v260, v261)); real2 v300 = plus(v220, v221); real2 v296 = minus(v221, v220); real2 v315 = reverse(minus(v300, v301)); real2 v321 = plus(v300, v301); store(out, 0 << outShift, plus(v320, v321)); real2 v334 = minus(v320, v321); store(out, 16 << outShift, ctimesminusplus(v334, tbl[0 + tbloffset], ctimes(reverse(v334), tbl[1 + tbloffset]))); real2 v319 = minusplus(uminus(v315), v316); real2 v317 = minusplus(v315, v316); store(out, 8 << outShift, ctimesminusplus(reverse(v317), tbl[58 + tbloffset], ctimes(v317, tbl[59 + tbloffset]))); store(out, 24 << outShift, ctimesminusplus(reverse(v319), tbl[60 + tbloffset], ctimes(v319, tbl[61 + tbloffset]))); real2 v299 = minusplus(uminus(v295), v296); real2 v297 = minusplus(v295, v296); real2 v279 = minusplus(uminus(v275), v276); real2 v277 = minusplus(v275, v276); real2 v287 = ctimesminusplus(reverse(v277), tbl[50 + tbloffset], ctimes(v277, tbl[51 + tbloffset])); real2 v307 = ctimesminusplus(reverse(v297), tbl[54 + tbloffset], ctimes(v297, tbl[55 + tbloffset])); store(out, 4 << outShift, plus(v287, v307)); real2 v342 = minus(v287, v307); store(out, 20 << outShift, ctimesminusplus(v342, tbl[0 + tbloffset], ctimes(reverse(v342), tbl[1 + tbloffset]))); real2 v313 = ctimesminusplus(reverse(v299), tbl[56 + tbloffset], ctimes(v299, tbl[57 + tbloffset])); real2 v293 = ctimesminusplus(reverse(v279), tbl[52 + tbloffset], ctimes(v279, tbl[53 + tbloffset])); store(out, 12 << outShift, plus(v293, v313)); real2 v348 = 
minus(v293, v313); store(out, 28 << outShift, ctimesminusplus(v348, tbl[0 + tbloffset], ctimes(reverse(v348), tbl[1 + tbloffset]))); real2 v87 = ctimesminusplus(reverse(v77), tbl[10 + tbloffset], ctimes(v77, tbl[11 + tbloffset])); real2 v147 = ctimesminusplus(reverse(v137), tbl[22 + tbloffset], ctimes(v137, tbl[23 + tbloffset])); real2 v187 = ctimesminusplus(reverse(v177), tbl[30 + tbloffset], ctimes(v177, tbl[31 + tbloffset])); real2 v167 = ctimesminusplus(reverse(v157), tbl[26 + tbloffset], ctimes(v157, tbl[27 + tbloffset])); real2 v413 = plus(v87, v167); real2 v407 = reverse(minus(v87, v167)); real2 v67 = ctimesminusplus(reverse(v57), tbl[6 + tbloffset], ctimes(v57, tbl[7 + tbloffset])); real2 v107 = ctimesminusplus(reverse(v97), tbl[14 + tbloffset], ctimes(v97, tbl[15 + tbloffset])); real2 v427 = reverse(minus(v107, v187)); real2 v433 = plus(v107, v187); real2 v432 = plus(v67, v147); real2 v428 = minus(v147, v67); real2 v453 = plus(v432, v433); real2 v447 = reverse(minus(v432, v433)); real2 v408 = minus(v127, v47); real2 v412 = plus(v47, v127); real2 v452 = plus(v412, v413); real2 v448 = minus(v413, v412); store(out, 1 << outShift, plus(v452, v453)); real2 v466 = minus(v452, v453); store(out, 17 << outShift, ctimesminusplus(v466, tbl[0 + tbloffset], ctimes(reverse(v466), tbl[1 + tbloffset]))); real2 v451 = minusplus(uminus(v447), v448); store(out, 25 << outShift, ctimesminusplus(reverse(v451), tbl[80 + tbloffset], ctimes(v451, tbl[81 + tbloffset]))); real2 v449 = minusplus(v447, v448); store(out, 9 << outShift, ctimesminusplus(reverse(v449), tbl[78 + tbloffset], ctimes(v449, tbl[79 + tbloffset]))); real2 v429 = minusplus(v427, v428); real2 v431 = minusplus(uminus(v427), v428); real2 v445 = ctimesminusplus(reverse(v431), tbl[76 + tbloffset], ctimes(v431, tbl[77 + tbloffset])); real2 v409 = minusplus(v407, v408); real2 v411 = minusplus(uminus(v407), v408); real2 v425 = ctimesminusplus(reverse(v411), tbl[72 + tbloffset], ctimes(v411, tbl[73 + tbloffset])); 
store(out, 13 << outShift, plus(v425, v445)); real2 v478 = minus(v425, v445); store(out, 29 << outShift, ctimesminusplus(v478, tbl[0 + tbloffset], ctimes(reverse(v478), tbl[1 + tbloffset]))); real2 v439 = ctimesminusplus(reverse(v429), tbl[74 + tbloffset], ctimes(v429, tbl[75 + tbloffset])); real2 v419 = ctimesminusplus(reverse(v409), tbl[70 + tbloffset], ctimes(v409, tbl[71 + tbloffset])); store(out, 5 << outShift, plus(v419, v439)); real2 v472 = minus(v419, v439); store(out, 21 << outShift, ctimesminusplus(v472, tbl[0 + tbloffset], ctimes(reverse(v472), tbl[1 + tbloffset]))); } }
/*
 * but32b_%CONFIG%_%ISA%: 32-input / 32-output butterfly pass.
 * NOTE(review): the "b" suffix presumably marks the backward (inverse)
 * direction -- confirm against the code generator. %CONFIG% and %ISA% are
 * template placeholders, presumably substituted when the per-ISA kernels
 * are generated -- confirm in the build scripts. ALIGNED(8192) is a macro
 * defined elsewhere; presumably an alignment attribute -- confirm.
 *
 * Parameters:
 *   out0     - base of the output buffer; iteration i writes relative to
 *              out0 + q[i].
 *   q        - per-iteration output offsets, indexed by the loop counter i.
 *   outShift - shift applied to the output slot number: result j is stored
 *              at offset (j << outShift). Also used to right-shift i0 when
 *              computing the coefficient offset.
 *   in0      - base of the input buffer; iteration i reads relative to
 *              in0 + i0*2, where i0 = i << LOG2VECWIDTH.
 *   inShift  - shift applied to the input slot number: input j is loaded
 *              from offset (j << inShift). Also fixes the trip count
 *              k = 1 << (inShift - LOG2VECWIDTH).
 *   tbl      - coefficient table, read as tbl[n + tbloffset]; the highest
 *              constant index n used in this function is 93.
 *   K        - multiplier for the coefficient offset:
 *              tbloffset = K * (i0 >> outShift).
 *
 * Each loop iteration loads the 32 real2 values at slots 0..31, combines
 * them with plus/minus/reverse/uminus/minusplus and the table-driven
 * ctimes/ctimesminusplus helpers, and stores one result to each of the
 * output slots 0..31. Nothing is returned; results are delivered only
 * through store().
 *
 * In the portion of the preceding kernel that is visible next to this one,
 * the only difference is that every reverse(minus(a, b)) here has its two
 * operands swapped.
 *
 * The loop carries "#pragma omp parallel for", so iterations may run
 * concurrently when OpenMP is enabled.
 * NOTE(review): this presumably relies on q[] giving each iteration a
 * non-overlapping destination -- confirm against the caller.
 */
ALIGNED(8192) void but32b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * (i0 >> outShift); real2 v14 = load(in, 12 << inShift); real2 v30 = load(in, 28 << inShift); real2 v115 = reverse(minus(v30, v14)); real2 v121 = plus(v14, v30); real2 v6 = load(in, 4 << inShift); real2 v22 = load(in, 20 << inShift); real2 v120 = plus(v6, v22); real2 v116 = minus(v22, v6); real2 v201 = plus(v120, v121); real2 v195 = reverse(minus(v121, v120)); real2 v119 = minusplus(uminus(v115), v116); real2 v117 = minusplus(v115, v116); real2 v133 = ctimesminusplus(reverse(v119), tbl[20 + tbloffset], ctimes(v119, tbl[21 + tbloffset])); real2 v127 = ctimesminusplus(reverse(v117), tbl[18 + tbloffset], ctimes(v117, tbl[19 + tbloffset])); real2 v18 = load(in, 16 << inShift); real2 v2 = load(in, 0 << inShift); real2 v40 = plus(v2, v18); real2 v36 = minus(v18, v2); real2 v10 = load(in, 8 << inShift); real2 v26 = load(in, 24 << inShift); real2 v41 = plus(v10, v26); real2 v35 = reverse(minus(v26, v10)); real2 v200 = plus(v40, v41); real2 v196 = minus(v41, v40); real2 v37 = minusplus(v35, v36); real2 v39 = 
minusplus(uminus(v35), v36); real2 v53 = ctimesminusplus(reverse(v39), tbl[4 + tbloffset], ctimes(v39, tbl[5 + tbloffset])); real2 v276 = minus(v201, v200); real2 v280 = plus(v200, v201); real2 v47 = ctimesminusplus(reverse(v37), tbl[2 + tbloffset], ctimes(v37, tbl[3 + tbloffset])); real2 v199 = minusplus(uminus(v195), v196); real2 v197 = minusplus(v195, v196); real2 v486 = minus(v133, v53); real2 v490 = plus(v53, v133); real2 v213 = ctimesminusplus(reverse(v199), tbl[36 + tbloffset], ctimes(v199, tbl[37 + tbloffset])); real2 v207 = ctimesminusplus(reverse(v197), tbl[34 + tbloffset], ctimes(v197, tbl[35 + tbloffset])); real2 v28 = load(in, 26 << inShift); real2 v12 = load(in, 10 << inShift); real2 v81 = plus(v12, v28); real2 v75 = reverse(minus(v28, v12)); real2 v20 = load(in, 18 << inShift); real2 v4 = load(in, 2 << inShift); real2 v80 = plus(v4, v20); real2 v76 = minus(v20, v4); real2 v236 = minus(v81, v80); real2 v240 = plus(v80, v81); real2 v77 = minusplus(v75, v76); real2 v79 = minusplus(uminus(v75), v76); real2 v93 = ctimesminusplus(reverse(v79), tbl[12 + tbloffset], ctimes(v79, tbl[13 + tbloffset])); real2 v32 = load(in, 30 << inShift); real2 v16 = load(in, 14 << inShift); real2 v155 = reverse(minus(v32, v16)); real2 v161 = plus(v16, v32); real2 v24 = load(in, 22 << inShift); real2 v8 = load(in, 6 << inShift); real2 v160 = plus(v8, v24); real2 v156 = minus(v24, v8); real2 v235 = reverse(minus(v161, v160)); real2 v241 = plus(v160, v161); real2 v157 = minusplus(v155, v156); real2 v159 = minusplus(uminus(v155), v156); real2 v173 = ctimesminusplus(reverse(v159), tbl[28 + tbloffset], ctimes(v159, tbl[29 + tbloffset])); real2 v485 = reverse(minus(v173, v93)); real2 v491 = plus(v93, v173); real2 v489 = minusplus(uminus(v485), v486); real2 v487 = minusplus(v485, v486); real2 v239 = minusplus(uminus(v235), v236); real2 v237 = minusplus(v235, v236); real2 v253 = ctimesminusplus(reverse(v239), tbl[44 + tbloffset], ctimes(v239, tbl[45 + tbloffset])); real2 v497 = 
ctimesminusplus(reverse(v487), tbl[82 + tbloffset], ctimes(v487, tbl[83 + tbloffset])); real2 v530 = plus(v490, v491); real2 v526 = minus(v491, v490); real2 v503 = ctimesminusplus(reverse(v489), tbl[84 + tbloffset], ctimes(v489, tbl[85 + tbloffset])); real2 v247 = ctimesminusplus(reverse(v237), tbl[42 + tbloffset], ctimes(v237, tbl[43 + tbloffset])); real2 v356 = minus(v247, v207); real2 v360 = plus(v207, v247); real2 v386 = plus(v213, v253); real2 v382 = minus(v253, v213); real2 v17 = load(in, 15 << inShift); real2 v33 = load(in, 31 << inShift); real2 v175 = reverse(minus(v33, v17)); real2 v181 = plus(v17, v33); real2 v25 = load(in, 23 << inShift); real2 v9 = load(in, 7 << inShift); real2 v176 = minus(v25, v9); real2 v180 = plus(v9, v25); real2 v177 = minusplus(v175, v176); real2 v179 = minusplus(uminus(v175), v176); real2 v193 = ctimesminusplus(reverse(v179), tbl[32 + tbloffset], ctimes(v179, tbl[33 + tbloffset])); real2 v261 = plus(v180, v181); real2 v255 = reverse(minus(v181, v180)); real2 v29 = load(in, 27 << inShift); real2 v13 = load(in, 11 << inShift); real2 v101 = plus(v13, v29); real2 v95 = reverse(minus(v29, v13)); real2 v21 = load(in, 19 << inShift); real2 v5 = load(in, 3 << inShift); real2 v100 = plus(v5, v21); real2 v96 = minus(v21, v5); real2 v99 = minusplus(uminus(v95), v96); real2 v97 = minusplus(v95, v96); real2 v260 = plus(v100, v101); real2 v256 = minus(v101, v100); real2 v259 = minusplus(uminus(v255), v256); real2 v257 = minusplus(v255, v256); real2 v273 = ctimesminusplus(reverse(v259), tbl[48 + tbloffset], ctimes(v259, tbl[49 + tbloffset])); real2 v267 = ctimesminusplus(reverse(v257), tbl[46 + tbloffset], ctimes(v257, tbl[47 + tbloffset])); real2 v3 = load(in, 1 << inShift); real2 v19 = load(in, 17 << inShift); real2 v60 = plus(v3, v19); real2 v56 = minus(v19, v3); real2 v27 = load(in, 25 << inShift); real2 v11 = load(in, 9 << inShift); real2 v55 = reverse(minus(v27, v11)); real2 v61 = plus(v11, v27); real2 v220 = plus(v60, v61); real2 v216 = 
minus(v61, v60); real2 v7 = load(in, 5 << inShift); real2 v23 = load(in, 21 << inShift); real2 v136 = minus(v23, v7); real2 v140 = plus(v7, v23); real2 v15 = load(in, 13 << inShift); real2 v31 = load(in, 29 << inShift); real2 v135 = reverse(minus(v31, v15)); real2 v141 = plus(v15, v31); real2 v215 = reverse(minus(v141, v140)); real2 v221 = plus(v140, v141); real2 v219 = minusplus(uminus(v215), v216); real2 v217 = minusplus(v215, v216); real2 v227 = ctimesminusplus(reverse(v217), tbl[38 + tbloffset], ctimes(v217, tbl[39 + tbloffset])); real2 v355 = reverse(minus(v267, v227)); real2 v361 = plus(v227, v267); store(out, 2 << outShift, plus(v360, v361)); real2 v374 = minus(v360, v361); store(out, 18 << outShift, ctimesminusplus(v374, tbl[0 + tbloffset], ctimes(reverse(v374), tbl[1 + tbloffset]))); real2 v357 = minusplus(v355, v356); store(out, 10 << outShift, ctimesminusplus(reverse(v357), tbl[62 + tbloffset], ctimes(v357, tbl[63 + tbloffset]))); real2 v359 = minusplus(uminus(v355), v356); store(out, 26 << outShift, ctimesminusplus(reverse(v359), tbl[64 + tbloffset], ctimes(v359, tbl[65 + tbloffset]))); real2 v233 = ctimesminusplus(reverse(v219), tbl[40 + tbloffset], ctimes(v219, tbl[41 + tbloffset])); real2 v381 = reverse(minus(v273, v233)); real2 v387 = plus(v233, v273); store(out, 6 << outShift, plus(v386, v387)); real2 v400 = minus(v386, v387); store(out, 22 << outShift, ctimesminusplus(v400, tbl[0 + tbloffset], ctimes(reverse(v400), tbl[1 + tbloffset]))); real2 v383 = minusplus(v381, v382); real2 v385 = minusplus(uminus(v381), v382); store(out, 30 << outShift, ctimesminusplus(reverse(v385), tbl[68 + tbloffset], ctimes(v385, tbl[69 + tbloffset]))); store(out, 14 << outShift, ctimesminusplus(reverse(v383), tbl[66 + tbloffset], ctimes(v383, tbl[67 + tbloffset]))); real2 v137 = minusplus(v135, v136); real2 v139 = minusplus(uminus(v135), v136); real2 v153 = ctimesminusplus(reverse(v139), tbl[24 + tbloffset], ctimes(v139, tbl[25 + tbloffset])); real2 v113 = 
ctimesminusplus(reverse(v99), tbl[16 + tbloffset], ctimes(v99, tbl[17 + tbloffset])); real2 v511 = plus(v113, v193); real2 v505 = reverse(minus(v193, v113)); real2 v57 = minusplus(v55, v56); real2 v59 = minusplus(uminus(v55), v56); real2 v73 = ctimesminusplus(reverse(v59), tbl[8 + tbloffset], ctimes(v59, tbl[9 + tbloffset])); real2 v510 = plus(v73, v153); real2 v506 = minus(v153, v73); real2 v531 = plus(v510, v511); real2 v525 = reverse(minus(v511, v510)); store(out, 3 << outShift, plus(v530, v531)); real2 v544 = minus(v530, v531); store(out, 19 << outShift, ctimesminusplus(v544, tbl[0 + tbloffset], ctimes(reverse(v544), tbl[1 + tbloffset]))); real2 v527 = minusplus(v525, v526); store(out, 11 << outShift, ctimesminusplus(reverse(v527), tbl[90 + tbloffset], ctimes(v527, tbl[91 + tbloffset]))); real2 v529 = minusplus(uminus(v525), v526); store(out, 27 << outShift, ctimesminusplus(reverse(v529), tbl[92 + tbloffset], ctimes(v529, tbl[93 + tbloffset]))); real2 v509 = minusplus(uminus(v505), v506); real2 v507 = minusplus(v505, v506); real2 v523 = ctimesminusplus(reverse(v509), tbl[88 + tbloffset], ctimes(v509, tbl[89 + tbloffset])); store(out, 15 << outShift, plus(v503, v523)); real2 v556 = minus(v503, v523); store(out, 31 << outShift, ctimesminusplus(v556, tbl[0 + tbloffset], ctimes(reverse(v556), tbl[1 + tbloffset]))); real2 v517 = ctimesminusplus(reverse(v507), tbl[86 + tbloffset], ctimes(v507, tbl[87 + tbloffset])); store(out, 7 << outShift, plus(v497, v517)); real2 v550 = minus(v497, v517); store(out, 23 << outShift, ctimesminusplus(v550, tbl[0 + tbloffset], ctimes(reverse(v550), tbl[1 + tbloffset]))); real2 v275 = reverse(minus(v241, v240)); real2 v281 = plus(v240, v241); real2 v320 = plus(v280, v281); real2 v316 = minus(v281, v280); real2 v301 = plus(v260, v261); real2 v295 = reverse(minus(v261, v260)); real2 v300 = plus(v220, v221); real2 v296 = minus(v221, v220); real2 v315 = reverse(minus(v301, v300)); real2 v321 = plus(v300, v301); store(out, 0 << outShift, 
plus(v320, v321)); real2 v334 = minus(v320, v321); store(out, 16 << outShift, ctimesminusplus(v334, tbl[0 + tbloffset], ctimes(reverse(v334), tbl[1 + tbloffset]))); real2 v319 = minusplus(uminus(v315), v316); real2 v317 = minusplus(v315, v316); store(out, 8 << outShift, ctimesminusplus(reverse(v317), tbl[58 + tbloffset], ctimes(v317, tbl[59 + tbloffset]))); store(out, 24 << outShift, ctimesminusplus(reverse(v319), tbl[60 + tbloffset], ctimes(v319, tbl[61 + tbloffset]))); real2 v299 = minusplus(uminus(v295), v296); real2 v297 = minusplus(v295, v296); real2 v279 = minusplus(uminus(v275), v276); real2 v277 = minusplus(v275, v276); real2 v287 = ctimesminusplus(reverse(v277), tbl[50 + tbloffset], ctimes(v277, tbl[51 + tbloffset])); real2 v307 = ctimesminusplus(reverse(v297), tbl[54 + tbloffset], ctimes(v297, tbl[55 + tbloffset])); store(out, 4 << outShift, plus(v287, v307)); real2 v342 = minus(v287, v307); store(out, 20 << outShift, ctimesminusplus(v342, tbl[0 + tbloffset], ctimes(reverse(v342), tbl[1 + tbloffset]))); real2 v313 = ctimesminusplus(reverse(v299), tbl[56 + tbloffset], ctimes(v299, tbl[57 + tbloffset])); real2 v293 = ctimesminusplus(reverse(v279), tbl[52 + tbloffset], ctimes(v279, tbl[53 + tbloffset])); store(out, 12 << outShift, plus(v293, v313)); real2 v348 = minus(v293, v313); store(out, 28 << outShift, ctimesminusplus(v348, tbl[0 + tbloffset], ctimes(reverse(v348), tbl[1 + tbloffset]))); real2 v87 = ctimesminusplus(reverse(v77), tbl[10 + tbloffset], ctimes(v77, tbl[11 + tbloffset])); real2 v147 = ctimesminusplus(reverse(v137), tbl[22 + tbloffset], ctimes(v137, tbl[23 + tbloffset])); real2 v187 = ctimesminusplus(reverse(v177), tbl[30 + tbloffset], ctimes(v177, tbl[31 + tbloffset])); real2 v167 = ctimesminusplus(reverse(v157), tbl[26 + tbloffset], ctimes(v157, tbl[27 + tbloffset])); real2 v413 = plus(v87, v167); real2 v407 = reverse(minus(v167, v87)); real2 v67 = ctimesminusplus(reverse(v57), tbl[6 + tbloffset], ctimes(v57, tbl[7 + tbloffset])); real2 
v107 = ctimesminusplus(reverse(v97), tbl[14 + tbloffset], ctimes(v97, tbl[15 + tbloffset])); real2 v427 = reverse(minus(v187, v107)); real2 v433 = plus(v107, v187); real2 v432 = plus(v67, v147); real2 v428 = minus(v147, v67); real2 v453 = plus(v432, v433); real2 v447 = reverse(minus(v433, v432)); real2 v408 = minus(v127, v47); real2 v412 = plus(v47, v127); real2 v452 = plus(v412, v413); real2 v448 = minus(v413, v412); store(out, 1 << outShift, plus(v452, v453)); real2 v466 = minus(v452, v453); store(out, 17 << outShift, ctimesminusplus(v466, tbl[0 + tbloffset], ctimes(reverse(v466), tbl[1 + tbloffset]))); real2 v451 = minusplus(uminus(v447), v448); store(out, 25 << outShift, ctimesminusplus(reverse(v451), tbl[80 + tbloffset], ctimes(v451, tbl[81 + tbloffset]))); real2 v449 = minusplus(v447, v448); store(out, 9 << outShift, ctimesminusplus(reverse(v449), tbl[78 + tbloffset], ctimes(v449, tbl[79 + tbloffset]))); real2 v429 = minusplus(v427, v428); real2 v431 = minusplus(uminus(v427), v428); real2 v445 = ctimesminusplus(reverse(v431), tbl[76 + tbloffset], ctimes(v431, tbl[77 + tbloffset])); real2 v409 = minusplus(v407, v408); real2 v411 = minusplus(uminus(v407), v408); real2 v425 = ctimesminusplus(reverse(v411), tbl[72 + tbloffset], ctimes(v411, tbl[73 + tbloffset])); store(out, 13 << outShift, plus(v425, v445)); real2 v478 = minus(v425, v445); store(out, 29 << outShift, ctimesminusplus(v478, tbl[0 + tbloffset], ctimes(reverse(v478), tbl[1 + tbloffset]))); real2 v439 = ctimesminusplus(reverse(v429), tbl[74 + tbloffset], ctimes(v429, tbl[75 + tbloffset])); real2 v419 = ctimesminusplus(reverse(v409), tbl[70 + tbloffset], ctimes(v409, tbl[71 + tbloffset])); store(out, 5 << outShift, plus(v419, v439)); real2 v472 = minus(v419, v439); store(out, 21 << outShift, ctimesminusplus(v472, tbl[0 + tbloffset], ctimes(reverse(v472), tbl[1 + tbloffset]))); } } ALIGNED(8192) void tbut32f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, 
const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * i0; real2 v14 = load(in, 12 << inShift); real2 v30 = load(in, 28 << inShift); real2 v115 = reverse(minus(v14, v30)); real2 v121 = plus(v14, v30); real2 v6 = load(in, 4 << inShift); real2 v22 = load(in, 20 << inShift); real2 v120 = plus(v6, v22); real2 v116 = minus(v22, v6); real2 v201 = plus(v120, v121); real2 v195 = reverse(minus(v120, v121)); real2 v119 = minusplus(uminus(v115), v116); real2 v117 = minusplus(v115, v116); real2 v133 = timesminusplus(reverse(v119), load(tbl, 20 * VECWIDTH + tbloffset), times(v119, load(tbl, 21 * VECWIDTH + tbloffset))); real2 v127 = timesminusplus(reverse(v117), load(tbl, 18 * VECWIDTH + tbloffset), times(v117, load(tbl, 19 * VECWIDTH + tbloffset))); real2 v18 = load(in, 16 << inShift); real2 v2 = load(in, 0 << inShift); real2 v40 = plus(v2, v18); real2 v36 = minus(v18, v2); real2 v10 = load(in, 8 << inShift); real2 v26 = load(in, 24 << inShift); real2 v41 = plus(v10, v26); real2 v35 = reverse(minus(v10, v26)); real2 v200 = plus(v40, v41); real2 v196 = minus(v41, v40); real2 v37 = minusplus(v35, v36); real2 v39 = minusplus(uminus(v35), v36); real2 v53 = timesminusplus(reverse(v39), load(tbl, 4 * VECWIDTH + tbloffset), times(v39, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v276 = minus(v201, v200); real2 v280 = plus(v200, v201); real2 v47 = timesminusplus(reverse(v37), load(tbl, 2 * VECWIDTH + tbloffset), times(v37, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v199 = minusplus(uminus(v195), v196); real2 v197 = minusplus(v195, v196); real2 v486 = minus(v133, v53); real2 v490 = plus(v53, v133); real2 v213 = timesminusplus(reverse(v199), load(tbl, 36 * VECWIDTH + tbloffset), times(v199, load(tbl, 37 * VECWIDTH + tbloffset))); real2 v207 = timesminusplus(reverse(v197), load(tbl, 34 * 
VECWIDTH + tbloffset), times(v197, load(tbl, 35 * VECWIDTH + tbloffset))); real2 v28 = load(in, 26 << inShift); real2 v12 = load(in, 10 << inShift); real2 v81 = plus(v12, v28); real2 v75 = reverse(minus(v12, v28)); real2 v20 = load(in, 18 << inShift); real2 v4 = load(in, 2 << inShift); real2 v80 = plus(v4, v20); real2 v76 = minus(v20, v4); real2 v236 = minus(v81, v80); real2 v240 = plus(v80, v81); real2 v77 = minusplus(v75, v76); real2 v79 = minusplus(uminus(v75), v76); real2 v93 = timesminusplus(reverse(v79), load(tbl, 12 * VECWIDTH + tbloffset), times(v79, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v32 = load(in, 30 << inShift); real2 v16 = load(in, 14 << inShift); real2 v155 = reverse(minus(v16, v32)); real2 v161 = plus(v16, v32); real2 v24 = load(in, 22 << inShift); real2 v8 = load(in, 6 << inShift); real2 v160 = plus(v8, v24); real2 v156 = minus(v24, v8); real2 v235 = reverse(minus(v160, v161)); real2 v241 = plus(v160, v161); real2 v157 = minusplus(v155, v156); real2 v159 = minusplus(uminus(v155), v156); real2 v173 = timesminusplus(reverse(v159), load(tbl, 28 * VECWIDTH + tbloffset), times(v159, load(tbl, 29 * VECWIDTH + tbloffset))); real2 v485 = reverse(minus(v93, v173)); real2 v491 = plus(v93, v173); real2 v489 = minusplus(uminus(v485), v486); real2 v487 = minusplus(v485, v486); real2 v239 = minusplus(uminus(v235), v236); real2 v237 = minusplus(v235, v236); real2 v253 = timesminusplus(reverse(v239), load(tbl, 44 * VECWIDTH + tbloffset), times(v239, load(tbl, 45 * VECWIDTH + tbloffset))); real2 v497 = timesminusplus(reverse(v487), load(tbl, 82 * VECWIDTH + tbloffset), times(v487, load(tbl, 83 * VECWIDTH + tbloffset))); real2 v530 = plus(v490, v491); real2 v526 = minus(v491, v490); real2 v503 = timesminusplus(reverse(v489), load(tbl, 84 * VECWIDTH + tbloffset), times(v489, load(tbl, 85 * VECWIDTH + tbloffset))); real2 v247 = timesminusplus(reverse(v237), load(tbl, 42 * VECWIDTH + tbloffset), times(v237, load(tbl, 43 * VECWIDTH + tbloffset))); real2 v356 = 
minus(v247, v207); real2 v360 = plus(v207, v247); real2 v386 = plus(v213, v253); real2 v382 = minus(v253, v213); real2 v17 = load(in, 15 << inShift); real2 v33 = load(in, 31 << inShift); real2 v175 = reverse(minus(v17, v33)); real2 v181 = plus(v17, v33); real2 v25 = load(in, 23 << inShift); real2 v9 = load(in, 7 << inShift); real2 v176 = minus(v25, v9); real2 v180 = plus(v9, v25); real2 v177 = minusplus(v175, v176); real2 v179 = minusplus(uminus(v175), v176); real2 v193 = timesminusplus(reverse(v179), load(tbl, 32 * VECWIDTH + tbloffset), times(v179, load(tbl, 33 * VECWIDTH + tbloffset))); real2 v261 = plus(v180, v181); real2 v255 = reverse(minus(v180, v181)); real2 v29 = load(in, 27 << inShift); real2 v13 = load(in, 11 << inShift); real2 v101 = plus(v13, v29); real2 v95 = reverse(minus(v13, v29)); real2 v21 = load(in, 19 << inShift); real2 v5 = load(in, 3 << inShift); real2 v100 = plus(v5, v21); real2 v96 = minus(v21, v5); real2 v99 = minusplus(uminus(v95), v96); real2 v97 = minusplus(v95, v96); real2 v260 = plus(v100, v101); real2 v256 = minus(v101, v100); real2 v259 = minusplus(uminus(v255), v256); real2 v257 = minusplus(v255, v256); real2 v273 = timesminusplus(reverse(v259), load(tbl, 48 * VECWIDTH + tbloffset), times(v259, load(tbl, 49 * VECWIDTH + tbloffset))); real2 v267 = timesminusplus(reverse(v257), load(tbl, 46 * VECWIDTH + tbloffset), times(v257, load(tbl, 47 * VECWIDTH + tbloffset))); real2 v3 = load(in, 1 << inShift); real2 v19 = load(in, 17 << inShift); real2 v60 = plus(v3, v19); real2 v56 = minus(v19, v3); real2 v27 = load(in, 25 << inShift); real2 v11 = load(in, 9 << inShift); real2 v55 = reverse(minus(v11, v27)); real2 v61 = plus(v11, v27); real2 v220 = plus(v60, v61); real2 v216 = minus(v61, v60); real2 v7 = load(in, 5 << inShift); real2 v23 = load(in, 21 << inShift); real2 v136 = minus(v23, v7); real2 v140 = plus(v7, v23); real2 v15 = load(in, 13 << inShift); real2 v31 = load(in, 29 << inShift); real2 v135 = reverse(minus(v15, v31)); real2 v141 
= plus(v15, v31); real2 v215 = reverse(minus(v140, v141)); real2 v221 = plus(v140, v141); real2 v219 = minusplus(uminus(v215), v216); real2 v217 = minusplus(v215, v216); real2 v227 = timesminusplus(reverse(v217), load(tbl, 38 * VECWIDTH + tbloffset), times(v217, load(tbl, 39 * VECWIDTH + tbloffset))); real2 v355 = reverse(minus(v227, v267)); real2 v361 = plus(v227, v267); scatter(out, 2, 32, plus(v360, v361)); real2 v374 = minus(v360, v361); scatter(out, 18, 32, timesminusplus(v374, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v374), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v357 = minusplus(v355, v356); scatter(out, 10, 32, timesminusplus(reverse(v357), load(tbl, 62 * VECWIDTH + tbloffset), times(v357, load(tbl, 63 * VECWIDTH + tbloffset)))); real2 v359 = minusplus(uminus(v355), v356); scatter(out, 26, 32, timesminusplus(reverse(v359), load(tbl, 64 * VECWIDTH + tbloffset), times(v359, load(tbl, 65 * VECWIDTH + tbloffset)))); real2 v233 = timesminusplus(reverse(v219), load(tbl, 40 * VECWIDTH + tbloffset), times(v219, load(tbl, 41 * VECWIDTH + tbloffset))); real2 v381 = reverse(minus(v233, v273)); real2 v387 = plus(v233, v273); scatter(out, 6, 32, plus(v386, v387)); real2 v400 = minus(v386, v387); scatter(out, 22, 32, timesminusplus(v400, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v400), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v383 = minusplus(v381, v382); real2 v385 = minusplus(uminus(v381), v382); scatter(out, 30, 32, timesminusplus(reverse(v385), load(tbl, 68 * VECWIDTH + tbloffset), times(v385, load(tbl, 69 * VECWIDTH + tbloffset)))); scatter(out, 14, 32, timesminusplus(reverse(v383), load(tbl, 66 * VECWIDTH + tbloffset), times(v383, load(tbl, 67 * VECWIDTH + tbloffset)))); real2 v137 = minusplus(v135, v136); real2 v139 = minusplus(uminus(v135), v136); real2 v153 = timesminusplus(reverse(v139), load(tbl, 24 * VECWIDTH + tbloffset), times(v139, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v113 = timesminusplus(reverse(v99), load(tbl, 16 * 
VECWIDTH + tbloffset), times(v99, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v511 = plus(v113, v193); real2 v505 = reverse(minus(v113, v193)); real2 v57 = minusplus(v55, v56); real2 v59 = minusplus(uminus(v55), v56); real2 v73 = timesminusplus(reverse(v59), load(tbl, 8 * VECWIDTH + tbloffset), times(v59, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v510 = plus(v73, v153); real2 v506 = minus(v153, v73); real2 v531 = plus(v510, v511); real2 v525 = reverse(minus(v510, v511)); scatter(out, 3, 32, plus(v530, v531)); real2 v544 = minus(v530, v531); scatter(out, 19, 32, timesminusplus(v544, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v544), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v527 = minusplus(v525, v526); scatter(out, 11, 32, timesminusplus(reverse(v527), load(tbl, 90 * VECWIDTH + tbloffset), times(v527, load(tbl, 91 * VECWIDTH + tbloffset)))); real2 v529 = minusplus(uminus(v525), v526); scatter(out, 27, 32, timesminusplus(reverse(v529), load(tbl, 92 * VECWIDTH + tbloffset), times(v529, load(tbl, 93 * VECWIDTH + tbloffset)))); real2 v509 = minusplus(uminus(v505), v506); real2 v507 = minusplus(v505, v506); real2 v523 = timesminusplus(reverse(v509), load(tbl, 88 * VECWIDTH + tbloffset), times(v509, load(tbl, 89 * VECWIDTH + tbloffset))); scatter(out, 15, 32, plus(v503, v523)); real2 v556 = minus(v503, v523); scatter(out, 31, 32, timesminusplus(v556, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v556), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v517 = timesminusplus(reverse(v507), load(tbl, 86 * VECWIDTH + tbloffset), times(v507, load(tbl, 87 * VECWIDTH + tbloffset))); scatter(out, 7, 32, plus(v497, v517)); real2 v550 = minus(v497, v517); scatter(out, 23, 32, timesminusplus(v550, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v550), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v275 = reverse(minus(v240, v241)); real2 v281 = plus(v240, v241); real2 v320 = plus(v280, v281); real2 v316 = minus(v281, v280); real2 v301 = plus(v260, v261); real2 
v295 = reverse(minus(v260, v261)); real2 v300 = plus(v220, v221); real2 v296 = minus(v221, v220); real2 v315 = reverse(minus(v300, v301)); real2 v321 = plus(v300, v301); scatter(out, 0, 32, plus(v320, v321)); real2 v334 = minus(v320, v321); scatter(out, 16, 32, timesminusplus(v334, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v334), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v319 = minusplus(uminus(v315), v316); real2 v317 = minusplus(v315, v316); scatter(out, 8, 32, timesminusplus(reverse(v317), load(tbl, 58 * VECWIDTH + tbloffset), times(v317, load(tbl, 59 * VECWIDTH + tbloffset)))); scatter(out, 24, 32, timesminusplus(reverse(v319), load(tbl, 60 * VECWIDTH + tbloffset), times(v319, load(tbl, 61 * VECWIDTH + tbloffset)))); real2 v299 = minusplus(uminus(v295), v296); real2 v297 = minusplus(v295, v296); real2 v279 = minusplus(uminus(v275), v276); real2 v277 = minusplus(v275, v276); real2 v287 = timesminusplus(reverse(v277), load(tbl, 50 * VECWIDTH + tbloffset), times(v277, load(tbl, 51 * VECWIDTH + tbloffset))); real2 v307 = timesminusplus(reverse(v297), load(tbl, 54 * VECWIDTH + tbloffset), times(v297, load(tbl, 55 * VECWIDTH + tbloffset))); scatter(out, 4, 32, plus(v287, v307)); real2 v342 = minus(v287, v307); scatter(out, 20, 32, timesminusplus(v342, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v342), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v313 = timesminusplus(reverse(v299), load(tbl, 56 * VECWIDTH + tbloffset), times(v299, load(tbl, 57 * VECWIDTH + tbloffset))); real2 v293 = timesminusplus(reverse(v279), load(tbl, 52 * VECWIDTH + tbloffset), times(v279, load(tbl, 53 * VECWIDTH + tbloffset))); scatter(out, 12, 32, plus(v293, v313)); real2 v348 = minus(v293, v313); scatter(out, 28, 32, timesminusplus(v348, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v348), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v87 = timesminusplus(reverse(v77), load(tbl, 10 * VECWIDTH + tbloffset), times(v77, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v147 
= timesminusplus(reverse(v137), load(tbl, 22 * VECWIDTH + tbloffset), times(v137, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v187 = timesminusplus(reverse(v177), load(tbl, 30 * VECWIDTH + tbloffset), times(v177, load(tbl, 31 * VECWIDTH + tbloffset))); real2 v167 = timesminusplus(reverse(v157), load(tbl, 26 * VECWIDTH + tbloffset), times(v157, load(tbl, 27 * VECWIDTH + tbloffset))); real2 v413 = plus(v87, v167); real2 v407 = reverse(minus(v87, v167)); real2 v67 = timesminusplus(reverse(v57), load(tbl, 6 * VECWIDTH + tbloffset), times(v57, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v107 = timesminusplus(reverse(v97), load(tbl, 14 * VECWIDTH + tbloffset), times(v97, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v427 = reverse(minus(v107, v187)); real2 v433 = plus(v107, v187); real2 v432 = plus(v67, v147); real2 v428 = minus(v147, v67); real2 v453 = plus(v432, v433); real2 v447 = reverse(minus(v432, v433)); real2 v408 = minus(v127, v47); real2 v412 = plus(v47, v127); real2 v452 = plus(v412, v413); real2 v448 = minus(v413, v412); scatter(out, 1, 32, plus(v452, v453)); real2 v466 = minus(v452, v453); scatter(out, 17, 32, timesminusplus(v466, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v466), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v451 = minusplus(uminus(v447), v448); scatter(out, 25, 32, timesminusplus(reverse(v451), load(tbl, 80 * VECWIDTH + tbloffset), times(v451, load(tbl, 81 * VECWIDTH + tbloffset)))); real2 v449 = minusplus(v447, v448); scatter(out, 9, 32, timesminusplus(reverse(v449), load(tbl, 78 * VECWIDTH + tbloffset), times(v449, load(tbl, 79 * VECWIDTH + tbloffset)))); real2 v429 = minusplus(v427, v428); real2 v431 = minusplus(uminus(v427), v428); real2 v445 = timesminusplus(reverse(v431), load(tbl, 76 * VECWIDTH + tbloffset), times(v431, load(tbl, 77 * VECWIDTH + tbloffset))); real2 v409 = minusplus(v407, v408); real2 v411 = minusplus(uminus(v407), v408); real2 v425 = timesminusplus(reverse(v411), load(tbl, 72 * VECWIDTH + tbloffset), 
times(v411, load(tbl, 73 * VECWIDTH + tbloffset))); scatter(out, 13, 32, plus(v425, v445)); real2 v478 = minus(v425, v445); scatter(out, 29, 32, timesminusplus(v478, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v478), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v439 = timesminusplus(reverse(v429), load(tbl, 74 * VECWIDTH + tbloffset), times(v429, load(tbl, 75 * VECWIDTH + tbloffset))); real2 v419 = timesminusplus(reverse(v409), load(tbl, 70 * VECWIDTH + tbloffset), times(v409, load(tbl, 71 * VECWIDTH + tbloffset))); scatter(out, 5, 32, plus(v419, v439)); real2 v472 = minus(v419, v439); scatter(out, 21, 32, timesminusplus(v472, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v472), load(tbl, 1 * VECWIDTH + tbloffset)))); } } /* tbut32b_%CONFIG%_%ISA%: generated, fully unrolled kernel that turns 32 real2 inputs into 32 real2 outputs per loop iteration (presumably the backward/inverse counterpart of tbut32f -- confirm against the generator). Inputs are read with load(in, n << inShift) for n = 0..31; coefficients with load(tbl, m * VECWIDTH + tbloffset), m between 0 and 93; results are written with scatter(out, n, 32, ...) for n = 0..31. Compared with tbut32f the visible difference is the operand order of minus() inside the reverse(minus(a, b)) terms; table indices and scatter slots are the same. NOTE(review): the statements are single-assignment temporaries emitted by a code generator; do not reorder or edit by hand. */ ALIGNED(8192) void tbut32b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { /* k: number of loop iterations, 2^(inShift - LOG2VECWIDTH) */ const int k = 1 << (inShift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { /* i0: i scaled by 2^LOG2VECWIDTH; it drives both the input offset (i0*2) and the table offset (K*i0) */ int i0 = i << LOG2VECWIDTH; /* output base for this iteration is taken from q[i] */ real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * i0; /* unrolled butterfly network: every vNNN below is assigned exactly once */ real2 v14 = load(in, 12 << inShift); real2 v30 = load(in, 28 << inShift); real2 v115 = reverse(minus(v30, v14)); real2 v121 = plus(v14, v30); real2 v6 = load(in, 4 << inShift); real2 v22 = load(in, 20 << inShift); real2 v120 = plus(v6, v22); real2 v116 = minus(v22, v6); real2 v201 = plus(v120, v121); real2 v195 = reverse(minus(v121, v120)); real2 v119 = minusplus(uminus(v115), v116); real2 v117 = minusplus(v115, v116); real2 v133 = timesminusplus(reverse(v119), load(tbl, 20 * VECWIDTH + tbloffset), times(v119, load(tbl, 21 * VECWIDTH + tbloffset))); real2 v127 = timesminusplus(reverse(v117), load(tbl, 18 * VECWIDTH + tbloffset), times(v117, load(tbl, 19 * VECWIDTH + tbloffset))); real2 v18 = load(in, 16 << inShift); real2 v2 = load(in, 0 << inShift); real2 v40 = plus(v2, v18); real2 v36 = minus(v18, v2); real2 v10 = load(in, 8 << inShift); real2 v26 = load(in, 24 << 
inShift); real2 v41 = plus(v10, v26); real2 v35 = reverse(minus(v26, v10)); real2 v200 = plus(v40, v41); real2 v196 = minus(v41, v40); real2 v37 = minusplus(v35, v36); real2 v39 = minusplus(uminus(v35), v36); real2 v53 = timesminusplus(reverse(v39), load(tbl, 4 * VECWIDTH + tbloffset), times(v39, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v276 = minus(v201, v200); real2 v280 = plus(v200, v201); real2 v47 = timesminusplus(reverse(v37), load(tbl, 2 * VECWIDTH + tbloffset), times(v37, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v199 = minusplus(uminus(v195), v196); real2 v197 = minusplus(v195, v196); real2 v486 = minus(v133, v53); real2 v490 = plus(v53, v133); real2 v213 = timesminusplus(reverse(v199), load(tbl, 36 * VECWIDTH + tbloffset), times(v199, load(tbl, 37 * VECWIDTH + tbloffset))); real2 v207 = timesminusplus(reverse(v197), load(tbl, 34 * VECWIDTH + tbloffset), times(v197, load(tbl, 35 * VECWIDTH + tbloffset))); real2 v28 = load(in, 26 << inShift); real2 v12 = load(in, 10 << inShift); real2 v81 = plus(v12, v28); real2 v75 = reverse(minus(v28, v12)); real2 v20 = load(in, 18 << inShift); real2 v4 = load(in, 2 << inShift); real2 v80 = plus(v4, v20); real2 v76 = minus(v20, v4); real2 v236 = minus(v81, v80); real2 v240 = plus(v80, v81); real2 v77 = minusplus(v75, v76); real2 v79 = minusplus(uminus(v75), v76); real2 v93 = timesminusplus(reverse(v79), load(tbl, 12 * VECWIDTH + tbloffset), times(v79, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v32 = load(in, 30 << inShift); real2 v16 = load(in, 14 << inShift); real2 v155 = reverse(minus(v32, v16)); real2 v161 = plus(v16, v32); real2 v24 = load(in, 22 << inShift); real2 v8 = load(in, 6 << inShift); real2 v160 = plus(v8, v24); real2 v156 = minus(v24, v8); real2 v235 = reverse(minus(v161, v160)); real2 v241 = plus(v160, v161); real2 v157 = minusplus(v155, v156); real2 v159 = minusplus(uminus(v155), v156); real2 v173 = timesminusplus(reverse(v159), load(tbl, 28 * VECWIDTH + tbloffset), times(v159, load(tbl, 29 * 
VECWIDTH + tbloffset))); real2 v485 = reverse(minus(v173, v93)); real2 v491 = plus(v93, v173); real2 v489 = minusplus(uminus(v485), v486); real2 v487 = minusplus(v485, v486); real2 v239 = minusplus(uminus(v235), v236); real2 v237 = minusplus(v235, v236); real2 v253 = timesminusplus(reverse(v239), load(tbl, 44 * VECWIDTH + tbloffset), times(v239, load(tbl, 45 * VECWIDTH + tbloffset))); real2 v497 = timesminusplus(reverse(v487), load(tbl, 82 * VECWIDTH + tbloffset), times(v487, load(tbl, 83 * VECWIDTH + tbloffset))); real2 v530 = plus(v490, v491); real2 v526 = minus(v491, v490); real2 v503 = timesminusplus(reverse(v489), load(tbl, 84 * VECWIDTH + tbloffset), times(v489, load(tbl, 85 * VECWIDTH + tbloffset))); real2 v247 = timesminusplus(reverse(v237), load(tbl, 42 * VECWIDTH + tbloffset), times(v237, load(tbl, 43 * VECWIDTH + tbloffset))); real2 v356 = minus(v247, v207); real2 v360 = plus(v207, v247); real2 v386 = plus(v213, v253); real2 v382 = minus(v253, v213); real2 v17 = load(in, 15 << inShift); real2 v33 = load(in, 31 << inShift); real2 v175 = reverse(minus(v33, v17)); real2 v181 = plus(v17, v33); real2 v25 = load(in, 23 << inShift); real2 v9 = load(in, 7 << inShift); real2 v176 = minus(v25, v9); real2 v180 = plus(v9, v25); real2 v177 = minusplus(v175, v176); real2 v179 = minusplus(uminus(v175), v176); real2 v193 = timesminusplus(reverse(v179), load(tbl, 32 * VECWIDTH + tbloffset), times(v179, load(tbl, 33 * VECWIDTH + tbloffset))); real2 v261 = plus(v180, v181); real2 v255 = reverse(minus(v181, v180)); real2 v29 = load(in, 27 << inShift); real2 v13 = load(in, 11 << inShift); real2 v101 = plus(v13, v29); real2 v95 = reverse(minus(v29, v13)); real2 v21 = load(in, 19 << inShift); real2 v5 = load(in, 3 << inShift); real2 v100 = plus(v5, v21); real2 v96 = minus(v21, v5); real2 v99 = minusplus(uminus(v95), v96); real2 v97 = minusplus(v95, v96); real2 v260 = plus(v100, v101); real2 v256 = minus(v101, v100); real2 v259 = minusplus(uminus(v255), v256); real2 v257 = 
minusplus(v255, v256); real2 v273 = timesminusplus(reverse(v259), load(tbl, 48 * VECWIDTH + tbloffset), times(v259, load(tbl, 49 * VECWIDTH + tbloffset))); real2 v267 = timesminusplus(reverse(v257), load(tbl, 46 * VECWIDTH + tbloffset), times(v257, load(tbl, 47 * VECWIDTH + tbloffset))); real2 v3 = load(in, 1 << inShift); real2 v19 = load(in, 17 << inShift); real2 v60 = plus(v3, v19); real2 v56 = minus(v19, v3); real2 v27 = load(in, 25 << inShift); real2 v11 = load(in, 9 << inShift); real2 v55 = reverse(minus(v27, v11)); real2 v61 = plus(v11, v27); real2 v220 = plus(v60, v61); real2 v216 = minus(v61, v60); real2 v7 = load(in, 5 << inShift); real2 v23 = load(in, 21 << inShift); real2 v136 = minus(v23, v7); real2 v140 = plus(v7, v23); real2 v15 = load(in, 13 << inShift); real2 v31 = load(in, 29 << inShift); real2 v135 = reverse(minus(v31, v15)); real2 v141 = plus(v15, v31); real2 v215 = reverse(minus(v141, v140)); real2 v221 = plus(v140, v141); real2 v219 = minusplus(uminus(v215), v216); real2 v217 = minusplus(v215, v216); real2 v227 = timesminusplus(reverse(v217), load(tbl, 38 * VECWIDTH + tbloffset), times(v217, load(tbl, 39 * VECWIDTH + tbloffset))); real2 v355 = reverse(minus(v267, v227)); real2 v361 = plus(v227, v267); scatter(out, 2, 32, plus(v360, v361)); real2 v374 = minus(v360, v361); scatter(out, 18, 32, timesminusplus(v374, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v374), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v357 = minusplus(v355, v356); scatter(out, 10, 32, timesminusplus(reverse(v357), load(tbl, 62 * VECWIDTH + tbloffset), times(v357, load(tbl, 63 * VECWIDTH + tbloffset)))); real2 v359 = minusplus(uminus(v355), v356); scatter(out, 26, 32, timesminusplus(reverse(v359), load(tbl, 64 * VECWIDTH + tbloffset), times(v359, load(tbl, 65 * VECWIDTH + tbloffset)))); real2 v233 = timesminusplus(reverse(v219), load(tbl, 40 * VECWIDTH + tbloffset), times(v219, load(tbl, 41 * VECWIDTH + tbloffset))); real2 v381 = reverse(minus(v273, v233)); real2 
v387 = plus(v233, v273); scatter(out, 6, 32, plus(v386, v387)); real2 v400 = minus(v386, v387); scatter(out, 22, 32, timesminusplus(v400, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v400), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v383 = minusplus(v381, v382); real2 v385 = minusplus(uminus(v381), v382); scatter(out, 30, 32, timesminusplus(reverse(v385), load(tbl, 68 * VECWIDTH + tbloffset), times(v385, load(tbl, 69 * VECWIDTH + tbloffset)))); scatter(out, 14, 32, timesminusplus(reverse(v383), load(tbl, 66 * VECWIDTH + tbloffset), times(v383, load(tbl, 67 * VECWIDTH + tbloffset)))); real2 v137 = minusplus(v135, v136); real2 v139 = minusplus(uminus(v135), v136); real2 v153 = timesminusplus(reverse(v139), load(tbl, 24 * VECWIDTH + tbloffset), times(v139, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v113 = timesminusplus(reverse(v99), load(tbl, 16 * VECWIDTH + tbloffset), times(v99, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v511 = plus(v113, v193); real2 v505 = reverse(minus(v193, v113)); real2 v57 = minusplus(v55, v56); real2 v59 = minusplus(uminus(v55), v56); real2 v73 = timesminusplus(reverse(v59), load(tbl, 8 * VECWIDTH + tbloffset), times(v59, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v510 = plus(v73, v153); real2 v506 = minus(v153, v73); real2 v531 = plus(v510, v511); real2 v525 = reverse(minus(v511, v510)); scatter(out, 3, 32, plus(v530, v531)); real2 v544 = minus(v530, v531); scatter(out, 19, 32, timesminusplus(v544, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v544), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v527 = minusplus(v525, v526); scatter(out, 11, 32, timesminusplus(reverse(v527), load(tbl, 90 * VECWIDTH + tbloffset), times(v527, load(tbl, 91 * VECWIDTH + tbloffset)))); real2 v529 = minusplus(uminus(v525), v526); scatter(out, 27, 32, timesminusplus(reverse(v529), load(tbl, 92 * VECWIDTH + tbloffset), times(v529, load(tbl, 93 * VECWIDTH + tbloffset)))); real2 v509 = minusplus(uminus(v505), v506); real2 v507 = minusplus(v505, 
v506); real2 v523 = timesminusplus(reverse(v509), load(tbl, 88 * VECWIDTH + tbloffset), times(v509, load(tbl, 89 * VECWIDTH + tbloffset))); scatter(out, 15, 32, plus(v503, v523)); real2 v556 = minus(v503, v523); scatter(out, 31, 32, timesminusplus(v556, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v556), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v517 = timesminusplus(reverse(v507), load(tbl, 86 * VECWIDTH + tbloffset), times(v507, load(tbl, 87 * VECWIDTH + tbloffset))); scatter(out, 7, 32, plus(v497, v517)); real2 v550 = minus(v497, v517); scatter(out, 23, 32, timesminusplus(v550, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v550), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v275 = reverse(minus(v241, v240)); real2 v281 = plus(v240, v241); real2 v320 = plus(v280, v281); real2 v316 = minus(v281, v280); real2 v301 = plus(v260, v261); real2 v295 = reverse(minus(v261, v260)); real2 v300 = plus(v220, v221); real2 v296 = minus(v221, v220); real2 v315 = reverse(minus(v301, v300)); real2 v321 = plus(v300, v301); scatter(out, 0, 32, plus(v320, v321)); real2 v334 = minus(v320, v321); scatter(out, 16, 32, timesminusplus(v334, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v334), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v319 = minusplus(uminus(v315), v316); real2 v317 = minusplus(v315, v316); scatter(out, 8, 32, timesminusplus(reverse(v317), load(tbl, 58 * VECWIDTH + tbloffset), times(v317, load(tbl, 59 * VECWIDTH + tbloffset)))); scatter(out, 24, 32, timesminusplus(reverse(v319), load(tbl, 60 * VECWIDTH + tbloffset), times(v319, load(tbl, 61 * VECWIDTH + tbloffset)))); real2 v299 = minusplus(uminus(v295), v296); real2 v297 = minusplus(v295, v296); real2 v279 = minusplus(uminus(v275), v276); real2 v277 = minusplus(v275, v276); real2 v287 = timesminusplus(reverse(v277), load(tbl, 50 * VECWIDTH + tbloffset), times(v277, load(tbl, 51 * VECWIDTH + tbloffset))); real2 v307 = timesminusplus(reverse(v297), load(tbl, 54 * VECWIDTH + tbloffset), times(v297, 
load(tbl, 55 * VECWIDTH + tbloffset))); scatter(out, 4, 32, plus(v287, v307)); real2 v342 = minus(v287, v307); scatter(out, 20, 32, timesminusplus(v342, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v342), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v313 = timesminusplus(reverse(v299), load(tbl, 56 * VECWIDTH + tbloffset), times(v299, load(tbl, 57 * VECWIDTH + tbloffset))); real2 v293 = timesminusplus(reverse(v279), load(tbl, 52 * VECWIDTH + tbloffset), times(v279, load(tbl, 53 * VECWIDTH + tbloffset))); scatter(out, 12, 32, plus(v293, v313)); real2 v348 = minus(v293, v313); scatter(out, 28, 32, timesminusplus(v348, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v348), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v87 = timesminusplus(reverse(v77), load(tbl, 10 * VECWIDTH + tbloffset), times(v77, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v147 = timesminusplus(reverse(v137), load(tbl, 22 * VECWIDTH + tbloffset), times(v137, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v187 = timesminusplus(reverse(v177), load(tbl, 30 * VECWIDTH + tbloffset), times(v177, load(tbl, 31 * VECWIDTH + tbloffset))); real2 v167 = timesminusplus(reverse(v157), load(tbl, 26 * VECWIDTH + tbloffset), times(v157, load(tbl, 27 * VECWIDTH + tbloffset))); real2 v413 = plus(v87, v167); real2 v407 = reverse(minus(v167, v87)); real2 v67 = timesminusplus(reverse(v57), load(tbl, 6 * VECWIDTH + tbloffset), times(v57, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v107 = timesminusplus(reverse(v97), load(tbl, 14 * VECWIDTH + tbloffset), times(v97, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v427 = reverse(minus(v187, v107)); real2 v433 = plus(v107, v187); real2 v432 = plus(v67, v147); real2 v428 = minus(v147, v67); real2 v453 = plus(v432, v433); real2 v447 = reverse(minus(v433, v432)); real2 v408 = minus(v127, v47); real2 v412 = plus(v47, v127); real2 v452 = plus(v412, v413); real2 v448 = minus(v413, v412); scatter(out, 1, 32, plus(v452, v453)); real2 v466 = minus(v452, v453); scatter(out, 
17, 32, timesminusplus(v466, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v466), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v451 = minusplus(uminus(v447), v448); scatter(out, 25, 32, timesminusplus(reverse(v451), load(tbl, 80 * VECWIDTH + tbloffset), times(v451, load(tbl, 81 * VECWIDTH + tbloffset)))); real2 v449 = minusplus(v447, v448); scatter(out, 9, 32, timesminusplus(reverse(v449), load(tbl, 78 * VECWIDTH + tbloffset), times(v449, load(tbl, 79 * VECWIDTH + tbloffset)))); real2 v429 = minusplus(v427, v428); real2 v431 = minusplus(uminus(v427), v428); real2 v445 = timesminusplus(reverse(v431), load(tbl, 76 * VECWIDTH + tbloffset), times(v431, load(tbl, 77 * VECWIDTH + tbloffset))); real2 v409 = minusplus(v407, v408); real2 v411 = minusplus(uminus(v407), v408); real2 v425 = timesminusplus(reverse(v411), load(tbl, 72 * VECWIDTH + tbloffset), times(v411, load(tbl, 73 * VECWIDTH + tbloffset))); scatter(out, 13, 32, plus(v425, v445)); real2 v478 = minus(v425, v445); scatter(out, 29, 32, timesminusplus(v478, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v478), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v439 = timesminusplus(reverse(v429), load(tbl, 74 * VECWIDTH + tbloffset), times(v429, load(tbl, 75 * VECWIDTH + tbloffset))); real2 v419 = timesminusplus(reverse(v409), load(tbl, 70 * VECWIDTH + tbloffset), times(v409, load(tbl, 71 * VECWIDTH + tbloffset))); scatter(out, 5, 32, plus(v419, v439)); real2 v472 = minus(v419, v439); scatter(out, 21, 32, timesminusplus(v472, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v472), load(tbl, 1 * VECWIDTH + tbloffset)))); } } #endif #if MAXBUTWIDTH >= 6 ALIGNED(8192) void dft64f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) { const int k = 1 << (shift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + i0*2; const real *in = in0 + i0*2; // Pres : 27834 real2 v13 = load(in, 11 << shift); real2 v45 = load(in, 43 
<< shift); real2 v268 = plus(v13, v45); real2 v264 = minus(v45, v13); real2 v61 = load(in, 59 << shift); real2 v29 = load(in, 27 << shift); real2 v269 = plus(v29, v61); real2 v263 = reverse(minus(v29, v61)); real2 v401 = reverse(minus(v268, v269)); real2 v407 = plus(v268, v269); real2 v267 = minusplus(uminus(v263), v264); real2 v265 = minusplus(v263, v264); real2 v279 = ctimesminusplus(reverse(v267), ctbl[28], ctimes(v267, ctbl[14])); real2 v273 = ctimesminusplus(reverse(v265), ctbl[25], ctimes(v265, ctbl[19])); real2 v5 = load(in, 3 << shift); real2 v37 = load(in, 35 << shift); real2 v124 = minus(v37, v5); real2 v128 = plus(v5, v37); real2 v21 = load(in, 19 << shift); real2 v53 = load(in, 51 << shift); real2 v129 = plus(v21, v53); real2 v123 = reverse(minus(v21, v53)); real2 v402 = minus(v129, v128); real2 v406 = plus(v128, v129); real2 v405 = minusplus(uminus(v401), v402); real2 v403 = minusplus(v401, v402); real2 v415 = ctimesminusplus(reverse(v405), ctbl[13], ctimes(v405, ctbl[12])); real2 v411 = ctimesminusplus(reverse(v403), ctbl[11], ctimes(v403, ctbl[9])); real2 v125 = minusplus(v123, v124); real2 v127 = minusplus(uminus(v123), v124); real2 v139 = ctimesminusplus(reverse(v127), ctbl[17], ctimes(v127, ctbl[27])); real2 v534 = plus(v406, v407); real2 v530 = minus(v407, v406); real2 v962 = minus(v279, v139); real2 v966 = plus(v139, v279); real2 v133 = ctimesminusplus(reverse(v125), ctbl[23], ctimes(v125, ctbl[21])); real2 v807 = plus(v133, v273); real2 v803 = minus(v273, v133); real2 v65 = load(in, 63 << shift); real2 v33 = load(in, 31 << shift); real2 v335 = reverse(minus(v33, v65)); real2 v341 = plus(v33, v65); real2 v49 = load(in, 47 << shift); real2 v17 = load(in, 15 << shift); real2 v340 = plus(v17, v49); real2 v336 = minus(v49, v17); real2 v471 = plus(v340, v341); real2 v465 = reverse(minus(v340, v341)); real2 v339 = minusplus(uminus(v335), v336); real2 v337 = minusplus(v335, v336); real2 v351 = ctimesminusplus(reverse(v339), ctbl[22], ctimes(v339, 
ctbl[20])); real2 v345 = ctimesminusplus(reverse(v337), ctbl[29], ctimes(v337, ctbl[15])); real2 v9 = load(in, 7 << shift); real2 v41 = load(in, 39 << shift); real2 v199 = plus(v9, v41); real2 v195 = minus(v41, v9); real2 v57 = load(in, 55 << shift); real2 v25 = load(in, 23 << shift); real2 v200 = plus(v25, v57); real2 v194 = reverse(minus(v25, v57)); real2 v466 = minus(v200, v199); real2 v470 = plus(v199, v200); real2 v535 = plus(v470, v471); real2 v529 = reverse(minus(v470, v471)); real2 v469 = minusplus(uminus(v465), v466); real2 v467 = minusplus(v465, v466); real2 v531 = minusplus(v529, v530); real2 v533 = minusplus(uminus(v529), v530); real2 v543 = ctimesminusplus(reverse(v533), ctbl[4], ctimes(v533, ctbl[2])); real2 v539 = ctimesminusplus(reverse(v531), ctbl[5], ctimes(v531, ctbl[3])); real2 v561 = reverse(minus(v534, v535)); real2 v567 = plus(v534, v535); real2 v479 = ctimesminusplus(reverse(v469), ctbl[10], ctimes(v469, ctbl[8])); real2 v719 = plus(v415, v479); real2 v713 = reverse(minus(v415, v479)); real2 v475 = ctimesminusplus(reverse(v467), ctbl[13], ctimes(v467, ctbl[7])); real2 v662 = plus(v411, v475); real2 v656 = reverse(minus(v411, v475)); real2 v196 = minusplus(v194, v195); real2 v198 = minusplus(uminus(v194), v195); real2 v209 = ctimesminusplus(reverse(v198), ctbl[25], ctimes(v198, ctbl[24])); real2 v961 = reverse(minus(v209, v351)); real2 v967 = plus(v209, v351); real2 v963 = minusplus(v961, v962); real2 v965 = minusplus(uminus(v961), v962); real2 v975 = ctimesminusplus(reverse(v965), ctbl[4], ctimes(v965, ctbl[2])); real2 v971 = ctimesminusplus(reverse(v963), ctbl[5], ctimes(v963, ctbl[3])); real2 v999 = plus(v966, v967); real2 v993 = reverse(minus(v966, v967)); real2 v31 = load(in, 29 << shift); real2 v63 = load(in, 61 << shift); real2 v305 = plus(v31, v63); real2 v299 = reverse(minus(v31, v63)); real2 v47 = load(in, 45 << shift); real2 v15 = load(in, 13 << shift); real2 v300 = minus(v47, v15); real2 v304 = plus(v15, v47); real2 v439 = 
plus(v304, v305); real2 v433 = reverse(minus(v304, v305)); real2 v301 = minusplus(v299, v300); real2 v303 = minusplus(uminus(v299), v300); real2 v315 = ctimesminusplus(reverse(v303), ctbl[16], ctimes(v303, ctbl[26])); real2 v7 = load(in, 5 << shift); real2 v39 = load(in, 37 << shift); real2 v164 = plus(v7, v39); real2 v160 = minus(v39, v7); real2 v23 = load(in, 21 << shift); real2 v55 = load(in, 53 << shift); real2 v159 = reverse(minus(v23, v55)); real2 v165 = plus(v23, v55); real2 v438 = plus(v164, v165); real2 v434 = minus(v165, v164); real2 v163 = minusplus(uminus(v159), v160); real2 v161 = minusplus(v159, v160); real2 v175 = ctimesminusplus(reverse(v163), ctbl[29], ctimes(v163, ctbl[15])); real2 v929 = reverse(minus(v175, v315)); real2 v935 = plus(v175, v315); real2 v435 = minusplus(v433, v434); real2 v437 = minusplus(uminus(v433), v434); real2 v443 = ctimesminusplus(reverse(v435), ctbl[9], ctimes(v435, ctbl[11])); real2 v497 = reverse(minus(v438, v439)); real2 v503 = plus(v438, v439); real2 v447 = ctimesminusplus(reverse(v437), ctbl[7], ctimes(v437, ctbl[6])); real2 v11 = load(in, 9 << shift); real2 v43 = load(in, 41 << shift); real2 v234 = plus(v11, v43); real2 v230 = minus(v43, v11); real2 v27 = load(in, 25 << shift); real2 v59 = load(in, 57 << shift); real2 v235 = plus(v27, v59); real2 v229 = reverse(minus(v27, v59)); real2 v375 = plus(v234, v235); real2 v369 = reverse(minus(v234, v235)); real2 v233 = minusplus(uminus(v229), v230); real2 v231 = minusplus(v229, v230); real2 v244 = ctimesminusplus(reverse(v233), ctbl[19], ctimes(v233, ctbl[18])); real2 v19 = load(in, 17 << shift); real2 v51 = load(in, 49 << shift); real2 v89 = plus(v19, v51); real2 v83 = reverse(minus(v19, v51)); real2 v3 = load(in, 1 << shift); real2 v35 = load(in, 33 << shift); real2 v88 = plus(v3, v35); real2 v84 = minus(v35, v3); real2 v370 = minus(v89, v88); real2 v374 = plus(v88, v89); real2 v371 = minusplus(v369, v370); real2 v373 = minusplus(uminus(v369), v370); real2 v383 = 
ctimesminusplus(reverse(v373), ctbl[11], ctimes(v373, ctbl[9])); real2 v714 = minus(v447, v383); real2 v718 = plus(v383, v447); real2 v502 = plus(v374, v375); real2 v498 = minus(v375, v374); real2 v379 = ctimesminusplus(reverse(v371), ctbl[7], ctimes(v371, ctbl[13])); real2 v657 = minus(v443, v379); real2 v661 = plus(v379, v443); real2 v715 = minusplus(v713, v714); real2 v717 = minusplus(uminus(v713), v714); real2 v566 = plus(v502, v503); real2 v562 = minus(v503, v502); real2 v499 = minusplus(v497, v498); real2 v501 = minusplus(uminus(v497), v498); real2 v511 = ctimesminusplus(reverse(v501), ctbl[5], ctimes(v501, ctbl[3])); real2 v621 = reverse(minus(v511, v543)); real2 v627 = plus(v511, v543); real2 v583 = plus(v566, v567); real2 v577 = reverse(minus(v566, v567)); real2 v727 = ctimesminusplus(reverse(v717), ctbl[1], ctimes(v717, ctbl[0])); real2 v723 = ctimesminusplus(reverse(v715), ctbl[1], ctimes(v715, ctbl[1])); real2 v507 = ctimesminusplus(reverse(v499), ctbl[3], ctimes(v499, ctbl[5])); real2 v735 = plus(v718, v719); real2 v729 = reverse(minus(v718, v719)); real2 v565 = minusplus(uminus(v561), v562); real2 v563 = minusplus(v561, v562); real2 v571 = ctimesminusplus(reverse(v563), ctbl[1], ctimes(v563, ctbl[1])); real2 v602 = reverse(minus(v507, v539)); real2 v608 = plus(v507, v539); real2 v660 = minusplus(uminus(v656), v657); real2 v658 = minusplus(v656, v657); real2 v670 = ctimesminusplus(reverse(v660), ctbl[1], ctimes(v660, ctbl[0])); real2 v666 = ctimesminusplus(reverse(v658), ctbl[1], ctimes(v658, ctbl[1])); real2 v678 = plus(v661, v662); real2 v672 = reverse(minus(v661, v662)); real2 v575 = ctimesminusplus(reverse(v565), ctbl[1], ctimes(v565, ctbl[0])); real2 v28 = load(in, 26 << shift); real2 v60 = load(in, 58 << shift); real2 v252 = plus(v28, v60); real2 v246 = reverse(minus(v28, v60)); real2 v44 = load(in, 42 << shift); real2 v12 = load(in, 10 << shift); real2 v251 = plus(v12, v44); real2 v247 = minus(v44, v12); real2 v391 = plus(v251, v252); real2 v385 
= reverse(minus(v251, v252)); real2 v20 = load(in, 18 << shift); real2 v52 = load(in, 50 << shift); real2 v109 = plus(v20, v52); real2 v103 = reverse(minus(v20, v52)); real2 v36 = load(in, 34 << shift); real2 v4 = load(in, 2 << shift); real2 v108 = plus(v4, v36); real2 v104 = minus(v36, v4); real2 v386 = minus(v109, v108); real2 v390 = plus(v108, v109); real2 v514 = minus(v391, v390); real2 v518 = plus(v390, v391); real2 v389 = minusplus(uminus(v385), v386); real2 v387 = minusplus(v385, v386); real2 v399 = ctimesminusplus(reverse(v389), ctbl[5], ctimes(v389, ctbl[3])); real2 v8 = load(in, 6 << shift); real2 v40 = load(in, 38 << shift); real2 v178 = minus(v40, v8); real2 v182 = plus(v8, v40); real2 v24 = load(in, 22 << shift); real2 v56 = load(in, 54 << shift); real2 v183 = plus(v24, v56); real2 v177 = reverse(minus(v24, v56)); real2 v450 = minus(v183, v182); real2 v454 = plus(v182, v183); real2 v16 = load(in, 14 << shift); real2 v48 = load(in, 46 << shift); real2 v322 = plus(v16, v48); real2 v318 = minus(v48, v16); real2 v32 = load(in, 30 << shift); real2 v64 = load(in, 62 << shift); real2 v323 = plus(v32, v64); real2 v317 = reverse(minus(v32, v64)); real2 v449 = reverse(minus(v322, v323)); real2 v455 = plus(v322, v323); real2 v519 = plus(v454, v455); real2 v513 = reverse(minus(v454, v455)); real2 v545 = reverse(minus(v518, v519)); real2 v551 = plus(v518, v519); real2 v515 = minusplus(v513, v514); real2 v517 = minusplus(uminus(v513), v514); real2 v527 = ctimesminusplus(reverse(v517), ctbl[1], ctimes(v517, ctbl[0])); real2 v523 = ctimesminusplus(reverse(v515), ctbl[1], ctimes(v515, ctbl[1])); real2 v14 = load(in, 12 << shift); real2 v46 = load(in, 44 << shift); real2 v286 = plus(v14, v46); real2 v282 = minus(v46, v14); real2 v62 = load(in, 60 << shift); real2 v30 = load(in, 28 << shift); real2 v281 = reverse(minus(v30, v62)); real2 v287 = plus(v30, v62); real2 v423 = plus(v286, v287); real2 v417 = reverse(minus(v286, v287)); real2 v22 = load(in, 20 << shift); real2 
v54 = load(in, 52 << shift); real2 v147 = plus(v22, v54); real2 v141 = reverse(minus(v22, v54)); real2 v38 = load(in, 36 << shift); real2 v6 = load(in, 4 << shift); real2 v146 = plus(v6, v38); real2 v142 = minus(v38, v6); real2 v422 = plus(v146, v147); real2 v418 = minus(v147, v146); real2 v487 = plus(v422, v423); real2 v481 = reverse(minus(v422, v423)); real2 v42 = load(in, 40 << shift); real2 v10 = load(in, 8 << shift); real2 v212 = minus(v42, v10); real2 v216 = plus(v10, v42); real2 v58 = load(in, 56 << shift); real2 v26 = load(in, 24 << shift); real2 v217 = plus(v26, v58); real2 v211 = reverse(minus(v26, v58)); real2 v353 = reverse(minus(v216, v217)); real2 v359 = plus(v216, v217); real2 v18 = load(in, 16 << shift); real2 v50 = load(in, 48 << shift); real2 v73 = plus(v18, v50); real2 v67 = reverse(minus(v18, v50)); real2 v2 = load(in, 0 << shift); real2 v34 = load(in, 32 << shift); real2 v72 = plus(v2, v34); real2 v68 = minus(v34, v2); real2 v358 = plus(v72, v73); real2 v354 = minus(v73, v72); real2 v486 = plus(v358, v359); real2 v482 = minus(v359, v358); real2 v491 = minus(uplusminus(v481), v482); real2 v495 = minus(uminusplus(v481), v482); real2 v603 = minus(v523, v491); real2 v607 = plus(v491, v523); store(out, 4 << shift, plus(v607, v608)); store(out, 36 << shift, minus(v607, v608)); store(out, 52 << shift, minus(uminusplus(v602), v603)); store(out, 20 << shift, minus(uplusminus(v602), v603)); real2 v622 = minus(v527, v495); real2 v626 = plus(v495, v527); store(out, 60 << shift, minus(uminusplus(v621), v622)); store(out, 28 << shift, minus(uplusminus(v621), v622)); store(out, 12 << shift, plus(v626, v627)); store(out, 44 << shift, minus(v626, v627)); real2 v550 = plus(v486, v487); real2 v546 = minus(v487, v486); real2 v559 = minus(uminusplus(v545), v546); real2 v555 = minus(uplusminus(v545), v546); store(out, 8 << shift, plus(v555, v571)); store(out, 40 << shift, minus(v555, v571)); store(out, 24 << shift, plus(v559, v575)); store(out, 56 << shift, 
minus(v559, v575)); real2 v578 = minus(v551, v550); store(out, 48 << shift, minus(uminusplus(v577), v578)); store(out, 16 << shift, minus(uplusminus(v577), v578)); real2 v582 = plus(v550, v551); store(out, 0 << shift, plus(v582, v583)); store(out, 32 << shift, minus(v582, v583)); real2 v453 = minusplus(uminus(v449), v450); real2 v451 = minusplus(v449, v450); real2 v419 = minusplus(v417, v418); real2 v421 = minusplus(uminus(v417), v418); real2 v431 = ctimesminusplus(reverse(v421), ctbl[1], ctimes(v421, ctbl[0])); real2 v463 = ctimesminusplus(reverse(v453), ctbl[4], ctimes(v453, ctbl[2])); real2 v703 = plus(v399, v463); real2 v697 = reverse(minus(v399, v463)); real2 v367 = minus(uminusplus(v353), v354); real2 v363 = minus(uplusminus(v353), v354); real2 v702 = plus(v367, v431); real2 v698 = minus(v431, v367); real2 v730 = minus(v703, v702); store(out, 54 << shift, minus(uminusplus(v729), v730)); store(out, 22 << shift, minus(uplusminus(v729), v730)); real2 v734 = plus(v702, v703); store(out, 6 << shift, plus(v734, v735)); store(out, 38 << shift, minus(v734, v735)); real2 v707 = minus(uplusminus(v697), v698); real2 v711 = minus(uminusplus(v697), v698); store(out, 30 << shift, plus(v711, v727)); store(out, 62 << shift, minus(v711, v727)); store(out, 14 << shift, plus(v707, v723)); store(out, 46 << shift, minus(v707, v723)); real2 v395 = ctimesminusplus(reverse(v387), ctbl[3], ctimes(v387, ctbl[5])); real2 v459 = ctimesminusplus(reverse(v451), ctbl[5], ctimes(v451, ctbl[3])); real2 v640 = reverse(minus(v395, v459)); real2 v646 = plus(v395, v459); real2 v427 = ctimesminusplus(reverse(v419), ctbl[1], ctimes(v419, ctbl[1])); real2 v641 = minus(v427, v363); real2 v645 = plus(v363, v427); real2 v654 = minus(uminusplus(v640), v641); real2 v650 = minus(uplusminus(v640), v641); store(out, 10 << shift, plus(v650, v666)); store(out, 42 << shift, minus(v650, v666)); store(out, 58 << shift, minus(v654, v670)); store(out, 26 << shift, plus(v654, v670)); real2 v673 = minus(v646, 
v645); store(out, 50 << shift, minus(uminusplus(v672), v673)); store(out, 18 << shift, minus(uplusminus(v672), v673)); real2 v677 = plus(v645, v646); store(out, 2 << shift, plus(v677, v678)); store(out, 34 << shift, minus(v677, v678)); real2 v250 = minusplus(uminus(v246), v247); real2 v248 = minusplus(v246, v247); real2 v261 = ctimesminusplus(reverse(v250), ctbl[7], ctimes(v250, ctbl[6])); real2 v145 = minusplus(uminus(v141), v142); real2 v143 = minusplus(v141, v142); real2 v283 = minusplus(v281, v282); real2 v285 = minusplus(uminus(v281), v282); real2 v297 = ctimesminusplus(reverse(v285), ctbl[4], ctimes(v285, ctbl[2])); real2 v157 = ctimesminusplus(reverse(v145), ctbl[5], ctimes(v145, ctbl[3])); real2 v919 = plus(v157, v297); real2 v913 = reverse(minus(v157, v297)); real2 v213 = minusplus(v211, v212); real2 v215 = minusplus(uminus(v211), v212); real2 v227 = ctimesminusplus(reverse(v215), ctbl[1], ctimes(v215, ctbl[0])); real2 v81 = minus(uminusplus(v67), v68); real2 v77 = minus(uplusminus(v67), v68); real2 v85 = minusplus(v83, v84); real2 v87 = minusplus(uminus(v83), v84); real2 v101 = ctimesminusplus(reverse(v87), ctbl[23], ctimes(v87, ctbl[21])); real2 v934 = plus(v101, v244); real2 v930 = minus(v244, v101); real2 v179 = minusplus(v177, v178); real2 v181 = minusplus(uminus(v177), v178); real2 v192 = ctimesminusplus(reverse(v181), ctbl[13], ctimes(v181, ctbl[12])); real2 v918 = plus(v81, v227); real2 v914 = minus(v227, v81); real2 v105 = minusplus(v103, v104); real2 v107 = minusplus(uminus(v103), v104); real2 v121 = ctimesminusplus(reverse(v107), ctbl[11], ctimes(v107, ctbl[9])); real2 v946 = minus(v261, v121); real2 v950 = plus(v121, v261); real2 v994 = minus(v935, v934); real2 v998 = plus(v934, v935); real2 v1009 = reverse(minus(v998, v999)); real2 v1015 = plus(v998, v999); real2 v982 = plus(v918, v919); real2 v978 = minus(v919, v918); real2 v321 = minusplus(uminus(v317), v318); real2 v319 = minusplus(v317, v318); real2 v333 = ctimesminusplus(reverse(v321), 
ctbl[10], ctimes(v321, ctbl[8])); real2 v951 = plus(v192, v333); real2 v945 = reverse(minus(v192, v333)); real2 v983 = plus(v950, v951); real2 v977 = reverse(minus(v950, v951)); real2 v1014 = plus(v982, v983); real2 v1010 = minus(v983, v982); store(out, 3 << shift, plus(v1014, v1015)); store(out, 35 << shift, minus(v1014, v1015)); store(out, 51 << shift, minus(uminusplus(v1009), v1010)); store(out, 19 << shift, minus(uplusminus(v1009), v1010)); real2 v997 = minusplus(uminus(v993), v994); real2 v995 = minusplus(v993, v994); real2 v1003 = ctimesminusplus(reverse(v995), ctbl[1], ctimes(v995, ctbl[1])); real2 v987 = minus(uplusminus(v977), v978); store(out, 43 << shift, minus(v987, v1003)); store(out, 11 << shift, plus(v987, v1003)); real2 v991 = minus(uminusplus(v977), v978); real2 v1007 = ctimesminusplus(reverse(v997), ctbl[1], ctimes(v997, ctbl[0])); store(out, 27 << shift, plus(v991, v1007)); store(out, 59 << shift, minus(v991, v1007)); real2 v947 = minusplus(v945, v946); real2 v949 = minusplus(uminus(v945), v946); real2 v931 = minusplus(v929, v930); real2 v933 = minusplus(uminus(v929), v930); real2 v939 = ctimesminusplus(reverse(v931), ctbl[3], ctimes(v931, ctbl[5])); real2 v1034 = reverse(minus(v939, v971)); real2 v1040 = plus(v939, v971); real2 v927 = minus(uminusplus(v913), v914); real2 v923 = minus(uplusminus(v913), v914); real2 v955 = ctimesminusplus(reverse(v947), ctbl[1], ctimes(v947, ctbl[1])); real2 v1035 = minus(v955, v923); real2 v1039 = plus(v923, v955); store(out, 39 << shift, minus(v1039, v1040)); store(out, 7 << shift, plus(v1039, v1040)); store(out, 23 << shift, minus(uplusminus(v1034), v1035)); store(out, 55 << shift, minus(uminusplus(v1034), v1035)); real2 v959 = ctimesminusplus(reverse(v949), ctbl[1], ctimes(v949, ctbl[0])); real2 v943 = ctimesminusplus(reverse(v933), ctbl[5], ctimes(v933, ctbl[3])); real2 v1053 = reverse(minus(v943, v975)); real2 v1059 = plus(v943, v975); real2 v1058 = plus(v927, v959); real2 v1054 = minus(v959, v927); 
store(out, 63 << shift, minus(uminusplus(v1053), v1054)); store(out, 31 << shift, minus(uplusminus(v1053), v1054)); store(out, 47 << shift, minus(v1058, v1059)); store(out, 15 << shift, plus(v1058, v1059)); real2 v309 = ctimesminusplus(reverse(v301), ctbl[21], ctimes(v301, ctbl[23])); real2 v171 = ctimesminusplus(reverse(v161), ctbl[19], ctimes(v161, ctbl[25])); real2 v776 = plus(v171, v309); real2 v770 = reverse(minus(v171, v309)); real2 v256 = ctimesminusplus(reverse(v248), ctbl[9], ctimes(v248, ctbl[11])); real2 v222 = ctimesminusplus(reverse(v213), ctbl[1], ctimes(v213, ctbl[1])); real2 v239 = ctimesminusplus(reverse(v231), ctbl[17], ctimes(v231, ctbl[27])); real2 v204 = ctimesminusplus(reverse(v196), ctbl[27], ctimes(v196, ctbl[17])); real2 v291 = ctimesminusplus(reverse(v283), ctbl[5], ctimes(v283, ctbl[3])); real2 v153 = ctimesminusplus(reverse(v143), ctbl[3], ctimes(v143, ctbl[5])); real2 v760 = plus(v153, v291); real2 v754 = reverse(minus(v153, v291)); real2 v187 = ctimesminusplus(reverse(v179), ctbl[11], ctimes(v179, ctbl[9])); real2 v95 = ctimesminusplus(reverse(v85), ctbl[15], ctimes(v85, ctbl[29])); real2 v771 = minus(v239, v95); real2 v775 = plus(v95, v239); real2 v839 = plus(v775, v776); real2 v835 = minus(v776, v775); real2 v115 = ctimesminusplus(reverse(v105), ctbl[7], ctimes(v105, ctbl[13])); real2 v791 = plus(v115, v256); real2 v787 = minus(v256, v115); real2 v327 = ctimesminusplus(reverse(v319), ctbl[13], ctimes(v319, ctbl[7])); real2 v792 = plus(v187, v327); real2 v786 = reverse(minus(v187, v327)); real2 v824 = plus(v791, v792); real2 v818 = reverse(minus(v791, v792)); real2 v808 = plus(v204, v345); real2 v802 = reverse(minus(v204, v345)); real2 v840 = plus(v807, v808); real2 v834 = reverse(minus(v807, v808)); real2 v850 = reverse(minus(v839, v840)); real2 v856 = plus(v839, v840); real2 v759 = plus(v77, v222); real2 v755 = minus(v222, v77); real2 v823 = plus(v759, v760); real2 v819 = minus(v760, v759); real2 v855 = plus(v823, v824); store(out, 
33 << shift, minus(v855, v856)); store(out, 1 << shift, plus(v855, v856)); real2 v851 = minus(v824, v823); store(out, 49 << shift, minus(uminusplus(v850), v851)); store(out, 17 << shift, minus(uplusminus(v850), v851)); real2 v836 = minusplus(v834, v835); real2 v838 = minusplus(uminus(v834), v835); real2 v844 = ctimesminusplus(reverse(v836), ctbl[1], ctimes(v836, ctbl[1])); real2 v828 = minus(uplusminus(v818), v819); store(out, 41 << shift, minus(v828, v844)); store(out, 9 << shift, plus(v828, v844)); real2 v832 = minus(uminusplus(v818), v819); real2 v848 = ctimesminusplus(reverse(v838), ctbl[1], ctimes(v838, ctbl[0])); store(out, 25 << shift, plus(v832, v848)); store(out, 57 << shift, minus(v832, v848)); real2 v774 = minusplus(uminus(v770), v771); real2 v772 = minusplus(v770, v771); real2 v790 = minusplus(uminus(v786), v787); real2 v788 = minusplus(v786, v787); real2 v796 = ctimesminusplus(reverse(v788), ctbl[1], ctimes(v788, ctbl[1])); real2 v780 = ctimesminusplus(reverse(v772), ctbl[3], ctimes(v772, ctbl[5])); real2 v764 = minus(uplusminus(v754), v755); real2 v768 = minus(uminusplus(v754), v755); real2 v876 = minus(v796, v764); real2 v880 = plus(v764, v796); real2 v806 = minusplus(uminus(v802), v803); real2 v804 = minusplus(v802, v803); real2 v812 = ctimesminusplus(reverse(v804), ctbl[5], ctimes(v804, ctbl[3])); real2 v881 = plus(v780, v812); real2 v875 = reverse(minus(v780, v812)); store(out, 21 << shift, minus(uplusminus(v875), v876)); store(out, 53 << shift, minus(uminusplus(v875), v876)); store(out, 5 << shift, plus(v880, v881)); store(out, 37 << shift, minus(v880, v881)); real2 v800 = ctimesminusplus(reverse(v790), ctbl[1], ctimes(v790, ctbl[0])); real2 v784 = ctimesminusplus(reverse(v774), ctbl[5], ctimes(v774, ctbl[3])); real2 v816 = ctimesminusplus(reverse(v806), ctbl[4], ctimes(v806, ctbl[2])); real2 v900 = plus(v784, v816); real2 v894 = reverse(minus(v784, v816)); real2 v899 = plus(v768, v800); store(out, 45 << shift, minus(v899, v900)); store(out, 13 
<< shift, plus(v899, v900)); real2 v895 = minus(v800, v768); store(out, 61 << shift, minus(uminusplus(v894), v895)); store(out, 29 << shift, minus(uplusminus(v894), v895)); // Pres : 15312 } } ALIGNED(8192) void dft64b_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) { const int k = 1 << (shift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + i0*2; const real *in = in0 + i0*2; // Pres : 27598 real2 v27 = load(in, 25 << shift); real2 v59 = load(in, 57 << shift); real2 v241 = plus(v27, v59); real2 v235 = reverse(minus(v59, v27)); real2 v43 = load(in, 41 << shift); real2 v11 = load(in, 9 << shift); real2 v236 = minus(v43, v11); real2 v240 = plus(v11, v43); real2 v375 = plus(v240, v241); real2 v369 = reverse(minus(v241, v240)); real2 v237 = minusplus(v235, v236); real2 v239 = minusplus(uminus(v235), v236); real2 v249 = ctimesminusplus(reverse(v239), ctbl[24], ctimes(v239, ctbl[18])); real2 v245 = ctimesminusplus(reverse(v237), ctbl[26], ctimes(v237, ctbl[27])); real2 v3 = load(in, 1 << shift); real2 v35 = load(in, 33 << shift); real2 v84 = minus(v35, v3); real2 v88 = plus(v3, v35); real2 v51 = load(in, 49 << shift); real2 v19 = load(in, 17 << shift); real2 v83 = reverse(minus(v51, v19)); real2 v89 = plus(v19, v51); real2 v370 = minus(v89, v88); real2 v374 = plus(v88, v89); real2 v85 = minusplus(v83, v84); real2 v87 = minusplus(uminus(v83), v84); real2 v101 = ctimesminusplus(reverse(v87), ctbl[20], ctimes(v87, ctbl[21])); real2 v498 = minus(v375, v374); real2 v502 = plus(v374, v375); real2 v934 = plus(v101, v249); real2 v930 = minus(v249, v101); real2 v373 = minusplus(uminus(v369), v370); real2 v371 = minusplus(v369, v370); real2 v379 = ctimesminusplus(reverse(v371), ctbl[12], ctimes(v371, ctbl[13])); real2 v383 = ctimesminusplus(reverse(v373), ctbl[8], ctimes(v373, ctbl[9])); real2 v95 = ctimesminusplus(reverse(v85), ctbl[28], ctimes(v85, ctbl[29])); real2 v771 = 
minus(v245, v95); real2 v775 = plus(v95, v245); real2 v7 = load(in, 5 << shift); real2 v39 = load(in, 37 << shift); real2 v166 = plus(v7, v39); real2 v162 = minus(v39, v7); real2 v23 = load(in, 21 << shift); real2 v55 = load(in, 53 << shift); real2 v161 = reverse(minus(v55, v23)); real2 v167 = plus(v23, v55); real2 v163 = minusplus(v161, v162); real2 v165 = minusplus(uminus(v161), v162); real2 v434 = minus(v167, v166); real2 v438 = plus(v166, v167); real2 v179 = ctimesminusplus(reverse(v165), ctbl[14], ctimes(v165, ctbl[15])); real2 v173 = ctimesminusplus(reverse(v163), ctbl[24], ctimes(v163, ctbl[25])); real2 v15 = load(in, 13 << shift); real2 v47 = load(in, 45 << shift); real2 v307 = plus(v15, v47); real2 v303 = minus(v47, v15); real2 v63 = load(in, 61 << shift); real2 v31 = load(in, 29 << shift); real2 v308 = plus(v31, v63); real2 v302 = reverse(minus(v63, v31)); real2 v439 = plus(v307, v308); real2 v433 = reverse(minus(v308, v307)); real2 v437 = minusplus(uminus(v433), v434); real2 v435 = minusplus(v433, v434); real2 v443 = ctimesminusplus(reverse(v435), ctbl[10], ctimes(v435, ctbl[11])); real2 v497 = reverse(minus(v439, v438)); real2 v503 = plus(v438, v439); real2 v562 = minus(v503, v502); real2 v566 = plus(v502, v503); real2 v499 = minusplus(v497, v498); real2 v501 = minusplus(uminus(v497), v498); real2 v511 = ctimesminusplus(reverse(v501), ctbl[2], ctimes(v501, ctbl[3])); real2 v447 = ctimesminusplus(reverse(v437), ctbl[12], ctimes(v437, ctbl[6])); real2 v507 = ctimesminusplus(reverse(v499), ctbl[4], ctimes(v499, ctbl[5])); real2 v718 = plus(v383, v447); real2 v714 = minus(v447, v383); real2 v306 = minusplus(uminus(v302), v303); real2 v304 = minusplus(v302, v303); real2 v318 = ctimesminusplus(reverse(v306), ctbl[27], ctimes(v306, ctbl[26])); real2 v929 = reverse(minus(v318, v179)); real2 v935 = plus(v179, v318); real2 v931 = minusplus(v929, v930); real2 v933 = minusplus(uminus(v929), v930); real2 v998 = plus(v934, v935); real2 v994 = minus(v935, v934); real2 
v661 = plus(v379, v443); real2 v657 = minus(v443, v379); real2 v939 = ctimesminusplus(reverse(v931), ctbl[4], ctimes(v931, ctbl[5])); real2 v943 = ctimesminusplus(reverse(v933), ctbl[2], ctimes(v933, ctbl[3])); real2 v45 = load(in, 43 << shift); real2 v13 = load(in, 11 << shift); real2 v274 = plus(v13, v45); real2 v270 = minus(v45, v13); real2 v29 = load(in, 27 << shift); real2 v61 = load(in, 59 << shift); real2 v269 = reverse(minus(v61, v29)); real2 v275 = plus(v29, v61); real2 v273 = minusplus(uminus(v269), v270); real2 v271 = minusplus(v269, v270); real2 v407 = plus(v274, v275); real2 v401 = reverse(minus(v275, v274)); real2 v284 = ctimesminusplus(reverse(v273), ctbl[15], ctimes(v273, ctbl[14])); real2 v5 = load(in, 3 << shift); real2 v37 = load(in, 35 << shift); real2 v128 = plus(v5, v37); real2 v124 = minus(v37, v5); real2 v53 = load(in, 51 << shift); real2 v21 = load(in, 19 << shift); real2 v129 = plus(v21, v53); real2 v123 = reverse(minus(v53, v21)); real2 v406 = plus(v128, v129); real2 v402 = minus(v129, v128); real2 v405 = minusplus(uminus(v401), v402); real2 v403 = minusplus(v401, v402); real2 v415 = ctimesminusplus(reverse(v405), ctbl[6], ctimes(v405, ctbl[12])); real2 v411 = ctimesminusplus(reverse(v403), ctbl[8], ctimes(v403, ctbl[9])); real2 v127 = minusplus(uminus(v123), v124); real2 v125 = minusplus(v123, v124); real2 v530 = minus(v407, v406); real2 v534 = plus(v406, v407); real2 v139 = ctimesminusplus(reverse(v127), ctbl[26], ctimes(v127, ctbl[27])); real2 v962 = minus(v284, v139); real2 v966 = plus(v139, v284); real2 v57 = load(in, 55 << shift); real2 v25 = load(in, 23 << shift); real2 v204 = plus(v25, v57); real2 v198 = reverse(minus(v57, v25)); real2 v9 = load(in, 7 << shift); real2 v41 = load(in, 39 << shift); real2 v199 = minus(v41, v9); real2 v203 = plus(v9, v41); real2 v202 = minusplus(uminus(v198), v199); real2 v200 = minusplus(v198, v199); real2 v470 = plus(v203, v204); real2 v466 = minus(v204, v203); real2 v215 = 
ctimesminusplus(reverse(v202), ctbl[18], ctimes(v202, ctbl[24])); real2 v17 = load(in, 15 << shift); real2 v49 = load(in, 47 << shift); real2 v338 = minus(v49, v17); real2 v342 = plus(v17, v49); real2 v33 = load(in, 31 << shift); real2 v65 = load(in, 63 << shift); real2 v337 = reverse(minus(v65, v33)); real2 v343 = plus(v33, v65); real2 v341 = minusplus(uminus(v337), v338); real2 v339 = minusplus(v337, v338); real2 v351 = ctimesminusplus(reverse(v341), ctbl[21], ctimes(v341, ctbl[20])); real2 v961 = reverse(minus(v351, v215)); real2 v967 = plus(v215, v351); real2 v465 = reverse(minus(v343, v342)); real2 v471 = plus(v342, v343); real2 v467 = minusplus(v465, v466); real2 v469 = minusplus(uminus(v465), v466); real2 v475 = ctimesminusplus(reverse(v467), ctbl[6], ctimes(v467, ctbl[7])); real2 v662 = plus(v411, v475); real2 v656 = reverse(minus(v475, v411)); real2 v529 = reverse(minus(v471, v470)); real2 v535 = plus(v470, v471); real2 v531 = minusplus(v529, v530); real2 v533 = minusplus(uminus(v529), v530); real2 v543 = ctimesminusplus(reverse(v533), ctbl[3], ctimes(v533, ctbl[2])); real2 v561 = reverse(minus(v535, v534)); real2 v567 = plus(v534, v535); real2 v565 = minusplus(uminus(v561), v562); real2 v563 = minusplus(v561, v562); real2 v571 = ctimesminusplus(reverse(v563), ctbl[0], ctimes(v563, ctbl[1])); real2 v583 = plus(v566, v567); real2 v577 = reverse(minus(v567, v566)); real2 v539 = ctimesminusplus(reverse(v531), ctbl[2], ctimes(v531, ctbl[3])); real2 v602 = reverse(minus(v539, v507)); real2 v608 = plus(v507, v539); real2 v993 = reverse(minus(v967, v966)); real2 v999 = plus(v966, v967); real2 v575 = ctimesminusplus(reverse(v565), ctbl[0], ctimes(v565, ctbl[0])); real2 v1009 = reverse(minus(v999, v998)); real2 v1015 = plus(v998, v999); real2 v479 = ctimesminusplus(reverse(v469), ctbl[9], ctimes(v469, ctbl[8])); real2 v713 = reverse(minus(v479, v415)); real2 v719 = plus(v415, v479); real2 v660 = minusplus(uminus(v656), v657); real2 v658 = minusplus(v656, v657); 
real2 v717 = minusplus(uminus(v713), v714); real2 v715 = minusplus(v713, v714); real2 v723 = ctimesminusplus(reverse(v715), ctbl[0], ctimes(v715, ctbl[1])); real2 v666 = ctimesminusplus(reverse(v658), ctbl[0], ctimes(v658, ctbl[1])); real2 v670 = ctimesminusplus(reverse(v660), ctbl[0], ctimes(v660, ctbl[0])); real2 v727 = ctimesminusplus(reverse(v717), ctbl[0], ctimes(v717, ctbl[0])); real2 v735 = plus(v718, v719); real2 v729 = reverse(minus(v719, v718)); real2 v621 = reverse(minus(v543, v511)); real2 v627 = plus(v511, v543); real2 v672 = reverse(minus(v662, v661)); real2 v678 = plus(v661, v662); real2 v28 = load(in, 26 << shift); real2 v60 = load(in, 58 << shift); real2 v251 = reverse(minus(v60, v28)); real2 v257 = plus(v28, v60); real2 v44 = load(in, 42 << shift); real2 v12 = load(in, 10 << shift); real2 v256 = plus(v12, v44); real2 v252 = minus(v44, v12); real2 v391 = plus(v256, v257); real2 v385 = reverse(minus(v257, v256)); real2 v36 = load(in, 34 << shift); real2 v4 = load(in, 2 << shift); real2 v104 = minus(v36, v4); real2 v108 = plus(v4, v36); real2 v20 = load(in, 18 << shift); real2 v52 = load(in, 50 << shift); real2 v109 = plus(v20, v52); real2 v103 = reverse(minus(v52, v20)); real2 v390 = plus(v108, v109); real2 v386 = minus(v109, v108); real2 v514 = minus(v391, v390); real2 v518 = plus(v390, v391); real2 v387 = minusplus(v385, v386); real2 v389 = minusplus(uminus(v385), v386); real2 v399 = ctimesminusplus(reverse(v389), ctbl[2], ctimes(v389, ctbl[3])); real2 v395 = ctimesminusplus(reverse(v387), ctbl[4], ctimes(v387, ctbl[5])); real2 v40 = load(in, 38 << shift); real2 v8 = load(in, 6 << shift); real2 v182 = minus(v40, v8); real2 v186 = plus(v8, v40); real2 v56 = load(in, 54 << shift); real2 v24 = load(in, 22 << shift); real2 v187 = plus(v24, v56); real2 v181 = reverse(minus(v56, v24)); real2 v454 = plus(v186, v187); real2 v450 = minus(v187, v186); real2 v48 = load(in, 46 << shift); real2 v16 = load(in, 14 << shift); real2 v325 = plus(v16, v48); real2 
v321 = minus(v48, v16); real2 v32 = load(in, 30 << shift); real2 v64 = load(in, 62 << shift); real2 v326 = plus(v32, v64); real2 v320 = reverse(minus(v64, v32)); real2 v455 = plus(v325, v326); real2 v449 = reverse(minus(v326, v325)); real2 v513 = reverse(minus(v455, v454)); real2 v519 = plus(v454, v455); real2 v515 = minusplus(v513, v514); real2 v517 = minusplus(uminus(v513), v514); real2 v551 = plus(v518, v519); real2 v545 = reverse(minus(v519, v518)); real2 v523 = ctimesminusplus(reverse(v515), ctbl[0], ctimes(v515, ctbl[1])); real2 v527 = ctimesminusplus(reverse(v517), ctbl[0], ctimes(v517, ctbl[0])); real2 v58 = load(in, 56 << shift); real2 v26 = load(in, 24 << shift); real2 v217 = reverse(minus(v58, v26)); real2 v223 = plus(v26, v58); real2 v50 = load(in, 48 << shift); real2 v18 = load(in, 16 << shift); real2 v67 = reverse(minus(v50, v18)); real2 v73 = plus(v18, v50); real2 v2 = load(in, 0 << shift); real2 v34 = load(in, 32 << shift); real2 v72 = plus(v2, v34); real2 v68 = minus(v34, v2); real2 v354 = minus(v73, v72); real2 v358 = plus(v72, v73); real2 v10 = load(in, 8 << shift); real2 v42 = load(in, 40 << shift); real2 v222 = plus(v10, v42); real2 v218 = minus(v42, v10); real2 v359 = plus(v222, v223); real2 v353 = reverse(minus(v223, v222)); real2 v482 = minus(v359, v358); real2 v486 = plus(v358, v359); real2 v46 = load(in, 44 << shift); real2 v14 = load(in, 12 << shift); real2 v287 = minus(v46, v14); real2 v291 = plus(v14, v46); real2 v54 = load(in, 52 << shift); real2 v22 = load(in, 20 << shift); real2 v147 = plus(v22, v54); real2 v141 = reverse(minus(v54, v22)); real2 v38 = load(in, 36 << shift); real2 v6 = load(in, 4 << shift); real2 v142 = minus(v38, v6); real2 v146 = plus(v6, v38); real2 v418 = minus(v147, v146); real2 v422 = plus(v146, v147); real2 v62 = load(in, 60 << shift); real2 v30 = load(in, 28 << shift); real2 v286 = reverse(minus(v62, v30)); real2 v292 = plus(v30, v62); real2 v423 = plus(v291, v292); real2 v417 = reverse(minus(v292, v291)); 
real2 v481 = reverse(minus(v423, v422)); real2 v487 = plus(v422, v423); real2 v550 = plus(v486, v487); real2 v546 = minus(v487, v486); real2 v578 = minus(v551, v550); store(out, 16 << shift, minus(uplusminus(v577), v578)); store(out, 48 << shift, minus(uminusplus(v577), v578)); real2 v582 = plus(v550, v551); store(out, 0 << shift, plus(v582, v583)); store(out, 32 << shift, minus(v582, v583)); real2 v559 = minus(uminusplus(v545), v546); real2 v555 = minus(uplusminus(v545), v546); store(out, 40 << shift, minus(v555, v571)); store(out, 8 << shift, plus(v555, v571)); store(out, 56 << shift, minus(v559, v575)); store(out, 24 << shift, plus(v559, v575)); real2 v495 = minus(uminusplus(v481), v482); real2 v491 = minus(uplusminus(v481), v482); real2 v626 = plus(v495, v527); store(out, 12 << shift, plus(v626, v627)); store(out, 44 << shift, minus(v626, v627)); real2 v622 = minus(v527, v495); store(out, 60 << shift, minus(uminusplus(v621), v622)); store(out, 28 << shift, minus(uplusminus(v621), v622)); real2 v607 = plus(v491, v523); real2 v603 = minus(v523, v491); store(out, 4 << shift, plus(v607, v608)); store(out, 36 << shift, minus(v607, v608)); store(out, 20 << shift, minus(uplusminus(v602), v603)); store(out, 52 << shift, minus(uminusplus(v602), v603)); real2 v367 = minus(uminusplus(v353), v354); real2 v363 = minus(uplusminus(v353), v354); real2 v421 = minusplus(uminus(v417), v418); real2 v419 = minusplus(v417, v418); real2 v431 = ctimesminusplus(reverse(v421), ctbl[0], ctimes(v421, ctbl[0])); real2 v451 = minusplus(v449, v450); real2 v453 = minusplus(uminus(v449), v450); real2 v463 = ctimesminusplus(reverse(v453), ctbl[3], ctimes(v453, ctbl[2])); real2 v697 = reverse(minus(v463, v399)); real2 v703 = plus(v399, v463); real2 v698 = minus(v431, v367); real2 v702 = plus(v367, v431); real2 v711 = minus(uminusplus(v697), v698); store(out, 30 << shift, plus(v711, v727)); store(out, 62 << shift, minus(v711, v727)); real2 v707 = minus(uplusminus(v697), v698); store(out, 46 << 
shift, minus(v707, v723)); store(out, 14 << shift, plus(v707, v723)); real2 v734 = plus(v702, v703); store(out, 6 << shift, plus(v734, v735)); store(out, 38 << shift, minus(v734, v735)); real2 v730 = minus(v703, v702); store(out, 54 << shift, minus(uminusplus(v729), v730)); store(out, 22 << shift, minus(uplusminus(v729), v730)); real2 v459 = ctimesminusplus(reverse(v451), ctbl[2], ctimes(v451, ctbl[3])); real2 v640 = reverse(minus(v459, v395)); real2 v646 = plus(v395, v459); real2 v427 = ctimesminusplus(reverse(v419), ctbl[0], ctimes(v419, ctbl[1])); real2 v641 = minus(v427, v363); real2 v645 = plus(v363, v427); real2 v654 = minus(uminusplus(v640), v641); real2 v650 = minus(uplusminus(v640), v641); store(out, 10 << shift, plus(v650, v666)); store(out, 42 << shift, minus(v650, v666)); store(out, 26 << shift, plus(v654, v670)); store(out, 58 << shift, minus(v654, v670)); real2 v673 = minus(v646, v645); store(out, 18 << shift, minus(uplusminus(v672), v673)); store(out, 50 << shift, minus(uminusplus(v672), v673)); real2 v677 = plus(v645, v646); store(out, 2 << shift, plus(v677, v678)); store(out, 34 << shift, minus(v677, v678)); real2 v105 = minusplus(v103, v104); real2 v107 = minusplus(uminus(v103), v104); real2 v253 = minusplus(v251, v252); real2 v255 = minusplus(uminus(v251), v252); real2 v267 = ctimesminusplus(reverse(v255), ctbl[12], ctimes(v255, ctbl[6])); real2 v121 = ctimesminusplus(reverse(v107), ctbl[8], ctimes(v107, ctbl[9])); real2 v950 = plus(v121, v267); real2 v946 = minus(v267, v121); real2 v290 = minusplus(uminus(v286), v287); real2 v288 = minusplus(v286, v287); real2 v143 = minusplus(v141, v142); real2 v145 = minusplus(uminus(v141), v142); real2 v159 = ctimesminusplus(reverse(v145), ctbl[2], ctimes(v145, ctbl[3])); real2 v300 = ctimesminusplus(reverse(v290), ctbl[3], ctimes(v290, ctbl[2])); real2 v919 = plus(v159, v300); real2 v913 = reverse(minus(v300, v159)); real2 v219 = minusplus(v217, v218); real2 v221 = minusplus(uminus(v217), v218); real2 v185 = 
minusplus(uminus(v181), v182); real2 v183 = minusplus(v181, v182); real2 v196 = ctimesminusplus(reverse(v185), ctbl[6], ctimes(v185, ctbl[12])); real2 v233 = ctimesminusplus(reverse(v221), ctbl[0], ctimes(v221, ctbl[0])); real2 v324 = minusplus(uminus(v320), v321); real2 v322 = minusplus(v320, v321); real2 v335 = ctimesminusplus(reverse(v324), ctbl[9], ctimes(v324, ctbl[8])); real2 v945 = reverse(minus(v335, v196)); real2 v951 = plus(v196, v335); real2 v977 = reverse(minus(v951, v950)); real2 v983 = plus(v950, v951); real2 v77 = minus(uplusminus(v67), v68); real2 v81 = minus(uminusplus(v67), v68); real2 v914 = minus(v233, v81); real2 v918 = plus(v81, v233); real2 v982 = plus(v918, v919); real2 v978 = minus(v919, v918); real2 v1014 = plus(v982, v983); store(out, 3 << shift, plus(v1014, v1015)); store(out, 35 << shift, minus(v1014, v1015)); real2 v1010 = minus(v983, v982); store(out, 19 << shift, minus(uplusminus(v1009), v1010)); store(out, 51 << shift, minus(uminusplus(v1009), v1010)); real2 v995 = minusplus(v993, v994); real2 v997 = minusplus(uminus(v993), v994); real2 v1007 = ctimesminusplus(reverse(v997), ctbl[0], ctimes(v997, ctbl[0])); real2 v987 = minus(uplusminus(v977), v978); real2 v991 = minus(uminusplus(v977), v978); store(out, 27 << shift, plus(v991, v1007)); store(out, 59 << shift, minus(v991, v1007)); real2 v1003 = ctimesminusplus(reverse(v995), ctbl[0], ctimes(v995, ctbl[1])); store(out, 43 << shift, minus(v987, v1003)); store(out, 11 << shift, plus(v987, v1003)); real2 v965 = minusplus(uminus(v961), v962); real2 v963 = minusplus(v961, v962); real2 v975 = ctimesminusplus(reverse(v965), ctbl[3], ctimes(v965, ctbl[2])); real2 v1059 = plus(v943, v975); real2 v1053 = reverse(minus(v975, v943)); real2 v947 = minusplus(v945, v946); real2 v949 = minusplus(uminus(v945), v946); real2 v959 = ctimesminusplus(reverse(v949), ctbl[0], ctimes(v949, ctbl[0])); real2 v927 = minus(uminusplus(v913), v914); real2 v923 = minus(uplusminus(v913), v914); real2 v1058 = 
plus(v927, v959); store(out, 15 << shift, plus(v1058, v1059)); store(out, 47 << shift, minus(v1058, v1059)); real2 v1054 = minus(v959, v927); store(out, 63 << shift, minus(uminusplus(v1053), v1054)); store(out, 31 << shift, minus(uplusminus(v1053), v1054)); real2 v955 = ctimesminusplus(reverse(v947), ctbl[0], ctimes(v947, ctbl[1])); real2 v971 = ctimesminusplus(reverse(v963), ctbl[2], ctimes(v963, ctbl[3])); real2 v1034 = reverse(minus(v971, v939)); real2 v1040 = plus(v939, v971); real2 v1035 = minus(v955, v923); store(out, 55 << shift, minus(uminusplus(v1034), v1035)); store(out, 23 << shift, minus(uplusminus(v1034), v1035)); real2 v1039 = plus(v923, v955); store(out, 39 << shift, minus(v1039, v1040)); store(out, 7 << shift, plus(v1039, v1040)); real2 v263 = ctimesminusplus(reverse(v253), ctbl[10], ctimes(v253, ctbl[11])); real2 v191 = ctimesminusplus(reverse(v183), ctbl[8], ctimes(v183, ctbl[9])); real2 v115 = ctimesminusplus(reverse(v105), ctbl[12], ctimes(v105, ctbl[13])); real2 v787 = minus(v263, v115); real2 v791 = plus(v115, v263); real2 v331 = ctimesminusplus(reverse(v322), ctbl[6], ctimes(v322, ctbl[7])); real2 v786 = reverse(minus(v331, v191)); real2 v792 = plus(v191, v331); real2 v280 = ctimesminusplus(reverse(v271), ctbl[18], ctimes(v271, ctbl[19])); real2 v133 = ctimesminusplus(reverse(v125), ctbl[20], ctimes(v125, ctbl[21])); real2 v807 = plus(v133, v280); real2 v803 = minus(v280, v133); real2 v210 = ctimesminusplus(reverse(v200), ctbl[16], ctimes(v200, ctbl[17])); real2 v153 = ctimesminusplus(reverse(v143), ctbl[4], ctimes(v143, ctbl[5])); real2 v347 = ctimesminusplus(reverse(v339), ctbl[14], ctimes(v339, ctbl[15])); real2 v808 = plus(v210, v347); real2 v802 = reverse(minus(v347, v210)); real2 v314 = ctimesminusplus(reverse(v304), ctbl[22], ctimes(v304, ctbl[23])); real2 v770 = reverse(minus(v314, v173)); real2 v776 = plus(v173, v314); real2 v839 = plus(v775, v776); real2 v835 = minus(v776, v775); real2 v818 = reverse(minus(v792, v791)); real2 v824 = 
plus(v791, v792); real2 v840 = plus(v807, v808); real2 v834 = reverse(minus(v808, v807)); real2 v856 = plus(v839, v840); real2 v850 = reverse(minus(v840, v839)); real2 v229 = ctimesminusplus(reverse(v219), ctbl[0], ctimes(v219, ctbl[1])); real2 v296 = ctimesminusplus(reverse(v288), ctbl[2], ctimes(v288, ctbl[3])); real2 v760 = plus(v153, v296); real2 v754 = reverse(minus(v296, v153)); real2 v759 = plus(v77, v229); real2 v755 = minus(v229, v77); real2 v823 = plus(v759, v760); real2 v819 = minus(v760, v759); real2 v855 = plus(v823, v824); store(out, 33 << shift, minus(v855, v856)); store(out, 1 << shift, plus(v855, v856)); real2 v851 = minus(v824, v823); store(out, 17 << shift, minus(uplusminus(v850), v851)); store(out, 49 << shift, minus(uminusplus(v850), v851)); real2 v838 = minusplus(uminus(v834), v835); real2 v836 = minusplus(v834, v835); real2 v844 = ctimesminusplus(reverse(v836), ctbl[0], ctimes(v836, ctbl[1])); real2 v832 = minus(uminusplus(v818), v819); real2 v828 = minus(uplusminus(v818), v819); store(out, 41 << shift, minus(v828, v844)); store(out, 9 << shift, plus(v828, v844)); real2 v848 = ctimesminusplus(reverse(v838), ctbl[0], ctimes(v838, ctbl[0])); store(out, 25 << shift, plus(v832, v848)); store(out, 57 << shift, minus(v832, v848)); real2 v790 = minusplus(uminus(v786), v787); real2 v788 = minusplus(v786, v787); real2 v772 = minusplus(v770, v771); real2 v774 = minusplus(uminus(v770), v771); real2 v780 = ctimesminusplus(reverse(v772), ctbl[4], ctimes(v772, ctbl[5])); real2 v806 = minusplus(uminus(v802), v803); real2 v804 = minusplus(v802, v803); real2 v812 = ctimesminusplus(reverse(v804), ctbl[2], ctimes(v804, ctbl[3])); real2 v875 = reverse(minus(v812, v780)); real2 v881 = plus(v780, v812); real2 v796 = ctimesminusplus(reverse(v788), ctbl[0], ctimes(v788, ctbl[1])); real2 v764 = minus(uplusminus(v754), v755); real2 v768 = minus(uminusplus(v754), v755); real2 v880 = plus(v764, v796); real2 v876 = minus(v796, v764); store(out, 21 << shift, 
minus(uplusminus(v875), v876)); store(out, 53 << shift, minus(uminusplus(v875), v876)); store(out, 37 << shift, minus(v880, v881)); store(out, 5 << shift, plus(v880, v881)); real2 v800 = ctimesminusplus(reverse(v790), ctbl[0], ctimes(v790, ctbl[0])); real2 v784 = ctimesminusplus(reverse(v774), ctbl[2], ctimes(v774, ctbl[3])); real2 v816 = ctimesminusplus(reverse(v806), ctbl[3], ctimes(v806, ctbl[2])); real2 v900 = plus(v784, v816); real2 v894 = reverse(minus(v816, v784)); real2 v895 = minus(v800, v768); real2 v899 = plus(v768, v800); store(out, 45 << shift, minus(v899, v900)); store(out, 13 << shift, plus(v899, v900)); store(out, 61 << shift, minus(uminusplus(v894), v895)); store(out, 29 << shift, minus(uplusminus(v894), v895)); // Pres : 15320 } } /* but64f_%CONFIG%_%ISA%: fully unrolled 64-slot butterfly kernel (the name suggests the forward radix-64 DFT pass - TODO confirm; %CONFIG% and %ISA% are template placeholders, presumably substituted when the kernel is instantiated). Runs k = 1 << (inShift - LOG2VECWIDTH) loop iterations. Iteration i reads 64 real2 values through load(in, n << inShift) for n = 0..63 with in = in0 + 2 * (i << LOG2VECWIDTH), combines them with coefficients taken from tbl at indices between 0 + tbloffset and 229 + tbloffset, and writes 64 real2 results through store(out, n << outShift, ...) for n = 0..63 with out = out0 + q[i]. Parameters: out0 - output base pointer; q - table of per-iteration output offsets, indexed by i; outShift and inShift - shift counts applied to the slot number n on store and load; in0 - input base pointer; tbl - coefficient table; K - multiplier for the per-iteration table offset (indices up to 229 + tbloffset are read, so each table block presumably holds at least 230 entries - verify against the table generator). load, store, plus, minus, reverse, uminus, minusplus, ctimes and ctimesminusplus are defined outside this section. */ ALIGNED(8192) void but64f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { /* number of loop iterations */ const int k = 1 << (inShift - LOG2VECWIDTH); int i=0; /* the loop iterations below are shared among OpenMP threads */ #pragma omp parallel for for(i=0;i < k;i++) { /* i0: iteration index scaled by 1 << LOG2VECWIDTH */ int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; /* base index added to every tbl[] subscript in this iteration */ const int tbloffset = K * (i0 >> outShift); // Pres : 30254 real2 v37 = load(in, 35 << inShift); real2 v5 = load(in, 3 << inShift); real2 v132 = plus(v5, v37); real2 v128 = minus(v37, v5); real2 v21 = load(in, 19 << inShift); real2 v53 = load(in, 51 << inShift); real2 v133 = plus(v21, v53); real2 v127 = reverse(minus(v21, v53)); real2 v131 = minusplus(uminus(v127), v128); real2 v129 = minusplus(v127, v128); real2 v139 = ctimesminusplus(reverse(v129), tbl[14 + tbloffset], ctimes(v129, tbl[15 + tbloffset])); real2 v145 = ctimesminusplus(reverse(v131), tbl[16 + tbloffset], ctimes(v131, tbl[17 + tbloffset])); real2 v448 = minus(v133, v132); real2 v452 = plus(v132, v133); real2 v45 = load(in, 43 << inShift); real2 v13 = load(in, 11 << inShift); real2 v292 = plus(v13, v45); real2 v288 = minus(v45, v13); real2 v29 = load(in, 27 << inShift); real2 v61 = load(in, 59 <<
inShift); real2 v293 = plus(v29, v61); real2 v287 = reverse(minus(v29, v61)); real2 v291 = minusplus(uminus(v287), v288); real2 v289 = minusplus(v287, v288); real2 v299 = ctimesminusplus(reverse(v289), tbl[46 + tbloffset], ctimes(v289, tbl[47 + tbloffset])); real2 v453 = plus(v292, v293); real2 v447 = reverse(minus(v292, v293)); real2 v608 = minus(v453, v452); real2 v612 = plus(v452, v453); real2 v980 = plus(v139, v299); real2 v976 = minus(v299, v139); real2 v449 = minusplus(v447, v448); real2 v451 = minusplus(uminus(v447), v448); real2 v465 = ctimesminusplus(reverse(v451), tbl[80 + tbloffset], ctimes(v451, tbl[81 + tbloffset])); real2 v305 = ctimesminusplus(reverse(v291), tbl[48 + tbloffset], ctimes(v291, tbl[49 + tbloffset])); real2 v1186 = minus(v305, v145); real2 v1190 = plus(v145, v305); real2 v459 = ctimesminusplus(reverse(v449), tbl[78 + tbloffset], ctimes(v449, tbl[79 + tbloffset])); real2 v25 = load(in, 23 << inShift); real2 v57 = load(in, 55 << inShift); real2 v207 = reverse(minus(v25, v57)); real2 v213 = plus(v25, v57); real2 v9 = load(in, 7 << inShift); real2 v41 = load(in, 39 << inShift); real2 v212 = plus(v9, v41); real2 v208 = minus(v41, v9); real2 v528 = minus(v213, v212); real2 v532 = plus(v212, v213); real2 v209 = minusplus(v207, v208); real2 v211 = minusplus(uminus(v207), v208); real2 v225 = ctimesminusplus(reverse(v211), tbl[32 + tbloffset], ctimes(v211, tbl[33 + tbloffset])); real2 v219 = ctimesminusplus(reverse(v209), tbl[30 + tbloffset], ctimes(v209, tbl[31 + tbloffset])); real2 v17 = load(in, 15 << inShift); real2 v49 = load(in, 47 << inShift); real2 v368 = minus(v49, v17); real2 v372 = plus(v17, v49); real2 v33 = load(in, 31 << inShift); real2 v65 = load(in, 63 << inShift); real2 v367 = reverse(minus(v33, v65)); real2 v373 = plus(v33, v65); real2 v369 = minusplus(v367, v368); real2 v371 = minusplus(uminus(v367), v368); real2 v533 = plus(v372, v373); real2 v527 = reverse(minus(v372, v373)); real2 v607 = reverse(minus(v532, v533)); real2 v613
= plus(v532, v533); real2 v529 = minusplus(v527, v528); real2 v531 = minusplus(uminus(v527), v528); real2 v545 = ctimesminusplus(reverse(v531), tbl[96 + tbloffset], ctimes(v531, tbl[97 + tbloffset])); real2 v653 = plus(v612, v613); real2 v647 = reverse(minus(v612, v613)); real2 v609 = minusplus(v607, v608); real2 v611 = minusplus(uminus(v607), v608); real2 v863 = plus(v465, v545); real2 v857 = reverse(minus(v465, v545)); real2 v539 = ctimesminusplus(reverse(v529), tbl[94 + tbloffset], ctimes(v529, tbl[95 + tbloffset])); real2 v385 = ctimesminusplus(reverse(v371), tbl[64 + tbloffset], ctimes(v371, tbl[65 + tbloffset])); real2 v619 = ctimesminusplus(reverse(v609), tbl[110 + tbloffset], ctimes(v609, tbl[111 + tbloffset])); real2 v1191 = plus(v225, v385); real2 v1185 = reverse(minus(v225, v385)); real2 v779 = reverse(minus(v459, v539)); real2 v785 = plus(v459, v539); real2 v625 = ctimesminusplus(reverse(v611), tbl[112 + tbloffset], ctimes(v611, tbl[113 + tbloffset])); real2 v379 = ctimesminusplus(reverse(v369), tbl[62 + tbloffset], ctimes(v369, tbl[63 + tbloffset])); real2 v975 = reverse(minus(v219, v379)); real2 v981 = plus(v219, v379); real2 v977 = minusplus(v975, v976); real2 v979 = minusplus(uminus(v975), v976); real2 v987 = ctimesminusplus(reverse(v977), tbl[170 + tbloffset], ctimes(v977, tbl[171 + tbloffset])); real2 v993 = ctimesminusplus(reverse(v979), tbl[172 + tbloffset], ctimes(v979, tbl[173 + tbloffset])); real2 v1015 = reverse(minus(v980, v981)); real2 v1021 = plus(v980, v981); real2 v11 = load(in, 9 << inShift); real2 v43 = load(in, 41 << inShift); real2 v248 = minus(v43, v11); real2 v252 = plus(v11, v43); real2 v59 = load(in, 57 << inShift); real2 v27 = load(in, 25 << inShift); real2 v253 = plus(v27, v59); real2 v247 = reverse(minus(v27, v59)); real2 v413 = plus(v252, v253); real2 v407 = reverse(minus(v252, v253)); real2 v249 = minusplus(v247, v248); real2 v251 = minusplus(uminus(v247), v248); real2 v259 = ctimesminusplus(reverse(v249), tbl[38 +
tbloffset], ctimes(v249, tbl[39 + tbloffset])); real2 v35 = load(in, 33 << inShift); real2 v3 = load(in, 1 << inShift); real2 v92 = plus(v3, v35); real2 v88 = minus(v35, v3); real2 v51 = load(in, 49 << inShift); real2 v19 = load(in, 17 << inShift); real2 v87 = reverse(minus(v19, v51)); real2 v93 = plus(v19, v51); real2 v412 = plus(v92, v93); real2 v408 = minus(v93, v92); real2 v411 = minusplus(uminus(v407), v408); real2 v409 = minusplus(v407, v408); real2 v91 = minusplus(uminus(v87), v88); real2 v89 = minusplus(v87, v88); real2 v99 = ctimesminusplus(reverse(v89), tbl[6 + tbloffset], ctimes(v89, tbl[7 + tbloffset])); real2 v425 = ctimesminusplus(reverse(v411), tbl[72 + tbloffset], ctimes(v411, tbl[73 + tbloffset])); real2 v568 = minus(v413, v412); real2 v572 = plus(v412, v413); real2 v940 = plus(v99, v259); real2 v936 = minus(v259, v99); real2 v419 = ctimesminusplus(reverse(v409), tbl[70 + tbloffset], ctimes(v409, tbl[71 + tbloffset])); real2 v47 = load(in, 45 << inShift); real2 v15 = load(in, 13 << inShift); real2 v332 = plus(v15, v47); real2 v328 = minus(v47, v15); real2 v63 = load(in, 61 << inShift); real2 v31 = load(in, 29 << inShift); real2 v327 = reverse(minus(v31, v63)); real2 v333 = plus(v31, v63); real2 v329 = minusplus(v327, v328); real2 v331 = minusplus(uminus(v327), v328); real2 v339 = ctimesminusplus(reverse(v329), tbl[54 + tbloffset], ctimes(v329, tbl[55 + tbloffset])); real2 v487 = reverse(minus(v332, v333)); real2 v493 = plus(v332, v333); real2 v7 = load(in, 5 << inShift); real2 v39 = load(in, 37 << inShift); real2 v172 = plus(v7, v39); real2 v168 = minus(v39, v7); real2 v55 = load(in, 53 << inShift); real2 v23 = load(in, 21 << inShift); real2 v173 = plus(v23, v55); real2 v167 = reverse(minus(v23, v55)); real2 v488 = minus(v173, v172); real2 v492 = plus(v172, v173); real2 v491 = minusplus(uminus(v487), v488); real2 v489 = minusplus(v487, v488); real2 v499 = ctimesminusplus(reverse(v489), tbl[86 + tbloffset], ctimes(v489, tbl[87 + tbloffset])); real2
v505 = ctimesminusplus(reverse(v491), tbl[88 + tbloffset], ctimes(v491, tbl[89 + tbloffset])); real2 v567 = reverse(minus(v492, v493)); real2 v573 = plus(v492, v493); real2 v571 = minusplus(uminus(v567), v568); real2 v569 = minusplus(v567, v568); real2 v579 = ctimesminusplus(reverse(v569), tbl[102 + tbloffset], ctimes(v569, tbl[103 + tbloffset])); real2 v585 = ctimesminusplus(reverse(v571), tbl[104 + tbloffset], ctimes(v571, tbl[105 + tbloffset])); real2 v739 = plus(v585, v625); real2 v733 = reverse(minus(v585, v625)); real2 v707 = reverse(minus(v579, v619)); real2 v713 = plus(v579, v619); real2 v648 = minus(v573, v572); real2 v652 = plus(v572, v573); real2 v673 = plus(v652, v653); real2 v667 = reverse(minus(v652, v653)); real2 v651 = minusplus(uminus(v647), v648); real2 v649 = minusplus(v647, v648); real2 v659 = ctimesminusplus(reverse(v649), tbl[118 + tbloffset], ctimes(v649, tbl[119 + tbloffset])); real2 v665 = ctimesminusplus(reverse(v651), tbl[120 + tbloffset], ctimes(v651, tbl[121 + tbloffset])); real2 v780 = minus(v499, v419); real2 v784 = plus(v419, v499); real2 v781 = minusplus(v779, v780); real2 v783 = minusplus(uminus(v779), v780); real2 v805 = plus(v784, v785); real2 v799 = reverse(minus(v784, v785)); real2 v862 = plus(v425, v505); real2 v858 = minus(v505, v425); real2 v859 = minusplus(v857, v858); real2 v861 = minusplus(uminus(v857), v858); real2 v875 = ctimesminusplus(reverse(v861), tbl[152 + tbloffset], ctimes(v861, tbl[153 + tbloffset])); real2 v791 = ctimesminusplus(reverse(v781), tbl[138 + tbloffset], ctimes(v781, tbl[139 + tbloffset])); real2 v797 = ctimesminusplus(reverse(v783), tbl[140 + tbloffset], ctimes(v783, tbl[141 + tbloffset])); real2 v883 = plus(v862, v863); real2 v877 = reverse(minus(v862, v863)); real2 v869 = ctimesminusplus(reverse(v859), tbl[150 + tbloffset], ctimes(v859, tbl[151 + tbloffset])); real2 v36 = load(in, 34 << inShift); real2 v4 = load(in, 2 << inShift); real2 v108 = minus(v36, v4); real2 v112 = plus(v4, v36); real2 v52
= load(in, 50 << inShift); real2 v20 = load(in, 18 << inShift); real2 v113 = plus(v20, v52); real2 v107 = reverse(minus(v20, v52)); real2 v428 = minus(v113, v112); real2 v432 = plus(v112, v113); real2 v12 = load(in, 10 << inShift); real2 v44 = load(in, 42 << inShift); real2 v268 = minus(v44, v12); real2 v272 = plus(v12, v44); real2 v28 = load(in, 26 << inShift); real2 v60 = load(in, 58 << inShift); real2 v267 = reverse(minus(v28, v60)); real2 v273 = plus(v28, v60); real2 v427 = reverse(minus(v272, v273)); real2 v433 = plus(v272, v273); real2 v431 = minusplus(uminus(v427), v428); real2 v429 = minusplus(v427, v428); real2 v439 = ctimesminusplus(reverse(v429), tbl[74 + tbloffset], ctimes(v429, tbl[75 + tbloffset])); real2 v588 = minus(v433, v432); real2 v592 = plus(v432, v433); real2 v40 = load(in, 38 << inShift); real2 v8 = load(in, 6 << inShift); real2 v188 = minus(v40, v8); real2 v192 = plus(v8, v40); real2 v24 = load(in, 22 << inShift); real2 v56 = load(in, 54 << inShift); real2 v187 = reverse(minus(v24, v56)); real2 v193 = plus(v24, v56); real2 v512 = plus(v192, v193); real2 v508 = minus(v193, v192); real2 v32 = load(in, 30 << inShift); real2 v64 = load(in, 62 << inShift); real2 v347 = reverse(minus(v32, v64)); real2 v353 = plus(v32, v64); real2 v48 = load(in, 46 << inShift); real2 v16 = load(in, 14 << inShift); real2 v348 = minus(v48, v16); real2 v352 = plus(v16, v48); real2 v513 = plus(v352, v353); real2 v507 = reverse(minus(v352, v353)); real2 v587 = reverse(minus(v512, v513)); real2 v593 = plus(v512, v513); real2 v633 = plus(v592, v593); real2 v627 = reverse(minus(v592, v593)); real2 v591 = minusplus(uminus(v587), v588); real2 v589 = minusplus(v587, v588); real2 v605 = ctimesminusplus(reverse(v591), tbl[108 + tbloffset], ctimes(v591, tbl[109 + tbloffset])); real2 v599 = ctimesminusplus(reverse(v589), tbl[106 + tbloffset], ctimes(v589, tbl[107 + tbloffset])); real2 v46 = load(in, 44 << inShift); real2 v14 = load(in, 12 << inShift); real2 v312 = plus(v14, v46);
real2 v308 = minus(v46, v14); real2 v62 = load(in, 60 << inShift); real2 v30 = load(in, 28 << inShift); real2 v313 = plus(v30, v62); real2 v307 = reverse(minus(v30, v62)); real2 v467 = reverse(minus(v312, v313)); real2 v473 = plus(v312, v313); real2 v22 = load(in, 20 << inShift); real2 v54 = load(in, 52 << inShift); real2 v147 = reverse(minus(v22, v54)); real2 v153 = plus(v22, v54); real2 v6 = load(in, 4 << inShift); real2 v38 = load(in, 36 << inShift); real2 v148 = minus(v38, v6); real2 v152 = plus(v6, v38); real2 v472 = plus(v152, v153); real2 v468 = minus(v153, v152); real2 v547 = reverse(minus(v472, v473)); real2 v553 = plus(v472, v473); real2 v10 = load(in, 8 << inShift); real2 v42 = load(in, 40 << inShift); real2 v232 = plus(v10, v42); real2 v228 = minus(v42, v10); real2 v58 = load(in, 56 << inShift); real2 v26 = load(in, 24 << inShift); real2 v233 = plus(v26, v58); real2 v227 = reverse(minus(v26, v58)); real2 v393 = plus(v232, v233); real2 v387 = reverse(minus(v232, v233)); real2 v2 = load(in, 0 << inShift); real2 v34 = load(in, 32 << inShift); real2 v72 = plus(v2, v34); real2 v68 = minus(v34, v2); real2 v18 = load(in, 16 << inShift); real2 v50 = load(in, 48 << inShift); real2 v73 = plus(v18, v50); real2 v67 = reverse(minus(v18, v50)); real2 v388 = minus(v73, v72); real2 v392 = plus(v72, v73); real2 v548 = minus(v393, v392); real2 v552 = plus(v392, v393); real2 v628 = minus(v553, v552); real2 v632 = plus(v552, v553); real2 v672 = plus(v632, v633); real2 v668 = minus(v633, v632); /* first of the 64 stores: each output slot n is written exactly once through store(out, n << outShift, ...), in the order the generated schedule produces it */ store(out, 0 << outShift, plus(v672, v673)); real2 v686 = minus(v672, v673); store(out, 32 << outShift, ctimesminusplus(v686, tbl[0 + tbloffset], ctimes(reverse(v686), tbl[1 + tbloffset]))); real2 v669 = minusplus(v667, v668); real2 v671 = minusplus(uminus(v667), v668); store(out, 48 << outShift, ctimesminusplus(reverse(v671), tbl[124 + tbloffset], ctimes(v671, tbl[125 + tbloffset]))); store(out, 16 << outShift, ctimesminusplus(reverse(v669), tbl[122 + tbloffset], ctimes(v669,
tbl[123 + tbloffset]))); real2 v631 = minusplus(uminus(v627), v628); real2 v629 = minusplus(v627, v628); real2 v639 = ctimesminusplus(reverse(v629), tbl[114 + tbloffset], ctimes(v629, tbl[115 + tbloffset])); store(out, 8 << outShift, plus(v639, v659)); real2 v694 = minus(v639, v659); store(out, 40 << outShift, ctimesminusplus(v694, tbl[0 + tbloffset], ctimes(reverse(v694), tbl[1 + tbloffset]))); real2 v645 = ctimesminusplus(reverse(v631), tbl[116 + tbloffset], ctimes(v631, tbl[117 + tbloffset])); store(out, 24 << outShift, plus(v645, v665)); real2 v700 = minus(v645, v665); store(out, 56 << outShift, ctimesminusplus(v700, tbl[0 + tbloffset], ctimes(reverse(v700), tbl[1 + tbloffset]))); real2 v549 = minusplus(v547, v548); real2 v551 = minusplus(uminus(v547), v548); real2 v559 = ctimesminusplus(reverse(v549), tbl[98 + tbloffset], ctimes(v549, tbl[99 + tbloffset])); real2 v708 = minus(v599, v559); real2 v712 = plus(v559, v599); store(out, 4 << outShift, plus(v712, v713)); real2 v726 = minus(v712, v713); store(out, 36 << outShift, ctimesminusplus(v726, tbl[0 + tbloffset], ctimes(reverse(v726), tbl[1 + tbloffset]))); real2 v711 = minusplus(uminus(v707), v708); real2 v709 = minusplus(v707, v708); store(out, 20 << outShift, ctimesminusplus(reverse(v709), tbl[126 + tbloffset], ctimes(v709, tbl[127 + tbloffset]))); store(out, 52 << outShift, ctimesminusplus(reverse(v711), tbl[128 + tbloffset], ctimes(v711, tbl[129 + tbloffset]))); real2 v565 = ctimesminusplus(reverse(v551), tbl[100 + tbloffset], ctimes(v551, tbl[101 + tbloffset])); real2 v738 = plus(v565, v605); real2 v734 = minus(v605, v565); store(out, 12 << outShift, plus(v738, v739)); real2 v752 = minus(v738, v739); store(out, 44 << outShift, ctimesminusplus(v752, tbl[0 + tbloffset], ctimes(reverse(v752), tbl[1 + tbloffset]))); real2 v737 = minusplus(uminus(v733), v734); store(out, 60 << outShift, ctimesminusplus(reverse(v737), tbl[132 + tbloffset], ctimes(v737, tbl[133 + tbloffset]))); real2 v735 = minusplus(v733,
v734); store(out, 28 << outShift, ctimesminusplus(reverse(v735), tbl[130 + tbloffset], ctimes(v735, tbl[131 + tbloffset]))); real2 v471 = minusplus(uminus(v467), v468); real2 v469 = minusplus(v467, v468); real2 v479 = ctimesminusplus(reverse(v469), tbl[82 + tbloffset], ctimes(v469, tbl[83 + tbloffset])); real2 v511 = minusplus(uminus(v507), v508); real2 v509 = minusplus(v507, v508); real2 v519 = ctimesminusplus(reverse(v509), tbl[90 + tbloffset], ctimes(v509, tbl[91 + tbloffset])); real2 v765 = plus(v439, v519); real2 v759 = reverse(minus(v439, v519)); real2 v389 = minusplus(v387, v388); real2 v391 = minusplus(uminus(v387), v388); real2 v399 = ctimesminusplus(reverse(v389), tbl[66 + tbloffset], ctimes(v389, tbl[67 + tbloffset])); real2 v764 = plus(v399, v479); real2 v760 = minus(v479, v399); real2 v804 = plus(v764, v765); real2 v800 = minus(v765, v764); store(out, 2 << outShift, plus(v804, v805)); real2 v818 = minus(v804, v805); store(out, 34 << outShift, ctimesminusplus(v818, tbl[0 + tbloffset], ctimes(reverse(v818), tbl[1 + tbloffset]))); real2 v803 = minusplus(uminus(v799), v800); store(out, 50 << outShift, ctimesminusplus(reverse(v803), tbl[144 + tbloffset], ctimes(v803, tbl[145 + tbloffset]))); real2 v801 = minusplus(v799, v800); store(out, 18 << outShift, ctimesminusplus(reverse(v801), tbl[142 + tbloffset], ctimes(v801, tbl[143 + tbloffset]))); real2 v763 = minusplus(uminus(v759), v760); real2 v761 = minusplus(v759, v760); real2 v777 = ctimesminusplus(reverse(v763), tbl[136 + tbloffset], ctimes(v763, tbl[137 + tbloffset])); store(out, 26 << outShift, plus(v777, v797)); real2 v830 = minus(v777, v797); store(out, 58 << outShift, ctimesminusplus(v830, tbl[0 + tbloffset], ctimes(reverse(v830), tbl[1 + tbloffset]))); real2 v771 = ctimesminusplus(reverse(v761), tbl[134 + tbloffset], ctimes(v761, tbl[135 + tbloffset])); store(out, 10 << outShift, plus(v771, v791)); real2 v824 = minus(v771, v791); store(out, 42 << outShift, ctimesminusplus(v824, tbl[0 + tbloffset],
ctimes(reverse(v824), tbl[1 + tbloffset]))); real2 v445 = ctimesminusplus(reverse(v431), tbl[76 + tbloffset], ctimes(v431, tbl[77 + tbloffset])); real2 v525 = ctimesminusplus(reverse(v511), tbl[92 + tbloffset], ctimes(v511, tbl[93 + tbloffset])); real2 v837 = reverse(minus(v445, v525)); real2 v843 = plus(v445, v525); real2 v485 = ctimesminusplus(reverse(v471), tbl[84 + tbloffset], ctimes(v471, tbl[85 + tbloffset])); real2 v405 = ctimesminusplus(reverse(v391), tbl[68 + tbloffset], ctimes(v391, tbl[69 + tbloffset])); real2 v838 = minus(v485, v405); real2 v842 = plus(v405, v485); real2 v878 = minus(v843, v842); real2 v882 = plus(v842, v843); store(out, 6 << outShift, plus(v882, v883)); real2 v896 = minus(v882, v883); store(out, 38 << outShift, ctimesminusplus(v896, tbl[0 + tbloffset], ctimes(reverse(v896), tbl[1 + tbloffset]))); real2 v881 = minusplus(uminus(v877), v878); store(out, 54 << outShift, ctimesminusplus(reverse(v881), tbl[156 + tbloffset], ctimes(v881, tbl[157 + tbloffset]))); real2 v879 = minusplus(v877, v878); store(out, 22 << outShift, ctimesminusplus(reverse(v879), tbl[154 + tbloffset], ctimes(v879, tbl[155 + tbloffset]))); real2 v841 = minusplus(uminus(v837), v838); real2 v839 = minusplus(v837, v838); real2 v855 = ctimesminusplus(reverse(v841), tbl[148 + tbloffset], ctimes(v841, tbl[149 + tbloffset])); store(out, 30 << outShift, plus(v855, v875)); real2 v908 = minus(v855, v875); store(out, 62 << outShift, ctimesminusplus(v908, tbl[0 + tbloffset], ctimes(reverse(v908), tbl[1 + tbloffset]))); real2 v849 = ctimesminusplus(reverse(v839), tbl[146 + tbloffset], ctimes(v839, tbl[147 + tbloffset])); store(out, 14 << outShift, plus(v849, v869)); real2 v902 = minus(v849, v869); store(out, 46 << outShift, ctimesminusplus(v902, tbl[0 + tbloffset], ctimes(reverse(v902), tbl[1 + tbloffset]))); real2 v151 = minusplus(uminus(v147), v148); real2 v149 = minusplus(v147, v148); real2 v311 = minusplus(uminus(v307), v308); real2 v309 = minusplus(v307, v308); real2 v109 =
minusplus(v107, v108); real2 v111 = minusplus(uminus(v107), v108); real2 v119 = ctimesminusplus(reverse(v109), tbl[10 + tbloffset], ctimes(v109, tbl[11 + tbloffset])); real2 v269 = minusplus(v267, v268); real2 v271 = minusplus(uminus(v267), v268); real2 v279 = ctimesminusplus(reverse(v269), tbl[42 + tbloffset], ctimes(v269, tbl[43 + tbloffset])); real2 v960 = plus(v119, v279); real2 v956 = minus(v279, v119); real2 v169 = minusplus(v167, v168); real2 v171 = minusplus(uminus(v167), v168); real2 v159 = ctimesminusplus(reverse(v149), tbl[18 + tbloffset], ctimes(v149, tbl[19 + tbloffset])); real2 v319 = ctimesminusplus(reverse(v309), tbl[50 + tbloffset], ctimes(v309, tbl[51 + tbloffset])); real2 v921 = plus(v159, v319); real2 v915 = reverse(minus(v159, v319)); real2 v351 = minusplus(uminus(v347), v348); real2 v349 = minusplus(v347, v348); real2 v359 = ctimesminusplus(reverse(v349), tbl[58 + tbloffset], ctimes(v349, tbl[59 + tbloffset])); real2 v191 = minusplus(uminus(v187), v188); real2 v189 = minusplus(v187, v188); real2 v199 = ctimesminusplus(reverse(v189), tbl[26 + tbloffset], ctimes(v189, tbl[27 + tbloffset])); real2 v961 = plus(v199, v359); real2 v955 = reverse(minus(v199, v359)); real2 v995 = reverse(minus(v960, v961)); real2 v1001 = plus(v960, v961); real2 v179 = ctimesminusplus(reverse(v169), tbl[22 + tbloffset], ctimes(v169, tbl[23 + tbloffset])); real2 v941 = plus(v179, v339); real2 v935 = reverse(minus(v179, v339)); real2 v1016 = minus(v941, v940); real2 v1020 = plus(v940, v941); real2 v71 = minusplus(uminus(v67), v68); real2 v69 = minusplus(v67, v68); real2 v79 = ctimesminusplus(reverse(v69), tbl[2 + tbloffset], ctimes(v69, tbl[3 + tbloffset])); real2 v1041 = plus(v1020, v1021); real2 v1035 = reverse(minus(v1020, v1021)); real2 v229 = minusplus(v227, v228); real2 v231 = minusplus(uminus(v227), v228); real2 v239 = ctimesminusplus(reverse(v229), tbl[34 + tbloffset], ctimes(v229, tbl[35 + tbloffset])); real2 v920 = plus(v79, v239); real2 v916 = minus(v239,
v79); real2 v996 = minus(v921, v920); real2 v1000 = plus(v920, v921); real2 v1040 = plus(v1000, v1001); real2 v1036 = minus(v1001, v1000); store(out, 1 << outShift, plus(v1040, v1041)); real2 v1054 = minus(v1040, v1041); store(out, 33 << outShift, ctimesminusplus(v1054, tbl[0 + tbloffset], ctimes(reverse(v1054), tbl[1 + tbloffset]))); real2 v1037 = minusplus(v1035, v1036); real2 v1039 = minusplus(uminus(v1035), v1036); store(out, 49 << outShift, ctimesminusplus(reverse(v1039), tbl[184 + tbloffset], ctimes(v1039, tbl[185 + tbloffset]))); store(out, 17 << outShift, ctimesminusplus(reverse(v1037), tbl[182 + tbloffset], ctimes(v1037, tbl[183 + tbloffset]))); real2 v1017 = minusplus(v1015, v1016); real2 v1019 = minusplus(uminus(v1015), v1016); real2 v1033 = ctimesminusplus(reverse(v1019), tbl[180 + tbloffset], ctimes(v1019, tbl[181 + tbloffset])); real2 v997 = minusplus(v995, v996); real2 v999 = minusplus(uminus(v995), v996); real2 v1013 = ctimesminusplus(reverse(v999), tbl[176 + tbloffset], ctimes(v999, tbl[177 + tbloffset])); store(out, 25 << outShift, plus(v1013, v1033)); real2 v1066 = minus(v1013, v1033); store(out, 57 << outShift, ctimesminusplus(v1066, tbl[0 + tbloffset], ctimes(reverse(v1066), tbl[1 + tbloffset]))); real2 v1027 = ctimesminusplus(reverse(v1017), tbl[178 + tbloffset], ctimes(v1017, tbl[179 + tbloffset])); real2 v1007 = ctimesminusplus(reverse(v997), tbl[174 + tbloffset], ctimes(v997, tbl[175 + tbloffset])); store(out, 9 << outShift, plus(v1007, v1027)); real2 v1060 = minus(v1007, v1027); store(out, 41 << outShift, ctimesminusplus(v1060, tbl[0 + tbloffset], ctimes(reverse(v1060), tbl[1 + tbloffset]))); real2 v937 = minusplus(v935, v936); real2 v939 = minusplus(uminus(v935), v936); real2 v959 = minusplus(uminus(v955), v956); real2 v957 = minusplus(v955, v956); real2 v967 = ctimesminusplus(reverse(v957), tbl[166 + tbloffset], ctimes(v957, tbl[167 + tbloffset])); real2 v947 = ctimesminusplus(reverse(v937), tbl[162 + tbloffset], ctimes(v937, tbl[163 +
tbloffset])); real2 v919 = minusplus(uminus(v915), v916); real2 v917 = minusplus(v915, v916); real2 v1079 = plus(v947, v987); real2 v1073 = reverse(minus(v947, v987)); real2 v927 = ctimesminusplus(reverse(v917), tbl[158 + tbloffset], ctimes(v917, tbl[159 + tbloffset])); real2 v1074 = minus(v967, v927); real2 v1078 = plus(v927, v967); store(out, 5 << outShift, plus(v1078, v1079)); real2 v1092 = minus(v1078, v1079); store(out, 37 << outShift, ctimesminusplus(v1092, tbl[0 + tbloffset], ctimes(reverse(v1092), tbl[1 + tbloffset]))); real2 v1075 = minusplus(v1073, v1074); store(out, 21 << outShift, ctimesminusplus(reverse(v1075), tbl[186 + tbloffset], ctimes(v1075, tbl[187 + tbloffset]))); real2 v1077 = minusplus(uminus(v1073), v1074); store(out, 53 << outShift, ctimesminusplus(reverse(v1077), tbl[188 + tbloffset], ctimes(v1077, tbl[189 + tbloffset]))); real2 v953 = ctimesminusplus(reverse(v939), tbl[164 + tbloffset], ctimes(v939, tbl[165 + tbloffset])); real2 v1099 = reverse(minus(v953, v993)); real2 v1105 = plus(v953, v993); real2 v973 = ctimesminusplus(reverse(v959), tbl[168 + tbloffset], ctimes(v959, tbl[169 + tbloffset])); real2 v933 = ctimesminusplus(reverse(v919), tbl[160 + tbloffset], ctimes(v919, tbl[161 + tbloffset])); real2 v1104 = plus(v933, v973); real2 v1100 = minus(v973, v933); store(out, 13 << outShift, plus(v1104, v1105)); real2 v1118 = minus(v1104, v1105); store(out, 45 << outShift, ctimesminusplus(v1118, tbl[0 + tbloffset], ctimes(reverse(v1118), tbl[1 + tbloffset]))); real2 v1101 = minusplus(v1099, v1100); store(out, 29 << outShift, ctimesminusplus(reverse(v1101), tbl[190 + tbloffset], ctimes(v1101, tbl[191 + tbloffset]))); real2 v1103 = minusplus(uminus(v1099), v1100); store(out, 61 << outShift, ctimesminusplus(reverse(v1103), tbl[192 + tbloffset], ctimes(v1103, tbl[193 + tbloffset]))); real2 v345 = ctimesminusplus(reverse(v331), tbl[56 + tbloffset], ctimes(v331, tbl[57 + tbloffset])); real2 v325 = ctimesminusplus(reverse(v311), tbl[52 + tbloffset],
ctimes(v311, tbl[53 + tbloffset])); real2 v265 = ctimesminusplus(reverse(v251), tbl[40 + tbloffset], ctimes(v251, tbl[41 + tbloffset])); real2 v185 = ctimesminusplus(reverse(v171), tbl[24 + tbloffset], ctimes(v171, tbl[25 + tbloffset])); real2 v165 = ctimesminusplus(reverse(v151), tbl[20 + tbloffset], ctimes(v151, tbl[21 + tbloffset])); real2 v1131 = plus(v165, v325); real2 v1125 = reverse(minus(v165, v325)); real2 v1151 = plus(v185, v345); real2 v1145 = reverse(minus(v185, v345)); real2 v105 = ctimesminusplus(reverse(v91), tbl[8 + tbloffset], ctimes(v91, tbl[9 + tbloffset])); real2 v1150 = plus(v105, v265); real2 v1146 = minus(v265, v105); real2 v1226 = minus(v1151, v1150); real2 v1230 = plus(v1150, v1151); real2 v1231 = plus(v1190, v1191); real2 v1225 = reverse(minus(v1190, v1191)); real2 v1245 = reverse(minus(v1230, v1231)); real2 v1251 = plus(v1230, v1231); real2 v365 = ctimesminusplus(reverse(v351), tbl[60 + tbloffset], ctimes(v351, tbl[61 + tbloffset])); real2 v285 = ctimesminusplus(reverse(v271), tbl[44 + tbloffset], ctimes(v271, tbl[45 + tbloffset])); real2 v205 = ctimesminusplus(reverse(v191), tbl[28 + tbloffset], ctimes(v191, tbl[29 + tbloffset])); real2 v1171 = plus(v205, v365); real2 v1165 = reverse(minus(v205, v365)); real2 v125 = ctimesminusplus(reverse(v111), tbl[12 + tbloffset], ctimes(v111, tbl[13 + tbloffset])); real2 v85 = ctimesminusplus(reverse(v71), tbl[4 + tbloffset], ctimes(v71, tbl[5 + tbloffset])); real2 v245 = ctimesminusplus(reverse(v231), tbl[36 + tbloffset], ctimes(v231, tbl[37 + tbloffset])); real2 v1126 = minus(v245, v85); real2 v1130 = plus(v85, v245); real2 v1210 = plus(v1130, v1131); real2 v1206 = minus(v1131, v1130); real2 v1166 = minus(v285, v125); real2 v1170 = plus(v125, v285); real2 v1211 = plus(v1170, v1171); real2 v1205 = reverse(minus(v1170, v1171)); real2 v1246 = minus(v1211, v1210); real2 v1250 = plus(v1210, v1211); store(out, 3 << outShift, plus(v1250, v1251)); real2 v1264 = minus(v1250, v1251); store(out, 35 <<
outShift, ctimesminusplus(v1264, tbl[0 + tbloffset], ctimes(reverse(v1264), tbl[1 + tbloffset]))); real2 v1247 = minusplus(v1245, v1246); real2 v1249 = minusplus(uminus(v1245), v1246); store(out, 19 << outShift, ctimesminusplus(reverse(v1247), tbl[218 + tbloffset], ctimes(v1247, tbl[219 + tbloffset]))); store(out, 51 << outShift, ctimesminusplus(reverse(v1249), tbl[220 + tbloffset], ctimes(v1249, tbl[221 + tbloffset]))); real2 v1229 = minusplus(uminus(v1225), v1226); real2 v1227 = minusplus(v1225, v1226); real2 v1207 = minusplus(v1205, v1206); real2 v1209 = minusplus(uminus(v1205), v1206); real2 v1237 = ctimesminusplus(reverse(v1227), tbl[214 + tbloffset], ctimes(v1227, tbl[215 + tbloffset])); real2 v1217 = ctimesminusplus(reverse(v1207), tbl[210 + tbloffset], ctimes(v1207, tbl[211 + tbloffset])); store(out, 11 << outShift, plus(v1217, v1237)); real2 v1270 = minus(v1217, v1237); store(out, 43 << outShift, ctimesminusplus(v1270, tbl[0 + tbloffset], ctimes(reverse(v1270), tbl[1 + tbloffset]))); real2 v1223 = ctimesminusplus(reverse(v1209), tbl[212 + tbloffset], ctimes(v1209, tbl[213 + tbloffset])); real2 v1243 = ctimesminusplus(reverse(v1229), tbl[216 + tbloffset], ctimes(v1229, tbl[217 + tbloffset])); store(out, 27 << outShift, plus(v1223, v1243)); real2 v1276 = minus(v1223, v1243); store(out, 59 << outShift, ctimesminusplus(v1276, tbl[0 + tbloffset], ctimes(reverse(v1276), tbl[1 + tbloffset]))); real2 v1189 = minusplus(uminus(v1185), v1186); real2 v1187 = minusplus(v1185, v1186); real2 v1129 = minusplus(uminus(v1125), v1126); real2 v1127 = minusplus(v1125, v1126); real2 v1147 = minusplus(v1145, v1146); real2 v1149 = minusplus(uminus(v1145), v1146); real2 v1167 = minusplus(v1165, v1166); real2 v1169 = minusplus(uminus(v1165), v1166); real2 v1143 = ctimesminusplus(reverse(v1129), tbl[196 + tbloffset], ctimes(v1129, tbl[197 + tbloffset])); real2 v1163 = ctimesminusplus(reverse(v1149), tbl[200 + tbloffset], ctimes(v1149, tbl[201 + tbloffset])); real2 v1203 =
ctimesminusplus(reverse(v1189), tbl[208 + tbloffset], ctimes(v1189, tbl[209 + tbloffset])); real2 v1315 = plus(v1163, v1203); real2 v1309 = reverse(minus(v1163, v1203)); real2 v1183 = ctimesminusplus(reverse(v1169), tbl[204 + tbloffset], ctimes(v1169, tbl[205 + tbloffset])); real2 v1314 = plus(v1143, v1183); real2 v1310 = minus(v1183, v1143); store(out, 15 << outShift, plus(v1314, v1315)); real2 v1328 = minus(v1314, v1315); store(out, 47 << outShift, ctimesminusplus(v1328, tbl[0 + tbloffset], ctimes(reverse(v1328), tbl[1 + tbloffset]))); real2 v1311 = minusplus(v1309, v1310); store(out, 31 << outShift, ctimesminusplus(reverse(v1311), tbl[226 + tbloffset], ctimes(v1311, tbl[227 + tbloffset]))); real2 v1313 = minusplus(uminus(v1309), v1310); store(out, 63 << outShift, ctimesminusplus(reverse(v1313), tbl[228 + tbloffset], ctimes(v1313, tbl[229 + tbloffset]))); real2 v1177 = ctimesminusplus(reverse(v1167), tbl[202 + tbloffset], ctimes(v1167, tbl[203 + tbloffset])); real2 v1137 = ctimesminusplus(reverse(v1127), tbl[194 + tbloffset], ctimes(v1127, tbl[195 + tbloffset])); real2 v1197 = ctimesminusplus(reverse(v1187), tbl[206 + tbloffset], ctimes(v1187, tbl[207 + tbloffset])); real2 v1157 = ctimesminusplus(reverse(v1147), tbl[198 + tbloffset], ctimes(v1147, tbl[199 + tbloffset])); real2 v1283 = reverse(minus(v1157, v1197)); real2 v1289 = plus(v1157, v1197); real2 v1288 = plus(v1137, v1177); real2 v1284 = minus(v1177, v1137); store(out, 7 << outShift, plus(v1288, v1289)); real2 v1302 = minus(v1288, v1289); store(out, 39 << outShift, ctimesminusplus(v1302, tbl[0 + tbloffset], ctimes(reverse(v1302), tbl[1 + tbloffset]))); real2 v1285 = minusplus(v1283, v1284); real2 v1287 = minusplus(uminus(v1283), v1284); store(out, 55 << outShift, ctimesminusplus(reverse(v1287), tbl[224 + tbloffset], ctimes(v1287, tbl[225 + tbloffset]))); store(out, 23 << outShift, ctimesminusplus(reverse(v1285), tbl[222 + tbloffset], ctimes(v1285, tbl[223 + tbloffset]))); // Pres : 17339 } } ALIGNED(8192)
/*
 * but64b_%CONFIG%_%ISA% -- fully unrolled kernel with 64 inputs and 64 outputs.
 * (The name suggests a radix-64 FFT butterfly, and the "b" suffix presumably
 * means "backward" -- TODO confirm.  %CONFIG% and %ISA% are placeholders in the
 * identifier, presumably substituted when this template is instantiated.)
 *
 * Parameters:
 *   out0     - base of the output buffer (RESTRICT); iteration i writes
 *              through out = out0 + q[i].
 *   q        - per-iteration output offsets, indexed by the loop counter i.
 *   outShift - left shift applied to every store index (m << outShift); also
 *              used to derive the table offset (i0 >> outShift).
 *   in0      - base of the input buffer (RESTRICT); iteration i reads through
 *              in = in0 + i0*2, where i0 = i << LOG2VECWIDTH.
 *   inShift  - left shift applied to every load index (n << inShift); also
 *              fixes the trip count k = 1 << (inShift - LOG2VECWIDTH).
 *   tbl      - coefficient table (RESTRICT), read as tbl[c + tbloffset] for
 *              c in 0..229.
 *   K        - multiplier for the table offset:
 *              tbloffset = K * (i0 >> outShift).
 *
 * The loop is annotated with "#pragma omp parallel for".  Each iteration
 * performs load(in, n << inShift) for every n in 0..63, combines the values
 * with plus / minus / uminus / reverse / minusplus / ctimes / ctimesminusplus,
 * and issues store(out, m << outShift, ...) for every m in 0..63.
 *
 * NOTE(review): compared with tbut64f, which follows in this file, the
 * reverse(minus(x, y)) terms here take their two operands in the opposite
 * order (this kernel also indexes tbl[] directly, whereas tbut64f uses
 * load(tbl, ...)).
 *
 * The "Pres : <number>" markers are emitted by the code generator; their
 * meaning is not visible here.  The statement list is generated and order
 * dependent (every temporary is defined before its first use), so it should
 * not be reordered by hand.
 */
void but64b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * (i0 >> outShift); // Pres : 30254 real2 v37 = load(in, 35 << inShift); real2 v5 = load(in, 3 << inShift); real2 v132 = plus(v5, v37); real2 v128 = minus(v37, v5); real2 v21 = load(in, 19 << inShift); real2 v53 = load(in, 51 << inShift); real2 v133 = plus(v21, v53); real2 v127 = reverse(minus(v53, v21)); real2 v131 = minusplus(uminus(v127), v128); real2 v129 = minusplus(v127, v128); real2 v139 = ctimesminusplus(reverse(v129), tbl[14 + tbloffset], ctimes(v129, tbl[15 + tbloffset])); real2 v145 = ctimesminusplus(reverse(v131), tbl[16 + tbloffset], ctimes(v131, tbl[17 + tbloffset])); real2 v448 = minus(v133, v132); real2 v452 = plus(v132, v133); real2 v45 = load(in, 43 << inShift); real2 v13 = load(in, 11 << inShift); real2 v292 = plus(v13, v45); real2 v288 = minus(v45, v13); real2 v29 = load(in, 27 << inShift); real2 v61 = load(in, 59 << inShift); real2 v293 = plus(v29, v61); real2 v287 = reverse(minus(v61, v29)); real2 v291 = minusplus(uminus(v287), v288); real2 v289 = minusplus(v287, v288); real2 v299 = ctimesminusplus(reverse(v289), tbl[46 + tbloffset], ctimes(v289, tbl[47 + tbloffset])); real2 v453 = plus(v292, v293); real2 v447 = reverse(minus(v293, v292)); real2 v608 = minus(v453, v452); real2 v612 = plus(v452, v453); real2 v980 = plus(v139, v299); real2 v976 = minus(v299, v139); real2 v449 = minusplus(v447, v448); real2 v451 = minusplus(uminus(v447), v448); real2 v465 = ctimesminusplus(reverse(v451), tbl[80 + tbloffset], ctimes(v451, tbl[81 + tbloffset])); real2 v305 = ctimesminusplus(reverse(v291), tbl[48 + tbloffset], ctimes(v291, tbl[49 + tbloffset])); real2 v1186 = minus(v305, v145); real2 
v1190 = plus(v145, v305); real2 v459 = ctimesminusplus(reverse(v449), tbl[78 + tbloffset], ctimes(v449, tbl[79 + tbloffset])); real2 v25 = load(in, 23 << inShift); real2 v57 = load(in, 55 << inShift); real2 v207 = reverse(minus(v57, v25)); real2 v213 = plus(v25, v57); real2 v9 = load(in, 7 << inShift); real2 v41 = load(in, 39 << inShift); real2 v212 = plus(v9, v41); real2 v208 = minus(v41, v9); real2 v528 = minus(v213, v212); real2 v532 = plus(v212, v213); real2 v209 = minusplus(v207, v208); real2 v211 = minusplus(uminus(v207), v208); real2 v225 = ctimesminusplus(reverse(v211), tbl[32 + tbloffset], ctimes(v211, tbl[33 + tbloffset])); real2 v219 = ctimesminusplus(reverse(v209), tbl[30 + tbloffset], ctimes(v209, tbl[31 + tbloffset])); real2 v17 = load(in, 15 << inShift); real2 v49 = load(in, 47 << inShift); real2 v368 = minus(v49, v17); real2 v372 = plus(v17, v49); real2 v33 = load(in, 31 << inShift); real2 v65 = load(in, 63 << inShift); real2 v367 = reverse(minus(v65, v33)); real2 v373 = plus(v33, v65); real2 v369 = minusplus(v367, v368); real2 v371 = minusplus(uminus(v367), v368); real2 v533 = plus(v372, v373); real2 v527 = reverse(minus(v373, v372)); real2 v607 = reverse(minus(v533, v532)); real2 v613 = plus(v532, v533); real2 v529 = minusplus(v527, v528); real2 v531 = minusplus(uminus(v527), v528); real2 v545 = ctimesminusplus(reverse(v531), tbl[96 + tbloffset], ctimes(v531, tbl[97 + tbloffset])); real2 v653 = plus(v612, v613); real2 v647 = reverse(minus(v613, v612)); real2 v609 = minusplus(v607, v608); real2 v611 = minusplus(uminus(v607), v608); real2 v863 = plus(v465, v545); real2 v857 = reverse(minus(v545, v465)); real2 v539 = ctimesminusplus(reverse(v529), tbl[94 + tbloffset], ctimes(v529, tbl[95 + tbloffset])); real2 v385 = ctimesminusplus(reverse(v371), tbl[64 + tbloffset], ctimes(v371, tbl[65 + tbloffset])); real2 v619 = ctimesminusplus(reverse(v609), tbl[110 + tbloffset], ctimes(v609, tbl[111 + tbloffset])); real2 v1191 = plus(v225, v385); real2 v1185 = 
reverse(minus(v385, v225)); real2 v779 = reverse(minus(v539, v459)); real2 v785 = plus(v459, v539); real2 v625 = ctimesminusplus(reverse(v611), tbl[112 + tbloffset], ctimes(v611, tbl[113 + tbloffset])); real2 v379 = ctimesminusplus(reverse(v369), tbl[62 + tbloffset], ctimes(v369, tbl[63 + tbloffset])); real2 v975 = reverse(minus(v379, v219)); real2 v981 = plus(v219, v379); real2 v977 = minusplus(v975, v976); real2 v979 = minusplus(uminus(v975), v976); real2 v987 = ctimesminusplus(reverse(v977), tbl[170 + tbloffset], ctimes(v977, tbl[171 + tbloffset])); real2 v993 = ctimesminusplus(reverse(v979), tbl[172 + tbloffset], ctimes(v979, tbl[173 + tbloffset])); real2 v1015 = reverse(minus(v981, v980)); real2 v1021 = plus(v980, v981); real2 v11 = load(in, 9 << inShift); real2 v43 = load(in, 41 << inShift); real2 v248 = minus(v43, v11); real2 v252 = plus(v11, v43); real2 v59 = load(in, 57 << inShift); real2 v27 = load(in, 25 << inShift); real2 v253 = plus(v27, v59); real2 v247 = reverse(minus(v59, v27)); real2 v413 = plus(v252, v253); real2 v407 = reverse(minus(v253, v252)); real2 v249 = minusplus(v247, v248); real2 v251 = minusplus(uminus(v247), v248); real2 v259 = ctimesminusplus(reverse(v249), tbl[38 + tbloffset], ctimes(v249, tbl[39 + tbloffset])); real2 v35 = load(in, 33 << inShift); real2 v3 = load(in, 1 << inShift); real2 v92 = plus(v3, v35); real2 v88 = minus(v35, v3); real2 v51 = load(in, 49 << inShift); real2 v19 = load(in, 17 << inShift); real2 v87 = reverse(minus(v51, v19)); real2 v93 = plus(v19, v51); real2 v412 = plus(v92, v93); real2 v408 = minus(v93, v92); real2 v411 = minusplus(uminus(v407), v408); real2 v409 = minusplus(v407, v408); real2 v91 = minusplus(uminus(v87), v88); real2 v89 = minusplus(v87, v88); real2 v99 = ctimesminusplus(reverse(v89), tbl[6 + tbloffset], ctimes(v89, tbl[7 + tbloffset])); real2 v425 = ctimesminusplus(reverse(v411), tbl[72 + tbloffset], ctimes(v411, tbl[73 + tbloffset])); real2 v568 = minus(v413, v412); real2 v572 = plus(v412, 
v413); real2 v940 = plus(v99, v259); real2 v936 = minus(v259, v99); real2 v419 = ctimesminusplus(reverse(v409), tbl[70 + tbloffset], ctimes(v409, tbl[71 + tbloffset])); real2 v47 = load(in, 45 << inShift); real2 v15 = load(in, 13 << inShift); real2 v332 = plus(v15, v47); real2 v328 = minus(v47, v15); real2 v63 = load(in, 61 << inShift); real2 v31 = load(in, 29 << inShift); real2 v327 = reverse(minus(v63, v31)); real2 v333 = plus(v31, v63); real2 v329 = minusplus(v327, v328); real2 v331 = minusplus(uminus(v327), v328); real2 v339 = ctimesminusplus(reverse(v329), tbl[54 + tbloffset], ctimes(v329, tbl[55 + tbloffset])); real2 v487 = reverse(minus(v333, v332)); real2 v493 = plus(v332, v333); real2 v7 = load(in, 5 << inShift); real2 v39 = load(in, 37 << inShift); real2 v172 = plus(v7, v39); real2 v168 = minus(v39, v7); real2 v55 = load(in, 53 << inShift); real2 v23 = load(in, 21 << inShift); real2 v173 = plus(v23, v55); real2 v167 = reverse(minus(v55, v23)); real2 v488 = minus(v173, v172); real2 v492 = plus(v172, v173); real2 v491 = minusplus(uminus(v487), v488); real2 v489 = minusplus(v487, v488); real2 v499 = ctimesminusplus(reverse(v489), tbl[86 + tbloffset], ctimes(v489, tbl[87 + tbloffset])); real2 v505 = ctimesminusplus(reverse(v491), tbl[88 + tbloffset], ctimes(v491, tbl[89 + tbloffset])); real2 v567 = reverse(minus(v493, v492)); real2 v573 = plus(v492, v493); real2 v571 = minusplus(uminus(v567), v568); real2 v569 = minusplus(v567, v568); real2 v579 = ctimesminusplus(reverse(v569), tbl[102 + tbloffset], ctimes(v569, tbl[103 + tbloffset])); real2 v585 = ctimesminusplus(reverse(v571), tbl[104 + tbloffset], ctimes(v571, tbl[105 + tbloffset])); real2 v739 = plus(v585, v625); real2 v733 = reverse(minus(v625, v585)); real2 v707 = reverse(minus(v619, v579)); real2 v713 = plus(v579, v619); real2 v648 = minus(v573, v572); real2 v652 = plus(v572, v573); real2 v673 = plus(v652, v653); real2 v667 = reverse(minus(v653, v652)); real2 v651 = minusplus(uminus(v647), v648); real2 
v649 = minusplus(v647, v648); real2 v659 = ctimesminusplus(reverse(v649), tbl[118 + tbloffset], ctimes(v649, tbl[119 + tbloffset])); real2 v665 = ctimesminusplus(reverse(v651), tbl[120 + tbloffset], ctimes(v651, tbl[121 + tbloffset])); real2 v780 = minus(v499, v419); real2 v784 = plus(v419, v499); real2 v781 = minusplus(v779, v780); real2 v783 = minusplus(uminus(v779), v780); real2 v805 = plus(v784, v785); real2 v799 = reverse(minus(v785, v784)); real2 v862 = plus(v425, v505); real2 v858 = minus(v505, v425); real2 v859 = minusplus(v857, v858); real2 v861 = minusplus(uminus(v857), v858); real2 v875 = ctimesminusplus(reverse(v861), tbl[152 + tbloffset], ctimes(v861, tbl[153 + tbloffset])); real2 v791 = ctimesminusplus(reverse(v781), tbl[138 + tbloffset], ctimes(v781, tbl[139 + tbloffset])); real2 v797 = ctimesminusplus(reverse(v783), tbl[140 + tbloffset], ctimes(v783, tbl[141 + tbloffset])); real2 v883 = plus(v862, v863); real2 v877 = reverse(minus(v863, v862)); real2 v869 = ctimesminusplus(reverse(v859), tbl[150 + tbloffset], ctimes(v859, tbl[151 + tbloffset])); real2 v36 = load(in, 34 << inShift); real2 v4 = load(in, 2 << inShift); real2 v108 = minus(v36, v4); real2 v112 = plus(v4, v36); real2 v52 = load(in, 50 << inShift); real2 v20 = load(in, 18 << inShift); real2 v113 = plus(v20, v52); real2 v107 = reverse(minus(v52, v20)); real2 v428 = minus(v113, v112); real2 v432 = plus(v112, v113); real2 v12 = load(in, 10 << inShift); real2 v44 = load(in, 42 << inShift); real2 v268 = minus(v44, v12); real2 v272 = plus(v12, v44); real2 v28 = load(in, 26 << inShift); real2 v60 = load(in, 58 << inShift); real2 v267 = reverse(minus(v60, v28)); real2 v273 = plus(v28, v60); real2 v427 = reverse(minus(v273, v272)); real2 v433 = plus(v272, v273); real2 v431 = minusplus(uminus(v427), v428); real2 v429 = minusplus(v427, v428); real2 v439 = ctimesminusplus(reverse(v429), tbl[74 + tbloffset], ctimes(v429, tbl[75 + tbloffset])); real2 v588 = minus(v433, v432); real2 v592 = plus(v432, 
v433); real2 v40 = load(in, 38 << inShift); real2 v8 = load(in, 6 << inShift); real2 v188 = minus(v40, v8); real2 v192 = plus(v8, v40); real2 v24 = load(in, 22 << inShift); real2 v56 = load(in, 54 << inShift); real2 v187 = reverse(minus(v56, v24)); real2 v193 = plus(v24, v56); real2 v512 = plus(v192, v193); real2 v508 = minus(v193, v192); real2 v32 = load(in, 30 << inShift); real2 v64 = load(in, 62 << inShift); real2 v347 = reverse(minus(v64, v32)); real2 v353 = plus(v32, v64); real2 v48 = load(in, 46 << inShift); real2 v16 = load(in, 14 << inShift); real2 v348 = minus(v48, v16); real2 v352 = plus(v16, v48); real2 v513 = plus(v352, v353); real2 v507 = reverse(minus(v353, v352)); real2 v587 = reverse(minus(v513, v512)); real2 v593 = plus(v512, v513); real2 v633 = plus(v592, v593); real2 v627 = reverse(minus(v593, v592)); real2 v591 = minusplus(uminus(v587), v588); real2 v589 = minusplus(v587, v588); real2 v605 = ctimesminusplus(reverse(v591), tbl[108 + tbloffset], ctimes(v591, tbl[109 + tbloffset])); real2 v599 = ctimesminusplus(reverse(v589), tbl[106 + tbloffset], ctimes(v589, tbl[107 + tbloffset])); real2 v46 = load(in, 44 << inShift); real2 v14 = load(in, 12 << inShift); real2 v312 = plus(v14, v46); real2 v308 = minus(v46, v14); real2 v62 = load(in, 60 << inShift); real2 v30 = load(in, 28 << inShift); real2 v313 = plus(v30, v62); real2 v307 = reverse(minus(v62, v30)); real2 v467 = reverse(minus(v313, v312)); real2 v473 = plus(v312, v313); real2 v22 = load(in, 20 << inShift); real2 v54 = load(in, 52 << inShift); real2 v147 = reverse(minus(v54, v22)); real2 v153 = plus(v22, v54); real2 v6 = load(in, 4 << inShift); real2 v38 = load(in, 36 << inShift); real2 v148 = minus(v38, v6); real2 v152 = plus(v6, v38); real2 v472 = plus(v152, v153); real2 v468 = minus(v153, v152); real2 v547 = reverse(minus(v473, v472)); real2 v553 = plus(v472, v473); real2 v10 = load(in, 8 << inShift); real2 v42 = load(in, 40 << inShift); real2 v232 = plus(v10, v42); real2 v228 = minus(v42, 
v10); real2 v58 = load(in, 56 << inShift); real2 v26 = load(in, 24 << inShift); real2 v233 = plus(v26, v58); real2 v227 = reverse(minus(v58, v26)); real2 v393 = plus(v232, v233); real2 v387 = reverse(minus(v233, v232)); real2 v2 = load(in, 0 << inShift); real2 v34 = load(in, 32 << inShift); real2 v72 = plus(v2, v34); real2 v68 = minus(v34, v2); real2 v18 = load(in, 16 << inShift); real2 v50 = load(in, 48 << inShift); real2 v73 = plus(v18, v50); real2 v67 = reverse(minus(v50, v18)); real2 v388 = minus(v73, v72); real2 v392 = plus(v72, v73); real2 v548 = minus(v393, v392); real2 v552 = plus(v392, v393); real2 v628 = minus(v553, v552); real2 v632 = plus(v552, v553); real2 v672 = plus(v632, v633); real2 v668 = minus(v633, v632); store(out, 0 << outShift, plus(v672, v673)); real2 v686 = minus(v672, v673); store(out, 32 << outShift, ctimesminusplus(v686, tbl[0 + tbloffset], ctimes(reverse(v686), tbl[1 + tbloffset]))); real2 v669 = minusplus(v667, v668); real2 v671 = minusplus(uminus(v667), v668); store(out, 48 << outShift, ctimesminusplus(reverse(v671), tbl[124 + tbloffset], ctimes(v671, tbl[125 + tbloffset]))); store(out, 16 << outShift, ctimesminusplus(reverse(v669), tbl[122 + tbloffset], ctimes(v669, tbl[123 + tbloffset]))); real2 v631 = minusplus(uminus(v627), v628); real2 v629 = minusplus(v627, v628); real2 v639 = ctimesminusplus(reverse(v629), tbl[114 + tbloffset], ctimes(v629, tbl[115 + tbloffset])); store(out, 8 << outShift, plus(v639, v659)); real2 v694 = minus(v639, v659); store(out, 40 << outShift, ctimesminusplus(v694, tbl[0 + tbloffset], ctimes(reverse(v694), tbl[1 + tbloffset]))); real2 v645 = ctimesminusplus(reverse(v631), tbl[116 + tbloffset], ctimes(v631, tbl[117 + tbloffset])); store(out, 24 << outShift, plus(v645, v665)); real2 v700 = minus(v645, v665); store(out, 56 << outShift, ctimesminusplus(v700, tbl[0 + tbloffset], ctimes(reverse(v700), tbl[1 + tbloffset]))); real2 v549 = minusplus(v547, v548); real2 v551 = minusplus(uminus(v547), v548); real2 
v559 = ctimesminusplus(reverse(v549), tbl[98 + tbloffset], ctimes(v549, tbl[99 + tbloffset])); real2 v708 = minus(v599, v559); real2 v712 = plus(v559, v599); store(out, 4 << outShift, plus(v712, v713)); real2 v726 = minus(v712, v713); store(out, 36 << outShift, ctimesminusplus(v726, tbl[0 + tbloffset], ctimes(reverse(v726), tbl[1 + tbloffset]))); real2 v711 = minusplus(uminus(v707), v708); real2 v709 = minusplus(v707, v708); store(out, 20 << outShift, ctimesminusplus(reverse(v709), tbl[126 + tbloffset], ctimes(v709, tbl[127 + tbloffset]))); store(out, 52 << outShift, ctimesminusplus(reverse(v711), tbl[128 + tbloffset], ctimes(v711, tbl[129 + tbloffset]))); real2 v565 = ctimesminusplus(reverse(v551), tbl[100 + tbloffset], ctimes(v551, tbl[101 + tbloffset])); real2 v738 = plus(v565, v605); real2 v734 = minus(v605, v565); store(out, 12 << outShift, plus(v738, v739)); real2 v752 = minus(v738, v739); store(out, 44 << outShift, ctimesminusplus(v752, tbl[0 + tbloffset], ctimes(reverse(v752), tbl[1 + tbloffset]))); real2 v737 = minusplus(uminus(v733), v734); store(out, 60 << outShift, ctimesminusplus(reverse(v737), tbl[132 + tbloffset], ctimes(v737, tbl[133 + tbloffset]))); real2 v735 = minusplus(v733, v734); store(out, 28 << outShift, ctimesminusplus(reverse(v735), tbl[130 + tbloffset], ctimes(v735, tbl[131 + tbloffset]))); real2 v471 = minusplus(uminus(v467), v468); real2 v469 = minusplus(v467, v468); real2 v479 = ctimesminusplus(reverse(v469), tbl[82 + tbloffset], ctimes(v469, tbl[83 + tbloffset])); real2 v511 = minusplus(uminus(v507), v508); real2 v509 = minusplus(v507, v508); real2 v519 = ctimesminusplus(reverse(v509), tbl[90 + tbloffset], ctimes(v509, tbl[91 + tbloffset])); real2 v765 = plus(v439, v519); real2 v759 = reverse(minus(v519, v439)); real2 v389 = minusplus(v387, v388); real2 v391 = minusplus(uminus(v387), v388); real2 v399 = ctimesminusplus(reverse(v389), tbl[66 + tbloffset], ctimes(v389, tbl[67 + tbloffset])); real2 v764 = plus(v399, v479); real2 v760 = 
minus(v479, v399); real2 v804 = plus(v764, v765); real2 v800 = minus(v765, v764); store(out, 2 << outShift, plus(v804, v805)); real2 v818 = minus(v804, v805); store(out, 34 << outShift, ctimesminusplus(v818, tbl[0 + tbloffset], ctimes(reverse(v818), tbl[1 + tbloffset]))); real2 v803 = minusplus(uminus(v799), v800); store(out, 50 << outShift, ctimesminusplus(reverse(v803), tbl[144 + tbloffset], ctimes(v803, tbl[145 + tbloffset]))); real2 v801 = minusplus(v799, v800); store(out, 18 << outShift, ctimesminusplus(reverse(v801), tbl[142 + tbloffset], ctimes(v801, tbl[143 + tbloffset]))); real2 v763 = minusplus(uminus(v759), v760); real2 v761 = minusplus(v759, v760); real2 v777 = ctimesminusplus(reverse(v763), tbl[136 + tbloffset], ctimes(v763, tbl[137 + tbloffset])); store(out, 26 << outShift, plus(v777, v797)); real2 v830 = minus(v777, v797); store(out, 58 << outShift, ctimesminusplus(v830, tbl[0 + tbloffset], ctimes(reverse(v830), tbl[1 + tbloffset]))); real2 v771 = ctimesminusplus(reverse(v761), tbl[134 + tbloffset], ctimes(v761, tbl[135 + tbloffset])); store(out, 10 << outShift, plus(v771, v791)); real2 v824 = minus(v771, v791); store(out, 42 << outShift, ctimesminusplus(v824, tbl[0 + tbloffset], ctimes(reverse(v824), tbl[1 + tbloffset]))); real2 v445 = ctimesminusplus(reverse(v431), tbl[76 + tbloffset], ctimes(v431, tbl[77 + tbloffset])); real2 v525 = ctimesminusplus(reverse(v511), tbl[92 + tbloffset], ctimes(v511, tbl[93 + tbloffset])); real2 v837 = reverse(minus(v525, v445)); real2 v843 = plus(v445, v525); real2 v485 = ctimesminusplus(reverse(v471), tbl[84 + tbloffset], ctimes(v471, tbl[85 + tbloffset])); real2 v405 = ctimesminusplus(reverse(v391), tbl[68 + tbloffset], ctimes(v391, tbl[69 + tbloffset])); real2 v838 = minus(v485, v405); real2 v842 = plus(v405, v485); real2 v878 = minus(v843, v842); real2 v882 = plus(v842, v843); store(out, 6 << outShift, plus(v882, v883)); real2 v896 = minus(v882, v883); store(out, 38 << outShift, ctimesminusplus(v896, tbl[0 + 
tbloffset], ctimes(reverse(v896), tbl[1 + tbloffset]))); real2 v881 = minusplus(uminus(v877), v878); store(out, 54 << outShift, ctimesminusplus(reverse(v881), tbl[156 + tbloffset], ctimes(v881, tbl[157 + tbloffset]))); real2 v879 = minusplus(v877, v878); store(out, 22 << outShift, ctimesminusplus(reverse(v879), tbl[154 + tbloffset], ctimes(v879, tbl[155 + tbloffset]))); real2 v841 = minusplus(uminus(v837), v838); real2 v839 = minusplus(v837, v838); real2 v855 = ctimesminusplus(reverse(v841), tbl[148 + tbloffset], ctimes(v841, tbl[149 + tbloffset])); store(out, 30 << outShift, plus(v855, v875)); real2 v908 = minus(v855, v875); store(out, 62 << outShift, ctimesminusplus(v908, tbl[0 + tbloffset], ctimes(reverse(v908), tbl[1 + tbloffset]))); real2 v849 = ctimesminusplus(reverse(v839), tbl[146 + tbloffset], ctimes(v839, tbl[147 + tbloffset])); store(out, 14 << outShift, plus(v849, v869)); real2 v902 = minus(v849, v869); store(out, 46 << outShift, ctimesminusplus(v902, tbl[0 + tbloffset], ctimes(reverse(v902), tbl[1 + tbloffset]))); real2 v151 = minusplus(uminus(v147), v148); real2 v149 = minusplus(v147, v148); real2 v311 = minusplus(uminus(v307), v308); real2 v309 = minusplus(v307, v308); real2 v109 = minusplus(v107, v108); real2 v111 = minusplus(uminus(v107), v108); real2 v119 = ctimesminusplus(reverse(v109), tbl[10 + tbloffset], ctimes(v109, tbl[11 + tbloffset])); real2 v269 = minusplus(v267, v268); real2 v271 = minusplus(uminus(v267), v268); real2 v279 = ctimesminusplus(reverse(v269), tbl[42 + tbloffset], ctimes(v269, tbl[43 + tbloffset])); real2 v960 = plus(v119, v279); real2 v956 = minus(v279, v119); real2 v169 = minusplus(v167, v168); real2 v171 = minusplus(uminus(v167), v168); real2 v159 = ctimesminusplus(reverse(v149), tbl[18 + tbloffset], ctimes(v149, tbl[19 + tbloffset])); real2 v319 = ctimesminusplus(reverse(v309), tbl[50 + tbloffset], ctimes(v309, tbl[51 + tbloffset])); real2 v921 = plus(v159, v319); real2 v915 = reverse(minus(v319, v159)); real2 v351 = 
minusplus(uminus(v347), v348); real2 v349 = minusplus(v347, v348); real2 v359 = ctimesminusplus(reverse(v349), tbl[58 + tbloffset], ctimes(v349, tbl[59 + tbloffset])); real2 v191 = minusplus(uminus(v187), v188); real2 v189 = minusplus(v187, v188); real2 v199 = ctimesminusplus(reverse(v189), tbl[26 + tbloffset], ctimes(v189, tbl[27 + tbloffset])); real2 v961 = plus(v199, v359); real2 v955 = reverse(minus(v359, v199)); real2 v995 = reverse(minus(v961, v960)); real2 v1001 = plus(v960, v961); real2 v179 = ctimesminusplus(reverse(v169), tbl[22 + tbloffset], ctimes(v169, tbl[23 + tbloffset])); real2 v941 = plus(v179, v339); real2 v935 = reverse(minus(v339, v179)); real2 v1016 = minus(v941, v940); real2 v1020 = plus(v940, v941); real2 v71 = minusplus(uminus(v67), v68); real2 v69 = minusplus(v67, v68); real2 v79 = ctimesminusplus(reverse(v69), tbl[2 + tbloffset], ctimes(v69, tbl[3 + tbloffset])); real2 v1041 = plus(v1020, v1021); real2 v1035 = reverse(minus(v1021, v1020)); real2 v229 = minusplus(v227, v228); real2 v231 = minusplus(uminus(v227), v228); real2 v239 = ctimesminusplus(reverse(v229), tbl[34 + tbloffset], ctimes(v229, tbl[35 + tbloffset])); real2 v920 = plus(v79, v239); real2 v916 = minus(v239, v79); real2 v996 = minus(v921, v920); real2 v1000 = plus(v920, v921); real2 v1040 = plus(v1000, v1001); real2 v1036 = minus(v1001, v1000); store(out, 1 << outShift, plus(v1040, v1041)); real2 v1054 = minus(v1040, v1041); store(out, 33 << outShift, ctimesminusplus(v1054, tbl[0 + tbloffset], ctimes(reverse(v1054), tbl[1 + tbloffset]))); real2 v1037 = minusplus(v1035, v1036); real2 v1039 = minusplus(uminus(v1035), v1036); store(out, 49 << outShift, ctimesminusplus(reverse(v1039), tbl[184 + tbloffset], ctimes(v1039, tbl[185 + tbloffset]))); store(out, 17 << outShift, ctimesminusplus(reverse(v1037), tbl[182 + tbloffset], ctimes(v1037, tbl[183 + tbloffset]))); real2 v1017 = minusplus(v1015, v1016); real2 v1019 = minusplus(uminus(v1015), v1016); real2 v1033 = 
ctimesminusplus(reverse(v1019), tbl[180 + tbloffset], ctimes(v1019, tbl[181 + tbloffset])); real2 v997 = minusplus(v995, v996); real2 v999 = minusplus(uminus(v995), v996); real2 v1013 = ctimesminusplus(reverse(v999), tbl[176 + tbloffset], ctimes(v999, tbl[177 + tbloffset])); store(out, 25 << outShift, plus(v1013, v1033)); real2 v1066 = minus(v1013, v1033); store(out, 57 << outShift, ctimesminusplus(v1066, tbl[0 + tbloffset], ctimes(reverse(v1066), tbl[1 + tbloffset]))); real2 v1027 = ctimesminusplus(reverse(v1017), tbl[178 + tbloffset], ctimes(v1017, tbl[179 + tbloffset])); real2 v1007 = ctimesminusplus(reverse(v997), tbl[174 + tbloffset], ctimes(v997, tbl[175 + tbloffset])); store(out, 9 << outShift, plus(v1007, v1027)); real2 v1060 = minus(v1007, v1027); store(out, 41 << outShift, ctimesminusplus(v1060, tbl[0 + tbloffset], ctimes(reverse(v1060), tbl[1 + tbloffset]))); real2 v937 = minusplus(v935, v936); real2 v939 = minusplus(uminus(v935), v936); real2 v959 = minusplus(uminus(v955), v956); real2 v957 = minusplus(v955, v956); real2 v967 = ctimesminusplus(reverse(v957), tbl[166 + tbloffset], ctimes(v957, tbl[167 + tbloffset])); real2 v947 = ctimesminusplus(reverse(v937), tbl[162 + tbloffset], ctimes(v937, tbl[163 + tbloffset])); real2 v919 = minusplus(uminus(v915), v916); real2 v917 = minusplus(v915, v916); real2 v1079 = plus(v947, v987); real2 v1073 = reverse(minus(v987, v947)); real2 v927 = ctimesminusplus(reverse(v917), tbl[158 + tbloffset], ctimes(v917, tbl[159 + tbloffset])); real2 v1074 = minus(v967, v927); real2 v1078 = plus(v927, v967); store(out, 5 << outShift, plus(v1078, v1079)); real2 v1092 = minus(v1078, v1079); store(out, 37 << outShift, ctimesminusplus(v1092, tbl[0 + tbloffset], ctimes(reverse(v1092), tbl[1 + tbloffset]))); real2 v1075 = minusplus(v1073, v1074); store(out, 21 << outShift, ctimesminusplus(reverse(v1075), tbl[186 + tbloffset], ctimes(v1075, tbl[187 + tbloffset]))); real2 v1077 = minusplus(uminus(v1073), v1074); store(out, 53 << 
outShift, ctimesminusplus(reverse(v1077), tbl[188 + tbloffset], ctimes(v1077, tbl[189 + tbloffset]))); real2 v953 = ctimesminusplus(reverse(v939), tbl[164 + tbloffset], ctimes(v939, tbl[165 + tbloffset])); real2 v1099 = reverse(minus(v993, v953)); real2 v1105 = plus(v953, v993); real2 v973 = ctimesminusplus(reverse(v959), tbl[168 + tbloffset], ctimes(v959, tbl[169 + tbloffset])); real2 v933 = ctimesminusplus(reverse(v919), tbl[160 + tbloffset], ctimes(v919, tbl[161 + tbloffset])); real2 v1104 = plus(v933, v973); real2 v1100 = minus(v973, v933); store(out, 13 << outShift, plus(v1104, v1105)); real2 v1118 = minus(v1104, v1105); store(out, 45 << outShift, ctimesminusplus(v1118, tbl[0 + tbloffset], ctimes(reverse(v1118), tbl[1 + tbloffset]))); real2 v1101 = minusplus(v1099, v1100); store(out, 29 << outShift, ctimesminusplus(reverse(v1101), tbl[190 + tbloffset], ctimes(v1101, tbl[191 + tbloffset]))); real2 v1103 = minusplus(uminus(v1099), v1100); store(out, 61 << outShift, ctimesminusplus(reverse(v1103), tbl[192 + tbloffset], ctimes(v1103, tbl[193 + tbloffset]))); real2 v345 = ctimesminusplus(reverse(v331), tbl[56 + tbloffset], ctimes(v331, tbl[57 + tbloffset])); real2 v325 = ctimesminusplus(reverse(v311), tbl[52 + tbloffset], ctimes(v311, tbl[53 + tbloffset])); real2 v265 = ctimesminusplus(reverse(v251), tbl[40 + tbloffset], ctimes(v251, tbl[41 + tbloffset])); real2 v185 = ctimesminusplus(reverse(v171), tbl[24 + tbloffset], ctimes(v171, tbl[25 + tbloffset])); real2 v165 = ctimesminusplus(reverse(v151), tbl[20 + tbloffset], ctimes(v151, tbl[21 + tbloffset])); real2 v1131 = plus(v165, v325); real2 v1125 = reverse(minus(v325, v165)); real2 v1151 = plus(v185, v345); real2 v1145 = reverse(minus(v345, v185)); real2 v105 = ctimesminusplus(reverse(v91), tbl[8 + tbloffset], ctimes(v91, tbl[9 + tbloffset])); real2 v1150 = plus(v105, v265); real2 v1146 = minus(v265, v105); real2 v1226 = minus(v1151, v1150); real2 v1230 = plus(v1150, v1151); real2 v1231 = plus(v1190, v1191); real2 
v1225 = reverse(minus(v1191, v1190)); real2 v1245 = reverse(minus(v1231, v1230)); real2 v1251 = plus(v1230, v1231); real2 v365 = ctimesminusplus(reverse(v351), tbl[60 + tbloffset], ctimes(v351, tbl[61 + tbloffset])); real2 v285 = ctimesminusplus(reverse(v271), tbl[44 + tbloffset], ctimes(v271, tbl[45 + tbloffset])); real2 v205 = ctimesminusplus(reverse(v191), tbl[28 + tbloffset], ctimes(v191, tbl[29 + tbloffset])); real2 v1171 = plus(v205, v365); real2 v1165 = reverse(minus(v365, v205)); real2 v125 = ctimesminusplus(reverse(v111), tbl[12 + tbloffset], ctimes(v111, tbl[13 + tbloffset])); real2 v85 = ctimesminusplus(reverse(v71), tbl[4 + tbloffset], ctimes(v71, tbl[5 + tbloffset])); real2 v245 = ctimesminusplus(reverse(v231), tbl[36 + tbloffset], ctimes(v231, tbl[37 + tbloffset])); real2 v1126 = minus(v245, v85); real2 v1130 = plus(v85, v245); real2 v1210 = plus(v1130, v1131); real2 v1206 = minus(v1131, v1130); real2 v1166 = minus(v285, v125); real2 v1170 = plus(v125, v285); real2 v1211 = plus(v1170, v1171); real2 v1205 = reverse(minus(v1171, v1170)); real2 v1246 = minus(v1211, v1210); real2 v1250 = plus(v1210, v1211); store(out, 3 << outShift, plus(v1250, v1251)); real2 v1264 = minus(v1250, v1251); store(out, 35 << outShift, ctimesminusplus(v1264, tbl[0 + tbloffset], ctimes(reverse(v1264), tbl[1 + tbloffset]))); real2 v1247 = minusplus(v1245, v1246); real2 v1249 = minusplus(uminus(v1245), v1246); store(out, 19 << outShift, ctimesminusplus(reverse(v1247), tbl[218 + tbloffset], ctimes(v1247, tbl[219 + tbloffset]))); store(out, 51 << outShift, ctimesminusplus(reverse(v1249), tbl[220 + tbloffset], ctimes(v1249, tbl[221 + tbloffset]))); real2 v1229 = minusplus(uminus(v1225), v1226); real2 v1227 = minusplus(v1225, v1226); real2 v1207 = minusplus(v1205, v1206); real2 v1209 = minusplus(uminus(v1205), v1206); real2 v1237 = ctimesminusplus(reverse(v1227), tbl[214 + tbloffset], ctimes(v1227, tbl[215 + tbloffset])); real2 v1217 = ctimesminusplus(reverse(v1207), tbl[210 + 
tbloffset], ctimes(v1207, tbl[211 + tbloffset])); store(out, 11 << outShift, plus(v1217, v1237)); real2 v1270 = minus(v1217, v1237); store(out, 43 << outShift, ctimesminusplus(v1270, tbl[0 + tbloffset], ctimes(reverse(v1270), tbl[1 + tbloffset]))); real2 v1223 = ctimesminusplus(reverse(v1209), tbl[212 + tbloffset], ctimes(v1209, tbl[213 + tbloffset])); real2 v1243 = ctimesminusplus(reverse(v1229), tbl[216 + tbloffset], ctimes(v1229, tbl[217 + tbloffset])); store(out, 27 << outShift, plus(v1223, v1243)); real2 v1276 = minus(v1223, v1243); store(out, 59 << outShift, ctimesminusplus(v1276, tbl[0 + tbloffset], ctimes(reverse(v1276), tbl[1 + tbloffset]))); real2 v1189 = minusplus(uminus(v1185), v1186); real2 v1187 = minusplus(v1185, v1186); real2 v1129 = minusplus(uminus(v1125), v1126); real2 v1127 = minusplus(v1125, v1126); real2 v1147 = minusplus(v1145, v1146); real2 v1149 = minusplus(uminus(v1145), v1146); real2 v1167 = minusplus(v1165, v1166); real2 v1169 = minusplus(uminus(v1165), v1166); real2 v1143 = ctimesminusplus(reverse(v1129), tbl[196 + tbloffset], ctimes(v1129, tbl[197 + tbloffset])); real2 v1163 = ctimesminusplus(reverse(v1149), tbl[200 + tbloffset], ctimes(v1149, tbl[201 + tbloffset])); real2 v1203 = ctimesminusplus(reverse(v1189), tbl[208 + tbloffset], ctimes(v1189, tbl[209 + tbloffset])); real2 v1315 = plus(v1163, v1203); real2 v1309 = reverse(minus(v1203, v1163)); real2 v1183 = ctimesminusplus(reverse(v1169), tbl[204 + tbloffset], ctimes(v1169, tbl[205 + tbloffset])); real2 v1314 = plus(v1143, v1183); real2 v1310 = minus(v1183, v1143); store(out, 15 << outShift, plus(v1314, v1315)); real2 v1328 = minus(v1314, v1315); store(out, 47 << outShift, ctimesminusplus(v1328, tbl[0 + tbloffset], ctimes(reverse(v1328), tbl[1 + tbloffset]))); real2 v1311 = minusplus(v1309, v1310); store(out, 31 << outShift, ctimesminusplus(reverse(v1311), tbl[226 + tbloffset], ctimes(v1311, tbl[227 + tbloffset]))); real2 v1313 = minusplus(uminus(v1309), v1310); store(out, 63 << 
outShift, ctimesminusplus(reverse(v1313), tbl[228 + tbloffset], ctimes(v1313, tbl[229 + tbloffset]))); real2 v1177 = ctimesminusplus(reverse(v1167), tbl[202 + tbloffset], ctimes(v1167, tbl[203 + tbloffset])); real2 v1137 = ctimesminusplus(reverse(v1127), tbl[194 + tbloffset], ctimes(v1127, tbl[195 + tbloffset])); real2 v1197 = ctimesminusplus(reverse(v1187), tbl[206 + tbloffset], ctimes(v1187, tbl[207 + tbloffset])); real2 v1157 = ctimesminusplus(reverse(v1147), tbl[198 + tbloffset], ctimes(v1147, tbl[199 + tbloffset])); real2 v1283 = reverse(minus(v1197, v1157)); real2 v1289 = plus(v1157, v1197); real2 v1288 = plus(v1137, v1177); real2 v1284 = minus(v1177, v1137); store(out, 7 << outShift, plus(v1288, v1289)); real2 v1302 = minus(v1288, v1289); store(out, 39 << outShift, ctimesminusplus(v1302, tbl[0 + tbloffset], ctimes(reverse(v1302), tbl[1 + tbloffset]))); real2 v1285 = minusplus(v1283, v1284); real2 v1287 = minusplus(uminus(v1283), v1284); store(out, 55 << outShift, ctimesminusplus(reverse(v1287), tbl[224 + tbloffset], ctimes(v1287, tbl[225 + tbloffset]))); store(out, 23 << outShift, ctimesminusplus(reverse(v1285), tbl[222 + tbloffset], ctimes(v1285, tbl[223 + tbloffset]))); // Pres : 17339 } } ALIGNED(8192) void tbut64f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * i0; // Pres : 30254 real2 v37 = load(in, 35 << inShift); real2 v5 = load(in, 3 << inShift); real2 v132 = plus(v5, v37); real2 v128 = minus(v37, v5); real2 v21 = load(in, 19 << inShift); real2 v53 = load(in, 51 << inShift); real2 v133 = plus(v21, v53); real2 v127 = reverse(minus(v21, v53)); real2 v131 = minusplus(uminus(v127), v128); real2 v129 = minusplus(v127, v128); real2 v139 = 
timesminusplus(reverse(v129), load(tbl, 14 * VECWIDTH + tbloffset), times(v129, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v145 = timesminusplus(reverse(v131), load(tbl, 16 * VECWIDTH + tbloffset), times(v131, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v448 = minus(v133, v132); real2 v452 = plus(v132, v133); real2 v45 = load(in, 43 << inShift); real2 v13 = load(in, 11 << inShift); real2 v292 = plus(v13, v45); real2 v288 = minus(v45, v13); real2 v29 = load(in, 27 << inShift); real2 v61 = load(in, 59 << inShift); real2 v293 = plus(v29, v61); real2 v287 = reverse(minus(v29, v61)); real2 v291 = minusplus(uminus(v287), v288); real2 v289 = minusplus(v287, v288); real2 v299 = timesminusplus(reverse(v289), load(tbl, 46 * VECWIDTH + tbloffset), times(v289, load(tbl, 47 * VECWIDTH + tbloffset))); real2 v453 = plus(v292, v293); real2 v447 = reverse(minus(v292, v293)); real2 v608 = minus(v453, v452); real2 v612 = plus(v452, v453); real2 v980 = plus(v139, v299); real2 v976 = minus(v299, v139); real2 v449 = minusplus(v447, v448); real2 v451 = minusplus(uminus(v447), v448); real2 v465 = timesminusplus(reverse(v451), load(tbl, 80 * VECWIDTH + tbloffset), times(v451, load(tbl, 81 * VECWIDTH + tbloffset))); real2 v305 = timesminusplus(reverse(v291), load(tbl, 48 * VECWIDTH + tbloffset), times(v291, load(tbl, 49 * VECWIDTH + tbloffset))); real2 v1186 = minus(v305, v145); real2 v1190 = plus(v145, v305); real2 v459 = timesminusplus(reverse(v449), load(tbl, 78 * VECWIDTH + tbloffset), times(v449, load(tbl, 79 * VECWIDTH + tbloffset))); real2 v25 = load(in, 23 << inShift); real2 v57 = load(in, 55 << inShift); real2 v207 = reverse(minus(v25, v57)); real2 v213 = plus(v25, v57); real2 v9 = load(in, 7 << inShift); real2 v41 = load(in, 39 << inShift); real2 v212 = plus(v9, v41); real2 v208 = minus(v41, v9); real2 v528 = minus(v213, v212); real2 v532 = plus(v212, v213); real2 v209 = minusplus(v207, v208); real2 v211 = minusplus(uminus(v207), v208); real2 v225 = 
timesminusplus(reverse(v211), load(tbl, 32 * VECWIDTH + tbloffset), times(v211, load(tbl, 33 * VECWIDTH + tbloffset))); real2 v219 = timesminusplus(reverse(v209), load(tbl, 30 * VECWIDTH + tbloffset), times(v209, load(tbl, 31 * VECWIDTH + tbloffset))); real2 v17 = load(in, 15 << inShift); real2 v49 = load(in, 47 << inShift); real2 v368 = minus(v49, v17); real2 v372 = plus(v17, v49); real2 v33 = load(in, 31 << inShift); real2 v65 = load(in, 63 << inShift); real2 v367 = reverse(minus(v33, v65)); real2 v373 = plus(v33, v65); real2 v369 = minusplus(v367, v368); real2 v371 = minusplus(uminus(v367), v368); real2 v533 = plus(v372, v373); real2 v527 = reverse(minus(v372, v373)); real2 v607 = reverse(minus(v532, v533)); real2 v613 = plus(v532, v533); real2 v529 = minusplus(v527, v528); real2 v531 = minusplus(uminus(v527), v528); real2 v545 = timesminusplus(reverse(v531), load(tbl, 96 * VECWIDTH + tbloffset), times(v531, load(tbl, 97 * VECWIDTH + tbloffset))); real2 v653 = plus(v612, v613); real2 v647 = reverse(minus(v612, v613)); real2 v609 = minusplus(v607, v608); real2 v611 = minusplus(uminus(v607), v608); real2 v863 = plus(v465, v545); real2 v857 = reverse(minus(v465, v545)); real2 v539 = timesminusplus(reverse(v529), load(tbl, 94 * VECWIDTH + tbloffset), times(v529, load(tbl, 95 * VECWIDTH + tbloffset))); real2 v385 = timesminusplus(reverse(v371), load(tbl, 64 * VECWIDTH + tbloffset), times(v371, load(tbl, 65 * VECWIDTH + tbloffset))); real2 v619 = timesminusplus(reverse(v609), load(tbl, 110 * VECWIDTH + tbloffset), times(v609, load(tbl, 111 * VECWIDTH + tbloffset))); real2 v1191 = plus(v225, v385); real2 v1185 = reverse(minus(v225, v385)); real2 v779 = reverse(minus(v459, v539)); real2 v785 = plus(v459, v539); real2 v625 = timesminusplus(reverse(v611), load(tbl, 112 * VECWIDTH + tbloffset), times(v611, load(tbl, 113 * VECWIDTH + tbloffset))); real2 v379 = timesminusplus(reverse(v369), load(tbl, 62 * VECWIDTH + tbloffset), times(v369, load(tbl, 63 * VECWIDTH + 
tbloffset))); real2 v975 = reverse(minus(v219, v379)); real2 v981 = plus(v219, v379); real2 v977 = minusplus(v975, v976); real2 v979 = minusplus(uminus(v975), v976); real2 v987 = timesminusplus(reverse(v977), load(tbl, 170 * VECWIDTH + tbloffset), times(v977, load(tbl, 171 * VECWIDTH + tbloffset))); real2 v993 = timesminusplus(reverse(v979), load(tbl, 172 * VECWIDTH + tbloffset), times(v979, load(tbl, 173 * VECWIDTH + tbloffset))); real2 v1015 = reverse(minus(v980, v981)); real2 v1021 = plus(v980, v981); real2 v11 = load(in, 9 << inShift); real2 v43 = load(in, 41 << inShift); real2 v248 = minus(v43, v11); real2 v252 = plus(v11, v43); real2 v59 = load(in, 57 << inShift); real2 v27 = load(in, 25 << inShift); real2 v253 = plus(v27, v59); real2 v247 = reverse(minus(v27, v59)); real2 v413 = plus(v252, v253); real2 v407 = reverse(minus(v252, v253)); real2 v249 = minusplus(v247, v248); real2 v251 = minusplus(uminus(v247), v248); real2 v259 = timesminusplus(reverse(v249), load(tbl, 38 * VECWIDTH + tbloffset), times(v249, load(tbl, 39 * VECWIDTH + tbloffset))); real2 v35 = load(in, 33 << inShift); real2 v3 = load(in, 1 << inShift); real2 v92 = plus(v3, v35); real2 v88 = minus(v35, v3); real2 v51 = load(in, 49 << inShift); real2 v19 = load(in, 17 << inShift); real2 v87 = reverse(minus(v19, v51)); real2 v93 = plus(v19, v51); real2 v412 = plus(v92, v93); real2 v408 = minus(v93, v92); real2 v411 = minusplus(uminus(v407), v408); real2 v409 = minusplus(v407, v408); real2 v91 = minusplus(uminus(v87), v88); real2 v89 = minusplus(v87, v88); real2 v99 = timesminusplus(reverse(v89), load(tbl, 6 * VECWIDTH + tbloffset), times(v89, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v425 = timesminusplus(reverse(v411), load(tbl, 72 * VECWIDTH + tbloffset), times(v411, load(tbl, 73 * VECWIDTH + tbloffset))); real2 v568 = minus(v413, v412); real2 v572 = plus(v412, v413); real2 v940 = plus(v99, v259); real2 v936 = minus(v259, v99); real2 v419 = timesminusplus(reverse(v409), load(tbl, 70 * 
VECWIDTH + tbloffset), times(v409, load(tbl, 71 * VECWIDTH + tbloffset))); real2 v47 = load(in, 45 << inShift); real2 v15 = load(in, 13 << inShift); real2 v332 = plus(v15, v47); real2 v328 = minus(v47, v15); real2 v63 = load(in, 61 << inShift); real2 v31 = load(in, 29 << inShift); real2 v327 = reverse(minus(v31, v63)); real2 v333 = plus(v31, v63); real2 v329 = minusplus(v327, v328); real2 v331 = minusplus(uminus(v327), v328); real2 v339 = timesminusplus(reverse(v329), load(tbl, 54 * VECWIDTH + tbloffset), times(v329, load(tbl, 55 * VECWIDTH + tbloffset))); real2 v487 = reverse(minus(v332, v333)); real2 v493 = plus(v332, v333); real2 v7 = load(in, 5 << inShift); real2 v39 = load(in, 37 << inShift); real2 v172 = plus(v7, v39); real2 v168 = minus(v39, v7); real2 v55 = load(in, 53 << inShift); real2 v23 = load(in, 21 << inShift); real2 v173 = plus(v23, v55); real2 v167 = reverse(minus(v23, v55)); real2 v488 = minus(v173, v172); real2 v492 = plus(v172, v173); real2 v491 = minusplus(uminus(v487), v488); real2 v489 = minusplus(v487, v488); real2 v499 = timesminusplus(reverse(v489), load(tbl, 86 * VECWIDTH + tbloffset), times(v489, load(tbl, 87 * VECWIDTH + tbloffset))); real2 v505 = timesminusplus(reverse(v491), load(tbl, 88 * VECWIDTH + tbloffset), times(v491, load(tbl, 89 * VECWIDTH + tbloffset))); real2 v567 = reverse(minus(v492, v493)); real2 v573 = plus(v492, v493); real2 v571 = minusplus(uminus(v567), v568); real2 v569 = minusplus(v567, v568); real2 v579 = timesminusplus(reverse(v569), load(tbl, 102 * VECWIDTH + tbloffset), times(v569, load(tbl, 103 * VECWIDTH + tbloffset))); real2 v585 = timesminusplus(reverse(v571), load(tbl, 104 * VECWIDTH + tbloffset), times(v571, load(tbl, 105 * VECWIDTH + tbloffset))); real2 v739 = plus(v585, v625); real2 v733 = reverse(minus(v585, v625)); real2 v707 = reverse(minus(v579, v619)); real2 v713 = plus(v579, v619); real2 v648 = minus(v573, v572); real2 v652 = plus(v572, v573); real2 v673 = plus(v652, v653); real2 v667 = 
reverse(minus(v652, v653)); real2 v651 = minusplus(uminus(v647), v648); real2 v649 = minusplus(v647, v648); real2 v659 = timesminusplus(reverse(v649), load(tbl, 118 * VECWIDTH + tbloffset), times(v649, load(tbl, 119 * VECWIDTH + tbloffset))); real2 v665 = timesminusplus(reverse(v651), load(tbl, 120 * VECWIDTH + tbloffset), times(v651, load(tbl, 121 * VECWIDTH + tbloffset))); real2 v780 = minus(v499, v419); real2 v784 = plus(v419, v499); real2 v781 = minusplus(v779, v780); real2 v783 = minusplus(uminus(v779), v780); real2 v805 = plus(v784, v785); real2 v799 = reverse(minus(v784, v785)); real2 v862 = plus(v425, v505); real2 v858 = minus(v505, v425); real2 v859 = minusplus(v857, v858); real2 v861 = minusplus(uminus(v857), v858); real2 v875 = timesminusplus(reverse(v861), load(tbl, 152 * VECWIDTH + tbloffset), times(v861, load(tbl, 153 * VECWIDTH + tbloffset))); real2 v791 = timesminusplus(reverse(v781), load(tbl, 138 * VECWIDTH + tbloffset), times(v781, load(tbl, 139 * VECWIDTH + tbloffset))); real2 v797 = timesminusplus(reverse(v783), load(tbl, 140 * VECWIDTH + tbloffset), times(v783, load(tbl, 141 * VECWIDTH + tbloffset))); real2 v883 = plus(v862, v863); real2 v877 = reverse(minus(v862, v863)); real2 v869 = timesminusplus(reverse(v859), load(tbl, 150 * VECWIDTH + tbloffset), times(v859, load(tbl, 151 * VECWIDTH + tbloffset))); real2 v36 = load(in, 34 << inShift); real2 v4 = load(in, 2 << inShift); real2 v108 = minus(v36, v4); real2 v112 = plus(v4, v36); real2 v52 = load(in, 50 << inShift); real2 v20 = load(in, 18 << inShift); real2 v113 = plus(v20, v52); real2 v107 = reverse(minus(v20, v52)); real2 v428 = minus(v113, v112); real2 v432 = plus(v112, v113); real2 v12 = load(in, 10 << inShift); real2 v44 = load(in, 42 << inShift); real2 v268 = minus(v44, v12); real2 v272 = plus(v12, v44); real2 v28 = load(in, 26 << inShift); real2 v60 = load(in, 58 << inShift); real2 v267 = reverse(minus(v28, v60)); real2 v273 = plus(v28, v60); real2 v427 = reverse(minus(v272, v273)); 
real2 v433 = plus(v272, v273); real2 v431 = minusplus(uminus(v427), v428); real2 v429 = minusplus(v427, v428); real2 v439 = timesminusplus(reverse(v429), load(tbl, 74 * VECWIDTH + tbloffset), times(v429, load(tbl, 75 * VECWIDTH + tbloffset))); real2 v588 = minus(v433, v432); real2 v592 = plus(v432, v433); real2 v40 = load(in, 38 << inShift); real2 v8 = load(in, 6 << inShift); real2 v188 = minus(v40, v8); real2 v192 = plus(v8, v40); real2 v24 = load(in, 22 << inShift); real2 v56 = load(in, 54 << inShift); real2 v187 = reverse(minus(v24, v56)); real2 v193 = plus(v24, v56); real2 v512 = plus(v192, v193); real2 v508 = minus(v193, v192); real2 v32 = load(in, 30 << inShift); real2 v64 = load(in, 62 << inShift); real2 v347 = reverse(minus(v32, v64)); real2 v353 = plus(v32, v64); real2 v48 = load(in, 46 << inShift); real2 v16 = load(in, 14 << inShift); real2 v348 = minus(v48, v16); real2 v352 = plus(v16, v48); real2 v513 = plus(v352, v353); real2 v507 = reverse(minus(v352, v353)); real2 v587 = reverse(minus(v512, v513)); real2 v593 = plus(v512, v513); real2 v633 = plus(v592, v593); real2 v627 = reverse(minus(v592, v593)); real2 v591 = minusplus(uminus(v587), v588); real2 v589 = minusplus(v587, v588); real2 v605 = timesminusplus(reverse(v591), load(tbl, 108 * VECWIDTH + tbloffset), times(v591, load(tbl, 109 * VECWIDTH + tbloffset))); real2 v599 = timesminusplus(reverse(v589), load(tbl, 106 * VECWIDTH + tbloffset), times(v589, load(tbl, 107 * VECWIDTH + tbloffset))); real2 v46 = load(in, 44 << inShift); real2 v14 = load(in, 12 << inShift); real2 v312 = plus(v14, v46); real2 v308 = minus(v46, v14); real2 v62 = load(in, 60 << inShift); real2 v30 = load(in, 28 << inShift); real2 v313 = plus(v30, v62); real2 v307 = reverse(minus(v30, v62)); real2 v467 = reverse(minus(v312, v313)); real2 v473 = plus(v312, v313); real2 v22 = load(in, 20 << inShift); real2 v54 = load(in, 52 << inShift); real2 v147 = reverse(minus(v22, v54)); real2 v153 = plus(v22, v54); real2 v6 = load(in, 4 << 
inShift); real2 v38 = load(in, 36 << inShift); real2 v148 = minus(v38, v6); real2 v152 = plus(v6, v38); real2 v472 = plus(v152, v153); real2 v468 = minus(v153, v152); real2 v547 = reverse(minus(v472, v473)); real2 v553 = plus(v472, v473); real2 v10 = load(in, 8 << inShift); real2 v42 = load(in, 40 << inShift); real2 v232 = plus(v10, v42); real2 v228 = minus(v42, v10); real2 v58 = load(in, 56 << inShift); real2 v26 = load(in, 24 << inShift); real2 v233 = plus(v26, v58); real2 v227 = reverse(minus(v26, v58)); real2 v393 = plus(v232, v233); real2 v387 = reverse(minus(v232, v233)); real2 v2 = load(in, 0 << inShift); real2 v34 = load(in, 32 << inShift); real2 v72 = plus(v2, v34); real2 v68 = minus(v34, v2); real2 v18 = load(in, 16 << inShift); real2 v50 = load(in, 48 << inShift); real2 v73 = plus(v18, v50); real2 v67 = reverse(minus(v18, v50)); real2 v388 = minus(v73, v72); real2 v392 = plus(v72, v73); real2 v548 = minus(v393, v392); real2 v552 = plus(v392, v393); real2 v628 = minus(v553, v552); real2 v632 = plus(v552, v553); real2 v672 = plus(v632, v633); real2 v668 = minus(v633, v632); scatter(out, 0, 64, plus(v672, v673)); real2 v686 = minus(v672, v673); scatter(out, 32, 64, timesminusplus(v686, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v686), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v669 = minusplus(v667, v668); real2 v671 = minusplus(uminus(v667), v668); scatter(out, 48, 64, timesminusplus(reverse(v671), load(tbl, 124 * VECWIDTH + tbloffset), times(v671, load(tbl, 125 * VECWIDTH + tbloffset)))); scatter(out, 16, 64, timesminusplus(reverse(v669), load(tbl, 122 * VECWIDTH + tbloffset), times(v669, load(tbl, 123 * VECWIDTH + tbloffset)))); real2 v631 = minusplus(uminus(v627), v628); real2 v629 = minusplus(v627, v628); real2 v639 = timesminusplus(reverse(v629), load(tbl, 114 * VECWIDTH + tbloffset), times(v629, load(tbl, 115 * VECWIDTH + tbloffset))); scatter(out, 8, 64, plus(v639, v659)); real2 v694 = minus(v639, v659); scatter(out, 40, 64, 
timesminusplus(v694, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v694), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v645 = timesminusplus(reverse(v631), load(tbl, 116 * VECWIDTH + tbloffset), times(v631, load(tbl, 117 * VECWIDTH + tbloffset))); scatter(out, 24, 64, plus(v645, v665)); real2 v700 = minus(v645, v665); scatter(out, 56, 64, timesminusplus(v700, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v700), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v549 = minusplus(v547, v548); real2 v551 = minusplus(uminus(v547), v548); real2 v559 = timesminusplus(reverse(v549), load(tbl, 98 * VECWIDTH + tbloffset), times(v549, load(tbl, 99 * VECWIDTH + tbloffset))); real2 v708 = minus(v599, v559); real2 v712 = plus(v559, v599); scatter(out, 4, 64, plus(v712, v713)); real2 v726 = minus(v712, v713); scatter(out, 36, 64, timesminusplus(v726, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v726), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v711 = minusplus(uminus(v707), v708); real2 v709 = minusplus(v707, v708); scatter(out, 20, 64, timesminusplus(reverse(v709), load(tbl, 126 * VECWIDTH + tbloffset), times(v709, load(tbl, 127 * VECWIDTH + tbloffset)))); scatter(out, 52, 64, timesminusplus(reverse(v711), load(tbl, 128 * VECWIDTH + tbloffset), times(v711, load(tbl, 129 * VECWIDTH + tbloffset)))); real2 v565 = timesminusplus(reverse(v551), load(tbl, 100 * VECWIDTH + tbloffset), times(v551, load(tbl, 101 * VECWIDTH + tbloffset))); real2 v738 = plus(v565, v605); real2 v734 = minus(v605, v565); scatter(out, 12, 64, plus(v738, v739)); real2 v752 = minus(v738, v739); scatter(out, 44, 64, timesminusplus(v752, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v752), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v737 = minusplus(uminus(v733), v734); scatter(out, 60, 64, timesminusplus(reverse(v737), load(tbl, 132 * VECWIDTH + tbloffset), times(v737, load(tbl, 133 * VECWIDTH + tbloffset)))); real2 v735 = minusplus(v733, v734); scatter(out, 28, 64, 
timesminusplus(reverse(v735), load(tbl, 130 * VECWIDTH + tbloffset), times(v735, load(tbl, 131 * VECWIDTH + tbloffset)))); real2 v471 = minusplus(uminus(v467), v468); real2 v469 = minusplus(v467, v468); real2 v479 = timesminusplus(reverse(v469), load(tbl, 82 * VECWIDTH + tbloffset), times(v469, load(tbl, 83 * VECWIDTH + tbloffset))); real2 v511 = minusplus(uminus(v507), v508); real2 v509 = minusplus(v507, v508); real2 v519 = timesminusplus(reverse(v509), load(tbl, 90 * VECWIDTH + tbloffset), times(v509, load(tbl, 91 * VECWIDTH + tbloffset))); real2 v765 = plus(v439, v519); real2 v759 = reverse(minus(v439, v519)); real2 v389 = minusplus(v387, v388); real2 v391 = minusplus(uminus(v387), v388); real2 v399 = timesminusplus(reverse(v389), load(tbl, 66 * VECWIDTH + tbloffset), times(v389, load(tbl, 67 * VECWIDTH + tbloffset))); real2 v764 = plus(v399, v479); real2 v760 = minus(v479, v399); real2 v804 = plus(v764, v765); real2 v800 = minus(v765, v764); scatter(out, 2, 64, plus(v804, v805)); real2 v818 = minus(v804, v805); scatter(out, 34, 64, timesminusplus(v818, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v818), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v803 = minusplus(uminus(v799), v800); scatter(out, 50, 64, timesminusplus(reverse(v803), load(tbl, 144 * VECWIDTH + tbloffset), times(v803, load(tbl, 145 * VECWIDTH + tbloffset)))); real2 v801 = minusplus(v799, v800); scatter(out, 18, 64, timesminusplus(reverse(v801), load(tbl, 142 * VECWIDTH + tbloffset), times(v801, load(tbl, 143 * VECWIDTH + tbloffset)))); real2 v763 = minusplus(uminus(v759), v760); real2 v761 = minusplus(v759, v760); real2 v777 = timesminusplus(reverse(v763), load(tbl, 136 * VECWIDTH + tbloffset), times(v763, load(tbl, 137 * VECWIDTH + tbloffset))); scatter(out, 26, 64, plus(v777, v797)); real2 v830 = minus(v777, v797); scatter(out, 58, 64, timesminusplus(v830, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v830), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v771 = 
timesminusplus(reverse(v761), load(tbl, 134 * VECWIDTH + tbloffset), times(v761, load(tbl, 135 * VECWIDTH + tbloffset))); scatter(out, 10, 64, plus(v771, v791)); real2 v824 = minus(v771, v791); scatter(out, 42, 64, timesminusplus(v824, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v824), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v445 = timesminusplus(reverse(v431), load(tbl, 76 * VECWIDTH + tbloffset), times(v431, load(tbl, 77 * VECWIDTH + tbloffset))); real2 v525 = timesminusplus(reverse(v511), load(tbl, 92 * VECWIDTH + tbloffset), times(v511, load(tbl, 93 * VECWIDTH + tbloffset))); real2 v837 = reverse(minus(v445, v525)); real2 v843 = plus(v445, v525); real2 v485 = timesminusplus(reverse(v471), load(tbl, 84 * VECWIDTH + tbloffset), times(v471, load(tbl, 85 * VECWIDTH + tbloffset))); real2 v405 = timesminusplus(reverse(v391), load(tbl, 68 * VECWIDTH + tbloffset), times(v391, load(tbl, 69 * VECWIDTH + tbloffset))); real2 v838 = minus(v485, v405); real2 v842 = plus(v405, v485); real2 v878 = minus(v843, v842); real2 v882 = plus(v842, v843); scatter(out, 6, 64, plus(v882, v883)); real2 v896 = minus(v882, v883); scatter(out, 38, 64, timesminusplus(v896, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v896), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v881 = minusplus(uminus(v877), v878); scatter(out, 54, 64, timesminusplus(reverse(v881), load(tbl, 156 * VECWIDTH + tbloffset), times(v881, load(tbl, 157 * VECWIDTH + tbloffset)))); real2 v879 = minusplus(v877, v878); scatter(out, 22, 64, timesminusplus(reverse(v879), load(tbl, 154 * VECWIDTH + tbloffset), times(v879, load(tbl, 155 * VECWIDTH + tbloffset)))); real2 v841 = minusplus(uminus(v837), v838); real2 v839 = minusplus(v837, v838); real2 v855 = timesminusplus(reverse(v841), load(tbl, 148 * VECWIDTH + tbloffset), times(v841, load(tbl, 149 * VECWIDTH + tbloffset))); scatter(out, 30, 64, plus(v855, v875)); real2 v908 = minus(v855, v875); scatter(out, 62, 64, timesminusplus(v908, load(tbl, 0 * VECWIDTH + 
tbloffset), times(reverse(v908), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v849 = timesminusplus(reverse(v839), load(tbl, 146 * VECWIDTH + tbloffset), times(v839, load(tbl, 147 * VECWIDTH + tbloffset))); scatter(out, 14, 64, plus(v849, v869)); real2 v902 = minus(v849, v869); scatter(out, 46, 64, timesminusplus(v902, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v902), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v151 = minusplus(uminus(v147), v148); real2 v149 = minusplus(v147, v148); real2 v311 = minusplus(uminus(v307), v308); real2 v309 = minusplus(v307, v308); real2 v109 = minusplus(v107, v108); real2 v111 = minusplus(uminus(v107), v108); real2 v119 = timesminusplus(reverse(v109), load(tbl, 10 * VECWIDTH + tbloffset), times(v109, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v269 = minusplus(v267, v268); real2 v271 = minusplus(uminus(v267), v268); real2 v279 = timesminusplus(reverse(v269), load(tbl, 42 * VECWIDTH + tbloffset), times(v269, load(tbl, 43 * VECWIDTH + tbloffset))); real2 v960 = plus(v119, v279); real2 v956 = minus(v279, v119); real2 v169 = minusplus(v167, v168); real2 v171 = minusplus(uminus(v167), v168); real2 v159 = timesminusplus(reverse(v149), load(tbl, 18 * VECWIDTH + tbloffset), times(v149, load(tbl, 19 * VECWIDTH + tbloffset))); real2 v319 = timesminusplus(reverse(v309), load(tbl, 50 * VECWIDTH + tbloffset), times(v309, load(tbl, 51 * VECWIDTH + tbloffset))); real2 v921 = plus(v159, v319); real2 v915 = reverse(minus(v159, v319)); real2 v351 = minusplus(uminus(v347), v348); real2 v349 = minusplus(v347, v348); real2 v359 = timesminusplus(reverse(v349), load(tbl, 58 * VECWIDTH + tbloffset), times(v349, load(tbl, 59 * VECWIDTH + tbloffset))); real2 v191 = minusplus(uminus(v187), v188); real2 v189 = minusplus(v187, v188); real2 v199 = timesminusplus(reverse(v189), load(tbl, 26 * VECWIDTH + tbloffset), times(v189, load(tbl, 27 * VECWIDTH + tbloffset))); real2 v961 = plus(v199, v359); real2 v955 = reverse(minus(v199, v359)); real2 v995 = 
reverse(minus(v960, v961)); real2 v1001 = plus(v960, v961); real2 v179 = timesminusplus(reverse(v169), load(tbl, 22 * VECWIDTH + tbloffset), times(v169, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v941 = plus(v179, v339); real2 v935 = reverse(minus(v179, v339)); real2 v1016 = minus(v941, v940); real2 v1020 = plus(v940, v941); real2 v71 = minusplus(uminus(v67), v68); real2 v69 = minusplus(v67, v68); real2 v79 = timesminusplus(reverse(v69), load(tbl, 2 * VECWIDTH + tbloffset), times(v69, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v1041 = plus(v1020, v1021); real2 v1035 = reverse(minus(v1020, v1021)); real2 v229 = minusplus(v227, v228); real2 v231 = minusplus(uminus(v227), v228); real2 v239 = timesminusplus(reverse(v229), load(tbl, 34 * VECWIDTH + tbloffset), times(v229, load(tbl, 35 * VECWIDTH + tbloffset))); real2 v920 = plus(v79, v239); real2 v916 = minus(v239, v79); real2 v996 = minus(v921, v920); real2 v1000 = plus(v920, v921); real2 v1040 = plus(v1000, v1001); real2 v1036 = minus(v1001, v1000); scatter(out, 1, 64, plus(v1040, v1041)); real2 v1054 = minus(v1040, v1041); scatter(out, 33, 64, timesminusplus(v1054, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1054), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1037 = minusplus(v1035, v1036); real2 v1039 = minusplus(uminus(v1035), v1036); scatter(out, 49, 64, timesminusplus(reverse(v1039), load(tbl, 184 * VECWIDTH + tbloffset), times(v1039, load(tbl, 185 * VECWIDTH + tbloffset)))); scatter(out, 17, 64, timesminusplus(reverse(v1037), load(tbl, 182 * VECWIDTH + tbloffset), times(v1037, load(tbl, 183 * VECWIDTH + tbloffset)))); real2 v1017 = minusplus(v1015, v1016); real2 v1019 = minusplus(uminus(v1015), v1016); real2 v1033 = timesminusplus(reverse(v1019), load(tbl, 180 * VECWIDTH + tbloffset), times(v1019, load(tbl, 181 * VECWIDTH + tbloffset))); real2 v997 = minusplus(v995, v996); real2 v999 = minusplus(uminus(v995), v996); real2 v1013 = timesminusplus(reverse(v999), load(tbl, 176 * VECWIDTH + tbloffset), 
times(v999, load(tbl, 177 * VECWIDTH + tbloffset))); scatter(out, 25, 64, plus(v1013, v1033)); real2 v1066 = minus(v1013, v1033); scatter(out, 57, 64, timesminusplus(v1066, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1066), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1027 = timesminusplus(reverse(v1017), load(tbl, 178 * VECWIDTH + tbloffset), times(v1017, load(tbl, 179 * VECWIDTH + tbloffset))); real2 v1007 = timesminusplus(reverse(v997), load(tbl, 174 * VECWIDTH + tbloffset), times(v997, load(tbl, 175 * VECWIDTH + tbloffset))); scatter(out, 9, 64, plus(v1007, v1027)); real2 v1060 = minus(v1007, v1027); scatter(out, 41, 64, timesminusplus(v1060, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1060), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v937 = minusplus(v935, v936); real2 v939 = minusplus(uminus(v935), v936); real2 v959 = minusplus(uminus(v955), v956); real2 v957 = minusplus(v955, v956); real2 v967 = timesminusplus(reverse(v957), load(tbl, 166 * VECWIDTH + tbloffset), times(v957, load(tbl, 167 * VECWIDTH + tbloffset))); real2 v947 = timesminusplus(reverse(v937), load(tbl, 162 * VECWIDTH + tbloffset), times(v937, load(tbl, 163 * VECWIDTH + tbloffset))); real2 v919 = minusplus(uminus(v915), v916); real2 v917 = minusplus(v915, v916); real2 v1079 = plus(v947, v987); real2 v1073 = reverse(minus(v947, v987)); real2 v927 = timesminusplus(reverse(v917), load(tbl, 158 * VECWIDTH + tbloffset), times(v917, load(tbl, 159 * VECWIDTH + tbloffset))); real2 v1074 = minus(v967, v927); real2 v1078 = plus(v927, v967); scatter(out, 5, 64, plus(v1078, v1079)); real2 v1092 = minus(v1078, v1079); scatter(out, 37, 64, timesminusplus(v1092, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1092), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1075 = minusplus(v1073, v1074); scatter(out, 21, 64, timesminusplus(reverse(v1075), load(tbl, 186 * VECWIDTH + tbloffset), times(v1075, load(tbl, 187 * VECWIDTH + tbloffset)))); real2 v1077 = minusplus(uminus(v1073), v1074); 
scatter(out, 53, 64, timesminusplus(reverse(v1077), load(tbl, 188 * VECWIDTH + tbloffset), times(v1077, load(tbl, 189 * VECWIDTH + tbloffset)))); real2 v953 = timesminusplus(reverse(v939), load(tbl, 164 * VECWIDTH + tbloffset), times(v939, load(tbl, 165 * VECWIDTH + tbloffset))); real2 v1099 = reverse(minus(v953, v993)); real2 v1105 = plus(v953, v993); real2 v973 = timesminusplus(reverse(v959), load(tbl, 168 * VECWIDTH + tbloffset), times(v959, load(tbl, 169 * VECWIDTH + tbloffset))); real2 v933 = timesminusplus(reverse(v919), load(tbl, 160 * VECWIDTH + tbloffset), times(v919, load(tbl, 161 * VECWIDTH + tbloffset))); real2 v1104 = plus(v933, v973); real2 v1100 = minus(v973, v933); scatter(out, 13, 64, plus(v1104, v1105)); real2 v1118 = minus(v1104, v1105); scatter(out, 45, 64, timesminusplus(v1118, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1118), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1101 = minusplus(v1099, v1100); scatter(out, 29, 64, timesminusplus(reverse(v1101), load(tbl, 190 * VECWIDTH + tbloffset), times(v1101, load(tbl, 191 * VECWIDTH + tbloffset)))); real2 v1103 = minusplus(uminus(v1099), v1100); scatter(out, 61, 64, timesminusplus(reverse(v1103), load(tbl, 192 * VECWIDTH + tbloffset), times(v1103, load(tbl, 193 * VECWIDTH + tbloffset)))); real2 v345 = timesminusplus(reverse(v331), load(tbl, 56 * VECWIDTH + tbloffset), times(v331, load(tbl, 57 * VECWIDTH + tbloffset))); real2 v325 = timesminusplus(reverse(v311), load(tbl, 52 * VECWIDTH + tbloffset), times(v311, load(tbl, 53 * VECWIDTH + tbloffset))); real2 v265 = timesminusplus(reverse(v251), load(tbl, 40 * VECWIDTH + tbloffset), times(v251, load(tbl, 41 * VECWIDTH + tbloffset))); real2 v185 = timesminusplus(reverse(v171), load(tbl, 24 * VECWIDTH + tbloffset), times(v171, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v165 = timesminusplus(reverse(v151), load(tbl, 20 * VECWIDTH + tbloffset), times(v151, load(tbl, 21 * VECWIDTH + tbloffset))); real2 v1131 = plus(v165, v325); real2 v1125 = 
reverse(minus(v165, v325)); real2 v1151 = plus(v185, v345); real2 v1145 = reverse(minus(v185, v345)); real2 v105 = timesminusplus(reverse(v91), load(tbl, 8 * VECWIDTH + tbloffset), times(v91, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v1150 = plus(v105, v265); real2 v1146 = minus(v265, v105); real2 v1226 = minus(v1151, v1150); real2 v1230 = plus(v1150, v1151); real2 v1231 = plus(v1190, v1191); real2 v1225 = reverse(minus(v1190, v1191)); real2 v1245 = reverse(minus(v1230, v1231)); real2 v1251 = plus(v1230, v1231); real2 v365 = timesminusplus(reverse(v351), load(tbl, 60 * VECWIDTH + tbloffset), times(v351, load(tbl, 61 * VECWIDTH + tbloffset))); real2 v285 = timesminusplus(reverse(v271), load(tbl, 44 * VECWIDTH + tbloffset), times(v271, load(tbl, 45 * VECWIDTH + tbloffset))); real2 v205 = timesminusplus(reverse(v191), load(tbl, 28 * VECWIDTH + tbloffset), times(v191, load(tbl, 29 * VECWIDTH + tbloffset))); real2 v1171 = plus(v205, v365); real2 v1165 = reverse(minus(v205, v365)); real2 v125 = timesminusplus(reverse(v111), load(tbl, 12 * VECWIDTH + tbloffset), times(v111, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v85 = timesminusplus(reverse(v71), load(tbl, 4 * VECWIDTH + tbloffset), times(v71, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v245 = timesminusplus(reverse(v231), load(tbl, 36 * VECWIDTH + tbloffset), times(v231, load(tbl, 37 * VECWIDTH + tbloffset))); real2 v1126 = minus(v245, v85); real2 v1130 = plus(v85, v245); real2 v1210 = plus(v1130, v1131); real2 v1206 = minus(v1131, v1130); real2 v1166 = minus(v285, v125); real2 v1170 = plus(v125, v285); real2 v1211 = plus(v1170, v1171); real2 v1205 = reverse(minus(v1170, v1171)); real2 v1246 = minus(v1211, v1210); real2 v1250 = plus(v1210, v1211); scatter(out, 3, 64, plus(v1250, v1251)); real2 v1264 = minus(v1250, v1251); scatter(out, 35, 64, timesminusplus(v1264, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1264), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1247 = minusplus(v1245, v1246); real2 
v1249 = minusplus(uminus(v1245), v1246); scatter(out, 19, 64, timesminusplus(reverse(v1247), load(tbl, 218 * VECWIDTH + tbloffset), times(v1247, load(tbl, 219 * VECWIDTH + tbloffset)))); scatter(out, 51, 64, timesminusplus(reverse(v1249), load(tbl, 220 * VECWIDTH + tbloffset), times(v1249, load(tbl, 221 * VECWIDTH + tbloffset)))); real2 v1229 = minusplus(uminus(v1225), v1226); real2 v1227 = minusplus(v1225, v1226); real2 v1207 = minusplus(v1205, v1206); real2 v1209 = minusplus(uminus(v1205), v1206); real2 v1237 = timesminusplus(reverse(v1227), load(tbl, 214 * VECWIDTH + tbloffset), times(v1227, load(tbl, 215 * VECWIDTH + tbloffset))); real2 v1217 = timesminusplus(reverse(v1207), load(tbl, 210 * VECWIDTH + tbloffset), times(v1207, load(tbl, 211 * VECWIDTH + tbloffset))); scatter(out, 11, 64, plus(v1217, v1237)); real2 v1270 = minus(v1217, v1237); scatter(out, 43, 64, timesminusplus(v1270, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1270), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1223 = timesminusplus(reverse(v1209), load(tbl, 212 * VECWIDTH + tbloffset), times(v1209, load(tbl, 213 * VECWIDTH + tbloffset))); real2 v1243 = timesminusplus(reverse(v1229), load(tbl, 216 * VECWIDTH + tbloffset), times(v1229, load(tbl, 217 * VECWIDTH + tbloffset))); scatter(out, 27, 64, plus(v1223, v1243)); real2 v1276 = minus(v1223, v1243); scatter(out, 59, 64, timesminusplus(v1276, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1276), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1189 = minusplus(uminus(v1185), v1186); real2 v1187 = minusplus(v1185, v1186); real2 v1129 = minusplus(uminus(v1125), v1126); real2 v1127 = minusplus(v1125, v1126); real2 v1147 = minusplus(v1145, v1146); real2 v1149 = minusplus(uminus(v1145), v1146); real2 v1167 = minusplus(v1165, v1166); real2 v1169 = minusplus(uminus(v1165), v1166); real2 v1143 = timesminusplus(reverse(v1129), load(tbl, 196 * VECWIDTH + tbloffset), times(v1129, load(tbl, 197 * VECWIDTH + tbloffset))); real2 v1163 = 
timesminusplus(reverse(v1149), load(tbl, 200 * VECWIDTH + tbloffset), times(v1149, load(tbl, 201 * VECWIDTH + tbloffset))); real2 v1203 = timesminusplus(reverse(v1189), load(tbl, 208 * VECWIDTH + tbloffset), times(v1189, load(tbl, 209 * VECWIDTH + tbloffset))); real2 v1315 = plus(v1163, v1203); real2 v1309 = reverse(minus(v1163, v1203)); real2 v1183 = timesminusplus(reverse(v1169), load(tbl, 204 * VECWIDTH + tbloffset), times(v1169, load(tbl, 205 * VECWIDTH + tbloffset))); real2 v1314 = plus(v1143, v1183); real2 v1310 = minus(v1183, v1143); scatter(out, 15, 64, plus(v1314, v1315)); real2 v1328 = minus(v1314, v1315); scatter(out, 47, 64, timesminusplus(v1328, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1328), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1311 = minusplus(v1309, v1310); scatter(out, 31, 64, timesminusplus(reverse(v1311), load(tbl, 226 * VECWIDTH + tbloffset), times(v1311, load(tbl, 227 * VECWIDTH + tbloffset)))); real2 v1313 = minusplus(uminus(v1309), v1310); scatter(out, 63, 64, timesminusplus(reverse(v1313), load(tbl, 228 * VECWIDTH + tbloffset), times(v1313, load(tbl, 229 * VECWIDTH + tbloffset)))); real2 v1177 = timesminusplus(reverse(v1167), load(tbl, 202 * VECWIDTH + tbloffset), times(v1167, load(tbl, 203 * VECWIDTH + tbloffset))); real2 v1137 = timesminusplus(reverse(v1127), load(tbl, 194 * VECWIDTH + tbloffset), times(v1127, load(tbl, 195 * VECWIDTH + tbloffset))); real2 v1197 = timesminusplus(reverse(v1187), load(tbl, 206 * VECWIDTH + tbloffset), times(v1187, load(tbl, 207 * VECWIDTH + tbloffset))); real2 v1157 = timesminusplus(reverse(v1147), load(tbl, 198 * VECWIDTH + tbloffset), times(v1147, load(tbl, 199 * VECWIDTH + tbloffset))); real2 v1283 = reverse(minus(v1157, v1197)); real2 v1289 = plus(v1157, v1197); real2 v1288 = plus(v1137, v1177); real2 v1284 = minus(v1177, v1137); scatter(out, 7, 64, plus(v1288, v1289)); real2 v1302 = minus(v1288, v1289); scatter(out, 39, 64, timesminusplus(v1302, load(tbl, 0 * VECWIDTH + tbloffset), 
times(reverse(v1302), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1285 = minusplus(v1283, v1284); real2 v1287 = minusplus(uminus(v1283), v1284); scatter(out, 55, 64, timesminusplus(reverse(v1287), load(tbl, 224 * VECWIDTH + tbloffset), times(v1287, load(tbl, 225 * VECWIDTH + tbloffset)))); scatter(out, 23, 64, timesminusplus(reverse(v1285), load(tbl, 222 * VECWIDTH + tbloffset), times(v1285, load(tbl, 223 * VECWIDTH + tbloffset)))); // Pres : 17339 } } /* tbut64b: fully unrolled, generated 64-point butterfly kernel. For each i in [0, 1 << (inShift - LOG2VECWIDTH)) it loads 64 real2 values from in0 + 2*(i << LOG2VECWIDTH) at element indices (n << inShift) for n = 0..63, combines them with plus/minus/reverse/minusplus butterflies and twiddle products read from tbl at (index * VECWIDTH + K * (i << LOG2VECWIDTH)), and writes the 64 results with scatter(out, n, 64, ...) where out = out0 + q[i]. Loop iterations are distributed by the OpenMP "parallel for" pragma. The "Pres : N" line comments are annotations emitted with the generated code; their meaning is not evident from this file. NOTE(review): the "b" suffix and the minus() operand order (swapped relative to the kernel that precedes this one) suggest this is the backward/inverse direction - confirm against the generator. */ ALIGNED(8192) void tbut64b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); /* k = number of loop iterations */ int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; /* i scaled by the vector width */ real *out = out0 + q[i]; /* output base for this iteration is looked up in q[] */ const real *in = in0 + i0*2; /* input base for this iteration */ const int tbloffset = K * i0; /* added to every tbl index below */ // Pres : 30254 real2 v37 = load(in, 35 << inShift); real2 v5 = load(in, 3 << inShift); real2 v132 = plus(v5, v37); real2 v128 = minus(v37, v5); real2 v21 = load(in, 19 << inShift); real2 v53 = load(in, 51 << inShift); real2 v133 = plus(v21, v53); real2 v127 = reverse(minus(v53, v21)); real2 v131 = minusplus(uminus(v127), v128); real2 v129 = minusplus(v127, v128); real2 v139 = timesminusplus(reverse(v129), load(tbl, 14 * VECWIDTH + tbloffset), times(v129, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v145 = timesminusplus(reverse(v131), load(tbl, 16 * VECWIDTH + tbloffset), times(v131, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v448 = minus(v133, v132); real2 v452 = plus(v132, v133); real2 v45 = load(in, 43 << inShift); real2 v13 = load(in, 11 << inShift); real2 v292 = plus(v13, v45); real2 v288 = minus(v45, v13); real2 v29 = load(in, 27 << inShift); real2 v61 = load(in, 59 << inShift); real2 v293 = plus(v29, v61); real2 v287 = reverse(minus(v61, v29)); real2 v291 = minusplus(uminus(v287), v288); real2 v289 = minusplus(v287, v288); real2 v299 = timesminusplus(reverse(v289), load(tbl, 46 * VECWIDTH + tbloffset), times(v289, load(tbl, 47 * 
VECWIDTH + tbloffset))); real2 v453 = plus(v292, v293); real2 v447 = reverse(minus(v293, v292)); real2 v608 = minus(v453, v452); real2 v612 = plus(v452, v453); real2 v980 = plus(v139, v299); real2 v976 = minus(v299, v139); real2 v449 = minusplus(v447, v448); real2 v451 = minusplus(uminus(v447), v448); real2 v465 = timesminusplus(reverse(v451), load(tbl, 80 * VECWIDTH + tbloffset), times(v451, load(tbl, 81 * VECWIDTH + tbloffset))); real2 v305 = timesminusplus(reverse(v291), load(tbl, 48 * VECWIDTH + tbloffset), times(v291, load(tbl, 49 * VECWIDTH + tbloffset))); real2 v1186 = minus(v305, v145); real2 v1190 = plus(v145, v305); real2 v459 = timesminusplus(reverse(v449), load(tbl, 78 * VECWIDTH + tbloffset), times(v449, load(tbl, 79 * VECWIDTH + tbloffset))); real2 v25 = load(in, 23 << inShift); real2 v57 = load(in, 55 << inShift); real2 v207 = reverse(minus(v57, v25)); real2 v213 = plus(v25, v57); real2 v9 = load(in, 7 << inShift); real2 v41 = load(in, 39 << inShift); real2 v212 = plus(v9, v41); real2 v208 = minus(v41, v9); real2 v528 = minus(v213, v212); real2 v532 = plus(v212, v213); real2 v209 = minusplus(v207, v208); real2 v211 = minusplus(uminus(v207), v208); real2 v225 = timesminusplus(reverse(v211), load(tbl, 32 * VECWIDTH + tbloffset), times(v211, load(tbl, 33 * VECWIDTH + tbloffset))); real2 v219 = timesminusplus(reverse(v209), load(tbl, 30 * VECWIDTH + tbloffset), times(v209, load(tbl, 31 * VECWIDTH + tbloffset))); real2 v17 = load(in, 15 << inShift); real2 v49 = load(in, 47 << inShift); real2 v368 = minus(v49, v17); real2 v372 = plus(v17, v49); real2 v33 = load(in, 31 << inShift); real2 v65 = load(in, 63 << inShift); real2 v367 = reverse(minus(v65, v33)); real2 v373 = plus(v33, v65); real2 v369 = minusplus(v367, v368); real2 v371 = minusplus(uminus(v367), v368); real2 v533 = plus(v372, v373); real2 v527 = reverse(minus(v373, v372)); real2 v607 = reverse(minus(v533, v532)); real2 v613 = plus(v532, v533); real2 v529 = minusplus(v527, v528); real2 v531 = 
minusplus(uminus(v527), v528); real2 v545 = timesminusplus(reverse(v531), load(tbl, 96 * VECWIDTH + tbloffset), times(v531, load(tbl, 97 * VECWIDTH + tbloffset))); real2 v653 = plus(v612, v613); real2 v647 = reverse(minus(v613, v612)); real2 v609 = minusplus(v607, v608); real2 v611 = minusplus(uminus(v607), v608); real2 v863 = plus(v465, v545); real2 v857 = reverse(minus(v545, v465)); real2 v539 = timesminusplus(reverse(v529), load(tbl, 94 * VECWIDTH + tbloffset), times(v529, load(tbl, 95 * VECWIDTH + tbloffset))); real2 v385 = timesminusplus(reverse(v371), load(tbl, 64 * VECWIDTH + tbloffset), times(v371, load(tbl, 65 * VECWIDTH + tbloffset))); real2 v619 = timesminusplus(reverse(v609), load(tbl, 110 * VECWIDTH + tbloffset), times(v609, load(tbl, 111 * VECWIDTH + tbloffset))); real2 v1191 = plus(v225, v385); real2 v1185 = reverse(minus(v385, v225)); real2 v779 = reverse(minus(v539, v459)); real2 v785 = plus(v459, v539); real2 v625 = timesminusplus(reverse(v611), load(tbl, 112 * VECWIDTH + tbloffset), times(v611, load(tbl, 113 * VECWIDTH + tbloffset))); real2 v379 = timesminusplus(reverse(v369), load(tbl, 62 * VECWIDTH + tbloffset), times(v369, load(tbl, 63 * VECWIDTH + tbloffset))); real2 v975 = reverse(minus(v379, v219)); real2 v981 = plus(v219, v379); real2 v977 = minusplus(v975, v976); real2 v979 = minusplus(uminus(v975), v976); real2 v987 = timesminusplus(reverse(v977), load(tbl, 170 * VECWIDTH + tbloffset), times(v977, load(tbl, 171 * VECWIDTH + tbloffset))); real2 v993 = timesminusplus(reverse(v979), load(tbl, 172 * VECWIDTH + tbloffset), times(v979, load(tbl, 173 * VECWIDTH + tbloffset))); real2 v1015 = reverse(minus(v981, v980)); real2 v1021 = plus(v980, v981); real2 v11 = load(in, 9 << inShift); real2 v43 = load(in, 41 << inShift); real2 v248 = minus(v43, v11); real2 v252 = plus(v11, v43); real2 v59 = load(in, 57 << inShift); real2 v27 = load(in, 25 << inShift); real2 v253 = plus(v27, v59); real2 v247 = reverse(minus(v59, v27)); real2 v413 = plus(v252, 
v253); real2 v407 = reverse(minus(v253, v252)); real2 v249 = minusplus(v247, v248); real2 v251 = minusplus(uminus(v247), v248); real2 v259 = timesminusplus(reverse(v249), load(tbl, 38 * VECWIDTH + tbloffset), times(v249, load(tbl, 39 * VECWIDTH + tbloffset))); real2 v35 = load(in, 33 << inShift); real2 v3 = load(in, 1 << inShift); real2 v92 = plus(v3, v35); real2 v88 = minus(v35, v3); real2 v51 = load(in, 49 << inShift); real2 v19 = load(in, 17 << inShift); real2 v87 = reverse(minus(v51, v19)); real2 v93 = plus(v19, v51); real2 v412 = plus(v92, v93); real2 v408 = minus(v93, v92); real2 v411 = minusplus(uminus(v407), v408); real2 v409 = minusplus(v407, v408); real2 v91 = minusplus(uminus(v87), v88); real2 v89 = minusplus(v87, v88); real2 v99 = timesminusplus(reverse(v89), load(tbl, 6 * VECWIDTH + tbloffset), times(v89, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v425 = timesminusplus(reverse(v411), load(tbl, 72 * VECWIDTH + tbloffset), times(v411, load(tbl, 73 * VECWIDTH + tbloffset))); real2 v568 = minus(v413, v412); real2 v572 = plus(v412, v413); real2 v940 = plus(v99, v259); real2 v936 = minus(v259, v99); real2 v419 = timesminusplus(reverse(v409), load(tbl, 70 * VECWIDTH + tbloffset), times(v409, load(tbl, 71 * VECWIDTH + tbloffset))); real2 v47 = load(in, 45 << inShift); real2 v15 = load(in, 13 << inShift); real2 v332 = plus(v15, v47); real2 v328 = minus(v47, v15); real2 v63 = load(in, 61 << inShift); real2 v31 = load(in, 29 << inShift); real2 v327 = reverse(minus(v63, v31)); real2 v333 = plus(v31, v63); real2 v329 = minusplus(v327, v328); real2 v331 = minusplus(uminus(v327), v328); real2 v339 = timesminusplus(reverse(v329), load(tbl, 54 * VECWIDTH + tbloffset), times(v329, load(tbl, 55 * VECWIDTH + tbloffset))); real2 v487 = reverse(minus(v333, v332)); real2 v493 = plus(v332, v333); real2 v7 = load(in, 5 << inShift); real2 v39 = load(in, 37 << inShift); real2 v172 = plus(v7, v39); real2 v168 = minus(v39, v7); real2 v55 = load(in, 53 << inShift); real2 v23 = 
load(in, 21 << inShift); real2 v173 = plus(v23, v55); real2 v167 = reverse(minus(v55, v23)); real2 v488 = minus(v173, v172); real2 v492 = plus(v172, v173); real2 v491 = minusplus(uminus(v487), v488); real2 v489 = minusplus(v487, v488); real2 v499 = timesminusplus(reverse(v489), load(tbl, 86 * VECWIDTH + tbloffset), times(v489, load(tbl, 87 * VECWIDTH + tbloffset))); real2 v505 = timesminusplus(reverse(v491), load(tbl, 88 * VECWIDTH + tbloffset), times(v491, load(tbl, 89 * VECWIDTH + tbloffset))); real2 v567 = reverse(minus(v493, v492)); real2 v573 = plus(v492, v493); real2 v571 = minusplus(uminus(v567), v568); real2 v569 = minusplus(v567, v568); real2 v579 = timesminusplus(reverse(v569), load(tbl, 102 * VECWIDTH + tbloffset), times(v569, load(tbl, 103 * VECWIDTH + tbloffset))); real2 v585 = timesminusplus(reverse(v571), load(tbl, 104 * VECWIDTH + tbloffset), times(v571, load(tbl, 105 * VECWIDTH + tbloffset))); real2 v739 = plus(v585, v625); real2 v733 = reverse(minus(v625, v585)); real2 v707 = reverse(minus(v619, v579)); real2 v713 = plus(v579, v619); real2 v648 = minus(v573, v572); real2 v652 = plus(v572, v573); real2 v673 = plus(v652, v653); real2 v667 = reverse(minus(v653, v652)); real2 v651 = minusplus(uminus(v647), v648); real2 v649 = minusplus(v647, v648); real2 v659 = timesminusplus(reverse(v649), load(tbl, 118 * VECWIDTH + tbloffset), times(v649, load(tbl, 119 * VECWIDTH + tbloffset))); real2 v665 = timesminusplus(reverse(v651), load(tbl, 120 * VECWIDTH + tbloffset), times(v651, load(tbl, 121 * VECWIDTH + tbloffset))); real2 v780 = minus(v499, v419); real2 v784 = plus(v419, v499); real2 v781 = minusplus(v779, v780); real2 v783 = minusplus(uminus(v779), v780); real2 v805 = plus(v784, v785); real2 v799 = reverse(minus(v785, v784)); real2 v862 = plus(v425, v505); real2 v858 = minus(v505, v425); real2 v859 = minusplus(v857, v858); real2 v861 = minusplus(uminus(v857), v858); real2 v875 = timesminusplus(reverse(v861), load(tbl, 152 * VECWIDTH + tbloffset), 
times(v861, load(tbl, 153 * VECWIDTH + tbloffset))); real2 v791 = timesminusplus(reverse(v781), load(tbl, 138 * VECWIDTH + tbloffset), times(v781, load(tbl, 139 * VECWIDTH + tbloffset))); real2 v797 = timesminusplus(reverse(v783), load(tbl, 140 * VECWIDTH + tbloffset), times(v783, load(tbl, 141 * VECWIDTH + tbloffset))); real2 v883 = plus(v862, v863); real2 v877 = reverse(minus(v863, v862)); real2 v869 = timesminusplus(reverse(v859), load(tbl, 150 * VECWIDTH + tbloffset), times(v859, load(tbl, 151 * VECWIDTH + tbloffset))); real2 v36 = load(in, 34 << inShift); real2 v4 = load(in, 2 << inShift); real2 v108 = minus(v36, v4); real2 v112 = plus(v4, v36); real2 v52 = load(in, 50 << inShift); real2 v20 = load(in, 18 << inShift); real2 v113 = plus(v20, v52); real2 v107 = reverse(minus(v52, v20)); real2 v428 = minus(v113, v112); real2 v432 = plus(v112, v113); real2 v12 = load(in, 10 << inShift); real2 v44 = load(in, 42 << inShift); real2 v268 = minus(v44, v12); real2 v272 = plus(v12, v44); real2 v28 = load(in, 26 << inShift); real2 v60 = load(in, 58 << inShift); real2 v267 = reverse(minus(v60, v28)); real2 v273 = plus(v28, v60); real2 v427 = reverse(minus(v273, v272)); real2 v433 = plus(v272, v273); real2 v431 = minusplus(uminus(v427), v428); real2 v429 = minusplus(v427, v428); real2 v439 = timesminusplus(reverse(v429), load(tbl, 74 * VECWIDTH + tbloffset), times(v429, load(tbl, 75 * VECWIDTH + tbloffset))); real2 v588 = minus(v433, v432); real2 v592 = plus(v432, v433); real2 v40 = load(in, 38 << inShift); real2 v8 = load(in, 6 << inShift); real2 v188 = minus(v40, v8); real2 v192 = plus(v8, v40); real2 v24 = load(in, 22 << inShift); real2 v56 = load(in, 54 << inShift); real2 v187 = reverse(minus(v56, v24)); real2 v193 = plus(v24, v56); real2 v512 = plus(v192, v193); real2 v508 = minus(v193, v192); real2 v32 = load(in, 30 << inShift); real2 v64 = load(in, 62 << inShift); real2 v347 = reverse(minus(v64, v32)); real2 v353 = plus(v32, v64); real2 v48 = load(in, 46 << inShift); 
real2 v16 = load(in, 14 << inShift); real2 v348 = minus(v48, v16); real2 v352 = plus(v16, v48); real2 v513 = plus(v352, v353); real2 v507 = reverse(minus(v353, v352)); real2 v587 = reverse(minus(v513, v512)); real2 v593 = plus(v512, v513); real2 v633 = plus(v592, v593); real2 v627 = reverse(minus(v593, v592)); real2 v591 = minusplus(uminus(v587), v588); real2 v589 = minusplus(v587, v588); real2 v605 = timesminusplus(reverse(v591), load(tbl, 108 * VECWIDTH + tbloffset), times(v591, load(tbl, 109 * VECWIDTH + tbloffset))); real2 v599 = timesminusplus(reverse(v589), load(tbl, 106 * VECWIDTH + tbloffset), times(v589, load(tbl, 107 * VECWIDTH + tbloffset))); real2 v46 = load(in, 44 << inShift); real2 v14 = load(in, 12 << inShift); real2 v312 = plus(v14, v46); real2 v308 = minus(v46, v14); real2 v62 = load(in, 60 << inShift); real2 v30 = load(in, 28 << inShift); real2 v313 = plus(v30, v62); real2 v307 = reverse(minus(v62, v30)); real2 v467 = reverse(minus(v313, v312)); real2 v473 = plus(v312, v313); real2 v22 = load(in, 20 << inShift); real2 v54 = load(in, 52 << inShift); real2 v147 = reverse(minus(v54, v22)); real2 v153 = plus(v22, v54); real2 v6 = load(in, 4 << inShift); real2 v38 = load(in, 36 << inShift); real2 v148 = minus(v38, v6); real2 v152 = plus(v6, v38); real2 v472 = plus(v152, v153); real2 v468 = minus(v153, v152); real2 v547 = reverse(minus(v473, v472)); real2 v553 = plus(v472, v473); real2 v10 = load(in, 8 << inShift); real2 v42 = load(in, 40 << inShift); real2 v232 = plus(v10, v42); real2 v228 = minus(v42, v10); real2 v58 = load(in, 56 << inShift); real2 v26 = load(in, 24 << inShift); real2 v233 = plus(v26, v58); real2 v227 = reverse(minus(v58, v26)); real2 v393 = plus(v232, v233); real2 v387 = reverse(minus(v233, v232)); real2 v2 = load(in, 0 << inShift); real2 v34 = load(in, 32 << inShift); real2 v72 = plus(v2, v34); real2 v68 = minus(v34, v2); real2 v18 = load(in, 16 << inShift); real2 v50 = load(in, 48 << inShift); real2 v73 = plus(v18, v50); real2 v67 
= reverse(minus(v50, v18)); real2 v388 = minus(v73, v72); real2 v392 = plus(v72, v73); real2 v548 = minus(v393, v392); real2 v552 = plus(v392, v393); real2 v628 = minus(v553, v552); real2 v632 = plus(v552, v553); real2 v672 = plus(v632, v633); real2 v668 = minus(v633, v632); scatter(out, 0, 64, plus(v672, v673)); real2 v686 = minus(v672, v673); scatter(out, 32, 64, timesminusplus(v686, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v686), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v669 = minusplus(v667, v668); real2 v671 = minusplus(uminus(v667), v668); scatter(out, 48, 64, timesminusplus(reverse(v671), load(tbl, 124 * VECWIDTH + tbloffset), times(v671, load(tbl, 125 * VECWIDTH + tbloffset)))); scatter(out, 16, 64, timesminusplus(reverse(v669), load(tbl, 122 * VECWIDTH + tbloffset), times(v669, load(tbl, 123 * VECWIDTH + tbloffset)))); real2 v631 = minusplus(uminus(v627), v628); real2 v629 = minusplus(v627, v628); real2 v639 = timesminusplus(reverse(v629), load(tbl, 114 * VECWIDTH + tbloffset), times(v629, load(tbl, 115 * VECWIDTH + tbloffset))); scatter(out, 8, 64, plus(v639, v659)); real2 v694 = minus(v639, v659); scatter(out, 40, 64, timesminusplus(v694, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v694), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v645 = timesminusplus(reverse(v631), load(tbl, 116 * VECWIDTH + tbloffset), times(v631, load(tbl, 117 * VECWIDTH + tbloffset))); scatter(out, 24, 64, plus(v645, v665)); real2 v700 = minus(v645, v665); scatter(out, 56, 64, timesminusplus(v700, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v700), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v549 = minusplus(v547, v548); real2 v551 = minusplus(uminus(v547), v548); real2 v559 = timesminusplus(reverse(v549), load(tbl, 98 * VECWIDTH + tbloffset), times(v549, load(tbl, 99 * VECWIDTH + tbloffset))); real2 v708 = minus(v599, v559); real2 v712 = plus(v559, v599); scatter(out, 4, 64, plus(v712, v713)); real2 v726 = minus(v712, v713); scatter(out, 36, 64, 
timesminusplus(v726, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v726), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v711 = minusplus(uminus(v707), v708); real2 v709 = minusplus(v707, v708); scatter(out, 20, 64, timesminusplus(reverse(v709), load(tbl, 126 * VECWIDTH + tbloffset), times(v709, load(tbl, 127 * VECWIDTH + tbloffset)))); scatter(out, 52, 64, timesminusplus(reverse(v711), load(tbl, 128 * VECWIDTH + tbloffset), times(v711, load(tbl, 129 * VECWIDTH + tbloffset)))); real2 v565 = timesminusplus(reverse(v551), load(tbl, 100 * VECWIDTH + tbloffset), times(v551, load(tbl, 101 * VECWIDTH + tbloffset))); real2 v738 = plus(v565, v605); real2 v734 = minus(v605, v565); scatter(out, 12, 64, plus(v738, v739)); real2 v752 = minus(v738, v739); scatter(out, 44, 64, timesminusplus(v752, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v752), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v737 = minusplus(uminus(v733), v734); scatter(out, 60, 64, timesminusplus(reverse(v737), load(tbl, 132 * VECWIDTH + tbloffset), times(v737, load(tbl, 133 * VECWIDTH + tbloffset)))); real2 v735 = minusplus(v733, v734); scatter(out, 28, 64, timesminusplus(reverse(v735), load(tbl, 130 * VECWIDTH + tbloffset), times(v735, load(tbl, 131 * VECWIDTH + tbloffset)))); real2 v471 = minusplus(uminus(v467), v468); real2 v469 = minusplus(v467, v468); real2 v479 = timesminusplus(reverse(v469), load(tbl, 82 * VECWIDTH + tbloffset), times(v469, load(tbl, 83 * VECWIDTH + tbloffset))); real2 v511 = minusplus(uminus(v507), v508); real2 v509 = minusplus(v507, v508); real2 v519 = timesminusplus(reverse(v509), load(tbl, 90 * VECWIDTH + tbloffset), times(v509, load(tbl, 91 * VECWIDTH + tbloffset))); real2 v765 = plus(v439, v519); real2 v759 = reverse(minus(v519, v439)); real2 v389 = minusplus(v387, v388); real2 v391 = minusplus(uminus(v387), v388); real2 v399 = timesminusplus(reverse(v389), load(tbl, 66 * VECWIDTH + tbloffset), times(v389, load(tbl, 67 * VECWIDTH + tbloffset))); real2 v764 = plus(v399, 
v479); real2 v760 = minus(v479, v399); real2 v804 = plus(v764, v765); real2 v800 = minus(v765, v764); scatter(out, 2, 64, plus(v804, v805)); real2 v818 = minus(v804, v805); scatter(out, 34, 64, timesminusplus(v818, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v818), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v803 = minusplus(uminus(v799), v800); scatter(out, 50, 64, timesminusplus(reverse(v803), load(tbl, 144 * VECWIDTH + tbloffset), times(v803, load(tbl, 145 * VECWIDTH + tbloffset)))); real2 v801 = minusplus(v799, v800); scatter(out, 18, 64, timesminusplus(reverse(v801), load(tbl, 142 * VECWIDTH + tbloffset), times(v801, load(tbl, 143 * VECWIDTH + tbloffset)))); real2 v763 = minusplus(uminus(v759), v760); real2 v761 = minusplus(v759, v760); real2 v777 = timesminusplus(reverse(v763), load(tbl, 136 * VECWIDTH + tbloffset), times(v763, load(tbl, 137 * VECWIDTH + tbloffset))); scatter(out, 26, 64, plus(v777, v797)); real2 v830 = minus(v777, v797); scatter(out, 58, 64, timesminusplus(v830, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v830), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v771 = timesminusplus(reverse(v761), load(tbl, 134 * VECWIDTH + tbloffset), times(v761, load(tbl, 135 * VECWIDTH + tbloffset))); scatter(out, 10, 64, plus(v771, v791)); real2 v824 = minus(v771, v791); scatter(out, 42, 64, timesminusplus(v824, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v824), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v445 = timesminusplus(reverse(v431), load(tbl, 76 * VECWIDTH + tbloffset), times(v431, load(tbl, 77 * VECWIDTH + tbloffset))); real2 v525 = timesminusplus(reverse(v511), load(tbl, 92 * VECWIDTH + tbloffset), times(v511, load(tbl, 93 * VECWIDTH + tbloffset))); real2 v837 = reverse(minus(v525, v445)); real2 v843 = plus(v445, v525); real2 v485 = timesminusplus(reverse(v471), load(tbl, 84 * VECWIDTH + tbloffset), times(v471, load(tbl, 85 * VECWIDTH + tbloffset))); real2 v405 = timesminusplus(reverse(v391), load(tbl, 68 * VECWIDTH + 
tbloffset), times(v391, load(tbl, 69 * VECWIDTH + tbloffset))); real2 v838 = minus(v485, v405); real2 v842 = plus(v405, v485); real2 v878 = minus(v843, v842); real2 v882 = plus(v842, v843); scatter(out, 6, 64, plus(v882, v883)); real2 v896 = minus(v882, v883); scatter(out, 38, 64, timesminusplus(v896, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v896), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v881 = minusplus(uminus(v877), v878); scatter(out, 54, 64, timesminusplus(reverse(v881), load(tbl, 156 * VECWIDTH + tbloffset), times(v881, load(tbl, 157 * VECWIDTH + tbloffset)))); real2 v879 = minusplus(v877, v878); scatter(out, 22, 64, timesminusplus(reverse(v879), load(tbl, 154 * VECWIDTH + tbloffset), times(v879, load(tbl, 155 * VECWIDTH + tbloffset)))); real2 v841 = minusplus(uminus(v837), v838); real2 v839 = minusplus(v837, v838); real2 v855 = timesminusplus(reverse(v841), load(tbl, 148 * VECWIDTH + tbloffset), times(v841, load(tbl, 149 * VECWIDTH + tbloffset))); scatter(out, 30, 64, plus(v855, v875)); real2 v908 = minus(v855, v875); scatter(out, 62, 64, timesminusplus(v908, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v908), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v849 = timesminusplus(reverse(v839), load(tbl, 146 * VECWIDTH + tbloffset), times(v839, load(tbl, 147 * VECWIDTH + tbloffset))); scatter(out, 14, 64, plus(v849, v869)); real2 v902 = minus(v849, v869); scatter(out, 46, 64, timesminusplus(v902, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v902), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v151 = minusplus(uminus(v147), v148); real2 v149 = minusplus(v147, v148); real2 v311 = minusplus(uminus(v307), v308); real2 v309 = minusplus(v307, v308); real2 v109 = minusplus(v107, v108); real2 v111 = minusplus(uminus(v107), v108); real2 v119 = timesminusplus(reverse(v109), load(tbl, 10 * VECWIDTH + tbloffset), times(v109, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v269 = minusplus(v267, v268); real2 v271 = minusplus(uminus(v267), v268); 
real2 v279 = timesminusplus(reverse(v269), load(tbl, 42 * VECWIDTH + tbloffset), times(v269, load(tbl, 43 * VECWIDTH + tbloffset))); real2 v960 = plus(v119, v279); real2 v956 = minus(v279, v119); real2 v169 = minusplus(v167, v168); real2 v171 = minusplus(uminus(v167), v168); real2 v159 = timesminusplus(reverse(v149), load(tbl, 18 * VECWIDTH + tbloffset), times(v149, load(tbl, 19 * VECWIDTH + tbloffset))); real2 v319 = timesminusplus(reverse(v309), load(tbl, 50 * VECWIDTH + tbloffset), times(v309, load(tbl, 51 * VECWIDTH + tbloffset))); real2 v921 = plus(v159, v319); real2 v915 = reverse(minus(v319, v159)); real2 v351 = minusplus(uminus(v347), v348); real2 v349 = minusplus(v347, v348); real2 v359 = timesminusplus(reverse(v349), load(tbl, 58 * VECWIDTH + tbloffset), times(v349, load(tbl, 59 * VECWIDTH + tbloffset))); real2 v191 = minusplus(uminus(v187), v188); real2 v189 = minusplus(v187, v188); real2 v199 = timesminusplus(reverse(v189), load(tbl, 26 * VECWIDTH + tbloffset), times(v189, load(tbl, 27 * VECWIDTH + tbloffset))); real2 v961 = plus(v199, v359); real2 v955 = reverse(minus(v359, v199)); real2 v995 = reverse(minus(v961, v960)); real2 v1001 = plus(v960, v961); real2 v179 = timesminusplus(reverse(v169), load(tbl, 22 * VECWIDTH + tbloffset), times(v169, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v941 = plus(v179, v339); real2 v935 = reverse(minus(v339, v179)); real2 v1016 = minus(v941, v940); real2 v1020 = plus(v940, v941); real2 v71 = minusplus(uminus(v67), v68); real2 v69 = minusplus(v67, v68); real2 v79 = timesminusplus(reverse(v69), load(tbl, 2 * VECWIDTH + tbloffset), times(v69, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v1041 = plus(v1020, v1021); real2 v1035 = reverse(minus(v1021, v1020)); real2 v229 = minusplus(v227, v228); real2 v231 = minusplus(uminus(v227), v228); real2 v239 = timesminusplus(reverse(v229), load(tbl, 34 * VECWIDTH + tbloffset), times(v229, load(tbl, 35 * VECWIDTH + tbloffset))); real2 v920 = plus(v79, v239); real2 v916 = 
minus(v239, v79); real2 v996 = minus(v921, v920); real2 v1000 = plus(v920, v921); real2 v1040 = plus(v1000, v1001); real2 v1036 = minus(v1001, v1000); scatter(out, 1, 64, plus(v1040, v1041)); real2 v1054 = minus(v1040, v1041); scatter(out, 33, 64, timesminusplus(v1054, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1054), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1037 = minusplus(v1035, v1036); real2 v1039 = minusplus(uminus(v1035), v1036); scatter(out, 49, 64, timesminusplus(reverse(v1039), load(tbl, 184 * VECWIDTH + tbloffset), times(v1039, load(tbl, 185 * VECWIDTH + tbloffset)))); scatter(out, 17, 64, timesminusplus(reverse(v1037), load(tbl, 182 * VECWIDTH + tbloffset), times(v1037, load(tbl, 183 * VECWIDTH + tbloffset)))); real2 v1017 = minusplus(v1015, v1016); real2 v1019 = minusplus(uminus(v1015), v1016); real2 v1033 = timesminusplus(reverse(v1019), load(tbl, 180 * VECWIDTH + tbloffset), times(v1019, load(tbl, 181 * VECWIDTH + tbloffset))); real2 v997 = minusplus(v995, v996); real2 v999 = minusplus(uminus(v995), v996); real2 v1013 = timesminusplus(reverse(v999), load(tbl, 176 * VECWIDTH + tbloffset), times(v999, load(tbl, 177 * VECWIDTH + tbloffset))); scatter(out, 25, 64, plus(v1013, v1033)); real2 v1066 = minus(v1013, v1033); scatter(out, 57, 64, timesminusplus(v1066, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1066), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1027 = timesminusplus(reverse(v1017), load(tbl, 178 * VECWIDTH + tbloffset), times(v1017, load(tbl, 179 * VECWIDTH + tbloffset))); real2 v1007 = timesminusplus(reverse(v997), load(tbl, 174 * VECWIDTH + tbloffset), times(v997, load(tbl, 175 * VECWIDTH + tbloffset))); scatter(out, 9, 64, plus(v1007, v1027)); real2 v1060 = minus(v1007, v1027); scatter(out, 41, 64, timesminusplus(v1060, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1060), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v937 = minusplus(v935, v936); real2 v939 = minusplus(uminus(v935), v936); real2 v959 = 
minusplus(uminus(v955), v956); real2 v957 = minusplus(v955, v956); real2 v967 = timesminusplus(reverse(v957), load(tbl, 166 * VECWIDTH + tbloffset), times(v957, load(tbl, 167 * VECWIDTH + tbloffset))); real2 v947 = timesminusplus(reverse(v937), load(tbl, 162 * VECWIDTH + tbloffset), times(v937, load(tbl, 163 * VECWIDTH + tbloffset))); real2 v919 = minusplus(uminus(v915), v916); real2 v917 = minusplus(v915, v916); real2 v1079 = plus(v947, v987); real2 v1073 = reverse(minus(v987, v947)); real2 v927 = timesminusplus(reverse(v917), load(tbl, 158 * VECWIDTH + tbloffset), times(v917, load(tbl, 159 * VECWIDTH + tbloffset))); real2 v1074 = minus(v967, v927); real2 v1078 = plus(v927, v967); scatter(out, 5, 64, plus(v1078, v1079)); real2 v1092 = minus(v1078, v1079); scatter(out, 37, 64, timesminusplus(v1092, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1092), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1075 = minusplus(v1073, v1074); scatter(out, 21, 64, timesminusplus(reverse(v1075), load(tbl, 186 * VECWIDTH + tbloffset), times(v1075, load(tbl, 187 * VECWIDTH + tbloffset)))); real2 v1077 = minusplus(uminus(v1073), v1074); scatter(out, 53, 64, timesminusplus(reverse(v1077), load(tbl, 188 * VECWIDTH + tbloffset), times(v1077, load(tbl, 189 * VECWIDTH + tbloffset)))); real2 v953 = timesminusplus(reverse(v939), load(tbl, 164 * VECWIDTH + tbloffset), times(v939, load(tbl, 165 * VECWIDTH + tbloffset))); real2 v1099 = reverse(minus(v993, v953)); real2 v1105 = plus(v953, v993); real2 v973 = timesminusplus(reverse(v959), load(tbl, 168 * VECWIDTH + tbloffset), times(v959, load(tbl, 169 * VECWIDTH + tbloffset))); real2 v933 = timesminusplus(reverse(v919), load(tbl, 160 * VECWIDTH + tbloffset), times(v919, load(tbl, 161 * VECWIDTH + tbloffset))); real2 v1104 = plus(v933, v973); real2 v1100 = minus(v973, v933); scatter(out, 13, 64, plus(v1104, v1105)); real2 v1118 = minus(v1104, v1105); scatter(out, 45, 64, timesminusplus(v1118, load(tbl, 0 * VECWIDTH + tbloffset), 
times(reverse(v1118), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1101 = minusplus(v1099, v1100); scatter(out, 29, 64, timesminusplus(reverse(v1101), load(tbl, 190 * VECWIDTH + tbloffset), times(v1101, load(tbl, 191 * VECWIDTH + tbloffset)))); real2 v1103 = minusplus(uminus(v1099), v1100); scatter(out, 61, 64, timesminusplus(reverse(v1103), load(tbl, 192 * VECWIDTH + tbloffset), times(v1103, load(tbl, 193 * VECWIDTH + tbloffset)))); real2 v345 = timesminusplus(reverse(v331), load(tbl, 56 * VECWIDTH + tbloffset), times(v331, load(tbl, 57 * VECWIDTH + tbloffset))); real2 v325 = timesminusplus(reverse(v311), load(tbl, 52 * VECWIDTH + tbloffset), times(v311, load(tbl, 53 * VECWIDTH + tbloffset))); real2 v265 = timesminusplus(reverse(v251), load(tbl, 40 * VECWIDTH + tbloffset), times(v251, load(tbl, 41 * VECWIDTH + tbloffset))); real2 v185 = timesminusplus(reverse(v171), load(tbl, 24 * VECWIDTH + tbloffset), times(v171, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v165 = timesminusplus(reverse(v151), load(tbl, 20 * VECWIDTH + tbloffset), times(v151, load(tbl, 21 * VECWIDTH + tbloffset))); real2 v1131 = plus(v165, v325); real2 v1125 = reverse(minus(v325, v165)); real2 v1151 = plus(v185, v345); real2 v1145 = reverse(minus(v345, v185)); real2 v105 = timesminusplus(reverse(v91), load(tbl, 8 * VECWIDTH + tbloffset), times(v91, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v1150 = plus(v105, v265); real2 v1146 = minus(v265, v105); real2 v1226 = minus(v1151, v1150); real2 v1230 = plus(v1150, v1151); real2 v1231 = plus(v1190, v1191); real2 v1225 = reverse(minus(v1191, v1190)); real2 v1245 = reverse(minus(v1231, v1230)); real2 v1251 = plus(v1230, v1231); real2 v365 = timesminusplus(reverse(v351), load(tbl, 60 * VECWIDTH + tbloffset), times(v351, load(tbl, 61 * VECWIDTH + tbloffset))); real2 v285 = timesminusplus(reverse(v271), load(tbl, 44 * VECWIDTH + tbloffset), times(v271, load(tbl, 45 * VECWIDTH + tbloffset))); real2 v205 = timesminusplus(reverse(v191), load(tbl, 28 * 
VECWIDTH + tbloffset), times(v191, load(tbl, 29 * VECWIDTH + tbloffset))); real2 v1171 = plus(v205, v365); real2 v1165 = reverse(minus(v365, v205)); real2 v125 = timesminusplus(reverse(v111), load(tbl, 12 * VECWIDTH + tbloffset), times(v111, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v85 = timesminusplus(reverse(v71), load(tbl, 4 * VECWIDTH + tbloffset), times(v71, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v245 = timesminusplus(reverse(v231), load(tbl, 36 * VECWIDTH + tbloffset), times(v231, load(tbl, 37 * VECWIDTH + tbloffset))); real2 v1126 = minus(v245, v85); real2 v1130 = plus(v85, v245); real2 v1210 = plus(v1130, v1131); real2 v1206 = minus(v1131, v1130); real2 v1166 = minus(v285, v125); real2 v1170 = plus(v125, v285); real2 v1211 = plus(v1170, v1171); real2 v1205 = reverse(minus(v1171, v1170)); real2 v1246 = minus(v1211, v1210); real2 v1250 = plus(v1210, v1211); scatter(out, 3, 64, plus(v1250, v1251)); real2 v1264 = minus(v1250, v1251); scatter(out, 35, 64, timesminusplus(v1264, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1264), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1247 = minusplus(v1245, v1246); real2 v1249 = minusplus(uminus(v1245), v1246); scatter(out, 19, 64, timesminusplus(reverse(v1247), load(tbl, 218 * VECWIDTH + tbloffset), times(v1247, load(tbl, 219 * VECWIDTH + tbloffset)))); scatter(out, 51, 64, timesminusplus(reverse(v1249), load(tbl, 220 * VECWIDTH + tbloffset), times(v1249, load(tbl, 221 * VECWIDTH + tbloffset)))); real2 v1229 = minusplus(uminus(v1225), v1226); real2 v1227 = minusplus(v1225, v1226); real2 v1207 = minusplus(v1205, v1206); real2 v1209 = minusplus(uminus(v1205), v1206); real2 v1237 = timesminusplus(reverse(v1227), load(tbl, 214 * VECWIDTH + tbloffset), times(v1227, load(tbl, 215 * VECWIDTH + tbloffset))); real2 v1217 = timesminusplus(reverse(v1207), load(tbl, 210 * VECWIDTH + tbloffset), times(v1207, load(tbl, 211 * VECWIDTH + tbloffset))); scatter(out, 11, 64, plus(v1217, v1237)); real2 v1270 = minus(v1217, 
v1237); scatter(out, 43, 64, timesminusplus(v1270, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1270), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1223 = timesminusplus(reverse(v1209), load(tbl, 212 * VECWIDTH + tbloffset), times(v1209, load(tbl, 213 * VECWIDTH + tbloffset))); real2 v1243 = timesminusplus(reverse(v1229), load(tbl, 216 * VECWIDTH + tbloffset), times(v1229, load(tbl, 217 * VECWIDTH + tbloffset))); scatter(out, 27, 64, plus(v1223, v1243)); real2 v1276 = minus(v1223, v1243); scatter(out, 59, 64, timesminusplus(v1276, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1276), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1189 = minusplus(uminus(v1185), v1186); real2 v1187 = minusplus(v1185, v1186); real2 v1129 = minusplus(uminus(v1125), v1126); real2 v1127 = minusplus(v1125, v1126); real2 v1147 = minusplus(v1145, v1146); real2 v1149 = minusplus(uminus(v1145), v1146); real2 v1167 = minusplus(v1165, v1166); real2 v1169 = minusplus(uminus(v1165), v1166); real2 v1143 = timesminusplus(reverse(v1129), load(tbl, 196 * VECWIDTH + tbloffset), times(v1129, load(tbl, 197 * VECWIDTH + tbloffset))); real2 v1163 = timesminusplus(reverse(v1149), load(tbl, 200 * VECWIDTH + tbloffset), times(v1149, load(tbl, 201 * VECWIDTH + tbloffset))); real2 v1203 = timesminusplus(reverse(v1189), load(tbl, 208 * VECWIDTH + tbloffset), times(v1189, load(tbl, 209 * VECWIDTH + tbloffset))); real2 v1315 = plus(v1163, v1203); real2 v1309 = reverse(minus(v1203, v1163)); real2 v1183 = timesminusplus(reverse(v1169), load(tbl, 204 * VECWIDTH + tbloffset), times(v1169, load(tbl, 205 * VECWIDTH + tbloffset))); real2 v1314 = plus(v1143, v1183); real2 v1310 = minus(v1183, v1143); scatter(out, 15, 64, plus(v1314, v1315)); real2 v1328 = minus(v1314, v1315); scatter(out, 47, 64, timesminusplus(v1328, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1328), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1311 = minusplus(v1309, v1310); scatter(out, 31, 64, timesminusplus(reverse(v1311), 
load(tbl, 226 * VECWIDTH + tbloffset), times(v1311, load(tbl, 227 * VECWIDTH + tbloffset)))); real2 v1313 = minusplus(uminus(v1309), v1310); scatter(out, 63, 64, timesminusplus(reverse(v1313), load(tbl, 228 * VECWIDTH + tbloffset), times(v1313, load(tbl, 229 * VECWIDTH + tbloffset)))); real2 v1177 = timesminusplus(reverse(v1167), load(tbl, 202 * VECWIDTH + tbloffset), times(v1167, load(tbl, 203 * VECWIDTH + tbloffset))); real2 v1137 = timesminusplus(reverse(v1127), load(tbl, 194 * VECWIDTH + tbloffset), times(v1127, load(tbl, 195 * VECWIDTH + tbloffset))); real2 v1197 = timesminusplus(reverse(v1187), load(tbl, 206 * VECWIDTH + tbloffset), times(v1187, load(tbl, 207 * VECWIDTH + tbloffset))); real2 v1157 = timesminusplus(reverse(v1147), load(tbl, 198 * VECWIDTH + tbloffset), times(v1147, load(tbl, 199 * VECWIDTH + tbloffset))); real2 v1283 = reverse(minus(v1197, v1157)); real2 v1289 = plus(v1157, v1197); real2 v1288 = plus(v1137, v1177); real2 v1284 = minus(v1177, v1137); scatter(out, 7, 64, plus(v1288, v1289)); real2 v1302 = minus(v1288, v1289); scatter(out, 39, 64, timesminusplus(v1302, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1302), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1285 = minusplus(v1283, v1284); real2 v1287 = minusplus(uminus(v1283), v1284); scatter(out, 55, 64, timesminusplus(reverse(v1287), load(tbl, 224 * VECWIDTH + tbloffset), times(v1287, load(tbl, 225 * VECWIDTH + tbloffset)))); scatter(out, 23, 64, timesminusplus(reverse(v1285), load(tbl, 222 * VECWIDTH + tbloffset), times(v1285, load(tbl, 223 * VECWIDTH + tbloffset)))); // Pres : 17339 } } #endif // #if MAXBUTWIDTH >= 7 ALIGNED(8192) void dft128f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) { const int k = 1 << (shift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + i0*2; const real *in = in0 + i0*2; // Pres : 129041 real2 v109 = load(in, 107 << shift); real2 v45 = load(in, 43 
<< shift); real2 v341 = plus(v45, v109); real2 v335 = reverse(minus(v45, v109)); real2 v77 = load(in, 75 << shift); real2 v13 = load(in, 11 << shift); real2 v340 = plus(v13, v77); real2 v336 = minus(v77, v13); real2 v337 = minusplus(v335, v336); real2 v339 = minusplus(uminus(v335), v336); real2 v350 = ctimesminusplus(reverse(v339), ctbl[61], ctimes(v339, ctbl[60])); real2 v886 = plus(v340, v341); real2 v882 = minus(v341, v340); real2 v345 = ctimesminusplus(reverse(v337), ctbl[51], ctimes(v337, ctbl[41])); real2 v125 = load(in, 123 << shift); real2 v61 = load(in, 59 << shift); real2 v621 = plus(v61, v125); real2 v615 = reverse(minus(v61, v125)); real2 v29 = load(in, 27 << shift); real2 v93 = load(in, 91 << shift); real2 v616 = minus(v93, v29); real2 v620 = plus(v29, v93); real2 v887 = plus(v620, v621); real2 v881 = reverse(minus(v620, v621)); real2 v1009 = reverse(minus(v886, v887)); real2 v1015 = plus(v886, v887); real2 v883 = minusplus(v881, v882); real2 v885 = minusplus(uminus(v881), v882); real2 v895 = ctimesminusplus(reverse(v885), ctbl[28], ctimes(v885, ctbl[14])); real2 v619 = minusplus(uminus(v615), v616); real2 v617 = minusplus(v615, v616); real2 v625 = ctimesminusplus(reverse(v617), ctbl[53], ctimes(v617, ctbl[39])); real2 v891 = ctimesminusplus(reverse(v883), ctbl[25], ctimes(v883, ctbl[19])); real2 v631 = ctimesminusplus(reverse(v619), ctbl[58], ctimes(v619, ctbl[32])); real2 v2129 = reverse(minus(v350, v631)); real2 v2135 = plus(v350, v631); real2 v1728 = reverse(minus(v345, v625)); real2 v1734 = plus(v345, v625); real2 v5 = load(in, 3 << shift); real2 v69 = load(in, 67 << shift); real2 v192 = plus(v5, v69); real2 v188 = minus(v69, v5); real2 v37 = load(in, 35 << shift); real2 v101 = load(in, 99 << shift); real2 v193 = plus(v37, v101); real2 v187 = reverse(minus(v37, v101)); real2 v758 = plus(v192, v193); real2 v754 = minus(v193, v192); real2 v189 = minusplus(v187, v188); real2 v191 = minusplus(uminus(v187), v188); real2 v203 = 
ctimesminusplus(reverse(v191), ctbl[35], ctimes(v191, ctbl[57])); real2 v197 = ctimesminusplus(reverse(v189), ctbl[47], ctimes(v189, ctbl[45])); real2 v53 = load(in, 51 << shift); real2 v117 = load(in, 115 << shift); real2 v474 = reverse(minus(v53, v117)); real2 v480 = plus(v53, v117); real2 v85 = load(in, 83 << shift); real2 v21 = load(in, 19 << shift); real2 v475 = minus(v85, v21); real2 v479 = plus(v21, v85); real2 v753 = reverse(minus(v479, v480)); real2 v759 = plus(v479, v480); real2 v755 = minusplus(v753, v754); real2 v757 = minusplus(uminus(v753), v754); real2 v767 = ctimesminusplus(reverse(v757), ctbl[17], ctimes(v757, ctbl[27])); real2 v763 = ctimesminusplus(reverse(v755), ctbl[23], ctimes(v755, ctbl[21])); real2 v1411 = minus(v891, v763); real2 v1415 = plus(v763, v891); real2 v1014 = plus(v758, v759); real2 v1010 = minus(v759, v758); real2 v1011 = minusplus(v1009, v1010); real2 v1013 = minusplus(uminus(v1009), v1010); real2 v1023 = ctimesminusplus(reverse(v1013), ctbl[13], ctimes(v1013, ctbl[12])); real2 v1570 = minus(v895, v767); real2 v1574 = plus(v767, v895); real2 v1142 = plus(v1014, v1015); real2 v1138 = minus(v1015, v1014); real2 v478 = minusplus(uminus(v474), v475); real2 v476 = minusplus(v474, v475); real2 v484 = ctimesminusplus(reverse(v476), ctbl[49], ctimes(v476, ctbl[43])); real2 v1733 = plus(v197, v484); real2 v1729 = minus(v484, v197); real2 v1861 = plus(v1733, v1734); real2 v1857 = minus(v1734, v1733); real2 v1730 = minusplus(v1728, v1729); real2 v1732 = minusplus(uminus(v1728), v1729); real2 v1738 = ctimesminusplus(reverse(v1730), ctbl[11], ctimes(v1730, ctbl[9])); real2 v489 = ctimesminusplus(reverse(v478), ctbl[55], ctimes(v478, ctbl[54])); real2 v1742 = ctimesminusplus(reverse(v1732), ctbl[13], ctimes(v1732, ctbl[12])); real2 v1019 = ctimesminusplus(reverse(v1011), ctbl[11], ctimes(v1011, ctbl[9])); real2 v2134 = plus(v203, v489); real2 v2130 = minus(v489, v203); real2 v2262 = plus(v2134, v2135); real2 v2258 = minus(v2135, v2134); real2 
v105 = load(in, 103 << shift); real2 v41 = load(in, 39 << shift); real2 v269 = plus(v41, v105); real2 v263 = reverse(minus(v41, v105)); real2 v9 = load(in, 7 << shift); real2 v73 = load(in, 71 << shift); real2 v264 = minus(v73, v9); real2 v268 = plus(v9, v73); real2 v822 = plus(v268, v269); real2 v818 = minus(v269, v268); real2 v265 = minusplus(v263, v264); real2 v267 = minusplus(uminus(v263), v264); real2 v275 = ctimesminusplus(reverse(v265), ctbl[55], ctimes(v265, ctbl[37])); real2 v281 = ctimesminusplus(reverse(v267), ctbl[41], ctimes(v267, ctbl[51])); real2 v121 = load(in, 119 << shift); real2 v57 = load(in, 55 << shift); real2 v549 = plus(v57, v121); real2 v543 = reverse(minus(v57, v121)); real2 v25 = load(in, 23 << shift); real2 v89 = load(in, 87 << shift); real2 v544 = minus(v89, v25); real2 v548 = plus(v25, v89); real2 v817 = reverse(minus(v548, v549)); real2 v823 = plus(v548, v549); real2 v819 = minusplus(v817, v818); real2 v821 = minusplus(uminus(v817), v818); real2 v547 = minusplus(uminus(v543), v544); real2 v545 = minusplus(v543, v544); real2 v553 = ctimesminusplus(reverse(v545), ctbl[57], ctimes(v545, ctbl[35])); real2 v827 = ctimesminusplus(reverse(v819), ctbl[27], ctimes(v819, ctbl[17])); real2 v831 = ctimesminusplus(reverse(v821), ctbl[25], ctimes(v821, ctbl[24])); real2 v559 = ctimesminusplus(reverse(v547), ctbl[52], ctimes(v547, ctbl[38])); real2 v2198 = plus(v281, v559); real2 v2194 = minus(v559, v281); real2 v1793 = minus(v553, v275); real2 v1797 = plus(v275, v553); real2 v1078 = plus(v822, v823); real2 v1074 = minus(v823, v822); real2 v129 = load(in, 127 << shift); real2 v65 = load(in, 63 << shift); real2 v693 = plus(v65, v129); real2 v687 = reverse(minus(v65, v129)); real2 v33 = load(in, 31 << shift); real2 v97 = load(in, 95 << shift); real2 v692 = plus(v33, v97); real2 v688 = minus(v97, v33); real2 v691 = minusplus(uminus(v687), v688); real2 v689 = minusplus(v687, v688); real2 v945 = reverse(minus(v692, v693)); real2 v951 = plus(v692, v693); 
real2 v697 = ctimesminusplus(reverse(v689), ctbl[61], ctimes(v689, ctbl[31])); real2 v703 = ctimesminusplus(reverse(v691), ctbl[46], ctimes(v691, ctbl[44])); real2 v81 = load(in, 79 << shift); real2 v17 = load(in, 15 << shift); real2 v406 = minus(v81, v17); real2 v410 = plus(v17, v81); real2 v49 = load(in, 47 << shift); real2 v113 = load(in, 111 << shift); real2 v405 = reverse(minus(v49, v113)); real2 v411 = plus(v49, v113); real2 v407 = minusplus(v405, v406); real2 v409 = minusplus(uminus(v405), v406); real2 v415 = ctimesminusplus(reverse(v407), ctbl[59], ctimes(v407, ctbl[33])); real2 v1798 = plus(v415, v697); real2 v1792 = reverse(minus(v415, v697)); real2 v950 = plus(v410, v411); real2 v946 = minus(v411, v410); real2 v949 = minusplus(uminus(v945), v946); real2 v947 = minusplus(v945, v946); real2 v1073 = reverse(minus(v950, v951)); real2 v1079 = plus(v950, v951); real2 v955 = ctimesminusplus(reverse(v947), ctbl[29], ctimes(v947, ctbl[15])); real2 v1410 = reverse(minus(v827, v955)); real2 v1416 = plus(v827, v955); real2 v1448 = plus(v1415, v1416); real2 v1442 = reverse(minus(v1415, v1416)); real2 v1412 = minusplus(v1410, v1411); real2 v1414 = minusplus(uminus(v1410), v1411); real2 v1424 = ctimesminusplus(reverse(v1414), ctbl[4], ctimes(v1414, ctbl[2])); real2 v1077 = minusplus(uminus(v1073), v1074); real2 v1075 = minusplus(v1073, v1074); real2 v1087 = ctimesminusplus(reverse(v1077), ctbl[10], ctimes(v1077, ctbl[8])); real2 v1327 = plus(v1023, v1087); real2 v1321 = reverse(minus(v1023, v1087)); real2 v1137 = reverse(minus(v1078, v1079)); real2 v1143 = plus(v1078, v1079); real2 v1169 = reverse(minus(v1142, v1143)); real2 v1175 = plus(v1142, v1143); real2 v1083 = ctimesminusplus(reverse(v1075), ctbl[13], ctimes(v1075, ctbl[7])); real2 v1796 = minusplus(uminus(v1792), v1793); real2 v1794 = minusplus(v1792, v1793); real2 v1806 = ctimesminusplus(reverse(v1796), ctbl[10], ctimes(v1796, ctbl[8])); real2 v2046 = plus(v1742, v1806); real2 v2040 = reverse(minus(v1742, 
v1806)); real2 v1270 = plus(v1019, v1083); real2 v1264 = reverse(minus(v1019, v1083)); real2 v959 = ctimesminusplus(reverse(v949), ctbl[22], ctimes(v949, ctbl[20])); real2 v1139 = minusplus(v1137, v1138); real2 v1141 = minusplus(uminus(v1137), v1138); real2 v1151 = ctimesminusplus(reverse(v1141), ctbl[4], ctimes(v1141, ctbl[2])); real2 v1420 = ctimesminusplus(reverse(v1412), ctbl[5], ctimes(v1412, ctbl[3])); real2 v1569 = reverse(minus(v831, v959)); real2 v1575 = plus(v831, v959); real2 v1607 = plus(v1574, v1575); real2 v1601 = reverse(minus(v1574, v1575)); real2 v1856 = reverse(minus(v1797, v1798)); real2 v1862 = plus(v1797, v1798); real2 v1888 = reverse(minus(v1861, v1862)); real2 v1894 = plus(v1861, v1862); real2 v1147 = ctimesminusplus(reverse(v1139), ctbl[5], ctimes(v1139, ctbl[3])); real2 v1571 = minusplus(v1569, v1570); real2 v1573 = minusplus(uminus(v1569), v1570); real2 v1583 = ctimesminusplus(reverse(v1573), ctbl[4], ctimes(v1573, ctbl[2])); real2 v1858 = minusplus(v1856, v1857); real2 v1860 = minusplus(uminus(v1856), v1857); real2 v1870 = ctimesminusplus(reverse(v1860), ctbl[4], ctimes(v1860, ctbl[2])); real2 v1579 = ctimesminusplus(reverse(v1571), ctbl[5], ctimes(v1571, ctbl[3])); real2 v1802 = ctimesminusplus(reverse(v1794), ctbl[13], ctimes(v1794, ctbl[7])); real2 v1989 = plus(v1738, v1802); real2 v1983 = reverse(minus(v1738, v1802)); real2 v1866 = ctimesminusplus(reverse(v1858), ctbl[5], ctimes(v1858, ctbl[3])); real2 v23 = load(in, 21 << shift); real2 v87 = load(in, 85 << shift); real2 v513 = plus(v23, v87); real2 v509 = minus(v87, v23); real2 v55 = load(in, 53 << shift); real2 v119 = load(in, 117 << shift); real2 v514 = plus(v55, v119); real2 v508 = reverse(minus(v55, v119)); real2 v791 = plus(v513, v514); real2 v785 = reverse(minus(v513, v514)); real2 v512 = minusplus(uminus(v508), v509); real2 v510 = minusplus(v508, v509); real2 v518 = ctimesminusplus(reverse(v510), ctbl[41], ctimes(v510, ctbl[51])); real2 v71 = load(in, 69 << shift); real2 v7 = 
load(in, 5 << shift); real2 v230 = plus(v7, v71); real2 v226 = minus(v71, v7); real2 v103 = load(in, 101 << shift); real2 v39 = load(in, 37 << shift); real2 v225 = reverse(minus(v39, v103)); real2 v231 = plus(v39, v103); real2 v790 = plus(v230, v231); real2 v786 = minus(v231, v230); real2 v1042 = minus(v791, v790); real2 v1046 = plus(v790, v791); real2 v787 = minusplus(v785, v786); real2 v789 = minusplus(uminus(v785), v786); real2 v229 = minusplus(uminus(v225), v226); real2 v227 = minusplus(v225, v226); real2 v237 = ctimesminusplus(reverse(v227), ctbl[39], ctimes(v227, ctbl[53])); real2 v1761 = minus(v518, v237); real2 v1765 = plus(v237, v518); real2 v795 = ctimesminusplus(reverse(v787), ctbl[19], ctimes(v787, ctbl[25])); real2 v799 = ctimesminusplus(reverse(v789), ctbl[29], ctimes(v789, ctbl[15])); real2 v127 = load(in, 125 << shift); real2 v63 = load(in, 61 << shift); real2 v657 = plus(v63, v127); real2 v651 = reverse(minus(v63, v127)); real2 v31 = load(in, 29 << shift); real2 v95 = load(in, 93 << shift); real2 v652 = minus(v95, v31); real2 v656 = plus(v31, v95); real2 v913 = reverse(minus(v656, v657)); real2 v919 = plus(v656, v657); real2 v655 = minusplus(uminus(v651), v652); real2 v653 = minusplus(v651, v652); real2 v661 = ctimesminusplus(reverse(v653), ctbl[45], ctimes(v653, ctbl[47])); real2 v111 = load(in, 109 << shift); real2 v47 = load(in, 45 << shift); real2 v375 = plus(v47, v111); real2 v369 = reverse(minus(v47, v111)); real2 v79 = load(in, 77 << shift); real2 v15 = load(in, 13 << shift); real2 v370 = minus(v79, v15); real2 v374 = plus(v15, v79); real2 v914 = minus(v375, v374); real2 v918 = plus(v374, v375); real2 v371 = minusplus(v369, v370); real2 v373 = minusplus(uminus(v369), v370); real2 v915 = minusplus(v913, v914); real2 v917 = minusplus(uminus(v913), v914); real2 v927 = ctimesminusplus(reverse(v917), ctbl[16], ctimes(v917, ctbl[26])); real2 v381 = ctimesminusplus(reverse(v371), ctbl[43], ctimes(v371, ctbl[49])); real2 v1041 = reverse(minus(v918, 
v919)); real2 v1047 = plus(v918, v919); real2 v1766 = plus(v381, v661); real2 v1760 = reverse(minus(v381, v661)); real2 v1762 = minusplus(v1760, v1761); real2 v1764 = minusplus(uminus(v1760), v1761); real2 v1824 = reverse(minus(v1765, v1766)); real2 v1830 = plus(v1765, v1766); real2 v923 = ctimesminusplus(reverse(v915), ctbl[21], ctimes(v915, ctbl[23])); real2 v1378 = reverse(minus(v795, v923)); real2 v1384 = plus(v795, v923); real2 v1045 = minusplus(uminus(v1041), v1042); real2 v1043 = minusplus(v1041, v1042); real2 v1051 = ctimesminusplus(reverse(v1043), ctbl[9], ctimes(v1043, ctbl[11])); real2 v1537 = reverse(minus(v799, v927)); real2 v1543 = plus(v799, v927); real2 v1055 = ctimesminusplus(reverse(v1045), ctbl[7], ctimes(v1045, ctbl[6])); real2 v1111 = plus(v1046, v1047); real2 v1105 = reverse(minus(v1046, v1047)); real2 v115 = load(in, 113 << shift); real2 v51 = load(in, 49 << shift); real2 v440 = reverse(minus(v51, v115)); real2 v446 = plus(v51, v115); real2 v19 = load(in, 17 << shift); real2 v83 = load(in, 81 << shift); real2 v441 = minus(v83, v19); real2 v445 = plus(v19, v83); real2 v727 = plus(v445, v446); real2 v721 = reverse(minus(v445, v446)); real2 v442 = minusplus(v440, v441); real2 v444 = minusplus(uminus(v440), v441); real2 v450 = ctimesminusplus(reverse(v442), ctbl[33], ctimes(v442, ctbl[59])); real2 v67 = load(in, 65 << shift); real2 v3 = load(in, 1 << shift); real2 v148 = minus(v67, v3); real2 v152 = plus(v3, v67); real2 v99 = load(in, 97 << shift); real2 v35 = load(in, 33 << shift); real2 v147 = reverse(minus(v35, v99)); real2 v153 = plus(v35, v99); real2 v726 = plus(v152, v153); real2 v722 = minus(v153, v152); real2 v723 = minusplus(v721, v722); real2 v725 = minusplus(uminus(v721), v722); real2 v731 = ctimesminusplus(reverse(v723), ctbl[15], ctimes(v723, ctbl[29])); real2 v735 = ctimesminusplus(reverse(v725), ctbl[23], ctimes(v725, ctbl[21])); real2 v149 = minusplus(v147, v148); real2 v151 = minusplus(uminus(v147), v148); real2 v978 = 
minus(v727, v726); real2 v982 = plus(v726, v727); real2 v159 = ctimesminusplus(reverse(v149), ctbl[31], ctimes(v149, ctbl[61])); real2 v1701 = plus(v159, v450); real2 v1697 = minus(v450, v159); real2 v91 = load(in, 89 << shift); real2 v27 = load(in, 25 << shift); real2 v584 = plus(v27, v91); real2 v580 = minus(v91, v27); real2 v59 = load(in, 57 << shift); real2 v123 = load(in, 121 << shift); real2 v579 = reverse(minus(v59, v123)); real2 v585 = plus(v59, v123); real2 v583 = minusplus(uminus(v579), v580); real2 v581 = minusplus(v579, v580); real2 v589 = ctimesminusplus(reverse(v581), ctbl[37], ctimes(v581, ctbl[55])); real2 v855 = plus(v584, v585); real2 v849 = reverse(minus(v584, v585)); real2 v11 = load(in, 9 << shift); real2 v75 = load(in, 73 << shift); real2 v302 = minus(v75, v11); real2 v306 = plus(v11, v75); real2 v107 = load(in, 105 << shift); real2 v43 = load(in, 41 << shift); real2 v307 = plus(v43, v107); real2 v301 = reverse(minus(v43, v107)); real2 v854 = plus(v306, v307); real2 v850 = minus(v307, v306); real2 v851 = minusplus(v849, v850); real2 v853 = minusplus(uminus(v849), v850); real2 v863 = ctimesminusplus(reverse(v853), ctbl[19], ctimes(v853, ctbl[18])); real2 v305 = minusplus(uminus(v301), v302); real2 v303 = minusplus(v301, v302); real2 v1538 = minus(v863, v735); real2 v1542 = plus(v735, v863); real2 v859 = ctimesminusplus(reverse(v851), ctbl[17], ctimes(v851, ctbl[27])); real2 v1379 = minus(v859, v731); real2 v1383 = plus(v731, v859); real2 v1443 = minus(v1384, v1383); real2 v1447 = plus(v1383, v1384); real2 v1446 = minusplus(uminus(v1442), v1443); real2 v1444 = minusplus(v1442, v1443); real2 v983 = plus(v854, v855); real2 v977 = reverse(minus(v854, v855)); real2 v979 = minusplus(v977, v978); real2 v981 = minusplus(uminus(v977), v978); real2 v1456 = ctimesminusplus(reverse(v1446), ctbl[1], ctimes(v1446, ctbl[0])); real2 v311 = ctimesminusplus(reverse(v303), ctbl[35], ctimes(v303, ctbl[57])); real2 v1696 = reverse(minus(v311, v589)); real2 v1702 = 
plus(v311, v589); real2 v1452 = ctimesminusplus(reverse(v1444), ctbl[1], ctimes(v1444, ctbl[1])); real2 v987 = ctimesminusplus(reverse(v979), ctbl[7], ctimes(v979, ctbl[13])); real2 v1265 = minus(v1051, v987); real2 v1269 = plus(v987, v1051); real2 v1266 = minusplus(v1264, v1265); real2 v1268 = minusplus(uminus(v1264), v1265); real2 v1278 = ctimesminusplus(reverse(v1268), ctbl[1], ctimes(v1268, ctbl[0])); real2 v1286 = plus(v1269, v1270); real2 v1280 = reverse(minus(v1269, v1270)); real2 v1110 = plus(v982, v983); real2 v1106 = minus(v983, v982); real2 v1174 = plus(v1110, v1111); real2 v1170 = minus(v1111, v1110); real2 v1185 = reverse(minus(v1174, v1175)); real2 v1191 = plus(v1174, v1175); real2 v1171 = minusplus(v1169, v1170); real2 v1173 = minusplus(uminus(v1169), v1170); real2 v1179 = ctimesminusplus(reverse(v1171), ctbl[1], ctimes(v1171, ctbl[1])); real2 v1183 = ctimesminusplus(reverse(v1173), ctbl[1], ctimes(v1173, ctbl[0])); real2 v991 = ctimesminusplus(reverse(v981), ctbl[11], ctimes(v981, ctbl[9])); real2 v1322 = minus(v1055, v991); real2 v1326 = plus(v991, v1055); real2 v1337 = reverse(minus(v1326, v1327)); real2 v1343 = plus(v1326, v1327); real2 v1323 = minusplus(v1321, v1322); real2 v1325 = minusplus(uminus(v1321), v1322); real2 v1335 = ctimesminusplus(reverse(v1325), ctbl[1], ctimes(v1325, ctbl[0])); real2 v1109 = minusplus(uminus(v1105), v1106); real2 v1107 = minusplus(v1105, v1106); real2 v1115 = ctimesminusplus(reverse(v1107), ctbl[3], ctimes(v1107, ctbl[5])); real2 v1274 = ctimesminusplus(reverse(v1266), ctbl[1], ctimes(v1266, ctbl[1])); real2 v1606 = plus(v1542, v1543); real2 v1602 = minus(v1543, v1542); real2 v1216 = plus(v1115, v1147); real2 v1210 = reverse(minus(v1115, v1147)); real2 v1331 = ctimesminusplus(reverse(v1323), ctbl[1], ctimes(v1323, ctbl[1])); real2 v1119 = ctimesminusplus(reverse(v1109), ctbl[5], ctimes(v1109, ctbl[3])); real2 v1464 = plus(v1447, v1448); real2 v1458 = reverse(minus(v1447, v1448)); real2 v1382 = 
minusplus(uminus(v1378), v1379); real2 v1380 = minusplus(v1378, v1379); real2 v1388 = ctimesminusplus(reverse(v1380), ctbl[3], ctimes(v1380, ctbl[5])); real2 v1392 = ctimesminusplus(reverse(v1382), ctbl[5], ctimes(v1382, ctbl[3])); real2 v1508 = plus(v1392, v1424); real2 v1502 = reverse(minus(v1392, v1424)); real2 v1489 = plus(v1388, v1420); real2 v1483 = reverse(minus(v1388, v1420)); real2 v1603 = minusplus(v1601, v1602); real2 v1605 = minusplus(uminus(v1601), v1602); real2 v1615 = ctimesminusplus(reverse(v1605), ctbl[1], ctimes(v1605, ctbl[0])); real2 v1611 = ctimesminusplus(reverse(v1603), ctbl[1], ctimes(v1603, ctbl[1])); real2 v1617 = reverse(minus(v1606, v1607)); real2 v1623 = plus(v1606, v1607); real2 v1541 = minusplus(uminus(v1537), v1538); real2 v1539 = minusplus(v1537, v1538); real2 v1547 = ctimesminusplus(reverse(v1539), ctbl[3], ctimes(v1539, ctbl[5])); real2 v1551 = ctimesminusplus(reverse(v1541), ctbl[5], ctimes(v1541, ctbl[3])); real2 v1667 = plus(v1551, v1583); real2 v1661 = reverse(minus(v1551, v1583)); real2 v1648 = plus(v1547, v1579); real2 v1642 = reverse(minus(v1547, v1579)); real2 v1229 = reverse(minus(v1119, v1151)); real2 v1235 = plus(v1119, v1151); real2 v76 = load(in, 74 << shift); real2 v12 = load(in, 10 << shift); real2 v322 = plus(v12, v76); real2 v318 = minus(v76, v12); real2 v44 = load(in, 42 << shift); real2 v108 = load(in, 106 << shift); real2 v323 = plus(v44, v108); real2 v317 = reverse(minus(v44, v108)); real2 v866 = minus(v323, v322); real2 v870 = plus(v322, v323); real2 v92 = load(in, 90 << shift); real2 v28 = load(in, 26 << shift); real2 v602 = plus(v28, v92); real2 v598 = minus(v92, v28); real2 v60 = load(in, 58 << shift); real2 v124 = load(in, 122 << shift); real2 v603 = plus(v60, v124); real2 v597 = reverse(minus(v60, v124)); real2 v865 = reverse(minus(v602, v603)); real2 v871 = plus(v602, v603); real2 v869 = minusplus(uminus(v865), v866); real2 v867 = minusplus(v865, v866); real2 v879 = ctimesminusplus(reverse(v869), 
ctbl[7], ctimes(v869, ctbl[6])); real2 v993 = reverse(minus(v870, v871)); real2 v999 = plus(v870, v871); real2 v875 = ctimesminusplus(reverse(v867), ctbl[9], ctimes(v867, ctbl[11])); real2 v100 = load(in, 98 << shift); real2 v36 = load(in, 34 << shift); real2 v167 = reverse(minus(v36, v100)); real2 v173 = plus(v36, v100); real2 v4 = load(in, 2 << shift); real2 v68 = load(in, 66 << shift); real2 v168 = minus(v68, v4); real2 v172 = plus(v4, v68); real2 v742 = plus(v172, v173); real2 v738 = minus(v173, v172); real2 v84 = load(in, 82 << shift); real2 v20 = load(in, 18 << shift); real2 v462 = plus(v20, v84); real2 v458 = minus(v84, v20); real2 v116 = load(in, 114 << shift); real2 v52 = load(in, 50 << shift); real2 v463 = plus(v52, v116); real2 v457 = reverse(minus(v52, v116)); real2 v737 = reverse(minus(v462, v463)); real2 v743 = plus(v462, v463); real2 v998 = plus(v742, v743); real2 v994 = minus(v743, v742); real2 v739 = minusplus(v737, v738); real2 v741 = minusplus(uminus(v737), v738); real2 v995 = minusplus(v993, v994); real2 v997 = minusplus(uminus(v993), v994); real2 v1007 = ctimesminusplus(reverse(v997), ctbl[5], ctimes(v997, ctbl[3])); real2 v747 = ctimesminusplus(reverse(v739), ctbl[7], ctimes(v739, ctbl[13])); real2 v1395 = minus(v875, v747); real2 v1399 = plus(v747, v875); real2 v1003 = ctimesminusplus(reverse(v995), ctbl[3], ctimes(v995, ctbl[5])); real2 v1122 = minus(v999, v998); real2 v1126 = plus(v998, v999); real2 v72 = load(in, 70 << shift); real2 v8 = load(in, 6 << shift); real2 v246 = minus(v72, v8); real2 v250 = plus(v8, v72); real2 v104 = load(in, 102 << shift); real2 v40 = load(in, 38 << shift); real2 v245 = reverse(minus(v40, v104)); real2 v251 = plus(v40, v104); real2 v802 = minus(v251, v250); real2 v806 = plus(v250, v251); real2 v24 = load(in, 22 << shift); real2 v88 = load(in, 86 << shift); real2 v530 = plus(v24, v88); real2 v526 = minus(v88, v24); real2 v120 = load(in, 118 << shift); real2 v56 = load(in, 54 << shift); real2 v531 = plus(v56, 
v120); real2 v525 = reverse(minus(v56, v120)); real2 v801 = reverse(minus(v530, v531)); real2 v807 = plus(v530, v531); real2 v1058 = minus(v807, v806); real2 v1062 = plus(v806, v807); real2 v803 = minusplus(v801, v802); real2 v805 = minusplus(uminus(v801), v802); real2 v811 = ctimesminusplus(reverse(v803), ctbl[11], ctimes(v803, ctbl[9])); real2 v128 = load(in, 126 << shift); real2 v64 = load(in, 62 << shift); real2 v669 = reverse(minus(v64, v128)); real2 v675 = plus(v64, v128); real2 v32 = load(in, 30 << shift); real2 v96 = load(in, 94 << shift); real2 v674 = plus(v32, v96); real2 v670 = minus(v96, v32); real2 v935 = plus(v674, v675); real2 v929 = reverse(minus(v674, v675)); real2 v80 = load(in, 78 << shift); real2 v16 = load(in, 14 << shift); real2 v389 = minus(v80, v16); real2 v393 = plus(v16, v80); real2 v112 = load(in, 110 << shift); real2 v48 = load(in, 46 << shift); real2 v394 = plus(v48, v112); real2 v388 = reverse(minus(v48, v112)); real2 v930 = minus(v394, v393); real2 v934 = plus(v393, v394); real2 v1063 = plus(v934, v935); real2 v1057 = reverse(minus(v934, v935)); real2 v1059 = minusplus(v1057, v1058); real2 v1061 = minusplus(uminus(v1057), v1058); real2 v1127 = plus(v1062, v1063); real2 v1121 = reverse(minus(v1062, v1063)); real2 v1123 = minusplus(v1121, v1122); real2 v1125 = minusplus(uminus(v1121), v1122); real2 v1135 = ctimesminusplus(reverse(v1125), ctbl[1], ctimes(v1125, ctbl[0])); real2 v1071 = ctimesminusplus(reverse(v1061), ctbl[4], ctimes(v1061, ctbl[2])); real2 v1311 = plus(v1007, v1071); real2 v1305 = reverse(minus(v1007, v1071)); real2 v1131 = ctimesminusplus(reverse(v1123), ctbl[1], ctimes(v1123, ctbl[1])); real2 v1153 = reverse(minus(v1126, v1127)); real2 v1159 = plus(v1126, v1127); real2 v1067 = ctimesminusplus(reverse(v1059), ctbl[5], ctimes(v1059, ctbl[3])); real2 v1248 = reverse(minus(v1003, v1067)); real2 v1254 = plus(v1003, v1067); real2 v94 = load(in, 92 << shift); real2 v30 = load(in, 28 << shift); real2 v634 = minus(v94, v30); 
real2 v638 = plus(v30, v94); real2 v126 = load(in, 124 << shift); real2 v62 = load(in, 60 << shift); real2 v633 = reverse(minus(v62, v126)); real2 v639 = plus(v62, v126); real2 v897 = reverse(minus(v638, v639)); real2 v903 = plus(v638, v639); real2 v42 = load(in, 40 << shift); real2 v106 = load(in, 104 << shift); real2 v283 = reverse(minus(v42, v106)); real2 v289 = plus(v42, v106); real2 v10 = load(in, 8 << shift); real2 v74 = load(in, 72 << shift); real2 v284 = minus(v74, v10); real2 v288 = plus(v10, v74); real2 v838 = plus(v288, v289); real2 v834 = minus(v289, v288); real2 v26 = load(in, 24 << shift); real2 v90 = load(in, 88 << shift); real2 v562 = minus(v90, v26); real2 v566 = plus(v26, v90); real2 v122 = load(in, 120 << shift); real2 v58 = load(in, 56 << shift); real2 v567 = plus(v58, v122); real2 v561 = reverse(minus(v58, v122)); real2 v833 = reverse(minus(v566, v567)); real2 v839 = plus(v566, v567); real2 v967 = plus(v838, v839); real2 v961 = reverse(minus(v838, v839)); real2 v14 = load(in, 12 << shift); real2 v78 = load(in, 76 << shift); real2 v353 = minus(v78, v14); real2 v357 = plus(v14, v78); real2 v46 = load(in, 44 << shift); real2 v110 = load(in, 108 << shift); real2 v358 = plus(v46, v110); real2 v352 = reverse(minus(v46, v110)); real2 v898 = minus(v358, v357); real2 v902 = plus(v357, v358); real2 v1025 = reverse(minus(v902, v903)); real2 v1031 = plus(v902, v903); real2 v114 = load(in, 112 << shift); real2 v50 = load(in, 48 << shift); real2 v422 = reverse(minus(v50, v114)); real2 v428 = plus(v50, v114); real2 v2 = load(in, 0 << shift); real2 v66 = load(in, 64 << shift); real2 v132 = minus(v66, v2); real2 v136 = plus(v2, v66); real2 v98 = load(in, 96 << shift); real2 v34 = load(in, 32 << shift); real2 v137 = plus(v34, v98); real2 v131 = reverse(minus(v34, v98)); real2 v706 = minus(v137, v136); real2 v710 = plus(v136, v137); real2 v18 = load(in, 16 << shift); real2 v82 = load(in, 80 << shift); real2 v427 = plus(v18, v82); real2 v423 = minus(v82, v18); 
real2 v705 = reverse(minus(v427, v428)); real2 v711 = plus(v427, v428); real2 v966 = plus(v710, v711); real2 v962 = minus(v711, v710); real2 v1090 = minus(v967, v966); real2 v1094 = plus(v966, v967); real2 v70 = load(in, 68 << shift); real2 v6 = load(in, 4 << shift); real2 v210 = plus(v6, v70); real2 v206 = minus(v70, v6); real2 v38 = load(in, 36 << shift); real2 v102 = load(in, 100 << shift); real2 v211 = plus(v38, v102); real2 v205 = reverse(minus(v38, v102)); real2 v774 = plus(v210, v211); real2 v770 = minus(v211, v210); real2 v22 = load(in, 20 << shift); real2 v86 = load(in, 84 << shift); real2 v492 = minus(v86, v22); real2 v496 = plus(v22, v86); real2 v118 = load(in, 116 << shift); real2 v54 = load(in, 52 << shift); real2 v497 = plus(v54, v118); real2 v491 = reverse(minus(v54, v118)); real2 v775 = plus(v496, v497); real2 v769 = reverse(minus(v496, v497)); real2 v1030 = plus(v774, v775); real2 v1026 = minus(v775, v774); real2 v1095 = plus(v1030, v1031); real2 v1089 = reverse(minus(v1030, v1031)); real2 v1103 = minus(uminusplus(v1089), v1090); real2 v1099 = minus(uplusminus(v1089), v1090); real2 v1230 = minus(v1135, v1103); store(out, 56 << shift, minus(uplusminus(v1229), v1230)); store(out, 120 << shift, minus(uminusplus(v1229), v1230)); real2 v1234 = plus(v1103, v1135); store(out, 24 << shift, plus(v1234, v1235)); store(out, 88 << shift, minus(v1234, v1235)); real2 v1211 = minus(v1131, v1099); real2 v1215 = plus(v1099, v1131); store(out, 8 << shift, plus(v1215, v1216)); store(out, 72 << shift, minus(v1215, v1216)); store(out, 40 << shift, minus(uplusminus(v1210), v1211)); store(out, 104 << shift, minus(uminusplus(v1210), v1211)); real2 v1158 = plus(v1094, v1095); real2 v1154 = minus(v1095, v1094); real2 v1186 = minus(v1159, v1158); store(out, 32 << shift, minus(uplusminus(v1185), v1186)); store(out, 96 << shift, minus(uminusplus(v1185), v1186)); real2 v1190 = plus(v1158, v1159); store(out, 64 << shift, minus(v1190, v1191)); store(out, 0 << shift, plus(v1190, 
v1191)); real2 v1163 = minus(uplusminus(v1153), v1154); store(out, 16 << shift, plus(v1163, v1179)); store(out, 80 << shift, minus(v1163, v1179)); real2 v1167 = minus(uminusplus(v1153), v1154); store(out, 112 << shift, minus(v1167, v1183)); store(out, 48 << shift, plus(v1167, v1183)); real2 v971 = minus(uplusminus(v961), v962); real2 v975 = minus(uminusplus(v961), v962); real2 v1027 = minusplus(v1025, v1026); real2 v1029 = minusplus(uminus(v1025), v1026); real2 v1039 = ctimesminusplus(reverse(v1029), ctbl[1], ctimes(v1029, ctbl[0])); real2 v1306 = minus(v1039, v975); real2 v1310 = plus(v975, v1039); real2 v1319 = minus(uminusplus(v1305), v1306); real2 v1315 = minus(uplusminus(v1305), v1306); store(out, 124 << shift, minus(v1319, v1335)); store(out, 60 << shift, plus(v1319, v1335)); store(out, 28 << shift, plus(v1315, v1331)); store(out, 92 << shift, minus(v1315, v1331)); real2 v1342 = plus(v1310, v1311); store(out, 76 << shift, minus(v1342, v1343)); store(out, 12 << shift, plus(v1342, v1343)); real2 v1338 = minus(v1311, v1310); store(out, 44 << shift, minus(uplusminus(v1337), v1338)); store(out, 108 << shift, minus(uminusplus(v1337), v1338)); real2 v1035 = ctimesminusplus(reverse(v1027), ctbl[1], ctimes(v1027, ctbl[1])); real2 v1249 = minus(v1035, v971); real2 v1253 = plus(v971, v1035); real2 v1262 = minus(uminusplus(v1248), v1249); real2 v1258 = minus(uplusminus(v1248), v1249); store(out, 84 << shift, minus(v1258, v1274)); store(out, 20 << shift, plus(v1258, v1274)); store(out, 52 << shift, plus(v1262, v1278)); store(out, 116 << shift, minus(v1262, v1278)); real2 v1281 = minus(v1254, v1253); real2 v1285 = plus(v1253, v1254); store(out, 68 << shift, minus(v1285, v1286)); store(out, 4 << shift, plus(v1285, v1286)); store(out, 100 << shift, minus(uminusplus(v1280), v1281)); store(out, 36 << shift, minus(uplusminus(v1280), v1281)); real2 v835 = minusplus(v833, v834); real2 v837 = minusplus(uminus(v833), v834); real2 v843 = ctimesminusplus(reverse(v835), ctbl[1], 
ctimes(v835, ctbl[1])); real2 v773 = minusplus(uminus(v769), v770); real2 v771 = minusplus(v769, v770); real2 v779 = ctimesminusplus(reverse(v771), ctbl[3], ctimes(v771, ctbl[5])); real2 v901 = minusplus(uminus(v897), v898); real2 v899 = minusplus(v897, v898); real2 v907 = ctimesminusplus(reverse(v899), ctbl[5], ctimes(v899, ctbl[3])); real2 v719 = minus(uminusplus(v705), v706); real2 v715 = minus(uplusminus(v705), v706); real2 v933 = minusplus(uminus(v929), v930); real2 v931 = minusplus(v929, v930); real2 v939 = ctimesminusplus(reverse(v931), ctbl[13], ctimes(v931, ctbl[7])); real2 v1394 = reverse(minus(v811, v939)); real2 v1400 = plus(v811, v939); real2 v1426 = reverse(minus(v1399, v1400)); real2 v1432 = plus(v1399, v1400); real2 v1367 = plus(v715, v843); real2 v1363 = minus(v843, v715); real2 v1368 = plus(v779, v907); real2 v1362 = reverse(minus(v779, v907)); real2 v1427 = minus(v1368, v1367); real2 v1431 = plus(v1367, v1368); real2 v1440 = minus(uminusplus(v1426), v1427); real2 v1436 = minus(uplusminus(v1426), v1427); store(out, 18 << shift, plus(v1436, v1452)); store(out, 82 << shift, minus(v1436, v1452)); store(out, 114 << shift, minus(v1440, v1456)); store(out, 50 << shift, plus(v1440, v1456)); real2 v1459 = minus(v1432, v1431); store(out, 98 << shift, minus(uminusplus(v1458), v1459)); store(out, 34 << shift, minus(uplusminus(v1458), v1459)); real2 v1463 = plus(v1431, v1432); store(out, 2 << shift, plus(v1463, v1464)); store(out, 66 << shift, minus(v1463, v1464)); real2 v1372 = minus(uplusminus(v1362), v1363); real2 v1376 = minus(uminusplus(v1362), v1363); real2 v1398 = minusplus(uminus(v1394), v1395); real2 v1396 = minusplus(v1394, v1395); real2 v1404 = ctimesminusplus(reverse(v1396), ctbl[1], ctimes(v1396, ctbl[1])); real2 v1484 = minus(v1404, v1372); store(out, 106 << shift, minus(uminusplus(v1483), v1484)); store(out, 42 << shift, minus(uplusminus(v1483), v1484)); real2 v1488 = plus(v1372, v1404); store(out, 10 << shift, plus(v1488, v1489)); store(out, 
74 << shift, minus(v1488, v1489)); real2 v1408 = ctimesminusplus(reverse(v1398), ctbl[1], ctimes(v1398, ctbl[0])); real2 v1503 = minus(v1408, v1376); store(out, 122 << shift, minus(uminusplus(v1502), v1503)); store(out, 58 << shift, minus(uplusminus(v1502), v1503)); real2 v1507 = plus(v1376, v1408); store(out, 90 << shift, minus(v1507, v1508)); store(out, 26 << shift, plus(v1507, v1508)); real2 v847 = ctimesminusplus(reverse(v837), ctbl[1], ctimes(v837, ctbl[0])); real2 v911 = ctimesminusplus(reverse(v901), ctbl[4], ctimes(v901, ctbl[2])); real2 v815 = ctimesminusplus(reverse(v805), ctbl[13], ctimes(v805, ctbl[12])); real2 v1522 = minus(v847, v719); real2 v1526 = plus(v719, v847); real2 v751 = ctimesminusplus(reverse(v741), ctbl[11], ctimes(v741, ctbl[9])); real2 v1554 = minus(v879, v751); real2 v1558 = plus(v751, v879); real2 v943 = ctimesminusplus(reverse(v933), ctbl[10], ctimes(v933, ctbl[8])); real2 v1553 = reverse(minus(v815, v943)); real2 v1559 = plus(v815, v943); real2 v1591 = plus(v1558, v1559); real2 v1585 = reverse(minus(v1558, v1559)); real2 v783 = ctimesminusplus(reverse(v773), ctbl[5], ctimes(v773, ctbl[3])); real2 v1521 = reverse(minus(v783, v911)); real2 v1527 = plus(v783, v911); real2 v1586 = minus(v1527, v1526); real2 v1590 = plus(v1526, v1527); real2 v1595 = minus(uplusminus(v1585), v1586); store(out, 22 << shift, plus(v1595, v1611)); store(out, 86 << shift, minus(v1595, v1611)); real2 v1599 = minus(uminusplus(v1585), v1586); store(out, 118 << shift, minus(v1599, v1615)); store(out, 54 << shift, plus(v1599, v1615)); real2 v1622 = plus(v1590, v1591); store(out, 70 << shift, minus(v1622, v1623)); store(out, 6 << shift, plus(v1622, v1623)); real2 v1618 = minus(v1591, v1590); store(out, 102 << shift, minus(uminusplus(v1617), v1618)); store(out, 38 << shift, minus(uplusminus(v1617), v1618)); real2 v1557 = minusplus(uminus(v1553), v1554); real2 v1555 = minusplus(v1553, v1554); real2 v1563 = ctimesminusplus(reverse(v1555), ctbl[1], ctimes(v1555, 
ctbl[1])); real2 v1531 = minus(uplusminus(v1521), v1522); real2 v1535 = minus(uminusplus(v1521), v1522); real2 v1643 = minus(v1563, v1531); store(out, 46 << shift, minus(uplusminus(v1642), v1643)); store(out, 110 << shift, minus(uminusplus(v1642), v1643)); real2 v1647 = plus(v1531, v1563); store(out, 78 << shift, minus(v1647, v1648)); store(out, 14 << shift, plus(v1647, v1648)); real2 v1567 = ctimesminusplus(reverse(v1557), ctbl[1], ctimes(v1557, ctbl[0])); real2 v1666 = plus(v1535, v1567); real2 v1662 = minus(v1567, v1535); store(out, 94 << shift, minus(v1666, v1667)); store(out, 30 << shift, plus(v1666, v1667)); store(out, 126 << shift, minus(uminusplus(v1661), v1662)); store(out, 62 << shift, minus(uplusminus(v1661), v1662)); real2 v426 = minusplus(uminus(v422), v423); real2 v424 = minusplus(v422, v423); real2 v433 = ctimesminusplus(reverse(v424), ctbl[1], ctimes(v424, ctbl[1])); real2 v141 = minus(uplusminus(v131), v132); real2 v145 = minus(uminusplus(v131), v132); real2 v1685 = plus(v141, v433); real2 v1681 = minus(v433, v141); real2 v247 = minusplus(v245, v246); real2 v249 = minusplus(uminus(v245), v246); real2 v207 = minusplus(v205, v206); real2 v209 = minusplus(uminus(v205), v206); real2 v217 = ctimesminusplus(reverse(v207), ctbl[7], ctimes(v207, ctbl[13])); real2 v321 = minusplus(uminus(v317), v318); real2 v319 = minusplus(v317, v318); real2 v565 = minusplus(uminus(v561), v562); real2 v563 = minusplus(v561, v562); real2 v285 = minusplus(v283, v284); real2 v287 = minusplus(uminus(v283), v284); real2 v295 = ctimesminusplus(reverse(v285), ctbl[3], ctimes(v285, ctbl[5])); real2 v329 = ctimesminusplus(reverse(v319), ctbl[19], ctimes(v319, ctbl[25])); real2 v571 = ctimesminusplus(reverse(v563), ctbl[5], ctimes(v563, ctbl[3])); real2 v1680 = reverse(minus(v295, v571)); real2 v1686 = plus(v295, v571); real2 v601 = minusplus(uminus(v597), v598); real2 v599 = minusplus(v597, v598); real2 v607 = ctimesminusplus(reverse(v599), ctbl[21], ctimes(v599, ctbl[23])); real2 
v1718 = plus(v329, v607); real2 v1712 = reverse(minus(v329, v607)); real2 v527 = minusplus(v525, v526); real2 v529 = minusplus(uminus(v525), v526); real2 v461 = minusplus(uminus(v457), v458); real2 v459 = minusplus(v457, v458); real2 v467 = ctimesminusplus(reverse(v459), ctbl[17], ctimes(v459, ctbl[27])); real2 v255 = ctimesminusplus(reverse(v247), ctbl[23], ctimes(v247, ctbl[21])); real2 v637 = minusplus(uminus(v633), v634); real2 v635 = minusplus(v633, v634); real2 v643 = ctimesminusplus(reverse(v635), ctbl[13], ctimes(v635, ctbl[7])); real2 v1813 = plus(v1685, v1686); real2 v1809 = minus(v1686, v1685); real2 v493 = minusplus(v491, v492); real2 v495 = minusplus(uminus(v491), v492); real2 v171 = minusplus(uminus(v167), v168); real2 v169 = minusplus(v167, v168); real2 v354 = minusplus(v352, v353); real2 v356 = minusplus(uminus(v352), v353); real2 v362 = ctimesminusplus(reverse(v354), ctbl[11], ctimes(v354, ctbl[9])); real2 v179 = ctimesminusplus(reverse(v169), ctbl[15], ctimes(v169, ctbl[29])); real2 v1717 = plus(v179, v467); real2 v1713 = minus(v467, v179); real2 v1841 = minus(v1718, v1717); real2 v1845 = plus(v1717, v1718); real2 v501 = ctimesminusplus(reverse(v493), ctbl[9], ctimes(v493, ctbl[11])); real2 v1745 = minus(v501, v217); real2 v1749 = plus(v217, v501); real2 v671 = minusplus(v669, v670); real2 v673 = minusplus(uminus(v669), v670); real2 v679 = ctimesminusplus(reverse(v671), ctbl[29], ctimes(v671, ctbl[15])); real2 v535 = ctimesminusplus(reverse(v527), ctbl[25], ctimes(v527, ctbl[19])); real2 v1781 = plus(v255, v535); real2 v1777 = minus(v535, v255); real2 v1825 = minus(v1702, v1701); real2 v1829 = plus(v1701, v1702); real2 v1889 = minus(v1830, v1829); real2 v1893 = plus(v1829, v1830); real2 v1910 = plus(v1893, v1894); real2 v1904 = reverse(minus(v1893, v1894)); real2 v392 = minusplus(uminus(v388), v389); real2 v390 = minusplus(v388, v389); real2 v398 = ctimesminusplus(reverse(v390), ctbl[27], ctimes(v390, ctbl[17])); real2 v1776 = reverse(minus(v398, 
v679)); real2 v1782 = plus(v398, v679); real2 v1744 = reverse(minus(v362, v643)); real2 v1750 = plus(v362, v643); real2 v1808 = reverse(minus(v1749, v1750)); real2 v1814 = plus(v1749, v1750); real2 v1873 = minus(v1814, v1813); real2 v1877 = plus(v1813, v1814); real2 v1846 = plus(v1781, v1782); real2 v1840 = reverse(minus(v1781, v1782)); real2 v1872 = reverse(minus(v1845, v1846)); real2 v1878 = plus(v1845, v1846); real2 v1909 = plus(v1877, v1878); store(out, 1 << shift, plus(v1909, v1910)); store(out, 65 << shift, minus(v1909, v1910)); real2 v1905 = minus(v1878, v1877); store(out, 33 << shift, minus(uplusminus(v1904), v1905)); store(out, 97 << shift, minus(uminusplus(v1904), v1905)); real2 v1822 = minus(uminusplus(v1808), v1809); real2 v1818 = minus(uplusminus(v1808), v1809); real2 v1826 = minusplus(v1824, v1825); real2 v1828 = minusplus(uminus(v1824), v1825); real2 v1838 = ctimesminusplus(reverse(v1828), ctbl[5], ctimes(v1828, ctbl[3])); real2 v1948 = reverse(minus(v1838, v1870)); real2 v1954 = plus(v1838, v1870); real2 v1844 = minusplus(uminus(v1840), v1841); real2 v1842 = minusplus(v1840, v1841); real2 v1854 = ctimesminusplus(reverse(v1844), ctbl[1], ctimes(v1844, ctbl[0])); real2 v1953 = plus(v1822, v1854); real2 v1949 = minus(v1854, v1822); store(out, 89 << shift, minus(v1953, v1954)); store(out, 25 << shift, plus(v1953, v1954)); store(out, 121 << shift, minus(uminusplus(v1948), v1949)); store(out, 57 << shift, minus(uplusminus(v1948), v1949)); real2 v1834 = ctimesminusplus(reverse(v1826), ctbl[3], ctimes(v1826, ctbl[5])); real2 v1850 = ctimesminusplus(reverse(v1842), ctbl[1], ctimes(v1842, ctbl[1])); real2 v1929 = reverse(minus(v1834, v1866)); real2 v1935 = plus(v1834, v1866); real2 v1934 = plus(v1818, v1850); real2 v1930 = minus(v1850, v1818); store(out, 105 << shift, minus(uminusplus(v1929), v1930)); store(out, 41 << shift, minus(uplusminus(v1929), v1930)); store(out, 73 << shift, minus(v1934, v1935)); store(out, 9 << shift, plus(v1934, v1935)); real2 v1890 
= minusplus(v1888, v1889); real2 v1892 = minusplus(uminus(v1888), v1889); real2 v1902 = ctimesminusplus(reverse(v1892), ctbl[1], ctimes(v1892, ctbl[0])); real2 v1886 = minus(uminusplus(v1872), v1873); store(out, 113 << shift, minus(v1886, v1902)); store(out, 49 << shift, plus(v1886, v1902)); real2 v1882 = minus(uplusminus(v1872), v1873); real2 v1898 = ctimesminusplus(reverse(v1890), ctbl[1], ctimes(v1890, ctbl[1])); store(out, 17 << shift, plus(v1882, v1898)); store(out, 81 << shift, minus(v1882, v1898)); real2 v1700 = minusplus(uminus(v1696), v1697); real2 v1698 = minusplus(v1696, v1697); real2 v1690 = minus(uplusminus(v1680), v1681); real2 v1694 = minus(uminusplus(v1680), v1681); real2 v1778 = minusplus(v1776, v1777); real2 v1780 = minusplus(uminus(v1776), v1777); real2 v1774 = ctimesminusplus(reverse(v1764), ctbl[7], ctimes(v1764, ctbl[6])); real2 v1710 = ctimesminusplus(reverse(v1700), ctbl[11], ctimes(v1700, ctbl[9])); real2 v2041 = minus(v1774, v1710); real2 v2045 = plus(v1710, v1774); real2 v1714 = minusplus(v1712, v1713); real2 v1716 = minusplus(uminus(v1712), v1713); real2 v2042 = minusplus(v2040, v2041); real2 v2044 = minusplus(uminus(v2040), v2041); real2 v2054 = ctimesminusplus(reverse(v2044), ctbl[1], ctimes(v2044, ctbl[0])); real2 v1726 = ctimesminusplus(reverse(v1716), ctbl[5], ctimes(v1716, ctbl[3])); real2 v1748 = minusplus(uminus(v1744), v1745); real2 v1746 = minusplus(v1744, v1745); real2 v1758 = ctimesminusplus(reverse(v1748), ctbl[1], ctimes(v1748, ctbl[0])); real2 v2029 = plus(v1694, v1758); real2 v2025 = minus(v1758, v1694); real2 v1790 = ctimesminusplus(reverse(v1780), ctbl[4], ctimes(v1780, ctbl[2])); real2 v2024 = reverse(minus(v1726, v1790)); real2 v2030 = plus(v1726, v1790); real2 v2038 = minus(uminusplus(v2024), v2025); store(out, 61 << shift, plus(v2038, v2054)); store(out, 125 << shift, minus(v2038, v2054)); real2 v2034 = minus(uplusminus(v2024), v2025); real2 v2050 = ctimesminusplus(reverse(v2042), ctbl[1], ctimes(v2042, ctbl[1])); 
store(out, 93 << shift, minus(v2034, v2050)); store(out, 29 << shift, plus(v2034, v2050)); real2 v2056 = reverse(minus(v2045, v2046)); real2 v2062 = plus(v2045, v2046); real2 v2061 = plus(v2029, v2030); store(out, 13 << shift, plus(v2061, v2062)); store(out, 77 << shift, minus(v2061, v2062)); real2 v2057 = minus(v2030, v2029); store(out, 45 << shift, minus(uplusminus(v2056), v2057)); store(out, 109 << shift, minus(uminusplus(v2056), v2057)); real2 v1754 = ctimesminusplus(reverse(v1746), ctbl[1], ctimes(v1746, ctbl[1])); real2 v1722 = ctimesminusplus(reverse(v1714), ctbl[3], ctimes(v1714, ctbl[5])); real2 v1770 = ctimesminusplus(reverse(v1762), ctbl[9], ctimes(v1762, ctbl[11])); real2 v1706 = ctimesminusplus(reverse(v1698), ctbl[7], ctimes(v1698, ctbl[13])); real2 v1988 = plus(v1706, v1770); real2 v1984 = minus(v1770, v1706); real2 v1968 = minus(v1754, v1690); real2 v1972 = plus(v1690, v1754); real2 v1985 = minusplus(v1983, v1984); real2 v1987 = minusplus(uminus(v1983), v1984); real2 v1993 = ctimesminusplus(reverse(v1985), ctbl[1], ctimes(v1985, ctbl[1])); real2 v1786 = ctimesminusplus(reverse(v1778), ctbl[5], ctimes(v1778, ctbl[3])); real2 v1967 = reverse(minus(v1722, v1786)); real2 v1973 = plus(v1722, v1786); real2 v1977 = minus(uplusminus(v1967), v1968); real2 v1981 = minus(uminusplus(v1967), v1968); store(out, 85 << shift, minus(v1977, v1993)); store(out, 21 << shift, plus(v1977, v1993)); real2 v1997 = ctimesminusplus(reverse(v1987), ctbl[1], ctimes(v1987, ctbl[0])); store(out, 117 << shift, minus(v1981, v1997)); store(out, 53 << shift, plus(v1981, v1997)); real2 v2004 = plus(v1972, v1973); real2 v2000 = minus(v1973, v1972); real2 v1999 = reverse(minus(v1988, v1989)); real2 v2005 = plus(v1988, v1989); store(out, 5 << shift, plus(v2004, v2005)); store(out, 69 << shift, minus(v2004, v2005)); store(out, 37 << shift, minus(uplusminus(v1999), v2000)); store(out, 101 << shift, minus(uminusplus(v1999), v2000)); real2 v333 = ctimesminusplus(reverse(v321), ctbl[29], 
ctimes(v321, ctbl[15])); real2 v613 = ctimesminusplus(reverse(v601), ctbl[16], ctimes(v601, ctbl[26])); real2 v2113 = reverse(minus(v333, v613)); real2 v2119 = plus(v333, v613); real2 v595 = ctimesminusplus(reverse(v583), ctbl[40], ctimes(v583, ctbl[50])); real2 v455 = ctimesminusplus(reverse(v444), ctbl[43], ctimes(v444, ctbl[42])); real2 v165 = ctimesminusplus(reverse(v151), ctbl[47], ctimes(v151, ctbl[45])); real2 v2102 = plus(v165, v455); real2 v2098 = minus(v455, v165); real2 v315 = ctimesminusplus(reverse(v305), ctbl[53], ctimes(v305, ctbl[39])); real2 v2097 = reverse(minus(v315, v595)); real2 v2103 = plus(v315, v595); real2 v261 = ctimesminusplus(reverse(v249), ctbl[17], ctimes(v249, ctbl[27])); real2 v299 = ctimesminusplus(reverse(v287), ctbl[5], ctimes(v287, ctbl[3])); real2 v523 = ctimesminusplus(reverse(v512), ctbl[31], ctimes(v512, ctbl[30])); real2 v541 = ctimesminusplus(reverse(v529), ctbl[28], ctimes(v529, ctbl[14])); real2 v2182 = plus(v261, v541); real2 v2178 = minus(v541, v261); real2 v243 = ctimesminusplus(reverse(v229), ctbl[59], ctimes(v229, ctbl[33])); real2 v667 = ctimesminusplus(reverse(v655), ctbl[34], ctimes(v655, ctbl[56])); real2 v2166 = plus(v243, v523); real2 v2162 = minus(v523, v243); real2 v386 = ctimesminusplus(reverse(v373), ctbl[37], ctimes(v373, ctbl[36])); real2 v2161 = reverse(minus(v386, v667)); real2 v2167 = plus(v386, v667); real2 v472 = ctimesminusplus(reverse(v461), ctbl[19], ctimes(v461, ctbl[18])); real2 v185 = ctimesminusplus(reverse(v171), ctbl[23], ctimes(v171, ctbl[21])); real2 v2114 = minus(v472, v185); real2 v2118 = plus(v185, v472); real2 v420 = ctimesminusplus(reverse(v409), ctbl[49], ctimes(v409, ctbl[48])); real2 v506 = ctimesminusplus(reverse(v495), ctbl[7], ctimes(v495, ctbl[6])); real2 v2199 = plus(v420, v703); real2 v2193 = reverse(minus(v420, v703)); real2 v649 = ctimesminusplus(reverse(v637), ctbl[10], ctimes(v637, ctbl[8])); real2 v223 = ctimesminusplus(reverse(v209), ctbl[11], ctimes(v209, ctbl[9])); 
real2 v2146 = minus(v506, v223); real2 v2150 = plus(v223, v506); real2 v2231 = plus(v2166, v2167); real2 v2225 = reverse(minus(v2166, v2167)); real2 v685 = ctimesminusplus(reverse(v673), ctbl[22], ctimes(v673, ctbl[20])); real2 v2257 = reverse(minus(v2198, v2199)); real2 v2263 = plus(v2198, v2199); real2 v2226 = minus(v2103, v2102); real2 v2230 = plus(v2102, v2103); real2 v2294 = plus(v2230, v2231); real2 v2290 = minus(v2231, v2230); real2 v2246 = plus(v2118, v2119); real2 v2242 = minus(v2119, v2118); real2 v577 = ctimesminusplus(reverse(v565), ctbl[4], ctimes(v565, ctbl[2])); real2 v2081 = reverse(minus(v299, v577)); real2 v2087 = plus(v299, v577); real2 v403 = ctimesminusplus(reverse(v392), ctbl[25], ctimes(v392, ctbl[24])); real2 v2177 = reverse(minus(v403, v685)); real2 v2183 = plus(v403, v685); real2 v438 = ctimesminusplus(reverse(v426), ctbl[1], ctimes(v426, ctbl[0])); real2 v2086 = plus(v145, v438); real2 v2082 = minus(v438, v145); real2 v2210 = minus(v2087, v2086); real2 v2214 = plus(v2086, v2087); real2 v2247 = plus(v2182, v2183); real2 v2241 = reverse(minus(v2182, v2183)); real2 v2279 = plus(v2246, v2247); real2 v2273 = reverse(minus(v2246, v2247)); real2 v367 = ctimesminusplus(reverse(v356), ctbl[13], ctimes(v356, ctbl[12])); real2 v2151 = plus(v367, v649); real2 v2145 = reverse(minus(v367, v649)); real2 v2209 = reverse(minus(v2150, v2151)); real2 v2215 = plus(v2150, v2151); real2 v2274 = minus(v2215, v2214); real2 v2278 = plus(v2214, v2215); real2 v2310 = plus(v2278, v2279); real2 v2306 = minus(v2279, v2278); real2 v2295 = plus(v2262, v2263); real2 v2289 = reverse(minus(v2262, v2263)); real2 v2311 = plus(v2294, v2295); store(out, 3 << shift, plus(v2310, v2311)); store(out, 67 << shift, minus(v2310, v2311)); real2 v2305 = reverse(minus(v2294, v2295)); store(out, 35 << shift, minus(uplusminus(v2305), v2306)); store(out, 99 << shift, minus(uminusplus(v2305), v2306)); real2 v2287 = minus(uminusplus(v2273), v2274); real2 v2283 = minus(uplusminus(v2273), 
v2274); real2 v2291 = minusplus(v2289, v2290); real2 v2293 = minusplus(uminus(v2289), v2290); real2 v2299 = ctimesminusplus(reverse(v2291), ctbl[1], ctimes(v2291, ctbl[1])); store(out, 19 << shift, plus(v2283, v2299)); store(out, 83 << shift, minus(v2283, v2299)); real2 v2303 = ctimesminusplus(reverse(v2293), ctbl[1], ctimes(v2293, ctbl[0])); store(out, 51 << shift, plus(v2287, v2303)); store(out, 115 << shift, minus(v2287, v2303)); real2 v2229 = minusplus(uminus(v2225), v2226); real2 v2227 = minusplus(v2225, v2226); real2 v2235 = ctimesminusplus(reverse(v2227), ctbl[3], ctimes(v2227, ctbl[5])); real2 v2219 = minus(uplusminus(v2209), v2210); real2 v2223 = minus(uminusplus(v2209), v2210); real2 v2243 = minusplus(v2241, v2242); real2 v2245 = minusplus(uminus(v2241), v2242); real2 v2251 = ctimesminusplus(reverse(v2243), ctbl[1], ctimes(v2243, ctbl[1])); real2 v2331 = minus(v2251, v2219); real2 v2335 = plus(v2219, v2251); real2 v2259 = minusplus(v2257, v2258); real2 v2261 = minusplus(uminus(v2257), v2258); real2 v2267 = ctimesminusplus(reverse(v2259), ctbl[5], ctimes(v2259, ctbl[3])); real2 v2336 = plus(v2235, v2267); store(out, 75 << shift, minus(v2335, v2336)); store(out, 11 << shift, plus(v2335, v2336)); real2 v2330 = reverse(minus(v2235, v2267)); store(out, 107 << shift, minus(uminusplus(v2330), v2331)); store(out, 43 << shift, minus(uplusminus(v2330), v2331)); real2 v2239 = ctimesminusplus(reverse(v2229), ctbl[5], ctimes(v2229, ctbl[3])); real2 v2271 = ctimesminusplus(reverse(v2261), ctbl[4], ctimes(v2261, ctbl[2])); real2 v2255 = ctimesminusplus(reverse(v2245), ctbl[1], ctimes(v2245, ctbl[0])); real2 v2350 = minus(v2255, v2223); real2 v2354 = plus(v2223, v2255); real2 v2355 = plus(v2239, v2271); store(out, 91 << shift, minus(v2354, v2355)); store(out, 27 << shift, plus(v2354, v2355)); real2 v2349 = reverse(minus(v2239, v2271)); store(out, 59 << shift, minus(uplusminus(v2349), v2350)); store(out, 123 << shift, minus(uminusplus(v2349), v2350)); real2 v2091 = 
minus(uplusminus(v2081), v2082); real2 v2095 = minus(uminusplus(v2081), v2082); real2 v2181 = minusplus(uminus(v2177), v2178); real2 v2179 = minusplus(v2177, v2178); real2 v2101 = minusplus(uminus(v2097), v2098); real2 v2099 = minusplus(v2097, v2098); real2 v2165 = minusplus(uminus(v2161), v2162); real2 v2163 = minusplus(v2161, v2162); real2 v2147 = minusplus(v2145, v2146); real2 v2149 = minusplus(uminus(v2145), v2146); real2 v2155 = ctimesminusplus(reverse(v2147), ctbl[1], ctimes(v2147, ctbl[1])); real2 v2197 = minusplus(uminus(v2193), v2194); real2 v2195 = minusplus(v2193, v2194); real2 v2117 = minusplus(uminus(v2113), v2114); real2 v2115 = minusplus(v2113, v2114); real2 v2123 = ctimesminusplus(reverse(v2115), ctbl[3], ctimes(v2115, ctbl[5])); real2 v2171 = ctimesminusplus(reverse(v2163), ctbl[9], ctimes(v2163, ctbl[11])); real2 v2107 = ctimesminusplus(reverse(v2099), ctbl[7], ctimes(v2099, ctbl[13])); real2 v2389 = plus(v2107, v2171); real2 v2385 = minus(v2171, v2107); real2 v2187 = ctimesminusplus(reverse(v2179), ctbl[5], ctimes(v2179, ctbl[3])); real2 v2374 = plus(v2123, v2187); real2 v2368 = reverse(minus(v2123, v2187)); real2 v2369 = minus(v2155, v2091); real2 v2373 = plus(v2091, v2155); real2 v2405 = plus(v2373, v2374); real2 v2401 = minus(v2374, v2373); real2 v2203 = ctimesminusplus(reverse(v2195), ctbl[13], ctimes(v2195, ctbl[7])); real2 v2131 = minusplus(v2129, v2130); real2 v2133 = minusplus(uminus(v2129), v2130); real2 v2139 = ctimesminusplus(reverse(v2131), ctbl[11], ctimes(v2131, ctbl[9])); real2 v2390 = plus(v2139, v2203); real2 v2384 = reverse(minus(v2139, v2203)); real2 v2400 = reverse(minus(v2389, v2390)); store(out, 103 << shift, minus(uminusplus(v2400), v2401)); store(out, 39 << shift, minus(uplusminus(v2400), v2401)); real2 v2406 = plus(v2389, v2390); store(out, 71 << shift, minus(v2405, v2406)); store(out, 7 << shift, plus(v2405, v2406)); real2 v2382 = minus(uminusplus(v2368), v2369); real2 v2378 = minus(uplusminus(v2368), v2369); real2 v2388 
= minusplus(uminus(v2384), v2385); real2 v2386 = minusplus(v2384, v2385); real2 v2398 = ctimesminusplus(reverse(v2388), ctbl[1], ctimes(v2388, ctbl[0])); store(out, 119 << shift, minus(v2382, v2398)); store(out, 55 << shift, plus(v2382, v2398)); real2 v2394 = ctimesminusplus(reverse(v2386), ctbl[1], ctimes(v2386, ctbl[1])); store(out, 87 << shift, minus(v2378, v2394)); store(out, 23 << shift, plus(v2378, v2394)); real2 v2127 = ctimesminusplus(reverse(v2117), ctbl[5], ctimes(v2117, ctbl[3])); real2 v2175 = ctimesminusplus(reverse(v2165), ctbl[7], ctimes(v2165, ctbl[6])); real2 v2111 = ctimesminusplus(reverse(v2101), ctbl[11], ctimes(v2101, ctbl[9])); real2 v2442 = minus(v2175, v2111); real2 v2446 = plus(v2111, v2175); real2 v2207 = ctimesminusplus(reverse(v2197), ctbl[10], ctimes(v2197, ctbl[8])); real2 v2159 = ctimesminusplus(reverse(v2149), ctbl[1], ctimes(v2149, ctbl[0])); real2 v2430 = plus(v2095, v2159); real2 v2426 = minus(v2159, v2095); real2 v2191 = ctimesminusplus(reverse(v2181), ctbl[4], ctimes(v2181, ctbl[2])); real2 v2143 = ctimesminusplus(reverse(v2133), ctbl[13], ctimes(v2133, ctbl[12])); real2 v2447 = plus(v2143, v2207); real2 v2441 = reverse(minus(v2143, v2207)); real2 v2425 = reverse(minus(v2127, v2191)); real2 v2431 = plus(v2127, v2191); real2 v2458 = minus(v2431, v2430); real2 v2462 = plus(v2430, v2431); real2 v2457 = reverse(minus(v2446, v2447)); store(out, 47 << shift, minus(uplusminus(v2457), v2458)); store(out, 111 << shift, minus(uminusplus(v2457), v2458)); real2 v2463 = plus(v2446, v2447); store(out, 79 << shift, minus(v2462, v2463)); store(out, 15 << shift, plus(v2462, v2463)); real2 v2435 = minus(uplusminus(v2425), v2426); real2 v2439 = minus(uminusplus(v2425), v2426); real2 v2445 = minusplus(uminus(v2441), v2442); real2 v2443 = minusplus(v2441, v2442); real2 v2451 = ctimesminusplus(reverse(v2443), ctbl[1], ctimes(v2443, ctbl[1])); store(out, 31 << shift, plus(v2435, v2451)); store(out, 95 << shift, minus(v2435, v2451)); real2 v2455 = 
ctimesminusplus(reverse(v2445), ctbl[1], ctimes(v2445, ctbl[0])); store(out, 127 << shift, minus(v2439, v2455)); store(out, 63 << shift, plus(v2439, v2455)); // Pres : 68124 } } ALIGNED(8192) void dft128b_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) { const int k = 1 << (shift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + i0*2; const real *in = in0 + i0*2; // Pres : 135650 real2 v51 = load(in, 49 << shift); real2 v115 = load(in, 113 << shift); real2 v456 = plus(v51, v115); real2 v450 = reverse(minus(v115, v51)); real2 v83 = load(in, 81 << shift); real2 v19 = load(in, 17 << shift); real2 v455 = plus(v19, v83); real2 v451 = minus(v83, v19); real2 v721 = reverse(minus(v456, v455)); real2 v727 = plus(v455, v456); real2 v452 = minusplus(v450, v451); real2 v454 = minusplus(uminus(v450), v451); real2 v462 = ctimesminusplus(reverse(v452), ctbl[58], ctimes(v452, ctbl[59])); real2 v466 = ctimesminusplus(reverse(v454), ctbl[48], ctimes(v454, ctbl[42])); real2 v35 = load(in, 33 << shift); real2 v99 = load(in, 97 << shift); real2 v147 = reverse(minus(v99, v35)); real2 v153 = plus(v35, v99); real2 v3 = load(in, 1 << shift); real2 v67 = load(in, 65 << shift); real2 v152 = plus(v3, v67); real2 v148 = minus(v67, v3); real2 v722 = minus(v153, v152); real2 v726 = plus(v152, v153); real2 v982 = plus(v726, v727); real2 v978 = minus(v727, v726); real2 v149 = minusplus(v147, v148); real2 v151 = minusplus(uminus(v147), v148); real2 v165 = ctimesminusplus(reverse(v151), ctbl[44], ctimes(v151, ctbl[45])); real2 v159 = ctimesminusplus(reverse(v149), ctbl[60], ctimes(v149, ctbl[61])); real2 v723 = minusplus(v721, v722); real2 v725 = minusplus(uminus(v721), v722); real2 v2102 = plus(v165, v466); real2 v2098 = minus(v466, v165); real2 v731 = ctimesminusplus(reverse(v723), ctbl[28], ctimes(v723, ctbl[29])); real2 v1697 = minus(v462, v159); real2 v1701 = plus(v159, v462); real2 v735 = 
ctimesminusplus(reverse(v725), ctbl[20], ctimes(v725, ctbl[21])); real2 v75 = load(in, 73 << shift); real2 v11 = load(in, 9 << shift); real2 v304 = minus(v75, v11); real2 v308 = plus(v11, v75); real2 v107 = load(in, 105 << shift); real2 v43 = load(in, 41 << shift); real2 v309 = plus(v43, v107); real2 v303 = reverse(minus(v107, v43)); real2 v854 = plus(v308, v309); real2 v850 = minus(v309, v308); real2 v307 = minusplus(uminus(v303), v304); real2 v305 = minusplus(v303, v304); real2 v313 = ctimesminusplus(reverse(v305), ctbl[56], ctimes(v305, ctbl[57])); real2 v319 = ctimesminusplus(reverse(v307), ctbl[38], ctimes(v307, ctbl[39])); real2 v27 = load(in, 25 << shift); real2 v91 = load(in, 89 << shift); real2 v591 = plus(v27, v91); real2 v587 = minus(v91, v27); real2 v59 = load(in, 57 << shift); real2 v123 = load(in, 121 << shift); real2 v592 = plus(v59, v123); real2 v586 = reverse(minus(v123, v59)); real2 v849 = reverse(minus(v592, v591)); real2 v855 = plus(v591, v592); real2 v983 = plus(v854, v855); real2 v977 = reverse(minus(v855, v854)); real2 v590 = minusplus(uminus(v586), v587); real2 v588 = minusplus(v586, v587); real2 v597 = ctimesminusplus(reverse(v588), ctbl[54], ctimes(v588, ctbl[55])); real2 v1110 = plus(v982, v983); real2 v1106 = minus(v983, v982); real2 v853 = minusplus(uminus(v849), v850); real2 v851 = minusplus(v849, v850); real2 v859 = ctimesminusplus(reverse(v851), ctbl[26], ctimes(v851, ctbl[27])); real2 v1702 = plus(v313, v597); real2 v1696 = reverse(minus(v597, v313)); real2 v1825 = minus(v1702, v1701); real2 v1829 = plus(v1701, v1702); real2 v1700 = minusplus(uminus(v1696), v1697); real2 v1698 = minusplus(v1696, v1697); real2 v1710 = ctimesminusplus(reverse(v1700), ctbl[8], ctimes(v1700, ctbl[9])); real2 v1706 = ctimesminusplus(reverse(v1698), ctbl[12], ctimes(v1698, ctbl[13])); real2 v1379 = minus(v859, v731); real2 v1383 = plus(v731, v859); real2 v863 = ctimesminusplus(reverse(v853), ctbl[24], ctimes(v853, ctbl[18])); real2 v1538 = minus(v863, 
v735); real2 v1542 = plus(v735, v863); real2 v981 = minusplus(uminus(v977), v978); real2 v979 = minusplus(v977, v978); real2 v987 = ctimesminusplus(reverse(v979), ctbl[12], ctimes(v979, ctbl[13])); real2 v601 = ctimesminusplus(reverse(v590), ctbl[51], ctimes(v590, ctbl[50])); real2 v991 = ctimesminusplus(reverse(v981), ctbl[8], ctimes(v981, ctbl[9])); real2 v2103 = plus(v319, v601); real2 v2097 = reverse(minus(v601, v319)); real2 v2226 = minus(v2103, v2102); real2 v2230 = plus(v2102, v2103); real2 v2101 = minusplus(uminus(v2097), v2098); real2 v2099 = minusplus(v2097, v2098); real2 v2107 = ctimesminusplus(reverse(v2099), ctbl[12], ctimes(v2099, ctbl[13])); real2 v127 = load(in, 125 << shift); real2 v63 = load(in, 61 << shift); real2 v660 = plus(v63, v127); real2 v654 = reverse(minus(v127, v63)); real2 v31 = load(in, 29 << shift); real2 v95 = load(in, 93 << shift); real2 v659 = plus(v31, v95); real2 v655 = minus(v95, v31); real2 v913 = reverse(minus(v660, v659)); real2 v919 = plus(v659, v660); real2 v658 = minusplus(uminus(v654), v655); real2 v656 = minusplus(v654, v655); real2 v666 = ctimesminusplus(reverse(v656), ctbl[46], ctimes(v656, ctbl[47])); real2 v670 = ctimesminusplus(reverse(v658), ctbl[57], ctimes(v658, ctbl[56])); real2 v47 = load(in, 45 << shift); real2 v111 = load(in, 109 << shift); real2 v377 = reverse(minus(v111, v47)); real2 v383 = plus(v47, v111); real2 v15 = load(in, 13 << shift); real2 v79 = load(in, 77 << shift); real2 v378 = minus(v79, v15); real2 v382 = plus(v15, v79); real2 v379 = minusplus(v377, v378); real2 v381 = minusplus(uminus(v377), v378); real2 v394 = ctimesminusplus(reverse(v381), ctbl[54], ctimes(v381, ctbl[36])); real2 v2167 = plus(v394, v670); real2 v2161 = reverse(minus(v670, v394)); real2 v914 = minus(v383, v382); real2 v918 = plus(v382, v383); real2 v1047 = plus(v918, v919); real2 v1041 = reverse(minus(v919, v918)); real2 v917 = minusplus(uminus(v913), v914); real2 v915 = minusplus(v913, v914); real2 v923 = 
ctimesminusplus(reverse(v915), ctbl[22], ctimes(v915, ctbl[23])); real2 v389 = ctimesminusplus(reverse(v379), ctbl[48], ctimes(v379, ctbl[49])); real2 v1760 = reverse(minus(v666, v389)); real2 v1766 = plus(v389, v666); real2 v927 = ctimesminusplus(reverse(v917), ctbl[27], ctimes(v917, ctbl[26])); real2 v87 = load(in, 85 << shift); real2 v23 = load(in, 21 << shift); real2 v520 = minus(v87, v23); real2 v524 = plus(v23, v87); real2 v55 = load(in, 53 << shift); real2 v119 = load(in, 117 << shift); real2 v519 = reverse(minus(v119, v55)); real2 v525 = plus(v55, v119); real2 v523 = minusplus(uminus(v519), v520); real2 v521 = minusplus(v519, v520); real2 v529 = ctimesminusplus(reverse(v521), ctbl[50], ctimes(v521, ctbl[51])); real2 v533 = ctimesminusplus(reverse(v523), ctbl[60], ctimes(v523, ctbl[30])); real2 v791 = plus(v524, v525); real2 v785 = reverse(minus(v525, v524)); real2 v39 = load(in, 37 << shift); real2 v103 = load(in, 101 << shift); real2 v231 = plus(v39, v103); real2 v225 = reverse(minus(v103, v39)); real2 v7 = load(in, 5 << shift); real2 v71 = load(in, 69 << shift); real2 v226 = minus(v71, v7); real2 v230 = plus(v7, v71); real2 v227 = minusplus(v225, v226); real2 v229 = minusplus(uminus(v225), v226); real2 v243 = ctimesminusplus(reverse(v229), ctbl[32], ctimes(v229, ctbl[33])); real2 v2162 = minus(v533, v243); real2 v2166 = plus(v243, v533); real2 v2231 = plus(v2166, v2167); real2 v2225 = reverse(minus(v2167, v2166)); real2 v237 = ctimesminusplus(reverse(v227), ctbl[52], ctimes(v227, ctbl[53])); real2 v1761 = minus(v529, v237); real2 v1765 = plus(v237, v529); real2 v1824 = reverse(minus(v1766, v1765)); real2 v1830 = plus(v1765, v1766); real2 v790 = plus(v230, v231); real2 v786 = minus(v231, v230); real2 v1826 = minusplus(v1824, v1825); real2 v1828 = minusplus(uminus(v1824), v1825); real2 v1764 = minusplus(uminus(v1760), v1761); real2 v1762 = minusplus(v1760, v1761); real2 v1889 = minus(v1830, v1829); real2 v1893 = plus(v1829, v1830); real2 v1838 = 
ctimesminusplus(reverse(v1828), ctbl[2], ctimes(v1828, ctbl[3])); real2 v1774 = ctimesminusplus(reverse(v1764), ctbl[12], ctimes(v1764, ctbl[6])); real2 v2041 = minus(v1774, v1710); real2 v2045 = plus(v1710, v1774); real2 v1770 = ctimesminusplus(reverse(v1762), ctbl[10], ctimes(v1762, ctbl[11])); real2 v1988 = plus(v1706, v1770); real2 v1984 = minus(v1770, v1706); real2 v1834 = ctimesminusplus(reverse(v1826), ctbl[4], ctimes(v1826, ctbl[5])); real2 v787 = minusplus(v785, v786); real2 v789 = minusplus(uminus(v785), v786); real2 v799 = ctimesminusplus(reverse(v789), ctbl[14], ctimes(v789, ctbl[15])); real2 v1046 = plus(v790, v791); real2 v1042 = minus(v791, v790); real2 v1043 = minusplus(v1041, v1042); real2 v1045 = minusplus(uminus(v1041), v1042); real2 v1537 = reverse(minus(v927, v799)); real2 v1543 = plus(v799, v927); real2 v1606 = plus(v1542, v1543); real2 v1602 = minus(v1543, v1542); real2 v795 = ctimesminusplus(reverse(v787), ctbl[24], ctimes(v787, ctbl[25])); real2 v1105 = reverse(minus(v1047, v1046)); real2 v1111 = plus(v1046, v1047); real2 v1384 = plus(v795, v923); real2 v1378 = reverse(minus(v923, v795)); real2 v1107 = minusplus(v1105, v1106); real2 v1109 = minusplus(uminus(v1105), v1106); real2 v1119 = ctimesminusplus(reverse(v1109), ctbl[2], ctimes(v1109, ctbl[3])); real2 v1382 = minusplus(uminus(v1378), v1379); real2 v1380 = minusplus(v1378, v1379); real2 v1388 = ctimesminusplus(reverse(v1380), ctbl[4], ctimes(v1380, ctbl[5])); real2 v1115 = ctimesminusplus(reverse(v1107), ctbl[4], ctimes(v1107, ctbl[5])); real2 v1392 = ctimesminusplus(reverse(v1382), ctbl[2], ctimes(v1382, ctbl[3])); real2 v1443 = minus(v1384, v1383); real2 v1447 = plus(v1383, v1384); real2 v1539 = minusplus(v1537, v1538); real2 v1541 = minusplus(uminus(v1537), v1538); real2 v1551 = ctimesminusplus(reverse(v1541), ctbl[2], ctimes(v1541, ctbl[3])); real2 v1547 = ctimesminusplus(reverse(v1539), ctbl[4], ctimes(v1539, ctbl[5])); real2 v1051 = ctimesminusplus(reverse(v1043), ctbl[10], 
ctimes(v1043, ctbl[11])); real2 v2290 = minus(v2231, v2230); real2 v2294 = plus(v2230, v2231); real2 v1174 = plus(v1110, v1111); real2 v1170 = minus(v1111, v1110); real2 v1265 = minus(v1051, v987); real2 v1269 = plus(v987, v1051); real2 v1055 = ctimesminusplus(reverse(v1045), ctbl[12], ctimes(v1045, ctbl[6])); real2 v1322 = minus(v1055, v991); real2 v1326 = plus(v991, v1055); real2 v129 = load(in, 127 << shift); real2 v65 = load(in, 63 << shift); real2 v688 = reverse(minus(v129, v65)); real2 v694 = plus(v65, v129); real2 v33 = load(in, 31 << shift); real2 v97 = load(in, 95 << shift); real2 v689 = minus(v97, v33); real2 v693 = plus(v33, v97); real2 v951 = plus(v693, v694); real2 v945 = reverse(minus(v694, v693)); real2 v692 = minusplus(uminus(v688), v689); real2 v690 = minusplus(v688, v689); real2 v699 = ctimesminusplus(reverse(v690), ctbl[30], ctimes(v690, ctbl[31])); real2 v17 = load(in, 15 << shift); real2 v81 = load(in, 79 << shift); real2 v420 = plus(v17, v81); real2 v416 = minus(v81, v17); real2 v113 = load(in, 111 << shift); real2 v49 = load(in, 47 << shift); real2 v415 = reverse(minus(v113, v49)); real2 v421 = plus(v49, v113); real2 v419 = minusplus(uminus(v415), v416); real2 v417 = minusplus(v415, v416); real2 v425 = ctimesminusplus(reverse(v417), ctbl[32], ctimes(v417, ctbl[33])); real2 v1792 = reverse(minus(v699, v425)); real2 v1798 = plus(v425, v699); real2 v950 = plus(v420, v421); real2 v946 = minus(v421, v420); real2 v947 = minusplus(v945, v946); real2 v949 = minusplus(uminus(v945), v946); real2 v959 = ctimesminusplus(reverse(v949), ctbl[21], ctimes(v949, ctbl[20])); real2 v955 = ctimesminusplus(reverse(v947), ctbl[14], ctimes(v947, ctbl[15])); real2 v1079 = plus(v950, v951); real2 v1073 = reverse(minus(v951, v950)); real2 v73 = load(in, 71 << shift); real2 v9 = load(in, 7 << shift); real2 v268 = plus(v9, v73); real2 v264 = minus(v73, v9); real2 v105 = load(in, 103 << shift); real2 v41 = load(in, 39 << shift); real2 v269 = plus(v41, v105); real2 v263 = 
reverse(minus(v105, v41)); real2 v818 = minus(v269, v268); real2 v822 = plus(v268, v269); real2 v267 = minusplus(uminus(v263), v264); real2 v265 = minusplus(v263, v264); real2 v275 = ctimesminusplus(reverse(v265), ctbl[36], ctimes(v265, ctbl[37])); real2 v89 = load(in, 87 << shift); real2 v25 = load(in, 23 << shift); real2 v557 = plus(v25, v89); real2 v553 = minus(v89, v25); real2 v121 = load(in, 119 << shift); real2 v57 = load(in, 55 << shift); real2 v558 = plus(v57, v121); real2 v552 = reverse(minus(v121, v57)); real2 v823 = plus(v557, v558); real2 v817 = reverse(minus(v558, v557)); real2 v1078 = plus(v822, v823); real2 v1074 = minus(v823, v822); real2 v819 = minusplus(v817, v818); real2 v821 = minusplus(uminus(v817), v818); real2 v1077 = minusplus(uminus(v1073), v1074); real2 v1075 = minusplus(v1073, v1074); real2 v1083 = ctimesminusplus(reverse(v1075), ctbl[6], ctimes(v1075, ctbl[7])); real2 v1087 = ctimesminusplus(reverse(v1077), ctbl[9], ctimes(v1077, ctbl[8])); real2 v556 = minusplus(uminus(v552), v553); real2 v554 = minusplus(v552, v553); real2 v564 = ctimesminusplus(reverse(v554), ctbl[34], ctimes(v554, ctbl[35])); real2 v1793 = minus(v564, v275); real2 v1797 = plus(v275, v564); real2 v1862 = plus(v1797, v1798); real2 v1856 = reverse(minus(v1798, v1797)); real2 v1794 = minusplus(v1792, v1793); real2 v1796 = minusplus(uminus(v1792), v1793); real2 v827 = ctimesminusplus(reverse(v819), ctbl[16], ctimes(v819, ctbl[17])); real2 v1410 = reverse(minus(v955, v827)); real2 v1416 = plus(v827, v955); real2 v1143 = plus(v1078, v1079); real2 v1137 = reverse(minus(v1079, v1078)); real2 v831 = ctimesminusplus(reverse(v821), ctbl[18], ctimes(v821, ctbl[24])); real2 v1575 = plus(v831, v959); real2 v1569 = reverse(minus(v959, v831)); real2 v5 = load(in, 3 << shift); real2 v69 = load(in, 67 << shift); real2 v188 = minus(v69, v5); real2 v192 = plus(v5, v69); real2 v101 = load(in, 99 << shift); real2 v37 = load(in, 35 << shift); real2 v193 = plus(v37, v101); real2 v187 = 
reverse(minus(v101, v37)); real2 v754 = minus(v193, v192); real2 v758 = plus(v192, v193); real2 v189 = minusplus(v187, v188); real2 v191 = minusplus(uminus(v187), v188); real2 v197 = ctimesminusplus(reverse(v189), ctbl[44], ctimes(v189, ctbl[45])); real2 v21 = load(in, 19 << shift); real2 v85 = load(in, 83 << shift); real2 v485 = minus(v85, v21); real2 v489 = plus(v21, v85); real2 v53 = load(in, 51 << shift); real2 v117 = load(in, 115 << shift); real2 v484 = reverse(minus(v117, v53)); real2 v490 = plus(v53, v117); real2 v753 = reverse(minus(v490, v489)); real2 v759 = plus(v489, v490); real2 v757 = minusplus(uminus(v753), v754); real2 v755 = minusplus(v753, v754); real2 v767 = ctimesminusplus(reverse(v757), ctbl[26], ctimes(v757, ctbl[27])); real2 v763 = ctimesminusplus(reverse(v755), ctbl[20], ctimes(v755, ctbl[21])); real2 v486 = minusplus(v484, v485); real2 v488 = minusplus(uminus(v484), v485); real2 v495 = ctimesminusplus(reverse(v486), ctbl[42], ctimes(v486, ctbl[43])); real2 v1729 = minus(v495, v197); real2 v1733 = plus(v197, v495); real2 v1014 = plus(v758, v759); real2 v1010 = minus(v759, v758); real2 v13 = load(in, 11 << shift); real2 v77 = load(in, 75 << shift); real2 v342 = minus(v77, v13); real2 v346 = plus(v13, v77); real2 v45 = load(in, 43 << shift); real2 v109 = load(in, 107 << shift); real2 v347 = plus(v45, v109); real2 v341 = reverse(minus(v109, v45)); real2 v345 = minusplus(uminus(v341), v342); real2 v343 = minusplus(v341, v342); real2 v882 = minus(v347, v346); real2 v886 = plus(v346, v347); real2 v353 = ctimesminusplus(reverse(v343), ctbl[40], ctimes(v343, ctbl[41])); real2 v125 = load(in, 123 << shift); real2 v61 = load(in, 59 << shift); real2 v621 = reverse(minus(v125, v61)); real2 v627 = plus(v61, v125); real2 v93 = load(in, 91 << shift); real2 v29 = load(in, 27 << shift); real2 v622 = minus(v93, v29); real2 v626 = plus(v29, v93); real2 v887 = plus(v626, v627); real2 v881 = reverse(minus(v627, v626)); real2 v885 = minusplus(uminus(v881), v882); 
real2 v883 = minusplus(v881, v882); real2 v891 = ctimesminusplus(reverse(v883), ctbl[18], ctimes(v883, ctbl[19])); real2 v1009 = reverse(minus(v887, v886)); real2 v1015 = plus(v886, v887); real2 v1415 = plus(v763, v891); real2 v1411 = minus(v891, v763); real2 v895 = ctimesminusplus(reverse(v885), ctbl[15], ctimes(v885, ctbl[14])); real2 v1570 = minus(v895, v767); real2 v1574 = plus(v767, v895); real2 v1142 = plus(v1014, v1015); real2 v1138 = minus(v1015, v1014); real2 v1011 = minusplus(v1009, v1010); real2 v1013 = minusplus(uminus(v1009), v1010); real2 v1023 = ctimesminusplus(reverse(v1013), ctbl[6], ctimes(v1013, ctbl[12])); real2 v1019 = ctimesminusplus(reverse(v1011), ctbl[8], ctimes(v1011, ctbl[9])); real2 v1321 = reverse(minus(v1087, v1023)); real2 v1327 = plus(v1023, v1087); real2 v1414 = minusplus(uminus(v1410), v1411); real2 v1412 = minusplus(v1410, v1411); real2 v1424 = ctimesminusplus(reverse(v1414), ctbl[3], ctimes(v1414, ctbl[2])); real2 v1508 = plus(v1392, v1424); real2 v1502 = reverse(minus(v1424, v1392)); real2 v1264 = reverse(minus(v1083, v1019)); real2 v1270 = plus(v1019, v1083); real2 v1286 = plus(v1269, v1270); real2 v1280 = reverse(minus(v1270, v1269)); real2 v1268 = minusplus(uminus(v1264), v1265); real2 v1266 = minusplus(v1264, v1265); real2 v1141 = minusplus(uminus(v1137), v1138); real2 v1139 = minusplus(v1137, v1138); real2 v1147 = ctimesminusplus(reverse(v1139), ctbl[2], ctimes(v1139, ctbl[3])); real2 v1278 = ctimesminusplus(reverse(v1268), ctbl[0], ctimes(v1268, ctbl[0])); real2 v1151 = ctimesminusplus(reverse(v1141), ctbl[3], ctimes(v1141, ctbl[2])); real2 v1235 = plus(v1119, v1151); real2 v1229 = reverse(minus(v1151, v1119)); real2 v1420 = ctimesminusplus(reverse(v1412), ctbl[2], ctimes(v1412, ctbl[3])); real2 v1483 = reverse(minus(v1420, v1388)); real2 v1489 = plus(v1388, v1420); real2 v1274 = ctimesminusplus(reverse(v1266), ctbl[0], ctimes(v1266, ctbl[1])); real2 v1607 = plus(v1574, v1575); real2 v1601 = reverse(minus(v1575, v1574)); 
real2 v1605 = minusplus(uminus(v1601), v1602); real2 v1603 = minusplus(v1601, v1602); real2 v1175 = plus(v1142, v1143); real2 v1169 = reverse(minus(v1143, v1142)); real2 v1171 = minusplus(v1169, v1170); real2 v1173 = minusplus(uminus(v1169), v1170); real2 v1179 = ctimesminusplus(reverse(v1171), ctbl[0], ctimes(v1171, ctbl[1])); real2 v1191 = plus(v1174, v1175); real2 v1185 = reverse(minus(v1175, v1174)); real2 v1325 = minusplus(uminus(v1321), v1322); real2 v1323 = minusplus(v1321, v1322); real2 v1331 = ctimesminusplus(reverse(v1323), ctbl[0], ctimes(v1323, ctbl[1])); real2 v1448 = plus(v1415, v1416); real2 v1442 = reverse(minus(v1416, v1415)); real2 v1446 = minusplus(uminus(v1442), v1443); real2 v1444 = minusplus(v1442, v1443); real2 v1452 = ctimesminusplus(reverse(v1444), ctbl[0], ctimes(v1444, ctbl[1])); real2 v1464 = plus(v1447, v1448); real2 v1458 = reverse(minus(v1448, v1447)); real2 v1335 = ctimesminusplus(reverse(v1325), ctbl[0], ctimes(v1325, ctbl[0])); real2 v1337 = reverse(minus(v1327, v1326)); real2 v1343 = plus(v1326, v1327); real2 v1183 = ctimesminusplus(reverse(v1173), ctbl[0], ctimes(v1173, ctbl[0])); real2 v1456 = ctimesminusplus(reverse(v1446), ctbl[0], ctimes(v1446, ctbl[0])); real2 v1210 = reverse(minus(v1147, v1115)); real2 v1216 = plus(v1115, v1147); real2 v1623 = plus(v1606, v1607); real2 v1617 = reverse(minus(v1607, v1606)); real2 v1571 = minusplus(v1569, v1570); real2 v1573 = minusplus(uminus(v1569), v1570); real2 v1583 = ctimesminusplus(reverse(v1573), ctbl[3], ctimes(v1573, ctbl[2])); real2 v1661 = reverse(minus(v1583, v1551)); real2 v1667 = plus(v1551, v1583); real2 v1611 = ctimesminusplus(reverse(v1603), ctbl[0], ctimes(v1603, ctbl[1])); real2 v1615 = ctimesminusplus(reverse(v1605), ctbl[0], ctimes(v1605, ctbl[0])); real2 v1579 = ctimesminusplus(reverse(v1571), ctbl[2], ctimes(v1571, ctbl[3])); real2 v1648 = plus(v1547, v1579); real2 v1642 = reverse(minus(v1579, v1547)); real2 v92 = load(in, 90 << shift); real2 v28 = load(in, 26 << 
shift); real2 v604 = minus(v92, v28); real2 v608 = plus(v28, v92); real2 v60 = load(in, 58 << shift); real2 v124 = load(in, 122 << shift); real2 v603 = reverse(minus(v124, v60)); real2 v609 = plus(v60, v124); real2 v871 = plus(v608, v609); real2 v865 = reverse(minus(v609, v608)); real2 v76 = load(in, 74 << shift); real2 v12 = load(in, 10 << shift); real2 v322 = minus(v76, v12); real2 v326 = plus(v12, v76); real2 v108 = load(in, 106 << shift); real2 v44 = load(in, 42 << shift); real2 v321 = reverse(minus(v108, v44)); real2 v327 = plus(v44, v108); real2 v870 = plus(v326, v327); real2 v866 = minus(v327, v326); real2 v993 = reverse(minus(v871, v870)); real2 v999 = plus(v870, v871); real2 v869 = minusplus(uminus(v865), v866); real2 v867 = minusplus(v865, v866); real2 v875 = ctimesminusplus(reverse(v867), ctbl[10], ctimes(v867, ctbl[11])); real2 v879 = ctimesminusplus(reverse(v869), ctbl[12], ctimes(v869, ctbl[6])); real2 v36 = load(in, 34 << shift); real2 v100 = load(in, 98 << shift); real2 v167 = reverse(minus(v100, v36)); real2 v173 = plus(v36, v100); real2 v68 = load(in, 66 << shift); real2 v4 = load(in, 2 << shift); real2 v168 = minus(v68, v4); real2 v172 = plus(v4, v68); real2 v742 = plus(v172, v173); real2 v738 = minus(v173, v172); real2 v52 = load(in, 50 << shift); real2 v116 = load(in, 114 << shift); real2 v468 = reverse(minus(v116, v52)); real2 v474 = plus(v52, v116); real2 v84 = load(in, 82 << shift); real2 v20 = load(in, 18 << shift); real2 v469 = minus(v84, v20); real2 v473 = plus(v20, v84); real2 v743 = plus(v473, v474); real2 v737 = reverse(minus(v474, v473)); real2 v739 = minusplus(v737, v738); real2 v741 = minusplus(uminus(v737), v738); real2 v747 = ctimesminusplus(reverse(v739), ctbl[12], ctimes(v739, ctbl[13])); real2 v998 = plus(v742, v743); real2 v994 = minus(v743, v742); real2 v1399 = plus(v747, v875); real2 v1395 = minus(v875, v747); real2 v1122 = minus(v999, v998); real2 v1126 = plus(v998, v999); real2 v997 = minusplus(uminus(v993), v994); real2 
v995 = minusplus(v993, v994); real2 v1003 = ctimesminusplus(reverse(v995), ctbl[4], ctimes(v995, ctbl[5])); real2 v1007 = ctimesminusplus(reverse(v997), ctbl[2], ctimes(v997, ctbl[3])); real2 v80 = load(in, 78 << shift); real2 v16 = load(in, 14 << shift); real2 v397 = minus(v80, v16); real2 v401 = plus(v16, v80); real2 v48 = load(in, 46 << shift); real2 v112 = load(in, 110 << shift); real2 v402 = plus(v48, v112); real2 v396 = reverse(minus(v112, v48)); real2 v934 = plus(v401, v402); real2 v930 = minus(v402, v401); real2 v96 = load(in, 94 << shift); real2 v32 = load(in, 30 << shift); real2 v673 = minus(v96, v32); real2 v677 = plus(v32, v96); real2 v64 = load(in, 62 << shift); real2 v128 = load(in, 126 << shift); real2 v678 = plus(v64, v128); real2 v672 = reverse(minus(v128, v64)); real2 v929 = reverse(minus(v678, v677)); real2 v935 = plus(v677, v678); real2 v933 = minusplus(uminus(v929), v930); real2 v931 = minusplus(v929, v930); real2 v1057 = reverse(minus(v935, v934)); real2 v1063 = plus(v934, v935); real2 v939 = ctimesminusplus(reverse(v931), ctbl[6], ctimes(v931, ctbl[7])); real2 v104 = load(in, 102 << shift); real2 v40 = load(in, 38 << shift); real2 v251 = plus(v40, v104); real2 v245 = reverse(minus(v104, v40)); real2 v72 = load(in, 70 << shift); real2 v8 = load(in, 6 << shift); real2 v246 = minus(v72, v8); real2 v250 = plus(v8, v72); real2 v802 = minus(v251, v250); real2 v806 = plus(v250, v251); real2 v88 = load(in, 86 << shift); real2 v24 = load(in, 22 << shift); real2 v540 = plus(v24, v88); real2 v536 = minus(v88, v24); real2 v120 = load(in, 118 << shift); real2 v56 = load(in, 54 << shift); real2 v541 = plus(v56, v120); real2 v535 = reverse(minus(v120, v56)); real2 v807 = plus(v540, v541); real2 v801 = reverse(minus(v541, v540)); real2 v1062 = plus(v806, v807); real2 v1058 = minus(v807, v806); real2 v1059 = minusplus(v1057, v1058); real2 v1061 = minusplus(uminus(v1057), v1058); real2 v1127 = plus(v1062, v1063); real2 v1121 = reverse(minus(v1063, v1062)); 
real2 v1071 = ctimesminusplus(reverse(v1061), ctbl[3], ctimes(v1061, ctbl[2])); real2 v1067 = ctimesminusplus(reverse(v1059), ctbl[2], ctimes(v1059, ctbl[3])); real2 v1153 = reverse(minus(v1127, v1126)); real2 v1159 = plus(v1126, v1127); real2 v1123 = minusplus(v1121, v1122); real2 v1125 = minusplus(uminus(v1121), v1122); real2 v1254 = plus(v1003, v1067); real2 v1248 = reverse(minus(v1067, v1003)); real2 v1131 = ctimesminusplus(reverse(v1123), ctbl[0], ctimes(v1123, ctbl[1])); real2 v1305 = reverse(minus(v1071, v1007)); real2 v1311 = plus(v1007, v1071); real2 v1135 = ctimesminusplus(reverse(v1125), ctbl[0], ctimes(v1125, ctbl[0])); real2 v42 = load(in, 40 << shift); real2 v106 = load(in, 104 << shift); real2 v283 = reverse(minus(v106, v42)); real2 v289 = plus(v42, v106); real2 v10 = load(in, 8 << shift); real2 v74 = load(in, 72 << shift); real2 v284 = minus(v74, v10); real2 v288 = plus(v10, v74); real2 v838 = plus(v288, v289); real2 v834 = minus(v289, v288); real2 v66 = load(in, 64 << shift); real2 v2 = load(in, 0 << shift); real2 v132 = minus(v66, v2); real2 v136 = plus(v2, v66); real2 v98 = load(in, 96 << shift); real2 v34 = load(in, 32 << shift); real2 v131 = reverse(minus(v98, v34)); real2 v137 = plus(v34, v98); real2 v706 = minus(v137, v136); real2 v710 = plus(v136, v137); real2 v122 = load(in, 120 << shift); real2 v58 = load(in, 56 << shift); real2 v570 = reverse(minus(v122, v58)); real2 v576 = plus(v58, v122); real2 v90 = load(in, 88 << shift); real2 v26 = load(in, 24 << shift); real2 v575 = plus(v26, v90); real2 v571 = minus(v90, v26); real2 v839 = plus(v575, v576); real2 v833 = reverse(minus(v576, v575)); real2 v961 = reverse(minus(v839, v838)); real2 v967 = plus(v838, v839); real2 v50 = load(in, 48 << shift); real2 v114 = load(in, 112 << shift); real2 v438 = plus(v50, v114); real2 v432 = reverse(minus(v114, v50)); real2 v82 = load(in, 80 << shift); real2 v18 = load(in, 16 << shift); real2 v433 = minus(v82, v18); real2 v437 = plus(v18, v82); real2 v705 = 
reverse(minus(v438, v437)); real2 v711 = plus(v437, v438); real2 v962 = minus(v711, v710); real2 v966 = plus(v710, v711); real2 v1094 = plus(v966, v967); real2 v1090 = minus(v967, v966); real2 v126 = load(in, 124 << shift); real2 v62 = load(in, 60 << shift); real2 v643 = plus(v62, v126); real2 v637 = reverse(minus(v126, v62)); real2 v30 = load(in, 28 << shift); real2 v94 = load(in, 92 << shift); real2 v638 = minus(v94, v30); real2 v642 = plus(v30, v94); real2 v903 = plus(v642, v643); real2 v897 = reverse(minus(v643, v642)); real2 v14 = load(in, 12 << shift); real2 v78 = load(in, 76 << shift); real2 v361 = minus(v78, v14); real2 v365 = plus(v14, v78); real2 v46 = load(in, 44 << shift); real2 v110 = load(in, 108 << shift); real2 v360 = reverse(minus(v110, v46)); real2 v366 = plus(v46, v110); real2 v898 = minus(v366, v365); real2 v902 = plus(v365, v366); real2 v1031 = plus(v902, v903); real2 v1025 = reverse(minus(v903, v902)); real2 v102 = load(in, 100 << shift); real2 v38 = load(in, 36 << shift); real2 v205 = reverse(minus(v102, v38)); real2 v211 = plus(v38, v102); real2 v70 = load(in, 68 << shift); real2 v6 = load(in, 4 << shift); real2 v210 = plus(v6, v70); real2 v206 = minus(v70, v6); real2 v770 = minus(v211, v210); real2 v774 = plus(v210, v211); real2 v86 = load(in, 84 << shift); real2 v22 = load(in, 20 << shift); real2 v502 = minus(v86, v22); real2 v506 = plus(v22, v86); real2 v118 = load(in, 116 << shift); real2 v54 = load(in, 52 << shift); real2 v501 = reverse(minus(v118, v54)); real2 v507 = plus(v54, v118); real2 v769 = reverse(minus(v507, v506)); real2 v775 = plus(v506, v507); real2 v1030 = plus(v774, v775); real2 v1026 = minus(v775, v774); real2 v1089 = reverse(minus(v1031, v1030)); real2 v1095 = plus(v1030, v1031); real2 v1099 = minus(uplusminus(v1089), v1090); real2 v1103 = minus(uminusplus(v1089), v1090); real2 v1215 = plus(v1099, v1131); store(out, 72 << shift, minus(v1215, v1216)); store(out, 8 << shift, plus(v1215, v1216)); real2 v1211 = minus(v1131, 
v1099); store(out, 40 << shift, minus(uplusminus(v1210), v1211)); store(out, 104 << shift, minus(uminusplus(v1210), v1211)); real2 v1234 = plus(v1103, v1135); real2 v1230 = minus(v1135, v1103); store(out, 120 << shift, minus(uminusplus(v1229), v1230)); store(out, 56 << shift, minus(uplusminus(v1229), v1230)); store(out, 24 << shift, plus(v1234, v1235)); store(out, 88 << shift, minus(v1234, v1235)); real2 v1154 = minus(v1095, v1094); real2 v1158 = plus(v1094, v1095); real2 v1167 = minus(uminusplus(v1153), v1154); real2 v1163 = minus(uplusminus(v1153), v1154); store(out, 16 << shift, plus(v1163, v1179)); store(out, 80 << shift, minus(v1163, v1179)); store(out, 48 << shift, plus(v1167, v1183)); store(out, 112 << shift, minus(v1167, v1183)); real2 v1186 = minus(v1159, v1158); real2 v1190 = plus(v1158, v1159); store(out, 0 << shift, plus(v1190, v1191)); store(out, 64 << shift, minus(v1190, v1191)); store(out, 32 << shift, minus(uplusminus(v1185), v1186)); store(out, 96 << shift, minus(uminusplus(v1185), v1186)); real2 v1027 = minusplus(v1025, v1026); real2 v1029 = minusplus(uminus(v1025), v1026); real2 v971 = minus(uplusminus(v961), v962); real2 v975 = minus(uminusplus(v961), v962); real2 v1039 = ctimesminusplus(reverse(v1029), ctbl[0], ctimes(v1029, ctbl[0])); real2 v1310 = plus(v975, v1039); real2 v1306 = minus(v1039, v975); real2 v1342 = plus(v1310, v1311); real2 v1338 = minus(v1311, v1310); store(out, 12 << shift, plus(v1342, v1343)); store(out, 76 << shift, minus(v1342, v1343)); store(out, 108 << shift, minus(uminusplus(v1337), v1338)); store(out, 44 << shift, minus(uplusminus(v1337), v1338)); real2 v1315 = minus(uplusminus(v1305), v1306); store(out, 28 << shift, plus(v1315, v1331)); store(out, 92 << shift, minus(v1315, v1331)); real2 v1319 = minus(uminusplus(v1305), v1306); store(out, 60 << shift, plus(v1319, v1335)); store(out, 124 << shift, minus(v1319, v1335)); real2 v1035 = ctimesminusplus(reverse(v1027), ctbl[0], ctimes(v1027, ctbl[1])); real2 v1253 = 
plus(v971, v1035); real2 v1249 = minus(v1035, v971); real2 v1281 = minus(v1254, v1253); store(out, 36 << shift, minus(uplusminus(v1280), v1281)); store(out, 100 << shift, minus(uminusplus(v1280), v1281)); real2 v1285 = plus(v1253, v1254); store(out, 68 << shift, minus(v1285, v1286)); store(out, 4 << shift, plus(v1285, v1286)); real2 v1262 = minus(uminusplus(v1248), v1249); store(out, 116 << shift, minus(v1262, v1278)); store(out, 52 << shift, plus(v1262, v1278)); real2 v1258 = minus(uplusminus(v1248), v1249); store(out, 20 << shift, plus(v1258, v1274)); store(out, 84 << shift, minus(v1258, v1274)); real2 v901 = minusplus(uminus(v897), v898); real2 v899 = minusplus(v897, v898); real2 v805 = minusplus(uminus(v801), v802); real2 v803 = minusplus(v801, v802); real2 v811 = ctimesminusplus(reverse(v803), ctbl[8], ctimes(v803, ctbl[9])); real2 v1400 = plus(v811, v939); real2 v1394 = reverse(minus(v939, v811)); real2 v837 = minusplus(uminus(v833), v834); real2 v835 = minusplus(v833, v834); real2 v843 = ctimesminusplus(reverse(v835), ctbl[0], ctimes(v835, ctbl[1])); real2 v773 = minusplus(uminus(v769), v770); real2 v771 = minusplus(v769, v770); real2 v1398 = minusplus(uminus(v1394), v1395); real2 v1396 = minusplus(v1394, v1395); real2 v907 = ctimesminusplus(reverse(v899), ctbl[2], ctimes(v899, ctbl[3])); real2 v715 = minus(uplusminus(v705), v706); real2 v719 = minus(uminusplus(v705), v706); real2 v1363 = minus(v843, v715); real2 v1367 = plus(v715, v843); real2 v1408 = ctimesminusplus(reverse(v1398), ctbl[0], ctimes(v1398, ctbl[0])); real2 v779 = ctimesminusplus(reverse(v771), ctbl[4], ctimes(v771, ctbl[5])); real2 v1362 = reverse(minus(v907, v779)); real2 v1368 = plus(v779, v907); real2 v1376 = minus(uminusplus(v1362), v1363); real2 v1372 = minus(uplusminus(v1362), v1363); real2 v1507 = plus(v1376, v1408); real2 v1503 = minus(v1408, v1376); store(out, 122 << shift, minus(uminusplus(v1502), v1503)); store(out, 58 << shift, minus(uplusminus(v1502), v1503)); store(out, 90 << 
shift, minus(v1507, v1508)); store(out, 26 << shift, plus(v1507, v1508)); real2 v1404 = ctimesminusplus(reverse(v1396), ctbl[0], ctimes(v1396, ctbl[1])); real2 v1484 = minus(v1404, v1372); real2 v1488 = plus(v1372, v1404); store(out, 10 << shift, plus(v1488, v1489)); store(out, 74 << shift, minus(v1488, v1489)); store(out, 106 << shift, minus(uminusplus(v1483), v1484)); store(out, 42 << shift, minus(uplusminus(v1483), v1484)); real2 v1426 = reverse(minus(v1400, v1399)); real2 v1432 = plus(v1399, v1400); real2 v1431 = plus(v1367, v1368); real2 v1427 = minus(v1368, v1367); real2 v1463 = plus(v1431, v1432); store(out, 66 << shift, minus(v1463, v1464)); store(out, 2 << shift, plus(v1463, v1464)); real2 v1459 = minus(v1432, v1431); store(out, 98 << shift, minus(uminusplus(v1458), v1459)); store(out, 34 << shift, minus(uplusminus(v1458), v1459)); real2 v1436 = minus(uplusminus(v1426), v1427); store(out, 82 << shift, minus(v1436, v1452)); store(out, 18 << shift, plus(v1436, v1452)); real2 v1440 = minus(uminusplus(v1426), v1427); store(out, 50 << shift, plus(v1440, v1456)); store(out, 114 << shift, minus(v1440, v1456)); real2 v911 = ctimesminusplus(reverse(v901), ctbl[3], ctimes(v901, ctbl[2])); real2 v783 = ctimesminusplus(reverse(v773), ctbl[2], ctimes(v773, ctbl[3])); real2 v1527 = plus(v783, v911); real2 v1521 = reverse(minus(v911, v783)); real2 v943 = ctimesminusplus(reverse(v933), ctbl[9], ctimes(v933, ctbl[8])); real2 v847 = ctimesminusplus(reverse(v837), ctbl[0], ctimes(v837, ctbl[0])); real2 v1522 = minus(v847, v719); real2 v1526 = plus(v719, v847); real2 v1590 = plus(v1526, v1527); real2 v1586 = minus(v1527, v1526); real2 v815 = ctimesminusplus(reverse(v805), ctbl[6], ctimes(v805, ctbl[12])); real2 v1559 = plus(v815, v943); real2 v1553 = reverse(minus(v943, v815)); real2 v751 = ctimesminusplus(reverse(v741), ctbl[8], ctimes(v741, ctbl[9])); real2 v1558 = plus(v751, v879); real2 v1554 = minus(v879, v751); real2 v1585 = reverse(minus(v1559, v1558)); real2 v1591 = 
plus(v1558, v1559); real2 v1618 = minus(v1591, v1590); real2 v1622 = plus(v1590, v1591); store(out, 70 << shift, minus(v1622, v1623)); store(out, 6 << shift, plus(v1622, v1623)); store(out, 102 << shift, minus(uminusplus(v1617), v1618)); store(out, 38 << shift, minus(uplusminus(v1617), v1618)); real2 v1599 = minus(uminusplus(v1585), v1586); real2 v1595 = minus(uplusminus(v1585), v1586); store(out, 22 << shift, plus(v1595, v1611)); store(out, 86 << shift, minus(v1595, v1611)); store(out, 54 << shift, plus(v1599, v1615)); store(out, 118 << shift, minus(v1599, v1615)); real2 v1555 = minusplus(v1553, v1554); real2 v1557 = minusplus(uminus(v1553), v1554); real2 v1531 = minus(uplusminus(v1521), v1522); real2 v1535 = minus(uminusplus(v1521), v1522); real2 v1567 = ctimesminusplus(reverse(v1557), ctbl[0], ctimes(v1557, ctbl[0])); real2 v1666 = plus(v1535, v1567); store(out, 94 << shift, minus(v1666, v1667)); store(out, 30 << shift, plus(v1666, v1667)); real2 v1662 = minus(v1567, v1535); store(out, 126 << shift, minus(uminusplus(v1661), v1662)); store(out, 62 << shift, minus(uplusminus(v1661), v1662)); real2 v1563 = ctimesminusplus(reverse(v1555), ctbl[0], ctimes(v1555, ctbl[1])); real2 v1643 = minus(v1563, v1531); store(out, 46 << shift, minus(uplusminus(v1642), v1643)); store(out, 110 << shift, minus(uminusplus(v1642), v1643)); real2 v1647 = plus(v1531, v1563); store(out, 78 << shift, minus(v1647, v1648)); store(out, 14 << shift, plus(v1647, v1648)); real2 v436 = minusplus(uminus(v432), v433); real2 v434 = minusplus(v432, v433); real2 v145 = minus(uminusplus(v131), v132); real2 v141 = minus(uplusminus(v131), v132); real2 v607 = minusplus(uminus(v603), v604); real2 v605 = minusplus(v603, v604); real2 v615 = ctimesminusplus(reverse(v605), ctbl[22], ctimes(v605, ctbl[23])); real2 v325 = minusplus(uminus(v321), v322); real2 v323 = minusplus(v321, v322); real2 v171 = minusplus(uminus(v167), v168); real2 v169 = minusplus(v167, v168); real2 v179 = ctimesminusplus(reverse(v169), 
ctbl[28], ctimes(v169, ctbl[29])); real2 v333 = ctimesminusplus(reverse(v323), ctbl[24], ctimes(v323, ctbl[25])); real2 v1718 = plus(v333, v615); real2 v1712 = reverse(minus(v615, v333)); real2 v470 = minusplus(v468, v469); real2 v472 = minusplus(uminus(v468), v469); real2 v478 = ctimesminusplus(reverse(v470), ctbl[26], ctimes(v470, ctbl[27])); real2 v1717 = plus(v179, v478); real2 v1713 = minus(v478, v179); real2 v1841 = minus(v1718, v1717); real2 v1845 = plus(v1717, v1718); real2 v674 = minusplus(v672, v673); real2 v676 = minusplus(uminus(v672), v673); real2 v249 = minusplus(uminus(v245), v246); real2 v247 = minusplus(v245, v246); real2 v255 = ctimesminusplus(reverse(v247), ctbl[20], ctimes(v247, ctbl[21])); real2 v398 = minusplus(v396, v397); real2 v400 = minusplus(uminus(v396), v397); real2 v408 = ctimesminusplus(reverse(v398), ctbl[16], ctimes(v398, ctbl[17])); real2 v572 = minusplus(v570, v571); real2 v574 = minusplus(uminus(v570), v571); real2 v625 = minusplus(uminus(v621), v622); real2 v623 = minusplus(v621, v622); real2 v631 = ctimesminusplus(reverse(v623), ctbl[38], ctimes(v623, ctbl[39])); real2 v1728 = reverse(minus(v631, v353)); real2 v1734 = plus(v353, v631); real2 v1857 = minus(v1734, v1733); real2 v1861 = plus(v1733, v1734); real2 v580 = ctimesminusplus(reverse(v572), ctbl[2], ctimes(v572, ctbl[3])); real2 v537 = minusplus(v535, v536); real2 v539 = minusplus(uminus(v535), v536); real2 v546 = ctimesminusplus(reverse(v537), ctbl[18], ctimes(v537, ctbl[19])); real2 v682 = ctimesminusplus(reverse(v674), ctbl[14], ctimes(v674, ctbl[15])); real2 v1776 = reverse(minus(v682, v408)); real2 v1782 = plus(v408, v682); real2 v641 = minusplus(uminus(v637), v638); real2 v639 = minusplus(v637, v638); real2 v287 = minusplus(uminus(v283), v284); real2 v285 = minusplus(v283, v284); real2 v295 = ctimesminusplus(reverse(v285), ctbl[4], ctimes(v285, ctbl[5])); real2 v1680 = reverse(minus(v580, v295)); real2 v1686 = plus(v295, v580); real2 v505 = minusplus(uminus(v501), 
v502); real2 v503 = minusplus(v501, v502); real2 v513 = ctimesminusplus(reverse(v503), ctbl[10], ctimes(v503, ctbl[11])); real2 v444 = ctimesminusplus(reverse(v434), ctbl[0], ctimes(v434, ctbl[1])); real2 v648 = ctimesminusplus(reverse(v639), ctbl[6], ctimes(v639, ctbl[7])); real2 v1685 = plus(v141, v444); real2 v1681 = minus(v444, v141); real2 v1888 = reverse(minus(v1862, v1861)); real2 v1894 = plus(v1861, v1862); real2 v1910 = plus(v1893, v1894); real2 v1904 = reverse(minus(v1894, v1893)); real2 v1813 = plus(v1685, v1686); real2 v1809 = minus(v1686, v1685); real2 v1777 = minus(v546, v255); real2 v1781 = plus(v255, v546); real2 v207 = minusplus(v205, v206); real2 v209 = minusplus(uminus(v205), v206); real2 v1840 = reverse(minus(v1782, v1781)); real2 v1846 = plus(v1781, v1782); real2 v1878 = plus(v1845, v1846); real2 v1872 = reverse(minus(v1846, v1845)); real2 v364 = minusplus(uminus(v360), v361); real2 v362 = minusplus(v360, v361); real2 v370 = ctimesminusplus(reverse(v362), ctbl[8], ctimes(v362, ctbl[9])); real2 v1744 = reverse(minus(v648, v370)); real2 v1750 = plus(v370, v648); real2 v217 = ctimesminusplus(reverse(v207), ctbl[12], ctimes(v207, ctbl[13])); real2 v1749 = plus(v217, v513); real2 v1745 = minus(v513, v217); real2 v1814 = plus(v1749, v1750); real2 v1808 = reverse(minus(v1750, v1749)); real2 v1877 = plus(v1813, v1814); real2 v1873 = minus(v1814, v1813); real2 v1905 = minus(v1878, v1877); real2 v1909 = plus(v1877, v1878); store(out, 33 << shift, minus(uplusminus(v1904), v1905)); store(out, 97 << shift, minus(uminusplus(v1904), v1905)); store(out, 65 << shift, minus(v1909, v1910)); store(out, 1 << shift, plus(v1909, v1910)); real2 v1890 = minusplus(v1888, v1889); real2 v1892 = minusplus(uminus(v1888), v1889); real2 v1902 = ctimesminusplus(reverse(v1892), ctbl[0], ctimes(v1892, ctbl[0])); real2 v1886 = minus(uminusplus(v1872), v1873); store(out, 49 << shift, plus(v1886, v1902)); store(out, 113 << shift, minus(v1886, v1902)); real2 v1882 = 
minus(uplusminus(v1872), v1873); real2 v1898 = ctimesminusplus(reverse(v1890), ctbl[0], ctimes(v1890, ctbl[1])); store(out, 81 << shift, minus(v1882, v1898)); store(out, 17 << shift, plus(v1882, v1898)); real2 v1858 = minusplus(v1856, v1857); real2 v1860 = minusplus(uminus(v1856), v1857); real2 v1870 = ctimesminusplus(reverse(v1860), ctbl[3], ctimes(v1860, ctbl[2])); real2 v1948 = reverse(minus(v1870, v1838)); real2 v1954 = plus(v1838, v1870); real2 v1822 = minus(uminusplus(v1808), v1809); real2 v1818 = minus(uplusminus(v1808), v1809); real2 v1842 = minusplus(v1840, v1841); real2 v1844 = minusplus(uminus(v1840), v1841); real2 v1854 = ctimesminusplus(reverse(v1844), ctbl[0], ctimes(v1844, ctbl[0])); real2 v1949 = minus(v1854, v1822); store(out, 121 << shift, minus(uminusplus(v1948), v1949)); store(out, 57 << shift, minus(uplusminus(v1948), v1949)); real2 v1953 = plus(v1822, v1854); store(out, 89 << shift, minus(v1953, v1954)); store(out, 25 << shift, plus(v1953, v1954)); real2 v1850 = ctimesminusplus(reverse(v1842), ctbl[0], ctimes(v1842, ctbl[1])); real2 v1866 = ctimesminusplus(reverse(v1858), ctbl[2], ctimes(v1858, ctbl[3])); real2 v1929 = reverse(minus(v1866, v1834)); real2 v1935 = plus(v1834, v1866); real2 v1930 = minus(v1850, v1818); store(out, 105 << shift, minus(uminusplus(v1929), v1930)); store(out, 41 << shift, minus(uplusminus(v1929), v1930)); real2 v1934 = plus(v1818, v1850); store(out, 73 << shift, minus(v1934, v1935)); store(out, 9 << shift, plus(v1934, v1935)); real2 v1690 = minus(uplusminus(v1680), v1681); real2 v1694 = minus(uminusplus(v1680), v1681); real2 v1716 = minusplus(uminus(v1712), v1713); real2 v1714 = minusplus(v1712, v1713); real2 v1730 = minusplus(v1728, v1729); real2 v1732 = minusplus(uminus(v1728), v1729); real2 v1742 = ctimesminusplus(reverse(v1732), ctbl[6], ctimes(v1732, ctbl[12])); real2 v1726 = ctimesminusplus(reverse(v1716), ctbl[2], ctimes(v1716, ctbl[3])); real2 v1780 = minusplus(uminus(v1776), v1777); real2 v1778 = 
minusplus(v1776, v1777); real2 v1790 = ctimesminusplus(reverse(v1780), ctbl[3], ctimes(v1780, ctbl[2])); real2 v2030 = plus(v1726, v1790); real2 v2024 = reverse(minus(v1790, v1726)); real2 v1806 = ctimesminusplus(reverse(v1796), ctbl[9], ctimes(v1796, ctbl[8])); real2 v1746 = minusplus(v1744, v1745); real2 v1748 = minusplus(uminus(v1744), v1745); real2 v2040 = reverse(minus(v1806, v1742)); real2 v2046 = plus(v1742, v1806); real2 v2062 = plus(v2045, v2046); real2 v2056 = reverse(minus(v2046, v2045)); real2 v1758 = ctimesminusplus(reverse(v1748), ctbl[0], ctimes(v1748, ctbl[0])); real2 v2025 = minus(v1758, v1694); real2 v2029 = plus(v1694, v1758); real2 v2057 = minus(v2030, v2029); store(out, 109 << shift, minus(uminusplus(v2056), v2057)); store(out, 45 << shift, minus(uplusminus(v2056), v2057)); real2 v2061 = plus(v2029, v2030); store(out, 13 << shift, plus(v2061, v2062)); store(out, 77 << shift, minus(v2061, v2062)); real2 v2044 = minusplus(uminus(v2040), v2041); real2 v2042 = minusplus(v2040, v2041); real2 v2054 = ctimesminusplus(reverse(v2044), ctbl[0], ctimes(v2044, ctbl[0])); real2 v2038 = minus(uminusplus(v2024), v2025); real2 v2034 = minus(uplusminus(v2024), v2025); store(out, 125 << shift, minus(v2038, v2054)); store(out, 61 << shift, plus(v2038, v2054)); real2 v2050 = ctimesminusplus(reverse(v2042), ctbl[0], ctimes(v2042, ctbl[1])); store(out, 29 << shift, plus(v2034, v2050)); store(out, 93 << shift, minus(v2034, v2050)); real2 v1738 = ctimesminusplus(reverse(v1730), ctbl[8], ctimes(v1730, ctbl[9])); real2 v1802 = ctimesminusplus(reverse(v1794), ctbl[6], ctimes(v1794, ctbl[7])); real2 v1989 = plus(v1738, v1802); real2 v1983 = reverse(minus(v1802, v1738)); real2 v1999 = reverse(minus(v1989, v1988)); real2 v2005 = plus(v1988, v1989); real2 v1722 = ctimesminusplus(reverse(v1714), ctbl[4], ctimes(v1714, ctbl[5])); real2 v1786 = ctimesminusplus(reverse(v1778), ctbl[2], ctimes(v1778, ctbl[3])); real2 v1973 = plus(v1722, v1786); real2 v1967 = reverse(minus(v1786, 
v1722)); real2 v1754 = ctimesminusplus(reverse(v1746), ctbl[0], ctimes(v1746, ctbl[1])); real2 v1972 = plus(v1690, v1754); real2 v1968 = minus(v1754, v1690); real2 v2004 = plus(v1972, v1973); store(out, 5 << shift, plus(v2004, v2005)); store(out, 69 << shift, minus(v2004, v2005)); real2 v2000 = minus(v1973, v1972); store(out, 37 << shift, minus(uplusminus(v1999), v2000)); store(out, 101 << shift, minus(uminusplus(v1999), v2000)); real2 v1985 = minusplus(v1983, v1984); real2 v1987 = minusplus(uminus(v1983), v1984); real2 v1993 = ctimesminusplus(reverse(v1985), ctbl[0], ctimes(v1985, ctbl[1])); real2 v1977 = minus(uplusminus(v1967), v1968); store(out, 21 << shift, plus(v1977, v1993)); store(out, 85 << shift, minus(v1977, v1993)); real2 v1981 = minus(uminusplus(v1967), v1968); real2 v1997 = ctimesminusplus(reverse(v1987), ctbl[0], ctimes(v1987, ctbl[0])); store(out, 117 << shift, minus(v1981, v1997)); store(out, 53 << shift, plus(v1981, v1997)); real2 v703 = ctimesminusplus(reverse(v692), ctbl[45], ctimes(v692, ctbl[44])); real2 v261 = ctimesminusplus(reverse(v249), ctbl[26], ctimes(v249, ctbl[27])); real2 v550 = ctimesminusplus(reverse(v539), ctbl[15], ctimes(v539, ctbl[14])); real2 v413 = ctimesminusplus(reverse(v400), ctbl[18], ctimes(v400, ctbl[24])); real2 v2178 = minus(v550, v261); real2 v2182 = plus(v261, v550); real2 v686 = ctimesminusplus(reverse(v676), ctbl[21], ctimes(v676, ctbl[20])); real2 v2183 = plus(v413, v686); real2 v2177 = reverse(minus(v686, v413)); real2 v203 = ctimesminusplus(reverse(v191), ctbl[56], ctimes(v191, ctbl[57])); real2 v281 = ctimesminusplus(reverse(v267), ctbl[50], ctimes(v267, ctbl[51])); real2 v2247 = plus(v2182, v2183); real2 v2241 = reverse(minus(v2183, v2182)); real2 v619 = ctimesminusplus(reverse(v607), ctbl[27], ctimes(v607, ctbl[26])); real2 v358 = ctimesminusplus(reverse(v345), ctbl[30], ctimes(v345, ctbl[60])); real2 v499 = ctimesminusplus(reverse(v488), ctbl[36], ctimes(v488, ctbl[54])); real2 v2130 = minus(v499, v203); 
real2 v2134 = plus(v203, v499); real2 v185 = ctimesminusplus(reverse(v171), ctbl[20], ctimes(v171, ctbl[21])); real2 v339 = ctimesminusplus(reverse(v325), ctbl[14], ctimes(v325, ctbl[15])); real2 v2119 = plus(v339, v619); real2 v2113 = reverse(minus(v619, v339)); real2 v482 = ctimesminusplus(reverse(v472), ctbl[24], ctimes(v472, ctbl[18])); real2 v2114 = minus(v482, v185); real2 v2118 = plus(v185, v482); real2 v2246 = plus(v2118, v2119); real2 v2242 = minus(v2119, v2118); real2 v584 = ctimesminusplus(reverse(v574), ctbl[3], ctimes(v574, ctbl[2])); real2 v652 = ctimesminusplus(reverse(v641), ctbl[9], ctimes(v641, ctbl[8])); real2 v568 = ctimesminusplus(reverse(v556), ctbl[39], ctimes(v556, ctbl[38])); real2 v2194 = minus(v568, v281); real2 v2198 = plus(v281, v568); real2 v517 = ctimesminusplus(reverse(v505), ctbl[12], ctimes(v505, ctbl[6])); real2 v430 = ctimesminusplus(reverse(v419), ctbl[42], ctimes(v419, ctbl[48])); real2 v2193 = reverse(minus(v703, v430)); real2 v2199 = plus(v430, v703); real2 v2273 = reverse(minus(v2247, v2246)); real2 v2279 = plus(v2246, v2247); real2 v375 = ctimesminusplus(reverse(v364), ctbl[6], ctimes(v364, ctbl[12])); real2 v2145 = reverse(minus(v652, v375)); real2 v2151 = plus(v375, v652); real2 v2263 = plus(v2198, v2199); real2 v2257 = reverse(minus(v2199, v2198)); real2 v448 = ctimesminusplus(reverse(v436), ctbl[0], ctimes(v436, ctbl[0])); real2 v635 = ctimesminusplus(reverse(v625), ctbl[33], ctimes(v625, ctbl[32])); real2 v2129 = reverse(minus(v635, v358)); real2 v2135 = plus(v358, v635); real2 v2258 = minus(v2135, v2134); real2 v2262 = plus(v2134, v2135); real2 v2086 = plus(v145, v448); real2 v2082 = minus(v448, v145); real2 v301 = ctimesminusplus(reverse(v287), ctbl[2], ctimes(v287, ctbl[3])); real2 v223 = ctimesminusplus(reverse(v209), ctbl[8], ctimes(v209, ctbl[9])); real2 v2150 = plus(v223, v517); real2 v2146 = minus(v517, v223); real2 v2081 = reverse(minus(v584, v301)); real2 v2087 = plus(v301, v584); real2 v2210 = minus(v2087, 
v2086); real2 v2214 = plus(v2086, v2087); real2 v2215 = plus(v2150, v2151); real2 v2209 = reverse(minus(v2151, v2150)); real2 v2289 = reverse(minus(v2263, v2262)); real2 v2295 = plus(v2262, v2263); real2 v2311 = plus(v2294, v2295); real2 v2305 = reverse(minus(v2295, v2294)); real2 v2274 = minus(v2215, v2214); real2 v2278 = plus(v2214, v2215); real2 v2306 = minus(v2279, v2278); real2 v2310 = plus(v2278, v2279); store(out, 35 << shift, minus(uplusminus(v2305), v2306)); store(out, 99 << shift, minus(uminusplus(v2305), v2306)); store(out, 3 << shift, plus(v2310, v2311)); store(out, 67 << shift, minus(v2310, v2311)); real2 v2293 = minusplus(uminus(v2289), v2290); real2 v2291 = minusplus(v2289, v2290); real2 v2303 = ctimesminusplus(reverse(v2293), ctbl[0], ctimes(v2293, ctbl[0])); real2 v2287 = minus(uminusplus(v2273), v2274); store(out, 51 << shift, plus(v2287, v2303)); store(out, 115 << shift, minus(v2287, v2303)); real2 v2283 = minus(uplusminus(v2273), v2274); real2 v2299 = ctimesminusplus(reverse(v2291), ctbl[0], ctimes(v2291, ctbl[1])); store(out, 19 << shift, plus(v2283, v2299)); store(out, 83 << shift, minus(v2283, v2299)); real2 v2261 = minusplus(uminus(v2257), v2258); real2 v2259 = minusplus(v2257, v2258); real2 v2243 = minusplus(v2241, v2242); real2 v2245 = minusplus(uminus(v2241), v2242); real2 v2251 = ctimesminusplus(reverse(v2243), ctbl[0], ctimes(v2243, ctbl[1])); real2 v2267 = ctimesminusplus(reverse(v2259), ctbl[2], ctimes(v2259, ctbl[3])); real2 v2229 = minusplus(uminus(v2225), v2226); real2 v2227 = minusplus(v2225, v2226); real2 v2235 = ctimesminusplus(reverse(v2227), ctbl[4], ctimes(v2227, ctbl[5])); real2 v2330 = reverse(minus(v2267, v2235)); real2 v2336 = plus(v2235, v2267); real2 v2219 = minus(uplusminus(v2209), v2210); real2 v2223 = minus(uminusplus(v2209), v2210); real2 v2335 = plus(v2219, v2251); real2 v2331 = minus(v2251, v2219); store(out, 43 << shift, minus(uplusminus(v2330), v2331)); store(out, 107 << shift, minus(uminusplus(v2330), v2331)); 
store(out, 75 << shift, minus(v2335, v2336)); store(out, 11 << shift, plus(v2335, v2336)); real2 v2239 = ctimesminusplus(reverse(v2229), ctbl[2], ctimes(v2229, ctbl[3])); real2 v2271 = ctimesminusplus(reverse(v2261), ctbl[3], ctimes(v2261, ctbl[2])); real2 v2349 = reverse(minus(v2271, v2239)); real2 v2355 = plus(v2239, v2271); real2 v2255 = ctimesminusplus(reverse(v2245), ctbl[0], ctimes(v2245, ctbl[0])); real2 v2350 = minus(v2255, v2223); store(out, 59 << shift, minus(uplusminus(v2349), v2350)); store(out, 123 << shift, minus(uminusplus(v2349), v2350)); real2 v2354 = plus(v2223, v2255); store(out, 91 << shift, minus(v2354, v2355)); store(out, 27 << shift, plus(v2354, v2355)); real2 v2165 = minusplus(uminus(v2161), v2162); real2 v2163 = minusplus(v2161, v2162); real2 v2133 = minusplus(uminus(v2129), v2130); real2 v2131 = minusplus(v2129, v2130); real2 v2195 = minusplus(v2193, v2194); real2 v2197 = minusplus(uminus(v2193), v2194); real2 v2203 = ctimesminusplus(reverse(v2195), ctbl[6], ctimes(v2195, ctbl[7])); real2 v2171 = ctimesminusplus(reverse(v2163), ctbl[10], ctimes(v2163, ctbl[11])); real2 v2385 = minus(v2171, v2107); real2 v2389 = plus(v2107, v2171); real2 v2139 = ctimesminusplus(reverse(v2131), ctbl[8], ctimes(v2131, ctbl[9])); real2 v2390 = plus(v2139, v2203); real2 v2384 = reverse(minus(v2203, v2139)); real2 v2406 = plus(v2389, v2390); real2 v2400 = reverse(minus(v2390, v2389)); real2 v2181 = minusplus(uminus(v2177), v2178); real2 v2179 = minusplus(v2177, v2178); real2 v2091 = minus(uplusminus(v2081), v2082); real2 v2095 = minus(uminusplus(v2081), v2082); real2 v2117 = minusplus(uminus(v2113), v2114); real2 v2115 = minusplus(v2113, v2114); real2 v2123 = ctimesminusplus(reverse(v2115), ctbl[4], ctimes(v2115, ctbl[5])); real2 v2187 = ctimesminusplus(reverse(v2179), ctbl[2], ctimes(v2179, ctbl[3])); real2 v2374 = plus(v2123, v2187); real2 v2368 = reverse(minus(v2187, v2123)); real2 v2149 = minusplus(uminus(v2145), v2146); real2 v2147 = minusplus(v2145, 
v2146); real2 v2155 = ctimesminusplus(reverse(v2147), ctbl[0], ctimes(v2147, ctbl[1])); real2 v2373 = plus(v2091, v2155); real2 v2369 = minus(v2155, v2091); real2 v2401 = minus(v2374, v2373); real2 v2405 = plus(v2373, v2374); store(out, 71 << shift, minus(v2405, v2406)); store(out, 7 << shift, plus(v2405, v2406)); store(out, 103 << shift, minus(uminusplus(v2400), v2401)); store(out, 39 << shift, minus(uplusminus(v2400), v2401)); real2 v2388 = minusplus(uminus(v2384), v2385); real2 v2386 = minusplus(v2384, v2385); real2 v2398 = ctimesminusplus(reverse(v2388), ctbl[0], ctimes(v2388, ctbl[0])); real2 v2378 = minus(uplusminus(v2368), v2369); real2 v2382 = minus(uminusplus(v2368), v2369); store(out, 55 << shift, plus(v2382, v2398)); store(out, 119 << shift, minus(v2382, v2398)); real2 v2394 = ctimesminusplus(reverse(v2386), ctbl[0], ctimes(v2386, ctbl[1])); store(out, 23 << shift, plus(v2378, v2394)); store(out, 87 << shift, minus(v2378, v2394)); real2 v2207 = ctimesminusplus(reverse(v2197), ctbl[9], ctimes(v2197, ctbl[8])); real2 v2111 = ctimesminusplus(reverse(v2101), ctbl[8], ctimes(v2101, ctbl[9])); real2 v2175 = ctimesminusplus(reverse(v2165), ctbl[12], ctimes(v2165, ctbl[6])); real2 v2446 = plus(v2111, v2175); real2 v2442 = minus(v2175, v2111); real2 v2143 = ctimesminusplus(reverse(v2133), ctbl[6], ctimes(v2133, ctbl[12])); real2 v2441 = reverse(minus(v2207, v2143)); real2 v2447 = plus(v2143, v2207); real2 v2443 = minusplus(v2441, v2442); real2 v2445 = minusplus(uminus(v2441), v2442); real2 v2159 = ctimesminusplus(reverse(v2149), ctbl[0], ctimes(v2149, ctbl[0])); real2 v2455 = ctimesminusplus(reverse(v2445), ctbl[0], ctimes(v2445, ctbl[0])); real2 v2127 = ctimesminusplus(reverse(v2117), ctbl[2], ctimes(v2117, ctbl[3])); real2 v2191 = ctimesminusplus(reverse(v2181), ctbl[3], ctimes(v2181, ctbl[2])); real2 v2431 = plus(v2127, v2191); real2 v2425 = reverse(minus(v2191, v2127)); real2 v2426 = minus(v2159, v2095); real2 v2430 = plus(v2095, v2159); real2 v2439 = 
minus(uminusplus(v2425), v2426); store(out, 127 << shift, minus(v2439, v2455)); store(out, 63 << shift, plus(v2439, v2455)); real2 v2435 = minus(uplusminus(v2425), v2426); real2 v2451 = ctimesminusplus(reverse(v2443), ctbl[0], ctimes(v2443, ctbl[1])); store(out, 95 << shift, minus(v2435, v2451)); store(out, 31 << shift, plus(v2435, v2451)); real2 v2463 = plus(v2446, v2447); real2 v2457 = reverse(minus(v2447, v2446)); real2 v2458 = minus(v2431, v2430); store(out, 47 << shift, minus(uplusminus(v2457), v2458)); store(out, 111 << shift, minus(uminusplus(v2457), v2458)); real2 v2462 = plus(v2430, v2431); store(out, 79 << shift, minus(v2462, v2463)); store(out, 15 << shift, plus(v2462, v2463)); // Pres : 68088 } } ALIGNED(8192) void but128f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * (i0 >> outShift); // Pres : 148586 real2 v56 = load(in, 54 << inShift); real2 v120 = load(in, 118 << inShift); real2 v571 = reverse(minus(v56, v120)); real2 v577 = plus(v56, v120); real2 v24 = load(in, 22 << inShift); real2 v88 = load(in, 86 << inShift); real2 v576 = plus(v24, v88); real2 v572 = minus(v88, v24); real2 v573 = minusplus(v571, v572); real2 v575 = minusplus(uminus(v571), v572); real2 v589 = ctimesminusplus(reverse(v575), tbl[92 + tbloffset], ctimes(v575, tbl[93 + tbloffset])); real2 v583 = ctimesminusplus(reverse(v573), tbl[90 + tbloffset], ctimes(v573, tbl[91 + tbloffset])); real2 v897 = plus(v576, v577); real2 v891 = reverse(minus(v576, v577)); real2 v8 = load(in, 6 << inShift); real2 v72 = load(in, 70 << inShift); real2 v252 = minus(v72, v8); real2 v256 = plus(v8, v72); real2 v104 = load(in, 102 << inShift); real2 v40 = load(in, 38 << inShift); real2 v251 = 
reverse(minus(v40, v104)); real2 v257 = plus(v40, v104); real2 v255 = minusplus(uminus(v251), v252); real2 v253 = minusplus(v251, v252); real2 v263 = ctimesminusplus(reverse(v253), tbl[26 + tbloffset], ctimes(v253, tbl[27 + tbloffset])); real2 v896 = plus(v256, v257); real2 v892 = minus(v257, v256); real2 v895 = minusplus(uminus(v891), v892); real2 v893 = minusplus(v891, v892); real2 v909 = ctimesminusplus(reverse(v895), tbl[156 + tbloffset], ctimes(v895, tbl[157 + tbloffset])); real2 v903 = ctimesminusplus(reverse(v893), tbl[154 + tbloffset], ctimes(v893, tbl[155 + tbloffset])); real2 v269 = ctimesminusplus(reverse(v255), tbl[28 + tbloffset], ctimes(v255, tbl[29 + tbloffset])); real2 v1216 = plus(v896, v897); real2 v1212 = minus(v897, v896); real2 v2160 = minus(v583, v263); real2 v2164 = plus(v263, v583); real2 v2686 = minus(v589, v269); real2 v2690 = plus(v269, v589); real2 v96 = load(in, 94 << inShift); real2 v32 = load(in, 30 << inShift); real2 v736 = plus(v32, v96); real2 v732 = minus(v96, v32); real2 v64 = load(in, 62 << inShift); real2 v128 = load(in, 126 << inShift); real2 v737 = plus(v64, v128); real2 v731 = reverse(minus(v64, v128)); real2 v1057 = plus(v736, v737); real2 v1051 = reverse(minus(v736, v737)); real2 v733 = minusplus(v731, v732); real2 v735 = minusplus(uminus(v731), v732); real2 v749 = ctimesminusplus(reverse(v735), tbl[124 + tbloffset], ctimes(v735, tbl[125 + tbloffset])); real2 v743 = ctimesminusplus(reverse(v733), tbl[122 + tbloffset], ctimes(v733, tbl[123 + tbloffset])); real2 v16 = load(in, 14 << inShift); real2 v80 = load(in, 78 << inShift); real2 v412 = minus(v80, v16); real2 v416 = plus(v16, v80); real2 v112 = load(in, 110 << inShift); real2 v48 = load(in, 46 << inShift); real2 v417 = plus(v48, v112); real2 v411 = reverse(minus(v48, v112)); real2 v1056 = plus(v416, v417); real2 v1052 = minus(v417, v416); real2 v1055 = minusplus(uminus(v1051), v1052); real2 v1053 = minusplus(v1051, v1052); real2 v1063 = ctimesminusplus(reverse(v1053), 
tbl[186 + tbloffset], ctimes(v1053, tbl[187 + tbloffset])); real2 v1665 = plus(v903, v1063); real2 v1659 = reverse(minus(v903, v1063)); real2 v1069 = ctimesminusplus(reverse(v1055), tbl[188 + tbloffset], ctimes(v1055, tbl[189 + tbloffset])); real2 v1869 = reverse(minus(v909, v1069)); real2 v1875 = plus(v909, v1069); real2 v413 = minusplus(v411, v412); real2 v415 = minusplus(uminus(v411), v412); real2 v429 = ctimesminusplus(reverse(v415), tbl[60 + tbloffset], ctimes(v415, tbl[61 + tbloffset])); real2 v1217 = plus(v1056, v1057); real2 v1211 = reverse(minus(v1056, v1057)); real2 v1297 = plus(v1216, v1217); real2 v1291 = reverse(minus(v1216, v1217)); real2 v2691 = plus(v429, v749); real2 v2685 = reverse(minus(v429, v749)); real2 v2765 = reverse(minus(v2690, v2691)); real2 v2771 = plus(v2690, v2691); real2 v2689 = minusplus(uminus(v2685), v2686); real2 v2687 = minusplus(v2685, v2686); real2 v2703 = ctimesminusplus(reverse(v2689), tbl[476 + tbloffset], ctimes(v2689, tbl[477 + tbloffset])); real2 v2697 = ctimesminusplus(reverse(v2687), tbl[474 + tbloffset], ctimes(v2687, tbl[475 + tbloffset])); real2 v1215 = minusplus(uminus(v1211), v1212); real2 v1213 = minusplus(v1211, v1212); real2 v1223 = ctimesminusplus(reverse(v1213), tbl[218 + tbloffset], ctimes(v1213, tbl[219 + tbloffset])); real2 v1229 = ctimesminusplus(reverse(v1215), tbl[220 + tbloffset], ctimes(v1215, tbl[221 + tbloffset])); real2 v423 = ctimesminusplus(reverse(v413), tbl[58 + tbloffset], ctimes(v413, tbl[59 + tbloffset])); real2 v2165 = plus(v423, v743); real2 v2159 = reverse(minus(v423, v743)); real2 v2245 = plus(v2164, v2165); real2 v2239 = reverse(minus(v2164, v2165)); real2 v44 = load(in, 42 << inShift); real2 v108 = load(in, 106 << inShift); real2 v331 = reverse(minus(v44, v108)); real2 v337 = plus(v44, v108); real2 v76 = load(in, 74 << inShift); real2 v12 = load(in, 10 << inShift); real2 v336 = plus(v12, v76); real2 v332 = minus(v76, v12); real2 v976 = plus(v336, v337); real2 v972 = minus(v337, v336); 
real2 v335 = minusplus(uminus(v331), v332); real2 v333 = minusplus(v331, v332); real2 v343 = ctimesminusplus(reverse(v333), tbl[42 + tbloffset], ctimes(v333, tbl[43 + tbloffset])); real2 v349 = ctimesminusplus(reverse(v335), tbl[44 + tbloffset], ctimes(v335, tbl[45 + tbloffset])); real2 v124 = load(in, 122 << inShift); real2 v60 = load(in, 58 << inShift); real2 v651 = reverse(minus(v60, v124)); real2 v657 = plus(v60, v124); real2 v28 = load(in, 26 << inShift); real2 v92 = load(in, 90 << inShift); real2 v652 = minus(v92, v28); real2 v656 = plus(v28, v92); real2 v977 = plus(v656, v657); real2 v971 = reverse(minus(v656, v657)); real2 v973 = minusplus(v971, v972); real2 v975 = minusplus(uminus(v971), v972); real2 v983 = ctimesminusplus(reverse(v973), tbl[170 + tbloffset], ctimes(v973, tbl[171 + tbloffset])); real2 v1131 = reverse(minus(v976, v977)); real2 v1137 = plus(v976, v977); real2 v655 = minusplus(uminus(v651), v652); real2 v653 = minusplus(v651, v652); real2 v669 = ctimesminusplus(reverse(v655), tbl[108 + tbloffset], ctimes(v655, tbl[109 + tbloffset])); real2 v663 = ctimesminusplus(reverse(v653), tbl[106 + tbloffset], ctimes(v653, tbl[107 + tbloffset])); real2 v2079 = reverse(minus(v343, v663)); real2 v2085 = plus(v343, v663); real2 v2605 = reverse(minus(v349, v669)); real2 v2611 = plus(v349, v669); real2 v989 = ctimesminusplus(reverse(v975), tbl[172 + tbloffset], ctimes(v975, tbl[173 + tbloffset])); real2 v20 = load(in, 18 << inShift); real2 v84 = load(in, 82 << inShift); real2 v496 = plus(v20, v84); real2 v492 = minus(v84, v20); real2 v52 = load(in, 50 << inShift); real2 v116 = load(in, 114 << inShift); real2 v491 = reverse(minus(v52, v116)); real2 v497 = plus(v52, v116); real2 v817 = plus(v496, v497); real2 v811 = reverse(minus(v496, v497)); real2 v493 = minusplus(v491, v492); real2 v495 = minusplus(uminus(v491), v492); real2 v509 = ctimesminusplus(reverse(v495), tbl[76 + tbloffset], ctimes(v495, tbl[77 + tbloffset])); real2 v503 = 
ctimesminusplus(reverse(v493), tbl[74 + tbloffset], ctimes(v493, tbl[75 + tbloffset])); real2 v36 = load(in, 34 << inShift); real2 v100 = load(in, 98 << inShift); real2 v171 = reverse(minus(v36, v100)); real2 v177 = plus(v36, v100); real2 v68 = load(in, 66 << inShift); real2 v4 = load(in, 2 << inShift); real2 v176 = plus(v4, v68); real2 v172 = minus(v68, v4); real2 v816 = plus(v176, v177); real2 v812 = minus(v177, v176); real2 v1136 = plus(v816, v817); real2 v1132 = minus(v817, v816); real2 v1133 = minusplus(v1131, v1132); real2 v1135 = minusplus(uminus(v1131), v1132); real2 v1149 = ctimesminusplus(reverse(v1135), tbl[204 + tbloffset], ctimes(v1135, tbl[205 + tbloffset])); real2 v1296 = plus(v1136, v1137); real2 v1292 = minus(v1137, v1136); real2 v1295 = minusplus(uminus(v1291), v1292); real2 v1293 = minusplus(v1291, v1292); real2 v1303 = ctimesminusplus(reverse(v1293), tbl[234 + tbloffset], ctimes(v1293, tbl[235 + tbloffset])); real2 v1331 = reverse(minus(v1296, v1297)); real2 v1337 = plus(v1296, v1297); real2 v173 = minusplus(v171, v172); real2 v175 = minusplus(uminus(v171), v172); real2 v189 = ctimesminusplus(reverse(v175), tbl[12 + tbloffset], ctimes(v175, tbl[13 + tbloffset])); real2 v1309 = ctimesminusplus(reverse(v1295), tbl[236 + tbloffset], ctimes(v1295, tbl[237 + tbloffset])); real2 v815 = minusplus(uminus(v811), v812); real2 v813 = minusplus(v811, v812); real2 v1143 = ctimesminusplus(reverse(v1133), tbl[202 + tbloffset], ctimes(v1133, tbl[203 + tbloffset])); real2 v1541 = reverse(minus(v1149, v1229)); real2 v1547 = plus(v1149, v1229); real2 v2610 = plus(v189, v509); real2 v2606 = minus(v509, v189); real2 v2770 = plus(v2610, v2611); real2 v2766 = minus(v2611, v2610); real2 v823 = ctimesminusplus(reverse(v813), tbl[138 + tbloffset], ctimes(v813, tbl[139 + tbloffset])); real2 v829 = ctimesminusplus(reverse(v815), tbl[140 + tbloffset], ctimes(v815, tbl[141 + tbloffset])); real2 v2811 = plus(v2770, v2771); real2 v2805 = reverse(minus(v2770, v2771)); real2 
v2767 = minusplus(v2765, v2766); real2 v2769 = minusplus(uminus(v2765), v2766); real2 v2607 = minusplus(v2605, v2606); real2 v2609 = minusplus(uminus(v2605), v2606); real2 v2617 = ctimesminusplus(reverse(v2607), tbl[458 + tbloffset], ctimes(v2607, tbl[459 + tbloffset])); real2 v2623 = ctimesminusplus(reverse(v2609), tbl[460 + tbloffset], ctimes(v2609, tbl[461 + tbloffset])); real2 v3013 = reverse(minus(v2623, v2703)); real2 v3019 = plus(v2623, v2703); real2 v2783 = ctimesminusplus(reverse(v2769), tbl[492 + tbloffset], ctimes(v2769, tbl[493 + tbloffset])); real2 v2941 = plus(v2617, v2697); real2 v2935 = reverse(minus(v2617, v2697)); real2 v2777 = ctimesminusplus(reverse(v2767), tbl[490 + tbloffset], ctimes(v2767, tbl[491 + tbloffset])); real2 v1660 = minus(v983, v823); real2 v1664 = plus(v823, v983); real2 v1874 = plus(v829, v989); real2 v1870 = minus(v989, v829); real2 v1909 = reverse(minus(v1874, v1875)); real2 v1915 = plus(v1874, v1875); real2 v1663 = minusplus(uminus(v1659), v1660); real2 v1661 = minusplus(v1659, v1660); real2 v1677 = ctimesminusplus(reverse(v1663), tbl[296 + tbloffset], ctimes(v1663, tbl[297 + tbloffset])); real2 v1873 = minusplus(uminus(v1869), v1870); real2 v1871 = minusplus(v1869, v1870); real2 v1887 = ctimesminusplus(reverse(v1873), tbl[332 + tbloffset], ctimes(v1873, tbl[333 + tbloffset])); real2 v1705 = plus(v1664, v1665); real2 v1699 = reverse(minus(v1664, v1665)); real2 v1671 = ctimesminusplus(reverse(v1661), tbl[294 + tbloffset], ctimes(v1661, tbl[295 + tbloffset])); real2 v1881 = ctimesminusplus(reverse(v1871), tbl[330 + tbloffset], ctimes(v1871, tbl[331 + tbloffset])); real2 v1469 = plus(v1143, v1223); real2 v1463 = reverse(minus(v1143, v1223)); real2 v54 = load(in, 52 << inShift); real2 v118 = load(in, 116 << inShift); real2 v537 = plus(v54, v118); real2 v531 = reverse(minus(v54, v118)); real2 v86 = load(in, 84 << inShift); real2 v22 = load(in, 20 << inShift); real2 v536 = plus(v22, v86); real2 v532 = minus(v86, v22); real2 v851 = 
reverse(minus(v536, v537)); real2 v857 = plus(v536, v537); real2 v533 = minusplus(v531, v532); real2 v535 = minusplus(uminus(v531), v532); real2 v549 = ctimesminusplus(reverse(v535), tbl[84 + tbloffset], ctimes(v535, tbl[85 + tbloffset])); real2 v102 = load(in, 100 << inShift); real2 v38 = load(in, 36 << inShift); real2 v217 = plus(v38, v102); real2 v211 = reverse(minus(v38, v102)); real2 v70 = load(in, 68 << inShift); real2 v6 = load(in, 4 << inShift); real2 v216 = plus(v6, v70); real2 v212 = minus(v70, v6); real2 v213 = minusplus(v211, v212); real2 v215 = minusplus(uminus(v211), v212); real2 v229 = ctimesminusplus(reverse(v215), tbl[20 + tbloffset], ctimes(v215, tbl[21 + tbloffset])); real2 v2646 = minus(v549, v229); real2 v2650 = plus(v229, v549); real2 v856 = plus(v216, v217); real2 v852 = minus(v217, v216); real2 v853 = minusplus(v851, v852); real2 v855 = minusplus(uminus(v851), v852); real2 v863 = ctimesminusplus(reverse(v853), tbl[146 + tbloffset], ctimes(v853, tbl[147 + tbloffset])); real2 v869 = ctimesminusplus(reverse(v855), tbl[148 + tbloffset], ctimes(v855, tbl[149 + tbloffset])); real2 v1176 = plus(v856, v857); real2 v1172 = minus(v857, v856); real2 v110 = load(in, 108 << inShift); real2 v46 = load(in, 44 << inShift); real2 v377 = plus(v46, v110); real2 v371 = reverse(minus(v46, v110)); real2 v78 = load(in, 76 << inShift); real2 v14 = load(in, 12 << inShift); real2 v372 = minus(v78, v14); real2 v376 = plus(v14, v78); real2 v1012 = minus(v377, v376); real2 v1016 = plus(v376, v377); real2 v373 = minusplus(v371, v372); real2 v375 = minusplus(uminus(v371), v372); real2 v389 = ctimesminusplus(reverse(v375), tbl[52 + tbloffset], ctimes(v375, tbl[53 + tbloffset])); real2 v30 = load(in, 28 << inShift); real2 v94 = load(in, 92 << inShift); real2 v696 = plus(v30, v94); real2 v692 = minus(v94, v30); real2 v62 = load(in, 60 << inShift); real2 v126 = load(in, 124 << inShift); real2 v697 = plus(v62, v126); real2 v691 = reverse(minus(v62, v126)); real2 v1017 = 
plus(v696, v697); real2 v1011 = reverse(minus(v696, v697)); real2 v1171 = reverse(minus(v1016, v1017)); real2 v1177 = plus(v1016, v1017); real2 v1013 = minusplus(v1011, v1012); real2 v1015 = minusplus(uminus(v1011), v1012); real2 v1175 = minusplus(uminus(v1171), v1172); real2 v1173 = minusplus(v1171, v1172); real2 v1183 = ctimesminusplus(reverse(v1173), tbl[210 + tbloffset], ctimes(v1173, tbl[211 + tbloffset])); real2 v1189 = ctimesminusplus(reverse(v1175), tbl[212 + tbloffset], ctimes(v1175, tbl[213 + tbloffset])); real2 v1029 = ctimesminusplus(reverse(v1015), tbl[180 + tbloffset], ctimes(v1015, tbl[181 + tbloffset])); real2 v1023 = ctimesminusplus(reverse(v1013), tbl[178 + tbloffset], ctimes(v1013, tbl[179 + tbloffset])); real2 v1625 = plus(v863, v1023); real2 v1619 = reverse(minus(v863, v1023)); real2 v1835 = plus(v869, v1029); real2 v1829 = reverse(minus(v869, v1029)); real2 v693 = minusplus(v691, v692); real2 v695 = minusplus(uminus(v691), v692); real2 v709 = ctimesminusplus(reverse(v695), tbl[116 + tbloffset], ctimes(v695, tbl[117 + tbloffset])); real2 v2645 = reverse(minus(v389, v709)); real2 v2651 = plus(v389, v709); real2 v1257 = plus(v1176, v1177); real2 v1251 = reverse(minus(v1176, v1177)); real2 v2731 = plus(v2650, v2651); real2 v2725 = reverse(minus(v2650, v2651)); real2 v114 = load(in, 112 << inShift); real2 v50 = load(in, 48 << inShift); real2 v457 = plus(v50, v114); real2 v451 = reverse(minus(v50, v114)); real2 v18 = load(in, 16 << inShift); real2 v82 = load(in, 80 << inShift); real2 v456 = plus(v18, v82); real2 v452 = minus(v82, v18); real2 v771 = reverse(minus(v456, v457)); real2 v777 = plus(v456, v457); real2 v453 = minusplus(v451, v452); real2 v455 = minusplus(uminus(v451), v452); real2 v469 = ctimesminusplus(reverse(v455), tbl[68 + tbloffset], ctimes(v455, tbl[69 + tbloffset])); real2 v66 = load(in, 64 << inShift); real2 v2 = load(in, 0 << inShift); real2 v132 = minus(v66, v2); real2 v136 = plus(v2, v66); real2 v98 = load(in, 96 << inShift); 
real2 v34 = load(in, 32 << inShift); real2 v131 = reverse(minus(v34, v98)); real2 v137 = plus(v34, v98); real2 v133 = minusplus(v131, v132); real2 v135 = minusplus(uminus(v131), v132); real2 v149 = ctimesminusplus(reverse(v135), tbl[4 + tbloffset], ctimes(v135, tbl[5 + tbloffset])); real2 v2566 = minus(v469, v149); real2 v2570 = plus(v149, v469); real2 v772 = minus(v137, v136); real2 v776 = plus(v136, v137); real2 v1092 = minus(v777, v776); real2 v1096 = plus(v776, v777); real2 v773 = minusplus(v771, v772); real2 v775 = minusplus(uminus(v771), v772); real2 v783 = ctimesminusplus(reverse(v773), tbl[130 + tbloffset], ctimes(v773, tbl[131 + tbloffset])); real2 v789 = ctimesminusplus(reverse(v775), tbl[132 + tbloffset], ctimes(v775, tbl[133 + tbloffset])); real2 v74 = load(in, 72 << inShift); real2 v10 = load(in, 8 << inShift); real2 v296 = plus(v10, v74); real2 v292 = minus(v74, v10); real2 v42 = load(in, 40 << inShift); real2 v106 = load(in, 104 << inShift); real2 v291 = reverse(minus(v42, v106)); real2 v297 = plus(v42, v106); real2 v293 = minusplus(v291, v292); real2 v295 = minusplus(uminus(v291), v292); real2 v309 = ctimesminusplus(reverse(v295), tbl[36 + tbloffset], ctimes(v295, tbl[37 + tbloffset])); real2 v932 = minus(v297, v296); real2 v936 = plus(v296, v297); real2 v122 = load(in, 120 << inShift); real2 v58 = load(in, 56 << inShift); real2 v617 = plus(v58, v122); real2 v611 = reverse(minus(v58, v122)); real2 v26 = load(in, 24 << inShift); real2 v90 = load(in, 88 << inShift); real2 v612 = minus(v90, v26); real2 v616 = plus(v26, v90); real2 v937 = plus(v616, v617); real2 v931 = reverse(minus(v616, v617)); real2 v1091 = reverse(minus(v936, v937)); real2 v1097 = plus(v936, v937); real2 v933 = minusplus(v931, v932); real2 v935 = minusplus(uminus(v931), v932); real2 v1093 = minusplus(v1091, v1092); real2 v1095 = minusplus(uminus(v1091), v1092); real2 v1103 = ctimesminusplus(reverse(v1093), tbl[194 + tbloffset], ctimes(v1093, tbl[195 + tbloffset])); real2 v1468 = 
plus(v1103, v1183); real2 v1464 = minus(v1183, v1103); real2 v1508 = plus(v1468, v1469); real2 v1504 = minus(v1469, v1468); real2 v1252 = minus(v1097, v1096); real2 v1256 = plus(v1096, v1097); real2 v1336 = plus(v1256, v1257); real2 v1332 = minus(v1257, v1256); real2 v1335 = minusplus(uminus(v1331), v1332); real2 v1333 = minusplus(v1331, v1332); real2 v1343 = ctimesminusplus(reverse(v1333), tbl[242 + tbloffset], ctimes(v1333, tbl[243 + tbloffset])); real2 v1349 = ctimesminusplus(reverse(v1335), tbl[244 + tbloffset], ctimes(v1335, tbl[245 + tbloffset])); real2 v1376 = plus(v1336, v1337); real2 v1372 = minus(v1337, v1336); real2 v1465 = minusplus(v1463, v1464); real2 v1467 = minusplus(uminus(v1463), v1464); real2 v1255 = minusplus(uminus(v1251), v1252); real2 v1253 = minusplus(v1251, v1252); real2 v1481 = ctimesminusplus(reverse(v1467), tbl[264 + tbloffset], ctimes(v1467, tbl[265 + tbloffset])); real2 v1475 = ctimesminusplus(reverse(v1465), tbl[262 + tbloffset], ctimes(v1465, tbl[263 + tbloffset])); real2 v1109 = ctimesminusplus(reverse(v1095), tbl[196 + tbloffset], ctimes(v1095, tbl[197 + tbloffset])); real2 v1542 = minus(v1189, v1109); real2 v1546 = plus(v1109, v1189); real2 v1545 = minusplus(uminus(v1541), v1542); real2 v1543 = minusplus(v1541, v1542); real2 v1553 = ctimesminusplus(reverse(v1543), tbl[274 + tbloffset], ctimes(v1543, tbl[275 + tbloffset])); real2 v1559 = ctimesminusplus(reverse(v1545), tbl[276 + tbloffset], ctimes(v1545, tbl[277 + tbloffset])); real2 v1582 = minus(v1547, v1546); real2 v1586 = plus(v1546, v1547); real2 v1269 = ctimesminusplus(reverse(v1255), tbl[228 + tbloffset], ctimes(v1255, tbl[229 + tbloffset])); real2 v1438 = minus(v1309, v1269); real2 v1442 = plus(v1269, v1309); real2 v1263 = ctimesminusplus(reverse(v1253), tbl[226 + tbloffset], ctimes(v1253, tbl[227 + tbloffset])); real2 v943 = ctimesminusplus(reverse(v933), tbl[162 + tbloffset], ctimes(v933, tbl[163 + tbloffset])); real2 v1624 = plus(v783, v943); real2 v1620 = minus(v943, 
v783); real2 v1623 = minusplus(uminus(v1619), v1620); real2 v1621 = minusplus(v1619, v1620); real2 v1700 = minus(v1625, v1624); real2 v1704 = plus(v1624, v1625); real2 v1631 = ctimesminusplus(reverse(v1621), tbl[286 + tbloffset], ctimes(v1621, tbl[287 + tbloffset])); real2 v949 = ctimesminusplus(reverse(v935), tbl[164 + tbloffset], ctimes(v935, tbl[165 + tbloffset])); real2 v1830 = minus(v949, v789); real2 v1834 = plus(v789, v949); real2 v1782 = plus(v1631, v1671); real2 v1778 = minus(v1671, v1631); real2 v1910 = minus(v1835, v1834); real2 v1914 = plus(v1834, v1835); real2 v1950 = minus(v1915, v1914); real2 v1954 = plus(v1914, v1915); real2 v1913 = minusplus(uminus(v1909), v1910); real2 v1911 = minusplus(v1909, v1910); real2 v613 = minusplus(v611, v612); real2 v615 = minusplus(uminus(v611), v612); real2 v629 = ctimesminusplus(reverse(v615), tbl[100 + tbloffset], ctimes(v615, tbl[101 + tbloffset])); real2 v1744 = plus(v1704, v1705); real2 v1740 = minus(v1705, v1704); real2 v1637 = ctimesminusplus(reverse(v1623), tbl[288 + tbloffset], ctimes(v1623, tbl[289 + tbloffset])); real2 v1927 = ctimesminusplus(reverse(v1913), tbl[340 + tbloffset], ctimes(v1913, tbl[341 + tbloffset])); real2 v2571 = plus(v309, v629); real2 v2565 = reverse(minus(v309, v629)); real2 v1833 = minusplus(uminus(v1829), v1830); real2 v1831 = minusplus(v1829, v1830); real2 v1921 = ctimesminusplus(reverse(v1911), tbl[338 + tbloffset], ctimes(v1911, tbl[339 + tbloffset])); real2 v1804 = minus(v1677, v1637); real2 v1808 = plus(v1637, v1677); real2 v1847 = ctimesminusplus(reverse(v1833), tbl[324 + tbloffset], ctimes(v1833, tbl[325 + tbloffset])); real2 v2014 = minus(v1887, v1847); real2 v2018 = plus(v1847, v1887); real2 v1841 = ctimesminusplus(reverse(v1831), tbl[322 + tbloffset], ctimes(v1831, tbl[323 + tbloffset])); real2 v1988 = minus(v1881, v1841); real2 v1992 = plus(v1841, v1881); real2 v1703 = minusplus(uminus(v1699), v1700); real2 v1701 = minusplus(v1699, v1700); real2 v1717 = 
ctimesminusplus(reverse(v1703), tbl[304 + tbloffset], ctimes(v1703, tbl[305 + tbloffset])); real2 v1711 = ctimesminusplus(reverse(v1701), tbl[302 + tbloffset], ctimes(v1701, tbl[303 + tbloffset])); real2 v2730 = plus(v2570, v2571); real2 v2726 = minus(v2571, v2570); real2 v1412 = minus(v1303, v1263); real2 v1416 = plus(v1263, v1303); real2 v63 = load(in, 61 << inShift); real2 v127 = load(in, 125 << inShift); real2 v717 = plus(v63, v127); real2 v711 = reverse(minus(v63, v127)); real2 v95 = load(in, 93 << inShift); real2 v31 = load(in, 29 << inShift); real2 v712 = minus(v95, v31); real2 v716 = plus(v31, v95); real2 v1037 = plus(v716, v717); real2 v1031 = reverse(minus(v716, v717)); real2 v79 = load(in, 77 << inShift); real2 v15 = load(in, 13 << inShift); real2 v396 = plus(v15, v79); real2 v392 = minus(v79, v15); real2 v111 = load(in, 109 << inShift); real2 v47 = load(in, 45 << inShift); real2 v397 = plus(v47, v111); real2 v391 = reverse(minus(v47, v111)); real2 v1032 = minus(v397, v396); real2 v1036 = plus(v396, v397); real2 v1033 = minusplus(v1031, v1032); real2 v1035 = minusplus(uminus(v1031), v1032); real2 v1049 = ctimesminusplus(reverse(v1035), tbl[184 + tbloffset], ctimes(v1035, tbl[185 + tbloffset])); real2 v1043 = ctimesminusplus(reverse(v1033), tbl[182 + tbloffset], ctimes(v1033, tbl[183 + tbloffset])); real2 v1197 = plus(v1036, v1037); real2 v1191 = reverse(minus(v1036, v1037)); real2 v23 = load(in, 21 << inShift); real2 v87 = load(in, 85 << inShift); real2 v556 = plus(v23, v87); real2 v552 = minus(v87, v23); real2 v119 = load(in, 117 << inShift); real2 v55 = load(in, 53 << inShift); real2 v557 = plus(v55, v119); real2 v551 = reverse(minus(v55, v119)); real2 v877 = plus(v556, v557); real2 v871 = reverse(minus(v556, v557)); real2 v7 = load(in, 5 << inShift); real2 v71 = load(in, 69 << inShift); real2 v232 = minus(v71, v7); real2 v236 = plus(v7, v71); real2 v103 = load(in, 101 << inShift); real2 v39 = load(in, 37 << inShift); real2 v237 = plus(v39, v103); 
real2 v231 = reverse(minus(v39, v103)); real2 v876 = plus(v236, v237); real2 v872 = minus(v237, v236); real2 v1192 = minus(v877, v876); real2 v1196 = plus(v876, v877); real2 v1271 = reverse(minus(v1196, v1197)); real2 v1277 = plus(v1196, v1197); real2 v875 = minusplus(uminus(v871), v872); real2 v873 = minusplus(v871, v872); real2 v883 = ctimesminusplus(reverse(v873), tbl[150 + tbloffset], ctimes(v873, tbl[151 + tbloffset])); real2 v1639 = reverse(minus(v883, v1043)); real2 v1645 = plus(v883, v1043); real2 v1195 = minusplus(uminus(v1191), v1192); real2 v1193 = minusplus(v1191, v1192); real2 v1209 = ctimesminusplus(reverse(v1195), tbl[216 + tbloffset], ctimes(v1195, tbl[217 + tbloffset])); real2 v1203 = ctimesminusplus(reverse(v1193), tbl[214 + tbloffset], ctimes(v1193, tbl[215 + tbloffset])); real2 v83 = load(in, 81 << inShift); real2 v19 = load(in, 17 << inShift); real2 v476 = plus(v19, v83); real2 v472 = minus(v83, v19); real2 v51 = load(in, 49 << inShift); real2 v115 = load(in, 113 << inShift); real2 v477 = plus(v51, v115); real2 v471 = reverse(minus(v51, v115)); real2 v797 = plus(v476, v477); real2 v791 = reverse(minus(v476, v477)); real2 v3 = load(in, 1 << inShift); real2 v67 = load(in, 65 << inShift); real2 v156 = plus(v3, v67); real2 v152 = minus(v67, v3); real2 v35 = load(in, 33 << inShift); real2 v99 = load(in, 97 << inShift); real2 v157 = plus(v35, v99); real2 v151 = reverse(minus(v35, v99)); real2 v792 = minus(v157, v156); real2 v796 = plus(v156, v157); real2 v793 = minusplus(v791, v792); real2 v795 = minusplus(uminus(v791), v792); real2 v803 = ctimesminusplus(reverse(v793), tbl[134 + tbloffset], ctimes(v793, tbl[135 + tbloffset])); real2 v1112 = minus(v797, v796); real2 v1116 = plus(v796, v797); real2 v107 = load(in, 105 << inShift); real2 v43 = load(in, 41 << inShift); real2 v317 = plus(v43, v107); real2 v311 = reverse(minus(v43, v107)); real2 v75 = load(in, 73 << inShift); real2 v11 = load(in, 9 << inShift); real2 v316 = plus(v11, v75); real2 v312 = 
minus(v75, v11); real2 v956 = plus(v316, v317); real2 v952 = minus(v317, v316); real2 v59 = load(in, 57 << inShift); real2 v123 = load(in, 121 << inShift); real2 v631 = reverse(minus(v59, v123)); real2 v637 = plus(v59, v123); real2 v27 = load(in, 25 << inShift); real2 v91 = load(in, 89 << inShift); real2 v636 = plus(v27, v91); real2 v632 = minus(v91, v27); real2 v957 = plus(v636, v637); real2 v951 = reverse(minus(v636, v637)); real2 v1111 = reverse(minus(v956, v957)); real2 v1117 = plus(v956, v957); real2 v1276 = plus(v1116, v1117); real2 v1272 = minus(v1117, v1116); real2 v1275 = minusplus(uminus(v1271), v1272); real2 v1273 = minusplus(v1271, v1272); real2 v1283 = ctimesminusplus(reverse(v1273), tbl[230 + tbloffset], ctimes(v1273, tbl[231 + tbloffset])); real2 v1352 = minus(v1277, v1276); real2 v1356 = plus(v1276, v1277); real2 v1289 = ctimesminusplus(reverse(v1275), tbl[232 + tbloffset], ctimes(v1275, tbl[233 + tbloffset])); real2 v1115 = minusplus(uminus(v1111), v1112); real2 v1113 = minusplus(v1111, v1112); real2 v1123 = ctimesminusplus(reverse(v1113), tbl[198 + tbloffset], ctimes(v1113, tbl[199 + tbloffset])); real2 v1129 = ctimesminusplus(reverse(v1115), tbl[200 + tbloffset], ctimes(v1115, tbl[201 + tbloffset])); real2 v1488 = plus(v1123, v1203); real2 v1484 = minus(v1203, v1123); real2 v1566 = plus(v1129, v1209); real2 v1562 = minus(v1209, v1129); real2 v85 = load(in, 83 << inShift); real2 v21 = load(in, 19 << inShift); real2 v512 = minus(v85, v21); real2 v516 = plus(v21, v85); real2 v117 = load(in, 115 << inShift); real2 v53 = load(in, 51 << inShift); real2 v517 = plus(v53, v117); real2 v511 = reverse(minus(v53, v117)); real2 v831 = reverse(minus(v516, v517)); real2 v837 = plus(v516, v517); real2 v69 = load(in, 67 << inShift); real2 v5 = load(in, 3 << inShift); real2 v192 = minus(v69, v5); real2 v196 = plus(v5, v69); real2 v37 = load(in, 35 << inShift); real2 v101 = load(in, 99 << inShift); real2 v197 = plus(v37, v101); real2 v191 = reverse(minus(v37, 
v101)); real2 v832 = minus(v197, v196); real2 v836 = plus(v196, v197); real2 v1152 = minus(v837, v836); real2 v1156 = plus(v836, v837); real2 v61 = load(in, 59 << inShift); real2 v125 = load(in, 123 << inShift); real2 v677 = plus(v61, v125); real2 v671 = reverse(minus(v61, v125)); real2 v29 = load(in, 27 << inShift); real2 v93 = load(in, 91 << inShift); real2 v672 = minus(v93, v29); real2 v676 = plus(v29, v93); real2 v997 = plus(v676, v677); real2 v991 = reverse(minus(v676, v677)); real2 v109 = load(in, 107 << inShift); real2 v45 = load(in, 43 << inShift); real2 v357 = plus(v45, v109); real2 v351 = reverse(minus(v45, v109)); real2 v77 = load(in, 75 << inShift); real2 v13 = load(in, 11 << inShift); real2 v352 = minus(v77, v13); real2 v356 = plus(v13, v77); real2 v992 = minus(v357, v356); real2 v996 = plus(v356, v357); real2 v1157 = plus(v996, v997); real2 v1151 = reverse(minus(v996, v997)); real2 v1155 = minusplus(uminus(v1151), v1152); real2 v1153 = minusplus(v1151, v1152); real2 v1163 = ctimesminusplus(reverse(v1153), tbl[206 + tbloffset], ctimes(v1153, tbl[207 + tbloffset])); real2 v1316 = plus(v1156, v1157); real2 v1312 = minus(v1157, v1156); real2 v41 = load(in, 39 << inShift); real2 v105 = load(in, 103 << inShift); real2 v277 = plus(v41, v105); real2 v271 = reverse(minus(v41, v105)); real2 v9 = load(in, 7 << inShift); real2 v73 = load(in, 71 << inShift); real2 v276 = plus(v9, v73); real2 v272 = minus(v73, v9); real2 v916 = plus(v276, v277); real2 v912 = minus(v277, v276); real2 v89 = load(in, 87 << inShift); real2 v25 = load(in, 23 << inShift); real2 v592 = minus(v89, v25); real2 v596 = plus(v25, v89); real2 v57 = load(in, 55 << inShift); real2 v121 = load(in, 119 << inShift); real2 v591 = reverse(minus(v57, v121)); real2 v597 = plus(v57, v121); real2 v911 = reverse(minus(v596, v597)); real2 v917 = plus(v596, v597); real2 v1236 = plus(v916, v917); real2 v1232 = minus(v917, v916); real2 v81 = load(in, 79 << inShift); real2 v17 = load(in, 15 << inShift); real2 
v432 = minus(v81, v17); real2 v436 = plus(v17, v81); real2 v113 = load(in, 111 << inShift); real2 v49 = load(in, 47 << inShift); real2 v437 = plus(v49, v113); real2 v431 = reverse(minus(v49, v113)); real2 v1072 = minus(v437, v436); real2 v1076 = plus(v436, v437); real2 v65 = load(in, 63 << inShift); real2 v129 = load(in, 127 << inShift); real2 v757 = plus(v65, v129); real2 v751 = reverse(minus(v65, v129)); real2 v97 = load(in, 95 << inShift); real2 v33 = load(in, 31 << inShift); real2 v752 = minus(v97, v33); real2 v756 = plus(v33, v97); real2 v1077 = plus(v756, v757); real2 v1071 = reverse(minus(v756, v757)); real2 v1231 = reverse(minus(v1076, v1077)); real2 v1237 = plus(v1076, v1077); real2 v1317 = plus(v1236, v1237); real2 v1311 = reverse(minus(v1236, v1237)); real2 v1351 = reverse(minus(v1316, v1317)); real2 v1357 = plus(v1316, v1317); real2 v1371 = reverse(minus(v1356, v1357)); real2 v1377 = plus(v1356, v1357); store(out, 0 << outShift, plus(v1376, v1377)); real2 v1390 = minus(v1376, v1377); store(out, 64 << outShift, ctimesminusplus(v1390, tbl[0 + tbloffset], ctimes(reverse(v1390), tbl[1 + tbloffset]))); real2 v1353 = minusplus(v1351, v1352); real2 v1355 = minusplus(uminus(v1351), v1352); real2 v1369 = ctimesminusplus(reverse(v1355), tbl[248 + tbloffset], ctimes(v1355, tbl[249 + tbloffset])); store(out, 48 << outShift, plus(v1349, v1369)); real2 v1404 = minus(v1349, v1369); store(out, 112 << outShift, ctimesminusplus(v1404, tbl[0 + tbloffset], ctimes(reverse(v1404), tbl[1 + tbloffset]))); real2 v1363 = ctimesminusplus(reverse(v1353), tbl[246 + tbloffset], ctimes(v1353, tbl[247 + tbloffset])); store(out, 16 << outShift, plus(v1343, v1363)); real2 v1398 = minus(v1343, v1363); store(out, 80 << outShift, ctimesminusplus(v1398, tbl[0 + tbloffset], ctimes(reverse(v1398), tbl[1 + tbloffset]))); real2 v1373 = minusplus(v1371, v1372); real2 v1375 = minusplus(uminus(v1371), v1372); store(out, 96 << outShift, ctimesminusplus(reverse(v1375), tbl[252 + tbloffset], 
ctimes(v1375, tbl[253 + tbloffset]))); store(out, 32 << outShift, ctimesminusplus(reverse(v1373), tbl[250 + tbloffset], ctimes(v1373, tbl[251 + tbloffset]))); real2 v1313 = minusplus(v1311, v1312); real2 v1315 = minusplus(uminus(v1311), v1312); real2 v1323 = ctimesminusplus(reverse(v1313), tbl[238 + tbloffset], ctimes(v1313, tbl[239 + tbloffset])); real2 v1417 = plus(v1283, v1323); real2 v1411 = reverse(minus(v1283, v1323)); store(out, 8 << outShift, plus(v1416, v1417)); real2 v1430 = minus(v1416, v1417); store(out, 72 << outShift, ctimesminusplus(v1430, tbl[0 + tbloffset], ctimes(reverse(v1430), tbl[1 + tbloffset]))); real2 v1413 = minusplus(v1411, v1412); real2 v1415 = minusplus(uminus(v1411), v1412); store(out, 104 << outShift, ctimesminusplus(reverse(v1415), tbl[256 + tbloffset], ctimes(v1415, tbl[257 + tbloffset]))); store(out, 40 << outShift, ctimesminusplus(reverse(v1413), tbl[254 + tbloffset], ctimes(v1413, tbl[255 + tbloffset]))); real2 v1329 = ctimesminusplus(reverse(v1315), tbl[240 + tbloffset], ctimes(v1315, tbl[241 + tbloffset])); real2 v1443 = plus(v1289, v1329); real2 v1437 = reverse(minus(v1289, v1329)); store(out, 24 << outShift, plus(v1442, v1443)); real2 v1456 = minus(v1442, v1443); store(out, 88 << outShift, ctimesminusplus(v1456, tbl[0 + tbloffset], ctimes(reverse(v1456), tbl[1 + tbloffset]))); real2 v1441 = minusplus(uminus(v1437), v1438); real2 v1439 = minusplus(v1437, v1438); store(out, 120 << outShift, ctimesminusplus(reverse(v1441), tbl[260 + tbloffset], ctimes(v1441, tbl[261 + tbloffset]))); store(out, 56 << outShift, ctimesminusplus(reverse(v1439), tbl[258 + tbloffset], ctimes(v1439, tbl[259 + tbloffset]))); real2 v1235 = minusplus(uminus(v1231), v1232); real2 v1233 = minusplus(v1231, v1232); real2 v1243 = ctimesminusplus(reverse(v1233), tbl[222 + tbloffset], ctimes(v1233, tbl[223 + tbloffset])); real2 v1489 = plus(v1163, v1243); real2 v1483 = reverse(minus(v1163, v1243)); real2 v1509 = plus(v1488, v1489); real2 v1503 = 
reverse(minus(v1488, v1489)); store(out, 4 << outShift, plus(v1508, v1509)); real2 v1522 = minus(v1508, v1509); store(out, 68 << outShift, ctimesminusplus(v1522, tbl[0 + tbloffset], ctimes(reverse(v1522), tbl[1 + tbloffset]))); real2 v1507 = minusplus(uminus(v1503), v1504); real2 v1505 = minusplus(v1503, v1504); store(out, 36 << outShift, ctimesminusplus(reverse(v1505), tbl[270 + tbloffset], ctimes(v1505, tbl[271 + tbloffset]))); store(out, 100 << outShift, ctimesminusplus(reverse(v1507), tbl[272 + tbloffset], ctimes(v1507, tbl[273 + tbloffset]))); real2 v1485 = minusplus(v1483, v1484); real2 v1487 = minusplus(uminus(v1483), v1484); real2 v1501 = ctimesminusplus(reverse(v1487), tbl[268 + tbloffset], ctimes(v1487, tbl[269 + tbloffset])); store(out, 52 << outShift, plus(v1481, v1501)); real2 v1534 = minus(v1481, v1501); store(out, 116 << outShift, ctimesminusplus(v1534, tbl[0 + tbloffset], ctimes(reverse(v1534), tbl[1 + tbloffset]))); real2 v1495 = ctimesminusplus(reverse(v1485), tbl[266 + tbloffset], ctimes(v1485, tbl[267 + tbloffset])); store(out, 20 << outShift, plus(v1475, v1495)); real2 v1528 = minus(v1475, v1495); store(out, 84 << outShift, ctimesminusplus(v1528, tbl[0 + tbloffset], ctimes(reverse(v1528), tbl[1 + tbloffset]))); real2 v1249 = ctimesminusplus(reverse(v1235), tbl[224 + tbloffset], ctimes(v1235, tbl[225 + tbloffset])); real2 v1169 = ctimesminusplus(reverse(v1155), tbl[208 + tbloffset], ctimes(v1155, tbl[209 + tbloffset])); real2 v1567 = plus(v1169, v1249); real2 v1561 = reverse(minus(v1169, v1249)); real2 v1581 = reverse(minus(v1566, v1567)); real2 v1587 = plus(v1566, v1567); store(out, 12 << outShift, plus(v1586, v1587)); real2 v1600 = minus(v1586, v1587); store(out, 76 << outShift, ctimesminusplus(v1600, tbl[0 + tbloffset], ctimes(reverse(v1600), tbl[1 + tbloffset]))); real2 v1583 = minusplus(v1581, v1582); store(out, 44 << outShift, ctimesminusplus(reverse(v1583), tbl[282 + tbloffset], ctimes(v1583, tbl[283 + tbloffset]))); real2 v1585 = 
minusplus(uminus(v1581), v1582); store(out, 108 << outShift, ctimesminusplus(reverse(v1585), tbl[284 + tbloffset], ctimes(v1585, tbl[285 + tbloffset]))); real2 v1565 = minusplus(uminus(v1561), v1562); real2 v1563 = minusplus(v1561, v1562); real2 v1579 = ctimesminusplus(reverse(v1565), tbl[280 + tbloffset], ctimes(v1565, tbl[281 + tbloffset])); store(out, 60 << outShift, plus(v1559, v1579)); real2 v1612 = minus(v1559, v1579); store(out, 124 << outShift, ctimesminusplus(v1612, tbl[0 + tbloffset], ctimes(reverse(v1612), tbl[1 + tbloffset]))); real2 v1573 = ctimesminusplus(reverse(v1563), tbl[278 + tbloffset], ctimes(v1563, tbl[279 + tbloffset])); store(out, 28 << outShift, plus(v1553, v1573)); real2 v1606 = minus(v1553, v1573); store(out, 92 << outShift, ctimesminusplus(v1606, tbl[0 + tbloffset], ctimes(reverse(v1606), tbl[1 + tbloffset]))); real2 v833 = minusplus(v831, v832); real2 v835 = minusplus(uminus(v831), v832); real2 v955 = minusplus(uminus(v951), v952); real2 v953 = minusplus(v951, v952); real2 v963 = ctimesminusplus(reverse(v953), tbl[166 + tbloffset], ctimes(v953, tbl[167 + tbloffset])); real2 v995 = minusplus(uminus(v991), v992); real2 v993 = minusplus(v991, v992); real2 v1003 = ctimesminusplus(reverse(v993), tbl[174 + tbloffset], ctimes(v993, tbl[175 + tbloffset])); real2 v843 = ctimesminusplus(reverse(v833), tbl[142 + tbloffset], ctimes(v833, tbl[143 + tbloffset])); real2 v1640 = minus(v963, v803); real2 v1644 = plus(v803, v963); real2 v1680 = minus(v1003, v843); real2 v1684 = plus(v843, v1003); real2 v1641 = minusplus(v1639, v1640); real2 v1643 = minusplus(uminus(v1639), v1640); real2 v1657 = ctimesminusplus(reverse(v1643), tbl[292 + tbloffset], ctimes(v1643, tbl[293 + tbloffset])); real2 v913 = minusplus(v911, v912); real2 v915 = minusplus(uminus(v911), v912); real2 v1073 = minusplus(v1071, v1072); real2 v1075 = minusplus(uminus(v1071), v1072); real2 v923 = ctimesminusplus(reverse(v913), tbl[158 + tbloffset], ctimes(v913, tbl[159 + tbloffset])); real2 
v1083 = ctimesminusplus(reverse(v1073), tbl[190 + tbloffset], ctimes(v1073, tbl[191 + tbloffset])); real2 v1685 = plus(v923, v1083); real2 v1679 = reverse(minus(v923, v1083)); real2 v1681 = minusplus(v1679, v1680); real2 v1683 = minusplus(uminus(v1679), v1680); real2 v1697 = ctimesminusplus(reverse(v1683), tbl[300 + tbloffset], ctimes(v1683, tbl[301 + tbloffset])); real2 v1809 = plus(v1657, v1697); real2 v1803 = reverse(minus(v1657, v1697)); store(out, 26 << outShift, plus(v1808, v1809)); real2 v1822 = minus(v1808, v1809); store(out, 90 << outShift, ctimesminusplus(v1822, tbl[0 + tbloffset], ctimes(reverse(v1822), tbl[1 + tbloffset]))); real2 v1807 = minusplus(uminus(v1803), v1804); real2 v1805 = minusplus(v1803, v1804); store(out, 58 << outShift, ctimesminusplus(reverse(v1805), tbl[318 + tbloffset], ctimes(v1805, tbl[319 + tbloffset]))); store(out, 122 << outShift, ctimesminusplus(reverse(v1807), tbl[320 + tbloffset], ctimes(v1807, tbl[321 + tbloffset]))); real2 v1651 = ctimesminusplus(reverse(v1641), tbl[290 + tbloffset], ctimes(v1641, tbl[291 + tbloffset])); real2 v1691 = ctimesminusplus(reverse(v1681), tbl[298 + tbloffset], ctimes(v1681, tbl[299 + tbloffset])); real2 v1783 = plus(v1651, v1691); real2 v1777 = reverse(minus(v1651, v1691)); real2 v1779 = minusplus(v1777, v1778); real2 v1781 = minusplus(uminus(v1777), v1778); store(out, 106 << outShift, ctimesminusplus(reverse(v1781), tbl[316 + tbloffset], ctimes(v1781, tbl[317 + tbloffset]))); store(out, 42 << outShift, ctimesminusplus(reverse(v1779), tbl[314 + tbloffset], ctimes(v1779, tbl[315 + tbloffset]))); store(out, 10 << outShift, plus(v1782, v1783)); real2 v1796 = minus(v1782, v1783); store(out, 74 << outShift, ctimesminusplus(v1796, tbl[0 + tbloffset], ctimes(reverse(v1796), tbl[1 + tbloffset]))); real2 v1720 = minus(v1645, v1644); real2 v1724 = plus(v1644, v1645); real2 v1719 = reverse(minus(v1684, v1685)); real2 v1725 = plus(v1684, v1685); real2 v1745 = plus(v1724, v1725); real2 v1739 = 
reverse(minus(v1724, v1725)); store(out, 2 << outShift, plus(v1744, v1745)); real2 v1758 = minus(v1744, v1745); store(out, 66 << outShift, ctimesminusplus(v1758, tbl[0 + tbloffset], ctimes(reverse(v1758), tbl[1 + tbloffset]))); real2 v1741 = minusplus(v1739, v1740); real2 v1743 = minusplus(uminus(v1739), v1740); store(out, 98 << outShift, ctimesminusplus(reverse(v1743), tbl[312 + tbloffset], ctimes(v1743, tbl[313 + tbloffset]))); store(out, 34 << outShift, ctimesminusplus(reverse(v1741), tbl[310 + tbloffset], ctimes(v1741, tbl[311 + tbloffset]))); real2 v1723 = minusplus(uminus(v1719), v1720); real2 v1721 = minusplus(v1719, v1720); real2 v1737 = ctimesminusplus(reverse(v1723), tbl[308 + tbloffset], ctimes(v1723, tbl[309 + tbloffset])); store(out, 50 << outShift, plus(v1717, v1737)); real2 v1770 = minus(v1717, v1737); store(out, 114 << outShift, ctimesminusplus(v1770, tbl[0 + tbloffset], ctimes(reverse(v1770), tbl[1 + tbloffset]))); real2 v1731 = ctimesminusplus(reverse(v1721), tbl[306 + tbloffset], ctimes(v1721, tbl[307 + tbloffset])); store(out, 18 << outShift, plus(v1711, v1731)); real2 v1764 = minus(v1711, v1731); store(out, 82 << outShift, ctimesminusplus(v1764, tbl[0 + tbloffset], ctimes(reverse(v1764), tbl[1 + tbloffset]))); real2 v809 = ctimesminusplus(reverse(v795), tbl[136 + tbloffset], ctimes(v795, tbl[137 + tbloffset])); real2 v969 = ctimesminusplus(reverse(v955), tbl[168 + tbloffset], ctimes(v955, tbl[169 + tbloffset])); real2 v1850 = minus(v969, v809); real2 v1854 = plus(v809, v969); real2 v849 = ctimesminusplus(reverse(v835), tbl[144 + tbloffset], ctimes(v835, tbl[145 + tbloffset])); real2 v929 = ctimesminusplus(reverse(v915), tbl[160 + tbloffset], ctimes(v915, tbl[161 + tbloffset])); real2 v889 = ctimesminusplus(reverse(v875), tbl[152 + tbloffset], ctimes(v875, tbl[153 + tbloffset])); real2 v1089 = ctimesminusplus(reverse(v1075), tbl[192 + tbloffset], ctimes(v1075, tbl[193 + tbloffset])); real2 v1009 = ctimesminusplus(reverse(v995), tbl[176 + 
tbloffset], ctimes(v995, tbl[177 + tbloffset])); real2 v1890 = minus(v1009, v849); real2 v1894 = plus(v849, v1009); real2 v1849 = reverse(minus(v889, v1049)); real2 v1855 = plus(v889, v1049); real2 v1930 = minus(v1855, v1854); real2 v1934 = plus(v1854, v1855); real2 v1895 = plus(v929, v1089); real2 v1889 = reverse(minus(v929, v1089)); real2 v1929 = reverse(minus(v1894, v1895)); real2 v1935 = plus(v1894, v1895); real2 v1955 = plus(v1934, v1935); real2 v1949 = reverse(minus(v1934, v1935)); store(out, 6 << outShift, plus(v1954, v1955)); real2 v1968 = minus(v1954, v1955); store(out, 70 << outShift, ctimesminusplus(v1968, tbl[0 + tbloffset], ctimes(reverse(v1968), tbl[1 + tbloffset]))); real2 v1951 = minusplus(v1949, v1950); store(out, 38 << outShift, ctimesminusplus(reverse(v1951), tbl[346 + tbloffset], ctimes(v1951, tbl[347 + tbloffset]))); real2 v1953 = minusplus(uminus(v1949), v1950); store(out, 102 << outShift, ctimesminusplus(reverse(v1953), tbl[348 + tbloffset], ctimes(v1953, tbl[349 + tbloffset]))); real2 v1931 = minusplus(v1929, v1930); real2 v1933 = minusplus(uminus(v1929), v1930); real2 v1947 = ctimesminusplus(reverse(v1933), tbl[344 + tbloffset], ctimes(v1933, tbl[345 + tbloffset])); store(out, 54 << outShift, plus(v1927, v1947)); real2 v1980 = minus(v1927, v1947); store(out, 118 << outShift, ctimesminusplus(v1980, tbl[0 + tbloffset], ctimes(reverse(v1980), tbl[1 + tbloffset]))); real2 v1941 = ctimesminusplus(reverse(v1931), tbl[342 + tbloffset], ctimes(v1931, tbl[343 + tbloffset])); store(out, 22 << outShift, plus(v1921, v1941)); real2 v1974 = minus(v1921, v1941); store(out, 86 << outShift, ctimesminusplus(v1974, tbl[0 + tbloffset], ctimes(reverse(v1974), tbl[1 + tbloffset]))); real2 v1851 = minusplus(v1849, v1850); real2 v1853 = minusplus(uminus(v1849), v1850); real2 v1867 = ctimesminusplus(reverse(v1853), tbl[328 + tbloffset], ctimes(v1853, tbl[329 + tbloffset])); real2 v1891 = minusplus(v1889, v1890); real2 v1893 = minusplus(uminus(v1889), v1890); real2 
v1907 = ctimesminusplus(reverse(v1893), tbl[336 + tbloffset], ctimes(v1893, tbl[337 + tbloffset])); real2 v2019 = plus(v1867, v1907); real2 v2013 = reverse(minus(v1867, v1907)); store(out, 30 << outShift, plus(v2018, v2019)); real2 v2032 = minus(v2018, v2019); store(out, 94 << outShift, ctimesminusplus(v2032, tbl[0 + tbloffset], ctimes(reverse(v2032), tbl[1 + tbloffset]))); real2 v2017 = minusplus(uminus(v2013), v2014); store(out, 126 << outShift, ctimesminusplus(reverse(v2017), tbl[356 + tbloffset], ctimes(v2017, tbl[357 + tbloffset]))); real2 v2015 = minusplus(v2013, v2014); store(out, 62 << outShift, ctimesminusplus(reverse(v2015), tbl[354 + tbloffset], ctimes(v2015, tbl[355 + tbloffset]))); real2 v1861 = ctimesminusplus(reverse(v1851), tbl[326 + tbloffset], ctimes(v1851, tbl[327 + tbloffset])); real2 v1901 = ctimesminusplus(reverse(v1891), tbl[334 + tbloffset], ctimes(v1891, tbl[335 + tbloffset])); real2 v1993 = plus(v1861, v1901); real2 v1987 = reverse(minus(v1861, v1901)); store(out, 14 << outShift, plus(v1992, v1993)); real2 v2006 = minus(v1992, v1993); store(out, 78 << outShift, ctimesminusplus(v2006, tbl[0 + tbloffset], ctimes(reverse(v2006), tbl[1 + tbloffset]))); real2 v1991 = minusplus(uminus(v1987), v1988); store(out, 110 << outShift, ctimesminusplus(reverse(v1991), tbl[352 + tbloffset], ctimes(v1991, tbl[353 + tbloffset]))); real2 v1989 = minusplus(v1987, v1988); store(out, 46 << outShift, ctimesminusplus(reverse(v1989), tbl[350 + tbloffset], ctimes(v1989, tbl[351 + tbloffset]))); real2 v593 = minusplus(v591, v592); real2 v595 = minusplus(uminus(v591), v592); real2 v473 = minusplus(v471, v472); real2 v475 = minusplus(uminus(v471), v472); real2 v555 = minusplus(uminus(v551), v552); real2 v553 = minusplus(v551, v552); real2 v609 = ctimesminusplus(reverse(v595), tbl[96 + tbloffset], ctimes(v595, tbl[97 + tbloffset])); real2 v195 = minusplus(uminus(v191), v192); real2 v193 = minusplus(v191, v192); real2 v275 = minusplus(uminus(v271), v272); real2 v273 = 
minusplus(v271, v272); real2 v673 = minusplus(v671, v672); real2 v675 = minusplus(uminus(v671), v672); real2 v689 = ctimesminusplus(reverse(v675), tbl[112 + tbloffset], ctimes(v675, tbl[113 + tbloffset])); real2 v209 = ctimesminusplus(reverse(v195), tbl[16 + tbloffset], ctimes(v195, tbl[17 + tbloffset])); real2 v289 = ctimesminusplus(reverse(v275), tbl[32 + tbloffset], ctimes(v275, tbl[33 + tbloffset])); real2 v755 = minusplus(uminus(v751), v752); real2 v753 = minusplus(v751, v752); real2 v435 = minusplus(uminus(v431), v432); real2 v433 = minusplus(v431, v432); real2 v513 = minusplus(v511, v512); real2 v515 = minusplus(uminus(v511), v512); real2 v529 = ctimesminusplus(reverse(v515), tbl[80 + tbloffset], ctimes(v515, tbl[81 + tbloffset])); real2 v353 = minusplus(v351, v352); real2 v355 = minusplus(uminus(v351), v352); real2 v369 = ctimesminusplus(reverse(v355), tbl[48 + tbloffset], ctimes(v355, tbl[49 + tbloffset])); real2 v2631 = plus(v369, v689); real2 v2625 = reverse(minus(v369, v689)); real2 v449 = ctimesminusplus(reverse(v435), tbl[64 + tbloffset], ctimes(v435, tbl[65 + tbloffset])); real2 v2710 = plus(v289, v609); real2 v2706 = minus(v609, v289); real2 v2630 = plus(v209, v529); real2 v2626 = minus(v529, v209); real2 v2790 = plus(v2630, v2631); real2 v2786 = minus(v2631, v2630); real2 v713 = minusplus(v711, v712); real2 v715 = minusplus(uminus(v711), v712); real2 v769 = ctimesminusplus(reverse(v755), tbl[128 + tbloffset], ctimes(v755, tbl[129 + tbloffset])); real2 v2705 = reverse(minus(v449, v769)); real2 v2711 = plus(v449, v769); real2 v313 = minusplus(v311, v312); real2 v315 = minusplus(uminus(v311), v312); real2 v393 = minusplus(v391, v392); real2 v395 = minusplus(uminus(v391), v392); real2 v409 = ctimesminusplus(reverse(v395), tbl[56 + tbloffset], ctimes(v395, tbl[57 + tbloffset])); real2 v729 = ctimesminusplus(reverse(v715), tbl[120 + tbloffset], ctimes(v715, tbl[121 + tbloffset])); real2 v329 = ctimesminusplus(reverse(v315), tbl[40 + tbloffset], 
ctimes(v315, tbl[41 + tbloffset])); real2 v489 = ctimesminusplus(reverse(v475), tbl[72 + tbloffset], ctimes(v475, tbl[73 + tbloffset])); real2 v153 = minusplus(v151, v152); real2 v155 = minusplus(uminus(v151), v152); real2 v169 = ctimesminusplus(reverse(v155), tbl[8 + tbloffset], ctimes(v155, tbl[9 + tbloffset])); real2 v2586 = minus(v489, v169); real2 v2590 = plus(v169, v489); real2 v233 = minusplus(v231, v232); real2 v235 = minusplus(uminus(v231), v232); real2 v633 = minusplus(v631, v632); real2 v635 = minusplus(uminus(v631), v632); real2 v649 = ctimesminusplus(reverse(v635), tbl[104 + tbloffset], ctimes(v635, tbl[105 + tbloffset])); real2 v249 = ctimesminusplus(reverse(v235), tbl[24 + tbloffset], ctimes(v235, tbl[25 + tbloffset])); real2 v569 = ctimesminusplus(reverse(v555), tbl[88 + tbloffset], ctimes(v555, tbl[89 + tbloffset])); real2 v2670 = plus(v249, v569); real2 v2666 = minus(v569, v249); real2 v2785 = reverse(minus(v2710, v2711)); real2 v2791 = plus(v2710, v2711); real2 v2825 = reverse(minus(v2790, v2791)); real2 v2831 = plus(v2790, v2791); real2 v2671 = plus(v409, v729); real2 v2665 = reverse(minus(v409, v729)); real2 v2745 = reverse(minus(v2670, v2671)); real2 v2751 = plus(v2670, v2671); real2 v2806 = minus(v2731, v2730); real2 v2810 = plus(v2730, v2731); real2 v2846 = minus(v2811, v2810); real2 v2850 = plus(v2810, v2811); real2 v2591 = plus(v329, v649); real2 v2585 = reverse(minus(v329, v649)); real2 v2750 = plus(v2590, v2591); real2 v2746 = minus(v2591, v2590); real2 v2830 = plus(v2750, v2751); real2 v2826 = minus(v2751, v2750); real2 v2845 = reverse(minus(v2830, v2831)); real2 v2851 = plus(v2830, v2831); store(out, 3 << outShift, plus(v2850, v2851)); real2 v2864 = minus(v2850, v2851); store(out, 67 << outShift, ctimesminusplus(v2864, tbl[0 + tbloffset], ctimes(reverse(v2864), tbl[1 + tbloffset]))); real2 v2849 = minusplus(uminus(v2845), v2846); real2 v2847 = minusplus(v2845, v2846); store(out, 35 << outShift, ctimesminusplus(reverse(v2847), tbl[506 + 
tbloffset], ctimes(v2847, tbl[507 + tbloffset]))); store(out, 99 << outShift, ctimesminusplus(reverse(v2849), tbl[508 + tbloffset], ctimes(v2849, tbl[509 + tbloffset]))); real2 v2827 = minusplus(v2825, v2826); real2 v2829 = minusplus(uminus(v2825), v2826); real2 v2837 = ctimesminusplus(reverse(v2827), tbl[502 + tbloffset], ctimes(v2827, tbl[503 + tbloffset])); real2 v2809 = minusplus(uminus(v2805), v2806); real2 v2807 = minusplus(v2805, v2806); real2 v2817 = ctimesminusplus(reverse(v2807), tbl[498 + tbloffset], ctimes(v2807, tbl[499 + tbloffset])); store(out, 19 << outShift, plus(v2817, v2837)); real2 v2870 = minus(v2817, v2837); store(out, 83 << outShift, ctimesminusplus(v2870, tbl[0 + tbloffset], ctimes(reverse(v2870), tbl[1 + tbloffset]))); real2 v2823 = ctimesminusplus(reverse(v2809), tbl[500 + tbloffset], ctimes(v2809, tbl[501 + tbloffset])); real2 v2843 = ctimesminusplus(reverse(v2829), tbl[504 + tbloffset], ctimes(v2829, tbl[505 + tbloffset])); store(out, 51 << outShift, plus(v2823, v2843)); real2 v2876 = minus(v2823, v2843); store(out, 115 << outShift, ctimesminusplus(v2876, tbl[0 + tbloffset], ctimes(reverse(v2876), tbl[1 + tbloffset]))); real2 v2787 = minusplus(v2785, v2786); real2 v2789 = minusplus(uminus(v2785), v2786); real2 v2803 = ctimesminusplus(reverse(v2789), tbl[496 + tbloffset], ctimes(v2789, tbl[497 + tbloffset])); real2 v2727 = minusplus(v2725, v2726); real2 v2729 = minusplus(uminus(v2725), v2726); real2 v2743 = ctimesminusplus(reverse(v2729), tbl[484 + tbloffset], ctimes(v2729, tbl[485 + tbloffset])); real2 v2914 = plus(v2743, v2783); real2 v2910 = minus(v2783, v2743); real2 v2749 = minusplus(uminus(v2745), v2746); real2 v2747 = minusplus(v2745, v2746); real2 v2763 = ctimesminusplus(reverse(v2749), tbl[488 + tbloffset], ctimes(v2749, tbl[489 + tbloffset])); real2 v2909 = reverse(minus(v2763, v2803)); real2 v2915 = plus(v2763, v2803); store(out, 27 << outShift, plus(v2914, v2915)); real2 v2928 = minus(v2914, v2915); store(out, 91 << outShift, 
ctimesminusplus(v2928, tbl[0 + tbloffset], ctimes(reverse(v2928), tbl[1 + tbloffset]))); real2 v2913 = minusplus(uminus(v2909), v2910); store(out, 123 << outShift, ctimesminusplus(reverse(v2913), tbl[516 + tbloffset], ctimes(v2913, tbl[517 + tbloffset]))); real2 v2911 = minusplus(v2909, v2910); store(out, 59 << outShift, ctimesminusplus(reverse(v2911), tbl[514 + tbloffset], ctimes(v2911, tbl[515 + tbloffset]))); real2 v2737 = ctimesminusplus(reverse(v2727), tbl[482 + tbloffset], ctimes(v2727, tbl[483 + tbloffset])); real2 v2888 = plus(v2737, v2777); real2 v2884 = minus(v2777, v2737); real2 v2797 = ctimesminusplus(reverse(v2787), tbl[494 + tbloffset], ctimes(v2787, tbl[495 + tbloffset])); real2 v2757 = ctimesminusplus(reverse(v2747), tbl[486 + tbloffset], ctimes(v2747, tbl[487 + tbloffset])); real2 v2889 = plus(v2757, v2797); real2 v2883 = reverse(minus(v2757, v2797)); store(out, 11 << outShift, plus(v2888, v2889)); real2 v2902 = minus(v2888, v2889); store(out, 75 << outShift, ctimesminusplus(v2902, tbl[0 + tbloffset], ctimes(reverse(v2902), tbl[1 + tbloffset]))); real2 v2887 = minusplus(uminus(v2883), v2884); store(out, 107 << outShift, ctimesminusplus(reverse(v2887), tbl[512 + tbloffset], ctimes(v2887, tbl[513 + tbloffset]))); real2 v2885 = minusplus(v2883, v2884); store(out, 43 << outShift, ctimesminusplus(reverse(v2885), tbl[510 + tbloffset], ctimes(v2885, tbl[511 + tbloffset]))); real2 v2669 = minusplus(uminus(v2665), v2666); real2 v2667 = minusplus(v2665, v2666); real2 v2707 = minusplus(v2705, v2706); real2 v2709 = minusplus(uminus(v2705), v2706); real2 v2717 = ctimesminusplus(reverse(v2707), tbl[478 + tbloffset], ctimes(v2707, tbl[479 + tbloffset])); real2 v2627 = minusplus(v2625, v2626); real2 v2629 = minusplus(uminus(v2625), v2626); real2 v2637 = ctimesminusplus(reverse(v2627), tbl[462 + tbloffset], ctimes(v2627, tbl[463 + tbloffset])); real2 v2961 = plus(v2637, v2717); real2 v2955 = reverse(minus(v2637, v2717)); real2 v2649 = minusplus(uminus(v2645), 
v2646); real2 v2647 = minusplus(v2645, v2646); real2 v2569 = minusplus(uminus(v2565), v2566); real2 v2567 = minusplus(v2565, v2566); real2 v2577 = ctimesminusplus(reverse(v2567), tbl[450 + tbloffset], ctimes(v2567, tbl[451 + tbloffset])); real2 v2657 = ctimesminusplus(reverse(v2647), tbl[466 + tbloffset], ctimes(v2647, tbl[467 + tbloffset])); real2 v2936 = minus(v2657, v2577); real2 v2940 = plus(v2577, v2657); real2 v2976 = minus(v2941, v2940); real2 v2980 = plus(v2940, v2941); real2 v2677 = ctimesminusplus(reverse(v2667), tbl[470 + tbloffset], ctimes(v2667, tbl[471 + tbloffset])); real2 v2587 = minusplus(v2585, v2586); real2 v2589 = minusplus(uminus(v2585), v2586); real2 v2597 = ctimesminusplus(reverse(v2587), tbl[454 + tbloffset], ctimes(v2587, tbl[455 + tbloffset])); real2 v2956 = minus(v2677, v2597); real2 v2960 = plus(v2597, v2677); real2 v2975 = reverse(minus(v2960, v2961)); real2 v2981 = plus(v2960, v2961); store(out, 7 << outShift, plus(v2980, v2981)); real2 v2994 = minus(v2980, v2981); store(out, 71 << outShift, ctimesminusplus(v2994, tbl[0 + tbloffset], ctimes(reverse(v2994), tbl[1 + tbloffset]))); real2 v2979 = minusplus(uminus(v2975), v2976); store(out, 103 << outShift, ctimesminusplus(reverse(v2979), tbl[528 + tbloffset], ctimes(v2979, tbl[529 + tbloffset]))); real2 v2977 = minusplus(v2975, v2976); store(out, 39 << outShift, ctimesminusplus(reverse(v2977), tbl[526 + tbloffset], ctimes(v2977, tbl[527 + tbloffset]))); real2 v2939 = minusplus(uminus(v2935), v2936); real2 v2937 = minusplus(v2935, v2936); real2 v2953 = ctimesminusplus(reverse(v2939), tbl[520 + tbloffset], ctimes(v2939, tbl[521 + tbloffset])); real2 v2957 = minusplus(v2955, v2956); real2 v2959 = minusplus(uminus(v2955), v2956); real2 v2973 = ctimesminusplus(reverse(v2959), tbl[524 + tbloffset], ctimes(v2959, tbl[525 + tbloffset])); store(out, 55 << outShift, plus(v2953, v2973)); real2 v3006 = minus(v2953, v2973); store(out, 119 << outShift, ctimesminusplus(v3006, tbl[0 + tbloffset], 
ctimes(reverse(v3006), tbl[1 + tbloffset]))); real2 v2947 = ctimesminusplus(reverse(v2937), tbl[518 + tbloffset], ctimes(v2937, tbl[519 + tbloffset])); real2 v2967 = ctimesminusplus(reverse(v2957), tbl[522 + tbloffset], ctimes(v2957, tbl[523 + tbloffset])); store(out, 23 << outShift, plus(v2947, v2967)); real2 v3000 = minus(v2947, v2967); store(out, 87 << outShift, ctimesminusplus(v3000, tbl[0 + tbloffset], ctimes(reverse(v3000), tbl[1 + tbloffset]))); real2 v2663 = ctimesminusplus(reverse(v2649), tbl[468 + tbloffset], ctimes(v2649, tbl[469 + tbloffset])); real2 v2583 = ctimesminusplus(reverse(v2569), tbl[452 + tbloffset], ctimes(v2569, tbl[453 + tbloffset])); real2 v3014 = minus(v2663, v2583); real2 v3018 = plus(v2583, v2663); real2 v3015 = minusplus(v3013, v3014); real2 v3017 = minusplus(uminus(v3013), v3014); real2 v2643 = ctimesminusplus(reverse(v2629), tbl[464 + tbloffset], ctimes(v2629, tbl[465 + tbloffset])); real2 v2723 = ctimesminusplus(reverse(v2709), tbl[480 + tbloffset], ctimes(v2709, tbl[481 + tbloffset])); real2 v3039 = plus(v2643, v2723); real2 v3033 = reverse(minus(v2643, v2723)); real2 v2683 = ctimesminusplus(reverse(v2669), tbl[472 + tbloffset], ctimes(v2669, tbl[473 + tbloffset])); real2 v3031 = ctimesminusplus(reverse(v3017), tbl[532 + tbloffset], ctimes(v3017, tbl[533 + tbloffset])); real2 v2603 = ctimesminusplus(reverse(v2589), tbl[456 + tbloffset], ctimes(v2589, tbl[457 + tbloffset])); real2 v3034 = minus(v2683, v2603); real2 v3038 = plus(v2603, v2683); real2 v3037 = minusplus(uminus(v3033), v3034); real2 v3035 = minusplus(v3033, v3034); real2 v3051 = ctimesminusplus(reverse(v3037), tbl[536 + tbloffset], ctimes(v3037, tbl[537 + tbloffset])); store(out, 63 << outShift, plus(v3031, v3051)); real2 v3084 = minus(v3031, v3051); store(out, 127 << outShift, ctimesminusplus(v3084, tbl[0 + tbloffset], ctimes(reverse(v3084), tbl[1 + tbloffset]))); real2 v3025 = ctimesminusplus(reverse(v3015), tbl[530 + tbloffset], ctimes(v3015, tbl[531 + tbloffset])); 
real2 v3045 = ctimesminusplus(reverse(v3035), tbl[534 + tbloffset], ctimes(v3035, tbl[535 + tbloffset])); store(out, 31 << outShift, plus(v3025, v3045)); real2 v3078 = minus(v3025, v3045); store(out, 95 << outShift, ctimesminusplus(v3078, tbl[0 + tbloffset], ctimes(reverse(v3078), tbl[1 + tbloffset]))); real2 v3058 = plus(v3018, v3019); real2 v3054 = minus(v3019, v3018); real2 v3053 = reverse(minus(v3038, v3039)); real2 v3059 = plus(v3038, v3039); real2 v3055 = minusplus(v3053, v3054); store(out, 47 << outShift, ctimesminusplus(reverse(v3055), tbl[538 + tbloffset], ctimes(v3055, tbl[539 + tbloffset]))); real2 v3057 = minusplus(uminus(v3053), v3054); store(out, 111 << outShift, ctimesminusplus(reverse(v3057), tbl[540 + tbloffset], ctimes(v3057, tbl[541 + tbloffset]))); store(out, 15 << outShift, plus(v3058, v3059)); real2 v3072 = minus(v3058, v3059); store(out, 79 << outShift, ctimesminusplus(v3072, tbl[0 + tbloffset], ctimes(reverse(v3072), tbl[1 + tbloffset]))); real2 v683 = ctimesminusplus(reverse(v673), tbl[110 + tbloffset], ctimes(v673, tbl[111 + tbloffset])); real2 v363 = ctimesminusplus(reverse(v353), tbl[46 + tbloffset], ctimes(v353, tbl[47 + tbloffset])); real2 v2105 = plus(v363, v683); real2 v2099 = reverse(minus(v363, v683)); real2 v283 = ctimesminusplus(reverse(v273), tbl[30 + tbloffset], ctimes(v273, tbl[31 + tbloffset])); real2 v723 = ctimesminusplus(reverse(v713), tbl[118 + tbloffset], ctimes(v713, tbl[119 + tbloffset])); real2 v403 = ctimesminusplus(reverse(v393), tbl[54 + tbloffset], ctimes(v393, tbl[55 + tbloffset])); real2 v603 = ctimesminusplus(reverse(v593), tbl[94 + tbloffset], ctimes(v593, tbl[95 + tbloffset])); real2 v2180 = minus(v603, v283); real2 v2184 = plus(v283, v603); real2 v2145 = plus(v403, v723); real2 v2139 = reverse(minus(v403, v723)); real2 v543 = ctimesminusplus(reverse(v533), tbl[82 + tbloffset], ctimes(v533, tbl[83 + tbloffset])); real2 v383 = ctimesminusplus(reverse(v373), tbl[50 + tbloffset], ctimes(v373, tbl[51 + 
tbloffset])); real2 v703 = ctimesminusplus(reverse(v693), tbl[114 + tbloffset], ctimes(v693, tbl[115 + tbloffset])); real2 v2125 = plus(v383, v703); real2 v2119 = reverse(minus(v383, v703)); real2 v223 = ctimesminusplus(reverse(v213), tbl[18 + tbloffset], ctimes(v213, tbl[19 + tbloffset])); real2 v2120 = minus(v543, v223); real2 v2124 = plus(v223, v543); real2 v443 = ctimesminusplus(reverse(v433), tbl[62 + tbloffset], ctimes(v433, tbl[63 + tbloffset])); real2 v203 = ctimesminusplus(reverse(v193), tbl[14 + tbloffset], ctimes(v193, tbl[15 + tbloffset])); real2 v763 = ctimesminusplus(reverse(v753), tbl[126 + tbloffset], ctimes(v753, tbl[127 + tbloffset])); real2 v2179 = reverse(minus(v443, v763)); real2 v2185 = plus(v443, v763); real2 v523 = ctimesminusplus(reverse(v513), tbl[78 + tbloffset], ctimes(v513, tbl[79 + tbloffset])); real2 v2100 = minus(v523, v203); real2 v2104 = plus(v203, v523); real2 v2264 = plus(v2104, v2105); real2 v2260 = minus(v2105, v2104); real2 v643 = ctimesminusplus(reverse(v633), tbl[102 + tbloffset], ctimes(v633, tbl[103 + tbloffset])); real2 v2265 = plus(v2184, v2185); real2 v2259 = reverse(minus(v2184, v2185)); real2 v563 = ctimesminusplus(reverse(v553), tbl[86 + tbloffset], ctimes(v553, tbl[87 + tbloffset])); real2 v243 = ctimesminusplus(reverse(v233), tbl[22 + tbloffset], ctimes(v233, tbl[23 + tbloffset])); real2 v2144 = plus(v243, v563); real2 v2140 = minus(v563, v243); real2 v143 = ctimesminusplus(reverse(v133), tbl[2 + tbloffset], ctimes(v133, tbl[3 + tbloffset])); real2 v183 = ctimesminusplus(reverse(v173), tbl[10 + tbloffset], ctimes(v173, tbl[11 + tbloffset])); real2 v2084 = plus(v183, v503); real2 v2080 = minus(v503, v183); real2 v163 = ctimesminusplus(reverse(v153), tbl[6 + tbloffset], ctimes(v153, tbl[7 + tbloffset])); real2 v303 = ctimesminusplus(reverse(v293), tbl[34 + tbloffset], ctimes(v293, tbl[35 + tbloffset])); real2 v623 = ctimesminusplus(reverse(v613), tbl[98 + tbloffset], ctimes(v613, tbl[99 + tbloffset])); real2 v2039 = 
reverse(minus(v303, v623)); real2 v2045 = plus(v303, v623); real2 v463 = ctimesminusplus(reverse(v453), tbl[66 + tbloffset], ctimes(v453, tbl[67 + tbloffset])); real2 v2044 = plus(v143, v463); real2 v2040 = minus(v463, v143); real2 v2204 = plus(v2044, v2045); real2 v2200 = minus(v2045, v2044); real2 v323 = ctimesminusplus(reverse(v313), tbl[38 + tbloffset], ctimes(v313, tbl[39 + tbloffset])); real2 v2205 = plus(v2124, v2125); real2 v2199 = reverse(minus(v2124, v2125)); real2 v2280 = minus(v2205, v2204); real2 v2284 = plus(v2204, v2205); real2 v2225 = plus(v2144, v2145); real2 v2219 = reverse(minus(v2144, v2145)); real2 v2305 = plus(v2264, v2265); real2 v2299 = reverse(minus(v2264, v2265)); real2 v2240 = minus(v2085, v2084); real2 v2244 = plus(v2084, v2085); real2 v2279 = reverse(minus(v2244, v2245)); real2 v2285 = plus(v2244, v2245); real2 v2281 = minusplus(v2279, v2280); real2 v2283 = minusplus(uminus(v2279), v2280); real2 v2291 = ctimesminusplus(reverse(v2281), tbl[406 + tbloffset], ctimes(v2281, tbl[407 + tbloffset])); real2 v483 = ctimesminusplus(reverse(v473), tbl[70 + tbloffset], ctimes(v473, tbl[71 + tbloffset])); real2 v2060 = minus(v483, v163); real2 v2064 = plus(v163, v483); real2 v2065 = plus(v323, v643); real2 v2059 = reverse(minus(v323, v643)); real2 v2220 = minus(v2065, v2064); real2 v2224 = plus(v2064, v2065); real2 v2304 = plus(v2224, v2225); real2 v2300 = minus(v2225, v2224); real2 v2301 = minusplus(v2299, v2300); real2 v2303 = minusplus(uminus(v2299), v2300); real2 v2311 = ctimesminusplus(reverse(v2301), tbl[410 + tbloffset], ctimes(v2301, tbl[411 + tbloffset])); store(out, 17 << outShift, plus(v2291, v2311)); real2 v2344 = minus(v2291, v2311); store(out, 81 << outShift, ctimesminusplus(v2344, tbl[0 + tbloffset], ctimes(reverse(v2344), tbl[1 + tbloffset]))); real2 v2297 = ctimesminusplus(reverse(v2283), tbl[408 + tbloffset], ctimes(v2283, tbl[409 + tbloffset])); real2 v2317 = ctimesminusplus(reverse(v2303), tbl[412 + tbloffset], ctimes(v2303, 
tbl[413 + tbloffset])); store(out, 49 << outShift, plus(v2297, v2317)); real2 v2350 = minus(v2297, v2317); store(out, 113 << outShift, ctimesminusplus(v2350, tbl[0 + tbloffset], ctimes(reverse(v2350), tbl[1 + tbloffset]))); real2 v2320 = minus(v2285, v2284); real2 v2324 = plus(v2284, v2285); real2 v2325 = plus(v2304, v2305); real2 v2319 = reverse(minus(v2304, v2305)); store(out, 1 << outShift, plus(v2324, v2325)); real2 v2338 = minus(v2324, v2325); store(out, 65 << outShift, ctimesminusplus(v2338, tbl[0 + tbloffset], ctimes(reverse(v2338), tbl[1 + tbloffset]))); real2 v2321 = minusplus(v2319, v2320); store(out, 33 << outShift, ctimesminusplus(reverse(v2321), tbl[414 + tbloffset], ctimes(v2321, tbl[415 + tbloffset]))); real2 v2323 = minusplus(uminus(v2319), v2320); store(out, 97 << outShift, ctimesminusplus(reverse(v2323), tbl[416 + tbloffset], ctimes(v2323, tbl[417 + tbloffset]))); real2 v2201 = minusplus(v2199, v2200); real2 v2203 = minusplus(uminus(v2199), v2200); real2 v2263 = minusplus(uminus(v2259), v2260); real2 v2261 = minusplus(v2259, v2260); real2 v2243 = minusplus(uminus(v2239), v2240); real2 v2241 = minusplus(v2239, v2240); real2 v2257 = ctimesminusplus(reverse(v2243), tbl[400 + tbloffset], ctimes(v2243, tbl[401 + tbloffset])); real2 v2217 = ctimesminusplus(reverse(v2203), tbl[392 + tbloffset], ctimes(v2203, tbl[393 + tbloffset])); real2 v2388 = plus(v2217, v2257); real2 v2384 = minus(v2257, v2217); real2 v2277 = ctimesminusplus(reverse(v2263), tbl[404 + tbloffset], ctimes(v2263, tbl[405 + tbloffset])); real2 v2221 = minusplus(v2219, v2220); real2 v2223 = minusplus(uminus(v2219), v2220); real2 v2237 = ctimesminusplus(reverse(v2223), tbl[396 + tbloffset], ctimes(v2223, tbl[397 + tbloffset])); real2 v2389 = plus(v2237, v2277); real2 v2383 = reverse(minus(v2237, v2277)); store(out, 25 << outShift, plus(v2388, v2389)); real2 v2402 = minus(v2388, v2389); store(out, 89 << outShift, ctimesminusplus(v2402, tbl[0 + tbloffset], ctimes(reverse(v2402), tbl[1 + 
tbloffset]))); real2 v2385 = minusplus(v2383, v2384); real2 v2387 = minusplus(uminus(v2383), v2384); store(out, 121 << outShift, ctimesminusplus(reverse(v2387), tbl[424 + tbloffset], ctimes(v2387, tbl[425 + tbloffset]))); store(out, 57 << outShift, ctimesminusplus(reverse(v2385), tbl[422 + tbloffset], ctimes(v2385, tbl[423 + tbloffset]))); real2 v2251 = ctimesminusplus(reverse(v2241), tbl[398 + tbloffset], ctimes(v2241, tbl[399 + tbloffset])); real2 v2211 = ctimesminusplus(reverse(v2201), tbl[390 + tbloffset], ctimes(v2201, tbl[391 + tbloffset])); real2 v2358 = minus(v2251, v2211); real2 v2362 = plus(v2211, v2251); real2 v2271 = ctimesminusplus(reverse(v2261), tbl[402 + tbloffset], ctimes(v2261, tbl[403 + tbloffset])); real2 v2231 = ctimesminusplus(reverse(v2221), tbl[394 + tbloffset], ctimes(v2221, tbl[395 + tbloffset])); real2 v2357 = reverse(minus(v2231, v2271)); real2 v2363 = plus(v2231, v2271); store(out, 9 << outShift, plus(v2362, v2363)); real2 v2376 = minus(v2362, v2363); store(out, 73 << outShift, ctimesminusplus(v2376, tbl[0 + tbloffset], ctimes(reverse(v2376), tbl[1 + tbloffset]))); real2 v2361 = minusplus(uminus(v2357), v2358); store(out, 105 << outShift, ctimesminusplus(reverse(v2361), tbl[420 + tbloffset], ctimes(v2361, tbl[421 + tbloffset]))); real2 v2359 = minusplus(v2357, v2358); store(out, 41 << outShift, ctimesminusplus(reverse(v2359), tbl[418 + tbloffset], ctimes(v2359, tbl[419 + tbloffset]))); real2 v2121 = minusplus(v2119, v2120); real2 v2123 = minusplus(uminus(v2119), v2120); real2 v2083 = minusplus(uminus(v2079), v2080); real2 v2081 = minusplus(v2079, v2080); real2 v2091 = ctimesminusplus(reverse(v2081), tbl[366 + tbloffset], ctimes(v2081, tbl[367 + tbloffset])); real2 v2043 = minusplus(uminus(v2039), v2040); real2 v2041 = minusplus(v2039, v2040); real2 v2051 = ctimesminusplus(reverse(v2041), tbl[358 + tbloffset], ctimes(v2041, tbl[359 + tbloffset])); real2 v2131 = ctimesminusplus(reverse(v2121), tbl[374 + tbloffset], ctimes(v2121, tbl[375 + 
tbloffset])); real2 v2163 = minusplus(uminus(v2159), v2160); real2 v2161 = minusplus(v2159, v2160); real2 v2171 = ctimesminusplus(reverse(v2161), tbl[382 + tbloffset], ctimes(v2161, tbl[383 + tbloffset])); real2 v2409 = reverse(minus(v2091, v2171)); real2 v2415 = plus(v2091, v2171); real2 v2410 = minus(v2131, v2051); real2 v2414 = plus(v2051, v2131); real2 v2454 = plus(v2414, v2415); real2 v2450 = minus(v2415, v2414); real2 v2181 = minusplus(v2179, v2180); real2 v2183 = minusplus(uminus(v2179), v2180); real2 v2191 = ctimesminusplus(reverse(v2181), tbl[386 + tbloffset], ctimes(v2181, tbl[387 + tbloffset])); real2 v2103 = minusplus(uminus(v2099), v2100); real2 v2101 = minusplus(v2099, v2100); real2 v2111 = ctimesminusplus(reverse(v2101), tbl[370 + tbloffset], ctimes(v2101, tbl[371 + tbloffset])); real2 v2435 = plus(v2111, v2191); real2 v2429 = reverse(minus(v2111, v2191)); real2 v2141 = minusplus(v2139, v2140); real2 v2143 = minusplus(uminus(v2139), v2140); real2 v2151 = ctimesminusplus(reverse(v2141), tbl[378 + tbloffset], ctimes(v2141, tbl[379 + tbloffset])); real2 v2063 = minusplus(uminus(v2059), v2060); real2 v2061 = minusplus(v2059, v2060); real2 v2071 = ctimesminusplus(reverse(v2061), tbl[362 + tbloffset], ctimes(v2061, tbl[363 + tbloffset])); real2 v2434 = plus(v2071, v2151); real2 v2430 = minus(v2151, v2071); real2 v2455 = plus(v2434, v2435); real2 v2449 = reverse(minus(v2434, v2435)); store(out, 5 << outShift, plus(v2454, v2455)); real2 v2468 = minus(v2454, v2455); store(out, 69 << outShift, ctimesminusplus(v2468, tbl[0 + tbloffset], ctimes(reverse(v2468), tbl[1 + tbloffset]))); real2 v2451 = minusplus(v2449, v2450); real2 v2453 = minusplus(uminus(v2449), v2450); store(out, 101 << outShift, ctimesminusplus(reverse(v2453), tbl[436 + tbloffset], ctimes(v2453, tbl[437 + tbloffset]))); store(out, 37 << outShift, ctimesminusplus(reverse(v2451), tbl[434 + tbloffset], ctimes(v2451, tbl[435 + tbloffset]))); real2 v2411 = minusplus(v2409, v2410); real2 v2413 = 
minusplus(uminus(v2409), v2410); real2 v2433 = minusplus(uminus(v2429), v2430); real2 v2431 = minusplus(v2429, v2430); real2 v2421 = ctimesminusplus(reverse(v2411), tbl[426 + tbloffset], ctimes(v2411, tbl[427 + tbloffset])); real2 v2441 = ctimesminusplus(reverse(v2431), tbl[430 + tbloffset], ctimes(v2431, tbl[431 + tbloffset])); store(out, 21 << outShift, plus(v2421, v2441)); real2 v2474 = minus(v2421, v2441); store(out, 85 << outShift, ctimesminusplus(v2474, tbl[0 + tbloffset], ctimes(reverse(v2474), tbl[1 + tbloffset]))); real2 v2427 = ctimesminusplus(reverse(v2413), tbl[428 + tbloffset], ctimes(v2413, tbl[429 + tbloffset])); real2 v2447 = ctimesminusplus(reverse(v2433), tbl[432 + tbloffset], ctimes(v2433, tbl[433 + tbloffset])); store(out, 53 << outShift, plus(v2427, v2447)); real2 v2480 = minus(v2427, v2447); store(out, 117 << outShift, ctimesminusplus(v2480, tbl[0 + tbloffset], ctimes(reverse(v2480), tbl[1 + tbloffset]))); real2 v2057 = ctimesminusplus(reverse(v2043), tbl[360 + tbloffset], ctimes(v2043, tbl[361 + tbloffset])); real2 v2097 = ctimesminusplus(reverse(v2083), tbl[368 + tbloffset], ctimes(v2083, tbl[369 + tbloffset])); real2 v2157 = ctimesminusplus(reverse(v2143), tbl[380 + tbloffset], ctimes(v2143, tbl[381 + tbloffset])); real2 v2197 = ctimesminusplus(reverse(v2183), tbl[388 + tbloffset], ctimes(v2183, tbl[389 + tbloffset])); real2 v2117 = ctimesminusplus(reverse(v2103), tbl[372 + tbloffset], ctimes(v2103, tbl[373 + tbloffset])); real2 v2507 = reverse(minus(v2117, v2197)); real2 v2513 = plus(v2117, v2197); real2 v2137 = ctimesminusplus(reverse(v2123), tbl[376 + tbloffset], ctimes(v2123, tbl[377 + tbloffset])); real2 v2488 = minus(v2137, v2057); real2 v2492 = plus(v2057, v2137); real2 v2177 = ctimesminusplus(reverse(v2163), tbl[384 + tbloffset], ctimes(v2163, tbl[385 + tbloffset])); real2 v2493 = plus(v2097, v2177); real2 v2487 = reverse(minus(v2097, v2177)); real2 v2532 = plus(v2492, v2493); real2 v2528 = minus(v2493, v2492); real2 v2077 = 
ctimesminusplus(reverse(v2063), tbl[364 + tbloffset], ctimes(v2063, tbl[365 + tbloffset])); real2 v2512 = plus(v2077, v2157); real2 v2508 = minus(v2157, v2077); real2 v2527 = reverse(minus(v2512, v2513)); real2 v2533 = plus(v2512, v2513); real2 v2529 = minusplus(v2527, v2528); real2 v2531 = minusplus(uminus(v2527), v2528); store(out, 109 << outShift, ctimesminusplus(reverse(v2531), tbl[448 + tbloffset], ctimes(v2531, tbl[449 + tbloffset]))); store(out, 45 << outShift, ctimesminusplus(reverse(v2529), tbl[446 + tbloffset], ctimes(v2529, tbl[447 + tbloffset]))); store(out, 13 << outShift, plus(v2532, v2533)); real2 v2546 = minus(v2532, v2533); store(out, 77 << outShift, ctimesminusplus(v2546, tbl[0 + tbloffset], ctimes(reverse(v2546), tbl[1 + tbloffset]))); real2 v2509 = minusplus(v2507, v2508); real2 v2511 = minusplus(uminus(v2507), v2508); real2 v2491 = minusplus(uminus(v2487), v2488); real2 v2489 = minusplus(v2487, v2488); real2 v2499 = ctimesminusplus(reverse(v2489), tbl[438 + tbloffset], ctimes(v2489, tbl[439 + tbloffset])); real2 v2519 = ctimesminusplus(reverse(v2509), tbl[442 + tbloffset], ctimes(v2509, tbl[443 + tbloffset])); store(out, 29 << outShift, plus(v2499, v2519)); real2 v2552 = minus(v2499, v2519); store(out, 93 << outShift, ctimesminusplus(v2552, tbl[0 + tbloffset], ctimes(reverse(v2552), tbl[1 + tbloffset]))); real2 v2505 = ctimesminusplus(reverse(v2491), tbl[440 + tbloffset], ctimes(v2491, tbl[441 + tbloffset])); real2 v2525 = ctimesminusplus(reverse(v2511), tbl[444 + tbloffset], ctimes(v2511, tbl[445 + tbloffset])); store(out, 61 << outShift, plus(v2505, v2525)); real2 v2558 = minus(v2505, v2525); store(out, 125 << outShift, ctimesminusplus(v2558, tbl[0 + tbloffset], ctimes(reverse(v2558), tbl[1 + tbloffset]))); // Pres : 76263 } } ALIGNED(8192) void but128b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - 
LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * (i0 >> outShift); // Pres : 148586 real2 v56 = load(in, 54 << inShift); real2 v120 = load(in, 118 << inShift); real2 v571 = reverse(minus(v120, v56)); real2 v577 = plus(v56, v120); real2 v24 = load(in, 22 << inShift); real2 v88 = load(in, 86 << inShift); real2 v576 = plus(v24, v88); real2 v572 = minus(v88, v24); real2 v573 = minusplus(v571, v572); real2 v575 = minusplus(uminus(v571), v572); real2 v589 = ctimesminusplus(reverse(v575), tbl[92 + tbloffset], ctimes(v575, tbl[93 + tbloffset])); real2 v583 = ctimesminusplus(reverse(v573), tbl[90 + tbloffset], ctimes(v573, tbl[91 + tbloffset])); real2 v897 = plus(v576, v577); real2 v891 = reverse(minus(v577, v576)); real2 v8 = load(in, 6 << inShift); real2 v72 = load(in, 70 << inShift); real2 v252 = minus(v72, v8); real2 v256 = plus(v8, v72); real2 v104 = load(in, 102 << inShift); real2 v40 = load(in, 38 << inShift); real2 v251 = reverse(minus(v104, v40)); real2 v257 = plus(v40, v104); real2 v255 = minusplus(uminus(v251), v252); real2 v253 = minusplus(v251, v252); real2 v263 = ctimesminusplus(reverse(v253), tbl[26 + tbloffset], ctimes(v253, tbl[27 + tbloffset])); real2 v896 = plus(v256, v257); real2 v892 = minus(v257, v256); real2 v895 = minusplus(uminus(v891), v892); real2 v893 = minusplus(v891, v892); real2 v909 = ctimesminusplus(reverse(v895), tbl[156 + tbloffset], ctimes(v895, tbl[157 + tbloffset])); real2 v903 = ctimesminusplus(reverse(v893), tbl[154 + tbloffset], ctimes(v893, tbl[155 + tbloffset])); real2 v269 = ctimesminusplus(reverse(v255), tbl[28 + tbloffset], ctimes(v255, tbl[29 + tbloffset])); real2 v1216 = plus(v896, v897); real2 v1212 = minus(v897, v896); real2 v2160 = minus(v583, v263); real2 v2164 = plus(v263, v583); real2 v2686 = minus(v589, v269); real2 v2690 = plus(v269, v589); real2 v96 = load(in, 94 << inShift); real2 v32 = 
load(in, 30 << inShift); real2 v736 = plus(v32, v96); real2 v732 = minus(v96, v32); real2 v64 = load(in, 62 << inShift); real2 v128 = load(in, 126 << inShift); real2 v737 = plus(v64, v128); real2 v731 = reverse(minus(v128, v64)); real2 v1057 = plus(v736, v737); real2 v1051 = reverse(minus(v737, v736)); real2 v733 = minusplus(v731, v732); real2 v735 = minusplus(uminus(v731), v732); real2 v749 = ctimesminusplus(reverse(v735), tbl[124 + tbloffset], ctimes(v735, tbl[125 + tbloffset])); real2 v743 = ctimesminusplus(reverse(v733), tbl[122 + tbloffset], ctimes(v733, tbl[123 + tbloffset])); real2 v16 = load(in, 14 << inShift); real2 v80 = load(in, 78 << inShift); real2 v412 = minus(v80, v16); real2 v416 = plus(v16, v80); real2 v112 = load(in, 110 << inShift); real2 v48 = load(in, 46 << inShift); real2 v417 = plus(v48, v112); real2 v411 = reverse(minus(v112, v48)); real2 v1056 = plus(v416, v417); real2 v1052 = minus(v417, v416); real2 v1055 = minusplus(uminus(v1051), v1052); real2 v1053 = minusplus(v1051, v1052); real2 v1063 = ctimesminusplus(reverse(v1053), tbl[186 + tbloffset], ctimes(v1053, tbl[187 + tbloffset])); real2 v1665 = plus(v903, v1063); real2 v1659 = reverse(minus(v1063, v903)); real2 v1069 = ctimesminusplus(reverse(v1055), tbl[188 + tbloffset], ctimes(v1055, tbl[189 + tbloffset])); real2 v1869 = reverse(minus(v1069, v909)); real2 v1875 = plus(v909, v1069); real2 v413 = minusplus(v411, v412); real2 v415 = minusplus(uminus(v411), v412); real2 v429 = ctimesminusplus(reverse(v415), tbl[60 + tbloffset], ctimes(v415, tbl[61 + tbloffset])); real2 v1217 = plus(v1056, v1057); real2 v1211 = reverse(minus(v1057, v1056)); real2 v1297 = plus(v1216, v1217); real2 v1291 = reverse(minus(v1217, v1216)); real2 v2691 = plus(v429, v749); real2 v2685 = reverse(minus(v749, v429)); real2 v2765 = reverse(minus(v2691, v2690)); real2 v2771 = plus(v2690, v2691); real2 v2689 = minusplus(uminus(v2685), v2686); real2 v2687 = minusplus(v2685, v2686); real2 v2703 = 
ctimesminusplus(reverse(v2689), tbl[476 + tbloffset], ctimes(v2689, tbl[477 + tbloffset])); real2 v2697 = ctimesminusplus(reverse(v2687), tbl[474 + tbloffset], ctimes(v2687, tbl[475 + tbloffset])); real2 v1215 = minusplus(uminus(v1211), v1212); real2 v1213 = minusplus(v1211, v1212); real2 v1223 = ctimesminusplus(reverse(v1213), tbl[218 + tbloffset], ctimes(v1213, tbl[219 + tbloffset])); real2 v1229 = ctimesminusplus(reverse(v1215), tbl[220 + tbloffset], ctimes(v1215, tbl[221 + tbloffset])); real2 v423 = ctimesminusplus(reverse(v413), tbl[58 + tbloffset], ctimes(v413, tbl[59 + tbloffset])); real2 v2165 = plus(v423, v743); real2 v2159 = reverse(minus(v743, v423)); real2 v2245 = plus(v2164, v2165); real2 v2239 = reverse(minus(v2165, v2164)); real2 v44 = load(in, 42 << inShift); real2 v108 = load(in, 106 << inShift); real2 v331 = reverse(minus(v108, v44)); real2 v337 = plus(v44, v108); real2 v76 = load(in, 74 << inShift); real2 v12 = load(in, 10 << inShift); real2 v336 = plus(v12, v76); real2 v332 = minus(v76, v12); real2 v976 = plus(v336, v337); real2 v972 = minus(v337, v336); real2 v335 = minusplus(uminus(v331), v332); real2 v333 = minusplus(v331, v332); real2 v343 = ctimesminusplus(reverse(v333), tbl[42 + tbloffset], ctimes(v333, tbl[43 + tbloffset])); real2 v349 = ctimesminusplus(reverse(v335), tbl[44 + tbloffset], ctimes(v335, tbl[45 + tbloffset])); real2 v124 = load(in, 122 << inShift); real2 v60 = load(in, 58 << inShift); real2 v651 = reverse(minus(v124, v60)); real2 v657 = plus(v60, v124); real2 v28 = load(in, 26 << inShift); real2 v92 = load(in, 90 << inShift); real2 v652 = minus(v92, v28); real2 v656 = plus(v28, v92); real2 v977 = plus(v656, v657); real2 v971 = reverse(minus(v657, v656)); real2 v973 = minusplus(v971, v972); real2 v975 = minusplus(uminus(v971), v972); real2 v983 = ctimesminusplus(reverse(v973), tbl[170 + tbloffset], ctimes(v973, tbl[171 + tbloffset])); real2 v1131 = reverse(minus(v977, v976)); real2 v1137 = plus(v976, v977); real2 v655 = 
minusplus(uminus(v651), v652); real2 v653 = minusplus(v651, v652); real2 v669 = ctimesminusplus(reverse(v655), tbl[108 + tbloffset], ctimes(v655, tbl[109 + tbloffset])); real2 v663 = ctimesminusplus(reverse(v653), tbl[106 + tbloffset], ctimes(v653, tbl[107 + tbloffset])); real2 v2079 = reverse(minus(v663, v343)); real2 v2085 = plus(v343, v663); real2 v2605 = reverse(minus(v669, v349)); real2 v2611 = plus(v349, v669); real2 v989 = ctimesminusplus(reverse(v975), tbl[172 + tbloffset], ctimes(v975, tbl[173 + tbloffset])); real2 v20 = load(in, 18 << inShift); real2 v84 = load(in, 82 << inShift); real2 v496 = plus(v20, v84); real2 v492 = minus(v84, v20); real2 v52 = load(in, 50 << inShift); real2 v116 = load(in, 114 << inShift); real2 v491 = reverse(minus(v116, v52)); real2 v497 = plus(v52, v116); real2 v817 = plus(v496, v497); real2 v811 = reverse(minus(v497, v496)); real2 v493 = minusplus(v491, v492); real2 v495 = minusplus(uminus(v491), v492); real2 v509 = ctimesminusplus(reverse(v495), tbl[76 + tbloffset], ctimes(v495, tbl[77 + tbloffset])); real2 v503 = ctimesminusplus(reverse(v493), tbl[74 + tbloffset], ctimes(v493, tbl[75 + tbloffset])); real2 v36 = load(in, 34 << inShift); real2 v100 = load(in, 98 << inShift); real2 v171 = reverse(minus(v100, v36)); real2 v177 = plus(v36, v100); real2 v68 = load(in, 66 << inShift); real2 v4 = load(in, 2 << inShift); real2 v176 = plus(v4, v68); real2 v172 = minus(v68, v4); real2 v816 = plus(v176, v177); real2 v812 = minus(v177, v176); real2 v1136 = plus(v816, v817); real2 v1132 = minus(v817, v816); real2 v1133 = minusplus(v1131, v1132); real2 v1135 = minusplus(uminus(v1131), v1132); real2 v1149 = ctimesminusplus(reverse(v1135), tbl[204 + tbloffset], ctimes(v1135, tbl[205 + tbloffset])); real2 v1296 = plus(v1136, v1137); real2 v1292 = minus(v1137, v1136); real2 v1295 = minusplus(uminus(v1291), v1292); real2 v1293 = minusplus(v1291, v1292); real2 v1303 = ctimesminusplus(reverse(v1293), tbl[234 + tbloffset], ctimes(v1293, tbl[235 + 
tbloffset])); real2 v1331 = reverse(minus(v1297, v1296)); real2 v1337 = plus(v1296, v1297); real2 v173 = minusplus(v171, v172); real2 v175 = minusplus(uminus(v171), v172); real2 v189 = ctimesminusplus(reverse(v175), tbl[12 + tbloffset], ctimes(v175, tbl[13 + tbloffset])); real2 v1309 = ctimesminusplus(reverse(v1295), tbl[236 + tbloffset], ctimes(v1295, tbl[237 + tbloffset])); real2 v815 = minusplus(uminus(v811), v812); real2 v813 = minusplus(v811, v812); real2 v1143 = ctimesminusplus(reverse(v1133), tbl[202 + tbloffset], ctimes(v1133, tbl[203 + tbloffset])); real2 v1541 = reverse(minus(v1229, v1149)); real2 v1547 = plus(v1149, v1229); real2 v2610 = plus(v189, v509); real2 v2606 = minus(v509, v189); real2 v2770 = plus(v2610, v2611); real2 v2766 = minus(v2611, v2610); real2 v823 = ctimesminusplus(reverse(v813), tbl[138 + tbloffset], ctimes(v813, tbl[139 + tbloffset])); real2 v829 = ctimesminusplus(reverse(v815), tbl[140 + tbloffset], ctimes(v815, tbl[141 + tbloffset])); real2 v2811 = plus(v2770, v2771); real2 v2805 = reverse(minus(v2771, v2770)); real2 v2767 = minusplus(v2765, v2766); real2 v2769 = minusplus(uminus(v2765), v2766); real2 v2607 = minusplus(v2605, v2606); real2 v2609 = minusplus(uminus(v2605), v2606); real2 v2617 = ctimesminusplus(reverse(v2607), tbl[458 + tbloffset], ctimes(v2607, tbl[459 + tbloffset])); real2 v2623 = ctimesminusplus(reverse(v2609), tbl[460 + tbloffset], ctimes(v2609, tbl[461 + tbloffset])); real2 v3013 = reverse(minus(v2703, v2623)); real2 v3019 = plus(v2623, v2703); real2 v2783 = ctimesminusplus(reverse(v2769), tbl[492 + tbloffset], ctimes(v2769, tbl[493 + tbloffset])); real2 v2941 = plus(v2617, v2697); real2 v2935 = reverse(minus(v2697, v2617)); real2 v2777 = ctimesminusplus(reverse(v2767), tbl[490 + tbloffset], ctimes(v2767, tbl[491 + tbloffset])); real2 v1660 = minus(v983, v823); real2 v1664 = plus(v823, v983); real2 v1874 = plus(v829, v989); real2 v1870 = minus(v989, v829); real2 v1909 = reverse(minus(v1875, v1874)); real2 v1915 
= plus(v1874, v1875); real2 v1663 = minusplus(uminus(v1659), v1660); real2 v1661 = minusplus(v1659, v1660); real2 v1677 = ctimesminusplus(reverse(v1663), tbl[296 + tbloffset], ctimes(v1663, tbl[297 + tbloffset])); real2 v1873 = minusplus(uminus(v1869), v1870); real2 v1871 = minusplus(v1869, v1870); real2 v1887 = ctimesminusplus(reverse(v1873), tbl[332 + tbloffset], ctimes(v1873, tbl[333 + tbloffset])); real2 v1705 = plus(v1664, v1665); real2 v1699 = reverse(minus(v1665, v1664)); real2 v1671 = ctimesminusplus(reverse(v1661), tbl[294 + tbloffset], ctimes(v1661, tbl[295 + tbloffset])); real2 v1881 = ctimesminusplus(reverse(v1871), tbl[330 + tbloffset], ctimes(v1871, tbl[331 + tbloffset])); real2 v1469 = plus(v1143, v1223); real2 v1463 = reverse(minus(v1223, v1143)); real2 v54 = load(in, 52 << inShift); real2 v118 = load(in, 116 << inShift); real2 v537 = plus(v54, v118); real2 v531 = reverse(minus(v118, v54)); real2 v86 = load(in, 84 << inShift); real2 v22 = load(in, 20 << inShift); real2 v536 = plus(v22, v86); real2 v532 = minus(v86, v22); real2 v851 = reverse(minus(v537, v536)); real2 v857 = plus(v536, v537); real2 v533 = minusplus(v531, v532); real2 v535 = minusplus(uminus(v531), v532); real2 v549 = ctimesminusplus(reverse(v535), tbl[84 + tbloffset], ctimes(v535, tbl[85 + tbloffset])); real2 v102 = load(in, 100 << inShift); real2 v38 = load(in, 36 << inShift); real2 v217 = plus(v38, v102); real2 v211 = reverse(minus(v102, v38)); real2 v70 = load(in, 68 << inShift); real2 v6 = load(in, 4 << inShift); real2 v216 = plus(v6, v70); real2 v212 = minus(v70, v6); real2 v213 = minusplus(v211, v212); real2 v215 = minusplus(uminus(v211), v212); real2 v229 = ctimesminusplus(reverse(v215), tbl[20 + tbloffset], ctimes(v215, tbl[21 + tbloffset])); real2 v2646 = minus(v549, v229); real2 v2650 = plus(v229, v549); real2 v856 = plus(v216, v217); real2 v852 = minus(v217, v216); real2 v853 = minusplus(v851, v852); real2 v855 = minusplus(uminus(v851), v852); real2 v863 = 
ctimesminusplus(reverse(v853), tbl[146 + tbloffset], ctimes(v853, tbl[147 + tbloffset])); real2 v869 = ctimesminusplus(reverse(v855), tbl[148 + tbloffset], ctimes(v855, tbl[149 + tbloffset])); real2 v1176 = plus(v856, v857); real2 v1172 = minus(v857, v856); real2 v110 = load(in, 108 << inShift); real2 v46 = load(in, 44 << inShift); real2 v377 = plus(v46, v110); real2 v371 = reverse(minus(v110, v46)); real2 v78 = load(in, 76 << inShift); real2 v14 = load(in, 12 << inShift); real2 v372 = minus(v78, v14); real2 v376 = plus(v14, v78); real2 v1012 = minus(v377, v376); real2 v1016 = plus(v376, v377); real2 v373 = minusplus(v371, v372); real2 v375 = minusplus(uminus(v371), v372); real2 v389 = ctimesminusplus(reverse(v375), tbl[52 + tbloffset], ctimes(v375, tbl[53 + tbloffset])); real2 v30 = load(in, 28 << inShift); real2 v94 = load(in, 92 << inShift); real2 v696 = plus(v30, v94); real2 v692 = minus(v94, v30); real2 v62 = load(in, 60 << inShift); real2 v126 = load(in, 124 << inShift); real2 v697 = plus(v62, v126); real2 v691 = reverse(minus(v126, v62)); real2 v1017 = plus(v696, v697); real2 v1011 = reverse(minus(v697, v696)); real2 v1171 = reverse(minus(v1017, v1016)); real2 v1177 = plus(v1016, v1017); real2 v1013 = minusplus(v1011, v1012); real2 v1015 = minusplus(uminus(v1011), v1012); real2 v1175 = minusplus(uminus(v1171), v1172); real2 v1173 = minusplus(v1171, v1172); real2 v1183 = ctimesminusplus(reverse(v1173), tbl[210 + tbloffset], ctimes(v1173, tbl[211 + tbloffset])); real2 v1189 = ctimesminusplus(reverse(v1175), tbl[212 + tbloffset], ctimes(v1175, tbl[213 + tbloffset])); real2 v1029 = ctimesminusplus(reverse(v1015), tbl[180 + tbloffset], ctimes(v1015, tbl[181 + tbloffset])); real2 v1023 = ctimesminusplus(reverse(v1013), tbl[178 + tbloffset], ctimes(v1013, tbl[179 + tbloffset])); real2 v1625 = plus(v863, v1023); real2 v1619 = reverse(minus(v1023, v863)); real2 v1835 = plus(v869, v1029); real2 v1829 = reverse(minus(v1029, v869)); real2 v693 = minusplus(v691, v692); 
real2 v695 = minusplus(uminus(v691), v692); real2 v709 = ctimesminusplus(reverse(v695), tbl[116 + tbloffset], ctimes(v695, tbl[117 + tbloffset])); real2 v2645 = reverse(minus(v709, v389)); real2 v2651 = plus(v389, v709); real2 v1257 = plus(v1176, v1177); real2 v1251 = reverse(minus(v1177, v1176)); real2 v2731 = plus(v2650, v2651); real2 v2725 = reverse(minus(v2651, v2650)); real2 v114 = load(in, 112 << inShift); real2 v50 = load(in, 48 << inShift); real2 v457 = plus(v50, v114); real2 v451 = reverse(minus(v114, v50)); real2 v18 = load(in, 16 << inShift); real2 v82 = load(in, 80 << inShift); real2 v456 = plus(v18, v82); real2 v452 = minus(v82, v18); real2 v771 = reverse(minus(v457, v456)); real2 v777 = plus(v456, v457); real2 v453 = minusplus(v451, v452); real2 v455 = minusplus(uminus(v451), v452); real2 v469 = ctimesminusplus(reverse(v455), tbl[68 + tbloffset], ctimes(v455, tbl[69 + tbloffset])); real2 v66 = load(in, 64 << inShift); real2 v2 = load(in, 0 << inShift); real2 v132 = minus(v66, v2); real2 v136 = plus(v2, v66); real2 v98 = load(in, 96 << inShift); real2 v34 = load(in, 32 << inShift); real2 v131 = reverse(minus(v98, v34)); real2 v137 = plus(v34, v98); real2 v133 = minusplus(v131, v132); real2 v135 = minusplus(uminus(v131), v132); real2 v149 = ctimesminusplus(reverse(v135), tbl[4 + tbloffset], ctimes(v135, tbl[5 + tbloffset])); real2 v2566 = minus(v469, v149); real2 v2570 = plus(v149, v469); real2 v772 = minus(v137, v136); real2 v776 = plus(v136, v137); real2 v1092 = minus(v777, v776); real2 v1096 = plus(v776, v777); real2 v773 = minusplus(v771, v772); real2 v775 = minusplus(uminus(v771), v772); real2 v783 = ctimesminusplus(reverse(v773), tbl[130 + tbloffset], ctimes(v773, tbl[131 + tbloffset])); real2 v789 = ctimesminusplus(reverse(v775), tbl[132 + tbloffset], ctimes(v775, tbl[133 + tbloffset])); real2 v74 = load(in, 72 << inShift); real2 v10 = load(in, 8 << inShift); real2 v296 = plus(v10, v74); real2 v292 = minus(v74, v10); real2 v42 = load(in, 40 << 
inShift); real2 v106 = load(in, 104 << inShift); real2 v291 = reverse(minus(v106, v42)); real2 v297 = plus(v42, v106); real2 v293 = minusplus(v291, v292); real2 v295 = minusplus(uminus(v291), v292); real2 v309 = ctimesminusplus(reverse(v295), tbl[36 + tbloffset], ctimes(v295, tbl[37 + tbloffset])); real2 v932 = minus(v297, v296); real2 v936 = plus(v296, v297); real2 v122 = load(in, 120 << inShift); real2 v58 = load(in, 56 << inShift); real2 v617 = plus(v58, v122); real2 v611 = reverse(minus(v122, v58)); real2 v26 = load(in, 24 << inShift); real2 v90 = load(in, 88 << inShift); real2 v612 = minus(v90, v26); real2 v616 = plus(v26, v90); real2 v937 = plus(v616, v617); real2 v931 = reverse(minus(v617, v616)); real2 v1091 = reverse(minus(v937, v936)); real2 v1097 = plus(v936, v937); real2 v933 = minusplus(v931, v932); real2 v935 = minusplus(uminus(v931), v932); real2 v1093 = minusplus(v1091, v1092); real2 v1095 = minusplus(uminus(v1091), v1092); real2 v1103 = ctimesminusplus(reverse(v1093), tbl[194 + tbloffset], ctimes(v1093, tbl[195 + tbloffset])); real2 v1468 = plus(v1103, v1183); real2 v1464 = minus(v1183, v1103); real2 v1508 = plus(v1468, v1469); real2 v1504 = minus(v1469, v1468); real2 v1252 = minus(v1097, v1096); real2 v1256 = plus(v1096, v1097); real2 v1336 = plus(v1256, v1257); real2 v1332 = minus(v1257, v1256); real2 v1335 = minusplus(uminus(v1331), v1332); real2 v1333 = minusplus(v1331, v1332); real2 v1343 = ctimesminusplus(reverse(v1333), tbl[242 + tbloffset], ctimes(v1333, tbl[243 + tbloffset])); real2 v1349 = ctimesminusplus(reverse(v1335), tbl[244 + tbloffset], ctimes(v1335, tbl[245 + tbloffset])); real2 v1376 = plus(v1336, v1337); real2 v1372 = minus(v1337, v1336); real2 v1465 = minusplus(v1463, v1464); real2 v1467 = minusplus(uminus(v1463), v1464); real2 v1255 = minusplus(uminus(v1251), v1252); real2 v1253 = minusplus(v1251, v1252); real2 v1481 = ctimesminusplus(reverse(v1467), tbl[264 + tbloffset], ctimes(v1467, tbl[265 + tbloffset])); real2 v1475 = 
ctimesminusplus(reverse(v1465), tbl[262 + tbloffset], ctimes(v1465, tbl[263 + tbloffset])); real2 v1109 = ctimesminusplus(reverse(v1095), tbl[196 + tbloffset], ctimes(v1095, tbl[197 + tbloffset])); real2 v1542 = minus(v1189, v1109); real2 v1546 = plus(v1109, v1189); real2 v1545 = minusplus(uminus(v1541), v1542); real2 v1543 = minusplus(v1541, v1542); real2 v1553 = ctimesminusplus(reverse(v1543), tbl[274 + tbloffset], ctimes(v1543, tbl[275 + tbloffset])); real2 v1559 = ctimesminusplus(reverse(v1545), tbl[276 + tbloffset], ctimes(v1545, tbl[277 + tbloffset])); real2 v1582 = minus(v1547, v1546); real2 v1586 = plus(v1546, v1547); real2 v1269 = ctimesminusplus(reverse(v1255), tbl[228 + tbloffset], ctimes(v1255, tbl[229 + tbloffset])); real2 v1438 = minus(v1309, v1269); real2 v1442 = plus(v1269, v1309); real2 v1263 = ctimesminusplus(reverse(v1253), tbl[226 + tbloffset], ctimes(v1253, tbl[227 + tbloffset])); real2 v943 = ctimesminusplus(reverse(v933), tbl[162 + tbloffset], ctimes(v933, tbl[163 + tbloffset])); real2 v1624 = plus(v783, v943); real2 v1620 = minus(v943, v783); real2 v1623 = minusplus(uminus(v1619), v1620); real2 v1621 = minusplus(v1619, v1620); real2 v1700 = minus(v1625, v1624); real2 v1704 = plus(v1624, v1625); real2 v1631 = ctimesminusplus(reverse(v1621), tbl[286 + tbloffset], ctimes(v1621, tbl[287 + tbloffset])); real2 v949 = ctimesminusplus(reverse(v935), tbl[164 + tbloffset], ctimes(v935, tbl[165 + tbloffset])); real2 v1830 = minus(v949, v789); real2 v1834 = plus(v789, v949); real2 v1782 = plus(v1631, v1671); real2 v1778 = minus(v1671, v1631); real2 v1910 = minus(v1835, v1834); real2 v1914 = plus(v1834, v1835); real2 v1950 = minus(v1915, v1914); real2 v1954 = plus(v1914, v1915); real2 v1913 = minusplus(uminus(v1909), v1910); real2 v1911 = minusplus(v1909, v1910); real2 v613 = minusplus(v611, v612); real2 v615 = minusplus(uminus(v611), v612); real2 v629 = ctimesminusplus(reverse(v615), tbl[100 + tbloffset], ctimes(v615, tbl[101 + tbloffset])); real2 v1744 
= plus(v1704, v1705); real2 v1740 = minus(v1705, v1704); real2 v1637 = ctimesminusplus(reverse(v1623), tbl[288 + tbloffset], ctimes(v1623, tbl[289 + tbloffset])); real2 v1927 = ctimesminusplus(reverse(v1913), tbl[340 + tbloffset], ctimes(v1913, tbl[341 + tbloffset])); real2 v2571 = plus(v309, v629); real2 v2565 = reverse(minus(v629, v309)); real2 v1833 = minusplus(uminus(v1829), v1830); real2 v1831 = minusplus(v1829, v1830); real2 v1921 = ctimesminusplus(reverse(v1911), tbl[338 + tbloffset], ctimes(v1911, tbl[339 + tbloffset])); real2 v1804 = minus(v1677, v1637); real2 v1808 = plus(v1637, v1677); real2 v1847 = ctimesminusplus(reverse(v1833), tbl[324 + tbloffset], ctimes(v1833, tbl[325 + tbloffset])); real2 v2014 = minus(v1887, v1847); real2 v2018 = plus(v1847, v1887); real2 v1841 = ctimesminusplus(reverse(v1831), tbl[322 + tbloffset], ctimes(v1831, tbl[323 + tbloffset])); real2 v1988 = minus(v1881, v1841); real2 v1992 = plus(v1841, v1881); real2 v1703 = minusplus(uminus(v1699), v1700); real2 v1701 = minusplus(v1699, v1700); real2 v1717 = ctimesminusplus(reverse(v1703), tbl[304 + tbloffset], ctimes(v1703, tbl[305 + tbloffset])); real2 v1711 = ctimesminusplus(reverse(v1701), tbl[302 + tbloffset], ctimes(v1701, tbl[303 + tbloffset])); real2 v2730 = plus(v2570, v2571); real2 v2726 = minus(v2571, v2570); real2 v1412 = minus(v1303, v1263); real2 v1416 = plus(v1263, v1303); real2 v63 = load(in, 61 << inShift); real2 v127 = load(in, 125 << inShift); real2 v717 = plus(v63, v127); real2 v711 = reverse(minus(v127, v63)); real2 v95 = load(in, 93 << inShift); real2 v31 = load(in, 29 << inShift); real2 v712 = minus(v95, v31); real2 v716 = plus(v31, v95); real2 v1037 = plus(v716, v717); real2 v1031 = reverse(minus(v717, v716)); real2 v79 = load(in, 77 << inShift); real2 v15 = load(in, 13 << inShift); real2 v396 = plus(v15, v79); real2 v392 = minus(v79, v15); real2 v111 = load(in, 109 << inShift); real2 v47 = load(in, 45 << inShift); real2 v397 = plus(v47, v111); real2 v391 = 
reverse(minus(v111, v47)); real2 v1032 = minus(v397, v396); real2 v1036 = plus(v396, v397); real2 v1033 = minusplus(v1031, v1032); real2 v1035 = minusplus(uminus(v1031), v1032); real2 v1049 = ctimesminusplus(reverse(v1035), tbl[184 + tbloffset], ctimes(v1035, tbl[185 + tbloffset])); real2 v1043 = ctimesminusplus(reverse(v1033), tbl[182 + tbloffset], ctimes(v1033, tbl[183 + tbloffset])); real2 v1197 = plus(v1036, v1037); real2 v1191 = reverse(minus(v1037, v1036)); real2 v23 = load(in, 21 << inShift); real2 v87 = load(in, 85 << inShift); real2 v556 = plus(v23, v87); real2 v552 = minus(v87, v23); real2 v119 = load(in, 117 << inShift); real2 v55 = load(in, 53 << inShift); real2 v557 = plus(v55, v119); real2 v551 = reverse(minus(v119, v55)); real2 v877 = plus(v556, v557); real2 v871 = reverse(minus(v557, v556)); real2 v7 = load(in, 5 << inShift); real2 v71 = load(in, 69 << inShift); real2 v232 = minus(v71, v7); real2 v236 = plus(v7, v71); real2 v103 = load(in, 101 << inShift); real2 v39 = load(in, 37 << inShift); real2 v237 = plus(v39, v103); real2 v231 = reverse(minus(v103, v39)); real2 v876 = plus(v236, v237); real2 v872 = minus(v237, v236); real2 v1192 = minus(v877, v876); real2 v1196 = plus(v876, v877); real2 v1271 = reverse(minus(v1197, v1196)); real2 v1277 = plus(v1196, v1197); real2 v875 = minusplus(uminus(v871), v872); real2 v873 = minusplus(v871, v872); real2 v883 = ctimesminusplus(reverse(v873), tbl[150 + tbloffset], ctimes(v873, tbl[151 + tbloffset])); real2 v1639 = reverse(minus(v1043, v883)); real2 v1645 = plus(v883, v1043); real2 v1195 = minusplus(uminus(v1191), v1192); real2 v1193 = minusplus(v1191, v1192); real2 v1209 = ctimesminusplus(reverse(v1195), tbl[216 + tbloffset], ctimes(v1195, tbl[217 + tbloffset])); real2 v1203 = ctimesminusplus(reverse(v1193), tbl[214 + tbloffset], ctimes(v1193, tbl[215 + tbloffset])); real2 v83 = load(in, 81 << inShift); real2 v19 = load(in, 17 << inShift); real2 v476 = plus(v19, v83); real2 v472 = minus(v83, v19); real2 v51 
= load(in, 49 << inShift); real2 v115 = load(in, 113 << inShift); real2 v477 = plus(v51, v115); real2 v471 = reverse(minus(v115, v51)); real2 v797 = plus(v476, v477); real2 v791 = reverse(minus(v477, v476)); real2 v3 = load(in, 1 << inShift); real2 v67 = load(in, 65 << inShift); real2 v156 = plus(v3, v67); real2 v152 = minus(v67, v3); real2 v35 = load(in, 33 << inShift); real2 v99 = load(in, 97 << inShift); real2 v157 = plus(v35, v99); real2 v151 = reverse(minus(v99, v35)); real2 v792 = minus(v157, v156); real2 v796 = plus(v156, v157); real2 v793 = minusplus(v791, v792); real2 v795 = minusplus(uminus(v791), v792); real2 v803 = ctimesminusplus(reverse(v793), tbl[134 + tbloffset], ctimes(v793, tbl[135 + tbloffset])); real2 v1112 = minus(v797, v796); real2 v1116 = plus(v796, v797); real2 v107 = load(in, 105 << inShift); real2 v43 = load(in, 41 << inShift); real2 v317 = plus(v43, v107); real2 v311 = reverse(minus(v107, v43)); real2 v75 = load(in, 73 << inShift); real2 v11 = load(in, 9 << inShift); real2 v316 = plus(v11, v75); real2 v312 = minus(v75, v11); real2 v956 = plus(v316, v317); real2 v952 = minus(v317, v316); real2 v59 = load(in, 57 << inShift); real2 v123 = load(in, 121 << inShift); real2 v631 = reverse(minus(v123, v59)); real2 v637 = plus(v59, v123); real2 v27 = load(in, 25 << inShift); real2 v91 = load(in, 89 << inShift); real2 v636 = plus(v27, v91); real2 v632 = minus(v91, v27); real2 v957 = plus(v636, v637); real2 v951 = reverse(minus(v637, v636)); real2 v1111 = reverse(minus(v957, v956)); real2 v1117 = plus(v956, v957); real2 v1276 = plus(v1116, v1117); real2 v1272 = minus(v1117, v1116); real2 v1275 = minusplus(uminus(v1271), v1272); real2 v1273 = minusplus(v1271, v1272); real2 v1283 = ctimesminusplus(reverse(v1273), tbl[230 + tbloffset], ctimes(v1273, tbl[231 + tbloffset])); real2 v1352 = minus(v1277, v1276); real2 v1356 = plus(v1276, v1277); real2 v1289 = ctimesminusplus(reverse(v1275), tbl[232 + tbloffset], ctimes(v1275, tbl[233 + tbloffset])); real2 
v1115 = minusplus(uminus(v1111), v1112); real2 v1113 = minusplus(v1111, v1112); real2 v1123 = ctimesminusplus(reverse(v1113), tbl[198 + tbloffset], ctimes(v1113, tbl[199 + tbloffset])); real2 v1129 = ctimesminusplus(reverse(v1115), tbl[200 + tbloffset], ctimes(v1115, tbl[201 + tbloffset])); real2 v1488 = plus(v1123, v1203); real2 v1484 = minus(v1203, v1123); real2 v1566 = plus(v1129, v1209); real2 v1562 = minus(v1209, v1129); real2 v85 = load(in, 83 << inShift); real2 v21 = load(in, 19 << inShift); real2 v512 = minus(v85, v21); real2 v516 = plus(v21, v85); real2 v117 = load(in, 115 << inShift); real2 v53 = load(in, 51 << inShift); real2 v517 = plus(v53, v117); real2 v511 = reverse(minus(v117, v53)); real2 v831 = reverse(minus(v517, v516)); real2 v837 = plus(v516, v517); real2 v69 = load(in, 67 << inShift); real2 v5 = load(in, 3 << inShift); real2 v192 = minus(v69, v5); real2 v196 = plus(v5, v69); real2 v37 = load(in, 35 << inShift); real2 v101 = load(in, 99 << inShift); real2 v197 = plus(v37, v101); real2 v191 = reverse(minus(v101, v37)); real2 v832 = minus(v197, v196); real2 v836 = plus(v196, v197); real2 v1152 = minus(v837, v836); real2 v1156 = plus(v836, v837); real2 v61 = load(in, 59 << inShift); real2 v125 = load(in, 123 << inShift); real2 v677 = plus(v61, v125); real2 v671 = reverse(minus(v125, v61)); real2 v29 = load(in, 27 << inShift); real2 v93 = load(in, 91 << inShift); real2 v672 = minus(v93, v29); real2 v676 = plus(v29, v93); real2 v997 = plus(v676, v677); real2 v991 = reverse(minus(v677, v676)); real2 v109 = load(in, 107 << inShift); real2 v45 = load(in, 43 << inShift); real2 v357 = plus(v45, v109); real2 v351 = reverse(minus(v109, v45)); real2 v77 = load(in, 75 << inShift); real2 v13 = load(in, 11 << inShift); real2 v352 = minus(v77, v13); real2 v356 = plus(v13, v77); real2 v992 = minus(v357, v356); real2 v996 = plus(v356, v357); real2 v1157 = plus(v996, v997); real2 v1151 = reverse(minus(v997, v996)); real2 v1155 = minusplus(uminus(v1151), v1152); 
real2 v1153 = minusplus(v1151, v1152); real2 v1163 = ctimesminusplus(reverse(v1153), tbl[206 + tbloffset], ctimes(v1153, tbl[207 + tbloffset])); real2 v1316 = plus(v1156, v1157); real2 v1312 = minus(v1157, v1156); real2 v41 = load(in, 39 << inShift); real2 v105 = load(in, 103 << inShift); real2 v277 = plus(v41, v105); real2 v271 = reverse(minus(v105, v41)); real2 v9 = load(in, 7 << inShift); real2 v73 = load(in, 71 << inShift); real2 v276 = plus(v9, v73); real2 v272 = minus(v73, v9); real2 v916 = plus(v276, v277); real2 v912 = minus(v277, v276); real2 v89 = load(in, 87 << inShift); real2 v25 = load(in, 23 << inShift); real2 v592 = minus(v89, v25); real2 v596 = plus(v25, v89); real2 v57 = load(in, 55 << inShift); real2 v121 = load(in, 119 << inShift); real2 v591 = reverse(minus(v121, v57)); real2 v597 = plus(v57, v121); real2 v911 = reverse(minus(v597, v596)); real2 v917 = plus(v596, v597); real2 v1236 = plus(v916, v917); real2 v1232 = minus(v917, v916); real2 v81 = load(in, 79 << inShift); real2 v17 = load(in, 15 << inShift); real2 v432 = minus(v81, v17); real2 v436 = plus(v17, v81); real2 v113 = load(in, 111 << inShift); real2 v49 = load(in, 47 << inShift); real2 v437 = plus(v49, v113); real2 v431 = reverse(minus(v113, v49)); real2 v1072 = minus(v437, v436); real2 v1076 = plus(v436, v437); real2 v65 = load(in, 63 << inShift); real2 v129 = load(in, 127 << inShift); real2 v757 = plus(v65, v129); real2 v751 = reverse(minus(v129, v65)); real2 v97 = load(in, 95 << inShift); real2 v33 = load(in, 31 << inShift); real2 v752 = minus(v97, v33); real2 v756 = plus(v33, v97); real2 v1077 = plus(v756, v757); real2 v1071 = reverse(minus(v757, v756)); real2 v1231 = reverse(minus(v1077, v1076)); real2 v1237 = plus(v1076, v1077); real2 v1317 = plus(v1236, v1237); real2 v1311 = reverse(minus(v1237, v1236)); real2 v1351 = reverse(minus(v1317, v1316)); real2 v1357 = plus(v1316, v1317); real2 v1371 = reverse(minus(v1357, v1356)); real2 v1377 = plus(v1356, v1357); store(out, 0 << 
outShift, plus(v1376, v1377)); real2 v1390 = minus(v1376, v1377); store(out, 64 << outShift, ctimesminusplus(v1390, tbl[0 + tbloffset], ctimes(reverse(v1390), tbl[1 + tbloffset]))); real2 v1353 = minusplus(v1351, v1352); real2 v1355 = minusplus(uminus(v1351), v1352); real2 v1369 = ctimesminusplus(reverse(v1355), tbl[248 + tbloffset], ctimes(v1355, tbl[249 + tbloffset])); store(out, 48 << outShift, plus(v1349, v1369)); real2 v1404 = minus(v1349, v1369); store(out, 112 << outShift, ctimesminusplus(v1404, tbl[0 + tbloffset], ctimes(reverse(v1404), tbl[1 + tbloffset]))); real2 v1363 = ctimesminusplus(reverse(v1353), tbl[246 + tbloffset], ctimes(v1353, tbl[247 + tbloffset])); store(out, 16 << outShift, plus(v1343, v1363)); real2 v1398 = minus(v1343, v1363); store(out, 80 << outShift, ctimesminusplus(v1398, tbl[0 + tbloffset], ctimes(reverse(v1398), tbl[1 + tbloffset]))); real2 v1373 = minusplus(v1371, v1372); real2 v1375 = minusplus(uminus(v1371), v1372); store(out, 96 << outShift, ctimesminusplus(reverse(v1375), tbl[252 + tbloffset], ctimes(v1375, tbl[253 + tbloffset]))); store(out, 32 << outShift, ctimesminusplus(reverse(v1373), tbl[250 + tbloffset], ctimes(v1373, tbl[251 + tbloffset]))); real2 v1313 = minusplus(v1311, v1312); real2 v1315 = minusplus(uminus(v1311), v1312); real2 v1323 = ctimesminusplus(reverse(v1313), tbl[238 + tbloffset], ctimes(v1313, tbl[239 + tbloffset])); real2 v1417 = plus(v1283, v1323); real2 v1411 = reverse(minus(v1323, v1283)); store(out, 8 << outShift, plus(v1416, v1417)); real2 v1430 = minus(v1416, v1417); store(out, 72 << outShift, ctimesminusplus(v1430, tbl[0 + tbloffset], ctimes(reverse(v1430), tbl[1 + tbloffset]))); real2 v1413 = minusplus(v1411, v1412); real2 v1415 = minusplus(uminus(v1411), v1412); store(out, 104 << outShift, ctimesminusplus(reverse(v1415), tbl[256 + tbloffset], ctimes(v1415, tbl[257 + tbloffset]))); store(out, 40 << outShift, ctimesminusplus(reverse(v1413), tbl[254 + tbloffset], ctimes(v1413, tbl[255 + tbloffset]))); 
real2 v1329 = ctimesminusplus(reverse(v1315), tbl[240 + tbloffset], ctimes(v1315, tbl[241 + tbloffset])); real2 v1443 = plus(v1289, v1329); real2 v1437 = reverse(minus(v1329, v1289)); store(out, 24 << outShift, plus(v1442, v1443)); real2 v1456 = minus(v1442, v1443); store(out, 88 << outShift, ctimesminusplus(v1456, tbl[0 + tbloffset], ctimes(reverse(v1456), tbl[1 + tbloffset]))); real2 v1441 = minusplus(uminus(v1437), v1438); real2 v1439 = minusplus(v1437, v1438); store(out, 120 << outShift, ctimesminusplus(reverse(v1441), tbl[260 + tbloffset], ctimes(v1441, tbl[261 + tbloffset]))); store(out, 56 << outShift, ctimesminusplus(reverse(v1439), tbl[258 + tbloffset], ctimes(v1439, tbl[259 + tbloffset]))); real2 v1235 = minusplus(uminus(v1231), v1232); real2 v1233 = minusplus(v1231, v1232); real2 v1243 = ctimesminusplus(reverse(v1233), tbl[222 + tbloffset], ctimes(v1233, tbl[223 + tbloffset])); real2 v1489 = plus(v1163, v1243); real2 v1483 = reverse(minus(v1243, v1163)); real2 v1509 = plus(v1488, v1489); real2 v1503 = reverse(minus(v1489, v1488)); store(out, 4 << outShift, plus(v1508, v1509)); real2 v1522 = minus(v1508, v1509); store(out, 68 << outShift, ctimesminusplus(v1522, tbl[0 + tbloffset], ctimes(reverse(v1522), tbl[1 + tbloffset]))); real2 v1507 = minusplus(uminus(v1503), v1504); real2 v1505 = minusplus(v1503, v1504); store(out, 36 << outShift, ctimesminusplus(reverse(v1505), tbl[270 + tbloffset], ctimes(v1505, tbl[271 + tbloffset]))); store(out, 100 << outShift, ctimesminusplus(reverse(v1507), tbl[272 + tbloffset], ctimes(v1507, tbl[273 + tbloffset]))); real2 v1485 = minusplus(v1483, v1484); real2 v1487 = minusplus(uminus(v1483), v1484); real2 v1501 = ctimesminusplus(reverse(v1487), tbl[268 + tbloffset], ctimes(v1487, tbl[269 + tbloffset])); store(out, 52 << outShift, plus(v1481, v1501)); real2 v1534 = minus(v1481, v1501); store(out, 116 << outShift, ctimesminusplus(v1534, tbl[0 + tbloffset], ctimes(reverse(v1534), tbl[1 + tbloffset]))); real2 v1495 = 
ctimesminusplus(reverse(v1485), tbl[266 + tbloffset], ctimes(v1485, tbl[267 + tbloffset])); store(out, 20 << outShift, plus(v1475, v1495)); real2 v1528 = minus(v1475, v1495); store(out, 84 << outShift, ctimesminusplus(v1528, tbl[0 + tbloffset], ctimes(reverse(v1528), tbl[1 + tbloffset]))); real2 v1249 = ctimesminusplus(reverse(v1235), tbl[224 + tbloffset], ctimes(v1235, tbl[225 + tbloffset])); real2 v1169 = ctimesminusplus(reverse(v1155), tbl[208 + tbloffset], ctimes(v1155, tbl[209 + tbloffset])); real2 v1567 = plus(v1169, v1249); real2 v1561 = reverse(minus(v1249, v1169)); real2 v1581 = reverse(minus(v1567, v1566)); real2 v1587 = plus(v1566, v1567); store(out, 12 << outShift, plus(v1586, v1587)); real2 v1600 = minus(v1586, v1587); store(out, 76 << outShift, ctimesminusplus(v1600, tbl[0 + tbloffset], ctimes(reverse(v1600), tbl[1 + tbloffset]))); real2 v1583 = minusplus(v1581, v1582); store(out, 44 << outShift, ctimesminusplus(reverse(v1583), tbl[282 + tbloffset], ctimes(v1583, tbl[283 + tbloffset]))); real2 v1585 = minusplus(uminus(v1581), v1582); store(out, 108 << outShift, ctimesminusplus(reverse(v1585), tbl[284 + tbloffset], ctimes(v1585, tbl[285 + tbloffset]))); real2 v1565 = minusplus(uminus(v1561), v1562); real2 v1563 = minusplus(v1561, v1562); real2 v1579 = ctimesminusplus(reverse(v1565), tbl[280 + tbloffset], ctimes(v1565, tbl[281 + tbloffset])); store(out, 60 << outShift, plus(v1559, v1579)); real2 v1612 = minus(v1559, v1579); store(out, 124 << outShift, ctimesminusplus(v1612, tbl[0 + tbloffset], ctimes(reverse(v1612), tbl[1 + tbloffset]))); real2 v1573 = ctimesminusplus(reverse(v1563), tbl[278 + tbloffset], ctimes(v1563, tbl[279 + tbloffset])); store(out, 28 << outShift, plus(v1553, v1573)); real2 v1606 = minus(v1553, v1573); store(out, 92 << outShift, ctimesminusplus(v1606, tbl[0 + tbloffset], ctimes(reverse(v1606), tbl[1 + tbloffset]))); real2 v833 = minusplus(v831, v832); real2 v835 = minusplus(uminus(v831), v832); real2 v955 = minusplus(uminus(v951), 
v952); real2 v953 = minusplus(v951, v952); real2 v963 = ctimesminusplus(reverse(v953), tbl[166 + tbloffset], ctimes(v953, tbl[167 + tbloffset])); real2 v995 = minusplus(uminus(v991), v992); real2 v993 = minusplus(v991, v992); real2 v1003 = ctimesminusplus(reverse(v993), tbl[174 + tbloffset], ctimes(v993, tbl[175 + tbloffset])); real2 v843 = ctimesminusplus(reverse(v833), tbl[142 + tbloffset], ctimes(v833, tbl[143 + tbloffset])); real2 v1640 = minus(v963, v803); real2 v1644 = plus(v803, v963); real2 v1680 = minus(v1003, v843); real2 v1684 = plus(v843, v1003); real2 v1641 = minusplus(v1639, v1640); real2 v1643 = minusplus(uminus(v1639), v1640); real2 v1657 = ctimesminusplus(reverse(v1643), tbl[292 + tbloffset], ctimes(v1643, tbl[293 + tbloffset])); real2 v913 = minusplus(v911, v912); real2 v915 = minusplus(uminus(v911), v912); real2 v1073 = minusplus(v1071, v1072); real2 v1075 = minusplus(uminus(v1071), v1072); real2 v923 = ctimesminusplus(reverse(v913), tbl[158 + tbloffset], ctimes(v913, tbl[159 + tbloffset])); real2 v1083 = ctimesminusplus(reverse(v1073), tbl[190 + tbloffset], ctimes(v1073, tbl[191 + tbloffset])); real2 v1685 = plus(v923, v1083); real2 v1679 = reverse(minus(v1083, v923)); real2 v1681 = minusplus(v1679, v1680); real2 v1683 = minusplus(uminus(v1679), v1680); real2 v1697 = ctimesminusplus(reverse(v1683), tbl[300 + tbloffset], ctimes(v1683, tbl[301 + tbloffset])); real2 v1809 = plus(v1657, v1697); real2 v1803 = reverse(minus(v1697, v1657)); store(out, 26 << outShift, plus(v1808, v1809)); real2 v1822 = minus(v1808, v1809); store(out, 90 << outShift, ctimesminusplus(v1822, tbl[0 + tbloffset], ctimes(reverse(v1822), tbl[1 + tbloffset]))); real2 v1807 = minusplus(uminus(v1803), v1804); real2 v1805 = minusplus(v1803, v1804); store(out, 58 << outShift, ctimesminusplus(reverse(v1805), tbl[318 + tbloffset], ctimes(v1805, tbl[319 + tbloffset]))); store(out, 122 << outShift, ctimesminusplus(reverse(v1807), tbl[320 + tbloffset], ctimes(v1807, tbl[321 + 
tbloffset]))); real2 v1651 = ctimesminusplus(reverse(v1641), tbl[290 + tbloffset], ctimes(v1641, tbl[291 + tbloffset])); real2 v1691 = ctimesminusplus(reverse(v1681), tbl[298 + tbloffset], ctimes(v1681, tbl[299 + tbloffset])); real2 v1783 = plus(v1651, v1691); real2 v1777 = reverse(minus(v1691, v1651)); real2 v1779 = minusplus(v1777, v1778); real2 v1781 = minusplus(uminus(v1777), v1778); store(out, 106 << outShift, ctimesminusplus(reverse(v1781), tbl[316 + tbloffset], ctimes(v1781, tbl[317 + tbloffset]))); store(out, 42 << outShift, ctimesminusplus(reverse(v1779), tbl[314 + tbloffset], ctimes(v1779, tbl[315 + tbloffset]))); store(out, 10 << outShift, plus(v1782, v1783)); real2 v1796 = minus(v1782, v1783); store(out, 74 << outShift, ctimesminusplus(v1796, tbl[0 + tbloffset], ctimes(reverse(v1796), tbl[1 + tbloffset]))); real2 v1720 = minus(v1645, v1644); real2 v1724 = plus(v1644, v1645); real2 v1719 = reverse(minus(v1685, v1684)); real2 v1725 = plus(v1684, v1685); real2 v1745 = plus(v1724, v1725); real2 v1739 = reverse(minus(v1725, v1724)); store(out, 2 << outShift, plus(v1744, v1745)); real2 v1758 = minus(v1744, v1745); store(out, 66 << outShift, ctimesminusplus(v1758, tbl[0 + tbloffset], ctimes(reverse(v1758), tbl[1 + tbloffset]))); real2 v1741 = minusplus(v1739, v1740); real2 v1743 = minusplus(uminus(v1739), v1740); store(out, 98 << outShift, ctimesminusplus(reverse(v1743), tbl[312 + tbloffset], ctimes(v1743, tbl[313 + tbloffset]))); store(out, 34 << outShift, ctimesminusplus(reverse(v1741), tbl[310 + tbloffset], ctimes(v1741, tbl[311 + tbloffset]))); real2 v1723 = minusplus(uminus(v1719), v1720); real2 v1721 = minusplus(v1719, v1720); real2 v1737 = ctimesminusplus(reverse(v1723), tbl[308 + tbloffset], ctimes(v1723, tbl[309 + tbloffset])); store(out, 50 << outShift, plus(v1717, v1737)); real2 v1770 = minus(v1717, v1737); store(out, 114 << outShift, ctimesminusplus(v1770, tbl[0 + tbloffset], ctimes(reverse(v1770), tbl[1 + tbloffset]))); real2 v1731 = 
ctimesminusplus(reverse(v1721), tbl[306 + tbloffset], ctimes(v1721, tbl[307 + tbloffset])); store(out, 18 << outShift, plus(v1711, v1731)); real2 v1764 = minus(v1711, v1731); store(out, 82 << outShift, ctimesminusplus(v1764, tbl[0 + tbloffset], ctimes(reverse(v1764), tbl[1 + tbloffset]))); real2 v809 = ctimesminusplus(reverse(v795), tbl[136 + tbloffset], ctimes(v795, tbl[137 + tbloffset])); real2 v969 = ctimesminusplus(reverse(v955), tbl[168 + tbloffset], ctimes(v955, tbl[169 + tbloffset])); real2 v1850 = minus(v969, v809); real2 v1854 = plus(v809, v969); real2 v849 = ctimesminusplus(reverse(v835), tbl[144 + tbloffset], ctimes(v835, tbl[145 + tbloffset])); real2 v929 = ctimesminusplus(reverse(v915), tbl[160 + tbloffset], ctimes(v915, tbl[161 + tbloffset])); real2 v889 = ctimesminusplus(reverse(v875), tbl[152 + tbloffset], ctimes(v875, tbl[153 + tbloffset])); real2 v1089 = ctimesminusplus(reverse(v1075), tbl[192 + tbloffset], ctimes(v1075, tbl[193 + tbloffset])); real2 v1009 = ctimesminusplus(reverse(v995), tbl[176 + tbloffset], ctimes(v995, tbl[177 + tbloffset])); real2 v1890 = minus(v1009, v849); real2 v1894 = plus(v849, v1009); real2 v1849 = reverse(minus(v1049, v889)); real2 v1855 = plus(v889, v1049); real2 v1930 = minus(v1855, v1854); real2 v1934 = plus(v1854, v1855); real2 v1895 = plus(v929, v1089); real2 v1889 = reverse(minus(v1089, v929)); real2 v1929 = reverse(minus(v1895, v1894)); real2 v1935 = plus(v1894, v1895); real2 v1955 = plus(v1934, v1935); real2 v1949 = reverse(minus(v1935, v1934)); store(out, 6 << outShift, plus(v1954, v1955)); real2 v1968 = minus(v1954, v1955); store(out, 70 << outShift, ctimesminusplus(v1968, tbl[0 + tbloffset], ctimes(reverse(v1968), tbl[1 + tbloffset]))); real2 v1951 = minusplus(v1949, v1950); store(out, 38 << outShift, ctimesminusplus(reverse(v1951), tbl[346 + tbloffset], ctimes(v1951, tbl[347 + tbloffset]))); real2 v1953 = minusplus(uminus(v1949), v1950); store(out, 102 << outShift, ctimesminusplus(reverse(v1953), tbl[348 + 
tbloffset], ctimes(v1953, tbl[349 + tbloffset]))); real2 v1931 = minusplus(v1929, v1930); real2 v1933 = minusplus(uminus(v1929), v1930); real2 v1947 = ctimesminusplus(reverse(v1933), tbl[344 + tbloffset], ctimes(v1933, tbl[345 + tbloffset])); store(out, 54 << outShift, plus(v1927, v1947)); real2 v1980 = minus(v1927, v1947); store(out, 118 << outShift, ctimesminusplus(v1980, tbl[0 + tbloffset], ctimes(reverse(v1980), tbl[1 + tbloffset]))); real2 v1941 = ctimesminusplus(reverse(v1931), tbl[342 + tbloffset], ctimes(v1931, tbl[343 + tbloffset])); store(out, 22 << outShift, plus(v1921, v1941)); real2 v1974 = minus(v1921, v1941); store(out, 86 << outShift, ctimesminusplus(v1974, tbl[0 + tbloffset], ctimes(reverse(v1974), tbl[1 + tbloffset]))); real2 v1851 = minusplus(v1849, v1850); real2 v1853 = minusplus(uminus(v1849), v1850); real2 v1867 = ctimesminusplus(reverse(v1853), tbl[328 + tbloffset], ctimes(v1853, tbl[329 + tbloffset])); real2 v1891 = minusplus(v1889, v1890); real2 v1893 = minusplus(uminus(v1889), v1890); real2 v1907 = ctimesminusplus(reverse(v1893), tbl[336 + tbloffset], ctimes(v1893, tbl[337 + tbloffset])); real2 v2019 = plus(v1867, v1907); real2 v2013 = reverse(minus(v1907, v1867)); store(out, 30 << outShift, plus(v2018, v2019)); real2 v2032 = minus(v2018, v2019); store(out, 94 << outShift, ctimesminusplus(v2032, tbl[0 + tbloffset], ctimes(reverse(v2032), tbl[1 + tbloffset]))); real2 v2017 = minusplus(uminus(v2013), v2014); store(out, 126 << outShift, ctimesminusplus(reverse(v2017), tbl[356 + tbloffset], ctimes(v2017, tbl[357 + tbloffset]))); real2 v2015 = minusplus(v2013, v2014); store(out, 62 << outShift, ctimesminusplus(reverse(v2015), tbl[354 + tbloffset], ctimes(v2015, tbl[355 + tbloffset]))); real2 v1861 = ctimesminusplus(reverse(v1851), tbl[326 + tbloffset], ctimes(v1851, tbl[327 + tbloffset])); real2 v1901 = ctimesminusplus(reverse(v1891), tbl[334 + tbloffset], ctimes(v1891, tbl[335 + tbloffset])); real2 v1993 = plus(v1861, v1901); real2 v1987 = 
reverse(minus(v1901, v1861)); store(out, 14 << outShift, plus(v1992, v1993)); real2 v2006 = minus(v1992, v1993); store(out, 78 << outShift, ctimesminusplus(v2006, tbl[0 + tbloffset], ctimes(reverse(v2006), tbl[1 + tbloffset]))); real2 v1991 = minusplus(uminus(v1987), v1988); store(out, 110 << outShift, ctimesminusplus(reverse(v1991), tbl[352 + tbloffset], ctimes(v1991, tbl[353 + tbloffset]))); real2 v1989 = minusplus(v1987, v1988); store(out, 46 << outShift, ctimesminusplus(reverse(v1989), tbl[350 + tbloffset], ctimes(v1989, tbl[351 + tbloffset]))); real2 v593 = minusplus(v591, v592); real2 v595 = minusplus(uminus(v591), v592); real2 v473 = minusplus(v471, v472); real2 v475 = minusplus(uminus(v471), v472); real2 v555 = minusplus(uminus(v551), v552); real2 v553 = minusplus(v551, v552); real2 v609 = ctimesminusplus(reverse(v595), tbl[96 + tbloffset], ctimes(v595, tbl[97 + tbloffset])); real2 v195 = minusplus(uminus(v191), v192); real2 v193 = minusplus(v191, v192); real2 v275 = minusplus(uminus(v271), v272); real2 v273 = minusplus(v271, v272); real2 v673 = minusplus(v671, v672); real2 v675 = minusplus(uminus(v671), v672); real2 v689 = ctimesminusplus(reverse(v675), tbl[112 + tbloffset], ctimes(v675, tbl[113 + tbloffset])); real2 v209 = ctimesminusplus(reverse(v195), tbl[16 + tbloffset], ctimes(v195, tbl[17 + tbloffset])); real2 v289 = ctimesminusplus(reverse(v275), tbl[32 + tbloffset], ctimes(v275, tbl[33 + tbloffset])); real2 v755 = minusplus(uminus(v751), v752); real2 v753 = minusplus(v751, v752); real2 v435 = minusplus(uminus(v431), v432); real2 v433 = minusplus(v431, v432); real2 v513 = minusplus(v511, v512); real2 v515 = minusplus(uminus(v511), v512); real2 v529 = ctimesminusplus(reverse(v515), tbl[80 + tbloffset], ctimes(v515, tbl[81 + tbloffset])); real2 v353 = minusplus(v351, v352); real2 v355 = minusplus(uminus(v351), v352); real2 v369 = ctimesminusplus(reverse(v355), tbl[48 + tbloffset], ctimes(v355, tbl[49 + tbloffset])); real2 v2631 = plus(v369, v689); 
real2 v2625 = reverse(minus(v689, v369)); real2 v449 = ctimesminusplus(reverse(v435), tbl[64 + tbloffset], ctimes(v435, tbl[65 + tbloffset])); real2 v2710 = plus(v289, v609); real2 v2706 = minus(v609, v289); real2 v2630 = plus(v209, v529); real2 v2626 = minus(v529, v209); real2 v2790 = plus(v2630, v2631); real2 v2786 = minus(v2631, v2630); real2 v713 = minusplus(v711, v712); real2 v715 = minusplus(uminus(v711), v712); real2 v769 = ctimesminusplus(reverse(v755), tbl[128 + tbloffset], ctimes(v755, tbl[129 + tbloffset])); real2 v2705 = reverse(minus(v769, v449)); real2 v2711 = plus(v449, v769); real2 v313 = minusplus(v311, v312); real2 v315 = minusplus(uminus(v311), v312); real2 v393 = minusplus(v391, v392); real2 v395 = minusplus(uminus(v391), v392); real2 v409 = ctimesminusplus(reverse(v395), tbl[56 + tbloffset], ctimes(v395, tbl[57 + tbloffset])); real2 v729 = ctimesminusplus(reverse(v715), tbl[120 + tbloffset], ctimes(v715, tbl[121 + tbloffset])); real2 v329 = ctimesminusplus(reverse(v315), tbl[40 + tbloffset], ctimes(v315, tbl[41 + tbloffset])); real2 v489 = ctimesminusplus(reverse(v475), tbl[72 + tbloffset], ctimes(v475, tbl[73 + tbloffset])); real2 v153 = minusplus(v151, v152); real2 v155 = minusplus(uminus(v151), v152); real2 v169 = ctimesminusplus(reverse(v155), tbl[8 + tbloffset], ctimes(v155, tbl[9 + tbloffset])); real2 v2586 = minus(v489, v169); real2 v2590 = plus(v169, v489); real2 v233 = minusplus(v231, v232); real2 v235 = minusplus(uminus(v231), v232); real2 v633 = minusplus(v631, v632); real2 v635 = minusplus(uminus(v631), v632); real2 v649 = ctimesminusplus(reverse(v635), tbl[104 + tbloffset], ctimes(v635, tbl[105 + tbloffset])); real2 v249 = ctimesminusplus(reverse(v235), tbl[24 + tbloffset], ctimes(v235, tbl[25 + tbloffset])); real2 v569 = ctimesminusplus(reverse(v555), tbl[88 + tbloffset], ctimes(v555, tbl[89 + tbloffset])); real2 v2670 = plus(v249, v569); real2 v2666 = minus(v569, v249); real2 v2785 = reverse(minus(v2711, v2710)); real2 v2791 = 
plus(v2710, v2711); real2 v2825 = reverse(minus(v2791, v2790)); real2 v2831 = plus(v2790, v2791); real2 v2671 = plus(v409, v729); real2 v2665 = reverse(minus(v729, v409)); real2 v2745 = reverse(minus(v2671, v2670)); real2 v2751 = plus(v2670, v2671); real2 v2806 = minus(v2731, v2730); real2 v2810 = plus(v2730, v2731); real2 v2846 = minus(v2811, v2810); real2 v2850 = plus(v2810, v2811); real2 v2591 = plus(v329, v649); real2 v2585 = reverse(minus(v649, v329)); real2 v2750 = plus(v2590, v2591); real2 v2746 = minus(v2591, v2590); real2 v2830 = plus(v2750, v2751); real2 v2826 = minus(v2751, v2750); real2 v2845 = reverse(minus(v2831, v2830)); real2 v2851 = plus(v2830, v2831); store(out, 3 << outShift, plus(v2850, v2851)); real2 v2864 = minus(v2850, v2851); store(out, 67 << outShift, ctimesminusplus(v2864, tbl[0 + tbloffset], ctimes(reverse(v2864), tbl[1 + tbloffset]))); real2 v2849 = minusplus(uminus(v2845), v2846); real2 v2847 = minusplus(v2845, v2846); store(out, 35 << outShift, ctimesminusplus(reverse(v2847), tbl[506 + tbloffset], ctimes(v2847, tbl[507 + tbloffset]))); store(out, 99 << outShift, ctimesminusplus(reverse(v2849), tbl[508 + tbloffset], ctimes(v2849, tbl[509 + tbloffset]))); real2 v2827 = minusplus(v2825, v2826); real2 v2829 = minusplus(uminus(v2825), v2826); real2 v2837 = ctimesminusplus(reverse(v2827), tbl[502 + tbloffset], ctimes(v2827, tbl[503 + tbloffset])); real2 v2809 = minusplus(uminus(v2805), v2806); real2 v2807 = minusplus(v2805, v2806); real2 v2817 = ctimesminusplus(reverse(v2807), tbl[498 + tbloffset], ctimes(v2807, tbl[499 + tbloffset])); store(out, 19 << outShift, plus(v2817, v2837)); real2 v2870 = minus(v2817, v2837); store(out, 83 << outShift, ctimesminusplus(v2870, tbl[0 + tbloffset], ctimes(reverse(v2870), tbl[1 + tbloffset]))); real2 v2823 = ctimesminusplus(reverse(v2809), tbl[500 + tbloffset], ctimes(v2809, tbl[501 + tbloffset])); real2 v2843 = ctimesminusplus(reverse(v2829), tbl[504 + tbloffset], ctimes(v2829, tbl[505 + tbloffset])); 
store(out, 51 << outShift, plus(v2823, v2843)); real2 v2876 = minus(v2823, v2843); store(out, 115 << outShift, ctimesminusplus(v2876, tbl[0 + tbloffset], ctimes(reverse(v2876), tbl[1 + tbloffset]))); real2 v2787 = minusplus(v2785, v2786); real2 v2789 = minusplus(uminus(v2785), v2786); real2 v2803 = ctimesminusplus(reverse(v2789), tbl[496 + tbloffset], ctimes(v2789, tbl[497 + tbloffset])); real2 v2727 = minusplus(v2725, v2726); real2 v2729 = minusplus(uminus(v2725), v2726); real2 v2743 = ctimesminusplus(reverse(v2729), tbl[484 + tbloffset], ctimes(v2729, tbl[485 + tbloffset])); real2 v2914 = plus(v2743, v2783); real2 v2910 = minus(v2783, v2743); real2 v2749 = minusplus(uminus(v2745), v2746); real2 v2747 = minusplus(v2745, v2746); real2 v2763 = ctimesminusplus(reverse(v2749), tbl[488 + tbloffset], ctimes(v2749, tbl[489 + tbloffset])); real2 v2909 = reverse(minus(v2803, v2763)); real2 v2915 = plus(v2763, v2803); store(out, 27 << outShift, plus(v2914, v2915)); real2 v2928 = minus(v2914, v2915); store(out, 91 << outShift, ctimesminusplus(v2928, tbl[0 + tbloffset], ctimes(reverse(v2928), tbl[1 + tbloffset]))); real2 v2913 = minusplus(uminus(v2909), v2910); store(out, 123 << outShift, ctimesminusplus(reverse(v2913), tbl[516 + tbloffset], ctimes(v2913, tbl[517 + tbloffset]))); real2 v2911 = minusplus(v2909, v2910); store(out, 59 << outShift, ctimesminusplus(reverse(v2911), tbl[514 + tbloffset], ctimes(v2911, tbl[515 + tbloffset]))); real2 v2737 = ctimesminusplus(reverse(v2727), tbl[482 + tbloffset], ctimes(v2727, tbl[483 + tbloffset])); real2 v2888 = plus(v2737, v2777); real2 v2884 = minus(v2777, v2737); real2 v2797 = ctimesminusplus(reverse(v2787), tbl[494 + tbloffset], ctimes(v2787, tbl[495 + tbloffset])); real2 v2757 = ctimesminusplus(reverse(v2747), tbl[486 + tbloffset], ctimes(v2747, tbl[487 + tbloffset])); real2 v2889 = plus(v2757, v2797); real2 v2883 = reverse(minus(v2797, v2757)); store(out, 11 << outShift, plus(v2888, v2889)); real2 v2902 = minus(v2888, v2889); 
store(out, 75 << outShift, ctimesminusplus(v2902, tbl[0 + tbloffset], ctimes(reverse(v2902), tbl[1 + tbloffset]))); real2 v2887 = minusplus(uminus(v2883), v2884); store(out, 107 << outShift, ctimesminusplus(reverse(v2887), tbl[512 + tbloffset], ctimes(v2887, tbl[513 + tbloffset]))); real2 v2885 = minusplus(v2883, v2884); store(out, 43 << outShift, ctimesminusplus(reverse(v2885), tbl[510 + tbloffset], ctimes(v2885, tbl[511 + tbloffset]))); real2 v2669 = minusplus(uminus(v2665), v2666); real2 v2667 = minusplus(v2665, v2666); real2 v2707 = minusplus(v2705, v2706); real2 v2709 = minusplus(uminus(v2705), v2706); real2 v2717 = ctimesminusplus(reverse(v2707), tbl[478 + tbloffset], ctimes(v2707, tbl[479 + tbloffset])); real2 v2627 = minusplus(v2625, v2626); real2 v2629 = minusplus(uminus(v2625), v2626); real2 v2637 = ctimesminusplus(reverse(v2627), tbl[462 + tbloffset], ctimes(v2627, tbl[463 + tbloffset])); real2 v2961 = plus(v2637, v2717); real2 v2955 = reverse(minus(v2717, v2637)); real2 v2649 = minusplus(uminus(v2645), v2646); real2 v2647 = minusplus(v2645, v2646); real2 v2569 = minusplus(uminus(v2565), v2566); real2 v2567 = minusplus(v2565, v2566); real2 v2577 = ctimesminusplus(reverse(v2567), tbl[450 + tbloffset], ctimes(v2567, tbl[451 + tbloffset])); real2 v2657 = ctimesminusplus(reverse(v2647), tbl[466 + tbloffset], ctimes(v2647, tbl[467 + tbloffset])); real2 v2936 = minus(v2657, v2577); real2 v2940 = plus(v2577, v2657); real2 v2976 = minus(v2941, v2940); real2 v2980 = plus(v2940, v2941); real2 v2677 = ctimesminusplus(reverse(v2667), tbl[470 + tbloffset], ctimes(v2667, tbl[471 + tbloffset])); real2 v2587 = minusplus(v2585, v2586); real2 v2589 = minusplus(uminus(v2585), v2586); real2 v2597 = ctimesminusplus(reverse(v2587), tbl[454 + tbloffset], ctimes(v2587, tbl[455 + tbloffset])); real2 v2956 = minus(v2677, v2597); real2 v2960 = plus(v2597, v2677); real2 v2975 = reverse(minus(v2961, v2960)); real2 v2981 = plus(v2960, v2961); store(out, 7 << outShift, plus(v2980, 
v2981)); real2 v2994 = minus(v2980, v2981); store(out, 71 << outShift, ctimesminusplus(v2994, tbl[0 + tbloffset], ctimes(reverse(v2994), tbl[1 + tbloffset]))); real2 v2979 = minusplus(uminus(v2975), v2976); store(out, 103 << outShift, ctimesminusplus(reverse(v2979), tbl[528 + tbloffset], ctimes(v2979, tbl[529 + tbloffset]))); real2 v2977 = minusplus(v2975, v2976); store(out, 39 << outShift, ctimesminusplus(reverse(v2977), tbl[526 + tbloffset], ctimes(v2977, tbl[527 + tbloffset]))); real2 v2939 = minusplus(uminus(v2935), v2936); real2 v2937 = minusplus(v2935, v2936); real2 v2953 = ctimesminusplus(reverse(v2939), tbl[520 + tbloffset], ctimes(v2939, tbl[521 + tbloffset])); real2 v2957 = minusplus(v2955, v2956); real2 v2959 = minusplus(uminus(v2955), v2956); real2 v2973 = ctimesminusplus(reverse(v2959), tbl[524 + tbloffset], ctimes(v2959, tbl[525 + tbloffset])); store(out, 55 << outShift, plus(v2953, v2973)); real2 v3006 = minus(v2953, v2973); store(out, 119 << outShift, ctimesminusplus(v3006, tbl[0 + tbloffset], ctimes(reverse(v3006), tbl[1 + tbloffset]))); real2 v2947 = ctimesminusplus(reverse(v2937), tbl[518 + tbloffset], ctimes(v2937, tbl[519 + tbloffset])); real2 v2967 = ctimesminusplus(reverse(v2957), tbl[522 + tbloffset], ctimes(v2957, tbl[523 + tbloffset])); store(out, 23 << outShift, plus(v2947, v2967)); real2 v3000 = minus(v2947, v2967); store(out, 87 << outShift, ctimesminusplus(v3000, tbl[0 + tbloffset], ctimes(reverse(v3000), tbl[1 + tbloffset]))); real2 v2663 = ctimesminusplus(reverse(v2649), tbl[468 + tbloffset], ctimes(v2649, tbl[469 + tbloffset])); real2 v2583 = ctimesminusplus(reverse(v2569), tbl[452 + tbloffset], ctimes(v2569, tbl[453 + tbloffset])); real2 v3014 = minus(v2663, v2583); real2 v3018 = plus(v2583, v2663); real2 v3015 = minusplus(v3013, v3014); real2 v3017 = minusplus(uminus(v3013), v3014); real2 v2643 = ctimesminusplus(reverse(v2629), tbl[464 + tbloffset], ctimes(v2629, tbl[465 + tbloffset])); real2 v2723 = 
ctimesminusplus(reverse(v2709), tbl[480 + tbloffset], ctimes(v2709, tbl[481 + tbloffset])); real2 v3039 = plus(v2643, v2723); real2 v3033 = reverse(minus(v2723, v2643)); real2 v2683 = ctimesminusplus(reverse(v2669), tbl[472 + tbloffset], ctimes(v2669, tbl[473 + tbloffset])); real2 v3031 = ctimesminusplus(reverse(v3017), tbl[532 + tbloffset], ctimes(v3017, tbl[533 + tbloffset])); real2 v2603 = ctimesminusplus(reverse(v2589), tbl[456 + tbloffset], ctimes(v2589, tbl[457 + tbloffset])); real2 v3034 = minus(v2683, v2603); real2 v3038 = plus(v2603, v2683); real2 v3037 = minusplus(uminus(v3033), v3034); real2 v3035 = minusplus(v3033, v3034); real2 v3051 = ctimesminusplus(reverse(v3037), tbl[536 + tbloffset], ctimes(v3037, tbl[537 + tbloffset])); store(out, 63 << outShift, plus(v3031, v3051)); real2 v3084 = minus(v3031, v3051); store(out, 127 << outShift, ctimesminusplus(v3084, tbl[0 + tbloffset], ctimes(reverse(v3084), tbl[1 + tbloffset]))); real2 v3025 = ctimesminusplus(reverse(v3015), tbl[530 + tbloffset], ctimes(v3015, tbl[531 + tbloffset])); real2 v3045 = ctimesminusplus(reverse(v3035), tbl[534 + tbloffset], ctimes(v3035, tbl[535 + tbloffset])); store(out, 31 << outShift, plus(v3025, v3045)); real2 v3078 = minus(v3025, v3045); store(out, 95 << outShift, ctimesminusplus(v3078, tbl[0 + tbloffset], ctimes(reverse(v3078), tbl[1 + tbloffset]))); real2 v3058 = plus(v3018, v3019); real2 v3054 = minus(v3019, v3018); real2 v3053 = reverse(minus(v3039, v3038)); real2 v3059 = plus(v3038, v3039); real2 v3055 = minusplus(v3053, v3054); store(out, 47 << outShift, ctimesminusplus(reverse(v3055), tbl[538 + tbloffset], ctimes(v3055, tbl[539 + tbloffset]))); real2 v3057 = minusplus(uminus(v3053), v3054); store(out, 111 << outShift, ctimesminusplus(reverse(v3057), tbl[540 + tbloffset], ctimes(v3057, tbl[541 + tbloffset]))); store(out, 15 << outShift, plus(v3058, v3059)); real2 v3072 = minus(v3058, v3059); store(out, 79 << outShift, ctimesminusplus(v3072, tbl[0 + tbloffset], 
ctimes(reverse(v3072), tbl[1 + tbloffset]))); real2 v683 = ctimesminusplus(reverse(v673), tbl[110 + tbloffset], ctimes(v673, tbl[111 + tbloffset])); real2 v363 = ctimesminusplus(reverse(v353), tbl[46 + tbloffset], ctimes(v353, tbl[47 + tbloffset])); real2 v2105 = plus(v363, v683); real2 v2099 = reverse(minus(v683, v363)); real2 v283 = ctimesminusplus(reverse(v273), tbl[30 + tbloffset], ctimes(v273, tbl[31 + tbloffset])); real2 v723 = ctimesminusplus(reverse(v713), tbl[118 + tbloffset], ctimes(v713, tbl[119 + tbloffset])); real2 v403 = ctimesminusplus(reverse(v393), tbl[54 + tbloffset], ctimes(v393, tbl[55 + tbloffset])); real2 v603 = ctimesminusplus(reverse(v593), tbl[94 + tbloffset], ctimes(v593, tbl[95 + tbloffset])); real2 v2180 = minus(v603, v283); real2 v2184 = plus(v283, v603); real2 v2145 = plus(v403, v723); real2 v2139 = reverse(minus(v723, v403)); real2 v543 = ctimesminusplus(reverse(v533), tbl[82 + tbloffset], ctimes(v533, tbl[83 + tbloffset])); real2 v383 = ctimesminusplus(reverse(v373), tbl[50 + tbloffset], ctimes(v373, tbl[51 + tbloffset])); real2 v703 = ctimesminusplus(reverse(v693), tbl[114 + tbloffset], ctimes(v693, tbl[115 + tbloffset])); real2 v2125 = plus(v383, v703); real2 v2119 = reverse(minus(v703, v383)); real2 v223 = ctimesminusplus(reverse(v213), tbl[18 + tbloffset], ctimes(v213, tbl[19 + tbloffset])); real2 v2120 = minus(v543, v223); real2 v2124 = plus(v223, v543); real2 v443 = ctimesminusplus(reverse(v433), tbl[62 + tbloffset], ctimes(v433, tbl[63 + tbloffset])); real2 v203 = ctimesminusplus(reverse(v193), tbl[14 + tbloffset], ctimes(v193, tbl[15 + tbloffset])); real2 v763 = ctimesminusplus(reverse(v753), tbl[126 + tbloffset], ctimes(v753, tbl[127 + tbloffset])); real2 v2179 = reverse(minus(v763, v443)); real2 v2185 = plus(v443, v763); real2 v523 = ctimesminusplus(reverse(v513), tbl[78 + tbloffset], ctimes(v513, tbl[79 + tbloffset])); real2 v2100 = minus(v523, v203); real2 v2104 = plus(v203, v523); real2 v2264 = plus(v2104, v2105); real2 
v2260 = minus(v2105, v2104); real2 v643 = ctimesminusplus(reverse(v633), tbl[102 + tbloffset], ctimes(v633, tbl[103 + tbloffset])); real2 v2265 = plus(v2184, v2185); real2 v2259 = reverse(minus(v2185, v2184)); real2 v563 = ctimesminusplus(reverse(v553), tbl[86 + tbloffset], ctimes(v553, tbl[87 + tbloffset])); real2 v243 = ctimesminusplus(reverse(v233), tbl[22 + tbloffset], ctimes(v233, tbl[23 + tbloffset])); real2 v2144 = plus(v243, v563); real2 v2140 = minus(v563, v243); real2 v143 = ctimesminusplus(reverse(v133), tbl[2 + tbloffset], ctimes(v133, tbl[3 + tbloffset])); real2 v183 = ctimesminusplus(reverse(v173), tbl[10 + tbloffset], ctimes(v173, tbl[11 + tbloffset])); real2 v2084 = plus(v183, v503); real2 v2080 = minus(v503, v183); real2 v163 = ctimesminusplus(reverse(v153), tbl[6 + tbloffset], ctimes(v153, tbl[7 + tbloffset])); real2 v303 = ctimesminusplus(reverse(v293), tbl[34 + tbloffset], ctimes(v293, tbl[35 + tbloffset])); real2 v623 = ctimesminusplus(reverse(v613), tbl[98 + tbloffset], ctimes(v613, tbl[99 + tbloffset])); real2 v2039 = reverse(minus(v623, v303)); real2 v2045 = plus(v303, v623); real2 v463 = ctimesminusplus(reverse(v453), tbl[66 + tbloffset], ctimes(v453, tbl[67 + tbloffset])); real2 v2044 = plus(v143, v463); real2 v2040 = minus(v463, v143); real2 v2204 = plus(v2044, v2045); real2 v2200 = minus(v2045, v2044); real2 v323 = ctimesminusplus(reverse(v313), tbl[38 + tbloffset], ctimes(v313, tbl[39 + tbloffset])); real2 v2205 = plus(v2124, v2125); real2 v2199 = reverse(minus(v2125, v2124)); real2 v2280 = minus(v2205, v2204); real2 v2284 = plus(v2204, v2205); real2 v2225 = plus(v2144, v2145); real2 v2219 = reverse(minus(v2145, v2144)); real2 v2305 = plus(v2264, v2265); real2 v2299 = reverse(minus(v2265, v2264)); real2 v2240 = minus(v2085, v2084); real2 v2244 = plus(v2084, v2085); real2 v2279 = reverse(minus(v2245, v2244)); real2 v2285 = plus(v2244, v2245); real2 v2281 = minusplus(v2279, v2280); real2 v2283 = minusplus(uminus(v2279), v2280); real2 
v2291 = ctimesminusplus(reverse(v2281), tbl[406 + tbloffset], ctimes(v2281, tbl[407 + tbloffset])); real2 v483 = ctimesminusplus(reverse(v473), tbl[70 + tbloffset], ctimes(v473, tbl[71 + tbloffset])); real2 v2060 = minus(v483, v163); real2 v2064 = plus(v163, v483); real2 v2065 = plus(v323, v643); real2 v2059 = reverse(minus(v643, v323)); real2 v2220 = minus(v2065, v2064); real2 v2224 = plus(v2064, v2065); real2 v2304 = plus(v2224, v2225); real2 v2300 = minus(v2225, v2224); real2 v2301 = minusplus(v2299, v2300); real2 v2303 = minusplus(uminus(v2299), v2300); real2 v2311 = ctimesminusplus(reverse(v2301), tbl[410 + tbloffset], ctimes(v2301, tbl[411 + tbloffset])); store(out, 17 << outShift, plus(v2291, v2311)); real2 v2344 = minus(v2291, v2311); store(out, 81 << outShift, ctimesminusplus(v2344, tbl[0 + tbloffset], ctimes(reverse(v2344), tbl[1 + tbloffset]))); real2 v2297 = ctimesminusplus(reverse(v2283), tbl[408 + tbloffset], ctimes(v2283, tbl[409 + tbloffset])); real2 v2317 = ctimesminusplus(reverse(v2303), tbl[412 + tbloffset], ctimes(v2303, tbl[413 + tbloffset])); store(out, 49 << outShift, plus(v2297, v2317)); real2 v2350 = minus(v2297, v2317); store(out, 113 << outShift, ctimesminusplus(v2350, tbl[0 + tbloffset], ctimes(reverse(v2350), tbl[1 + tbloffset]))); real2 v2320 = minus(v2285, v2284); real2 v2324 = plus(v2284, v2285); real2 v2325 = plus(v2304, v2305); real2 v2319 = reverse(minus(v2305, v2304)); store(out, 1 << outShift, plus(v2324, v2325)); real2 v2338 = minus(v2324, v2325); store(out, 65 << outShift, ctimesminusplus(v2338, tbl[0 + tbloffset], ctimes(reverse(v2338), tbl[1 + tbloffset]))); real2 v2321 = minusplus(v2319, v2320); store(out, 33 << outShift, ctimesminusplus(reverse(v2321), tbl[414 + tbloffset], ctimes(v2321, tbl[415 + tbloffset]))); real2 v2323 = minusplus(uminus(v2319), v2320); store(out, 97 << outShift, ctimesminusplus(reverse(v2323), tbl[416 + tbloffset], ctimes(v2323, tbl[417 + tbloffset]))); real2 v2201 = minusplus(v2199, v2200); real2 
v2203 = minusplus(uminus(v2199), v2200); real2 v2263 = minusplus(uminus(v2259), v2260); real2 v2261 = minusplus(v2259, v2260); real2 v2243 = minusplus(uminus(v2239), v2240); real2 v2241 = minusplus(v2239, v2240); real2 v2257 = ctimesminusplus(reverse(v2243), tbl[400 + tbloffset], ctimes(v2243, tbl[401 + tbloffset])); real2 v2217 = ctimesminusplus(reverse(v2203), tbl[392 + tbloffset], ctimes(v2203, tbl[393 + tbloffset])); real2 v2388 = plus(v2217, v2257); real2 v2384 = minus(v2257, v2217); real2 v2277 = ctimesminusplus(reverse(v2263), tbl[404 + tbloffset], ctimes(v2263, tbl[405 + tbloffset])); real2 v2221 = minusplus(v2219, v2220); real2 v2223 = minusplus(uminus(v2219), v2220); real2 v2237 = ctimesminusplus(reverse(v2223), tbl[396 + tbloffset], ctimes(v2223, tbl[397 + tbloffset])); real2 v2389 = plus(v2237, v2277); real2 v2383 = reverse(minus(v2277, v2237)); store(out, 25 << outShift, plus(v2388, v2389)); real2 v2402 = minus(v2388, v2389); store(out, 89 << outShift, ctimesminusplus(v2402, tbl[0 + tbloffset], ctimes(reverse(v2402), tbl[1 + tbloffset]))); real2 v2385 = minusplus(v2383, v2384); real2 v2387 = minusplus(uminus(v2383), v2384); store(out, 121 << outShift, ctimesminusplus(reverse(v2387), tbl[424 + tbloffset], ctimes(v2387, tbl[425 + tbloffset]))); store(out, 57 << outShift, ctimesminusplus(reverse(v2385), tbl[422 + tbloffset], ctimes(v2385, tbl[423 + tbloffset]))); real2 v2251 = ctimesminusplus(reverse(v2241), tbl[398 + tbloffset], ctimes(v2241, tbl[399 + tbloffset])); real2 v2211 = ctimesminusplus(reverse(v2201), tbl[390 + tbloffset], ctimes(v2201, tbl[391 + tbloffset])); real2 v2358 = minus(v2251, v2211); real2 v2362 = plus(v2211, v2251); real2 v2271 = ctimesminusplus(reverse(v2261), tbl[402 + tbloffset], ctimes(v2261, tbl[403 + tbloffset])); real2 v2231 = ctimesminusplus(reverse(v2221), tbl[394 + tbloffset], ctimes(v2221, tbl[395 + tbloffset])); real2 v2357 = reverse(minus(v2271, v2231)); real2 v2363 = plus(v2231, v2271); store(out, 9 << outShift, 
plus(v2362, v2363)); real2 v2376 = minus(v2362, v2363); store(out, 73 << outShift, ctimesminusplus(v2376, tbl[0 + tbloffset], ctimes(reverse(v2376), tbl[1 + tbloffset]))); real2 v2361 = minusplus(uminus(v2357), v2358); store(out, 105 << outShift, ctimesminusplus(reverse(v2361), tbl[420 + tbloffset], ctimes(v2361, tbl[421 + tbloffset]))); real2 v2359 = minusplus(v2357, v2358); store(out, 41 << outShift, ctimesminusplus(reverse(v2359), tbl[418 + tbloffset], ctimes(v2359, tbl[419 + tbloffset]))); real2 v2121 = minusplus(v2119, v2120); real2 v2123 = minusplus(uminus(v2119), v2120); real2 v2083 = minusplus(uminus(v2079), v2080); real2 v2081 = minusplus(v2079, v2080); real2 v2091 = ctimesminusplus(reverse(v2081), tbl[366 + tbloffset], ctimes(v2081, tbl[367 + tbloffset])); real2 v2043 = minusplus(uminus(v2039), v2040); real2 v2041 = minusplus(v2039, v2040); real2 v2051 = ctimesminusplus(reverse(v2041), tbl[358 + tbloffset], ctimes(v2041, tbl[359 + tbloffset])); real2 v2131 = ctimesminusplus(reverse(v2121), tbl[374 + tbloffset], ctimes(v2121, tbl[375 + tbloffset])); real2 v2163 = minusplus(uminus(v2159), v2160); real2 v2161 = minusplus(v2159, v2160); real2 v2171 = ctimesminusplus(reverse(v2161), tbl[382 + tbloffset], ctimes(v2161, tbl[383 + tbloffset])); real2 v2409 = reverse(minus(v2171, v2091)); real2 v2415 = plus(v2091, v2171); real2 v2410 = minus(v2131, v2051); real2 v2414 = plus(v2051, v2131); real2 v2454 = plus(v2414, v2415); real2 v2450 = minus(v2415, v2414); real2 v2181 = minusplus(v2179, v2180); real2 v2183 = minusplus(uminus(v2179), v2180); real2 v2191 = ctimesminusplus(reverse(v2181), tbl[386 + tbloffset], ctimes(v2181, tbl[387 + tbloffset])); real2 v2103 = minusplus(uminus(v2099), v2100); real2 v2101 = minusplus(v2099, v2100); real2 v2111 = ctimesminusplus(reverse(v2101), tbl[370 + tbloffset], ctimes(v2101, tbl[371 + tbloffset])); real2 v2435 = plus(v2111, v2191); real2 v2429 = reverse(minus(v2191, v2111)); real2 v2141 = minusplus(v2139, v2140); real2 v2143 = 
minusplus(uminus(v2139), v2140); real2 v2151 = ctimesminusplus(reverse(v2141), tbl[378 + tbloffset], ctimes(v2141, tbl[379 + tbloffset])); real2 v2063 = minusplus(uminus(v2059), v2060); real2 v2061 = minusplus(v2059, v2060); real2 v2071 = ctimesminusplus(reverse(v2061), tbl[362 + tbloffset], ctimes(v2061, tbl[363 + tbloffset])); real2 v2434 = plus(v2071, v2151); real2 v2430 = minus(v2151, v2071); real2 v2455 = plus(v2434, v2435); real2 v2449 = reverse(minus(v2435, v2434)); store(out, 5 << outShift, plus(v2454, v2455)); real2 v2468 = minus(v2454, v2455); store(out, 69 << outShift, ctimesminusplus(v2468, tbl[0 + tbloffset], ctimes(reverse(v2468), tbl[1 + tbloffset]))); real2 v2451 = minusplus(v2449, v2450); real2 v2453 = minusplus(uminus(v2449), v2450); store(out, 101 << outShift, ctimesminusplus(reverse(v2453), tbl[436 + tbloffset], ctimes(v2453, tbl[437 + tbloffset]))); store(out, 37 << outShift, ctimesminusplus(reverse(v2451), tbl[434 + tbloffset], ctimes(v2451, tbl[435 + tbloffset]))); real2 v2411 = minusplus(v2409, v2410); real2 v2413 = minusplus(uminus(v2409), v2410); real2 v2433 = minusplus(uminus(v2429), v2430); real2 v2431 = minusplus(v2429, v2430); real2 v2421 = ctimesminusplus(reverse(v2411), tbl[426 + tbloffset], ctimes(v2411, tbl[427 + tbloffset])); real2 v2441 = ctimesminusplus(reverse(v2431), tbl[430 + tbloffset], ctimes(v2431, tbl[431 + tbloffset])); store(out, 21 << outShift, plus(v2421, v2441)); real2 v2474 = minus(v2421, v2441); store(out, 85 << outShift, ctimesminusplus(v2474, tbl[0 + tbloffset], ctimes(reverse(v2474), tbl[1 + tbloffset]))); real2 v2427 = ctimesminusplus(reverse(v2413), tbl[428 + tbloffset], ctimes(v2413, tbl[429 + tbloffset])); real2 v2447 = ctimesminusplus(reverse(v2433), tbl[432 + tbloffset], ctimes(v2433, tbl[433 + tbloffset])); store(out, 53 << outShift, plus(v2427, v2447)); real2 v2480 = minus(v2427, v2447); store(out, 117 << outShift, ctimesminusplus(v2480, tbl[0 + tbloffset], ctimes(reverse(v2480), tbl[1 + tbloffset]))); 
real2 v2057 = ctimesminusplus(reverse(v2043), tbl[360 + tbloffset], ctimes(v2043, tbl[361 + tbloffset])); real2 v2097 = ctimesminusplus(reverse(v2083), tbl[368 + tbloffset], ctimes(v2083, tbl[369 + tbloffset])); real2 v2157 = ctimesminusplus(reverse(v2143), tbl[380 + tbloffset], ctimes(v2143, tbl[381 + tbloffset])); real2 v2197 = ctimesminusplus(reverse(v2183), tbl[388 + tbloffset], ctimes(v2183, tbl[389 + tbloffset])); real2 v2117 = ctimesminusplus(reverse(v2103), tbl[372 + tbloffset], ctimes(v2103, tbl[373 + tbloffset])); real2 v2507 = reverse(minus(v2197, v2117)); real2 v2513 = plus(v2117, v2197); real2 v2137 = ctimesminusplus(reverse(v2123), tbl[376 + tbloffset], ctimes(v2123, tbl[377 + tbloffset])); real2 v2488 = minus(v2137, v2057); real2 v2492 = plus(v2057, v2137); real2 v2177 = ctimesminusplus(reverse(v2163), tbl[384 + tbloffset], ctimes(v2163, tbl[385 + tbloffset])); real2 v2493 = plus(v2097, v2177); real2 v2487 = reverse(minus(v2177, v2097)); real2 v2532 = plus(v2492, v2493); real2 v2528 = minus(v2493, v2492); real2 v2077 = ctimesminusplus(reverse(v2063), tbl[364 + tbloffset], ctimes(v2063, tbl[365 + tbloffset])); real2 v2512 = plus(v2077, v2157); real2 v2508 = minus(v2157, v2077); real2 v2527 = reverse(minus(v2513, v2512)); real2 v2533 = plus(v2512, v2513); real2 v2529 = minusplus(v2527, v2528); real2 v2531 = minusplus(uminus(v2527), v2528); store(out, 109 << outShift, ctimesminusplus(reverse(v2531), tbl[448 + tbloffset], ctimes(v2531, tbl[449 + tbloffset]))); store(out, 45 << outShift, ctimesminusplus(reverse(v2529), tbl[446 + tbloffset], ctimes(v2529, tbl[447 + tbloffset]))); store(out, 13 << outShift, plus(v2532, v2533)); real2 v2546 = minus(v2532, v2533); store(out, 77 << outShift, ctimesminusplus(v2546, tbl[0 + tbloffset], ctimes(reverse(v2546), tbl[1 + tbloffset]))); real2 v2509 = minusplus(v2507, v2508); real2 v2511 = minusplus(uminus(v2507), v2508); real2 v2491 = minusplus(uminus(v2487), v2488); real2 v2489 = minusplus(v2487, v2488); real2 v2499 
= ctimesminusplus(reverse(v2489), tbl[438 + tbloffset], ctimes(v2489, tbl[439 + tbloffset])); real2 v2519 = ctimesminusplus(reverse(v2509), tbl[442 + tbloffset], ctimes(v2509, tbl[443 + tbloffset])); store(out, 29 << outShift, plus(v2499, v2519)); real2 v2552 = minus(v2499, v2519); store(out, 93 << outShift, ctimesminusplus(v2552, tbl[0 + tbloffset], ctimes(reverse(v2552), tbl[1 + tbloffset]))); real2 v2505 = ctimesminusplus(reverse(v2491), tbl[440 + tbloffset], ctimes(v2491, tbl[441 + tbloffset])); real2 v2525 = ctimesminusplus(reverse(v2511), tbl[444 + tbloffset], ctimes(v2511, tbl[445 + tbloffset])); store(out, 61 << outShift, plus(v2505, v2525)); real2 v2558 = minus(v2505, v2525); store(out, 125 << outShift, ctimesminusplus(v2558, tbl[0 + tbloffset], ctimes(reverse(v2558), tbl[1 + tbloffset]))); // Pres : 76263 } } ALIGNED(8192) void tbut128f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * i0; // Pres : 148586 real2 v56 = load(in, 54 << inShift); real2 v120 = load(in, 118 << inShift); real2 v571 = reverse(minus(v56, v120)); real2 v577 = plus(v56, v120); real2 v24 = load(in, 22 << inShift); real2 v88 = load(in, 86 << inShift); real2 v576 = plus(v24, v88); real2 v572 = minus(v88, v24); real2 v573 = minusplus(v571, v572); real2 v575 = minusplus(uminus(v571), v572); real2 v589 = timesminusplus(reverse(v575), load(tbl, 92 * VECWIDTH + tbloffset), times(v575, load(tbl, 93 * VECWIDTH + tbloffset))); real2 v583 = timesminusplus(reverse(v573), load(tbl, 90 * VECWIDTH + tbloffset), times(v573, load(tbl, 91 * VECWIDTH + tbloffset))); real2 v897 = plus(v576, v577); real2 v891 = reverse(minus(v576, v577)); real2 v8 = load(in, 6 << inShift); real2 v72 = load(in, 70 << inShift); real2 
v252 = minus(v72, v8); real2 v256 = plus(v8, v72); real2 v104 = load(in, 102 << inShift); real2 v40 = load(in, 38 << inShift); real2 v251 = reverse(minus(v40, v104)); real2 v257 = plus(v40, v104); real2 v255 = minusplus(uminus(v251), v252); real2 v253 = minusplus(v251, v252); real2 v263 = timesminusplus(reverse(v253), load(tbl, 26 * VECWIDTH + tbloffset), times(v253, load(tbl, 27 * VECWIDTH + tbloffset))); real2 v896 = plus(v256, v257); real2 v892 = minus(v257, v256); real2 v895 = minusplus(uminus(v891), v892); real2 v893 = minusplus(v891, v892); real2 v909 = timesminusplus(reverse(v895), load(tbl, 156 * VECWIDTH + tbloffset), times(v895, load(tbl, 157 * VECWIDTH + tbloffset))); real2 v903 = timesminusplus(reverse(v893), load(tbl, 154 * VECWIDTH + tbloffset), times(v893, load(tbl, 155 * VECWIDTH + tbloffset))); real2 v269 = timesminusplus(reverse(v255), load(tbl, 28 * VECWIDTH + tbloffset), times(v255, load(tbl, 29 * VECWIDTH + tbloffset))); real2 v1216 = plus(v896, v897); real2 v1212 = minus(v897, v896); real2 v2160 = minus(v583, v263); real2 v2164 = plus(v263, v583); real2 v2686 = minus(v589, v269); real2 v2690 = plus(v269, v589); real2 v96 = load(in, 94 << inShift); real2 v32 = load(in, 30 << inShift); real2 v736 = plus(v32, v96); real2 v732 = minus(v96, v32); real2 v64 = load(in, 62 << inShift); real2 v128 = load(in, 126 << inShift); real2 v737 = plus(v64, v128); real2 v731 = reverse(minus(v64, v128)); real2 v1057 = plus(v736, v737); real2 v1051 = reverse(minus(v736, v737)); real2 v733 = minusplus(v731, v732); real2 v735 = minusplus(uminus(v731), v732); real2 v749 = timesminusplus(reverse(v735), load(tbl, 124 * VECWIDTH + tbloffset), times(v735, load(tbl, 125 * VECWIDTH + tbloffset))); real2 v743 = timesminusplus(reverse(v733), load(tbl, 122 * VECWIDTH + tbloffset), times(v733, load(tbl, 123 * VECWIDTH + tbloffset))); real2 v16 = load(in, 14 << inShift); real2 v80 = load(in, 78 << inShift); real2 v412 = minus(v80, v16); real2 v416 = plus(v16, v80); real2 v112 = 
load(in, 110 << inShift); real2 v48 = load(in, 46 << inShift); real2 v417 = plus(v48, v112); real2 v411 = reverse(minus(v48, v112)); real2 v1056 = plus(v416, v417); real2 v1052 = minus(v417, v416); real2 v1055 = minusplus(uminus(v1051), v1052); real2 v1053 = minusplus(v1051, v1052); real2 v1063 = timesminusplus(reverse(v1053), load(tbl, 186 * VECWIDTH + tbloffset), times(v1053, load(tbl, 187 * VECWIDTH + tbloffset))); real2 v1665 = plus(v903, v1063); real2 v1659 = reverse(minus(v903, v1063)); real2 v1069 = timesminusplus(reverse(v1055), load(tbl, 188 * VECWIDTH + tbloffset), times(v1055, load(tbl, 189 * VECWIDTH + tbloffset))); real2 v1869 = reverse(minus(v909, v1069)); real2 v1875 = plus(v909, v1069); real2 v413 = minusplus(v411, v412); real2 v415 = minusplus(uminus(v411), v412); real2 v429 = timesminusplus(reverse(v415), load(tbl, 60 * VECWIDTH + tbloffset), times(v415, load(tbl, 61 * VECWIDTH + tbloffset))); real2 v1217 = plus(v1056, v1057); real2 v1211 = reverse(minus(v1056, v1057)); real2 v1297 = plus(v1216, v1217); real2 v1291 = reverse(minus(v1216, v1217)); real2 v2691 = plus(v429, v749); real2 v2685 = reverse(minus(v429, v749)); real2 v2765 = reverse(minus(v2690, v2691)); real2 v2771 = plus(v2690, v2691); real2 v2689 = minusplus(uminus(v2685), v2686); real2 v2687 = minusplus(v2685, v2686); real2 v2703 = timesminusplus(reverse(v2689), load(tbl, 476 * VECWIDTH + tbloffset), times(v2689, load(tbl, 477 * VECWIDTH + tbloffset))); real2 v2697 = timesminusplus(reverse(v2687), load(tbl, 474 * VECWIDTH + tbloffset), times(v2687, load(tbl, 475 * VECWIDTH + tbloffset))); real2 v1215 = minusplus(uminus(v1211), v1212); real2 v1213 = minusplus(v1211, v1212); real2 v1223 = timesminusplus(reverse(v1213), load(tbl, 218 * VECWIDTH + tbloffset), times(v1213, load(tbl, 219 * VECWIDTH + tbloffset))); real2 v1229 = timesminusplus(reverse(v1215), load(tbl, 220 * VECWIDTH + tbloffset), times(v1215, load(tbl, 221 * VECWIDTH + tbloffset))); real2 v423 = timesminusplus(reverse(v413), 
load(tbl, 58 * VECWIDTH + tbloffset), times(v413, load(tbl, 59 * VECWIDTH + tbloffset))); real2 v2165 = plus(v423, v743); real2 v2159 = reverse(minus(v423, v743)); real2 v2245 = plus(v2164, v2165); real2 v2239 = reverse(minus(v2164, v2165)); real2 v44 = load(in, 42 << inShift); real2 v108 = load(in, 106 << inShift); real2 v331 = reverse(minus(v44, v108)); real2 v337 = plus(v44, v108); real2 v76 = load(in, 74 << inShift); real2 v12 = load(in, 10 << inShift); real2 v336 = plus(v12, v76); real2 v332 = minus(v76, v12); real2 v976 = plus(v336, v337); real2 v972 = minus(v337, v336); real2 v335 = minusplus(uminus(v331), v332); real2 v333 = minusplus(v331, v332); real2 v343 = timesminusplus(reverse(v333), load(tbl, 42 * VECWIDTH + tbloffset), times(v333, load(tbl, 43 * VECWIDTH + tbloffset))); real2 v349 = timesminusplus(reverse(v335), load(tbl, 44 * VECWIDTH + tbloffset), times(v335, load(tbl, 45 * VECWIDTH + tbloffset))); real2 v124 = load(in, 122 << inShift); real2 v60 = load(in, 58 << inShift); real2 v651 = reverse(minus(v60, v124)); real2 v657 = plus(v60, v124); real2 v28 = load(in, 26 << inShift); real2 v92 = load(in, 90 << inShift); real2 v652 = minus(v92, v28); real2 v656 = plus(v28, v92); real2 v977 = plus(v656, v657); real2 v971 = reverse(minus(v656, v657)); real2 v973 = minusplus(v971, v972); real2 v975 = minusplus(uminus(v971), v972); real2 v983 = timesminusplus(reverse(v973), load(tbl, 170 * VECWIDTH + tbloffset), times(v973, load(tbl, 171 * VECWIDTH + tbloffset))); real2 v1131 = reverse(minus(v976, v977)); real2 v1137 = plus(v976, v977); real2 v655 = minusplus(uminus(v651), v652); real2 v653 = minusplus(v651, v652); real2 v669 = timesminusplus(reverse(v655), load(tbl, 108 * VECWIDTH + tbloffset), times(v655, load(tbl, 109 * VECWIDTH + tbloffset))); real2 v663 = timesminusplus(reverse(v653), load(tbl, 106 * VECWIDTH + tbloffset), times(v653, load(tbl, 107 * VECWIDTH + tbloffset))); real2 v2079 = reverse(minus(v343, v663)); real2 v2085 = plus(v343, v663); real2 
v2605 = reverse(minus(v349, v669)); real2 v2611 = plus(v349, v669); real2 v989 = timesminusplus(reverse(v975), load(tbl, 172 * VECWIDTH + tbloffset), times(v975, load(tbl, 173 * VECWIDTH + tbloffset))); real2 v20 = load(in, 18 << inShift); real2 v84 = load(in, 82 << inShift); real2 v496 = plus(v20, v84); real2 v492 = minus(v84, v20); real2 v52 = load(in, 50 << inShift); real2 v116 = load(in, 114 << inShift); real2 v491 = reverse(minus(v52, v116)); real2 v497 = plus(v52, v116); real2 v817 = plus(v496, v497); real2 v811 = reverse(minus(v496, v497)); real2 v493 = minusplus(v491, v492); real2 v495 = minusplus(uminus(v491), v492); real2 v509 = timesminusplus(reverse(v495), load(tbl, 76 * VECWIDTH + tbloffset), times(v495, load(tbl, 77 * VECWIDTH + tbloffset))); real2 v503 = timesminusplus(reverse(v493), load(tbl, 74 * VECWIDTH + tbloffset), times(v493, load(tbl, 75 * VECWIDTH + tbloffset))); real2 v36 = load(in, 34 << inShift); real2 v100 = load(in, 98 << inShift); real2 v171 = reverse(minus(v36, v100)); real2 v177 = plus(v36, v100); real2 v68 = load(in, 66 << inShift); real2 v4 = load(in, 2 << inShift); real2 v176 = plus(v4, v68); real2 v172 = minus(v68, v4); real2 v816 = plus(v176, v177); real2 v812 = minus(v177, v176); real2 v1136 = plus(v816, v817); real2 v1132 = minus(v817, v816); real2 v1133 = minusplus(v1131, v1132); real2 v1135 = minusplus(uminus(v1131), v1132); real2 v1149 = timesminusplus(reverse(v1135), load(tbl, 204 * VECWIDTH + tbloffset), times(v1135, load(tbl, 205 * VECWIDTH + tbloffset))); real2 v1296 = plus(v1136, v1137); real2 v1292 = minus(v1137, v1136); real2 v1295 = minusplus(uminus(v1291), v1292); real2 v1293 = minusplus(v1291, v1292); real2 v1303 = timesminusplus(reverse(v1293), load(tbl, 234 * VECWIDTH + tbloffset), times(v1293, load(tbl, 235 * VECWIDTH + tbloffset))); real2 v1331 = reverse(minus(v1296, v1297)); real2 v1337 = plus(v1296, v1297); real2 v173 = minusplus(v171, v172); real2 v175 = minusplus(uminus(v171), v172); real2 v189 = 
timesminusplus(reverse(v175), load(tbl, 12 * VECWIDTH + tbloffset), times(v175, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v1309 = timesminusplus(reverse(v1295), load(tbl, 236 * VECWIDTH + tbloffset), times(v1295, load(tbl, 237 * VECWIDTH + tbloffset))); real2 v815 = minusplus(uminus(v811), v812); real2 v813 = minusplus(v811, v812); real2 v1143 = timesminusplus(reverse(v1133), load(tbl, 202 * VECWIDTH + tbloffset), times(v1133, load(tbl, 203 * VECWIDTH + tbloffset))); real2 v1541 = reverse(minus(v1149, v1229)); real2 v1547 = plus(v1149, v1229); real2 v2610 = plus(v189, v509); real2 v2606 = minus(v509, v189); real2 v2770 = plus(v2610, v2611); real2 v2766 = minus(v2611, v2610); real2 v823 = timesminusplus(reverse(v813), load(tbl, 138 * VECWIDTH + tbloffset), times(v813, load(tbl, 139 * VECWIDTH + tbloffset))); real2 v829 = timesminusplus(reverse(v815), load(tbl, 140 * VECWIDTH + tbloffset), times(v815, load(tbl, 141 * VECWIDTH + tbloffset))); real2 v2811 = plus(v2770, v2771); real2 v2805 = reverse(minus(v2770, v2771)); real2 v2767 = minusplus(v2765, v2766); real2 v2769 = minusplus(uminus(v2765), v2766); real2 v2607 = minusplus(v2605, v2606); real2 v2609 = minusplus(uminus(v2605), v2606); real2 v2617 = timesminusplus(reverse(v2607), load(tbl, 458 * VECWIDTH + tbloffset), times(v2607, load(tbl, 459 * VECWIDTH + tbloffset))); real2 v2623 = timesminusplus(reverse(v2609), load(tbl, 460 * VECWIDTH + tbloffset), times(v2609, load(tbl, 461 * VECWIDTH + tbloffset))); real2 v3013 = reverse(minus(v2623, v2703)); real2 v3019 = plus(v2623, v2703); real2 v2783 = timesminusplus(reverse(v2769), load(tbl, 492 * VECWIDTH + tbloffset), times(v2769, load(tbl, 493 * VECWIDTH + tbloffset))); real2 v2941 = plus(v2617, v2697); real2 v2935 = reverse(minus(v2617, v2697)); real2 v2777 = timesminusplus(reverse(v2767), load(tbl, 490 * VECWIDTH + tbloffset), times(v2767, load(tbl, 491 * VECWIDTH + tbloffset))); real2 v1660 = minus(v983, v823); real2 v1664 = plus(v823, v983); real2 v1874 = 
plus(v829, v989); real2 v1870 = minus(v989, v829); real2 v1909 = reverse(minus(v1874, v1875)); real2 v1915 = plus(v1874, v1875); real2 v1663 = minusplus(uminus(v1659), v1660); real2 v1661 = minusplus(v1659, v1660); real2 v1677 = timesminusplus(reverse(v1663), load(tbl, 296 * VECWIDTH + tbloffset), times(v1663, load(tbl, 297 * VECWIDTH + tbloffset))); real2 v1873 = minusplus(uminus(v1869), v1870); real2 v1871 = minusplus(v1869, v1870); real2 v1887 = timesminusplus(reverse(v1873), load(tbl, 332 * VECWIDTH + tbloffset), times(v1873, load(tbl, 333 * VECWIDTH + tbloffset))); real2 v1705 = plus(v1664, v1665); real2 v1699 = reverse(minus(v1664, v1665)); real2 v1671 = timesminusplus(reverse(v1661), load(tbl, 294 * VECWIDTH + tbloffset), times(v1661, load(tbl, 295 * VECWIDTH + tbloffset))); real2 v1881 = timesminusplus(reverse(v1871), load(tbl, 330 * VECWIDTH + tbloffset), times(v1871, load(tbl, 331 * VECWIDTH + tbloffset))); real2 v1469 = plus(v1143, v1223); real2 v1463 = reverse(minus(v1143, v1223)); real2 v54 = load(in, 52 << inShift); real2 v118 = load(in, 116 << inShift); real2 v537 = plus(v54, v118); real2 v531 = reverse(minus(v54, v118)); real2 v86 = load(in, 84 << inShift); real2 v22 = load(in, 20 << inShift); real2 v536 = plus(v22, v86); real2 v532 = minus(v86, v22); real2 v851 = reverse(minus(v536, v537)); real2 v857 = plus(v536, v537); real2 v533 = minusplus(v531, v532); real2 v535 = minusplus(uminus(v531), v532); real2 v549 = timesminusplus(reverse(v535), load(tbl, 84 * VECWIDTH + tbloffset), times(v535, load(tbl, 85 * VECWIDTH + tbloffset))); real2 v102 = load(in, 100 << inShift); real2 v38 = load(in, 36 << inShift); real2 v217 = plus(v38, v102); real2 v211 = reverse(minus(v38, v102)); real2 v70 = load(in, 68 << inShift); real2 v6 = load(in, 4 << inShift); real2 v216 = plus(v6, v70); real2 v212 = minus(v70, v6); real2 v213 = minusplus(v211, v212); real2 v215 = minusplus(uminus(v211), v212); real2 v229 = timesminusplus(reverse(v215), load(tbl, 20 * VECWIDTH + 
tbloffset), times(v215, load(tbl, 21 * VECWIDTH + tbloffset))); real2 v2646 = minus(v549, v229); real2 v2650 = plus(v229, v549); real2 v856 = plus(v216, v217); real2 v852 = minus(v217, v216); real2 v853 = minusplus(v851, v852); real2 v855 = minusplus(uminus(v851), v852); real2 v863 = timesminusplus(reverse(v853), load(tbl, 146 * VECWIDTH + tbloffset), times(v853, load(tbl, 147 * VECWIDTH + tbloffset))); real2 v869 = timesminusplus(reverse(v855), load(tbl, 148 * VECWIDTH + tbloffset), times(v855, load(tbl, 149 * VECWIDTH + tbloffset))); real2 v1176 = plus(v856, v857); real2 v1172 = minus(v857, v856); real2 v110 = load(in, 108 << inShift); real2 v46 = load(in, 44 << inShift); real2 v377 = plus(v46, v110); real2 v371 = reverse(minus(v46, v110)); real2 v78 = load(in, 76 << inShift); real2 v14 = load(in, 12 << inShift); real2 v372 = minus(v78, v14); real2 v376 = plus(v14, v78); real2 v1012 = minus(v377, v376); real2 v1016 = plus(v376, v377); real2 v373 = minusplus(v371, v372); real2 v375 = minusplus(uminus(v371), v372); real2 v389 = timesminusplus(reverse(v375), load(tbl, 52 * VECWIDTH + tbloffset), times(v375, load(tbl, 53 * VECWIDTH + tbloffset))); real2 v30 = load(in, 28 << inShift); real2 v94 = load(in, 92 << inShift); real2 v696 = plus(v30, v94); real2 v692 = minus(v94, v30); real2 v62 = load(in, 60 << inShift); real2 v126 = load(in, 124 << inShift); real2 v697 = plus(v62, v126); real2 v691 = reverse(minus(v62, v126)); real2 v1017 = plus(v696, v697); real2 v1011 = reverse(minus(v696, v697)); real2 v1171 = reverse(minus(v1016, v1017)); real2 v1177 = plus(v1016, v1017); real2 v1013 = minusplus(v1011, v1012); real2 v1015 = minusplus(uminus(v1011), v1012); real2 v1175 = minusplus(uminus(v1171), v1172); real2 v1173 = minusplus(v1171, v1172); real2 v1183 = timesminusplus(reverse(v1173), load(tbl, 210 * VECWIDTH + tbloffset), times(v1173, load(tbl, 211 * VECWIDTH + tbloffset))); real2 v1189 = timesminusplus(reverse(v1175), load(tbl, 212 * VECWIDTH + tbloffset), 
times(v1175, load(tbl, 213 * VECWIDTH + tbloffset))); real2 v1029 = timesminusplus(reverse(v1015), load(tbl, 180 * VECWIDTH + tbloffset), times(v1015, load(tbl, 181 * VECWIDTH + tbloffset))); real2 v1023 = timesminusplus(reverse(v1013), load(tbl, 178 * VECWIDTH + tbloffset), times(v1013, load(tbl, 179 * VECWIDTH + tbloffset))); real2 v1625 = plus(v863, v1023); real2 v1619 = reverse(minus(v863, v1023)); real2 v1835 = plus(v869, v1029); real2 v1829 = reverse(minus(v869, v1029)); real2 v693 = minusplus(v691, v692); real2 v695 = minusplus(uminus(v691), v692); real2 v709 = timesminusplus(reverse(v695), load(tbl, 116 * VECWIDTH + tbloffset), times(v695, load(tbl, 117 * VECWIDTH + tbloffset))); real2 v2645 = reverse(minus(v389, v709)); real2 v2651 = plus(v389, v709); real2 v1257 = plus(v1176, v1177); real2 v1251 = reverse(minus(v1176, v1177)); real2 v2731 = plus(v2650, v2651); real2 v2725 = reverse(minus(v2650, v2651)); real2 v114 = load(in, 112 << inShift); real2 v50 = load(in, 48 << inShift); real2 v457 = plus(v50, v114); real2 v451 = reverse(minus(v50, v114)); real2 v18 = load(in, 16 << inShift); real2 v82 = load(in, 80 << inShift); real2 v456 = plus(v18, v82); real2 v452 = minus(v82, v18); real2 v771 = reverse(minus(v456, v457)); real2 v777 = plus(v456, v457); real2 v453 = minusplus(v451, v452); real2 v455 = minusplus(uminus(v451), v452); real2 v469 = timesminusplus(reverse(v455), load(tbl, 68 * VECWIDTH + tbloffset), times(v455, load(tbl, 69 * VECWIDTH + tbloffset))); real2 v66 = load(in, 64 << inShift); real2 v2 = load(in, 0 << inShift); real2 v132 = minus(v66, v2); real2 v136 = plus(v2, v66); real2 v98 = load(in, 96 << inShift); real2 v34 = load(in, 32 << inShift); real2 v131 = reverse(minus(v34, v98)); real2 v137 = plus(v34, v98); real2 v133 = minusplus(v131, v132); real2 v135 = minusplus(uminus(v131), v132); real2 v149 = timesminusplus(reverse(v135), load(tbl, 4 * VECWIDTH + tbloffset), times(v135, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v2566 = minus(v469, 
v149); real2 v2570 = plus(v149, v469); real2 v772 = minus(v137, v136); real2 v776 = plus(v136, v137); real2 v1092 = minus(v777, v776); real2 v1096 = plus(v776, v777); real2 v773 = minusplus(v771, v772); real2 v775 = minusplus(uminus(v771), v772); real2 v783 = timesminusplus(reverse(v773), load(tbl, 130 * VECWIDTH + tbloffset), times(v773, load(tbl, 131 * VECWIDTH + tbloffset))); real2 v789 = timesminusplus(reverse(v775), load(tbl, 132 * VECWIDTH + tbloffset), times(v775, load(tbl, 133 * VECWIDTH + tbloffset))); real2 v74 = load(in, 72 << inShift); real2 v10 = load(in, 8 << inShift); real2 v296 = plus(v10, v74); real2 v292 = minus(v74, v10); real2 v42 = load(in, 40 << inShift); real2 v106 = load(in, 104 << inShift); real2 v291 = reverse(minus(v42, v106)); real2 v297 = plus(v42, v106); real2 v293 = minusplus(v291, v292); real2 v295 = minusplus(uminus(v291), v292); real2 v309 = timesminusplus(reverse(v295), load(tbl, 36 * VECWIDTH + tbloffset), times(v295, load(tbl, 37 * VECWIDTH + tbloffset))); real2 v932 = minus(v297, v296); real2 v936 = plus(v296, v297); real2 v122 = load(in, 120 << inShift); real2 v58 = load(in, 56 << inShift); real2 v617 = plus(v58, v122); real2 v611 = reverse(minus(v58, v122)); real2 v26 = load(in, 24 << inShift); real2 v90 = load(in, 88 << inShift); real2 v612 = minus(v90, v26); real2 v616 = plus(v26, v90); real2 v937 = plus(v616, v617); real2 v931 = reverse(minus(v616, v617)); real2 v1091 = reverse(minus(v936, v937)); real2 v1097 = plus(v936, v937); real2 v933 = minusplus(v931, v932); real2 v935 = minusplus(uminus(v931), v932); real2 v1093 = minusplus(v1091, v1092); real2 v1095 = minusplus(uminus(v1091), v1092); real2 v1103 = timesminusplus(reverse(v1093), load(tbl, 194 * VECWIDTH + tbloffset), times(v1093, load(tbl, 195 * VECWIDTH + tbloffset))); real2 v1468 = plus(v1103, v1183); real2 v1464 = minus(v1183, v1103); real2 v1508 = plus(v1468, v1469); real2 v1504 = minus(v1469, v1468); real2 v1252 = minus(v1097, v1096); real2 v1256 = plus(v1096, 
v1097); real2 v1336 = plus(v1256, v1257); real2 v1332 = minus(v1257, v1256); real2 v1335 = minusplus(uminus(v1331), v1332); real2 v1333 = minusplus(v1331, v1332); real2 v1343 = timesminusplus(reverse(v1333), load(tbl, 242 * VECWIDTH + tbloffset), times(v1333, load(tbl, 243 * VECWIDTH + tbloffset))); real2 v1349 = timesminusplus(reverse(v1335), load(tbl, 244 * VECWIDTH + tbloffset), times(v1335, load(tbl, 245 * VECWIDTH + tbloffset))); real2 v1376 = plus(v1336, v1337); real2 v1372 = minus(v1337, v1336); real2 v1465 = minusplus(v1463, v1464); real2 v1467 = minusplus(uminus(v1463), v1464); real2 v1255 = minusplus(uminus(v1251), v1252); real2 v1253 = minusplus(v1251, v1252); real2 v1481 = timesminusplus(reverse(v1467), load(tbl, 264 * VECWIDTH + tbloffset), times(v1467, load(tbl, 265 * VECWIDTH + tbloffset))); real2 v1475 = timesminusplus(reverse(v1465), load(tbl, 262 * VECWIDTH + tbloffset), times(v1465, load(tbl, 263 * VECWIDTH + tbloffset))); real2 v1109 = timesminusplus(reverse(v1095), load(tbl, 196 * VECWIDTH + tbloffset), times(v1095, load(tbl, 197 * VECWIDTH + tbloffset))); real2 v1542 = minus(v1189, v1109); real2 v1546 = plus(v1109, v1189); real2 v1545 = minusplus(uminus(v1541), v1542); real2 v1543 = minusplus(v1541, v1542); real2 v1553 = timesminusplus(reverse(v1543), load(tbl, 274 * VECWIDTH + tbloffset), times(v1543, load(tbl, 275 * VECWIDTH + tbloffset))); real2 v1559 = timesminusplus(reverse(v1545), load(tbl, 276 * VECWIDTH + tbloffset), times(v1545, load(tbl, 277 * VECWIDTH + tbloffset))); real2 v1582 = minus(v1547, v1546); real2 v1586 = plus(v1546, v1547); real2 v1269 = timesminusplus(reverse(v1255), load(tbl, 228 * VECWIDTH + tbloffset), times(v1255, load(tbl, 229 * VECWIDTH + tbloffset))); real2 v1438 = minus(v1309, v1269); real2 v1442 = plus(v1269, v1309); real2 v1263 = timesminusplus(reverse(v1253), load(tbl, 226 * VECWIDTH + tbloffset), times(v1253, load(tbl, 227 * VECWIDTH + tbloffset))); real2 v943 = timesminusplus(reverse(v933), load(tbl, 162 * 
VECWIDTH + tbloffset), times(v933, load(tbl, 163 * VECWIDTH + tbloffset))); real2 v1624 = plus(v783, v943); real2 v1620 = minus(v943, v783); real2 v1623 = minusplus(uminus(v1619), v1620); real2 v1621 = minusplus(v1619, v1620); real2 v1700 = minus(v1625, v1624); real2 v1704 = plus(v1624, v1625); real2 v1631 = timesminusplus(reverse(v1621), load(tbl, 286 * VECWIDTH + tbloffset), times(v1621, load(tbl, 287 * VECWIDTH + tbloffset))); real2 v949 = timesminusplus(reverse(v935), load(tbl, 164 * VECWIDTH + tbloffset), times(v935, load(tbl, 165 * VECWIDTH + tbloffset))); real2 v1830 = minus(v949, v789); real2 v1834 = plus(v789, v949); real2 v1782 = plus(v1631, v1671); real2 v1778 = minus(v1671, v1631); real2 v1910 = minus(v1835, v1834); real2 v1914 = plus(v1834, v1835); real2 v1950 = minus(v1915, v1914); real2 v1954 = plus(v1914, v1915); real2 v1913 = minusplus(uminus(v1909), v1910); real2 v1911 = minusplus(v1909, v1910); real2 v613 = minusplus(v611, v612); real2 v615 = minusplus(uminus(v611), v612); real2 v629 = timesminusplus(reverse(v615), load(tbl, 100 * VECWIDTH + tbloffset), times(v615, load(tbl, 101 * VECWIDTH + tbloffset))); real2 v1744 = plus(v1704, v1705); real2 v1740 = minus(v1705, v1704); real2 v1637 = timesminusplus(reverse(v1623), load(tbl, 288 * VECWIDTH + tbloffset), times(v1623, load(tbl, 289 * VECWIDTH + tbloffset))); real2 v1927 = timesminusplus(reverse(v1913), load(tbl, 340 * VECWIDTH + tbloffset), times(v1913, load(tbl, 341 * VECWIDTH + tbloffset))); real2 v2571 = plus(v309, v629); real2 v2565 = reverse(minus(v309, v629)); real2 v1833 = minusplus(uminus(v1829), v1830); real2 v1831 = minusplus(v1829, v1830); real2 v1921 = timesminusplus(reverse(v1911), load(tbl, 338 * VECWIDTH + tbloffset), times(v1911, load(tbl, 339 * VECWIDTH + tbloffset))); real2 v1804 = minus(v1677, v1637); real2 v1808 = plus(v1637, v1677); real2 v1847 = timesminusplus(reverse(v1833), load(tbl, 324 * VECWIDTH + tbloffset), times(v1833, load(tbl, 325 * VECWIDTH + tbloffset))); real2 
v2014 = minus(v1887, v1847); real2 v2018 = plus(v1847, v1887); real2 v1841 = timesminusplus(reverse(v1831), load(tbl, 322 * VECWIDTH + tbloffset), times(v1831, load(tbl, 323 * VECWIDTH + tbloffset))); real2 v1988 = minus(v1881, v1841); real2 v1992 = plus(v1841, v1881); real2 v1703 = minusplus(uminus(v1699), v1700); real2 v1701 = minusplus(v1699, v1700); real2 v1717 = timesminusplus(reverse(v1703), load(tbl, 304 * VECWIDTH + tbloffset), times(v1703, load(tbl, 305 * VECWIDTH + tbloffset))); real2 v1711 = timesminusplus(reverse(v1701), load(tbl, 302 * VECWIDTH + tbloffset), times(v1701, load(tbl, 303 * VECWIDTH + tbloffset))); real2 v2730 = plus(v2570, v2571); real2 v2726 = minus(v2571, v2570); real2 v1412 = minus(v1303, v1263); real2 v1416 = plus(v1263, v1303); real2 v63 = load(in, 61 << inShift); real2 v127 = load(in, 125 << inShift); real2 v717 = plus(v63, v127); real2 v711 = reverse(minus(v63, v127)); real2 v95 = load(in, 93 << inShift); real2 v31 = load(in, 29 << inShift); real2 v712 = minus(v95, v31); real2 v716 = plus(v31, v95); real2 v1037 = plus(v716, v717); real2 v1031 = reverse(minus(v716, v717)); real2 v79 = load(in, 77 << inShift); real2 v15 = load(in, 13 << inShift); real2 v396 = plus(v15, v79); real2 v392 = minus(v79, v15); real2 v111 = load(in, 109 << inShift); real2 v47 = load(in, 45 << inShift); real2 v397 = plus(v47, v111); real2 v391 = reverse(minus(v47, v111)); real2 v1032 = minus(v397, v396); real2 v1036 = plus(v396, v397); real2 v1033 = minusplus(v1031, v1032); real2 v1035 = minusplus(uminus(v1031), v1032); real2 v1049 = timesminusplus(reverse(v1035), load(tbl, 184 * VECWIDTH + tbloffset), times(v1035, load(tbl, 185 * VECWIDTH + tbloffset))); real2 v1043 = timesminusplus(reverse(v1033), load(tbl, 182 * VECWIDTH + tbloffset), times(v1033, load(tbl, 183 * VECWIDTH + tbloffset))); real2 v1197 = plus(v1036, v1037); real2 v1191 = reverse(minus(v1036, v1037)); real2 v23 = load(in, 21 << inShift); real2 v87 = load(in, 85 << inShift); real2 v556 = 
plus(v23, v87); real2 v552 = minus(v87, v23); real2 v119 = load(in, 117 << inShift); real2 v55 = load(in, 53 << inShift); real2 v557 = plus(v55, v119); real2 v551 = reverse(minus(v55, v119)); real2 v877 = plus(v556, v557); real2 v871 = reverse(minus(v556, v557)); real2 v7 = load(in, 5 << inShift); real2 v71 = load(in, 69 << inShift); real2 v232 = minus(v71, v7); real2 v236 = plus(v7, v71); real2 v103 = load(in, 101 << inShift); real2 v39 = load(in, 37 << inShift); real2 v237 = plus(v39, v103); real2 v231 = reverse(minus(v39, v103)); real2 v876 = plus(v236, v237); real2 v872 = minus(v237, v236); real2 v1192 = minus(v877, v876); real2 v1196 = plus(v876, v877); real2 v1271 = reverse(minus(v1196, v1197)); real2 v1277 = plus(v1196, v1197); real2 v875 = minusplus(uminus(v871), v872); real2 v873 = minusplus(v871, v872); real2 v883 = timesminusplus(reverse(v873), load(tbl, 150 * VECWIDTH + tbloffset), times(v873, load(tbl, 151 * VECWIDTH + tbloffset))); real2 v1639 = reverse(minus(v883, v1043)); real2 v1645 = plus(v883, v1043); real2 v1195 = minusplus(uminus(v1191), v1192); real2 v1193 = minusplus(v1191, v1192); real2 v1209 = timesminusplus(reverse(v1195), load(tbl, 216 * VECWIDTH + tbloffset), times(v1195, load(tbl, 217 * VECWIDTH + tbloffset))); real2 v1203 = timesminusplus(reverse(v1193), load(tbl, 214 * VECWIDTH + tbloffset), times(v1193, load(tbl, 215 * VECWIDTH + tbloffset))); real2 v83 = load(in, 81 << inShift); real2 v19 = load(in, 17 << inShift); real2 v476 = plus(v19, v83); real2 v472 = minus(v83, v19); real2 v51 = load(in, 49 << inShift); real2 v115 = load(in, 113 << inShift); real2 v477 = plus(v51, v115); real2 v471 = reverse(minus(v51, v115)); real2 v797 = plus(v476, v477); real2 v791 = reverse(minus(v476, v477)); real2 v3 = load(in, 1 << inShift); real2 v67 = load(in, 65 << inShift); real2 v156 = plus(v3, v67); real2 v152 = minus(v67, v3); real2 v35 = load(in, 33 << inShift); real2 v99 = load(in, 97 << inShift); real2 v157 = plus(v35, v99); real2 v151 = 
reverse(minus(v35, v99)); real2 v792 = minus(v157, v156); real2 v796 = plus(v156, v157); real2 v793 = minusplus(v791, v792); real2 v795 = minusplus(uminus(v791), v792); real2 v803 = timesminusplus(reverse(v793), load(tbl, 134 * VECWIDTH + tbloffset), times(v793, load(tbl, 135 * VECWIDTH + tbloffset))); real2 v1112 = minus(v797, v796); real2 v1116 = plus(v796, v797); real2 v107 = load(in, 105 << inShift); real2 v43 = load(in, 41 << inShift); real2 v317 = plus(v43, v107); real2 v311 = reverse(minus(v43, v107)); real2 v75 = load(in, 73 << inShift); real2 v11 = load(in, 9 << inShift); real2 v316 = plus(v11, v75); real2 v312 = minus(v75, v11); real2 v956 = plus(v316, v317); real2 v952 = minus(v317, v316); real2 v59 = load(in, 57 << inShift); real2 v123 = load(in, 121 << inShift); real2 v631 = reverse(minus(v59, v123)); real2 v637 = plus(v59, v123); real2 v27 = load(in, 25 << inShift); real2 v91 = load(in, 89 << inShift); real2 v636 = plus(v27, v91); real2 v632 = minus(v91, v27); real2 v957 = plus(v636, v637); real2 v951 = reverse(minus(v636, v637)); real2 v1111 = reverse(minus(v956, v957)); real2 v1117 = plus(v956, v957); real2 v1276 = plus(v1116, v1117); real2 v1272 = minus(v1117, v1116); real2 v1275 = minusplus(uminus(v1271), v1272); real2 v1273 = minusplus(v1271, v1272); real2 v1283 = timesminusplus(reverse(v1273), load(tbl, 230 * VECWIDTH + tbloffset), times(v1273, load(tbl, 231 * VECWIDTH + tbloffset))); real2 v1352 = minus(v1277, v1276); real2 v1356 = plus(v1276, v1277); real2 v1289 = timesminusplus(reverse(v1275), load(tbl, 232 * VECWIDTH + tbloffset), times(v1275, load(tbl, 233 * VECWIDTH + tbloffset))); real2 v1115 = minusplus(uminus(v1111), v1112); real2 v1113 = minusplus(v1111, v1112); real2 v1123 = timesminusplus(reverse(v1113), load(tbl, 198 * VECWIDTH + tbloffset), times(v1113, load(tbl, 199 * VECWIDTH + tbloffset))); real2 v1129 = timesminusplus(reverse(v1115), load(tbl, 200 * VECWIDTH + tbloffset), times(v1115, load(tbl, 201 * VECWIDTH + tbloffset))); 
real2 v1488 = plus(v1123, v1203); real2 v1484 = minus(v1203, v1123); real2 v1566 = plus(v1129, v1209); real2 v1562 = minus(v1209, v1129); real2 v85 = load(in, 83 << inShift); real2 v21 = load(in, 19 << inShift); real2 v512 = minus(v85, v21); real2 v516 = plus(v21, v85); real2 v117 = load(in, 115 << inShift); real2 v53 = load(in, 51 << inShift); real2 v517 = plus(v53, v117); real2 v511 = reverse(minus(v53, v117)); real2 v831 = reverse(minus(v516, v517)); real2 v837 = plus(v516, v517); real2 v69 = load(in, 67 << inShift); real2 v5 = load(in, 3 << inShift); real2 v192 = minus(v69, v5); real2 v196 = plus(v5, v69); real2 v37 = load(in, 35 << inShift); real2 v101 = load(in, 99 << inShift); real2 v197 = plus(v37, v101); real2 v191 = reverse(minus(v37, v101)); real2 v832 = minus(v197, v196); real2 v836 = plus(v196, v197); real2 v1152 = minus(v837, v836); real2 v1156 = plus(v836, v837); real2 v61 = load(in, 59 << inShift); real2 v125 = load(in, 123 << inShift); real2 v677 = plus(v61, v125); real2 v671 = reverse(minus(v61, v125)); real2 v29 = load(in, 27 << inShift); real2 v93 = load(in, 91 << inShift); real2 v672 = minus(v93, v29); real2 v676 = plus(v29, v93); real2 v997 = plus(v676, v677); real2 v991 = reverse(minus(v676, v677)); real2 v109 = load(in, 107 << inShift); real2 v45 = load(in, 43 << inShift); real2 v357 = plus(v45, v109); real2 v351 = reverse(minus(v45, v109)); real2 v77 = load(in, 75 << inShift); real2 v13 = load(in, 11 << inShift); real2 v352 = minus(v77, v13); real2 v356 = plus(v13, v77); real2 v992 = minus(v357, v356); real2 v996 = plus(v356, v357); real2 v1157 = plus(v996, v997); real2 v1151 = reverse(minus(v996, v997)); real2 v1155 = minusplus(uminus(v1151), v1152); real2 v1153 = minusplus(v1151, v1152); real2 v1163 = timesminusplus(reverse(v1153), load(tbl, 206 * VECWIDTH + tbloffset), times(v1153, load(tbl, 207 * VECWIDTH + tbloffset))); real2 v1316 = plus(v1156, v1157); real2 v1312 = minus(v1157, v1156); real2 v41 = load(in, 39 << inShift); real2 v105 
= load(in, 103 << inShift); real2 v277 = plus(v41, v105); real2 v271 = reverse(minus(v41, v105)); real2 v9 = load(in, 7 << inShift); real2 v73 = load(in, 71 << inShift); real2 v276 = plus(v9, v73); real2 v272 = minus(v73, v9); real2 v916 = plus(v276, v277); real2 v912 = minus(v277, v276); real2 v89 = load(in, 87 << inShift); real2 v25 = load(in, 23 << inShift); real2 v592 = minus(v89, v25); real2 v596 = plus(v25, v89); real2 v57 = load(in, 55 << inShift); real2 v121 = load(in, 119 << inShift); real2 v591 = reverse(minus(v57, v121)); real2 v597 = plus(v57, v121); real2 v911 = reverse(minus(v596, v597)); real2 v917 = plus(v596, v597); real2 v1236 = plus(v916, v917); real2 v1232 = minus(v917, v916); real2 v81 = load(in, 79 << inShift); real2 v17 = load(in, 15 << inShift); real2 v432 = minus(v81, v17); real2 v436 = plus(v17, v81); real2 v113 = load(in, 111 << inShift); real2 v49 = load(in, 47 << inShift); real2 v437 = plus(v49, v113); real2 v431 = reverse(minus(v49, v113)); real2 v1072 = minus(v437, v436); real2 v1076 = plus(v436, v437); real2 v65 = load(in, 63 << inShift); real2 v129 = load(in, 127 << inShift); real2 v757 = plus(v65, v129); real2 v751 = reverse(minus(v65, v129)); real2 v97 = load(in, 95 << inShift); real2 v33 = load(in, 31 << inShift); real2 v752 = minus(v97, v33); real2 v756 = plus(v33, v97); real2 v1077 = plus(v756, v757); real2 v1071 = reverse(minus(v756, v757)); real2 v1231 = reverse(minus(v1076, v1077)); real2 v1237 = plus(v1076, v1077); real2 v1317 = plus(v1236, v1237); real2 v1311 = reverse(minus(v1236, v1237)); real2 v1351 = reverse(minus(v1316, v1317)); real2 v1357 = plus(v1316, v1317); real2 v1371 = reverse(minus(v1356, v1357)); real2 v1377 = plus(v1356, v1357); scatter(out, 0, 128, plus(v1376, v1377)); real2 v1390 = minus(v1376, v1377); scatter(out, 64, 128, timesminusplus(v1390, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1390), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1353 = minusplus(v1351, v1352); real2 v1355 = 
minusplus(uminus(v1351), v1352); real2 v1369 = timesminusplus(reverse(v1355), load(tbl, 248 * VECWIDTH + tbloffset), times(v1355, load(tbl, 249 * VECWIDTH + tbloffset))); scatter(out, 48, 128, plus(v1349, v1369)); real2 v1404 = minus(v1349, v1369); scatter(out, 112, 128, timesminusplus(v1404, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1404), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1363 = timesminusplus(reverse(v1353), load(tbl, 246 * VECWIDTH + tbloffset), times(v1353, load(tbl, 247 * VECWIDTH + tbloffset))); scatter(out, 16, 128, plus(v1343, v1363)); real2 v1398 = minus(v1343, v1363); scatter(out, 80, 128, timesminusplus(v1398, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1398), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1373 = minusplus(v1371, v1372); real2 v1375 = minusplus(uminus(v1371), v1372); scatter(out, 96, 128, timesminusplus(reverse(v1375), load(tbl, 252 * VECWIDTH + tbloffset), times(v1375, load(tbl, 253 * VECWIDTH + tbloffset)))); scatter(out, 32, 128, timesminusplus(reverse(v1373), load(tbl, 250 * VECWIDTH + tbloffset), times(v1373, load(tbl, 251 * VECWIDTH + tbloffset)))); real2 v1313 = minusplus(v1311, v1312); real2 v1315 = minusplus(uminus(v1311), v1312); real2 v1323 = timesminusplus(reverse(v1313), load(tbl, 238 * VECWIDTH + tbloffset), times(v1313, load(tbl, 239 * VECWIDTH + tbloffset))); real2 v1417 = plus(v1283, v1323); real2 v1411 = reverse(minus(v1283, v1323)); scatter(out, 8, 128, plus(v1416, v1417)); real2 v1430 = minus(v1416, v1417); scatter(out, 72, 128, timesminusplus(v1430, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1430), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1413 = minusplus(v1411, v1412); real2 v1415 = minusplus(uminus(v1411), v1412); scatter(out, 104, 128, timesminusplus(reverse(v1415), load(tbl, 256 * VECWIDTH + tbloffset), times(v1415, load(tbl, 257 * VECWIDTH + tbloffset)))); scatter(out, 40, 128, timesminusplus(reverse(v1413), load(tbl, 254 * VECWIDTH + tbloffset), times(v1413, 
load(tbl, 255 * VECWIDTH + tbloffset)))); real2 v1329 = timesminusplus(reverse(v1315), load(tbl, 240 * VECWIDTH + tbloffset), times(v1315, load(tbl, 241 * VECWIDTH + tbloffset))); real2 v1443 = plus(v1289, v1329); real2 v1437 = reverse(minus(v1289, v1329)); scatter(out, 24, 128, plus(v1442, v1443)); real2 v1456 = minus(v1442, v1443); scatter(out, 88, 128, timesminusplus(v1456, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1456), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1441 = minusplus(uminus(v1437), v1438); real2 v1439 = minusplus(v1437, v1438); scatter(out, 120, 128, timesminusplus(reverse(v1441), load(tbl, 260 * VECWIDTH + tbloffset), times(v1441, load(tbl, 261 * VECWIDTH + tbloffset)))); scatter(out, 56, 128, timesminusplus(reverse(v1439), load(tbl, 258 * VECWIDTH + tbloffset), times(v1439, load(tbl, 259 * VECWIDTH + tbloffset)))); real2 v1235 = minusplus(uminus(v1231), v1232); real2 v1233 = minusplus(v1231, v1232); real2 v1243 = timesminusplus(reverse(v1233), load(tbl, 222 * VECWIDTH + tbloffset), times(v1233, load(tbl, 223 * VECWIDTH + tbloffset))); real2 v1489 = plus(v1163, v1243); real2 v1483 = reverse(minus(v1163, v1243)); real2 v1509 = plus(v1488, v1489); real2 v1503 = reverse(minus(v1488, v1489)); scatter(out, 4, 128, plus(v1508, v1509)); real2 v1522 = minus(v1508, v1509); scatter(out, 68, 128, timesminusplus(v1522, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1522), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1507 = minusplus(uminus(v1503), v1504); real2 v1505 = minusplus(v1503, v1504); scatter(out, 36, 128, timesminusplus(reverse(v1505), load(tbl, 270 * VECWIDTH + tbloffset), times(v1505, load(tbl, 271 * VECWIDTH + tbloffset)))); scatter(out, 100, 128, timesminusplus(reverse(v1507), load(tbl, 272 * VECWIDTH + tbloffset), times(v1507, load(tbl, 273 * VECWIDTH + tbloffset)))); real2 v1485 = minusplus(v1483, v1484); real2 v1487 = minusplus(uminus(v1483), v1484); real2 v1501 = timesminusplus(reverse(v1487), load(tbl, 268 * VECWIDTH + 
tbloffset), times(v1487, load(tbl, 269 * VECWIDTH + tbloffset))); scatter(out, 52, 128, plus(v1481, v1501)); real2 v1534 = minus(v1481, v1501); scatter(out, 116, 128, timesminusplus(v1534, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1534), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1495 = timesminusplus(reverse(v1485), load(tbl, 266 * VECWIDTH + tbloffset), times(v1485, load(tbl, 267 * VECWIDTH + tbloffset))); scatter(out, 20, 128, plus(v1475, v1495)); real2 v1528 = minus(v1475, v1495); scatter(out, 84, 128, timesminusplus(v1528, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1528), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1249 = timesminusplus(reverse(v1235), load(tbl, 224 * VECWIDTH + tbloffset), times(v1235, load(tbl, 225 * VECWIDTH + tbloffset))); real2 v1169 = timesminusplus(reverse(v1155), load(tbl, 208 * VECWIDTH + tbloffset), times(v1155, load(tbl, 209 * VECWIDTH + tbloffset))); real2 v1567 = plus(v1169, v1249); real2 v1561 = reverse(minus(v1169, v1249)); real2 v1581 = reverse(minus(v1566, v1567)); real2 v1587 = plus(v1566, v1567); scatter(out, 12, 128, plus(v1586, v1587)); real2 v1600 = minus(v1586, v1587); scatter(out, 76, 128, timesminusplus(v1600, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1600), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1583 = minusplus(v1581, v1582); scatter(out, 44, 128, timesminusplus(reverse(v1583), load(tbl, 282 * VECWIDTH + tbloffset), times(v1583, load(tbl, 283 * VECWIDTH + tbloffset)))); real2 v1585 = minusplus(uminus(v1581), v1582); scatter(out, 108, 128, timesminusplus(reverse(v1585), load(tbl, 284 * VECWIDTH + tbloffset), times(v1585, load(tbl, 285 * VECWIDTH + tbloffset)))); real2 v1565 = minusplus(uminus(v1561), v1562); real2 v1563 = minusplus(v1561, v1562); real2 v1579 = timesminusplus(reverse(v1565), load(tbl, 280 * VECWIDTH + tbloffset), times(v1565, load(tbl, 281 * VECWIDTH + tbloffset))); scatter(out, 60, 128, plus(v1559, v1579)); real2 v1612 = minus(v1559, v1579); scatter(out, 
124, 128, timesminusplus(v1612, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1612), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1573 = timesminusplus(reverse(v1563), load(tbl, 278 * VECWIDTH + tbloffset), times(v1563, load(tbl, 279 * VECWIDTH + tbloffset))); scatter(out, 28, 128, plus(v1553, v1573)); real2 v1606 = minus(v1553, v1573); scatter(out, 92, 128, timesminusplus(v1606, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1606), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v833 = minusplus(v831, v832); real2 v835 = minusplus(uminus(v831), v832); real2 v955 = minusplus(uminus(v951), v952); real2 v953 = minusplus(v951, v952); real2 v963 = timesminusplus(reverse(v953), load(tbl, 166 * VECWIDTH + tbloffset), times(v953, load(tbl, 167 * VECWIDTH + tbloffset))); real2 v995 = minusplus(uminus(v991), v992); real2 v993 = minusplus(v991, v992); real2 v1003 = timesminusplus(reverse(v993), load(tbl, 174 * VECWIDTH + tbloffset), times(v993, load(tbl, 175 * VECWIDTH + tbloffset))); real2 v843 = timesminusplus(reverse(v833), load(tbl, 142 * VECWIDTH + tbloffset), times(v833, load(tbl, 143 * VECWIDTH + tbloffset))); real2 v1640 = minus(v963, v803); real2 v1644 = plus(v803, v963); real2 v1680 = minus(v1003, v843); real2 v1684 = plus(v843, v1003); real2 v1641 = minusplus(v1639, v1640); real2 v1643 = minusplus(uminus(v1639), v1640); real2 v1657 = timesminusplus(reverse(v1643), load(tbl, 292 * VECWIDTH + tbloffset), times(v1643, load(tbl, 293 * VECWIDTH + tbloffset))); real2 v913 = minusplus(v911, v912); real2 v915 = minusplus(uminus(v911), v912); real2 v1073 = minusplus(v1071, v1072); real2 v1075 = minusplus(uminus(v1071), v1072); real2 v923 = timesminusplus(reverse(v913), load(tbl, 158 * VECWIDTH + tbloffset), times(v913, load(tbl, 159 * VECWIDTH + tbloffset))); real2 v1083 = timesminusplus(reverse(v1073), load(tbl, 190 * VECWIDTH + tbloffset), times(v1073, load(tbl, 191 * VECWIDTH + tbloffset))); real2 v1685 = plus(v923, v1083); real2 v1679 = reverse(minus(v923, 
v1083)); real2 v1681 = minusplus(v1679, v1680); real2 v1683 = minusplus(uminus(v1679), v1680); real2 v1697 = timesminusplus(reverse(v1683), load(tbl, 300 * VECWIDTH + tbloffset), times(v1683, load(tbl, 301 * VECWIDTH + tbloffset))); real2 v1809 = plus(v1657, v1697); real2 v1803 = reverse(minus(v1657, v1697)); scatter(out, 26, 128, plus(v1808, v1809)); real2 v1822 = minus(v1808, v1809); scatter(out, 90, 128, timesminusplus(v1822, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1822), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1807 = minusplus(uminus(v1803), v1804); real2 v1805 = minusplus(v1803, v1804); scatter(out, 58, 128, timesminusplus(reverse(v1805), load(tbl, 318 * VECWIDTH + tbloffset), times(v1805, load(tbl, 319 * VECWIDTH + tbloffset)))); scatter(out, 122, 128, timesminusplus(reverse(v1807), load(tbl, 320 * VECWIDTH + tbloffset), times(v1807, load(tbl, 321 * VECWIDTH + tbloffset)))); real2 v1651 = timesminusplus(reverse(v1641), load(tbl, 290 * VECWIDTH + tbloffset), times(v1641, load(tbl, 291 * VECWIDTH + tbloffset))); real2 v1691 = timesminusplus(reverse(v1681), load(tbl, 298 * VECWIDTH + tbloffset), times(v1681, load(tbl, 299 * VECWIDTH + tbloffset))); real2 v1783 = plus(v1651, v1691); real2 v1777 = reverse(minus(v1651, v1691)); real2 v1779 = minusplus(v1777, v1778); real2 v1781 = minusplus(uminus(v1777), v1778); scatter(out, 106, 128, timesminusplus(reverse(v1781), load(tbl, 316 * VECWIDTH + tbloffset), times(v1781, load(tbl, 317 * VECWIDTH + tbloffset)))); scatter(out, 42, 128, timesminusplus(reverse(v1779), load(tbl, 314 * VECWIDTH + tbloffset), times(v1779, load(tbl, 315 * VECWIDTH + tbloffset)))); scatter(out, 10, 128, plus(v1782, v1783)); real2 v1796 = minus(v1782, v1783); scatter(out, 74, 128, timesminusplus(v1796, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1796), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1720 = minus(v1645, v1644); real2 v1724 = plus(v1644, v1645); real2 v1719 = reverse(minus(v1684, v1685)); real2 v1725 = 
plus(v1684, v1685); real2 v1745 = plus(v1724, v1725); real2 v1739 = reverse(minus(v1724, v1725)); scatter(out, 2, 128, plus(v1744, v1745)); real2 v1758 = minus(v1744, v1745); scatter(out, 66, 128, timesminusplus(v1758, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1758), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1741 = minusplus(v1739, v1740); real2 v1743 = minusplus(uminus(v1739), v1740); scatter(out, 98, 128, timesminusplus(reverse(v1743), load(tbl, 312 * VECWIDTH + tbloffset), times(v1743, load(tbl, 313 * VECWIDTH + tbloffset)))); scatter(out, 34, 128, timesminusplus(reverse(v1741), load(tbl, 310 * VECWIDTH + tbloffset), times(v1741, load(tbl, 311 * VECWIDTH + tbloffset)))); real2 v1723 = minusplus(uminus(v1719), v1720); real2 v1721 = minusplus(v1719, v1720); real2 v1737 = timesminusplus(reverse(v1723), load(tbl, 308 * VECWIDTH + tbloffset), times(v1723, load(tbl, 309 * VECWIDTH + tbloffset))); scatter(out, 50, 128, plus(v1717, v1737)); real2 v1770 = minus(v1717, v1737); scatter(out, 114, 128, timesminusplus(v1770, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1770), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1731 = timesminusplus(reverse(v1721), load(tbl, 306 * VECWIDTH + tbloffset), times(v1721, load(tbl, 307 * VECWIDTH + tbloffset))); scatter(out, 18, 128, plus(v1711, v1731)); real2 v1764 = minus(v1711, v1731); scatter(out, 82, 128, timesminusplus(v1764, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1764), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v809 = timesminusplus(reverse(v795), load(tbl, 136 * VECWIDTH + tbloffset), times(v795, load(tbl, 137 * VECWIDTH + tbloffset))); real2 v969 = timesminusplus(reverse(v955), load(tbl, 168 * VECWIDTH + tbloffset), times(v955, load(tbl, 169 * VECWIDTH + tbloffset))); real2 v1850 = minus(v969, v809); real2 v1854 = plus(v809, v969); real2 v849 = timesminusplus(reverse(v835), load(tbl, 144 * VECWIDTH + tbloffset), times(v835, load(tbl, 145 * VECWIDTH + tbloffset))); real2 v929 = 
timesminusplus(reverse(v915), load(tbl, 160 * VECWIDTH + tbloffset), times(v915, load(tbl, 161 * VECWIDTH + tbloffset))); real2 v889 = timesminusplus(reverse(v875), load(tbl, 152 * VECWIDTH + tbloffset), times(v875, load(tbl, 153 * VECWIDTH + tbloffset))); real2 v1089 = timesminusplus(reverse(v1075), load(tbl, 192 * VECWIDTH + tbloffset), times(v1075, load(tbl, 193 * VECWIDTH + tbloffset))); real2 v1009 = timesminusplus(reverse(v995), load(tbl, 176 * VECWIDTH + tbloffset), times(v995, load(tbl, 177 * VECWIDTH + tbloffset))); real2 v1890 = minus(v1009, v849); real2 v1894 = plus(v849, v1009); real2 v1849 = reverse(minus(v889, v1049)); real2 v1855 = plus(v889, v1049); real2 v1930 = minus(v1855, v1854); real2 v1934 = plus(v1854, v1855); real2 v1895 = plus(v929, v1089); real2 v1889 = reverse(minus(v929, v1089)); real2 v1929 = reverse(minus(v1894, v1895)); real2 v1935 = plus(v1894, v1895); real2 v1955 = plus(v1934, v1935); real2 v1949 = reverse(minus(v1934, v1935)); scatter(out, 6, 128, plus(v1954, v1955)); real2 v1968 = minus(v1954, v1955); scatter(out, 70, 128, timesminusplus(v1968, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1968), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1951 = minusplus(v1949, v1950); scatter(out, 38, 128, timesminusplus(reverse(v1951), load(tbl, 346 * VECWIDTH + tbloffset), times(v1951, load(tbl, 347 * VECWIDTH + tbloffset)))); real2 v1953 = minusplus(uminus(v1949), v1950); scatter(out, 102, 128, timesminusplus(reverse(v1953), load(tbl, 348 * VECWIDTH + tbloffset), times(v1953, load(tbl, 349 * VECWIDTH + tbloffset)))); real2 v1931 = minusplus(v1929, v1930); real2 v1933 = minusplus(uminus(v1929), v1930); real2 v1947 = timesminusplus(reverse(v1933), load(tbl, 344 * VECWIDTH + tbloffset), times(v1933, load(tbl, 345 * VECWIDTH + tbloffset))); scatter(out, 54, 128, plus(v1927, v1947)); real2 v1980 = minus(v1927, v1947); scatter(out, 118, 128, timesminusplus(v1980, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1980), load(tbl, 1 * 
VECWIDTH + tbloffset)))); real2 v1941 = timesminusplus(reverse(v1931), load(tbl, 342 * VECWIDTH + tbloffset), times(v1931, load(tbl, 343 * VECWIDTH + tbloffset))); scatter(out, 22, 128, plus(v1921, v1941)); real2 v1974 = minus(v1921, v1941); scatter(out, 86, 128, timesminusplus(v1974, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1974), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1851 = minusplus(v1849, v1850); real2 v1853 = minusplus(uminus(v1849), v1850); real2 v1867 = timesminusplus(reverse(v1853), load(tbl, 328 * VECWIDTH + tbloffset), times(v1853, load(tbl, 329 * VECWIDTH + tbloffset))); real2 v1891 = minusplus(v1889, v1890); real2 v1893 = minusplus(uminus(v1889), v1890); real2 v1907 = timesminusplus(reverse(v1893), load(tbl, 336 * VECWIDTH + tbloffset), times(v1893, load(tbl, 337 * VECWIDTH + tbloffset))); real2 v2019 = plus(v1867, v1907); real2 v2013 = reverse(minus(v1867, v1907)); scatter(out, 30, 128, plus(v2018, v2019)); real2 v2032 = minus(v2018, v2019); scatter(out, 94, 128, timesminusplus(v2032, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2032), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2017 = minusplus(uminus(v2013), v2014); scatter(out, 126, 128, timesminusplus(reverse(v2017), load(tbl, 356 * VECWIDTH + tbloffset), times(v2017, load(tbl, 357 * VECWIDTH + tbloffset)))); real2 v2015 = minusplus(v2013, v2014); scatter(out, 62, 128, timesminusplus(reverse(v2015), load(tbl, 354 * VECWIDTH + tbloffset), times(v2015, load(tbl, 355 * VECWIDTH + tbloffset)))); real2 v1861 = timesminusplus(reverse(v1851), load(tbl, 326 * VECWIDTH + tbloffset), times(v1851, load(tbl, 327 * VECWIDTH + tbloffset))); real2 v1901 = timesminusplus(reverse(v1891), load(tbl, 334 * VECWIDTH + tbloffset), times(v1891, load(tbl, 335 * VECWIDTH + tbloffset))); real2 v1993 = plus(v1861, v1901); real2 v1987 = reverse(minus(v1861, v1901)); scatter(out, 14, 128, plus(v1992, v1993)); real2 v2006 = minus(v1992, v1993); scatter(out, 78, 128, timesminusplus(v2006, load(tbl, 
0 * VECWIDTH + tbloffset), times(reverse(v2006), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1991 = minusplus(uminus(v1987), v1988); scatter(out, 110, 128, timesminusplus(reverse(v1991), load(tbl, 352 * VECWIDTH + tbloffset), times(v1991, load(tbl, 353 * VECWIDTH + tbloffset)))); real2 v1989 = minusplus(v1987, v1988); scatter(out, 46, 128, timesminusplus(reverse(v1989), load(tbl, 350 * VECWIDTH + tbloffset), times(v1989, load(tbl, 351 * VECWIDTH + tbloffset)))); real2 v593 = minusplus(v591, v592); real2 v595 = minusplus(uminus(v591), v592); real2 v473 = minusplus(v471, v472); real2 v475 = minusplus(uminus(v471), v472); real2 v555 = minusplus(uminus(v551), v552); real2 v553 = minusplus(v551, v552); real2 v609 = timesminusplus(reverse(v595), load(tbl, 96 * VECWIDTH + tbloffset), times(v595, load(tbl, 97 * VECWIDTH + tbloffset))); real2 v195 = minusplus(uminus(v191), v192); real2 v193 = minusplus(v191, v192); real2 v275 = minusplus(uminus(v271), v272); real2 v273 = minusplus(v271, v272); real2 v673 = minusplus(v671, v672); real2 v675 = minusplus(uminus(v671), v672); real2 v689 = timesminusplus(reverse(v675), load(tbl, 112 * VECWIDTH + tbloffset), times(v675, load(tbl, 113 * VECWIDTH + tbloffset))); real2 v209 = timesminusplus(reverse(v195), load(tbl, 16 * VECWIDTH + tbloffset), times(v195, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v289 = timesminusplus(reverse(v275), load(tbl, 32 * VECWIDTH + tbloffset), times(v275, load(tbl, 33 * VECWIDTH + tbloffset))); real2 v755 = minusplus(uminus(v751), v752); real2 v753 = minusplus(v751, v752); real2 v435 = minusplus(uminus(v431), v432); real2 v433 = minusplus(v431, v432); real2 v513 = minusplus(v511, v512); real2 v515 = minusplus(uminus(v511), v512); real2 v529 = timesminusplus(reverse(v515), load(tbl, 80 * VECWIDTH + tbloffset), times(v515, load(tbl, 81 * VECWIDTH + tbloffset))); real2 v353 = minusplus(v351, v352); real2 v355 = minusplus(uminus(v351), v352); real2 v369 = timesminusplus(reverse(v355), load(tbl, 48 * 
VECWIDTH + tbloffset), times(v355, load(tbl, 49 * VECWIDTH + tbloffset))); real2 v2631 = plus(v369, v689); real2 v2625 = reverse(minus(v369, v689)); real2 v449 = timesminusplus(reverse(v435), load(tbl, 64 * VECWIDTH + tbloffset), times(v435, load(tbl, 65 * VECWIDTH + tbloffset))); real2 v2710 = plus(v289, v609); real2 v2706 = minus(v609, v289); real2 v2630 = plus(v209, v529); real2 v2626 = minus(v529, v209); real2 v2790 = plus(v2630, v2631); real2 v2786 = minus(v2631, v2630); real2 v713 = minusplus(v711, v712); real2 v715 = minusplus(uminus(v711), v712); real2 v769 = timesminusplus(reverse(v755), load(tbl, 128 * VECWIDTH + tbloffset), times(v755, load(tbl, 129 * VECWIDTH + tbloffset))); real2 v2705 = reverse(minus(v449, v769)); real2 v2711 = plus(v449, v769); real2 v313 = minusplus(v311, v312); real2 v315 = minusplus(uminus(v311), v312); real2 v393 = minusplus(v391, v392); real2 v395 = minusplus(uminus(v391), v392); real2 v409 = timesminusplus(reverse(v395), load(tbl, 56 * VECWIDTH + tbloffset), times(v395, load(tbl, 57 * VECWIDTH + tbloffset))); real2 v729 = timesminusplus(reverse(v715), load(tbl, 120 * VECWIDTH + tbloffset), times(v715, load(tbl, 121 * VECWIDTH + tbloffset))); real2 v329 = timesminusplus(reverse(v315), load(tbl, 40 * VECWIDTH + tbloffset), times(v315, load(tbl, 41 * VECWIDTH + tbloffset))); real2 v489 = timesminusplus(reverse(v475), load(tbl, 72 * VECWIDTH + tbloffset), times(v475, load(tbl, 73 * VECWIDTH + tbloffset))); real2 v153 = minusplus(v151, v152); real2 v155 = minusplus(uminus(v151), v152); real2 v169 = timesminusplus(reverse(v155), load(tbl, 8 * VECWIDTH + tbloffset), times(v155, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v2586 = minus(v489, v169); real2 v2590 = plus(v169, v489); real2 v233 = minusplus(v231, v232); real2 v235 = minusplus(uminus(v231), v232); real2 v633 = minusplus(v631, v632); real2 v635 = minusplus(uminus(v631), v632); real2 v649 = timesminusplus(reverse(v635), load(tbl, 104 * VECWIDTH + tbloffset), times(v635, 
load(tbl, 105 * VECWIDTH + tbloffset))); real2 v249 = timesminusplus(reverse(v235), load(tbl, 24 * VECWIDTH + tbloffset), times(v235, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v569 = timesminusplus(reverse(v555), load(tbl, 88 * VECWIDTH + tbloffset), times(v555, load(tbl, 89 * VECWIDTH + tbloffset))); real2 v2670 = plus(v249, v569); real2 v2666 = minus(v569, v249); real2 v2785 = reverse(minus(v2710, v2711)); real2 v2791 = plus(v2710, v2711); real2 v2825 = reverse(minus(v2790, v2791)); real2 v2831 = plus(v2790, v2791); real2 v2671 = plus(v409, v729); real2 v2665 = reverse(minus(v409, v729)); real2 v2745 = reverse(minus(v2670, v2671)); real2 v2751 = plus(v2670, v2671); real2 v2806 = minus(v2731, v2730); real2 v2810 = plus(v2730, v2731); real2 v2846 = minus(v2811, v2810); real2 v2850 = plus(v2810, v2811); real2 v2591 = plus(v329, v649); real2 v2585 = reverse(minus(v329, v649)); real2 v2750 = plus(v2590, v2591); real2 v2746 = minus(v2591, v2590); real2 v2830 = plus(v2750, v2751); real2 v2826 = minus(v2751, v2750); real2 v2845 = reverse(minus(v2830, v2831)); real2 v2851 = plus(v2830, v2831); scatter(out, 3, 128, plus(v2850, v2851)); real2 v2864 = minus(v2850, v2851); scatter(out, 67, 128, timesminusplus(v2864, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2864), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2849 = minusplus(uminus(v2845), v2846); real2 v2847 = minusplus(v2845, v2846); scatter(out, 35, 128, timesminusplus(reverse(v2847), load(tbl, 506 * VECWIDTH + tbloffset), times(v2847, load(tbl, 507 * VECWIDTH + tbloffset)))); scatter(out, 99, 128, timesminusplus(reverse(v2849), load(tbl, 508 * VECWIDTH + tbloffset), times(v2849, load(tbl, 509 * VECWIDTH + tbloffset)))); real2 v2827 = minusplus(v2825, v2826); real2 v2829 = minusplus(uminus(v2825), v2826); real2 v2837 = timesminusplus(reverse(v2827), load(tbl, 502 * VECWIDTH + tbloffset), times(v2827, load(tbl, 503 * VECWIDTH + tbloffset))); real2 v2809 = minusplus(uminus(v2805), v2806); real2 v2807 = 
minusplus(v2805, v2806); real2 v2817 = timesminusplus(reverse(v2807), load(tbl, 498 * VECWIDTH + tbloffset), times(v2807, load(tbl, 499 * VECWIDTH + tbloffset))); scatter(out, 19, 128, plus(v2817, v2837)); real2 v2870 = minus(v2817, v2837); scatter(out, 83, 128, timesminusplus(v2870, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2870), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2823 = timesminusplus(reverse(v2809), load(tbl, 500 * VECWIDTH + tbloffset), times(v2809, load(tbl, 501 * VECWIDTH + tbloffset))); real2 v2843 = timesminusplus(reverse(v2829), load(tbl, 504 * VECWIDTH + tbloffset), times(v2829, load(tbl, 505 * VECWIDTH + tbloffset))); scatter(out, 51, 128, plus(v2823, v2843)); real2 v2876 = minus(v2823, v2843); scatter(out, 115, 128, timesminusplus(v2876, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2876), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2787 = minusplus(v2785, v2786); real2 v2789 = minusplus(uminus(v2785), v2786); real2 v2803 = timesminusplus(reverse(v2789), load(tbl, 496 * VECWIDTH + tbloffset), times(v2789, load(tbl, 497 * VECWIDTH + tbloffset))); real2 v2727 = minusplus(v2725, v2726); real2 v2729 = minusplus(uminus(v2725), v2726); real2 v2743 = timesminusplus(reverse(v2729), load(tbl, 484 * VECWIDTH + tbloffset), times(v2729, load(tbl, 485 * VECWIDTH + tbloffset))); real2 v2914 = plus(v2743, v2783); real2 v2910 = minus(v2783, v2743); real2 v2749 = minusplus(uminus(v2745), v2746); real2 v2747 = minusplus(v2745, v2746); real2 v2763 = timesminusplus(reverse(v2749), load(tbl, 488 * VECWIDTH + tbloffset), times(v2749, load(tbl, 489 * VECWIDTH + tbloffset))); real2 v2909 = reverse(minus(v2763, v2803)); real2 v2915 = plus(v2763, v2803); scatter(out, 27, 128, plus(v2914, v2915)); real2 v2928 = minus(v2914, v2915); scatter(out, 91, 128, timesminusplus(v2928, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2928), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2913 = minusplus(uminus(v2909), v2910); scatter(out, 123, 128, 
timesminusplus(reverse(v2913), load(tbl, 516 * VECWIDTH + tbloffset), times(v2913, load(tbl, 517 * VECWIDTH + tbloffset)))); real2 v2911 = minusplus(v2909, v2910); scatter(out, 59, 128, timesminusplus(reverse(v2911), load(tbl, 514 * VECWIDTH + tbloffset), times(v2911, load(tbl, 515 * VECWIDTH + tbloffset)))); real2 v2737 = timesminusplus(reverse(v2727), load(tbl, 482 * VECWIDTH + tbloffset), times(v2727, load(tbl, 483 * VECWIDTH + tbloffset))); real2 v2888 = plus(v2737, v2777); real2 v2884 = minus(v2777, v2737); real2 v2797 = timesminusplus(reverse(v2787), load(tbl, 494 * VECWIDTH + tbloffset), times(v2787, load(tbl, 495 * VECWIDTH + tbloffset))); real2 v2757 = timesminusplus(reverse(v2747), load(tbl, 486 * VECWIDTH + tbloffset), times(v2747, load(tbl, 487 * VECWIDTH + tbloffset))); real2 v2889 = plus(v2757, v2797); real2 v2883 = reverse(minus(v2757, v2797)); scatter(out, 11, 128, plus(v2888, v2889)); real2 v2902 = minus(v2888, v2889); scatter(out, 75, 128, timesminusplus(v2902, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2902), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2887 = minusplus(uminus(v2883), v2884); scatter(out, 107, 128, timesminusplus(reverse(v2887), load(tbl, 512 * VECWIDTH + tbloffset), times(v2887, load(tbl, 513 * VECWIDTH + tbloffset)))); real2 v2885 = minusplus(v2883, v2884); scatter(out, 43, 128, timesminusplus(reverse(v2885), load(tbl, 510 * VECWIDTH + tbloffset), times(v2885, load(tbl, 511 * VECWIDTH + tbloffset)))); real2 v2669 = minusplus(uminus(v2665), v2666); real2 v2667 = minusplus(v2665, v2666); real2 v2707 = minusplus(v2705, v2706); real2 v2709 = minusplus(uminus(v2705), v2706); real2 v2717 = timesminusplus(reverse(v2707), load(tbl, 478 * VECWIDTH + tbloffset), times(v2707, load(tbl, 479 * VECWIDTH + tbloffset))); real2 v2627 = minusplus(v2625, v2626); real2 v2629 = minusplus(uminus(v2625), v2626); real2 v2637 = timesminusplus(reverse(v2627), load(tbl, 462 * VECWIDTH + tbloffset), times(v2627, load(tbl, 463 * VECWIDTH + 
tbloffset))); real2 v2961 = plus(v2637, v2717); real2 v2955 = reverse(minus(v2637, v2717)); real2 v2649 = minusplus(uminus(v2645), v2646); real2 v2647 = minusplus(v2645, v2646); real2 v2569 = minusplus(uminus(v2565), v2566); real2 v2567 = minusplus(v2565, v2566); real2 v2577 = timesminusplus(reverse(v2567), load(tbl, 450 * VECWIDTH + tbloffset), times(v2567, load(tbl, 451 * VECWIDTH + tbloffset))); real2 v2657 = timesminusplus(reverse(v2647), load(tbl, 466 * VECWIDTH + tbloffset), times(v2647, load(tbl, 467 * VECWIDTH + tbloffset))); real2 v2936 = minus(v2657, v2577); real2 v2940 = plus(v2577, v2657); real2 v2976 = minus(v2941, v2940); real2 v2980 = plus(v2940, v2941); real2 v2677 = timesminusplus(reverse(v2667), load(tbl, 470 * VECWIDTH + tbloffset), times(v2667, load(tbl, 471 * VECWIDTH + tbloffset))); real2 v2587 = minusplus(v2585, v2586); real2 v2589 = minusplus(uminus(v2585), v2586); real2 v2597 = timesminusplus(reverse(v2587), load(tbl, 454 * VECWIDTH + tbloffset), times(v2587, load(tbl, 455 * VECWIDTH + tbloffset))); real2 v2956 = minus(v2677, v2597); real2 v2960 = plus(v2597, v2677); real2 v2975 = reverse(minus(v2960, v2961)); real2 v2981 = plus(v2960, v2961); scatter(out, 7, 128, plus(v2980, v2981)); real2 v2994 = minus(v2980, v2981); scatter(out, 71, 128, timesminusplus(v2994, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2994), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2979 = minusplus(uminus(v2975), v2976); scatter(out, 103, 128, timesminusplus(reverse(v2979), load(tbl, 528 * VECWIDTH + tbloffset), times(v2979, load(tbl, 529 * VECWIDTH + tbloffset)))); real2 v2977 = minusplus(v2975, v2976); scatter(out, 39, 128, timesminusplus(reverse(v2977), load(tbl, 526 * VECWIDTH + tbloffset), times(v2977, load(tbl, 527 * VECWIDTH + tbloffset)))); real2 v2939 = minusplus(uminus(v2935), v2936); real2 v2937 = minusplus(v2935, v2936); real2 v2953 = timesminusplus(reverse(v2939), load(tbl, 520 * VECWIDTH + tbloffset), times(v2939, load(tbl, 521 * VECWIDTH + 
tbloffset))); real2 v2957 = minusplus(v2955, v2956); real2 v2959 = minusplus(uminus(v2955), v2956); real2 v2973 = timesminusplus(reverse(v2959), load(tbl, 524 * VECWIDTH + tbloffset), times(v2959, load(tbl, 525 * VECWIDTH + tbloffset))); scatter(out, 55, 128, plus(v2953, v2973)); real2 v3006 = minus(v2953, v2973); scatter(out, 119, 128, timesminusplus(v3006, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3006), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2947 = timesminusplus(reverse(v2937), load(tbl, 518 * VECWIDTH + tbloffset), times(v2937, load(tbl, 519 * VECWIDTH + tbloffset))); real2 v2967 = timesminusplus(reverse(v2957), load(tbl, 522 * VECWIDTH + tbloffset), times(v2957, load(tbl, 523 * VECWIDTH + tbloffset))); scatter(out, 23, 128, plus(v2947, v2967)); real2 v3000 = minus(v2947, v2967); scatter(out, 87, 128, timesminusplus(v3000, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3000), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2663 = timesminusplus(reverse(v2649), load(tbl, 468 * VECWIDTH + tbloffset), times(v2649, load(tbl, 469 * VECWIDTH + tbloffset))); real2 v2583 = timesminusplus(reverse(v2569), load(tbl, 452 * VECWIDTH + tbloffset), times(v2569, load(tbl, 453 * VECWIDTH + tbloffset))); real2 v3014 = minus(v2663, v2583); real2 v3018 = plus(v2583, v2663); real2 v3015 = minusplus(v3013, v3014); real2 v3017 = minusplus(uminus(v3013), v3014); real2 v2643 = timesminusplus(reverse(v2629), load(tbl, 464 * VECWIDTH + tbloffset), times(v2629, load(tbl, 465 * VECWIDTH + tbloffset))); real2 v2723 = timesminusplus(reverse(v2709), load(tbl, 480 * VECWIDTH + tbloffset), times(v2709, load(tbl, 481 * VECWIDTH + tbloffset))); real2 v3039 = plus(v2643, v2723); real2 v3033 = reverse(minus(v2643, v2723)); real2 v2683 = timesminusplus(reverse(v2669), load(tbl, 472 * VECWIDTH + tbloffset), times(v2669, load(tbl, 473 * VECWIDTH + tbloffset))); real2 v3031 = timesminusplus(reverse(v3017), load(tbl, 532 * VECWIDTH + tbloffset), times(v3017, load(tbl, 533 * 
VECWIDTH + tbloffset))); real2 v2603 = timesminusplus(reverse(v2589), load(tbl, 456 * VECWIDTH + tbloffset), times(v2589, load(tbl, 457 * VECWIDTH + tbloffset))); real2 v3034 = minus(v2683, v2603); real2 v3038 = plus(v2603, v2683); real2 v3037 = minusplus(uminus(v3033), v3034); real2 v3035 = minusplus(v3033, v3034); real2 v3051 = timesminusplus(reverse(v3037), load(tbl, 536 * VECWIDTH + tbloffset), times(v3037, load(tbl, 537 * VECWIDTH + tbloffset))); scatter(out, 63, 128, plus(v3031, v3051)); real2 v3084 = minus(v3031, v3051); scatter(out, 127, 128, timesminusplus(v3084, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3084), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v3025 = timesminusplus(reverse(v3015), load(tbl, 530 * VECWIDTH + tbloffset), times(v3015, load(tbl, 531 * VECWIDTH + tbloffset))); real2 v3045 = timesminusplus(reverse(v3035), load(tbl, 534 * VECWIDTH + tbloffset), times(v3035, load(tbl, 535 * VECWIDTH + tbloffset))); scatter(out, 31, 128, plus(v3025, v3045)); real2 v3078 = minus(v3025, v3045); scatter(out, 95, 128, timesminusplus(v3078, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3078), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v3058 = plus(v3018, v3019); real2 v3054 = minus(v3019, v3018); real2 v3053 = reverse(minus(v3038, v3039)); real2 v3059 = plus(v3038, v3039); real2 v3055 = minusplus(v3053, v3054); scatter(out, 47, 128, timesminusplus(reverse(v3055), load(tbl, 538 * VECWIDTH + tbloffset), times(v3055, load(tbl, 539 * VECWIDTH + tbloffset)))); real2 v3057 = minusplus(uminus(v3053), v3054); scatter(out, 111, 128, timesminusplus(reverse(v3057), load(tbl, 540 * VECWIDTH + tbloffset), times(v3057, load(tbl, 541 * VECWIDTH + tbloffset)))); scatter(out, 15, 128, plus(v3058, v3059)); real2 v3072 = minus(v3058, v3059); scatter(out, 79, 128, timesminusplus(v3072, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3072), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v683 = timesminusplus(reverse(v673), load(tbl, 110 * VECWIDTH + 
tbloffset), times(v673, load(tbl, 111 * VECWIDTH + tbloffset))); real2 v363 = timesminusplus(reverse(v353), load(tbl, 46 * VECWIDTH + tbloffset), times(v353, load(tbl, 47 * VECWIDTH + tbloffset))); real2 v2105 = plus(v363, v683); real2 v2099 = reverse(minus(v363, v683)); real2 v283 = timesminusplus(reverse(v273), load(tbl, 30 * VECWIDTH + tbloffset), times(v273, load(tbl, 31 * VECWIDTH + tbloffset))); real2 v723 = timesminusplus(reverse(v713), load(tbl, 118 * VECWIDTH + tbloffset), times(v713, load(tbl, 119 * VECWIDTH + tbloffset))); real2 v403 = timesminusplus(reverse(v393), load(tbl, 54 * VECWIDTH + tbloffset), times(v393, load(tbl, 55 * VECWIDTH + tbloffset))); real2 v603 = timesminusplus(reverse(v593), load(tbl, 94 * VECWIDTH + tbloffset), times(v593, load(tbl, 95 * VECWIDTH + tbloffset))); real2 v2180 = minus(v603, v283); real2 v2184 = plus(v283, v603); real2 v2145 = plus(v403, v723); real2 v2139 = reverse(minus(v403, v723)); real2 v543 = timesminusplus(reverse(v533), load(tbl, 82 * VECWIDTH + tbloffset), times(v533, load(tbl, 83 * VECWIDTH + tbloffset))); real2 v383 = timesminusplus(reverse(v373), load(tbl, 50 * VECWIDTH + tbloffset), times(v373, load(tbl, 51 * VECWIDTH + tbloffset))); real2 v703 = timesminusplus(reverse(v693), load(tbl, 114 * VECWIDTH + tbloffset), times(v693, load(tbl, 115 * VECWIDTH + tbloffset))); real2 v2125 = plus(v383, v703); real2 v2119 = reverse(minus(v383, v703)); real2 v223 = timesminusplus(reverse(v213), load(tbl, 18 * VECWIDTH + tbloffset), times(v213, load(tbl, 19 * VECWIDTH + tbloffset))); real2 v2120 = minus(v543, v223); real2 v2124 = plus(v223, v543); real2 v443 = timesminusplus(reverse(v433), load(tbl, 62 * VECWIDTH + tbloffset), times(v433, load(tbl, 63 * VECWIDTH + tbloffset))); real2 v203 = timesminusplus(reverse(v193), load(tbl, 14 * VECWIDTH + tbloffset), times(v193, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v763 = timesminusplus(reverse(v753), load(tbl, 126 * VECWIDTH + tbloffset), times(v753, load(tbl, 127 * 
VECWIDTH + tbloffset))); real2 v2179 = reverse(minus(v443, v763)); real2 v2185 = plus(v443, v763); real2 v523 = timesminusplus(reverse(v513), load(tbl, 78 * VECWIDTH + tbloffset), times(v513, load(tbl, 79 * VECWIDTH + tbloffset))); real2 v2100 = minus(v523, v203); real2 v2104 = plus(v203, v523); real2 v2264 = plus(v2104, v2105); real2 v2260 = minus(v2105, v2104); real2 v643 = timesminusplus(reverse(v633), load(tbl, 102 * VECWIDTH + tbloffset), times(v633, load(tbl, 103 * VECWIDTH + tbloffset))); real2 v2265 = plus(v2184, v2185); real2 v2259 = reverse(minus(v2184, v2185)); real2 v563 = timesminusplus(reverse(v553), load(tbl, 86 * VECWIDTH + tbloffset), times(v553, load(tbl, 87 * VECWIDTH + tbloffset))); real2 v243 = timesminusplus(reverse(v233), load(tbl, 22 * VECWIDTH + tbloffset), times(v233, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v2144 = plus(v243, v563); real2 v2140 = minus(v563, v243); real2 v143 = timesminusplus(reverse(v133), load(tbl, 2 * VECWIDTH + tbloffset), times(v133, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v183 = timesminusplus(reverse(v173), load(tbl, 10 * VECWIDTH + tbloffset), times(v173, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v2084 = plus(v183, v503); real2 v2080 = minus(v503, v183); real2 v163 = timesminusplus(reverse(v153), load(tbl, 6 * VECWIDTH + tbloffset), times(v153, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v303 = timesminusplus(reverse(v293), load(tbl, 34 * VECWIDTH + tbloffset), times(v293, load(tbl, 35 * VECWIDTH + tbloffset))); real2 v623 = timesminusplus(reverse(v613), load(tbl, 98 * VECWIDTH + tbloffset), times(v613, load(tbl, 99 * VECWIDTH + tbloffset))); real2 v2039 = reverse(minus(v303, v623)); real2 v2045 = plus(v303, v623); real2 v463 = timesminusplus(reverse(v453), load(tbl, 66 * VECWIDTH + tbloffset), times(v453, load(tbl, 67 * VECWIDTH + tbloffset))); real2 v2044 = plus(v143, v463); real2 v2040 = minus(v463, v143); real2 v2204 = plus(v2044, v2045); real2 v2200 = minus(v2045, v2044); real2 v323 = 
timesminusplus(reverse(v313), load(tbl, 38 * VECWIDTH + tbloffset), times(v313, load(tbl, 39 * VECWIDTH + tbloffset))); real2 v2205 = plus(v2124, v2125); real2 v2199 = reverse(minus(v2124, v2125)); real2 v2280 = minus(v2205, v2204); real2 v2284 = plus(v2204, v2205); real2 v2225 = plus(v2144, v2145); real2 v2219 = reverse(minus(v2144, v2145)); real2 v2305 = plus(v2264, v2265); real2 v2299 = reverse(minus(v2264, v2265)); real2 v2240 = minus(v2085, v2084); real2 v2244 = plus(v2084, v2085); real2 v2279 = reverse(minus(v2244, v2245)); real2 v2285 = plus(v2244, v2245); real2 v2281 = minusplus(v2279, v2280); real2 v2283 = minusplus(uminus(v2279), v2280); real2 v2291 = timesminusplus(reverse(v2281), load(tbl, 406 * VECWIDTH + tbloffset), times(v2281, load(tbl, 407 * VECWIDTH + tbloffset))); real2 v483 = timesminusplus(reverse(v473), load(tbl, 70 * VECWIDTH + tbloffset), times(v473, load(tbl, 71 * VECWIDTH + tbloffset))); real2 v2060 = minus(v483, v163); real2 v2064 = plus(v163, v483); real2 v2065 = plus(v323, v643); real2 v2059 = reverse(minus(v323, v643)); real2 v2220 = minus(v2065, v2064); real2 v2224 = plus(v2064, v2065); real2 v2304 = plus(v2224, v2225); real2 v2300 = minus(v2225, v2224); real2 v2301 = minusplus(v2299, v2300); real2 v2303 = minusplus(uminus(v2299), v2300); real2 v2311 = timesminusplus(reverse(v2301), load(tbl, 410 * VECWIDTH + tbloffset), times(v2301, load(tbl, 411 * VECWIDTH + tbloffset))); scatter(out, 17, 128, plus(v2291, v2311)); real2 v2344 = minus(v2291, v2311); scatter(out, 81, 128, timesminusplus(v2344, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2344), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2297 = timesminusplus(reverse(v2283), load(tbl, 408 * VECWIDTH + tbloffset), times(v2283, load(tbl, 409 * VECWIDTH + tbloffset))); real2 v2317 = timesminusplus(reverse(v2303), load(tbl, 412 * VECWIDTH + tbloffset), times(v2303, load(tbl, 413 * VECWIDTH + tbloffset))); scatter(out, 49, 128, plus(v2297, v2317)); real2 v2350 = minus(v2297, 
v2317); scatter(out, 113, 128, timesminusplus(v2350, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2350), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2320 = minus(v2285, v2284); real2 v2324 = plus(v2284, v2285); real2 v2325 = plus(v2304, v2305); real2 v2319 = reverse(minus(v2304, v2305)); scatter(out, 1, 128, plus(v2324, v2325)); real2 v2338 = minus(v2324, v2325); scatter(out, 65, 128, timesminusplus(v2338, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2338), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2321 = minusplus(v2319, v2320); scatter(out, 33, 128, timesminusplus(reverse(v2321), load(tbl, 414 * VECWIDTH + tbloffset), times(v2321, load(tbl, 415 * VECWIDTH + tbloffset)))); real2 v2323 = minusplus(uminus(v2319), v2320); scatter(out, 97, 128, timesminusplus(reverse(v2323), load(tbl, 416 * VECWIDTH + tbloffset), times(v2323, load(tbl, 417 * VECWIDTH + tbloffset)))); real2 v2201 = minusplus(v2199, v2200); real2 v2203 = minusplus(uminus(v2199), v2200); real2 v2263 = minusplus(uminus(v2259), v2260); real2 v2261 = minusplus(v2259, v2260); real2 v2243 = minusplus(uminus(v2239), v2240); real2 v2241 = minusplus(v2239, v2240); real2 v2257 = timesminusplus(reverse(v2243), load(tbl, 400 * VECWIDTH + tbloffset), times(v2243, load(tbl, 401 * VECWIDTH + tbloffset))); real2 v2217 = timesminusplus(reverse(v2203), load(tbl, 392 * VECWIDTH + tbloffset), times(v2203, load(tbl, 393 * VECWIDTH + tbloffset))); real2 v2388 = plus(v2217, v2257); real2 v2384 = minus(v2257, v2217); real2 v2277 = timesminusplus(reverse(v2263), load(tbl, 404 * VECWIDTH + tbloffset), times(v2263, load(tbl, 405 * VECWIDTH + tbloffset))); real2 v2221 = minusplus(v2219, v2220); real2 v2223 = minusplus(uminus(v2219), v2220); real2 v2237 = timesminusplus(reverse(v2223), load(tbl, 396 * VECWIDTH + tbloffset), times(v2223, load(tbl, 397 * VECWIDTH + tbloffset))); real2 v2389 = plus(v2237, v2277); real2 v2383 = reverse(minus(v2237, v2277)); scatter(out, 25, 128, plus(v2388, v2389)); real2 v2402 = 
minus(v2388, v2389); scatter(out, 89, 128, timesminusplus(v2402, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2402), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2385 = minusplus(v2383, v2384); real2 v2387 = minusplus(uminus(v2383), v2384); scatter(out, 121, 128, timesminusplus(reverse(v2387), load(tbl, 424 * VECWIDTH + tbloffset), times(v2387, load(tbl, 425 * VECWIDTH + tbloffset)))); scatter(out, 57, 128, timesminusplus(reverse(v2385), load(tbl, 422 * VECWIDTH + tbloffset), times(v2385, load(tbl, 423 * VECWIDTH + tbloffset)))); real2 v2251 = timesminusplus(reverse(v2241), load(tbl, 398 * VECWIDTH + tbloffset), times(v2241, load(tbl, 399 * VECWIDTH + tbloffset))); real2 v2211 = timesminusplus(reverse(v2201), load(tbl, 390 * VECWIDTH + tbloffset), times(v2201, load(tbl, 391 * VECWIDTH + tbloffset))); real2 v2358 = minus(v2251, v2211); real2 v2362 = plus(v2211, v2251); real2 v2271 = timesminusplus(reverse(v2261), load(tbl, 402 * VECWIDTH + tbloffset), times(v2261, load(tbl, 403 * VECWIDTH + tbloffset))); real2 v2231 = timesminusplus(reverse(v2221), load(tbl, 394 * VECWIDTH + tbloffset), times(v2221, load(tbl, 395 * VECWIDTH + tbloffset))); real2 v2357 = reverse(minus(v2231, v2271)); real2 v2363 = plus(v2231, v2271); scatter(out, 9, 128, plus(v2362, v2363)); real2 v2376 = minus(v2362, v2363); scatter(out, 73, 128, timesminusplus(v2376, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2376), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2361 = minusplus(uminus(v2357), v2358); scatter(out, 105, 128, timesminusplus(reverse(v2361), load(tbl, 420 * VECWIDTH + tbloffset), times(v2361, load(tbl, 421 * VECWIDTH + tbloffset)))); real2 v2359 = minusplus(v2357, v2358); scatter(out, 41, 128, timesminusplus(reverse(v2359), load(tbl, 418 * VECWIDTH + tbloffset), times(v2359, load(tbl, 419 * VECWIDTH + tbloffset)))); real2 v2121 = minusplus(v2119, v2120); real2 v2123 = minusplus(uminus(v2119), v2120); real2 v2083 = minusplus(uminus(v2079), v2080); real2 v2081 = 
minusplus(v2079, v2080); real2 v2091 = timesminusplus(reverse(v2081), load(tbl, 366 * VECWIDTH + tbloffset), times(v2081, load(tbl, 367 * VECWIDTH + tbloffset))); real2 v2043 = minusplus(uminus(v2039), v2040); real2 v2041 = minusplus(v2039, v2040); real2 v2051 = timesminusplus(reverse(v2041), load(tbl, 358 * VECWIDTH + tbloffset), times(v2041, load(tbl, 359 * VECWIDTH + tbloffset))); real2 v2131 = timesminusplus(reverse(v2121), load(tbl, 374 * VECWIDTH + tbloffset), times(v2121, load(tbl, 375 * VECWIDTH + tbloffset))); real2 v2163 = minusplus(uminus(v2159), v2160); real2 v2161 = minusplus(v2159, v2160); real2 v2171 = timesminusplus(reverse(v2161), load(tbl, 382 * VECWIDTH + tbloffset), times(v2161, load(tbl, 383 * VECWIDTH + tbloffset))); real2 v2409 = reverse(minus(v2091, v2171)); real2 v2415 = plus(v2091, v2171); real2 v2410 = minus(v2131, v2051); real2 v2414 = plus(v2051, v2131); real2 v2454 = plus(v2414, v2415); real2 v2450 = minus(v2415, v2414); real2 v2181 = minusplus(v2179, v2180); real2 v2183 = minusplus(uminus(v2179), v2180); real2 v2191 = timesminusplus(reverse(v2181), load(tbl, 386 * VECWIDTH + tbloffset), times(v2181, load(tbl, 387 * VECWIDTH + tbloffset))); real2 v2103 = minusplus(uminus(v2099), v2100); real2 v2101 = minusplus(v2099, v2100); real2 v2111 = timesminusplus(reverse(v2101), load(tbl, 370 * VECWIDTH + tbloffset), times(v2101, load(tbl, 371 * VECWIDTH + tbloffset))); real2 v2435 = plus(v2111, v2191); real2 v2429 = reverse(minus(v2111, v2191)); real2 v2141 = minusplus(v2139, v2140); real2 v2143 = minusplus(uminus(v2139), v2140); real2 v2151 = timesminusplus(reverse(v2141), load(tbl, 378 * VECWIDTH + tbloffset), times(v2141, load(tbl, 379 * VECWIDTH + tbloffset))); real2 v2063 = minusplus(uminus(v2059), v2060); real2 v2061 = minusplus(v2059, v2060); real2 v2071 = timesminusplus(reverse(v2061), load(tbl, 362 * VECWIDTH + tbloffset), times(v2061, load(tbl, 363 * VECWIDTH + tbloffset))); real2 v2434 = plus(v2071, v2151); real2 v2430 = minus(v2151, 
v2071); real2 v2455 = plus(v2434, v2435); real2 v2449 = reverse(minus(v2434, v2435)); scatter(out, 5, 128, plus(v2454, v2455)); real2 v2468 = minus(v2454, v2455); scatter(out, 69, 128, timesminusplus(v2468, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2468), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2451 = minusplus(v2449, v2450); real2 v2453 = minusplus(uminus(v2449), v2450); scatter(out, 101, 128, timesminusplus(reverse(v2453), load(tbl, 436 * VECWIDTH + tbloffset), times(v2453, load(tbl, 437 * VECWIDTH + tbloffset)))); scatter(out, 37, 128, timesminusplus(reverse(v2451), load(tbl, 434 * VECWIDTH + tbloffset), times(v2451, load(tbl, 435 * VECWIDTH + tbloffset)))); real2 v2411 = minusplus(v2409, v2410); real2 v2413 = minusplus(uminus(v2409), v2410); real2 v2433 = minusplus(uminus(v2429), v2430); real2 v2431 = minusplus(v2429, v2430); real2 v2421 = timesminusplus(reverse(v2411), load(tbl, 426 * VECWIDTH + tbloffset), times(v2411, load(tbl, 427 * VECWIDTH + tbloffset))); real2 v2441 = timesminusplus(reverse(v2431), load(tbl, 430 * VECWIDTH + tbloffset), times(v2431, load(tbl, 431 * VECWIDTH + tbloffset))); scatter(out, 21, 128, plus(v2421, v2441)); real2 v2474 = minus(v2421, v2441); scatter(out, 85, 128, timesminusplus(v2474, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2474), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2427 = timesminusplus(reverse(v2413), load(tbl, 428 * VECWIDTH + tbloffset), times(v2413, load(tbl, 429 * VECWIDTH + tbloffset))); real2 v2447 = timesminusplus(reverse(v2433), load(tbl, 432 * VECWIDTH + tbloffset), times(v2433, load(tbl, 433 * VECWIDTH + tbloffset))); scatter(out, 53, 128, plus(v2427, v2447)); real2 v2480 = minus(v2427, v2447); scatter(out, 117, 128, timesminusplus(v2480, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2480), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2057 = timesminusplus(reverse(v2043), load(tbl, 360 * VECWIDTH + tbloffset), times(v2043, load(tbl, 361 * VECWIDTH + tbloffset))); real2 
v2097 = timesminusplus(reverse(v2083), load(tbl, 368 * VECWIDTH + tbloffset), times(v2083, load(tbl, 369 * VECWIDTH + tbloffset))); real2 v2157 = timesminusplus(reverse(v2143), load(tbl, 380 * VECWIDTH + tbloffset), times(v2143, load(tbl, 381 * VECWIDTH + tbloffset))); real2 v2197 = timesminusplus(reverse(v2183), load(tbl, 388 * VECWIDTH + tbloffset), times(v2183, load(tbl, 389 * VECWIDTH + tbloffset))); real2 v2117 = timesminusplus(reverse(v2103), load(tbl, 372 * VECWIDTH + tbloffset), times(v2103, load(tbl, 373 * VECWIDTH + tbloffset))); real2 v2507 = reverse(minus(v2117, v2197)); real2 v2513 = plus(v2117, v2197); real2 v2137 = timesminusplus(reverse(v2123), load(tbl, 376 * VECWIDTH + tbloffset), times(v2123, load(tbl, 377 * VECWIDTH + tbloffset))); real2 v2488 = minus(v2137, v2057); real2 v2492 = plus(v2057, v2137); real2 v2177 = timesminusplus(reverse(v2163), load(tbl, 384 * VECWIDTH + tbloffset), times(v2163, load(tbl, 385 * VECWIDTH + tbloffset))); real2 v2493 = plus(v2097, v2177); real2 v2487 = reverse(minus(v2097, v2177)); real2 v2532 = plus(v2492, v2493); real2 v2528 = minus(v2493, v2492); real2 v2077 = timesminusplus(reverse(v2063), load(tbl, 364 * VECWIDTH + tbloffset), times(v2063, load(tbl, 365 * VECWIDTH + tbloffset))); real2 v2512 = plus(v2077, v2157); real2 v2508 = minus(v2157, v2077); real2 v2527 = reverse(minus(v2512, v2513)); real2 v2533 = plus(v2512, v2513); real2 v2529 = minusplus(v2527, v2528); real2 v2531 = minusplus(uminus(v2527), v2528); scatter(out, 109, 128, timesminusplus(reverse(v2531), load(tbl, 448 * VECWIDTH + tbloffset), times(v2531, load(tbl, 449 * VECWIDTH + tbloffset)))); scatter(out, 45, 128, timesminusplus(reverse(v2529), load(tbl, 446 * VECWIDTH + tbloffset), times(v2529, load(tbl, 447 * VECWIDTH + tbloffset)))); scatter(out, 13, 128, plus(v2532, v2533)); real2 v2546 = minus(v2532, v2533); scatter(out, 77, 128, timesminusplus(v2546, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2546), load(tbl, 1 * VECWIDTH + 
tbloffset)))); real2 v2509 = minusplus(v2507, v2508); real2 v2511 = minusplus(uminus(v2507), v2508); real2 v2491 = minusplus(uminus(v2487), v2488); real2 v2489 = minusplus(v2487, v2488); real2 v2499 = timesminusplus(reverse(v2489), load(tbl, 438 * VECWIDTH + tbloffset), times(v2489, load(tbl, 439 * VECWIDTH + tbloffset))); real2 v2519 = timesminusplus(reverse(v2509), load(tbl, 442 * VECWIDTH + tbloffset), times(v2509, load(tbl, 443 * VECWIDTH + tbloffset))); scatter(out, 29, 128, plus(v2499, v2519)); real2 v2552 = minus(v2499, v2519); scatter(out, 93, 128, timesminusplus(v2552, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2552), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2505 = timesminusplus(reverse(v2491), load(tbl, 440 * VECWIDTH + tbloffset), times(v2491, load(tbl, 441 * VECWIDTH + tbloffset))); real2 v2525 = timesminusplus(reverse(v2511), load(tbl, 444 * VECWIDTH + tbloffset), times(v2511, load(tbl, 445 * VECWIDTH + tbloffset))); scatter(out, 61, 128, plus(v2505, v2525)); real2 v2558 = minus(v2505, v2525); scatter(out, 125, 128, timesminusplus(v2558, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2558), load(tbl, 1 * VECWIDTH + tbloffset)))); // Pres : 76263 } } ALIGNED(8192) void tbut128b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i=0; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * i0; // Pres : 148586 real2 v56 = load(in, 54 << inShift); real2 v120 = load(in, 118 << inShift); real2 v571 = reverse(minus(v120, v56)); real2 v577 = plus(v56, v120); real2 v24 = load(in, 22 << inShift); real2 v88 = load(in, 86 << inShift); real2 v576 = plus(v24, v88); real2 v572 = minus(v88, v24); real2 v573 = minusplus(v571, v572); real2 v575 = minusplus(uminus(v571), v572); real2 v589 = timesminusplus(reverse(v575), 
load(tbl, 92 * VECWIDTH + tbloffset), times(v575, load(tbl, 93 * VECWIDTH + tbloffset))); real2 v583 = timesminusplus(reverse(v573), load(tbl, 90 * VECWIDTH + tbloffset), times(v573, load(tbl, 91 * VECWIDTH + tbloffset))); real2 v897 = plus(v576, v577); real2 v891 = reverse(minus(v577, v576)); real2 v8 = load(in, 6 << inShift); real2 v72 = load(in, 70 << inShift); real2 v252 = minus(v72, v8); real2 v256 = plus(v8, v72); real2 v104 = load(in, 102 << inShift); real2 v40 = load(in, 38 << inShift); real2 v251 = reverse(minus(v104, v40)); real2 v257 = plus(v40, v104); real2 v255 = minusplus(uminus(v251), v252); real2 v253 = minusplus(v251, v252); real2 v263 = timesminusplus(reverse(v253), load(tbl, 26 * VECWIDTH + tbloffset), times(v253, load(tbl, 27 * VECWIDTH + tbloffset))); real2 v896 = plus(v256, v257); real2 v892 = minus(v257, v256); real2 v895 = minusplus(uminus(v891), v892); real2 v893 = minusplus(v891, v892); real2 v909 = timesminusplus(reverse(v895), load(tbl, 156 * VECWIDTH + tbloffset), times(v895, load(tbl, 157 * VECWIDTH + tbloffset))); real2 v903 = timesminusplus(reverse(v893), load(tbl, 154 * VECWIDTH + tbloffset), times(v893, load(tbl, 155 * VECWIDTH + tbloffset))); real2 v269 = timesminusplus(reverse(v255), load(tbl, 28 * VECWIDTH + tbloffset), times(v255, load(tbl, 29 * VECWIDTH + tbloffset))); real2 v1216 = plus(v896, v897); real2 v1212 = minus(v897, v896); real2 v2160 = minus(v583, v263); real2 v2164 = plus(v263, v583); real2 v2686 = minus(v589, v269); real2 v2690 = plus(v269, v589); real2 v96 = load(in, 94 << inShift); real2 v32 = load(in, 30 << inShift); real2 v736 = plus(v32, v96); real2 v732 = minus(v96, v32); real2 v64 = load(in, 62 << inShift); real2 v128 = load(in, 126 << inShift); real2 v737 = plus(v64, v128); real2 v731 = reverse(minus(v128, v64)); real2 v1057 = plus(v736, v737); real2 v1051 = reverse(minus(v737, v736)); real2 v733 = minusplus(v731, v732); real2 v735 = minusplus(uminus(v731), v732); real2 v749 = timesminusplus(reverse(v735), 
load(tbl, 124 * VECWIDTH + tbloffset), times(v735, load(tbl, 125 * VECWIDTH + tbloffset))); real2 v743 = timesminusplus(reverse(v733), load(tbl, 122 * VECWIDTH + tbloffset), times(v733, load(tbl, 123 * VECWIDTH + tbloffset))); real2 v16 = load(in, 14 << inShift); real2 v80 = load(in, 78 << inShift); real2 v412 = minus(v80, v16); real2 v416 = plus(v16, v80); real2 v112 = load(in, 110 << inShift); real2 v48 = load(in, 46 << inShift); real2 v417 = plus(v48, v112); real2 v411 = reverse(minus(v112, v48)); real2 v1056 = plus(v416, v417); real2 v1052 = minus(v417, v416); real2 v1055 = minusplus(uminus(v1051), v1052); real2 v1053 = minusplus(v1051, v1052); real2 v1063 = timesminusplus(reverse(v1053), load(tbl, 186 * VECWIDTH + tbloffset), times(v1053, load(tbl, 187 * VECWIDTH + tbloffset))); real2 v1665 = plus(v903, v1063); real2 v1659 = reverse(minus(v1063, v903)); real2 v1069 = timesminusplus(reverse(v1055), load(tbl, 188 * VECWIDTH + tbloffset), times(v1055, load(tbl, 189 * VECWIDTH + tbloffset))); real2 v1869 = reverse(minus(v1069, v909)); real2 v1875 = plus(v909, v1069); real2 v413 = minusplus(v411, v412); real2 v415 = minusplus(uminus(v411), v412); real2 v429 = timesminusplus(reverse(v415), load(tbl, 60 * VECWIDTH + tbloffset), times(v415, load(tbl, 61 * VECWIDTH + tbloffset))); real2 v1217 = plus(v1056, v1057); real2 v1211 = reverse(minus(v1057, v1056)); real2 v1297 = plus(v1216, v1217); real2 v1291 = reverse(minus(v1217, v1216)); real2 v2691 = plus(v429, v749); real2 v2685 = reverse(minus(v749, v429)); real2 v2765 = reverse(minus(v2691, v2690)); real2 v2771 = plus(v2690, v2691); real2 v2689 = minusplus(uminus(v2685), v2686); real2 v2687 = minusplus(v2685, v2686); real2 v2703 = timesminusplus(reverse(v2689), load(tbl, 476 * VECWIDTH + tbloffset), times(v2689, load(tbl, 477 * VECWIDTH + tbloffset))); real2 v2697 = timesminusplus(reverse(v2687), load(tbl, 474 * VECWIDTH + tbloffset), times(v2687, load(tbl, 475 * VECWIDTH + tbloffset))); real2 v1215 = 
minusplus(uminus(v1211), v1212); real2 v1213 = minusplus(v1211, v1212); real2 v1223 = timesminusplus(reverse(v1213), load(tbl, 218 * VECWIDTH + tbloffset), times(v1213, load(tbl, 219 * VECWIDTH + tbloffset))); real2 v1229 = timesminusplus(reverse(v1215), load(tbl, 220 * VECWIDTH + tbloffset), times(v1215, load(tbl, 221 * VECWIDTH + tbloffset))); real2 v423 = timesminusplus(reverse(v413), load(tbl, 58 * VECWIDTH + tbloffset), times(v413, load(tbl, 59 * VECWIDTH + tbloffset))); real2 v2165 = plus(v423, v743); real2 v2159 = reverse(minus(v743, v423)); real2 v2245 = plus(v2164, v2165); real2 v2239 = reverse(minus(v2165, v2164)); real2 v44 = load(in, 42 << inShift); real2 v108 = load(in, 106 << inShift); real2 v331 = reverse(minus(v108, v44)); real2 v337 = plus(v44, v108); real2 v76 = load(in, 74 << inShift); real2 v12 = load(in, 10 << inShift); real2 v336 = plus(v12, v76); real2 v332 = minus(v76, v12); real2 v976 = plus(v336, v337); real2 v972 = minus(v337, v336); real2 v335 = minusplus(uminus(v331), v332); real2 v333 = minusplus(v331, v332); real2 v343 = timesminusplus(reverse(v333), load(tbl, 42 * VECWIDTH + tbloffset), times(v333, load(tbl, 43 * VECWIDTH + tbloffset))); real2 v349 = timesminusplus(reverse(v335), load(tbl, 44 * VECWIDTH + tbloffset), times(v335, load(tbl, 45 * VECWIDTH + tbloffset))); real2 v124 = load(in, 122 << inShift); real2 v60 = load(in, 58 << inShift); real2 v651 = reverse(minus(v124, v60)); real2 v657 = plus(v60, v124); real2 v28 = load(in, 26 << inShift); real2 v92 = load(in, 90 << inShift); real2 v652 = minus(v92, v28); real2 v656 = plus(v28, v92); real2 v977 = plus(v656, v657); real2 v971 = reverse(minus(v657, v656)); real2 v973 = minusplus(v971, v972); real2 v975 = minusplus(uminus(v971), v972); real2 v983 = timesminusplus(reverse(v973), load(tbl, 170 * VECWIDTH + tbloffset), times(v973, load(tbl, 171 * VECWIDTH + tbloffset))); real2 v1131 = reverse(minus(v977, v976)); real2 v1137 = plus(v976, v977); real2 v655 = minusplus(uminus(v651), 
v652); real2 v653 = minusplus(v651, v652); real2 v669 = timesminusplus(reverse(v655), load(tbl, 108 * VECWIDTH + tbloffset), times(v655, load(tbl, 109 * VECWIDTH + tbloffset))); real2 v663 = timesminusplus(reverse(v653), load(tbl, 106 * VECWIDTH + tbloffset), times(v653, load(tbl, 107 * VECWIDTH + tbloffset))); real2 v2079 = reverse(minus(v663, v343)); real2 v2085 = plus(v343, v663); real2 v2605 = reverse(minus(v669, v349)); real2 v2611 = plus(v349, v669); real2 v989 = timesminusplus(reverse(v975), load(tbl, 172 * VECWIDTH + tbloffset), times(v975, load(tbl, 173 * VECWIDTH + tbloffset))); real2 v20 = load(in, 18 << inShift); real2 v84 = load(in, 82 << inShift); real2 v496 = plus(v20, v84); real2 v492 = minus(v84, v20); real2 v52 = load(in, 50 << inShift); real2 v116 = load(in, 114 << inShift); real2 v491 = reverse(minus(v116, v52)); real2 v497 = plus(v52, v116); real2 v817 = plus(v496, v497); real2 v811 = reverse(minus(v497, v496)); real2 v493 = minusplus(v491, v492); real2 v495 = minusplus(uminus(v491), v492); real2 v509 = timesminusplus(reverse(v495), load(tbl, 76 * VECWIDTH + tbloffset), times(v495, load(tbl, 77 * VECWIDTH + tbloffset))); real2 v503 = timesminusplus(reverse(v493), load(tbl, 74 * VECWIDTH + tbloffset), times(v493, load(tbl, 75 * VECWIDTH + tbloffset))); real2 v36 = load(in, 34 << inShift); real2 v100 = load(in, 98 << inShift); real2 v171 = reverse(minus(v100, v36)); real2 v177 = plus(v36, v100); real2 v68 = load(in, 66 << inShift); real2 v4 = load(in, 2 << inShift); real2 v176 = plus(v4, v68); real2 v172 = minus(v68, v4); real2 v816 = plus(v176, v177); real2 v812 = minus(v177, v176); real2 v1136 = plus(v816, v817); real2 v1132 = minus(v817, v816); real2 v1133 = minusplus(v1131, v1132); real2 v1135 = minusplus(uminus(v1131), v1132); real2 v1149 = timesminusplus(reverse(v1135), load(tbl, 204 * VECWIDTH + tbloffset), times(v1135, load(tbl, 205 * VECWIDTH + tbloffset))); real2 v1296 = plus(v1136, v1137); real2 v1292 = minus(v1137, v1136); real2 v1295 
= minusplus(uminus(v1291), v1292); real2 v1293 = minusplus(v1291, v1292); real2 v1303 = timesminusplus(reverse(v1293), load(tbl, 234 * VECWIDTH + tbloffset), times(v1293, load(tbl, 235 * VECWIDTH + tbloffset))); real2 v1331 = reverse(minus(v1297, v1296)); real2 v1337 = plus(v1296, v1297); real2 v173 = minusplus(v171, v172); real2 v175 = minusplus(uminus(v171), v172); real2 v189 = timesminusplus(reverse(v175), load(tbl, 12 * VECWIDTH + tbloffset), times(v175, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v1309 = timesminusplus(reverse(v1295), load(tbl, 236 * VECWIDTH + tbloffset), times(v1295, load(tbl, 237 * VECWIDTH + tbloffset))); real2 v815 = minusplus(uminus(v811), v812); real2 v813 = minusplus(v811, v812); real2 v1143 = timesminusplus(reverse(v1133), load(tbl, 202 * VECWIDTH + tbloffset), times(v1133, load(tbl, 203 * VECWIDTH + tbloffset))); real2 v1541 = reverse(minus(v1229, v1149)); real2 v1547 = plus(v1149, v1229); real2 v2610 = plus(v189, v509); real2 v2606 = minus(v509, v189); real2 v2770 = plus(v2610, v2611); real2 v2766 = minus(v2611, v2610); real2 v823 = timesminusplus(reverse(v813), load(tbl, 138 * VECWIDTH + tbloffset), times(v813, load(tbl, 139 * VECWIDTH + tbloffset))); real2 v829 = timesminusplus(reverse(v815), load(tbl, 140 * VECWIDTH + tbloffset), times(v815, load(tbl, 141 * VECWIDTH + tbloffset))); real2 v2811 = plus(v2770, v2771); real2 v2805 = reverse(minus(v2771, v2770)); real2 v2767 = minusplus(v2765, v2766); real2 v2769 = minusplus(uminus(v2765), v2766); real2 v2607 = minusplus(v2605, v2606); real2 v2609 = minusplus(uminus(v2605), v2606); real2 v2617 = timesminusplus(reverse(v2607), load(tbl, 458 * VECWIDTH + tbloffset), times(v2607, load(tbl, 459 * VECWIDTH + tbloffset))); real2 v2623 = timesminusplus(reverse(v2609), load(tbl, 460 * VECWIDTH + tbloffset), times(v2609, load(tbl, 461 * VECWIDTH + tbloffset))); real2 v3013 = reverse(minus(v2703, v2623)); real2 v3019 = plus(v2623, v2703); real2 v2783 = timesminusplus(reverse(v2769), load(tbl, 
492 * VECWIDTH + tbloffset), times(v2769, load(tbl, 493 * VECWIDTH + tbloffset))); real2 v2941 = plus(v2617, v2697); real2 v2935 = reverse(minus(v2697, v2617)); real2 v2777 = timesminusplus(reverse(v2767), load(tbl, 490 * VECWIDTH + tbloffset), times(v2767, load(tbl, 491 * VECWIDTH + tbloffset))); real2 v1660 = minus(v983, v823); real2 v1664 = plus(v823, v983); real2 v1874 = plus(v829, v989); real2 v1870 = minus(v989, v829); real2 v1909 = reverse(minus(v1875, v1874)); real2 v1915 = plus(v1874, v1875); real2 v1663 = minusplus(uminus(v1659), v1660); real2 v1661 = minusplus(v1659, v1660); real2 v1677 = timesminusplus(reverse(v1663), load(tbl, 296 * VECWIDTH + tbloffset), times(v1663, load(tbl, 297 * VECWIDTH + tbloffset))); real2 v1873 = minusplus(uminus(v1869), v1870); real2 v1871 = minusplus(v1869, v1870); real2 v1887 = timesminusplus(reverse(v1873), load(tbl, 332 * VECWIDTH + tbloffset), times(v1873, load(tbl, 333 * VECWIDTH + tbloffset))); real2 v1705 = plus(v1664, v1665); real2 v1699 = reverse(minus(v1665, v1664)); real2 v1671 = timesminusplus(reverse(v1661), load(tbl, 294 * VECWIDTH + tbloffset), times(v1661, load(tbl, 295 * VECWIDTH + tbloffset))); real2 v1881 = timesminusplus(reverse(v1871), load(tbl, 330 * VECWIDTH + tbloffset), times(v1871, load(tbl, 331 * VECWIDTH + tbloffset))); real2 v1469 = plus(v1143, v1223); real2 v1463 = reverse(minus(v1223, v1143)); real2 v54 = load(in, 52 << inShift); real2 v118 = load(in, 116 << inShift); real2 v537 = plus(v54, v118); real2 v531 = reverse(minus(v118, v54)); real2 v86 = load(in, 84 << inShift); real2 v22 = load(in, 20 << inShift); real2 v536 = plus(v22, v86); real2 v532 = minus(v86, v22); real2 v851 = reverse(minus(v537, v536)); real2 v857 = plus(v536, v537); real2 v533 = minusplus(v531, v532); real2 v535 = minusplus(uminus(v531), v532); real2 v549 = timesminusplus(reverse(v535), load(tbl, 84 * VECWIDTH + tbloffset), times(v535, load(tbl, 85 * VECWIDTH + tbloffset))); real2 v102 = load(in, 100 << inShift); real2 v38 
= load(in, 36 << inShift); real2 v217 = plus(v38, v102); real2 v211 = reverse(minus(v102, v38)); real2 v70 = load(in, 68 << inShift); real2 v6 = load(in, 4 << inShift); real2 v216 = plus(v6, v70); real2 v212 = minus(v70, v6); real2 v213 = minusplus(v211, v212); real2 v215 = minusplus(uminus(v211), v212); real2 v229 = timesminusplus(reverse(v215), load(tbl, 20 * VECWIDTH + tbloffset), times(v215, load(tbl, 21 * VECWIDTH + tbloffset))); real2 v2646 = minus(v549, v229); real2 v2650 = plus(v229, v549); real2 v856 = plus(v216, v217); real2 v852 = minus(v217, v216); real2 v853 = minusplus(v851, v852); real2 v855 = minusplus(uminus(v851), v852); real2 v863 = timesminusplus(reverse(v853), load(tbl, 146 * VECWIDTH + tbloffset), times(v853, load(tbl, 147 * VECWIDTH + tbloffset))); real2 v869 = timesminusplus(reverse(v855), load(tbl, 148 * VECWIDTH + tbloffset), times(v855, load(tbl, 149 * VECWIDTH + tbloffset))); real2 v1176 = plus(v856, v857); real2 v1172 = minus(v857, v856); real2 v110 = load(in, 108 << inShift); real2 v46 = load(in, 44 << inShift); real2 v377 = plus(v46, v110); real2 v371 = reverse(minus(v110, v46)); real2 v78 = load(in, 76 << inShift); real2 v14 = load(in, 12 << inShift); real2 v372 = minus(v78, v14); real2 v376 = plus(v14, v78); real2 v1012 = minus(v377, v376); real2 v1016 = plus(v376, v377); real2 v373 = minusplus(v371, v372); real2 v375 = minusplus(uminus(v371), v372); real2 v389 = timesminusplus(reverse(v375), load(tbl, 52 * VECWIDTH + tbloffset), times(v375, load(tbl, 53 * VECWIDTH + tbloffset))); real2 v30 = load(in, 28 << inShift); real2 v94 = load(in, 92 << inShift); real2 v696 = plus(v30, v94); real2 v692 = minus(v94, v30); real2 v62 = load(in, 60 << inShift); real2 v126 = load(in, 124 << inShift); real2 v697 = plus(v62, v126); real2 v691 = reverse(minus(v126, v62)); real2 v1017 = plus(v696, v697); real2 v1011 = reverse(minus(v697, v696)); real2 v1171 = reverse(minus(v1017, v1016)); real2 v1177 = plus(v1016, v1017); real2 v1013 = 
minusplus(v1011, v1012); real2 v1015 = minusplus(uminus(v1011), v1012); real2 v1175 = minusplus(uminus(v1171), v1172); real2 v1173 = minusplus(v1171, v1172); real2 v1183 = timesminusplus(reverse(v1173), load(tbl, 210 * VECWIDTH + tbloffset), times(v1173, load(tbl, 211 * VECWIDTH + tbloffset))); real2 v1189 = timesminusplus(reverse(v1175), load(tbl, 212 * VECWIDTH + tbloffset), times(v1175, load(tbl, 213 * VECWIDTH + tbloffset))); real2 v1029 = timesminusplus(reverse(v1015), load(tbl, 180 * VECWIDTH + tbloffset), times(v1015, load(tbl, 181 * VECWIDTH + tbloffset))); real2 v1023 = timesminusplus(reverse(v1013), load(tbl, 178 * VECWIDTH + tbloffset), times(v1013, load(tbl, 179 * VECWIDTH + tbloffset))); real2 v1625 = plus(v863, v1023); real2 v1619 = reverse(minus(v1023, v863)); real2 v1835 = plus(v869, v1029); real2 v1829 = reverse(minus(v1029, v869)); real2 v693 = minusplus(v691, v692); real2 v695 = minusplus(uminus(v691), v692); real2 v709 = timesminusplus(reverse(v695), load(tbl, 116 * VECWIDTH + tbloffset), times(v695, load(tbl, 117 * VECWIDTH + tbloffset))); real2 v2645 = reverse(minus(v709, v389)); real2 v2651 = plus(v389, v709); real2 v1257 = plus(v1176, v1177); real2 v1251 = reverse(minus(v1177, v1176)); real2 v2731 = plus(v2650, v2651); real2 v2725 = reverse(minus(v2651, v2650)); real2 v114 = load(in, 112 << inShift); real2 v50 = load(in, 48 << inShift); real2 v457 = plus(v50, v114); real2 v451 = reverse(minus(v114, v50)); real2 v18 = load(in, 16 << inShift); real2 v82 = load(in, 80 << inShift); real2 v456 = plus(v18, v82); real2 v452 = minus(v82, v18); real2 v771 = reverse(minus(v457, v456)); real2 v777 = plus(v456, v457); real2 v453 = minusplus(v451, v452); real2 v455 = minusplus(uminus(v451), v452); real2 v469 = timesminusplus(reverse(v455), load(tbl, 68 * VECWIDTH + tbloffset), times(v455, load(tbl, 69 * VECWIDTH + tbloffset))); real2 v66 = load(in, 64 << inShift); real2 v2 = load(in, 0 << inShift); real2 v132 = minus(v66, v2); real2 v136 = plus(v2, v66); 
real2 v98 = load(in, 96 << inShift); real2 v34 = load(in, 32 << inShift); real2 v131 = reverse(minus(v98, v34)); real2 v137 = plus(v34, v98); real2 v133 = minusplus(v131, v132); real2 v135 = minusplus(uminus(v131), v132); real2 v149 = timesminusplus(reverse(v135), load(tbl, 4 * VECWIDTH + tbloffset), times(v135, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v2566 = minus(v469, v149); real2 v2570 = plus(v149, v469); real2 v772 = minus(v137, v136); real2 v776 = plus(v136, v137); real2 v1092 = minus(v777, v776); real2 v1096 = plus(v776, v777); real2 v773 = minusplus(v771, v772); real2 v775 = minusplus(uminus(v771), v772); real2 v783 = timesminusplus(reverse(v773), load(tbl, 130 * VECWIDTH + tbloffset), times(v773, load(tbl, 131 * VECWIDTH + tbloffset))); real2 v789 = timesminusplus(reverse(v775), load(tbl, 132 * VECWIDTH + tbloffset), times(v775, load(tbl, 133 * VECWIDTH + tbloffset))); real2 v74 = load(in, 72 << inShift); real2 v10 = load(in, 8 << inShift); real2 v296 = plus(v10, v74); real2 v292 = minus(v74, v10); real2 v42 = load(in, 40 << inShift); real2 v106 = load(in, 104 << inShift); real2 v291 = reverse(minus(v106, v42)); real2 v297 = plus(v42, v106); real2 v293 = minusplus(v291, v292); real2 v295 = minusplus(uminus(v291), v292); real2 v309 = timesminusplus(reverse(v295), load(tbl, 36 * VECWIDTH + tbloffset), times(v295, load(tbl, 37 * VECWIDTH + tbloffset))); real2 v932 = minus(v297, v296); real2 v936 = plus(v296, v297); real2 v122 = load(in, 120 << inShift); real2 v58 = load(in, 56 << inShift); real2 v617 = plus(v58, v122); real2 v611 = reverse(minus(v122, v58)); real2 v26 = load(in, 24 << inShift); real2 v90 = load(in, 88 << inShift); real2 v612 = minus(v90, v26); real2 v616 = plus(v26, v90); real2 v937 = plus(v616, v617); real2 v931 = reverse(minus(v617, v616)); real2 v1091 = reverse(minus(v937, v936)); real2 v1097 = plus(v936, v937); real2 v933 = minusplus(v931, v932); real2 v935 = minusplus(uminus(v931), v932); real2 v1093 = minusplus(v1091, v1092); real2 
v1095 = minusplus(uminus(v1091), v1092); real2 v1103 = timesminusplus(reverse(v1093), load(tbl, 194 * VECWIDTH + tbloffset), times(v1093, load(tbl, 195 * VECWIDTH + tbloffset))); real2 v1468 = plus(v1103, v1183); real2 v1464 = minus(v1183, v1103); real2 v1508 = plus(v1468, v1469); real2 v1504 = minus(v1469, v1468); real2 v1252 = minus(v1097, v1096); real2 v1256 = plus(v1096, v1097); real2 v1336 = plus(v1256, v1257); real2 v1332 = minus(v1257, v1256); real2 v1335 = minusplus(uminus(v1331), v1332); real2 v1333 = minusplus(v1331, v1332); real2 v1343 = timesminusplus(reverse(v1333), load(tbl, 242 * VECWIDTH + tbloffset), times(v1333, load(tbl, 243 * VECWIDTH + tbloffset))); real2 v1349 = timesminusplus(reverse(v1335), load(tbl, 244 * VECWIDTH + tbloffset), times(v1335, load(tbl, 245 * VECWIDTH + tbloffset))); real2 v1376 = plus(v1336, v1337); real2 v1372 = minus(v1337, v1336); real2 v1465 = minusplus(v1463, v1464); real2 v1467 = minusplus(uminus(v1463), v1464); real2 v1255 = minusplus(uminus(v1251), v1252); real2 v1253 = minusplus(v1251, v1252); real2 v1481 = timesminusplus(reverse(v1467), load(tbl, 264 * VECWIDTH + tbloffset), times(v1467, load(tbl, 265 * VECWIDTH + tbloffset))); real2 v1475 = timesminusplus(reverse(v1465), load(tbl, 262 * VECWIDTH + tbloffset), times(v1465, load(tbl, 263 * VECWIDTH + tbloffset))); real2 v1109 = timesminusplus(reverse(v1095), load(tbl, 196 * VECWIDTH + tbloffset), times(v1095, load(tbl, 197 * VECWIDTH + tbloffset))); real2 v1542 = minus(v1189, v1109); real2 v1546 = plus(v1109, v1189); real2 v1545 = minusplus(uminus(v1541), v1542); real2 v1543 = minusplus(v1541, v1542); real2 v1553 = timesminusplus(reverse(v1543), load(tbl, 274 * VECWIDTH + tbloffset), times(v1543, load(tbl, 275 * VECWIDTH + tbloffset))); real2 v1559 = timesminusplus(reverse(v1545), load(tbl, 276 * VECWIDTH + tbloffset), times(v1545, load(tbl, 277 * VECWIDTH + tbloffset))); real2 v1582 = minus(v1547, v1546); real2 v1586 = plus(v1546, v1547); real2 v1269 = 
timesminusplus(reverse(v1255), load(tbl, 228 * VECWIDTH + tbloffset), times(v1255, load(tbl, 229 * VECWIDTH + tbloffset))); real2 v1438 = minus(v1309, v1269); real2 v1442 = plus(v1269, v1309); real2 v1263 = timesminusplus(reverse(v1253), load(tbl, 226 * VECWIDTH + tbloffset), times(v1253, load(tbl, 227 * VECWIDTH + tbloffset))); real2 v943 = timesminusplus(reverse(v933), load(tbl, 162 * VECWIDTH + tbloffset), times(v933, load(tbl, 163 * VECWIDTH + tbloffset))); real2 v1624 = plus(v783, v943); real2 v1620 = minus(v943, v783); real2 v1623 = minusplus(uminus(v1619), v1620); real2 v1621 = minusplus(v1619, v1620); real2 v1700 = minus(v1625, v1624); real2 v1704 = plus(v1624, v1625); real2 v1631 = timesminusplus(reverse(v1621), load(tbl, 286 * VECWIDTH + tbloffset), times(v1621, load(tbl, 287 * VECWIDTH + tbloffset))); real2 v949 = timesminusplus(reverse(v935), load(tbl, 164 * VECWIDTH + tbloffset), times(v935, load(tbl, 165 * VECWIDTH + tbloffset))); real2 v1830 = minus(v949, v789); real2 v1834 = plus(v789, v949); real2 v1782 = plus(v1631, v1671); real2 v1778 = minus(v1671, v1631); real2 v1910 = minus(v1835, v1834); real2 v1914 = plus(v1834, v1835); real2 v1950 = minus(v1915, v1914); real2 v1954 = plus(v1914, v1915); real2 v1913 = minusplus(uminus(v1909), v1910); real2 v1911 = minusplus(v1909, v1910); real2 v613 = minusplus(v611, v612); real2 v615 = minusplus(uminus(v611), v612); real2 v629 = timesminusplus(reverse(v615), load(tbl, 100 * VECWIDTH + tbloffset), times(v615, load(tbl, 101 * VECWIDTH + tbloffset))); real2 v1744 = plus(v1704, v1705); real2 v1740 = minus(v1705, v1704); real2 v1637 = timesminusplus(reverse(v1623), load(tbl, 288 * VECWIDTH + tbloffset), times(v1623, load(tbl, 289 * VECWIDTH + tbloffset))); real2 v1927 = timesminusplus(reverse(v1913), load(tbl, 340 * VECWIDTH + tbloffset), times(v1913, load(tbl, 341 * VECWIDTH + tbloffset))); real2 v2571 = plus(v309, v629); real2 v2565 = reverse(minus(v629, v309)); real2 v1833 = minusplus(uminus(v1829), v1830); 
real2 v1831 = minusplus(v1829, v1830); real2 v1921 = timesminusplus(reverse(v1911), load(tbl, 338 * VECWIDTH + tbloffset), times(v1911, load(tbl, 339 * VECWIDTH + tbloffset))); real2 v1804 = minus(v1677, v1637); real2 v1808 = plus(v1637, v1677); real2 v1847 = timesminusplus(reverse(v1833), load(tbl, 324 * VECWIDTH + tbloffset), times(v1833, load(tbl, 325 * VECWIDTH + tbloffset))); real2 v2014 = minus(v1887, v1847); real2 v2018 = plus(v1847, v1887); real2 v1841 = timesminusplus(reverse(v1831), load(tbl, 322 * VECWIDTH + tbloffset), times(v1831, load(tbl, 323 * VECWIDTH + tbloffset))); real2 v1988 = minus(v1881, v1841); real2 v1992 = plus(v1841, v1881); real2 v1703 = minusplus(uminus(v1699), v1700); real2 v1701 = minusplus(v1699, v1700); real2 v1717 = timesminusplus(reverse(v1703), load(tbl, 304 * VECWIDTH + tbloffset), times(v1703, load(tbl, 305 * VECWIDTH + tbloffset))); real2 v1711 = timesminusplus(reverse(v1701), load(tbl, 302 * VECWIDTH + tbloffset), times(v1701, load(tbl, 303 * VECWIDTH + tbloffset))); real2 v2730 = plus(v2570, v2571); real2 v2726 = minus(v2571, v2570); real2 v1412 = minus(v1303, v1263); real2 v1416 = plus(v1263, v1303); real2 v63 = load(in, 61 << inShift); real2 v127 = load(in, 125 << inShift); real2 v717 = plus(v63, v127); real2 v711 = reverse(minus(v127, v63)); real2 v95 = load(in, 93 << inShift); real2 v31 = load(in, 29 << inShift); real2 v712 = minus(v95, v31); real2 v716 = plus(v31, v95); real2 v1037 = plus(v716, v717); real2 v1031 = reverse(minus(v717, v716)); real2 v79 = load(in, 77 << inShift); real2 v15 = load(in, 13 << inShift); real2 v396 = plus(v15, v79); real2 v392 = minus(v79, v15); real2 v111 = load(in, 109 << inShift); real2 v47 = load(in, 45 << inShift); real2 v397 = plus(v47, v111); real2 v391 = reverse(minus(v111, v47)); real2 v1032 = minus(v397, v396); real2 v1036 = plus(v396, v397); real2 v1033 = minusplus(v1031, v1032); real2 v1035 = minusplus(uminus(v1031), v1032); real2 v1049 = timesminusplus(reverse(v1035), load(tbl, 
184 * VECWIDTH + tbloffset), times(v1035, load(tbl, 185 * VECWIDTH + tbloffset))); real2 v1043 = timesminusplus(reverse(v1033), load(tbl, 182 * VECWIDTH + tbloffset), times(v1033, load(tbl, 183 * VECWIDTH + tbloffset))); real2 v1197 = plus(v1036, v1037); real2 v1191 = reverse(minus(v1037, v1036)); real2 v23 = load(in, 21 << inShift); real2 v87 = load(in, 85 << inShift); real2 v556 = plus(v23, v87); real2 v552 = minus(v87, v23); real2 v119 = load(in, 117 << inShift); real2 v55 = load(in, 53 << inShift); real2 v557 = plus(v55, v119); real2 v551 = reverse(minus(v119, v55)); real2 v877 = plus(v556, v557); real2 v871 = reverse(minus(v557, v556)); real2 v7 = load(in, 5 << inShift); real2 v71 = load(in, 69 << inShift); real2 v232 = minus(v71, v7); real2 v236 = plus(v7, v71); real2 v103 = load(in, 101 << inShift); real2 v39 = load(in, 37 << inShift); real2 v237 = plus(v39, v103); real2 v231 = reverse(minus(v103, v39)); real2 v876 = plus(v236, v237); real2 v872 = minus(v237, v236); real2 v1192 = minus(v877, v876); real2 v1196 = plus(v876, v877); real2 v1271 = reverse(minus(v1197, v1196)); real2 v1277 = plus(v1196, v1197); real2 v875 = minusplus(uminus(v871), v872); real2 v873 = minusplus(v871, v872); real2 v883 = timesminusplus(reverse(v873), load(tbl, 150 * VECWIDTH + tbloffset), times(v873, load(tbl, 151 * VECWIDTH + tbloffset))); real2 v1639 = reverse(minus(v1043, v883)); real2 v1645 = plus(v883, v1043); real2 v1195 = minusplus(uminus(v1191), v1192); real2 v1193 = minusplus(v1191, v1192); real2 v1209 = timesminusplus(reverse(v1195), load(tbl, 216 * VECWIDTH + tbloffset), times(v1195, load(tbl, 217 * VECWIDTH + tbloffset))); real2 v1203 = timesminusplus(reverse(v1193), load(tbl, 214 * VECWIDTH + tbloffset), times(v1193, load(tbl, 215 * VECWIDTH + tbloffset))); real2 v83 = load(in, 81 << inShift); real2 v19 = load(in, 17 << inShift); real2 v476 = plus(v19, v83); real2 v472 = minus(v83, v19); real2 v51 = load(in, 49 << inShift); real2 v115 = load(in, 113 << inShift); real2 
v477 = plus(v51, v115); real2 v471 = reverse(minus(v115, v51)); real2 v797 = plus(v476, v477); real2 v791 = reverse(minus(v477, v476)); real2 v3 = load(in, 1 << inShift); real2 v67 = load(in, 65 << inShift); real2 v156 = plus(v3, v67); real2 v152 = minus(v67, v3); real2 v35 = load(in, 33 << inShift); real2 v99 = load(in, 97 << inShift); real2 v157 = plus(v35, v99); real2 v151 = reverse(minus(v99, v35)); real2 v792 = minus(v157, v156); real2 v796 = plus(v156, v157); real2 v793 = minusplus(v791, v792); real2 v795 = minusplus(uminus(v791), v792); real2 v803 = timesminusplus(reverse(v793), load(tbl, 134 * VECWIDTH + tbloffset), times(v793, load(tbl, 135 * VECWIDTH + tbloffset))); real2 v1112 = minus(v797, v796); real2 v1116 = plus(v796, v797); real2 v107 = load(in, 105 << inShift); real2 v43 = load(in, 41 << inShift); real2 v317 = plus(v43, v107); real2 v311 = reverse(minus(v107, v43)); real2 v75 = load(in, 73 << inShift); real2 v11 = load(in, 9 << inShift); real2 v316 = plus(v11, v75); real2 v312 = minus(v75, v11); real2 v956 = plus(v316, v317); real2 v952 = minus(v317, v316); real2 v59 = load(in, 57 << inShift); real2 v123 = load(in, 121 << inShift); real2 v631 = reverse(minus(v123, v59)); real2 v637 = plus(v59, v123); real2 v27 = load(in, 25 << inShift); real2 v91 = load(in, 89 << inShift); real2 v636 = plus(v27, v91); real2 v632 = minus(v91, v27); real2 v957 = plus(v636, v637); real2 v951 = reverse(minus(v637, v636)); real2 v1111 = reverse(minus(v957, v956)); real2 v1117 = plus(v956, v957); real2 v1276 = plus(v1116, v1117); real2 v1272 = minus(v1117, v1116); real2 v1275 = minusplus(uminus(v1271), v1272); real2 v1273 = minusplus(v1271, v1272); real2 v1283 = timesminusplus(reverse(v1273), load(tbl, 230 * VECWIDTH + tbloffset), times(v1273, load(tbl, 231 * VECWIDTH + tbloffset))); real2 v1352 = minus(v1277, v1276); real2 v1356 = plus(v1276, v1277); real2 v1289 = timesminusplus(reverse(v1275), load(tbl, 232 * VECWIDTH + tbloffset), times(v1275, load(tbl, 233 * VECWIDTH 
+ tbloffset))); real2 v1115 = minusplus(uminus(v1111), v1112); real2 v1113 = minusplus(v1111, v1112); real2 v1123 = timesminusplus(reverse(v1113), load(tbl, 198 * VECWIDTH + tbloffset), times(v1113, load(tbl, 199 * VECWIDTH + tbloffset))); real2 v1129 = timesminusplus(reverse(v1115), load(tbl, 200 * VECWIDTH + tbloffset), times(v1115, load(tbl, 201 * VECWIDTH + tbloffset))); real2 v1488 = plus(v1123, v1203); real2 v1484 = minus(v1203, v1123); real2 v1566 = plus(v1129, v1209); real2 v1562 = minus(v1209, v1129); real2 v85 = load(in, 83 << inShift); real2 v21 = load(in, 19 << inShift); real2 v512 = minus(v85, v21); real2 v516 = plus(v21, v85); real2 v117 = load(in, 115 << inShift); real2 v53 = load(in, 51 << inShift); real2 v517 = plus(v53, v117); real2 v511 = reverse(minus(v117, v53)); real2 v831 = reverse(minus(v517, v516)); real2 v837 = plus(v516, v517); real2 v69 = load(in, 67 << inShift); real2 v5 = load(in, 3 << inShift); real2 v192 = minus(v69, v5); real2 v196 = plus(v5, v69); real2 v37 = load(in, 35 << inShift); real2 v101 = load(in, 99 << inShift); real2 v197 = plus(v37, v101); real2 v191 = reverse(minus(v101, v37)); real2 v832 = minus(v197, v196); real2 v836 = plus(v196, v197); real2 v1152 = minus(v837, v836); real2 v1156 = plus(v836, v837); real2 v61 = load(in, 59 << inShift); real2 v125 = load(in, 123 << inShift); real2 v677 = plus(v61, v125); real2 v671 = reverse(minus(v125, v61)); real2 v29 = load(in, 27 << inShift); real2 v93 = load(in, 91 << inShift); real2 v672 = minus(v93, v29); real2 v676 = plus(v29, v93); real2 v997 = plus(v676, v677); real2 v991 = reverse(minus(v677, v676)); real2 v109 = load(in, 107 << inShift); real2 v45 = load(in, 43 << inShift); real2 v357 = plus(v45, v109); real2 v351 = reverse(minus(v109, v45)); real2 v77 = load(in, 75 << inShift); real2 v13 = load(in, 11 << inShift); real2 v352 = minus(v77, v13); real2 v356 = plus(v13, v77); real2 v992 = minus(v357, v356); real2 v996 = plus(v356, v357); real2 v1157 = plus(v996, v997); real2 
v1151 = reverse(minus(v997, v996)); real2 v1155 = minusplus(uminus(v1151), v1152); real2 v1153 = minusplus(v1151, v1152); real2 v1163 = timesminusplus(reverse(v1153), load(tbl, 206 * VECWIDTH + tbloffset), times(v1153, load(tbl, 207 * VECWIDTH + tbloffset))); real2 v1316 = plus(v1156, v1157); real2 v1312 = minus(v1157, v1156); real2 v41 = load(in, 39 << inShift); real2 v105 = load(in, 103 << inShift); real2 v277 = plus(v41, v105); real2 v271 = reverse(minus(v105, v41)); real2 v9 = load(in, 7 << inShift); real2 v73 = load(in, 71 << inShift); real2 v276 = plus(v9, v73); real2 v272 = minus(v73, v9); real2 v916 = plus(v276, v277); real2 v912 = minus(v277, v276); real2 v89 = load(in, 87 << inShift); real2 v25 = load(in, 23 << inShift); real2 v592 = minus(v89, v25); real2 v596 = plus(v25, v89); real2 v57 = load(in, 55 << inShift); real2 v121 = load(in, 119 << inShift); real2 v591 = reverse(minus(v121, v57)); real2 v597 = plus(v57, v121); real2 v911 = reverse(minus(v597, v596)); real2 v917 = plus(v596, v597); real2 v1236 = plus(v916, v917); real2 v1232 = minus(v917, v916); real2 v81 = load(in, 79 << inShift); real2 v17 = load(in, 15 << inShift); real2 v432 = minus(v81, v17); real2 v436 = plus(v17, v81); real2 v113 = load(in, 111 << inShift); real2 v49 = load(in, 47 << inShift); real2 v437 = plus(v49, v113); real2 v431 = reverse(minus(v113, v49)); real2 v1072 = minus(v437, v436); real2 v1076 = plus(v436, v437); real2 v65 = load(in, 63 << inShift); real2 v129 = load(in, 127 << inShift); real2 v757 = plus(v65, v129); real2 v751 = reverse(minus(v129, v65)); real2 v97 = load(in, 95 << inShift); real2 v33 = load(in, 31 << inShift); real2 v752 = minus(v97, v33); real2 v756 = plus(v33, v97); real2 v1077 = plus(v756, v757); real2 v1071 = reverse(minus(v757, v756)); real2 v1231 = reverse(minus(v1077, v1076)); real2 v1237 = plus(v1076, v1077); real2 v1317 = plus(v1236, v1237); real2 v1311 = reverse(minus(v1237, v1236)); real2 v1351 = reverse(minus(v1317, v1316)); real2 v1357 = 
plus(v1316, v1317); real2 v1371 = reverse(minus(v1357, v1356)); real2 v1377 = plus(v1356, v1357); scatter(out, 0, 128, plus(v1376, v1377)); real2 v1390 = minus(v1376, v1377); scatter(out, 64, 128, timesminusplus(v1390, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1390), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1353 = minusplus(v1351, v1352); real2 v1355 = minusplus(uminus(v1351), v1352); real2 v1369 = timesminusplus(reverse(v1355), load(tbl, 248 * VECWIDTH + tbloffset), times(v1355, load(tbl, 249 * VECWIDTH + tbloffset))); scatter(out, 48, 128, plus(v1349, v1369)); real2 v1404 = minus(v1349, v1369); scatter(out, 112, 128, timesminusplus(v1404, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1404), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1363 = timesminusplus(reverse(v1353), load(tbl, 246 * VECWIDTH + tbloffset), times(v1353, load(tbl, 247 * VECWIDTH + tbloffset))); scatter(out, 16, 128, plus(v1343, v1363)); real2 v1398 = minus(v1343, v1363); scatter(out, 80, 128, timesminusplus(v1398, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1398), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1373 = minusplus(v1371, v1372); real2 v1375 = minusplus(uminus(v1371), v1372); scatter(out, 96, 128, timesminusplus(reverse(v1375), load(tbl, 252 * VECWIDTH + tbloffset), times(v1375, load(tbl, 253 * VECWIDTH + tbloffset)))); scatter(out, 32, 128, timesminusplus(reverse(v1373), load(tbl, 250 * VECWIDTH + tbloffset), times(v1373, load(tbl, 251 * VECWIDTH + tbloffset)))); real2 v1313 = minusplus(v1311, v1312); real2 v1315 = minusplus(uminus(v1311), v1312); real2 v1323 = timesminusplus(reverse(v1313), load(tbl, 238 * VECWIDTH + tbloffset), times(v1313, load(tbl, 239 * VECWIDTH + tbloffset))); real2 v1417 = plus(v1283, v1323); real2 v1411 = reverse(minus(v1323, v1283)); scatter(out, 8, 128, plus(v1416, v1417)); real2 v1430 = minus(v1416, v1417); scatter(out, 72, 128, timesminusplus(v1430, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1430), load(tbl, 1 * 
VECWIDTH + tbloffset)))); real2 v1413 = minusplus(v1411, v1412); real2 v1415 = minusplus(uminus(v1411), v1412); scatter(out, 104, 128, timesminusplus(reverse(v1415), load(tbl, 256 * VECWIDTH + tbloffset), times(v1415, load(tbl, 257 * VECWIDTH + tbloffset)))); scatter(out, 40, 128, timesminusplus(reverse(v1413), load(tbl, 254 * VECWIDTH + tbloffset), times(v1413, load(tbl, 255 * VECWIDTH + tbloffset)))); real2 v1329 = timesminusplus(reverse(v1315), load(tbl, 240 * VECWIDTH + tbloffset), times(v1315, load(tbl, 241 * VECWIDTH + tbloffset))); real2 v1443 = plus(v1289, v1329); real2 v1437 = reverse(minus(v1329, v1289)); scatter(out, 24, 128, plus(v1442, v1443)); real2 v1456 = minus(v1442, v1443); scatter(out, 88, 128, timesminusplus(v1456, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1456), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1441 = minusplus(uminus(v1437), v1438); real2 v1439 = minusplus(v1437, v1438); scatter(out, 120, 128, timesminusplus(reverse(v1441), load(tbl, 260 * VECWIDTH + tbloffset), times(v1441, load(tbl, 261 * VECWIDTH + tbloffset)))); scatter(out, 56, 128, timesminusplus(reverse(v1439), load(tbl, 258 * VECWIDTH + tbloffset), times(v1439, load(tbl, 259 * VECWIDTH + tbloffset)))); real2 v1235 = minusplus(uminus(v1231), v1232); real2 v1233 = minusplus(v1231, v1232); real2 v1243 = timesminusplus(reverse(v1233), load(tbl, 222 * VECWIDTH + tbloffset), times(v1233, load(tbl, 223 * VECWIDTH + tbloffset))); real2 v1489 = plus(v1163, v1243); real2 v1483 = reverse(minus(v1243, v1163)); real2 v1509 = plus(v1488, v1489); real2 v1503 = reverse(minus(v1489, v1488)); scatter(out, 4, 128, plus(v1508, v1509)); real2 v1522 = minus(v1508, v1509); scatter(out, 68, 128, timesminusplus(v1522, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1522), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1507 = minusplus(uminus(v1503), v1504); real2 v1505 = minusplus(v1503, v1504); scatter(out, 36, 128, timesminusplus(reverse(v1505), load(tbl, 270 * VECWIDTH + 
tbloffset), times(v1505, load(tbl, 271 * VECWIDTH + tbloffset)))); scatter(out, 100, 128, timesminusplus(reverse(v1507), load(tbl, 272 * VECWIDTH + tbloffset), times(v1507, load(tbl, 273 * VECWIDTH + tbloffset)))); real2 v1485 = minusplus(v1483, v1484); real2 v1487 = minusplus(uminus(v1483), v1484); real2 v1501 = timesminusplus(reverse(v1487), load(tbl, 268 * VECWIDTH + tbloffset), times(v1487, load(tbl, 269 * VECWIDTH + tbloffset))); scatter(out, 52, 128, plus(v1481, v1501)); real2 v1534 = minus(v1481, v1501); scatter(out, 116, 128, timesminusplus(v1534, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1534), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1495 = timesminusplus(reverse(v1485), load(tbl, 266 * VECWIDTH + tbloffset), times(v1485, load(tbl, 267 * VECWIDTH + tbloffset))); scatter(out, 20, 128, plus(v1475, v1495)); real2 v1528 = minus(v1475, v1495); scatter(out, 84, 128, timesminusplus(v1528, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1528), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1249 = timesminusplus(reverse(v1235), load(tbl, 224 * VECWIDTH + tbloffset), times(v1235, load(tbl, 225 * VECWIDTH + tbloffset))); real2 v1169 = timesminusplus(reverse(v1155), load(tbl, 208 * VECWIDTH + tbloffset), times(v1155, load(tbl, 209 * VECWIDTH + tbloffset))); real2 v1567 = plus(v1169, v1249); real2 v1561 = reverse(minus(v1249, v1169)); real2 v1581 = reverse(minus(v1567, v1566)); real2 v1587 = plus(v1566, v1567); scatter(out, 12, 128, plus(v1586, v1587)); real2 v1600 = minus(v1586, v1587); scatter(out, 76, 128, timesminusplus(v1600, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1600), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1583 = minusplus(v1581, v1582); scatter(out, 44, 128, timesminusplus(reverse(v1583), load(tbl, 282 * VECWIDTH + tbloffset), times(v1583, load(tbl, 283 * VECWIDTH + tbloffset)))); real2 v1585 = minusplus(uminus(v1581), v1582); scatter(out, 108, 128, timesminusplus(reverse(v1585), load(tbl, 284 * VECWIDTH + tbloffset), 
times(v1585, load(tbl, 285 * VECWIDTH + tbloffset)))); real2 v1565 = minusplus(uminus(v1561), v1562); real2 v1563 = minusplus(v1561, v1562); real2 v1579 = timesminusplus(reverse(v1565), load(tbl, 280 * VECWIDTH + tbloffset), times(v1565, load(tbl, 281 * VECWIDTH + tbloffset))); scatter(out, 60, 128, plus(v1559, v1579)); real2 v1612 = minus(v1559, v1579); scatter(out, 124, 128, timesminusplus(v1612, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1612), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1573 = timesminusplus(reverse(v1563), load(tbl, 278 * VECWIDTH + tbloffset), times(v1563, load(tbl, 279 * VECWIDTH + tbloffset))); scatter(out, 28, 128, plus(v1553, v1573)); real2 v1606 = minus(v1553, v1573); scatter(out, 92, 128, timesminusplus(v1606, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1606), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v833 = minusplus(v831, v832); real2 v835 = minusplus(uminus(v831), v832); real2 v955 = minusplus(uminus(v951), v952); real2 v953 = minusplus(v951, v952); real2 v963 = timesminusplus(reverse(v953), load(tbl, 166 * VECWIDTH + tbloffset), times(v953, load(tbl, 167 * VECWIDTH + tbloffset))); real2 v995 = minusplus(uminus(v991), v992); real2 v993 = minusplus(v991, v992); real2 v1003 = timesminusplus(reverse(v993), load(tbl, 174 * VECWIDTH + tbloffset), times(v993, load(tbl, 175 * VECWIDTH + tbloffset))); real2 v843 = timesminusplus(reverse(v833), load(tbl, 142 * VECWIDTH + tbloffset), times(v833, load(tbl, 143 * VECWIDTH + tbloffset))); real2 v1640 = minus(v963, v803); real2 v1644 = plus(v803, v963); real2 v1680 = minus(v1003, v843); real2 v1684 = plus(v843, v1003); real2 v1641 = minusplus(v1639, v1640); real2 v1643 = minusplus(uminus(v1639), v1640); real2 v1657 = timesminusplus(reverse(v1643), load(tbl, 292 * VECWIDTH + tbloffset), times(v1643, load(tbl, 293 * VECWIDTH + tbloffset))); real2 v913 = minusplus(v911, v912); real2 v915 = minusplus(uminus(v911), v912); real2 v1073 = minusplus(v1071, v1072); real2 v1075 = 
minusplus(uminus(v1071), v1072); real2 v923 = timesminusplus(reverse(v913), load(tbl, 158 * VECWIDTH + tbloffset), times(v913, load(tbl, 159 * VECWIDTH + tbloffset))); real2 v1083 = timesminusplus(reverse(v1073), load(tbl, 190 * VECWIDTH + tbloffset), times(v1073, load(tbl, 191 * VECWIDTH + tbloffset))); real2 v1685 = plus(v923, v1083); real2 v1679 = reverse(minus(v1083, v923)); real2 v1681 = minusplus(v1679, v1680); real2 v1683 = minusplus(uminus(v1679), v1680); real2 v1697 = timesminusplus(reverse(v1683), load(tbl, 300 * VECWIDTH + tbloffset), times(v1683, load(tbl, 301 * VECWIDTH + tbloffset))); real2 v1809 = plus(v1657, v1697); real2 v1803 = reverse(minus(v1697, v1657)); scatter(out, 26, 128, plus(v1808, v1809)); real2 v1822 = minus(v1808, v1809); scatter(out, 90, 128, timesminusplus(v1822, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1822), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1807 = minusplus(uminus(v1803), v1804); real2 v1805 = minusplus(v1803, v1804); scatter(out, 58, 128, timesminusplus(reverse(v1805), load(tbl, 318 * VECWIDTH + tbloffset), times(v1805, load(tbl, 319 * VECWIDTH + tbloffset)))); scatter(out, 122, 128, timesminusplus(reverse(v1807), load(tbl, 320 * VECWIDTH + tbloffset), times(v1807, load(tbl, 321 * VECWIDTH + tbloffset)))); real2 v1651 = timesminusplus(reverse(v1641), load(tbl, 290 * VECWIDTH + tbloffset), times(v1641, load(tbl, 291 * VECWIDTH + tbloffset))); real2 v1691 = timesminusplus(reverse(v1681), load(tbl, 298 * VECWIDTH + tbloffset), times(v1681, load(tbl, 299 * VECWIDTH + tbloffset))); real2 v1783 = plus(v1651, v1691); real2 v1777 = reverse(minus(v1691, v1651)); real2 v1779 = minusplus(v1777, v1778); real2 v1781 = minusplus(uminus(v1777), v1778); scatter(out, 106, 128, timesminusplus(reverse(v1781), load(tbl, 316 * VECWIDTH + tbloffset), times(v1781, load(tbl, 317 * VECWIDTH + tbloffset)))); scatter(out, 42, 128, timesminusplus(reverse(v1779), load(tbl, 314 * VECWIDTH + tbloffset), times(v1779, load(tbl, 315 * 
VECWIDTH + tbloffset)))); scatter(out, 10, 128, plus(v1782, v1783)); real2 v1796 = minus(v1782, v1783); scatter(out, 74, 128, timesminusplus(v1796, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1796), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1720 = minus(v1645, v1644); real2 v1724 = plus(v1644, v1645); real2 v1719 = reverse(minus(v1685, v1684)); real2 v1725 = plus(v1684, v1685); real2 v1745 = plus(v1724, v1725); real2 v1739 = reverse(minus(v1725, v1724)); scatter(out, 2, 128, plus(v1744, v1745)); real2 v1758 = minus(v1744, v1745); scatter(out, 66, 128, timesminusplus(v1758, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1758), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1741 = minusplus(v1739, v1740); real2 v1743 = minusplus(uminus(v1739), v1740); scatter(out, 98, 128, timesminusplus(reverse(v1743), load(tbl, 312 * VECWIDTH + tbloffset), times(v1743, load(tbl, 313 * VECWIDTH + tbloffset)))); scatter(out, 34, 128, timesminusplus(reverse(v1741), load(tbl, 310 * VECWIDTH + tbloffset), times(v1741, load(tbl, 311 * VECWIDTH + tbloffset)))); real2 v1723 = minusplus(uminus(v1719), v1720); real2 v1721 = minusplus(v1719, v1720); real2 v1737 = timesminusplus(reverse(v1723), load(tbl, 308 * VECWIDTH + tbloffset), times(v1723, load(tbl, 309 * VECWIDTH + tbloffset))); scatter(out, 50, 128, plus(v1717, v1737)); real2 v1770 = minus(v1717, v1737); scatter(out, 114, 128, timesminusplus(v1770, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1770), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1731 = timesminusplus(reverse(v1721), load(tbl, 306 * VECWIDTH + tbloffset), times(v1721, load(tbl, 307 * VECWIDTH + tbloffset))); scatter(out, 18, 128, plus(v1711, v1731)); real2 v1764 = minus(v1711, v1731); scatter(out, 82, 128, timesminusplus(v1764, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1764), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v809 = timesminusplus(reverse(v795), load(tbl, 136 * VECWIDTH + tbloffset), times(v795, load(tbl, 137 * VECWIDTH + 
tbloffset))); real2 v969 = timesminusplus(reverse(v955), load(tbl, 168 * VECWIDTH + tbloffset), times(v955, load(tbl, 169 * VECWIDTH + tbloffset))); real2 v1850 = minus(v969, v809); real2 v1854 = plus(v809, v969); real2 v849 = timesminusplus(reverse(v835), load(tbl, 144 * VECWIDTH + tbloffset), times(v835, load(tbl, 145 * VECWIDTH + tbloffset))); real2 v929 = timesminusplus(reverse(v915), load(tbl, 160 * VECWIDTH + tbloffset), times(v915, load(tbl, 161 * VECWIDTH + tbloffset))); real2 v889 = timesminusplus(reverse(v875), load(tbl, 152 * VECWIDTH + tbloffset), times(v875, load(tbl, 153 * VECWIDTH + tbloffset))); real2 v1089 = timesminusplus(reverse(v1075), load(tbl, 192 * VECWIDTH + tbloffset), times(v1075, load(tbl, 193 * VECWIDTH + tbloffset))); real2 v1009 = timesminusplus(reverse(v995), load(tbl, 176 * VECWIDTH + tbloffset), times(v995, load(tbl, 177 * VECWIDTH + tbloffset))); real2 v1890 = minus(v1009, v849); real2 v1894 = plus(v849, v1009); real2 v1849 = reverse(minus(v1049, v889)); real2 v1855 = plus(v889, v1049); real2 v1930 = minus(v1855, v1854); real2 v1934 = plus(v1854, v1855); real2 v1895 = plus(v929, v1089); real2 v1889 = reverse(minus(v1089, v929)); real2 v1929 = reverse(minus(v1895, v1894)); real2 v1935 = plus(v1894, v1895); real2 v1955 = plus(v1934, v1935); real2 v1949 = reverse(minus(v1935, v1934)); scatter(out, 6, 128, plus(v1954, v1955)); real2 v1968 = minus(v1954, v1955); scatter(out, 70, 128, timesminusplus(v1968, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1968), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1951 = minusplus(v1949, v1950); scatter(out, 38, 128, timesminusplus(reverse(v1951), load(tbl, 346 * VECWIDTH + tbloffset), times(v1951, load(tbl, 347 * VECWIDTH + tbloffset)))); real2 v1953 = minusplus(uminus(v1949), v1950); scatter(out, 102, 128, timesminusplus(reverse(v1953), load(tbl, 348 * VECWIDTH + tbloffset), times(v1953, load(tbl, 349 * VECWIDTH + tbloffset)))); real2 v1931 = minusplus(v1929, v1930); real2 v1933 = 
minusplus(uminus(v1929), v1930); real2 v1947 = timesminusplus(reverse(v1933), load(tbl, 344 * VECWIDTH + tbloffset), times(v1933, load(tbl, 345 * VECWIDTH + tbloffset))); scatter(out, 54, 128, plus(v1927, v1947)); real2 v1980 = minus(v1927, v1947); scatter(out, 118, 128, timesminusplus(v1980, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1980), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1941 = timesminusplus(reverse(v1931), load(tbl, 342 * VECWIDTH + tbloffset), times(v1931, load(tbl, 343 * VECWIDTH + tbloffset))); scatter(out, 22, 128, plus(v1921, v1941)); real2 v1974 = minus(v1921, v1941); scatter(out, 86, 128, timesminusplus(v1974, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1974), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1851 = minusplus(v1849, v1850); real2 v1853 = minusplus(uminus(v1849), v1850); real2 v1867 = timesminusplus(reverse(v1853), load(tbl, 328 * VECWIDTH + tbloffset), times(v1853, load(tbl, 329 * VECWIDTH + tbloffset))); real2 v1891 = minusplus(v1889, v1890); real2 v1893 = minusplus(uminus(v1889), v1890); real2 v1907 = timesminusplus(reverse(v1893), load(tbl, 336 * VECWIDTH + tbloffset), times(v1893, load(tbl, 337 * VECWIDTH + tbloffset))); real2 v2019 = plus(v1867, v1907); real2 v2013 = reverse(minus(v1907, v1867)); scatter(out, 30, 128, plus(v2018, v2019)); real2 v2032 = minus(v2018, v2019); scatter(out, 94, 128, timesminusplus(v2032, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2032), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2017 = minusplus(uminus(v2013), v2014); scatter(out, 126, 128, timesminusplus(reverse(v2017), load(tbl, 356 * VECWIDTH + tbloffset), times(v2017, load(tbl, 357 * VECWIDTH + tbloffset)))); real2 v2015 = minusplus(v2013, v2014); scatter(out, 62, 128, timesminusplus(reverse(v2015), load(tbl, 354 * VECWIDTH + tbloffset), times(v2015, load(tbl, 355 * VECWIDTH + tbloffset)))); real2 v1861 = timesminusplus(reverse(v1851), load(tbl, 326 * VECWIDTH + tbloffset), times(v1851, load(tbl, 327 * 
VECWIDTH + tbloffset))); real2 v1901 = timesminusplus(reverse(v1891), load(tbl, 334 * VECWIDTH + tbloffset), times(v1891, load(tbl, 335 * VECWIDTH + tbloffset))); real2 v1993 = plus(v1861, v1901); real2 v1987 = reverse(minus(v1901, v1861)); scatter(out, 14, 128, plus(v1992, v1993)); real2 v2006 = minus(v1992, v1993); scatter(out, 78, 128, timesminusplus(v2006, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2006), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1991 = minusplus(uminus(v1987), v1988); scatter(out, 110, 128, timesminusplus(reverse(v1991), load(tbl, 352 * VECWIDTH + tbloffset), times(v1991, load(tbl, 353 * VECWIDTH + tbloffset)))); real2 v1989 = minusplus(v1987, v1988); scatter(out, 46, 128, timesminusplus(reverse(v1989), load(tbl, 350 * VECWIDTH + tbloffset), times(v1989, load(tbl, 351 * VECWIDTH + tbloffset)))); real2 v593 = minusplus(v591, v592); real2 v595 = minusplus(uminus(v591), v592); real2 v473 = minusplus(v471, v472); real2 v475 = minusplus(uminus(v471), v472); real2 v555 = minusplus(uminus(v551), v552); real2 v553 = minusplus(v551, v552); real2 v609 = timesminusplus(reverse(v595), load(tbl, 96 * VECWIDTH + tbloffset), times(v595, load(tbl, 97 * VECWIDTH + tbloffset))); real2 v195 = minusplus(uminus(v191), v192); real2 v193 = minusplus(v191, v192); real2 v275 = minusplus(uminus(v271), v272); real2 v273 = minusplus(v271, v272); real2 v673 = minusplus(v671, v672); real2 v675 = minusplus(uminus(v671), v672); real2 v689 = timesminusplus(reverse(v675), load(tbl, 112 * VECWIDTH + tbloffset), times(v675, load(tbl, 113 * VECWIDTH + tbloffset))); real2 v209 = timesminusplus(reverse(v195), load(tbl, 16 * VECWIDTH + tbloffset), times(v195, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v289 = timesminusplus(reverse(v275), load(tbl, 32 * VECWIDTH + tbloffset), times(v275, load(tbl, 33 * VECWIDTH + tbloffset))); real2 v755 = minusplus(uminus(v751), v752); real2 v753 = minusplus(v751, v752); real2 v435 = minusplus(uminus(v431), v432); real2 v433 = 
minusplus(v431, v432); real2 v513 = minusplus(v511, v512); real2 v515 = minusplus(uminus(v511), v512); real2 v529 = timesminusplus(reverse(v515), load(tbl, 80 * VECWIDTH + tbloffset), times(v515, load(tbl, 81 * VECWIDTH + tbloffset))); real2 v353 = minusplus(v351, v352); real2 v355 = minusplus(uminus(v351), v352); real2 v369 = timesminusplus(reverse(v355), load(tbl, 48 * VECWIDTH + tbloffset), times(v355, load(tbl, 49 * VECWIDTH + tbloffset))); real2 v2631 = plus(v369, v689); real2 v2625 = reverse(minus(v689, v369)); real2 v449 = timesminusplus(reverse(v435), load(tbl, 64 * VECWIDTH + tbloffset), times(v435, load(tbl, 65 * VECWIDTH + tbloffset))); real2 v2710 = plus(v289, v609); real2 v2706 = minus(v609, v289); real2 v2630 = plus(v209, v529); real2 v2626 = minus(v529, v209); real2 v2790 = plus(v2630, v2631); real2 v2786 = minus(v2631, v2630); real2 v713 = minusplus(v711, v712); real2 v715 = minusplus(uminus(v711), v712); real2 v769 = timesminusplus(reverse(v755), load(tbl, 128 * VECWIDTH + tbloffset), times(v755, load(tbl, 129 * VECWIDTH + tbloffset))); real2 v2705 = reverse(minus(v769, v449)); real2 v2711 = plus(v449, v769); real2 v313 = minusplus(v311, v312); real2 v315 = minusplus(uminus(v311), v312); real2 v393 = minusplus(v391, v392); real2 v395 = minusplus(uminus(v391), v392); real2 v409 = timesminusplus(reverse(v395), load(tbl, 56 * VECWIDTH + tbloffset), times(v395, load(tbl, 57 * VECWIDTH + tbloffset))); real2 v729 = timesminusplus(reverse(v715), load(tbl, 120 * VECWIDTH + tbloffset), times(v715, load(tbl, 121 * VECWIDTH + tbloffset))); real2 v329 = timesminusplus(reverse(v315), load(tbl, 40 * VECWIDTH + tbloffset), times(v315, load(tbl, 41 * VECWIDTH + tbloffset))); real2 v489 = timesminusplus(reverse(v475), load(tbl, 72 * VECWIDTH + tbloffset), times(v475, load(tbl, 73 * VECWIDTH + tbloffset))); real2 v153 = minusplus(v151, v152); real2 v155 = minusplus(uminus(v151), v152); real2 v169 = timesminusplus(reverse(v155), load(tbl, 8 * VECWIDTH + tbloffset), 
times(v155, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v2586 = minus(v489, v169); real2 v2590 = plus(v169, v489); real2 v233 = minusplus(v231, v232); real2 v235 = minusplus(uminus(v231), v232); real2 v633 = minusplus(v631, v632); real2 v635 = minusplus(uminus(v631), v632); real2 v649 = timesminusplus(reverse(v635), load(tbl, 104 * VECWIDTH + tbloffset), times(v635, load(tbl, 105 * VECWIDTH + tbloffset))); real2 v249 = timesminusplus(reverse(v235), load(tbl, 24 * VECWIDTH + tbloffset), times(v235, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v569 = timesminusplus(reverse(v555), load(tbl, 88 * VECWIDTH + tbloffset), times(v555, load(tbl, 89 * VECWIDTH + tbloffset))); real2 v2670 = plus(v249, v569); real2 v2666 = minus(v569, v249); real2 v2785 = reverse(minus(v2711, v2710)); real2 v2791 = plus(v2710, v2711); real2 v2825 = reverse(minus(v2791, v2790)); real2 v2831 = plus(v2790, v2791); real2 v2671 = plus(v409, v729); real2 v2665 = reverse(minus(v729, v409)); real2 v2745 = reverse(minus(v2671, v2670)); real2 v2751 = plus(v2670, v2671); real2 v2806 = minus(v2731, v2730); real2 v2810 = plus(v2730, v2731); real2 v2846 = minus(v2811, v2810); real2 v2850 = plus(v2810, v2811); real2 v2591 = plus(v329, v649); real2 v2585 = reverse(minus(v649, v329)); real2 v2750 = plus(v2590, v2591); real2 v2746 = minus(v2591, v2590); real2 v2830 = plus(v2750, v2751); real2 v2826 = minus(v2751, v2750); real2 v2845 = reverse(minus(v2831, v2830)); real2 v2851 = plus(v2830, v2831); scatter(out, 3, 128, plus(v2850, v2851)); real2 v2864 = minus(v2850, v2851); scatter(out, 67, 128, timesminusplus(v2864, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2864), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2849 = minusplus(uminus(v2845), v2846); real2 v2847 = minusplus(v2845, v2846); scatter(out, 35, 128, timesminusplus(reverse(v2847), load(tbl, 506 * VECWIDTH + tbloffset), times(v2847, load(tbl, 507 * VECWIDTH + tbloffset)))); scatter(out, 99, 128, timesminusplus(reverse(v2849), load(tbl, 508 * 
VECWIDTH + tbloffset), times(v2849, load(tbl, 509 * VECWIDTH + tbloffset)))); real2 v2827 = minusplus(v2825, v2826); real2 v2829 = minusplus(uminus(v2825), v2826); real2 v2837 = timesminusplus(reverse(v2827), load(tbl, 502 * VECWIDTH + tbloffset), times(v2827, load(tbl, 503 * VECWIDTH + tbloffset))); real2 v2809 = minusplus(uminus(v2805), v2806); real2 v2807 = minusplus(v2805, v2806); real2 v2817 = timesminusplus(reverse(v2807), load(tbl, 498 * VECWIDTH + tbloffset), times(v2807, load(tbl, 499 * VECWIDTH + tbloffset))); scatter(out, 19, 128, plus(v2817, v2837)); real2 v2870 = minus(v2817, v2837); scatter(out, 83, 128, timesminusplus(v2870, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2870), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2823 = timesminusplus(reverse(v2809), load(tbl, 500 * VECWIDTH + tbloffset), times(v2809, load(tbl, 501 * VECWIDTH + tbloffset))); real2 v2843 = timesminusplus(reverse(v2829), load(tbl, 504 * VECWIDTH + tbloffset), times(v2829, load(tbl, 505 * VECWIDTH + tbloffset))); scatter(out, 51, 128, plus(v2823, v2843)); real2 v2876 = minus(v2823, v2843); scatter(out, 115, 128, timesminusplus(v2876, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2876), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2787 = minusplus(v2785, v2786); real2 v2789 = minusplus(uminus(v2785), v2786); real2 v2803 = timesminusplus(reverse(v2789), load(tbl, 496 * VECWIDTH + tbloffset), times(v2789, load(tbl, 497 * VECWIDTH + tbloffset))); real2 v2727 = minusplus(v2725, v2726); real2 v2729 = minusplus(uminus(v2725), v2726); real2 v2743 = timesminusplus(reverse(v2729), load(tbl, 484 * VECWIDTH + tbloffset), times(v2729, load(tbl, 485 * VECWIDTH + tbloffset))); real2 v2914 = plus(v2743, v2783); real2 v2910 = minus(v2783, v2743); real2 v2749 = minusplus(uminus(v2745), v2746); real2 v2747 = minusplus(v2745, v2746); real2 v2763 = timesminusplus(reverse(v2749), load(tbl, 488 * VECWIDTH + tbloffset), times(v2749, load(tbl, 489 * VECWIDTH + tbloffset))); real2 v2909 = 
reverse(minus(v2803, v2763)); real2 v2915 = plus(v2763, v2803); scatter(out, 27, 128, plus(v2914, v2915)); real2 v2928 = minus(v2914, v2915); scatter(out, 91, 128, timesminusplus(v2928, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2928), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2913 = minusplus(uminus(v2909), v2910); scatter(out, 123, 128, timesminusplus(reverse(v2913), load(tbl, 516 * VECWIDTH + tbloffset), times(v2913, load(tbl, 517 * VECWIDTH + tbloffset)))); real2 v2911 = minusplus(v2909, v2910); scatter(out, 59, 128, timesminusplus(reverse(v2911), load(tbl, 514 * VECWIDTH + tbloffset), times(v2911, load(tbl, 515 * VECWIDTH + tbloffset)))); real2 v2737 = timesminusplus(reverse(v2727), load(tbl, 482 * VECWIDTH + tbloffset), times(v2727, load(tbl, 483 * VECWIDTH + tbloffset))); real2 v2888 = plus(v2737, v2777); real2 v2884 = minus(v2777, v2737); real2 v2797 = timesminusplus(reverse(v2787), load(tbl, 494 * VECWIDTH + tbloffset), times(v2787, load(tbl, 495 * VECWIDTH + tbloffset))); real2 v2757 = timesminusplus(reverse(v2747), load(tbl, 486 * VECWIDTH + tbloffset), times(v2747, load(tbl, 487 * VECWIDTH + tbloffset))); real2 v2889 = plus(v2757, v2797); real2 v2883 = reverse(minus(v2797, v2757)); scatter(out, 11, 128, plus(v2888, v2889)); real2 v2902 = minus(v2888, v2889); scatter(out, 75, 128, timesminusplus(v2902, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2902), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2887 = minusplus(uminus(v2883), v2884); scatter(out, 107, 128, timesminusplus(reverse(v2887), load(tbl, 512 * VECWIDTH + tbloffset), times(v2887, load(tbl, 513 * VECWIDTH + tbloffset)))); real2 v2885 = minusplus(v2883, v2884); scatter(out, 43, 128, timesminusplus(reverse(v2885), load(tbl, 510 * VECWIDTH + tbloffset), times(v2885, load(tbl, 511 * VECWIDTH + tbloffset)))); real2 v2669 = minusplus(uminus(v2665), v2666); real2 v2667 = minusplus(v2665, v2666); real2 v2707 = minusplus(v2705, v2706); real2 v2709 = minusplus(uminus(v2705), 
v2706); real2 v2717 = timesminusplus(reverse(v2707), load(tbl, 478 * VECWIDTH + tbloffset), times(v2707, load(tbl, 479 * VECWIDTH + tbloffset))); real2 v2627 = minusplus(v2625, v2626); real2 v2629 = minusplus(uminus(v2625), v2626); real2 v2637 = timesminusplus(reverse(v2627), load(tbl, 462 * VECWIDTH + tbloffset), times(v2627, load(tbl, 463 * VECWIDTH + tbloffset))); real2 v2961 = plus(v2637, v2717); real2 v2955 = reverse(minus(v2717, v2637)); real2 v2649 = minusplus(uminus(v2645), v2646); real2 v2647 = minusplus(v2645, v2646); real2 v2569 = minusplus(uminus(v2565), v2566); real2 v2567 = minusplus(v2565, v2566); real2 v2577 = timesminusplus(reverse(v2567), load(tbl, 450 * VECWIDTH + tbloffset), times(v2567, load(tbl, 451 * VECWIDTH + tbloffset))); real2 v2657 = timesminusplus(reverse(v2647), load(tbl, 466 * VECWIDTH + tbloffset), times(v2647, load(tbl, 467 * VECWIDTH + tbloffset))); real2 v2936 = minus(v2657, v2577); real2 v2940 = plus(v2577, v2657); real2 v2976 = minus(v2941, v2940); real2 v2980 = plus(v2940, v2941); real2 v2677 = timesminusplus(reverse(v2667), load(tbl, 470 * VECWIDTH + tbloffset), times(v2667, load(tbl, 471 * VECWIDTH + tbloffset))); real2 v2587 = minusplus(v2585, v2586); real2 v2589 = minusplus(uminus(v2585), v2586); real2 v2597 = timesminusplus(reverse(v2587), load(tbl, 454 * VECWIDTH + tbloffset), times(v2587, load(tbl, 455 * VECWIDTH + tbloffset))); real2 v2956 = minus(v2677, v2597); real2 v2960 = plus(v2597, v2677); real2 v2975 = reverse(minus(v2961, v2960)); real2 v2981 = plus(v2960, v2961); scatter(out, 7, 128, plus(v2980, v2981)); real2 v2994 = minus(v2980, v2981); scatter(out, 71, 128, timesminusplus(v2994, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2994), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2979 = minusplus(uminus(v2975), v2976); scatter(out, 103, 128, timesminusplus(reverse(v2979), load(tbl, 528 * VECWIDTH + tbloffset), times(v2979, load(tbl, 529 * VECWIDTH + tbloffset)))); real2 v2977 = minusplus(v2975, v2976); 
scatter(out, 39, 128, timesminusplus(reverse(v2977), load(tbl, 526 * VECWIDTH + tbloffset), times(v2977, load(tbl, 527 * VECWIDTH + tbloffset)))); real2 v2939 = minusplus(uminus(v2935), v2936); real2 v2937 = minusplus(v2935, v2936); real2 v2953 = timesminusplus(reverse(v2939), load(tbl, 520 * VECWIDTH + tbloffset), times(v2939, load(tbl, 521 * VECWIDTH + tbloffset))); real2 v2957 = minusplus(v2955, v2956); real2 v2959 = minusplus(uminus(v2955), v2956); real2 v2973 = timesminusplus(reverse(v2959), load(tbl, 524 * VECWIDTH + tbloffset), times(v2959, load(tbl, 525 * VECWIDTH + tbloffset))); scatter(out, 55, 128, plus(v2953, v2973)); real2 v3006 = minus(v2953, v2973); scatter(out, 119, 128, timesminusplus(v3006, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3006), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2947 = timesminusplus(reverse(v2937), load(tbl, 518 * VECWIDTH + tbloffset), times(v2937, load(tbl, 519 * VECWIDTH + tbloffset))); real2 v2967 = timesminusplus(reverse(v2957), load(tbl, 522 * VECWIDTH + tbloffset), times(v2957, load(tbl, 523 * VECWIDTH + tbloffset))); scatter(out, 23, 128, plus(v2947, v2967)); real2 v3000 = minus(v2947, v2967); scatter(out, 87, 128, timesminusplus(v3000, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3000), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2663 = timesminusplus(reverse(v2649), load(tbl, 468 * VECWIDTH + tbloffset), times(v2649, load(tbl, 469 * VECWIDTH + tbloffset))); real2 v2583 = timesminusplus(reverse(v2569), load(tbl, 452 * VECWIDTH + tbloffset), times(v2569, load(tbl, 453 * VECWIDTH + tbloffset))); real2 v3014 = minus(v2663, v2583); real2 v3018 = plus(v2583, v2663); real2 v3015 = minusplus(v3013, v3014); real2 v3017 = minusplus(uminus(v3013), v3014); real2 v2643 = timesminusplus(reverse(v2629), load(tbl, 464 * VECWIDTH + tbloffset), times(v2629, load(tbl, 465 * VECWIDTH + tbloffset))); real2 v2723 = timesminusplus(reverse(v2709), load(tbl, 480 * VECWIDTH + tbloffset), times(v2709, load(tbl, 481 * 
VECWIDTH + tbloffset))); real2 v3039 = plus(v2643, v2723); real2 v3033 = reverse(minus(v2723, v2643)); real2 v2683 = timesminusplus(reverse(v2669), load(tbl, 472 * VECWIDTH + tbloffset), times(v2669, load(tbl, 473 * VECWIDTH + tbloffset))); real2 v3031 = timesminusplus(reverse(v3017), load(tbl, 532 * VECWIDTH + tbloffset), times(v3017, load(tbl, 533 * VECWIDTH + tbloffset))); real2 v2603 = timesminusplus(reverse(v2589), load(tbl, 456 * VECWIDTH + tbloffset), times(v2589, load(tbl, 457 * VECWIDTH + tbloffset))); real2 v3034 = minus(v2683, v2603); real2 v3038 = plus(v2603, v2683); real2 v3037 = minusplus(uminus(v3033), v3034); real2 v3035 = minusplus(v3033, v3034); real2 v3051 = timesminusplus(reverse(v3037), load(tbl, 536 * VECWIDTH + tbloffset), times(v3037, load(tbl, 537 * VECWIDTH + tbloffset))); scatter(out, 63, 128, plus(v3031, v3051)); real2 v3084 = minus(v3031, v3051); scatter(out, 127, 128, timesminusplus(v3084, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3084), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v3025 = timesminusplus(reverse(v3015), load(tbl, 530 * VECWIDTH + tbloffset), times(v3015, load(tbl, 531 * VECWIDTH + tbloffset))); real2 v3045 = timesminusplus(reverse(v3035), load(tbl, 534 * VECWIDTH + tbloffset), times(v3035, load(tbl, 535 * VECWIDTH + tbloffset))); scatter(out, 31, 128, plus(v3025, v3045)); real2 v3078 = minus(v3025, v3045); scatter(out, 95, 128, timesminusplus(v3078, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3078), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v3058 = plus(v3018, v3019); real2 v3054 = minus(v3019, v3018); real2 v3053 = reverse(minus(v3039, v3038)); real2 v3059 = plus(v3038, v3039); real2 v3055 = minusplus(v3053, v3054); scatter(out, 47, 128, timesminusplus(reverse(v3055), load(tbl, 538 * VECWIDTH + tbloffset), times(v3055, load(tbl, 539 * VECWIDTH + tbloffset)))); real2 v3057 = minusplus(uminus(v3053), v3054); scatter(out, 111, 128, timesminusplus(reverse(v3057), load(tbl, 540 * VECWIDTH + 
tbloffset), times(v3057, load(tbl, 541 * VECWIDTH + tbloffset)))); scatter(out, 15, 128, plus(v3058, v3059)); real2 v3072 = minus(v3058, v3059); scatter(out, 79, 128, timesminusplus(v3072, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3072), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v683 = timesminusplus(reverse(v673), load(tbl, 110 * VECWIDTH + tbloffset), times(v673, load(tbl, 111 * VECWIDTH + tbloffset))); real2 v363 = timesminusplus(reverse(v353), load(tbl, 46 * VECWIDTH + tbloffset), times(v353, load(tbl, 47 * VECWIDTH + tbloffset))); real2 v2105 = plus(v363, v683); real2 v2099 = reverse(minus(v683, v363)); real2 v283 = timesminusplus(reverse(v273), load(tbl, 30 * VECWIDTH + tbloffset), times(v273, load(tbl, 31 * VECWIDTH + tbloffset))); real2 v723 = timesminusplus(reverse(v713), load(tbl, 118 * VECWIDTH + tbloffset), times(v713, load(tbl, 119 * VECWIDTH + tbloffset))); real2 v403 = timesminusplus(reverse(v393), load(tbl, 54 * VECWIDTH + tbloffset), times(v393, load(tbl, 55 * VECWIDTH + tbloffset))); real2 v603 = timesminusplus(reverse(v593), load(tbl, 94 * VECWIDTH + tbloffset), times(v593, load(tbl, 95 * VECWIDTH + tbloffset))); real2 v2180 = minus(v603, v283); real2 v2184 = plus(v283, v603); real2 v2145 = plus(v403, v723); real2 v2139 = reverse(minus(v723, v403)); real2 v543 = timesminusplus(reverse(v533), load(tbl, 82 * VECWIDTH + tbloffset), times(v533, load(tbl, 83 * VECWIDTH + tbloffset))); real2 v383 = timesminusplus(reverse(v373), load(tbl, 50 * VECWIDTH + tbloffset), times(v373, load(tbl, 51 * VECWIDTH + tbloffset))); real2 v703 = timesminusplus(reverse(v693), load(tbl, 114 * VECWIDTH + tbloffset), times(v693, load(tbl, 115 * VECWIDTH + tbloffset))); real2 v2125 = plus(v383, v703); real2 v2119 = reverse(minus(v703, v383)); real2 v223 = timesminusplus(reverse(v213), load(tbl, 18 * VECWIDTH + tbloffset), times(v213, load(tbl, 19 * VECWIDTH + tbloffset))); real2 v2120 = minus(v543, v223); real2 v2124 = plus(v223, v543); real2 v443 = 
timesminusplus(reverse(v433), load(tbl, 62 * VECWIDTH + tbloffset), times(v433, load(tbl, 63 * VECWIDTH + tbloffset))); real2 v203 = timesminusplus(reverse(v193), load(tbl, 14 * VECWIDTH + tbloffset), times(v193, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v763 = timesminusplus(reverse(v753), load(tbl, 126 * VECWIDTH + tbloffset), times(v753, load(tbl, 127 * VECWIDTH + tbloffset))); real2 v2179 = reverse(minus(v763, v443)); real2 v2185 = plus(v443, v763); real2 v523 = timesminusplus(reverse(v513), load(tbl, 78 * VECWIDTH + tbloffset), times(v513, load(tbl, 79 * VECWIDTH + tbloffset))); real2 v2100 = minus(v523, v203); real2 v2104 = plus(v203, v523); real2 v2264 = plus(v2104, v2105); real2 v2260 = minus(v2105, v2104); real2 v643 = timesminusplus(reverse(v633), load(tbl, 102 * VECWIDTH + tbloffset), times(v633, load(tbl, 103 * VECWIDTH + tbloffset))); real2 v2265 = plus(v2184, v2185); real2 v2259 = reverse(minus(v2185, v2184)); real2 v563 = timesminusplus(reverse(v553), load(tbl, 86 * VECWIDTH + tbloffset), times(v553, load(tbl, 87 * VECWIDTH + tbloffset))); real2 v243 = timesminusplus(reverse(v233), load(tbl, 22 * VECWIDTH + tbloffset), times(v233, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v2144 = plus(v243, v563); real2 v2140 = minus(v563, v243); real2 v143 = timesminusplus(reverse(v133), load(tbl, 2 * VECWIDTH + tbloffset), times(v133, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v183 = timesminusplus(reverse(v173), load(tbl, 10 * VECWIDTH + tbloffset), times(v173, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v2084 = plus(v183, v503); real2 v2080 = minus(v503, v183); real2 v163 = timesminusplus(reverse(v153), load(tbl, 6 * VECWIDTH + tbloffset), times(v153, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v303 = timesminusplus(reverse(v293), load(tbl, 34 * VECWIDTH + tbloffset), times(v293, load(tbl, 35 * VECWIDTH + tbloffset))); real2 v623 = timesminusplus(reverse(v613), load(tbl, 98 * VECWIDTH + tbloffset), times(v613, load(tbl, 99 * VECWIDTH + tbloffset))); 
real2 v2039 = reverse(minus(v623, v303)); real2 v2045 = plus(v303, v623); real2 v463 = timesminusplus(reverse(v453), load(tbl, 66 * VECWIDTH + tbloffset), times(v453, load(tbl, 67 * VECWIDTH + tbloffset))); real2 v2044 = plus(v143, v463); real2 v2040 = minus(v463, v143); real2 v2204 = plus(v2044, v2045); real2 v2200 = minus(v2045, v2044); real2 v323 = timesminusplus(reverse(v313), load(tbl, 38 * VECWIDTH + tbloffset), times(v313, load(tbl, 39 * VECWIDTH + tbloffset))); real2 v2205 = plus(v2124, v2125); real2 v2199 = reverse(minus(v2125, v2124)); real2 v2280 = minus(v2205, v2204); real2 v2284 = plus(v2204, v2205); real2 v2225 = plus(v2144, v2145); real2 v2219 = reverse(minus(v2145, v2144)); real2 v2305 = plus(v2264, v2265); real2 v2299 = reverse(minus(v2265, v2264)); real2 v2240 = minus(v2085, v2084); real2 v2244 = plus(v2084, v2085); real2 v2279 = reverse(minus(v2245, v2244)); real2 v2285 = plus(v2244, v2245); real2 v2281 = minusplus(v2279, v2280); real2 v2283 = minusplus(uminus(v2279), v2280); real2 v2291 = timesminusplus(reverse(v2281), load(tbl, 406 * VECWIDTH + tbloffset), times(v2281, load(tbl, 407 * VECWIDTH + tbloffset))); real2 v483 = timesminusplus(reverse(v473), load(tbl, 70 * VECWIDTH + tbloffset), times(v473, load(tbl, 71 * VECWIDTH + tbloffset))); real2 v2060 = minus(v483, v163); real2 v2064 = plus(v163, v483); real2 v2065 = plus(v323, v643); real2 v2059 = reverse(minus(v643, v323)); real2 v2220 = minus(v2065, v2064); real2 v2224 = plus(v2064, v2065); real2 v2304 = plus(v2224, v2225); real2 v2300 = minus(v2225, v2224); real2 v2301 = minusplus(v2299, v2300); real2 v2303 = minusplus(uminus(v2299), v2300); real2 v2311 = timesminusplus(reverse(v2301), load(tbl, 410 * VECWIDTH + tbloffset), times(v2301, load(tbl, 411 * VECWIDTH + tbloffset))); scatter(out, 17, 128, plus(v2291, v2311)); real2 v2344 = minus(v2291, v2311); scatter(out, 81, 128, timesminusplus(v2344, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2344), load(tbl, 1 * VECWIDTH + 
tbloffset)))); real2 v2297 = timesminusplus(reverse(v2283), load(tbl, 408 * VECWIDTH + tbloffset), times(v2283, load(tbl, 409 * VECWIDTH + tbloffset))); real2 v2317 = timesminusplus(reverse(v2303), load(tbl, 412 * VECWIDTH + tbloffset), times(v2303, load(tbl, 413 * VECWIDTH + tbloffset))); scatter(out, 49, 128, plus(v2297, v2317)); real2 v2350 = minus(v2297, v2317); scatter(out, 113, 128, timesminusplus(v2350, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2350), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2320 = minus(v2285, v2284); real2 v2324 = plus(v2284, v2285); real2 v2325 = plus(v2304, v2305); real2 v2319 = reverse(minus(v2305, v2304)); scatter(out, 1, 128, plus(v2324, v2325)); real2 v2338 = minus(v2324, v2325); scatter(out, 65, 128, timesminusplus(v2338, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2338), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2321 = minusplus(v2319, v2320); scatter(out, 33, 128, timesminusplus(reverse(v2321), load(tbl, 414 * VECWIDTH + tbloffset), times(v2321, load(tbl, 415 * VECWIDTH + tbloffset)))); real2 v2323 = minusplus(uminus(v2319), v2320); scatter(out, 97, 128, timesminusplus(reverse(v2323), load(tbl, 416 * VECWIDTH + tbloffset), times(v2323, load(tbl, 417 * VECWIDTH + tbloffset)))); real2 v2201 = minusplus(v2199, v2200); real2 v2203 = minusplus(uminus(v2199), v2200); real2 v2263 = minusplus(uminus(v2259), v2260); real2 v2261 = minusplus(v2259, v2260); real2 v2243 = minusplus(uminus(v2239), v2240); real2 v2241 = minusplus(v2239, v2240); real2 v2257 = timesminusplus(reverse(v2243), load(tbl, 400 * VECWIDTH + tbloffset), times(v2243, load(tbl, 401 * VECWIDTH + tbloffset))); real2 v2217 = timesminusplus(reverse(v2203), load(tbl, 392 * VECWIDTH + tbloffset), times(v2203, load(tbl, 393 * VECWIDTH + tbloffset))); real2 v2388 = plus(v2217, v2257); real2 v2384 = minus(v2257, v2217); real2 v2277 = timesminusplus(reverse(v2263), load(tbl, 404 * VECWIDTH + tbloffset), times(v2263, load(tbl, 405 * VECWIDTH + 
tbloffset))); real2 v2221 = minusplus(v2219, v2220); real2 v2223 = minusplus(uminus(v2219), v2220); real2 v2237 = timesminusplus(reverse(v2223), load(tbl, 396 * VECWIDTH + tbloffset), times(v2223, load(tbl, 397 * VECWIDTH + tbloffset))); real2 v2389 = plus(v2237, v2277); real2 v2383 = reverse(minus(v2277, v2237)); scatter(out, 25, 128, plus(v2388, v2389)); real2 v2402 = minus(v2388, v2389); scatter(out, 89, 128, timesminusplus(v2402, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2402), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2385 = minusplus(v2383, v2384); real2 v2387 = minusplus(uminus(v2383), v2384); scatter(out, 121, 128, timesminusplus(reverse(v2387), load(tbl, 424 * VECWIDTH + tbloffset), times(v2387, load(tbl, 425 * VECWIDTH + tbloffset)))); scatter(out, 57, 128, timesminusplus(reverse(v2385), load(tbl, 422 * VECWIDTH + tbloffset), times(v2385, load(tbl, 423 * VECWIDTH + tbloffset)))); real2 v2251 = timesminusplus(reverse(v2241), load(tbl, 398 * VECWIDTH + tbloffset), times(v2241, load(tbl, 399 * VECWIDTH + tbloffset))); real2 v2211 = timesminusplus(reverse(v2201), load(tbl, 390 * VECWIDTH + tbloffset), times(v2201, load(tbl, 391 * VECWIDTH + tbloffset))); real2 v2358 = minus(v2251, v2211); real2 v2362 = plus(v2211, v2251); real2 v2271 = timesminusplus(reverse(v2261), load(tbl, 402 * VECWIDTH + tbloffset), times(v2261, load(tbl, 403 * VECWIDTH + tbloffset))); real2 v2231 = timesminusplus(reverse(v2221), load(tbl, 394 * VECWIDTH + tbloffset), times(v2221, load(tbl, 395 * VECWIDTH + tbloffset))); real2 v2357 = reverse(minus(v2271, v2231)); real2 v2363 = plus(v2231, v2271); scatter(out, 9, 128, plus(v2362, v2363)); real2 v2376 = minus(v2362, v2363); scatter(out, 73, 128, timesminusplus(v2376, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2376), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2361 = minusplus(uminus(v2357), v2358); scatter(out, 105, 128, timesminusplus(reverse(v2361), load(tbl, 420 * VECWIDTH + tbloffset), times(v2361, load(tbl, 
421 * VECWIDTH + tbloffset)))); real2 v2359 = minusplus(v2357, v2358); scatter(out, 41, 128, timesminusplus(reverse(v2359), load(tbl, 418 * VECWIDTH + tbloffset), times(v2359, load(tbl, 419 * VECWIDTH + tbloffset)))); real2 v2121 = minusplus(v2119, v2120); real2 v2123 = minusplus(uminus(v2119), v2120); real2 v2083 = minusplus(uminus(v2079), v2080); real2 v2081 = minusplus(v2079, v2080); real2 v2091 = timesminusplus(reverse(v2081), load(tbl, 366 * VECWIDTH + tbloffset), times(v2081, load(tbl, 367 * VECWIDTH + tbloffset))); real2 v2043 = minusplus(uminus(v2039), v2040); real2 v2041 = minusplus(v2039, v2040); real2 v2051 = timesminusplus(reverse(v2041), load(tbl, 358 * VECWIDTH + tbloffset), times(v2041, load(tbl, 359 * VECWIDTH + tbloffset))); real2 v2131 = timesminusplus(reverse(v2121), load(tbl, 374 * VECWIDTH + tbloffset), times(v2121, load(tbl, 375 * VECWIDTH + tbloffset))); real2 v2163 = minusplus(uminus(v2159), v2160); real2 v2161 = minusplus(v2159, v2160); real2 v2171 = timesminusplus(reverse(v2161), load(tbl, 382 * VECWIDTH + tbloffset), times(v2161, load(tbl, 383 * VECWIDTH + tbloffset))); real2 v2409 = reverse(minus(v2171, v2091)); real2 v2415 = plus(v2091, v2171); real2 v2410 = minus(v2131, v2051); real2 v2414 = plus(v2051, v2131); real2 v2454 = plus(v2414, v2415); real2 v2450 = minus(v2415, v2414); real2 v2181 = minusplus(v2179, v2180); real2 v2183 = minusplus(uminus(v2179), v2180); real2 v2191 = timesminusplus(reverse(v2181), load(tbl, 386 * VECWIDTH + tbloffset), times(v2181, load(tbl, 387 * VECWIDTH + tbloffset))); real2 v2103 = minusplus(uminus(v2099), v2100); real2 v2101 = minusplus(v2099, v2100); real2 v2111 = timesminusplus(reverse(v2101), load(tbl, 370 * VECWIDTH + tbloffset), times(v2101, load(tbl, 371 * VECWIDTH + tbloffset))); real2 v2435 = plus(v2111, v2191); real2 v2429 = reverse(minus(v2191, v2111)); real2 v2141 = minusplus(v2139, v2140); real2 v2143 = minusplus(uminus(v2139), v2140); real2 v2151 = timesminusplus(reverse(v2141), load(tbl, 
378 * VECWIDTH + tbloffset), times(v2141, load(tbl, 379 * VECWIDTH + tbloffset))); real2 v2063 = minusplus(uminus(v2059), v2060); real2 v2061 = minusplus(v2059, v2060); real2 v2071 = timesminusplus(reverse(v2061), load(tbl, 362 * VECWIDTH + tbloffset), times(v2061, load(tbl, 363 * VECWIDTH + tbloffset))); real2 v2434 = plus(v2071, v2151); real2 v2430 = minus(v2151, v2071); real2 v2455 = plus(v2434, v2435); real2 v2449 = reverse(minus(v2435, v2434)); scatter(out, 5, 128, plus(v2454, v2455)); real2 v2468 = minus(v2454, v2455); scatter(out, 69, 128, timesminusplus(v2468, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2468), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2451 = minusplus(v2449, v2450); real2 v2453 = minusplus(uminus(v2449), v2450); scatter(out, 101, 128, timesminusplus(reverse(v2453), load(tbl, 436 * VECWIDTH + tbloffset), times(v2453, load(tbl, 437 * VECWIDTH + tbloffset)))); scatter(out, 37, 128, timesminusplus(reverse(v2451), load(tbl, 434 * VECWIDTH + tbloffset), times(v2451, load(tbl, 435 * VECWIDTH + tbloffset)))); real2 v2411 = minusplus(v2409, v2410); real2 v2413 = minusplus(uminus(v2409), v2410); real2 v2433 = minusplus(uminus(v2429), v2430); real2 v2431 = minusplus(v2429, v2430); real2 v2421 = timesminusplus(reverse(v2411), load(tbl, 426 * VECWIDTH + tbloffset), times(v2411, load(tbl, 427 * VECWIDTH + tbloffset))); real2 v2441 = timesminusplus(reverse(v2431), load(tbl, 430 * VECWIDTH + tbloffset), times(v2431, load(tbl, 431 * VECWIDTH + tbloffset))); scatter(out, 21, 128, plus(v2421, v2441)); real2 v2474 = minus(v2421, v2441); scatter(out, 85, 128, timesminusplus(v2474, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2474), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2427 = timesminusplus(reverse(v2413), load(tbl, 428 * VECWIDTH + tbloffset), times(v2413, load(tbl, 429 * VECWIDTH + tbloffset))); real2 v2447 = timesminusplus(reverse(v2433), load(tbl, 432 * VECWIDTH + tbloffset), times(v2433, load(tbl, 433 * VECWIDTH + tbloffset))); 
scatter(out, 53, 128, plus(v2427, v2447)); real2 v2480 = minus(v2427, v2447); scatter(out, 117, 128, timesminusplus(v2480, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2480), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2057 = timesminusplus(reverse(v2043), load(tbl, 360 * VECWIDTH + tbloffset), times(v2043, load(tbl, 361 * VECWIDTH + tbloffset))); real2 v2097 = timesminusplus(reverse(v2083), load(tbl, 368 * VECWIDTH + tbloffset), times(v2083, load(tbl, 369 * VECWIDTH + tbloffset))); real2 v2157 = timesminusplus(reverse(v2143), load(tbl, 380 * VECWIDTH + tbloffset), times(v2143, load(tbl, 381 * VECWIDTH + tbloffset))); real2 v2197 = timesminusplus(reverse(v2183), load(tbl, 388 * VECWIDTH + tbloffset), times(v2183, load(tbl, 389 * VECWIDTH + tbloffset))); real2 v2117 = timesminusplus(reverse(v2103), load(tbl, 372 * VECWIDTH + tbloffset), times(v2103, load(tbl, 373 * VECWIDTH + tbloffset))); real2 v2507 = reverse(minus(v2197, v2117)); real2 v2513 = plus(v2117, v2197); real2 v2137 = timesminusplus(reverse(v2123), load(tbl, 376 * VECWIDTH + tbloffset), times(v2123, load(tbl, 377 * VECWIDTH + tbloffset))); real2 v2488 = minus(v2137, v2057); real2 v2492 = plus(v2057, v2137); real2 v2177 = timesminusplus(reverse(v2163), load(tbl, 384 * VECWIDTH + tbloffset), times(v2163, load(tbl, 385 * VECWIDTH + tbloffset))); real2 v2493 = plus(v2097, v2177); real2 v2487 = reverse(minus(v2177, v2097)); real2 v2532 = plus(v2492, v2493); real2 v2528 = minus(v2493, v2492); real2 v2077 = timesminusplus(reverse(v2063), load(tbl, 364 * VECWIDTH + tbloffset), times(v2063, load(tbl, 365 * VECWIDTH + tbloffset))); real2 v2512 = plus(v2077, v2157); real2 v2508 = minus(v2157, v2077); real2 v2527 = reverse(minus(v2513, v2512)); real2 v2533 = plus(v2512, v2513); real2 v2529 = minusplus(v2527, v2528); real2 v2531 = minusplus(uminus(v2527), v2528); scatter(out, 109, 128, timesminusplus(reverse(v2531), load(tbl, 448 * VECWIDTH + tbloffset), times(v2531, load(tbl, 449 * VECWIDTH + 
tbloffset)))); scatter(out, 45, 128, timesminusplus(reverse(v2529), load(tbl, 446 * VECWIDTH + tbloffset), times(v2529, load(tbl, 447 * VECWIDTH + tbloffset)))); scatter(out, 13, 128, plus(v2532, v2533)); real2 v2546 = minus(v2532, v2533); scatter(out, 77, 128, timesminusplus(v2546, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2546), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2509 = minusplus(v2507, v2508); real2 v2511 = minusplus(uminus(v2507), v2508); real2 v2491 = minusplus(uminus(v2487), v2488); real2 v2489 = minusplus(v2487, v2488); real2 v2499 = timesminusplus(reverse(v2489), load(tbl, 438 * VECWIDTH + tbloffset), times(v2489, load(tbl, 439 * VECWIDTH + tbloffset))); real2 v2519 = timesminusplus(reverse(v2509), load(tbl, 442 * VECWIDTH + tbloffset), times(v2509, load(tbl, 443 * VECWIDTH + tbloffset))); scatter(out, 29, 128, plus(v2499, v2519)); real2 v2552 = minus(v2499, v2519); scatter(out, 93, 128, timesminusplus(v2552, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2552), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2505 = timesminusplus(reverse(v2491), load(tbl, 440 * VECWIDTH + tbloffset), times(v2491, load(tbl, 441 * VECWIDTH + tbloffset))); real2 v2525 = timesminusplus(reverse(v2511), load(tbl, 444 * VECWIDTH + tbloffset), times(v2511, load(tbl, 445 * VECWIDTH + tbloffset))); scatter(out, 61, 128, plus(v2505, v2525)); real2 v2558 = minus(v2505, v2525); scatter(out, 125, 128, timesminusplus(v2558, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2558), load(tbl, 1 * VECWIDTH + tbloffset)))); // Pres : 76263 } } #endif // #undef EMITREALSUB #ifdef EMITREALSUB ALIGNED(8192) void realSub0_%ISA%(real *d, const real *s, const int log2len, const real *rtCoef0, const real *rtCoef1) { const int n = 1 << log2len; real s0 = s[0], s1 = s[1]; int k=1; d[n+0] = s[n+0]; d[n+1] = s[n+1]; for(;;k+=VECWIDTH) { int idx0 = k, idx1 = n-VECWIDTH+1-k; if (idx0 + VECWIDTH >= idx1) break; real2 v = loadu(s, idx0); real2 u = reverse2(load (s, idx1)); real2 
t = minusplus(v, u); real2 m = minusplus(reverse(times(t, loadu(rtCoef1, k))), times(t, loadu(rtCoef0, k))); storeu(d, idx0, minusplus(v, uminus(m))); store (d, idx1, reverse2(minus(u, m))); } for(;k= idx1) break; real2 v = loadu(s, idx0); real2 u = reverse2(load (s, idx1)); real2 t = minusplus(v, u); real2 m = minusplus(reverse(times(t, loadu(rtCoef1, k))), times(t, loadu(rtCoef0, k))); storeu(d, idx0, ctimes(uplusminus(minus(u, m)), c)); store (d, idx1, ctimes(reverse2(minusplus(m, uminus(v))), c)); } for(;k #include "sleef.h" #ifdef ENABLE_SSE2 #include "helpersse2.h" #endif #ifdef ENABLE_AVX #include "helperavx.h" #endif #ifdef ENABLE_AVX2 #include "helperavx2.h" #endif #ifdef ENABLE_AVX512F #include "helperavx512f.h" #endif #ifdef ENABLE_NEON32 #include "helperneon32.h" #endif #ifdef ENABLE_ADVSIMD #include "helperadvsimd.h" #endif #ifdef ENABLE_SVE #include "helpersve.h" #endif #ifdef ENABLE_VSX #include "helperpower_128.h" #endif #ifdef ENABLE_ZVECTOR #include "helpers390x_128.h" #endif #ifdef ENABLE_ZVECTOR2 #include "helpers390x_128.h" #endif #ifdef ENABLE_VECEXT #include "helpervecext.h" #endif #ifdef ENABLE_PUREC #include "helperpurec.h" #endif #define IMPORT_IS_EXPORT #include "sleefdft.h" #if BASETYPEID == 1 #define LOG2VECWIDTH (LOG2VECTLENDP-1) #define VECWIDTH (1 << LOG2VECWIDTH) typedef double real; typedef vdouble real2; static int available(int name) { return vavailability_i(name); } static INLINE real2 uminus(real2 d0) { return vneg_vd_vd(d0); } static INLINE real2 uplusminus(real2 d0) { return vposneg_vd_vd(d0); } static INLINE real2 uminusplus(real2 d0) { return vnegpos_vd_vd(d0); } static INLINE real2 plus(real2 d0, real2 d1) { return vadd_vd_vd_vd(d0, d1); } static INLINE real2 minus(real2 d0, real2 d1) { return vsub_vd_vd_vd(d0, d1); } static INLINE real2 minusplus(real2 d0, real2 d1) { return vsubadd_vd_vd_vd(d0, d1); } static INLINE real2 times(real2 d0, real2 d1) { return vmul_vd_vd_vd(d0, d1); } static INLINE real2 timesminusplus(real2 
d0, real2 d2, real2 d1) { return vmlsubadd_vd_vd_vd_vd(d0, d2, d1); } static INLINE real2 ctimes(real2 d0, real d) { return vmul_vd_vd_vd(d0, vcast_vd_d(d)); } static INLINE real2 ctimesminusplus(real2 d0, real c, real2 d1) { return vmlsubadd_vd_vd_vd_vd(d0, vcast_vd_d(c), d1); } static INLINE real2 reverse(real2 d0) { return vrev21_vd_vd(d0); } static INLINE real2 reverse2(real2 d0) { return vreva2_vd_vd(d0); } static INLINE real2 loadc(real c) { return vcast_vd_d(c); } static INLINE real2 load(const real *ptr, int offset) { return vload_vd_p(&ptr[2*offset]); } static INLINE real2 loadu(const real *ptr, int offset) { return vloadu_vd_p(&ptr[2*offset]); } static INLINE void store(real *ptr, int offset, real2 v) { vstore_v_p_vd(&ptr[2*offset], v); } static INLINE void storeu(real *ptr, int offset, real2 v) { vstoreu_v_p_vd(&ptr[2*offset], v); } static INLINE void stream(real *ptr, int offset, real2 v) { vstream_v_p_vd(&ptr[2*offset], v); } static INLINE void scatter(real *ptr, int offset, int step, real2 v) { vscatter2_v_p_i_i_vd(ptr, offset, step, v); } static INLINE void scstream(real *ptr, int offset, int step, real2 v) { vsscatter2_v_p_i_i_vd(ptr, offset, step, v); } static INLINE void prefetch(real *ptr, int offset) { vprefetch_v_p(&ptr[2*offset]); } #elif BASETYPEID == 2 #define LOG2VECWIDTH (LOG2VECTLENSP-1) #define VECWIDTH (1 << LOG2VECWIDTH) typedef float real; typedef vfloat real2; static int available(int name) { return vavailability_i(name); } static INLINE real2 uminus(real2 d0) { return vneg_vf_vf(d0); } static INLINE real2 uplusminus(real2 d0) { return vposneg_vf_vf(d0); } static INLINE real2 uminusplus(real2 d0) { return vnegpos_vf_vf(d0); } static INLINE real2 plus(real2 d0, real2 d1) { return vadd_vf_vf_vf(d0, d1); } static INLINE real2 minus(real2 d0, real2 d1) { return vsub_vf_vf_vf(d0, d1); } static INLINE real2 minusplus(real2 d0, real2 d1) { return vsubadd_vf_vf_vf(d0, d1); } static INLINE real2 times(real2 d0, real2 d1) { return 
vmul_vf_vf_vf(d0, d1); } static INLINE real2 ctimes(real2 d0, real d) { return vmul_vf_vf_vf(d0, vcast_vf_f(d)); } static INLINE real2 timesminusplus(real2 d0, real2 d2, real2 d1) { return vmlsubadd_vf_vf_vf_vf(d0, d2, d1); } static INLINE real2 ctimesminusplus(real2 d0, real c, real2 d1) { return vmlsubadd_vf_vf_vf_vf(d0, vcast_vf_f(c), d1); } static INLINE real2 reverse(real2 d0) { return vrev21_vf_vf(d0); } static INLINE real2 reverse2(real2 d0) { return vreva2_vf_vf(d0); } static INLINE real2 loadc(real c) { return vcast_vf_f(c); } static INLINE real2 load(const real *ptr, int offset) { return vload_vf_p(&ptr[2*offset]); } static INLINE real2 loadu(const real *ptr, int offset) { return vloadu_vf_p(&ptr[2*offset]); } static INLINE void store(real *ptr, int offset, real2 v) { vstore_v_p_vf(&ptr[2*offset], v); } static INLINE void storeu(real *ptr, int offset, real2 v) { vstoreu_v_p_vf(&ptr[2*offset], v); } static INLINE void stream(real *ptr, int offset, real2 v) { vstream_v_p_vf(&ptr[2*offset], v); } static INLINE void scatter(real *ptr, int offset, int step, real2 v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } static INLINE void scstream(real *ptr, int offset, int step, real2 v) { vsscatter2_v_p_i_i_vf(ptr, offset, step, v); } static INLINE void prefetch(real *ptr, int offset) { vprefetch_v_p(&ptr[2*offset]); } #elif BASETYPEID == 3 #define LOG2VECWIDTH (LOG2VECTLENDP-1) #define VECWIDTH (1 << LOG2VECWIDTH) typedef long double real; typedef vlongdouble real2; static int available(int name) { return vavailability_i(name); } static INLINE real2 uminus(real2 d0) { return vneg_vl_vl(d0); } static INLINE real2 uplusminus(real2 d0) { return vposneg_vl_vl(d0); } static INLINE real2 uminusplus(real2 d0) { return vnegpos_vl_vl(d0); } static INLINE real2 plus(real2 d0, real2 d1) { return vadd_vl_vl_vl(d0, d1); } static INLINE real2 minus(real2 d0, real2 d1) { return vsub_vl_vl_vl(d0, d1); } static INLINE real2 minusplus(real2 d0, real2 d1) { return 
vsubadd_vl_vl_vl(d0, d1); } static INLINE real2 times(real2 d0, real2 d1) { return vmul_vl_vl_vl(d0, d1); } static INLINE real2 ctimes(real2 d0, real d) { return vmul_vl_vl_vl(d0, vcast_vl_l(d)); } static INLINE real2 timesminusplus(real2 d0, real2 d2, real2 d1) { return vmlsubadd_vl_vl_vl_vl(d0, d2, d1); } static INLINE real2 ctimesminusplus(real2 d0, real c, real2 d1) { return vmlsubadd_vl_vl_vl_vl(d0, vcast_vl_l(c), d1); } static INLINE real2 reverse(real2 d0) { return vrev21_vl_vl(d0); } static INLINE real2 reverse2(real2 d0) { return vreva2_vl_vl(d0); } static INLINE real2 loadc(real c) { return vcast_vl_l(c); } static INLINE real2 load(const real *ptr, int offset) { return vload_vl_p(&ptr[2*offset]); } static INLINE real2 loadu(const real *ptr, int offset) { return vloadu_vl_p(&ptr[2*offset]); } static INLINE void store(real *ptr, int offset, real2 v) { vstore_v_p_vl(&ptr[2*offset], v); } static INLINE void storeu(real *ptr, int offset, real2 v) { vstoreu_v_p_vl(&ptr[2*offset], v); } static INLINE void stream(real *ptr, int offset, real2 v) { vstream_v_p_vl(&ptr[2*offset], v); } static INLINE void scatter(real *ptr, int offset, int step, real2 v) { vscatter2_v_p_i_i_vl(ptr, offset, step, v); } static INLINE void scstream(real *ptr, int offset, int step, real2 v) { vsscatter2_v_p_i_i_vl(ptr, offset, step, v); } static INLINE void prefetch(real *ptr, int offset) { vprefetch_v_p(&ptr[2*offset]); } #elif BASETYPEID == 4 #define LOG2VECWIDTH (LOG2VECTLENDP-1) #define VECWIDTH (1 << LOG2VECWIDTH) typedef Sleef_quad real; typedef vquad real2; static int available(int name) { return vavailability_i(name); } static INLINE real2 uminus(real2 d0) { return vneg_vq_vq(d0); } static INLINE real2 uplusminus(real2 d0) { return vposneg_vq_vq(d0); } static INLINE real2 uminusplus(real2 d0) { return vnegpos_vq_vq(d0); } static INLINE real2 plus(real2 d0, real2 d1) { return vadd_vq_vq_vq(d0, d1); } static INLINE real2 minus(real2 d0, real2 d1) { return vsub_vq_vq_vq(d0, d1); } 
static INLINE real2 minusplus(real2 d0, real2 d1) { return vsubadd_vq_vq_vq(d0, d1); } static INLINE real2 times(real2 d0, real2 d1) { return vmul_vq_vq_vq(d0, d1); } static INLINE real2 ctimes(real2 d0, real d) { return vmul_vq_vq_vq(d0, vcast_vq_q(d)); } static INLINE real2 timesminusplus(real2 d0, real2 d2, real2 d1) { return vmlsubadd_vq_vq_vq_vq(d0, d2, d1); } static INLINE real2 ctimesminusplus(real2 d0, real c, real2 d1) { return vmlsubadd_vq_vq_vq_vq(d0, vcast_vq_q(c), d1); } static INLINE real2 reverse(real2 d0) { return vrev21_vq_vq(d0); } static INLINE real2 reverse2(real2 d0) { return vreva2_vq_vq(d0); } static INLINE real2 loadc(real c) { return vcast_vq_q(c); } static INLINE real2 load(const real *ptr, int offset) { return vload_vq_p(&ptr[2*offset]); } static INLINE real2 loadu(const real *ptr, int offset) { return vloadu_vq_p(&ptr[2*offset]); } static INLINE void store(real *ptr, int offset, real2 v) { vstore_v_p_vq(&ptr[2*offset], v); } static INLINE void storeu(real *ptr, int offset, real2 v) { vstoreu_v_p_vq(&ptr[2*offset], v); } static INLINE void stream(real *ptr, int offset, real2 v) { vstream_v_p_vq(&ptr[2*offset], v); } static INLINE void scatter(real *ptr, int offset, int step, real2 v) { vscatter2_v_p_i_i_vq(ptr, offset, step, v); } static INLINE void scstream(real *ptr, int offset, int step, real2 v) { vsscatter2_v_p_i_i_vq(ptr, offset, step, v); } static INLINE void prefetch(real *ptr, int offset) { vprefetch_v_p(&ptr[2*offset]); } #else #error No BASETYPEID specified #endif #endif sleef-3.5.1/src/gencoef/000077500000000000000000000000001373003144100150075ustar00rootroot00000000000000sleef-3.5.1/src/gencoef/Makefile000066400000000000000000000006551373003144100164550ustar00rootroot00000000000000.PHONY: all all : gencoef mkrempitab mkrempitabqp gencoef : gencoef.c simplexfr.c sp.h dp.h ld.h qp.h gcc -O gencoef.c simplexfr.c -o gencoef -lmpfr -lm mkrempitab : mkrempitab.c gcc -O mkrempitab.c -o mkrempitab -lmpfr mkrempitabqp : mkrempitabqp.c 
gcc -O mkrempitabqp.c -o mkrempitabqp -lmpfr .PHONY: clean clean : rm -f gencoef gencoefdp gencoefld mkrempitab mkrempitabqp a.out *~ rm -f *.obj *.lib *.dll *.exp *.exe sleef-3.5.1/src/gencoef/dp.h000066400000000000000000000103411373003144100155620ustar00rootroot00000000000000// This is part of SLEEF, written by Naoki // Shibata. http://shibatch.sourceforge.net // The code in this file is distributed under the Creative Commons // Attribution 4.0 International License. #define PREC_TARGET 53 #if 0 #define N 8 // Degree of equation #define S 40 // Number of samples for phase 1 #define L 4 // Number of high precision coefficients #define MIN 0.0 // Min argument #define MAX (M_PI/4) // Max argument #define PMUL 2 // The form of polynomial is y = x^(PADD+PMUL*0) + x^(PADD+PMUL*1) + ... #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_sin(ret, a, GMP_RNDN); } // The function to approximate void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 // Fix coef 0 to 1.0 #endif #if 0 #define N 10 #define S 40 #define L 2 #define MIN 0.0 #define MAX (M_PI/4) void TARGET(mpfr_t ret, mpfr_t a) { // cos(x) - 1 mpfr_t x; mpfr_init(x); mpfr_cos(ret, a, GMP_RNDN); mpfr_set_ld(x, 1, GMP_RNDN); mpfr_sub(ret, ret, x, GMP_RNDN); mpfr_clear(x); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define PMUL 2 #define PADD 2 #define FIXCOEF0 (-0.5) #endif #if 0 // for xsincospi4_u05 #define S 40 #define N 8 #define L 2 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_sin(ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #endif #if 0 // for xsincospi4_u05 #define N 8 #define S 40 #define L 2 #define MIN 0.0 #define MAX 1.0 void TARGET(mpfr_t ret, mpfr_t a) { 
mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_cos(ret, x, GMP_RNDN); mpfr_set_ld(x, 1, GMP_RNDN); mpfr_sub(ret, ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define PMUL 2 #define PADD 2 #endif #if 0 // for xsincospi4 #define N 7 #define S 40 #define L 0 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_sin(ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #endif #if 0 #define N 17 #define S 60 #define L 0 #define MIN 0.0 #define MAX (M_PI/4) #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_tan(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif #if 0 #define N 11 #define S 35 #define L 2 #define MIN 1 //0.75 #define MAX 1.5 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_log(ret, a, GMP_RNDN); } void CFUNC(mpfr_t frd, mpfr_t fra) { mpfr_t tmp, one; mpfr_inits(tmp, one, NULL); mpfr_set_d(one, 1, GMP_RNDN); mpfr_add(tmp, fra, one, GMP_RNDN); mpfr_sub(frd, fra, one, GMP_RNDN); mpfr_div(frd, frd, tmp, GMP_RNDN); mpfr_clears(tmp, one, NULL); } #define FIXCOEF0 2.0 #endif #if 1 #define N 12 #define S 50 #define L 2 #define MIN -0.347 #define MAX 0.347 // 0.5 log 2 #define PMUL 1 #define PADD 0 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_exp(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #define FIXCOEF1 1.0 //#define FIXCOEF2 0.5 #endif #if 0 #define N 21 #define S 100 #define L 1 #define P 1.1 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 
1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_atan(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif #if 0 #define N 20 #define S 100 #define L 0 #define P 1.54 #define MIN 0.0 #define MAX 0.708 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_asin(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif sleef-3.5.1/src/gencoef/gencoef.c000066400000000000000000000176721373003144100165760ustar00rootroot00000000000000// This is part of SLEEF, written by Naoki Shibata. http://shibatch.sourceforge.net // Since the original code for simplex algorithm is developed by Haruhiko Okumura and // the code is distributed under the Creative Commons Attribution 4.0 International License, // the contents under this directory are also distributed under the same license. #include #include #include #include #include #include #include #include //#include "sp.h" #include "dp.h" //#include "ld.h" //#include "qp.h" #undef VERBOSE #define PREC 4096 #define EPS 1e-50 #define PREC2 (PREC_TARGET*4) #ifndef P #define P 1 #endif #ifndef Q #define Q 10000 #endif void mpfr_zinit(mpfr_t m); void regressMinRelError_fr(int n, int m, mpfr_t **x, mpfr_t *result); char *mpfrToStr(mpfr_t m) { mpfr_t fra; mpfr_init2(fra, mpfr_get_prec(m)); mpfr_abs(fra, m, GMP_RNDN); mpfr_exp_t e; char *s = mpfr_get_str(NULL, &e, 10, 0, fra, GMP_RNDN); char *ret = malloc(strlen(s) + 20); if (mpfr_sgn(m) == -1) ret[0] = '-'; else ret[0] = '+'; ret[1] = '0'; ret[2] = '.'; strcpy(&ret[3], s); mpfr_free_str(s); char estr[10]; sprintf(estr, "e%+d", (int)e); strcat(ret, estr); mpfr_clears(fra, NULL); return ret; } double countULP(mpfr_t d, mpfr_t c) { mpfr_t fry, frw; mpfr_inits(fry, frw, NULL); double c2 = mpfr_get_d(c, GMP_RNDN); if (c2 == 0 && mpfr_cmp_d(d, 0) != 0) return 10000; long e; mpfr_get_d_2exp(&e, c, GMP_RNDN); mpfr_set_ui_2exp(frw, 1, e-PREC_TARGET, GMP_RNDN); 
mpfr_sub(fry, d, c, GMP_RNDN); mpfr_div(fry, fry, frw, GMP_RNDN); double u = fabs(mpfr_get_d(fry, GMP_RNDN)); mpfr_clears(fry, frw, NULL); return u; } void func(mpfr_t s, mpfr_t x, mpfr_t *coef, int n) { mpfr_set_prec(s, PREC_TARGET); mpfr_set(s, coef[n-1], GMP_RNDN); for(int i=n-1;i>0;i--) { if (i == L-1) { mpfr_t t; mpfr_init2(t, PREC2); mpfr_set(t, s, GMP_RNDN); mpfr_set_prec(s, PREC2); mpfr_set(s, t, GMP_RNDN); mpfr_clear(t); } mpfr_mul(s, s, x, GMP_RNDN); mpfr_add(s, s, coef[i-1], GMP_RNDN); } } int main(int argc, char **argv) { int i, j; int n, m; double p; mpfr_set_default_prec(PREC); #if 0 { mpfr_t a, b; mpfr_inits(a, b, NULL); float x = M_PI; mpfr_set_d(a, x, GMP_RNDN); x = nexttowardf(x, 100); x = nexttowardf(x, 100); x = nexttowardf(x, 100); mpfr_set_d(b, x, GMP_RNDN); printf("%g\n", countULP(b, a)); exit(0); } #endif #if 0 { mpfr_t a, b; mpfr_inits(a, b, NULL); double x = M_PI; mpfr_set_d(a, x, GMP_RNDN); x = nexttoward(x, 100); x = nexttoward(x, 100); x = nexttoward(x, 100); mpfr_set_d(b, x, GMP_RNDN); printf("%g\n", countULP(b, a)); exit(0); } #endif #if 0 { mpfr_t a, b; mpfr_inits(a, b, NULL); long double x = M_PI; mpfr_set_ld(a, x, GMP_RNDN); x = nexttowardl(x, 100); x = nexttowardl(x, 100); x = nexttowardl(x, 100); mpfr_set_ld(b, x, GMP_RNDN); printf("%g\n", countULP(b, a)); exit(0); } #endif #if 0 { mpfr_t a, b; mpfr_inits(a, b, NULL); __float128 x = M_PI; mpfr_set_f128(a, x, GMP_RNDN); x = nextafterq(x, 100); x = nextafterq(x, 100); x = nextafterq(x, 100); mpfr_set_f128(b, x, GMP_RNDN); printf("%g\n", countULP(b, a)); exit(0); } #endif m = N+1; n = argc >= 2 ? atoi(argv[1]) : S; p = argc >= 3 ? 
atof(argv[2]) : P; mpfr_t **x, *result; // x[m][n], result[m] x = calloc(sizeof(mpfr_t *), m); result = calloc(sizeof(mpfr_t), m); for(i=0;i=0;i--) { mpfr_set_prec(fra, PREC_TARGET+4); mpfr_set(fra, result[i], GMP_RNDN); char *s; printf("%s, \n", s = mpfrToStr(fra)); free(s); } printf("\n"); mpfr_set_prec(fra, PREC); double emax = 0; for(i=0;i<=n*10;i++) { double a = i * (double)(MAX - MIN) / (n*10.0) + MIN; mpfr_set_d(fra, a, GMP_RNDN); CFUNC(frd, fra); mpfr_set_d(frb, 0, GMP_RNDN); for(j=m-1;j>=0;j--) { mpfr_set_d(frc, (double)j*PMUL+PADD, GMP_RNDN); mpfr_pow(frc, frd, frc, GMP_RNDN); mpfr_mul(frc, frc, result[j], GMP_RNDN); mpfr_add(frb, frb, frc, GMP_RNDN); } TARGET(frc, fra); double u = countULP(frb, frc); if (u > emax) emax = u; } printf("Phase 1 : Max error = %g ULP\n\n", emax); fflush(stdout); // mpfr_t bestcoef[N], curcoef[N]; for(i=0;i= L ? PREC_TARGET : PREC2); mpfr_set(bestcoef[i], result[i], GMP_RNDN); mpfr_init2(curcoef[i], i >= L ? PREC_TARGET : PREC2); mpfr_set(curcoef[i], result[i], GMP_RNDN); } srandom(time(NULL)); mpfr_set_default_prec(PREC2); static mpfr_t a[Q], v[Q], am[Q], aa[Q]; for(i=0;i=0;j--) { mpfr_set_d(frc, (double)j*PMUL+PADD, GMP_RNDN); mpfr_pow(frc, a[i], frc, GMP_RNDN); mpfr_mul(frc, frc, curcoef[j], GMP_RNDN); mpfr_add(frb, frb, frc, GMP_RNDN); } double e = countULP(frb, v[i]); //printf("c = %.20g, t = %.20g, ulp = %g\n", mpfr_get_d(v[i], GMP_RNDN), mpfr_get_d(frb, GMP_RNDN), e); if (!isfinite(e)) continue; if (e > emax) { emax = e; worstx = mpfr_get_d(a[i], GMP_RNDN); } esum += e; } mpfr_set_prec(frb, PREC); //printf("emax = %g\n", emax); if (emax < best || (emax == best && esum < bestsum)) { for(i=0;i 10) printf("Max error = %g ULP, Sum error = %g (Max error at %g)\n", emax, esum, worstx); if ((best - emax) / best > 0.0001) k = 0; best = emax; bestsum = esum; bestworstx = worstx; } for(i=0;i 0) { for(int j=0;jr;j--) mpfr_nextbelow(curcoef[i]); } } } printf("\n"); for(i=N-1;i>=0;i--) { mpfr_set_prec(fra, i >= L ? 
PREC_TARGET+4 : PREC2); mpfr_set(fra, bestcoef[i], GMP_RNDN); char *s; printf("%s, \n", s = mpfrToStr(fra)); free(s); } printf("\nPhase 2 : max error = %g ULP at %g\n", best, bestworstx); exit(0); } sleef-3.5.1/src/gencoef/gencoef.txt000066400000000000000000000035721373003144100171650ustar00rootroot00000000000000 With this small tool, the coefficients for polynomial approximation used in kernels can be generated. Usage Edit gencoefdp.c. In the beginning of the file, specifications of the parameters for generating coefficients are listed. Enable one of them by changing #if. Then, run make to compile the source code. Run the gencoef, and it will show the generated coefficients in a few minutes. How it works There are two phases of the program. The first phase is the regression for minimizing the maximum relative error. This problem can be reduced to a linear programming problem, and the Simplex method is used in this implementation. This requires multi-precision calculation, and the implementation uses the MPFR library to do this. In this phase, only a small number of values (specified by S macro, usually 40 or so) of the function to approximate are sampled within the argument range. The function to approximate can be given by FRFUNC function. Specifying higher values for S does not always give better results. The second phase is to optimize the coefficients so that it gives good accuracy with double precision calculation. In this phase, it checks 100000 points (specified by Q macro) within the specified argument range to see if the polynomial gives good error bound. In some cases, the last few terms have to be calculated in higher precision in order to achieve 1 ULP overall accuracy, and this implementation can take care of that. The L parameter specifies the number of high precision coefficients. In some cases, it is desirable to fix the last few coefficients to values like 1. This can be specified if you define FIXCOEF0 macro. 
This sometimes does not work, however. In this case, you need to specify the function to approximate as shown in the definition for cos. Finding a set of good parameters is not a straightforward process. You usually need many iterations of trial and error. sleef-3.5.1/src/gencoef/ld.h000066400000000000000000000076711373003144100155720ustar00rootroot00000000000000// This is part of SLEEF, written by Naoki // Shibata. http://shibatch.sourceforge.net // The code in this file is distributed under the Creative Commons // Attribution 4.0 International License. #define PREC_TARGET 64 #if 0 #define N 8 // Degree of equation #define S 40 // Number of samples for phase 1 #define L 4 // Number of high precision coefficients #define MIN 0.0 // Min argument #define MAX (M_PI/4) // Max argument #define PMUL 2 // The form of polynomial is y = x^(PADD+PMUL*0) + x^(PADD+PMUL*1) + ... #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_sin(ret, a, GMP_RNDN); } // The function to approximate void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 // Fix coef 0 to 1.0 #endif #if 0 #define N 10 #define S 40 #define L 2 #define MIN 0.0 #define MAX (M_PI/4) void TARGET(mpfr_t ret, mpfr_t a) { // cos(x) - 1 mpfr_t x; mpfr_init(x); mpfr_cos(ret, a, GMP_RNDN); mpfr_set_ld(x, 1, GMP_RNDN); mpfr_sub(ret, ret, x, GMP_RNDN); mpfr_clear(x); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define PMUL 2 #define PADD 2 #define FIXCOEF0 (-0.5) #endif #if 0 // for xsincospi4_u05 #define N 9 #define S 40 #define L 2 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_sin(ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #endif #if 0 // for xsincospi4_u05 #define N 9 #define 
S 40 #define L 2 #define MIN 0.0 #define MAX 1.0 void TARGET(mpfr_t ret, mpfr_t a) { // cos(x) - 1 mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_cos(ret, x, GMP_RNDN); mpfr_set_ld(x, 1, GMP_RNDN); mpfr_sub(ret, ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define PMUL 2 #define PADD 2 #endif #if 0 // for xsincospi4 #define N 7 #define S 40 #define L 0 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_sin(ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #endif #if 0 #define N 17 #define S 40 #define L 0 #define MIN 0.0 #define MAX (M_PI/4) #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_tan(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif #if 0 #define N 9 #define S 40 #define L 2 #define MIN 1 //0.75 #define MAX 1.5 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_log(ret, a, GMP_RNDN); } void CFUNC(mpfr_t frd, mpfr_t fra) { mpfr_t tmp, one; mpfr_inits(tmp, one, NULL); mpfr_set_d(one, 1, GMP_RNDN); mpfr_add(tmp, fra, one, GMP_RNDN); mpfr_sub(frd, fra, one, GMP_RNDN); mpfr_div(frd, frd, tmp, GMP_RNDN); mpfr_clear(tmp, one, NULL); } #define FIXCOEF0 2.0 #endif #if 0 #define N 12 #define S 50 #define L 0 #define MIN -0.347 #define MAX 0.347 // 0.5 log 2 #define PMUL 1 #define PADD 0 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_exp(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #define FIXCOEF1 1.0 #define FIXCOEF2 0.5 #endif #if 0 #define N 22 
#define S 100 #define L 2 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_atan(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif sleef-3.5.1/src/gencoef/mkrempitab.c000066400000000000000000000054571373003144100173210ustar00rootroot00000000000000#include #include #include #include #include static int64_t doubleToRawLongBits(double d) { union { double f; int64_t i; } tmp; tmp.f = d; return tmp.i; } static double longBitsToDouble(int64_t i) { union { double f; int64_t i; } tmp; tmp.i = i; return tmp.f; } static double removelsb(double d) { return longBitsToDouble(doubleToRawLongBits(d) & 0xfffffffffffffffeLL); } static int32_t floatToRawIntBits(float d) { union { float f; int32_t i; } tmp; tmp.f = d; return tmp.i; } static float intBitsToFloat(int32_t i) { union { float f; int32_t i; } tmp; tmp.i = i; return tmp.f; } static float removelsbf(float x) { return intBitsToFloat(0xfffffffc & floatToRawIntBits(x)); } int main(int argc, char **argv) { mpfr_set_default_prec(2048); mpfr_t pi, rpi, xrpi, x, y, z, r; mpfr_inits(pi, rpi, xrpi, x, y, z, r, NULL); mpfr_const_pi(pi, GMP_RNDN); mpfr_set_d(x, 0.5, GMP_RNDN); mpfr_div(rpi, x, pi, GMP_RNDN); printf("NOEXPORT ALIGNED(64) const double rempitabdp[] = {\n"); for(int i=55;i<1024;i++) { int M = i > 700 ? 
-64 : 0; int ex = i - 53; if (ex < -52) ex = -52; mpfr_set_d(x, ldexp(1, ex), GMP_RNDN); mpfr_mul(y, x, rpi, GMP_RNDN); mpfr_frac(xrpi, y, GMP_RNDN); mpfr_div(xrpi, xrpi, x, GMP_RNDN); mpfr_set_exp(xrpi, mpfr_get_exp(xrpi) - M); mpfr_set(x, xrpi, GMP_RNDN); double rpi0 = removelsb(mpfr_get_d(x, GMP_RNDN)); mpfr_set_d(y, rpi0, GMP_RNDN); mpfr_sub(x, x, y, GMP_RNDN); double rpi1 = removelsb(mpfr_get_d(x, GMP_RNDN)); mpfr_set_d(y, rpi1, GMP_RNDN); mpfr_sub(x, x, y, GMP_RNDN); double rpi2 = removelsb(mpfr_get_d(x, GMP_RNDN)); mpfr_set_d(y, rpi2, GMP_RNDN); mpfr_sub(x, x, y, GMP_RNDN); double rpi3 = mpfr_get_d(x, GMP_RNDN); printf(" %.20g, %.20g, %.20g, %.20g,\n", rpi0, rpi1, rpi2, rpi3); } printf("};\n\n"); printf("NOEXPORT ALIGNED(64) const float rempitabsp[] = {\n"); for(int i=25;i<128;i++) { int M = i > 90 ? -64 : 0; int ex = i - 23; mpfr_set_d(x, ldexp(1, ex), GMP_RNDN); mpfr_mul(y, x, rpi, GMP_RNDN); mpfr_frac(xrpi, y, GMP_RNDN); mpfr_div(xrpi, xrpi, x, GMP_RNDN); mpfr_set_exp(xrpi, mpfr_get_exp(xrpi) - M); mpfr_set(x, xrpi, GMP_RNDN); float rpi20 = removelsbf(mpfr_get_d(x, GMP_RNDN)); mpfr_set_d(y, rpi20, GMP_RNDN); mpfr_sub(x, x, y, GMP_RNDN); float rpi21 = removelsbf(mpfr_get_d(x, GMP_RNDN)); mpfr_set_d(y, rpi21, GMP_RNDN); mpfr_sub(x, x, y, GMP_RNDN); float rpi22 = removelsbf(mpfr_get_d(x, GMP_RNDN)); mpfr_set_d(y, rpi22, GMP_RNDN); mpfr_sub(x, x, y, GMP_RNDN); float rpi23 = mpfr_get_d(x, GMP_RNDN); printf(" %.10g, %.10g, %.10g, %.10g,\n", rpi20, rpi21, rpi22, rpi23); } printf("};\n"); } sleef-3.5.1/src/gencoef/mkrempitabqp.c000066400000000000000000000025611373003144100176530ustar00rootroot00000000000000#include #include #include #include #include #include #include #define N 8 #define B 8 #define NCOL (53-B) #define NROW ((16385+(53-B)*N-106)/NCOL+1) static double *rempitabqp = NULL; void generateRempitabqp() { rempitabqp = calloc(16385-106+(53-B)*(N+1), sizeof(double)); int orgprec = mpfr_get_default_prec(); mpfr_set_default_prec(18000); mpfr_t pi, m, n, o; 
mpfr_inits(pi, m, n, o, NULL); mpfr_const_pi(pi, GMP_RNDN); mpfr_d_div(n, 0.5, pi, GMP_RNDN); for(int e=106;e<16385+(53-B)*N;e++) { mpfr_set(m, n, GMP_RNDN); mpfr_set_ui_2exp(o, 1, -(113 - e), GMP_RNDN); mpfr_mul(m, m, o, GMP_RNDN); mpfr_frac(m, m, GMP_RNDN); mpfr_set_ui_2exp(o, 1, (53-B), GMP_RNDN); mpfr_mul(m, m, o, GMP_RNDN); mpfr_trunc(m, m); mpfr_set_ui_2exp(o, 1, 7-(53-B), GMP_RNDN); mpfr_mul(m, m, o, GMP_RNDN); int col = (e - 106) % NCOL; int row = (e - 106) / NCOL; rempitabqp[col * NROW + row] = mpfr_get_d(m, GMP_RNDN); } mpfr_clears(pi, m, n, o, NULL); mpfr_set_default_prec(orgprec); } int main(int argc, char **argv) { generateRempitabqp(); printf("NOEXPORT const double Sleef_rempitabqp[] = {\n "); for(int i=0;i<16385-106+(53-B)*(N+1);i++) { printf("%.20g, ", rempitabqp[i]); if ((i & 3) == 3) printf("\n "); } printf("\n};\n"); } sleef-3.5.1/src/gencoef/qp.h000066400000000000000000000071431373003144100156050ustar00rootroot00000000000000// This is part of SLEEF, written by Naoki // Shibata. http://shibatch.sourceforge.net // The code in this file is distributed under the Creative Commons // Attribution 4.0 International License. #define PREC_TARGET 113 // #if 0 #define N 15 // Degree of equation #define S 150 // Number of samples for phase 1 #define L 0 // Number of high precision coefficients #define P 0.37 #define MIN 0.0 // Min argument #define MAX (M_PI/2) // Max argument #define PMUL 2 // The form of polynomial is y = x^(PADD+PMUL*0) + x^(PADD+PMUL*1) + ... 
#define PADD 3 void TARGET(mpfr_t ret, mpfr_t a) { // The function to approximate mpfr_sin(ret, a, GMP_RNDN); mpfr_sub(ret, ret, a, GMP_RNDN); // ret = sin(a) - a } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #endif #if 0 #define N 15 #define S 150 #define L 0 #define MIN 0.0 #define MAX (M_PI/2) void TARGET(mpfr_t ret, mpfr_t a) { // cos(x) - 1 mpfr_t x; mpfr_init(x); mpfr_cos(ret, a, GMP_RNDN); mpfr_set_ld(x, 1, GMP_RNDN); mpfr_sub(ret, ret, x, GMP_RNDN); mpfr_clear(x); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define PMUL 2 #define PADD 2 //#define FIXCOEF0 (-0.5) #endif #if 0 // for xsincospi4_u05 #define N 13 #define S 150 #define L 2 #define P 0.9 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_sin(ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #endif #if 0 // for xsincospi4_u05 #define N 13 #define S 150 #define L 2 #define MIN 0.0 #define MAX 1.0 void TARGET(mpfr_t ret, mpfr_t a) { // cos(x) - 1 mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_cos(ret, x, GMP_RNDN); mpfr_set_ld(x, 1, GMP_RNDN); mpfr_sub(ret, ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define PMUL 2 #define PADD 2 #endif #if 0 // running #define N 31 #define S 100 #define P 1.7 #define L 0 #define MIN 0.0 #define MAX (M_PI/4) #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_tan(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif #if 0 // running #define N 20 #define S 110 #define L 2 #define MIN 1 
//0.75 #define MAX 1.5 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_log(ret, a, GMP_RNDN); } void CFUNC(mpfr_t frd, mpfr_t fra) { mpfr_t tmp, one; mpfr_inits(tmp, one, NULL); mpfr_set_d(one, 1, GMP_RNDN); mpfr_add(tmp, fra, one, GMP_RNDN); mpfr_sub(frd, fra, one, GMP_RNDN); mpfr_div(frd, frd, tmp, GMP_RNDN); mpfr_clears(tmp, one, NULL); } #define FIXCOEF0 2.0 #endif #if 1 #define N 22 #define S 140 #define L 2 #define MIN -0.347 #define MAX 0.347 // 0.5 log 2 #define PMUL 1 #define PADD 0 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_exp(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #define FIXCOEF1 1.0 //#define FIXCOEF2 0.5 #endif #if 0 // running #define N 45 #define S 100 #define P 1.55 #define L 2 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_atan(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif sleef-3.5.1/src/gencoef/simplexfr.c000066400000000000000000000227761373003144100172020ustar00rootroot00000000000000// The original code for simplex algorithm is taken from Haruhiko Okumura's book. // https://oku.edu.mie-u.ac.jp/~okumura/algo/ // The code is distributed under the Creative Commons Attribution 4.0 International License. // https://creativecommons.org/licenses/by/4.0/ // The code is modified by Naoki Shibata to process arbitrary precision numbers. 
#include #include #include #include #include #include #include #define PREC 4096 #define EPS 1e-50 #define OK 0 #define MAXIMIZABLE_TO_INFINITY 1 #define NOT_FEASIBLE 2 #define ERROR (-1) #define NOP (-1) #define EQU (0) #define LEQ 1 #define GEQ 2 static int m, n, n1, n2, n3, jmax; static int *col, *row, *nonzero_row, *inequality; static mpfr_t **a, *c, **q, *pivotcolumn; static mpfr_t zero, one, eps, minuseps, large; void mpfr_zinit(mpfr_t m) { mpfr_init(m); mpfr_set_d(m, 0, GMP_RNDN); } static void init(int n0, int m0) { int i, j; m = m0; n = n0; mpfr_init(zero); mpfr_set_d(zero, 0, GMP_RNDN); mpfr_init(one); mpfr_set_d(one, 1, GMP_RNDN); mpfr_init(eps); mpfr_set_d(eps, EPS, GMP_RNDN); mpfr_init(minuseps); mpfr_set_d(minuseps, -EPS, GMP_RNDN); mpfr_init(large); mpfr_set_d(large, 1.0 / EPS, GMP_RNDN); a = malloc(sizeof(mpfr_t *) * (m + 1)); for(i=0;i < m+1;i++) { a[i] = malloc(sizeof(mpfr_t) * (n + 1)); for(j=0;j < (n+1);j++) { mpfr_zinit(a[i][j]); } } q = malloc(sizeof(mpfr_t *) * (m + 1)); for(i=0;i < m+1;i++) { q[i] = malloc(sizeof(mpfr_t) * (m + 1)); for(j=0;j < m+1;j++) { mpfr_zinit(q[i][j]); } } c = malloc(sizeof(mpfr_t) * (n + 1)); for(j=0;j < (n+1);j++) { mpfr_zinit(c[j]); } pivotcolumn = malloc(sizeof(mpfr_t) * (m + 1)); for(j=0;j < (m+1);j++) { mpfr_zinit(pivotcolumn[j]); } col = calloc(m+1, sizeof(int)); row = calloc(n+2*m+1, sizeof(int)); nonzero_row = calloc(n+2*m+1, sizeof(int)); inequality = calloc(m+1, sizeof(int)); } static void dispose() { mpfr_clears(zero, one, eps, minuseps, large, (mpfr_ptr)0); int i, j; for(i=0;i < m+1;i++) { for(j=0;j < m+1;j++) { mpfr_clear(q[i][j]); } free(q[i]); } free(q); for(i=0;i < m+1;i++) { for(j=0;j < n+1;j++) { mpfr_clear(a[i][j]); } free(a[i]); } free(a); for(j=0;j < n+1;j++) { mpfr_clear(c[j]); } free(c); for(j=0;j < m+1;j++) { mpfr_clear(pivotcolumn[j]); } free(pivotcolumn); free(col); free(row); free(nonzero_row); free(inequality); } static void prepare() { int i; n1 = n; for (i = 1; i <= m; i++) if 
(inequality[i] == GEQ) { n1++; nonzero_row[n1] = i; }
  // Columns n1+1..n2: one per LEQ constraint; the column is recorded in col[] and row[] for that row.
  n2 = n1;
  for (i = 1; i <= m; i++)
    if (inequality[i] == LEQ) {
      n2++; col[i] = n2; nonzero_row[n2] = row[n2] = i;
    }
  // Columns n2+1..n3: one per constraint that is not LEQ (these are the columns phase1() works on).
  n3 = n2;
  for (i = 1; i <= m; i++)
    if (inequality[i] != LEQ) {
      n3++; col[i] = n3; nonzero_row[n3] = row[n3] = i;
    }
  // Set the diagonal of q to 1.
  for (i = 0; i <= m; i++) { mpfr_set_d(q[i][i], 1, GMP_RNDN); }
}

// Stores entry (i, j) of the current tableau in ret.
//  - col[i] < 0      : the row is disabled, the entry is 0.
//  - j <= n          : dot product of row i of q with column j of a, summed with mpfr_sum.
//  - n < j <= n1     : -q[i][nonzero_row[j]].
//  - n1 < j <= n2    : q[i][nonzero_row[j]].
//  - j > n2          : q[i][nonzero_row[j]], plus 1 when i == 0.
static void tableau(mpfr_t ret, int i, int j) {
  int k;

  if (col[i] < 0) { mpfr_set_d(ret, 0, GMP_RNDN); return; }

  if (j <= n) {
    mpfr_t s;
    mpfr_zinit(s);
    mpfr_set_d(s, 0, GMP_RNDN);

    // Temporary products q[i][k] * a[k][j], k = 0..m, and the pointer array mpfr_sum expects.
    mpfr_t *tab = malloc(sizeof(mpfr_t) * (m + 1));
    mpfr_ptr *ptab = malloc(sizeof(mpfr_ptr) * (m + 1));
    for (k = 0; k <= m; k++) {
      mpfr_zinit(tab[k]);
      ptab[k] = (mpfr_ptr)&tab[k];
      mpfr_mul(tab[k], q[i][k], a[k][j], GMP_RNDN);
    }
    mpfr_sum(s, ptab, m+1, GMP_RNDN);
    for (k = 0; k <= m; k++) {
      mpfr_clear(tab[k]);
    }
    free(ptab);
    free(tab);

    mpfr_set(ret, s, GMP_RNDN);
    mpfr_clear(s);
    return;
  }

  mpfr_set(ret, q[i][nonzero_row[j]], GMP_RNDN);

  if (j <= n1) { mpfr_neg(ret, ret, GMP_RNDN); return; }
  if (j <= n2 || i != 0) return;

  mpfr_add(ret, ret, one, GMP_RNDN);
  return;
}

// Pivots on (ipivot, jpivot) using the values previously stored in pivotcolumn[]:
// row ipivot of q is divided by pivotcolumn[ipivot], then every other row i has
// pivotcolumn[i] times the scaled pivot row subtracted from it. Only columns 1..m
// of q are updated. Finally the row[] / col[] bookkeeping is switched to jpivot.
static void pivot(int ipivot, int jpivot) {
  int i, j;
  mpfr_t u;

  mpfr_zinit(u);

  mpfr_set(u, pivotcolumn[ipivot], GMP_RNDN);

  for (j = 1; j <= m; j++) {
    mpfr_div(q[ipivot][j], q[ipivot][j], u, GMP_RNDN);
  }

  for (i = 0; i <= m; i++)
    if (i != ipivot) {
      mpfr_set(u, pivotcolumn[i], GMP_RNDN);

      for (j = 1; j <= m; j++) {
        // mpfr_fms yields q[ipivot][j]*u - q[i][j]; negating it gives q[i][j] - q[ipivot][j]*u.
        mpfr_fms(q[i][j], q[ipivot][j], u, q[i][j], GMP_RNDN);
        mpfr_neg(q[i][j], q[i][j], GMP_RNDN);
      }
    }

  // The column that was assigned to row ipivot is released, and jpivot takes its place.
  row[col[ipivot]] = 0;

  col[ipivot] = jpivot;
  row[jpivot] = ipivot;

  mpfr_clear(u);
}

// Repeats pivoting steps over columns 1..jmax until no candidate column is left.
static int minimize() {
  int i, ipivot, jpivot;
  mpfr_t t, u;
  mpfr_inits(t, u, (mpfr_ptr)0);

  for (;;) {
    // Pick the first column with row[jpivot] == 0 whose row-0 tableau entry is below -EPS.
    for (jpivot = 1; jpivot <= jmax; jpivot++) {
      if (row[jpivot] == 0) {
        tableau(pivotcolumn[0], 0, jpivot);
        if (mpfr_cmp(pivotcolumn[0], minuseps) < 0) break;
      }
    }
    // No such column: finished, report success (1).
    if (jpivot > jmax) {
      mpfr_clears(t, u, (mpfr_ptr)0);
      return 1;
    }

    // u holds the smallest ratio found so far; ipivot == 0 means "no pivot row found yet".
    mpfr_set(u, large, GMP_RNDN);
    ipivot = 0;
    for (i = 1; i <= m; i++) {
tableau(pivotcolumn[i], i, jpivot); if (mpfr_cmp(pivotcolumn[i], eps) > 0) { tableau(t, i, 0); mpfr_div(t, t, pivotcolumn[i], GMP_RNDN); if (mpfr_cmp(t, u) < 0) { ipivot = i; mpfr_set(u, t, GMP_RNDN); } } } if (ipivot == 0) { mpfr_clears(t, u, (mpfr_ptr)0); return 0; // the objective function can be minimized to -infinite } pivot(ipivot, jpivot); } } static int phase1() { int i, j; mpfr_t u; mpfr_zinit(u); jmax = n3; for (i = 0; i <= m; i++) { if (col[i] > n2) mpfr_set_d(q[0][i], -1, GMP_RNDN); } minimize(); tableau(u, 0, 0); if (mpfr_cmp(u, minuseps) < 0) { mpfr_clear(u); return 0; } for (i = 1; i <= m; i++) { if (col[i] > n2) { col[i] = -1; } } mpfr_set_d(q[0][0], 1, GMP_RNDN); for (j = 1; j <= m; j++) mpfr_set_d(q[0][j], 0, GMP_RNDN); for (i = 1; i <= m; i++) { if ((j = col[i]) > 0 && j <= n && mpfr_cmp_d(c[j], 0) != 0) { mpfr_set(u, c[j], GMP_RNDN); for (j = 1; j <= m; j++) { mpfr_fms(q[0][j], q[i][j], u, q[0][j], GMP_RNDN); mpfr_neg(q[0][j], q[0][j], GMP_RNDN); } } } mpfr_clear(u); return 1; } static int phase2() { int j; jmax = n2; for (j = 0; j <= n; j++) { mpfr_set(a[0][j], c[j], GMP_RNDN); } return minimize(); } int solve_fr(mpfr_t *result, int n0, int m0, mpfr_t **a0, int *ineq0, mpfr_t *c0) { int i,j; m = m0; // number of inequations n = n0+1; // number of variables init(n, m); mpfr_t csum; mpfr_zinit(csum); for(j=0;j /dev/null) ARCH := $(shell uname -p) all : ifndef BUILDDIR @echo @echo Please set the build directory to BUILDDIR environment variable and run make once again. @echo e.g. export BUILDDIR='`pwd`'/../../build @echo else @echo @echo You can start measurement by "'"make measure"'". ifdef ICCAVAILABLE @echo You can start measurement with SVML by "'"make measureSVML"'". endif @echo Then, you can plot the results of measurement by "'"make plot"'". @echo @echo You have to install java and gnuplot to do plotting. @echo Stop all tasks on the computer before starting measurement. 
@echo
endif

# SVML benchmark objects and executables (Intel compiler only).
# Every recipe line starts with "-", so make ignores a failing command, and
# runs "command -v icc" first so that icc is only invoked when it is on PATH.
# The *_10 targets are built with -DSVMLULP=1 -fimf-max-error=1.0, the *_40
# targets with -DSVMLULP=4 -fimf-max-error=4.0.
# NOTE(review): without icc the target file is not created but make carries
# on; presumably intentional so hosts without icc are not blocked - confirm.
benchsvml128_10.o : benchsvml128.c bench.h
	-command -v icc >/dev/null 2>&1 && icc benchsvml128.c -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o benchsvml128_10.o

benchsvml128_40.o : benchsvml128.c bench.h
	-command -v icc >/dev/null 2>&1 && icc benchsvml128.c -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o benchsvml128_40.o

benchsvml256_10.o : benchsvml256.c bench.h
	-command -v icc >/dev/null 2>&1 && icc benchsvml256.c -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o benchsvml256_10.o

benchsvml256_40.o : benchsvml256.c bench.h
	-command -v icc >/dev/null 2>&1 && icc benchsvml256.c -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o benchsvml256_40.o

# The 512-bit objects use -xCOMMON-AVX512 instead of -march=core-avx2.
benchsvml512_10.o : benchsvml512.c bench.h
	-command -v icc >/dev/null 2>&1 && icc benchsvml512.c -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -xCOMMON-AVX512 -O0 -lm -c -o benchsvml512_10.o

benchsvml512_40.o : benchsvml512.c bench.h
	-command -v icc >/dev/null 2>&1 && icc benchsvml512.c -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -xCOMMON-AVX512 -O0 -lm -c -o benchsvml512_40.o

# Driver executables: link the three per-width objects with benchsvml.c.
benchsvml_10 : benchsvml.c benchsvml128_10.o benchsvml256_10.o benchsvml512_10.o bench.h
	-command -v icc >/dev/null 2>&1 && icc benchsvml.c benchsvml128_10.o benchsvml256_10.o benchsvml512_10.o -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -O0 -march=native -lm -o benchsvml_10

benchsvml_40 : benchsvml.c benchsvml128_40.o benchsvml256_40.o benchsvml512_40.o bench.h
	-command -v icc >/dev/null 2>&1 && icc benchsvml.c benchsvml128_40.o benchsvml256_40.o benchsvml512_40.o -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -O0 -march=native -lm -o benchsvml_40

#

# SLEEF benchmark. When ARCH is aarch64 only the 128-bit object is built and
# linked; otherwise the 128-, 256- and 512-bit objects are all built.
# Headers and the library are taken from $(BUILDDIR)/include and $(BUILDDIR)/lib.
ifeq ($(ARCH),aarch64)

benchsleef : benchsleef.c benchsleef128.o bench.h
	$(CC) benchsleef.c benchsleef128.o -Wall -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -lsleef -lm -o benchsleef

benchsleef128.o : benchsleef128.c bench.h
	$(CC) benchsleef128.c -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c

else

benchsleef : benchsleef.c benchsleef128.o benchsleef256.o benchsleef512.o bench.h
	$(CC) benchsleef.c benchsleef128.o benchsleef256.o benchsleef512.o -Wall -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -lsleef -lm -o benchsleef

benchsleef128.o : benchsleef128.c bench.h
	$(CC) benchsleef128.c -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c

benchsleef256.o : benchsleef256.c bench.h
	$(CC) benchsleef256.c -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c

# The 512-bit object is compiled with -mavx512f rather than -march=native.
benchsleef512.o : benchsleef512.c bench.h
	$(CC) benchsleef512.c -Wall -mavx512f -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c

endif

#

# Helper used by the "plot" target.
ProcessData.class : ProcessData.java
	javac ProcessData.java

#

# "measure" only prints a hint while BUILDDIR is unset; otherwise it builds
# benchsleef and runs measure.sh on it with LD_LIBRARY_PATH pointing at
# $(BUILDDIR)/lib.
ifndef BUILDDIR
measure :
	@echo
	@echo Please set the build directory to BUILDDIR environment variable and run make once again.
	@echo e.g. export BUILDDIR='`pwd`'/../../build
	@echo
else
measure : benchsleef
	chmod +x ./measure.sh
	LD_LIBRARY_PATH=$(BUILDDIR)/lib ./measure.sh ./benchsleef
	@echo
	@echo Now, you can plot the results of measurement by "'"make plot"'".
	@echo You can do another measurement by "'"make measure"'".
ifdef ICCAVAILABLE
	@echo You can start another measurement with SVML by "'"make measureSVML"'".
endif
	@echo You can start over by "'"make restart"'".
	@echo
endif

# Runs measure.sh on both SVML benchmark executables.
measureSVML : all benchsvml_10 benchsvml_40
	chmod +x ./measure.sh
	./measure.sh ./benchsvml_10 ./benchsvml_40
	@echo
	@echo Now, you can plot the results of measurement by "'"make plot"'".
@echo You can do another measurement by "'"make measure"'". ifdef ICCAVAILABLE @echo You can start another measurement with SVML by "'"make measureSVML"'". endif @echo You can start over by "'"make restart"'". @echo plot : ProcessData.class counter.txt java ProcessData *dptrig*.out gnuplot script.out mv output.png trigdp.png java ProcessData *dpnontrig*.out gnuplot script.out mv output.png nontrigdp.png java ProcessData *sptrig*.out gnuplot script.out mv output.png trigsp.png java ProcessData *spnontrig*.out gnuplot script.out mv output.png nontrigsp.png @echo @echo Plotted results are in trigdp.png, nontrigdp.png, trigsp.png and nontrigsp.png. @echo clean : rm -f *~ a.out *.so *.so.* *.a *.s *.o rm -rf *.dSYM *.dylib rm -f *.obj *.lib *.dll *.exp *.exe *.stackdump rm -f *.class *.png benchsleef benchsvml_10 benchsvml_40 *.out counter.txt restart : rm -f *.out counter.txt sleef-3.5.1/src/libm-benchmarks/ProcessData.java000066400000000000000000000121601373003144100215120ustar00rootroot00000000000000import java.util.*; import java.io.*; public class ProcessData { static final int DP = 64, SP = 32; static LinkedHashMap funcNameOrder = new LinkedHashMap(); static class Key { final String funcName; final int prec, bits; final ArrayList range = new ArrayList(); final double ulps; Key(String s) { String[] a = s.split(","); funcName = a[0].trim(); if (funcNameOrder.get(funcName) == null) { funcNameOrder.put(funcName, funcNameOrder.size()); } prec = a[1].trim().equals("DP") ? DP : a[1].trim().equals("SP") ? 
SP : 0; bits = Integer.parseInt(a[2].trim()); int c; for(c = 3;;c++) { if (a[c].trim().endsWith("ulps")) break; range.add(Double.parseDouble(a[c])); } ulps = Double.parseDouble(a[c].trim().replace("ulps", "")); } public int hashCode() { int h = funcName.hashCode(); h ^= prec ^ bits; return h; } public boolean equals(Object o) { if (this == o) return true; Key k = (Key) o; if (funcName.compareTo(k.funcName) != 0) return false; if (prec != k.prec) return false; if (bits != k.bits) return false; if (range.size() != k.range.size()) return false; for(int i=0;i { public int compare(Key d0, Key d1) { if (d0 == d1) return 0; if (d0.prec < d1.prec) return 1; if (d0.prec > d1.prec) return -1; if (d0.ulps > d1.ulps) return 1; if (d0.ulps < d1.ulps) return -1; int fc = (int)funcNameOrder.get(d0.funcName) - (int)funcNameOrder.get(d1.funcName); if (fc != 0) return fc; if (d0.bits > d1.bits) return 1; if (d0.bits < d1.bits) return -1; if (d0.range.size() > d1.range.size()) return 1; if (d0.range.size() < d1.range.size()) return -1; for(int i=0;i d1.range.get(i)) return 1; if (d0.range.get(i) < d1.range.get(i)) return -1; } return 0; } } public static void main(String[] args) throws Exception { LinkedHashMap> allData = new LinkedHashMap>(); TreeSet allKeys = new TreeSet(new KeyComparator()); LinkedHashSet allColumnTitles = new LinkedHashSet(); double maximum = 0; for(int i=0;i v = allData.get(key); if (v == null) { v = new LinkedHashMap(); allData.put(key, v); } String[] a = s.split(","); double time = Double.parseDouble(a[a.length-1]); v.put(columnTitle, time); maximum = Math.max(maximum, time); } lnr.close(); } PrintStream ps = new PrintStream("data.out"); for(Key k : allKeys) { ps.print("\"" + k + "\" "); LinkedHashMap v = allData.get(k); for(String s : allColumnTitles) { Double d = v.get(s); if (d != null) ps.print(d); if (d == null) ps.print("0"); ps.print("\t"); } ps.println(); } ps.close(); ps = new PrintStream("script.out"); ps.println("set terminal pngcairo size 1280, 800 
font \",10\""); ps.println("set output \"output.png\""); ps.println("color00 = \"#FF5050\";"); // red ps.println("color01 = \"#0066FF\";"); // blue ps.println("color02 = \"#00FF00\";"); // green ps.println("color03 = \"#FF9900\";"); // orange ps.println("color04 = \"#CC00CC\";"); // purple ps.println("color05 = \"#880000\";"); // brown ps.println("color06 = \"#003300\";"); // dark green ps.println("color07 = \"#000066\";"); // dark blue ps.println("set style data histogram"); ps.println("set style histogram cluster gap 1"); ps.println("set style fill solid 1.00"); ps.println("set boxwidth 0.9"); ps.println("set xtics format \"\""); ps.println("set xtics rotate by -90"); ps.println("set grid ytics"); ps.println("set ylabel \"Execution time in micro sec.\""); ps.println("set yrange [0:*]"); ps.println("set bmargin 24"); ps.println("set title \"Single execution time in micro sec.\""); ps.print("plot"); int i = 0; for(String s : allColumnTitles) { ps.print("\"data.out\" using " + (i+2) + ":xtic(1) title \"" + s + "\" linecolor rgb color" + String.format("%02d", i)); if (i != allColumnTitles.size()-1) ps.print(", "); i++; } ps.println(); ps.close(); } } sleef-3.5.1/src/libm-benchmarks/bench.h000066400000000000000000000050001373003144100176620ustar00rootroot00000000000000#define NITER1 100000 #define NITER2 10000 #define NITER (NITER1 * NITER2) #define callFuncSLEEF1_1(funcName, name, xmin, xmax, ulp, arg, type) ({ \ printf("%s\n", #funcName); \ uint64_t t = Sleef_currentTimeMicros(); \ for(int j=0;j #include #include #include #include #include #include #include "bench.h" int veclen = 16; double *abufdp, *bbufdp; float *abufsp, *bbufsp; FILE *fp; #if defined(__i386__) || defined(__x86_64__) void x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) { uint32_t a, b, c, d; __asm__ __volatile__ ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (eax), "c"(ecx)); out[0] = a; out[1] = b; out[2] = c; out[3] = d; } int cpuSupportsAVX() { int32_t reg[4]; x86CpuID(reg, 1, 0); 
return (reg[2] & (1 << 28)) != 0; } int cpuSupportsAVX512F() { int32_t reg[4]; x86CpuID(reg, 7, 0); return (reg[1] & (1 << 16)) != 0; } #endif void fillDP(double *buf, double min, double max) { for(int i=0;i= 3) fnBase = argv[2]; srandom(time(NULL)); #if defined(__i386__) || defined(__x86_64__) int do128bit = 1; int do256bit = cpuSupportsAVX(); int do512bit = cpuSupportsAVX512F(); #elif defined(__ARM_NEON) int do128bit = 1; #else #error Unsupported architecture #endif posix_memalign((void **)&abufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double)); posix_memalign((void **)&bbufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double)); abufsp = (float *)abufdp; bbufsp = (float *)bbufdp; sprintf(fn, "%sdptrig.out", fnBase); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do128bit) benchSleef128_DPTrig(); #if defined(__i386__) || defined(__x86_64__) if (do256bit) benchSleef256_DPTrig(); if (do512bit) benchSleef512_DPTrig(); #endif fclose(fp); sprintf(fn, "%sdpnontrig.out", fnBase); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do128bit) benchSleef128_DPNontrig(); #if defined(__i386__) || defined(__x86_64__) if (do256bit) benchSleef256_DPNontrig(); if (do512bit) benchSleef512_DPNontrig(); #endif fclose(fp); sprintf(fn, "%ssptrig.out", fnBase); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do128bit) benchSleef128_SPTrig(); #if defined(__i386__) || defined(__x86_64__) if (do256bit) benchSleef256_SPTrig(); if (do512bit) benchSleef512_SPTrig(); #endif fclose(fp); sprintf(fn, "%sspnontrig.out", fnBase); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do128bit) benchSleef128_SPNontrig(); #if defined(__i386__) || defined(__x86_64__) if (do256bit) benchSleef256_SPNontrig(); if (do512bit) benchSleef512_SPNontrig(); #endif fclose(fp); exit(0); } sleef-3.5.1/src/libm-benchmarks/benchsleef128.c000066400000000000000000000203541373003144100211400ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. 
// Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include void fillDP(double *buf, double min, double max); void fillSP(float *buf, double min, double max); extern char x86BrandString[256], versionString[1024]; extern int veclen; extern double *abufdp, *bbufdp; extern float *abufsp, *bbufsp; extern FILE *fp; #include "bench.h" #ifdef __SSE2__ #if defined(_MSC_VER) #include #else #include #endif typedef __m128d vdouble; typedef __m128 vfloat; #define ENABLED #elif defined(__ARM_NEON) #include typedef float64x2_t vdouble; typedef float32x4_t vfloat; #define ENABLED #endif #ifdef ENABLED void benchSleef128_DPTrig() { fillDP(abufdp, 0, 6.28); callFuncSLEEF1_1(Sleef_sind2_u10 , "sin, DP, 128", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd2_u10 , "cos, DP, 128", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand2_u10 , "tan, DP, 128", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd2_u10, "sincos, DP, 128", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind2_u35 , "sin, DP, 128", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd2_u35 , "cos, DP, 128", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand2_u35 , "tan, DP, 128", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd2_u35, "sincos, DP, 128", 0, 6.28, 4.0, abufdp, vdouble); fillDP(abufdp, 0, 1e+6); callFuncSLEEF1_1(Sleef_sind2_u10 , "sin, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd2_u10 , "cos, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand2_u10 , "tan, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd2_u10, "sincos, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind2_u35 , "sin, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd2_u35 , "cos, DP, 128", 0, 1e+6, 4.0, abufdp, 
vdouble); callFuncSLEEF1_1(Sleef_tand2_u35 , "tan, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd2_u35, "sincos, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble); fillDP(abufdp, 0, 1e+100); callFuncSLEEF1_1(Sleef_sind2_u10 , "sin, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd2_u10 , "cos, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand2_u10 , "tan, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd2_u10, "sincos, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind2_u35 , "sin, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd2_u35 , "cos, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand2_u35 , "tan, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd2_u35, "sincos, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble); } void benchSleef128_DPNontrig() { fillDP(abufdp, 0, 1e+300); callFuncSLEEF1_1(Sleef_logd2_u10 , "log, DP, 128", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_log10d2_u10, "log10, DP, 128", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_log1pd2_u10, "log1p, DP, 128", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_logd2_u35 , "log, DP, 128", 0, 1e+300, 4.0, abufdp, vdouble); fillDP(abufdp, -700, 700); callFuncSLEEF1_1(Sleef_expd2_u10 , "exp, DP, 128", -700, 700, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_exp2d2_u10 , "exp2, DP, 128", -700, 700, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_exp10d2_u10, "exp10, DP, 128", -700, 700, 1.0, abufdp, vdouble); fillDP(abufdp, -30, 30); fillDP(bbufdp, -30, 30); callFuncSLEEF1_2(Sleef_powd2_u10, "pow, DP, 128", -30, 30, -30, 30, 1.0, abufdp, bbufdp, vdouble); fillDP(abufdp, -1.0, 1.0); callFuncSLEEF1_1(Sleef_asind2_u10, "asin, DP, 128", -1.0, 1.0, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_acosd2_u10, "acos, DP, 128", -1.0, 1.0, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_asind2_u35, "asin, DP, 128", -1.0, 
1.0, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_acosd2_u35, "acos, DP, 128", -1.0, 1.0, 4.0, abufdp, vdouble); fillDP(abufdp, -10, 10); fillDP(bbufdp, -10, 10); callFuncSLEEF1_1(Sleef_atand2_u10, "atan, DP, 128", -10, 10, 1.0, abufdp, vdouble); callFuncSLEEF1_2(Sleef_atan2d2_u10, "atan2, DP, 128", -10, 10, -10, 10, 1.0, abufdp, bbufdp, vdouble); callFuncSLEEF1_1(Sleef_atand2_u35, "atan, DP, 128", -10, 10, 4.0, abufdp, vdouble); callFuncSLEEF1_2(Sleef_atan2d2_u35, "atan2, DP, 128", -10, 10, -10, 10, 4.0, abufdp, bbufdp, vdouble); } void benchSleef128_SPTrig() { fillSP(abufsp, 0, 6.28); callFuncSLEEF1_1(Sleef_sinf4_u10 , "sin, SP, 128", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf4_u10 , "cos, SP, 128", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf4_u10 , "tan, SP, 128", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf4_u10, "sincos, SP, 128", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sinf4_u35 , "sin, SP, 128", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf4_u35 , "cos, SP, 128", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf4_u35 , "tan, SP, 128", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf4_u35, "sincos, SP, 128", 0, 6.28, 4.0, abufsp, vfloat); fillSP(abufsp, 0, 1e+20); callFuncSLEEF1_1(Sleef_sinf4_u10 , "sin, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf4_u10 , "cos, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf4_u10 , "tan, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf4_u10, "sincos, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sinf4_u35 , "sin, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf4_u35 , "cos, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf4_u35 , "tan, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf4_u35, "sincos, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat); } void benchSleef128_SPNontrig() { 
fillSP(abufsp, 0, 1e+38); callFuncSLEEF1_1(Sleef_logf4_u10 , "log, SP, 128", 0, 1e+38, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_log10f4_u10, "log10, SP, 128", 0, 1e+38, 1.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log1pf4_u10, "log1p, SP, 128", 0, 1e+38, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_logf4_u35 , "log, SP, 128", 0, 1e+38, 4.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log10f4_u35, "log10, SP, 128", 0, 1e+38, 4.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log1pf4_u35, "log1p, SP, 128", 0, 1e+38, 4.0, abufsp, vfloat); fillSP(abufsp, -100, 100); callFuncSLEEF1_1(Sleef_expf4_u10 , "exp, SP, 128", -100, 100, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_exp2f4_u10 , "exp2, SP, 128", -100, 100, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_exp10f4_u10, "exp10, SP, 128", -100, 100, 1.0, abufsp, vfloat); fillSP(abufsp, -30, 30); fillSP(bbufsp, -30, 30); callFuncSLEEF1_2(Sleef_powf4_u10, "pow, SP, 128", -30, 30, -30, 30, 1.0, abufsp, bbufsp, vfloat); fillSP(abufsp, -1.0, 1.0); callFuncSLEEF1_1(Sleef_asinf4_u10, "asin, SP, 128", -1.0, 1, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_acosf4_u10, "acos, SP, 128", -1.0, 1, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_asinf4_u35, "asin, SP, 128", -1.0, 1.0, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_acosf4_u35, "acos, SP, 128", -1.0, 1.0, 4.0, abufsp, vfloat); fillSP(abufsp, -10, 10); fillSP(bbufsp, -10, 10); callFuncSLEEF1_1(Sleef_atanf4_u10, "atan, SP, 128", -10, 10, 1.0, abufsp, vfloat); callFuncSLEEF1_2(Sleef_atan2f4_u10, "atan2, SP, 128", -10, 10, -10, 10, 1.0, abufsp, bbufsp, vfloat); callFuncSLEEF1_1(Sleef_atanf4_u35, "atan, SP, 128", -10, 10, 4.0, abufsp, vfloat); callFuncSLEEF1_2(Sleef_atan2f4_u35, "atan2, SP, 128", -10, 10, -10, 10, 4.0, abufsp, bbufsp, vfloat); } #else // #ifdef ENABLED void benchSleef128_DPTrig() {} void benchSleef128_DPNontrig() {} void benchSleef128_SPTrig() {} void benchSleef128_SPNontrig() {} #endif // #ifdef ENABLED 
sleef-3.5.1/src/libm-benchmarks/benchsleef256.c000066400000000000000000000202111373003144100211320ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include void fillDP(double *buf, double min, double max); void fillSP(float *buf, double min, double max); extern char x86BrandString[256], versionString[1024]; extern int veclen; extern double *abufdp, *bbufdp; extern float *abufsp, *bbufsp; extern FILE *fp; #include "bench.h" #ifdef __AVX__ #if defined(_MSC_VER) #include #else #include #endif typedef __m256d vdouble; typedef __m256 vfloat; #define ENABLED #endif #ifdef ENABLED void benchSleef256_DPTrig() { fillDP(abufdp, 0, 6.28); callFuncSLEEF1_1(Sleef_sind4_u10 , "sin, DP, 256", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd4_u10 , "cos, DP, 256", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand4_u10 , "tan, DP, 256", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd4_u10, "sincos, DP, 256", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind4_u35 , "sin, DP, 256", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd4_u35 , "cos, DP, 256", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand4_u35 , "tan, DP, 256", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd4_u35, "sincos, DP, 256", 0, 6.28, 4.0, abufdp, vdouble); fillDP(abufdp, 0, 1e+6); callFuncSLEEF1_1(Sleef_sind4_u10 , "sin, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd4_u10 , "cos, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand4_u10 , "tan, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd4_u10, "sincos, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind4_u35 , "sin, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble); 
callFuncSLEEF1_1(Sleef_cosd4_u35 , "cos, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand4_u35 , "tan, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd4_u35, "sincos, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble); fillDP(abufdp, 0, 1e+100); callFuncSLEEF1_1(Sleef_sind4_u10 , "sin, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd4_u10 , "cos, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand4_u10 , "tan, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd4_u10, "sincos, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind4_u35 , "sin, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd4_u35 , "cos, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand4_u35 , "tan, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd4_u35, "sincos, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble); } void benchSleef256_DPNontrig() { fillDP(abufdp, 0, 1e+300); callFuncSLEEF1_1(Sleef_logd4_u10 , "log, DP, 256", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_log10d4_u10, "log10, DP, 256", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_log1pd4_u10, "log1p, DP, 256", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_logd4_u35 , "log, DP, 256", 0, 1e+300, 4.0, abufdp, vdouble); fillDP(abufdp, -700, 700); callFuncSLEEF1_1(Sleef_expd4_u10 , "exp, DP, 256", -700, 700, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_exp2d4_u10 , "exp2, DP, 256", -700, 700, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_exp10d4_u10, "exp10, DP, 256", -700, 700, 1.0, abufdp, vdouble); fillDP(abufdp, -30, 30); fillDP(bbufdp, -30, 30); callFuncSLEEF1_2(Sleef_powd4_u10, "pow, DP, 256", -30, 30, -30, 30, 1.0, abufdp, bbufdp, vdouble); fillDP(abufdp, -1.0, 1.0); callFuncSLEEF1_1(Sleef_asind4_u10, "asin, DP, 256", -1.0, 1.0, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_acosd4_u10, "acos, DP, 256", -1.0, 1.0, 1.0, 
abufdp, vdouble); callFuncSLEEF1_1(Sleef_asind4_u35, "asin, DP, 256", -1.0, 1.0, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_acosd4_u35, "acos, DP, 256", -1.0, 1.0, 4.0, abufdp, vdouble); fillDP(abufdp, -10, 10); fillDP(bbufdp, -10, 10); callFuncSLEEF1_1(Sleef_atand4_u10, "atan, DP, 256", -10, 10, 1.0, abufdp, vdouble); callFuncSLEEF1_2(Sleef_atan2d4_u10, "atan2, DP, 256", -10, 10, -10, 10, 1.0, abufdp, bbufdp, vdouble); callFuncSLEEF1_1(Sleef_atand4_u35, "atan, DP, 256", -10, 10, 4.0, abufdp, vdouble); callFuncSLEEF1_2(Sleef_atan2d4_u35, "atan2, DP, 256", -10, 10, -10, 10, 4.0, abufdp, bbufdp, vdouble); } void benchSleef256_SPTrig() { fillSP(abufsp, 0, 6.28); callFuncSLEEF1_1(Sleef_sinf8_u10 , "sin, SP, 256", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf8_u10 , "cos, SP, 256", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf8_u10 , "tan, SP, 256", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf8_u10, "sincos, SP, 256", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sinf8_u35 , "sin, SP, 256", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf8_u35 , "cos, SP, 256", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf8_u35 , "tan, SP, 256", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf8_u35, "sincos, SP, 256", 0, 6.28, 4.0, abufsp, vfloat); fillSP(abufsp, 0, 1e+20); callFuncSLEEF1_1(Sleef_sinf8_u10 , "sin, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf8_u10 , "cos, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf8_u10 , "tan, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf8_u10, "sincos, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sinf8_u35 , "sin, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf8_u35 , "cos, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf8_u35 , "tan, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf8_u35, "sincos, SP, 
256", 0, 1e+20, 4.0, abufsp, vfloat); } void benchSleef256_SPNontrig() { fillSP(abufsp, 0, 1e+38); callFuncSLEEF1_1(Sleef_logf8_u10 , "log, SP, 256", 0, 1e+38, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_log10f8_u10, "log10, SP, 256", 0, 1e+38, 1.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log1pf8_u10, "log1p, SP, 256", 0, 1e+38, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_logf8_u35 , "log, SP, 256", 0, 1e+38, 4.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log10f8_u35, "log10, SP, 256", 0, 1e+38, 4.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log1pf8_u35, "log1p, SP, 256", 0, 1e+38, 4.0, abufsp, vfloat); fillSP(abufsp, -100, 100); callFuncSLEEF1_1(Sleef_expf8_u10 , "exp, SP, 256", -100, 100, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_exp2f8_u10 , "exp2, SP, 256", -100, 100, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_exp10f8_u10, "exp10, SP, 256", -100, 100, 1.0, abufsp, vfloat); fillSP(abufsp, -30, 30); fillSP(bbufsp, -30, 30); callFuncSLEEF1_2(Sleef_powf8_u10, "pow, SP, 256", -30, 30, -30, 30, 1.0, abufsp, bbufsp, vfloat); fillSP(abufsp, -1.0, 1.0); callFuncSLEEF1_1(Sleef_asinf8_u10, "asin, SP, 256", -1.0, 1, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_acosf8_u10, "acos, SP, 256", -1.0, 1, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_asinf8_u35, "asin, SP, 256", -1.0, 1.0, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_acosf8_u35, "acos, SP, 256", -1.0, 1.0, 4.0, abufsp, vfloat); fillSP(abufsp, -10, 10); fillSP(bbufsp, -10, 10); callFuncSLEEF1_1(Sleef_atanf8_u10, "atan, SP, 256", -10, 10, 1.0, abufsp, vfloat); callFuncSLEEF1_2(Sleef_atan2f8_u10, "atan2, SP, 256", -10, 10, -10, 10, 1.0, abufsp, bbufsp, vfloat); callFuncSLEEF1_1(Sleef_atanf8_u35, "atan, SP, 256", -10, 10, 4.0, abufsp, vfloat); callFuncSLEEF1_2(Sleef_atan2f8_u35, "atan2, SP, 256", -10, 10, -10, 10, 4.0, abufsp, bbufsp, vfloat); } #else // #ifdef ENABLED void zeroupper256() {} void benchSleef256_DPTrig() {} void benchSleef256_DPNontrig() {} void benchSleef256_SPTrig() {} void 
benchSleef256_SPNontrig() {} #endif // #ifdef ENABLED sleef-3.5.1/src/libm-benchmarks/benchsleef512.c000066400000000000000000000202301373003144100211260ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include void fillDP(double *buf, double min, double max); void fillSP(float *buf, double min, double max); extern char x86BrandString[256], versionString[1024]; extern int veclen; extern double *abufdp, *bbufdp; extern float *abufsp, *bbufsp; extern FILE *fp; #include "bench.h" #ifdef __AVX512F__ #if defined(_MSC_VER) #include #else #include #endif typedef __m512d vdouble; typedef __m512 vfloat; #define ENABLED #endif #ifdef ENABLED void benchSleef512_DPTrig() { fillDP(abufdp, 0, 6.28); callFuncSLEEF1_1(Sleef_sind8_u10 , "sin, DP, 512", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd8_u10 , "cos, DP, 512", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand8_u10 , "tan, DP, 512", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd8_u10, "sincos, DP, 512", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind8_u35 , "sin, DP, 512", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd8_u35 , "cos, DP, 512", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand8_u35 , "tan, DP, 512", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd8_u35, "sincos, DP, 512", 0, 6.28, 4.0, abufdp, vdouble); fillDP(abufdp, 0, 1e+6); callFuncSLEEF1_1(Sleef_sind8_u10 , "sin, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd8_u10 , "cos, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand8_u10 , "tan, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd8_u10, "sincos, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble); 
callFuncSLEEF1_1(Sleef_sind8_u35 , "sin, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd8_u35 , "cos, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand8_u35 , "tan, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd8_u35, "sincos, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble); fillDP(abufdp, 0, 1e+100); callFuncSLEEF1_1(Sleef_sind8_u10 , "sin, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd8_u10 , "cos, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand8_u10 , "tan, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd8_u10, "sincos, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind8_u35 , "sin, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd8_u35 , "cos, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand8_u35 , "tan, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd8_u35, "sincos, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble); } void benchSleef512_DPNontrig() { fillDP(abufdp, 0, 1e+300); callFuncSLEEF1_1(Sleef_logd8_u10 , "log, DP, 512", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_log10d8_u10, "log10, DP, 512", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_log1pd8_u10, "log1p, DP, 512", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_logd8_u35 , "log, DP, 512", 0, 1e+300, 4.0, abufdp, vdouble); fillDP(abufdp, -700, 700); callFuncSLEEF1_1(Sleef_expd8_u10 , "exp, DP, 512", -700, 700, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_exp2d8_u10 , "exp2, DP, 512", -700, 700, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_exp10d8_u10, "exp10, DP, 512", -700, 700, 1.0, abufdp, vdouble); fillDP(abufdp, -30, 30); fillDP(bbufdp, -30, 30); callFuncSLEEF1_2(Sleef_powd8_u10, "pow, DP, 512", -30, 30, -30, 30, 1.0, abufdp, bbufdp, vdouble); fillDP(abufdp, -1.0, 1.0); callFuncSLEEF1_1(Sleef_asind8_u10, "asin, DP, 512", -1.0, 1.0, 1.0, 
abufdp, vdouble); callFuncSLEEF1_1(Sleef_acosd8_u10, "acos, DP, 512", -1.0, 1.0, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_asind8_u35, "asin, DP, 512", -1.0, 1.0, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_acosd8_u35, "acos, DP, 512", -1.0, 1.0, 4.0, abufdp, vdouble); fillDP(abufdp, -10, 10); fillDP(bbufdp, -10, 10); callFuncSLEEF1_1(Sleef_atand8_u10, "atan, DP, 512", -10, 10, 1.0, abufdp, vdouble); callFuncSLEEF1_2(Sleef_atan2d8_u10, "atan2, DP, 512", -10, 10, -10, 10, 1.0, abufdp, bbufdp, vdouble); callFuncSLEEF1_1(Sleef_atand8_u35, "atan, DP, 512", -10, 10, 4.0, abufdp, vdouble); callFuncSLEEF1_2(Sleef_atan2d8_u35, "atan2, DP, 512", -10, 10, -10, 10, 4.0, abufdp, bbufdp, vdouble); } void benchSleef512_SPTrig() { fillSP(abufsp, 0, 6.28); callFuncSLEEF1_1(Sleef_sinf16_u10 , "sin, SP, 512", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf16_u10 , "cos, SP, 512", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf16_u10 , "tan, SP, 512", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf16_u10, "sincos, SP, 512", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sinf16_u35 , "sin, SP, 512", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf16_u35 , "cos, SP, 512", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf16_u35 , "tan, SP, 512", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf16_u35, "sincos, SP, 512", 0, 6.28, 4.0, abufsp, vfloat); fillSP(abufsp, 0, 1e+20); callFuncSLEEF1_1(Sleef_sinf16_u10 , "sin, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf16_u10 , "cos, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf16_u10 , "tan, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf16_u10, "sincos, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sinf16_u35 , "sin, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf16_u35 , "cos, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf16_u35 
, "tan, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf16_u35, "sincos, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat); } void benchSleef512_SPNontrig() { fillSP(abufsp, 0, 1e+38); callFuncSLEEF1_1(Sleef_logf16_u10 , "log, SP, 512", 0, 1e+38, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_log10f16_u10, "log10, SP, 512", 0, 1e+38, 1.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log1pf16_u10, "log1p, SP, 512", 0, 1e+38, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_logf16_u35 , "log, SP, 512", 0, 1e+38, 4.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log10f16_u35, "log10, SP, 512", 0, 1e+38, 4.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log1pf16_u35, "log1p, SP, 512", 0, 1e+38, 4.0, abufsp, vfloat); fillSP(abufsp, -100, 100); callFuncSLEEF1_1(Sleef_expf16_u10 , "exp, SP, 512", -100, 100, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_exp2f16_u10 , "exp2, SP, 512", -100, 100, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_exp10f16_u10, "exp10, SP, 512", -100, 100, 1.0, abufsp, vfloat); fillSP(abufsp, -30, 30); fillSP(bbufsp, -30, 30); callFuncSLEEF1_2(Sleef_powf16_u10, "pow, SP, 512", -30, 30, -30, 30, 1.0, abufsp, bbufsp, vfloat); fillSP(abufsp, -1.0, 1.0); callFuncSLEEF1_1(Sleef_asinf16_u10, "asin, SP, 512", -1.0, 1, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_acosf16_u10, "acos, SP, 512", -1.0, 1, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_asinf16_u35, "asin, SP, 512", -1.0, 1.0, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_acosf16_u35, "acos, SP, 512", -1.0, 1.0, 4.0, abufsp, vfloat); fillSP(abufsp, -10, 10); fillSP(bbufsp, -10, 10); callFuncSLEEF1_1(Sleef_atanf16_u10, "atan, SP, 512", -10, 10, 1.0, abufsp, vfloat); callFuncSLEEF1_2(Sleef_atan2f16_u10, "atan2, SP, 512", -10, 10, -10, 10, 1.0, abufsp, bbufsp, vfloat); callFuncSLEEF1_1(Sleef_atanf16_u35, "atan, SP, 512", -10, 10, 4.0, abufsp, vfloat); callFuncSLEEF1_2(Sleef_atan2f16_u35, "atan2, SP, 512", -10, 10, -10, 10, 4.0, abufsp, bbufsp, vfloat); } #else // #ifdef ENABLED void 
benchSleef512_DPTrig() {} void benchSleef512_DPNontrig() {} void benchSleef512_SPTrig() {} void benchSleef512_SPNontrig() {} #endif // #ifdef ENABLED sleef-3.5.1/src/libm-benchmarks/benchsvml.c000066400000000000000000000075601373003144100205740ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #include "bench.h" int veclen = 16; int enableLogExp; double *abufdp, *bbufdp; float *abufsp, *bbufsp; FILE *fp; #if defined(__i386__) || defined(__x86_64__) void x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) { uint32_t a, b, c, d; __asm__ __volatile__ ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (eax), "c"(ecx)); out[0] = a; out[1] = b; out[2] = c; out[3] = d; } int cpuSupportsAVX() { int32_t reg[4]; x86CpuID(reg, 1, 0); return (reg[2] & (1 << 28)) != 0; } int cpuSupportsAVX512F() { int32_t reg[4]; x86CpuID(reg, 7, 0); return (reg[1] & (1 << 16)) != 0; } #endif uint64_t Sleef_currentTimeMicros() { struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return (uint64_t)tp.tv_sec * 1000000LL + ((uint64_t)tp.tv_nsec/1000); } void fillDP(double *buf, double min, double max) { for(int i=0;i= 3) fnBase = argv[2]; srandom(time(NULL)); #if defined(__i386__) || defined(__x86_64__) int do128bit = 1; int do256bit = cpuSupportsAVX(); int do512bit = cpuSupportsAVX512F(); #elif defined(__ARM_NEON) int do128bit = 1; int do256bit = 0; int do512bit = 0; #else #error Unsupported architecture #endif posix_memalign((void **)&abufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double)); posix_memalign((void **)&bbufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double)); abufsp = (float *)abufdp; bbufsp = (float *)bbufdp; enableLogExp = SVMLULP < 2; sprintf(fn, "%sdptrig%gulp.out", fnBase, (double)SVMLULP); fp = 
fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do256bit) zeroupper256(); if (do128bit) benchSVML128_DPTrig(); if (do256bit) benchSVML256_DPTrig(); if (do512bit) benchSVML512_DPTrig(); fclose(fp); sprintf(fn, "%sdpnontrig%gulp.out", fnBase, (double)SVMLULP); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do256bit) zeroupper256(); if (do128bit) benchSVML128_DPNontrig(); if (do256bit) benchSVML256_DPNontrig(); if (do512bit) benchSVML512_DPNontrig(); fclose(fp); sprintf(fn, "%ssptrig%gulp.out", fnBase, (double)SVMLULP); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do256bit) zeroupper256(); if (do128bit) benchSVML128_SPTrig(); if (do256bit) benchSVML256_SPTrig(); if (do512bit) benchSVML512_SPTrig(); fclose(fp); sprintf(fn, "%sspnontrig%gulp.out", fnBase, (double)SVMLULP); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do256bit) zeroupper256(); if (do128bit) benchSVML128_SPNontrig(); if (do256bit) benchSVML256_SPNontrig(); if (do512bit) benchSVML512_SPNontrig(); fclose(fp); exit(0); } sleef-3.5.1/src/libm-benchmarks/benchsvml128.c000066400000000000000000000117331373003144100210240ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include uint64_t Sleef_currentTimeMicros(); void fillDP(double *buf, double min, double max); void fillSP(float *buf, double min, double max); extern char x86BrandString[256], versionString[1024]; extern int veclen; extern int enableLogExp; extern double *abufdp, *bbufdp; extern float *abufsp, *bbufsp; extern FILE *fp; #include "bench.h" #ifdef __SSE2__ typedef __m128d vdouble; typedef __m128 vfloat; #define ENABLED #endif #ifdef ENABLED void benchSVML128_DPTrig() { fillDP(abufdp, 0, 6.28); callFuncSVML1_1(_mm_sin_pd , "sin, DP, 128", 0, 6.28, abufdp, vdouble); callFuncSVML1_1(_mm_cos_pd , "cos, DP, 128", 0, 6.28, abufdp, vdouble); callFuncSVML1_1(_mm_tan_pd , "tan, DP, 128", 0, 6.28, abufdp, vdouble); callFuncSVML2_1(_mm_sincos_pd, "sincos, DP, 128", 0, 6.28, abufdp, vdouble); fillDP(abufdp, 0, 1e+6); callFuncSVML1_1(_mm_sin_pd , "sin, DP, 128", 0, 1e+6, abufdp, vdouble); callFuncSVML1_1(_mm_cos_pd , "cos, DP, 128", 0, 1e+6, abufdp, vdouble); callFuncSVML1_1(_mm_tan_pd , "tan, DP, 128", 0, 1e+6, abufdp, vdouble); callFuncSVML2_1(_mm_sincos_pd, "sincos, DP, 128", 0, 1e+6, abufdp, vdouble); fillDP(abufdp, 0, 1e+100); callFuncSVML1_1(_mm_sin_pd , "sin, DP, 128", 0, 1e+100, abufdp, vdouble); callFuncSVML1_1(_mm_cos_pd , "cos, DP, 128", 0, 1e+100, abufdp, vdouble); callFuncSVML1_1(_mm_tan_pd , "tan, DP, 128", 0, 1e+100, abufdp, vdouble); callFuncSVML2_1(_mm_sincos_pd, "sincos, DP, 128", 0, 1e+100, abufdp, vdouble); } void benchSVML128_DPNontrig() { fillDP(abufdp, 0, 1e+300); callFuncSVML1_1(_mm_log_pd , "log, DP, 128", 0, 1e+300, abufdp, vdouble); if (enableLogExp) { callFuncSVML1_1(_mm_log10_pd, "log10, DP, 128", 0, 1e+300, abufdp, vdouble); callFuncSVML1_1(_mm_log1p_pd, "log1p, DP, 128", 0, 1e+300, abufdp, vdouble); fillDP(abufdp, -700, 700); callFuncSVML1_1(_mm_exp_pd , "exp, DP, 128", -700, 700, abufdp, vdouble); 
callFuncSVML1_1(_mm_exp2_pd , "exp2, DP, 128", -700, 700, abufdp, vdouble); callFuncSVML1_1(_mm_exp10_pd, "exp10, DP, 128", -700, 700, abufdp, vdouble); fillDP(abufdp, -30, 30); fillDP(bbufdp, -30, 30); callFuncSVML1_2(_mm_pow_pd, "pow, DP, 128", -30, 30, -30, 30, abufdp, bbufdp, vdouble); } fillDP(abufdp, -1.0, 1.0); callFuncSVML1_1(_mm_asin_pd, "asin, DP, 128", -1.0, 1.0, abufdp, vdouble); callFuncSVML1_1(_mm_acos_pd, "acos, DP, 128", -1.0, 1.0, abufdp, vdouble); fillDP(abufdp, -10, 10); fillDP(bbufdp, -10, 10); callFuncSVML1_1(_mm_atan_pd, "atan, DP, 128", -10, 10, abufdp, vdouble); callFuncSVML1_2(_mm_atan2_pd, "atan2, DP, 128", -10, 10, -10, 10, abufdp, bbufdp, vdouble); } void benchSVML128_SPTrig() { fillSP(abufsp, 0, 6.28); callFuncSVML1_1(_mm_sin_ps , "sin, SP, 128", 0, 6.28, abufsp, vfloat); callFuncSVML1_1(_mm_cos_ps , "cos, SP, 128", 0, 6.28, abufsp, vfloat); callFuncSVML1_1(_mm_tan_ps , "tan, SP, 128", 0, 6.28, abufsp, vfloat); callFuncSVML2_1(_mm_sincos_ps, "sincos, SP, 128", 0, 6.28, abufsp, vfloat); fillSP(abufsp, 0, 1e+20); callFuncSVML1_1(_mm_sin_ps , "sin, SP, 128", 0, 1e+20, abufsp, vfloat); callFuncSVML1_1(_mm_cos_ps , "cos, SP, 128", 0, 1e+20, abufsp, vfloat); callFuncSVML1_1(_mm_tan_ps , "tan, SP, 128", 0, 1e+20, abufsp, vfloat); callFuncSVML2_1(_mm_sincos_ps, "sincos, SP, 128", 0, 1e+20, abufsp, vfloat); } void benchSVML128_SPNontrig() { fillSP(abufsp, 0, 1e+38); callFuncSVML1_1(_mm_log_ps , "log, SP, 128", 0, 1e+38, abufsp, vfloat); if (enableLogExp) { callFuncSVML1_1(_mm_log10_ps, "log10, SP, 128", 0, 1e+38, abufsp, vfloat); //callFuncSVML1_1(_mm_log1p_ps, "log1p, SP, 128", 0, 1e+38, abufsp, vfloat); fillSP(abufsp, -100, 100); callFuncSVML1_1(_mm_exp_ps , "exp, SP, 128", -100, 100, abufsp, vfloat); callFuncSVML1_1(_mm_exp2_ps , "exp2, SP, 128", -100, 100, abufsp, vfloat); callFuncSVML1_1(_mm_exp10_ps, "exp10, SP, 128", -100, 100, abufsp, vfloat); fillSP(abufsp, -30, 30); fillSP(bbufsp, -30, 30); callFuncSVML1_2(_mm_pow_ps, "pow, SP, 128", 
-30, 30, -30, 30, abufsp, bbufsp, vfloat);
  }

  fillSP(abufsp, -1.0, 1.0);
  callFuncSVML1_1(_mm_asin_ps, "asin, SP, 128", -1.0, 1, abufsp, vfloat);
  callFuncSVML1_1(_mm_acos_ps, "acos, SP, 128", -1.0, 1, abufsp, vfloat);

  fillSP(abufsp, -10, 10);
  fillSP(bbufsp, -10, 10);
  callFuncSVML1_1(_mm_atan_ps, "atan, SP, 128", -10, 10, abufsp, vfloat);
  callFuncSVML1_2(_mm_atan2_ps, "atan2, SP, 128", -10, 10, -10, 10, abufsp, bbufsp, vfloat);
}
#else // #ifdef ENABLED
// Empty stand-ins so the four entry points exist when ENABLED is not defined.
void benchSVML128_DPTrig() {}
void benchSVML128_DPNontrig() {}
void benchSVML128_SPTrig() {}
void benchSVML128_SPNontrig() {}
#endif // #ifdef ENABLED
sleef-3.5.1/src/libm-benchmarks/benchsvml256.c000066400000000000000000000122401373003144100210200ustar00rootroot00000000000000
// Copyright Naoki Shibata and contributors 2010 - 2020.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// SVML benchmark entry points for 256-bit vectors.
//
// NOTE(review): the header names of the eight #include directives below are
// missing in this copy of the file (only the bare directives survive).  The
// code needs at least declarations for FILE, uint64_t and the __m256 /
// __m256d vector types -- restore the names from the upstream source.
#include
#include
#include
#include
#include
#include
#include
#include

// Helpers and shared state that are only declared here; their definitions
// live outside this file.
uint64_t Sleef_currentTimeMicros();
void fillDP(double *buf, double min, double max);
void fillSP(float *buf, double min, double max);

extern char x86BrandString[256], versionString[1024];
extern int veclen;
extern int enableLogExp;
extern double *abufdp, *bbufdp;
extern float *abufsp, *bbufsp;
extern FILE *fp;

// Provides the callFuncSVML* macros used below (not visible from this file;
// presumably they time the given intrinsic over the buffer -- confirm in
// bench.h).
#include "bench.h"

// The real benchmarks are built only when the compiler defines __AVX__;
// otherwise the empty stubs in the #else branch at the end are used.
#ifdef __AVX__
typedef __m256d vdouble;
typedef __m256 vfloat;
#define ENABLED
#endif

#ifdef ENABLED
// Plain-function wrapper around the _mm256_zeroupper() intrinsic.
void zeroupper256() { _mm256_zeroupper(); }

// Double-precision sin, cos, tan and sincos over three argument ranges:
// [0, 6.28], [0, 1e+6] and [0, 1e+100].  abufdp is refilled with fillDP()
// before each range.
void benchSVML256_DPTrig() {
  fillDP(abufdp, 0, 6.28);

  callFuncSVML1_1(_mm256_sin_pd , "sin, DP, 256", 0, 6.28, abufdp, vdouble);
  callFuncSVML1_1(_mm256_cos_pd , "cos, DP, 256", 0, 6.28, abufdp, vdouble);
  callFuncSVML1_1(_mm256_tan_pd , "tan, DP, 256", 0, 6.28, abufdp, vdouble);
  callFuncSVML2_1(_mm256_sincos_pd, "sincos, DP, 256", 0, 6.28, abufdp, vdouble);

  fillDP(abufdp, 0, 1e+6);

  callFuncSVML1_1(_mm256_sin_pd , "sin, DP, 256", 0, 1e+6, abufdp, vdouble);
  callFuncSVML1_1(_mm256_cos_pd , "cos, DP, 256", 0, 1e+6, abufdp, vdouble);
  callFuncSVML1_1(_mm256_tan_pd , "tan, DP, 256", 0, 1e+6, abufdp, vdouble);
  callFuncSVML2_1(_mm256_sincos_pd, "sincos, DP, 256", 0, 1e+6, abufdp, vdouble);

  fillDP(abufdp, 0, 1e+100);

  callFuncSVML1_1(_mm256_sin_pd , "sin, DP, 256", 0, 1e+100, abufdp, vdouble);
  callFuncSVML1_1(_mm256_cos_pd , "cos, DP, 256", 0, 1e+100, abufdp, vdouble);
  callFuncSVML1_1(_mm256_tan_pd , "tan, DP, 256", 0, 1e+100, abufdp, vdouble);
  callFuncSVML2_1(_mm256_sincos_pd, "sincos, DP, 256", 0, 1e+100, abufdp, vdouble);
}

// Double-precision non-trigonometric functions.  log, asin, acos, atan and
// atan2 always run; log10, log1p, exp, exp2, exp10 and pow run only when
// enableLogExp is nonzero.
void benchSVML256_DPNontrig() {
  fillDP(abufdp, 0, 1e+300);

  callFuncSVML1_1(_mm256_log_pd , "log, DP, 256", 0, 1e+300, abufdp, vdouble);

  if (enableLogExp) {
    callFuncSVML1_1(_mm256_log10_pd, "log10, DP, 256", 0, 1e+300, abufdp, vdouble);
    callFuncSVML1_1(_mm256_log1p_pd, "log1p, DP, 256", 0, 1e+300, abufdp, vdouble);

    fillDP(abufdp, -700, 700);

    callFuncSVML1_1(_mm256_exp_pd , "exp, DP, 256", -700, 700, abufdp, vdouble);
    callFuncSVML1_1(_mm256_exp2_pd , "exp2, DP, 256", -700, 700, abufdp, vdouble);
    callFuncSVML1_1(_mm256_exp10_pd, "exp10, DP, 256", -700, 700, abufdp, vdouble);

    // pow takes two inputs, so both buffers are filled.
    fillDP(abufdp, -30, 30);
    fillDP(bbufdp, -30, 30);

    callFuncSVML1_2(_mm256_pow_pd, "pow, DP, 256", -30, 30, -30, 30, abufdp, bbufdp, vdouble);
  }

  fillDP(abufdp, -1.0, 1.0);

  callFuncSVML1_1(_mm256_asin_pd, "asin, DP, 256", -1.0, 1.0, abufdp, vdouble);
  callFuncSVML1_1(_mm256_acos_pd, "acos, DP, 256", -1.0, 1.0, abufdp, vdouble);

  fillDP(abufdp, -10, 10);
  fillDP(bbufdp, -10, 10);

  callFuncSVML1_1(_mm256_atan_pd, "atan, DP, 256", -10, 10, abufdp, vdouble);
  callFuncSVML1_2(_mm256_atan2_pd, "atan2, DP, 256", -10, 10, -10, 10, abufdp, bbufdp, vdouble);
}

// Single-precision sin, cos, tan and sincos over two argument ranges:
// [0, 6.28] and [0, 1e+20].
void benchSVML256_SPTrig() {
  fillSP(abufsp, 0, 6.28);

  callFuncSVML1_1(_mm256_sin_ps , "sin, SP, 256", 0, 6.28, abufsp, vfloat);
  callFuncSVML1_1(_mm256_cos_ps , "cos, SP, 256", 0, 6.28, abufsp, vfloat);
  callFuncSVML1_1(_mm256_tan_ps , "tan, SP, 256", 0, 6.28, abufsp, vfloat);
  callFuncSVML2_1(_mm256_sincos_ps, "sincos, SP, 256", 0, 6.28, abufsp, vfloat);

  fillSP(abufsp, 0, 1e+20);

  callFuncSVML1_1(_mm256_sin_ps , "sin, SP, 256", 0, 1e+20, abufsp, vfloat);
  callFuncSVML1_1(_mm256_cos_ps , "cos, SP, 256", 0, 1e+20, abufsp, vfloat);
  callFuncSVML1_1(_mm256_tan_ps , "tan, SP, 256", 0, 1e+20, abufsp, vfloat);
  callFuncSVML2_1(_mm256_sincos_ps, "sincos, SP, 256", 0, 1e+20, abufsp, vfloat);
}

// Single-precision non-trigonometric functions; same layout as the
// double-precision variant, except that the log1p call is commented out.
void benchSVML256_SPNontrig() {
  fillSP(abufsp, 0, 1e+38);

  callFuncSVML1_1(_mm256_log_ps , "log, SP, 256", 0, 1e+38, abufsp, vfloat);

  if (enableLogExp) {
    callFuncSVML1_1(_mm256_log10_ps, "log10, SP, 256", 0, 1e+38, abufsp, vfloat);
    //callFuncSVML1_1(_mm256_log1p_ps, "log1p, SP, 256", 0, 1e+38, abufsp, vfloat);

    fillSP(abufsp, -100, 100);

    callFuncSVML1_1(_mm256_exp_ps , "exp, SP, 256", -100, 100, abufsp, vfloat);
    callFuncSVML1_1(_mm256_exp2_ps , "exp2, SP, 256", -100, 100, abufsp, vfloat);
    callFuncSVML1_1(_mm256_exp10_ps, "exp10, SP, 256", -100, 100, abufsp, vfloat);

    fillSP(abufsp, -30, 30);
    fillSP(bbufsp, -30, 30);

    callFuncSVML1_2(_mm256_pow_ps, "pow, SP, 256", -30, 30, -30, 30, abufsp, bbufsp, vfloat);
  }

  fillSP(abufsp, -1.0, 1.0);

  callFuncSVML1_1(_mm256_asin_ps, "asin, SP, 256", -1.0, 1, abufsp, vfloat);
  callFuncSVML1_1(_mm256_acos_ps, "acos, SP, 256", -1.0, 1, abufsp, vfloat);

  fillSP(abufsp, -10, 10);
  fillSP(bbufsp, -10, 10);

  callFuncSVML1_1(_mm256_atan_ps, "atan, SP, 256", -10, 10, abufsp, vfloat);
  callFuncSVML1_2(_mm256_atan2_ps, "atan2, SP, 256", -10, 10, -10, 10, abufsp, bbufsp, vfloat);
}
#else // #ifdef ENABLED
// Empty stand-ins so the entry points exist when ENABLED is not defined.
void zeroupper256() {}
void benchSVML256_DPTrig() {}
void benchSVML256_DPNontrig() {}
void benchSVML256_SPTrig() {}
void benchSVML256_SPNontrig() {}
#endif // #ifdef ENABLED
sleef-3.5.1/src/libm-benchmarks/benchsvml512.c000066400000000000000000000121421373003144100210140ustar00rootroot00000000000000
// Copyright Naoki Shibata and contributors 2010 - 2020.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// SVML benchmark entry points for 512-bit vectors.
//
// NOTE(review): the header names of the eight #include directives below are
// missing in this copy of the file (only the bare directives survive).  The
// code needs at least declarations for FILE, uint64_t and the __m512 /
// __m512d vector types -- restore the names from the upstream source.
#include
#include
#include
#include
#include
#include
#include
#include

// Helpers and shared state that are only declared here; their definitions
// live outside this file.
uint64_t Sleef_currentTimeMicros();
void fillDP(double *buf, double min, double max);
void fillSP(float *buf, double min, double max);

extern char x86BrandString[256], versionString[1024];
extern int veclen;
extern int enableLogExp;
extern double *abufdp, *bbufdp;
extern float *abufsp, *bbufsp;
extern FILE *fp;

// Provides the callFuncSVML* macros used below (not visible from this file;
// presumably they time the given intrinsic over the buffer -- confirm in
// bench.h).
#include "bench.h"

// The real benchmarks are built only when the compiler defines __AVX512F__;
// otherwise the empty stubs in the #else branch at the end are used.
#ifdef __AVX512F__
typedef __m512d vdouble;
typedef __m512 vfloat;
#define ENABLED
#endif

#ifdef ENABLED
// Double-precision sin, cos, tan and sincos over three argument ranges:
// [0, 6.28], [0, 1e+6] and [0, 1e+100].  abufdp is refilled with fillDP()
// before each range.
void benchSVML512_DPTrig() {
  fillDP(abufdp, 0, 6.28);

  callFuncSVML1_1(_mm512_sin_pd , "sin, DP, 512", 0, 6.28, abufdp, vdouble);
  callFuncSVML1_1(_mm512_cos_pd , "cos, DP, 512", 0, 6.28, abufdp, vdouble);
  callFuncSVML1_1(_mm512_tan_pd , "tan, DP, 512", 0, 6.28, abufdp, vdouble);
  callFuncSVML2_1(_mm512_sincos_pd, "sincos, DP, 512", 0, 6.28, abufdp, vdouble);

  fillDP(abufdp, 0, 1e+6);

  callFuncSVML1_1(_mm512_sin_pd , "sin, DP, 512", 0, 1e+6, abufdp, vdouble);
  callFuncSVML1_1(_mm512_cos_pd , "cos, DP, 512", 0, 1e+6, abufdp, vdouble);
  callFuncSVML1_1(_mm512_tan_pd , "tan, DP, 512", 0, 1e+6, abufdp, vdouble);
  callFuncSVML2_1(_mm512_sincos_pd, "sincos, DP, 512", 0, 1e+6, abufdp, vdouble);

  fillDP(abufdp, 0, 1e+100);

  callFuncSVML1_1(_mm512_sin_pd , "sin, DP, 512", 0, 1e+100, abufdp, vdouble);
  callFuncSVML1_1(_mm512_cos_pd , "cos, DP, 512", 0, 1e+100, abufdp, vdouble);
  callFuncSVML1_1(_mm512_tan_pd , "tan, DP, 512", 0, 1e+100, abufdp, vdouble);
  callFuncSVML2_1(_mm512_sincos_pd, "sincos, DP, 512", 0, 1e+100, abufdp, vdouble);
}

// Double-precision non-trigonometric functions.  log, asin, acos, atan and
// atan2 always run; log10, log1p, exp, exp2, exp10 and pow run only when
// enableLogExp is nonzero.
void benchSVML512_DPNontrig() {
  fillDP(abufdp, 0, 1e+300);

  callFuncSVML1_1(_mm512_log_pd , "log, DP, 512", 0, 1e+300, abufdp, vdouble);

  if (enableLogExp) {
    callFuncSVML1_1(_mm512_log10_pd, "log10, DP, 512", 0, 1e+300, abufdp, vdouble);
    callFuncSVML1_1(_mm512_log1p_pd, "log1p, DP, 512", 0, 1e+300, abufdp, vdouble);

    fillDP(abufdp, -700, 700);

    callFuncSVML1_1(_mm512_exp_pd , "exp, DP, 512", -700, 700, abufdp, vdouble);
    callFuncSVML1_1(_mm512_exp2_pd , "exp2, DP, 512", -700, 700, abufdp, vdouble);
    callFuncSVML1_1(_mm512_exp10_pd, "exp10, DP, 512", -700, 700, abufdp, vdouble);

    // pow takes two inputs, so both buffers are filled.
    fillDP(abufdp, -30, 30);
    fillDP(bbufdp, -30, 30);

    callFuncSVML1_2(_mm512_pow_pd, "pow, DP, 512", -30, 30, -30, 30, abufdp, bbufdp, vdouble);
  }

  fillDP(abufdp, -1.0, 1.0);

  callFuncSVML1_1(_mm512_asin_pd, "asin, DP, 512", -1.0, 1.0, abufdp, vdouble);
  callFuncSVML1_1(_mm512_acos_pd, "acos, DP, 512", -1.0, 1.0, abufdp, vdouble);

  fillDP(abufdp, -10, 10);
  fillDP(bbufdp, -10, 10);

  callFuncSVML1_1(_mm512_atan_pd, "atan, DP, 512", -10, 10, abufdp, vdouble);
  callFuncSVML1_2(_mm512_atan2_pd, "atan2, DP, 512", -10, 10, -10, 10, abufdp, bbufdp, vdouble);
}

// Single-precision sin, cos, tan and sincos over two argument ranges:
// [0, 6.28] and [0, 1e+20].
void benchSVML512_SPTrig() {
  fillSP(abufsp, 0, 6.28);

  callFuncSVML1_1(_mm512_sin_ps , "sin, SP, 512", 0, 6.28, abufsp, vfloat);
  callFuncSVML1_1(_mm512_cos_ps , "cos, SP, 512", 0, 6.28, abufsp, vfloat);
  callFuncSVML1_1(_mm512_tan_ps , "tan, SP, 512", 0, 6.28, abufsp, vfloat);
  callFuncSVML2_1(_mm512_sincos_ps, "sincos, SP, 512", 0, 6.28, abufsp, vfloat);

  fillSP(abufsp, 0, 1e+20);

  callFuncSVML1_1(_mm512_sin_ps , "sin, SP, 512", 0, 1e+20, abufsp, vfloat);
  callFuncSVML1_1(_mm512_cos_ps , "cos, SP, 512", 0, 1e+20, abufsp, vfloat);
  callFuncSVML1_1(_mm512_tan_ps , "tan, SP, 512", 0, 1e+20, abufsp, vfloat);
  callFuncSVML2_1(_mm512_sincos_ps, "sincos, SP, 512", 0, 1e+20, abufsp, vfloat);
}

// Single-precision non-trigonometric functions; same layout as the
// double-precision variant, except that the log1p call is commented out.
void benchSVML512_SPNontrig() {
  fillSP(abufsp, 0, 1e+38);

  callFuncSVML1_1(_mm512_log_ps , "log, SP, 512", 0, 1e+38, abufsp, vfloat);

  if (enableLogExp) {
    callFuncSVML1_1(_mm512_log10_ps, "log10, SP, 512", 0, 1e+38, abufsp, vfloat);
    //callFuncSVML1_1(_mm512_log1p_ps, "log1p, SP, 512", 0, 1e+38, abufsp, vfloat);

    fillSP(abufsp, -100, 100);

    callFuncSVML1_1(_mm512_exp_ps , "exp, SP, 512", -100, 100, abufsp, vfloat);
    callFuncSVML1_1(_mm512_exp2_ps , "exp2, SP, 512", -100, 100, abufsp, vfloat);
    callFuncSVML1_1(_mm512_exp10_ps, "exp10, SP, 512", -100, 100, abufsp, vfloat);

    fillSP(abufsp, -30, 30);
    fillSP(bbufsp, -30, 30);

    callFuncSVML1_2(_mm512_pow_ps, "pow, SP, 512", -30, 30, -30, 30, abufsp, bbufsp, vfloat);
  }

  fillSP(abufsp, -1.0, 1.0);

  callFuncSVML1_1(_mm512_asin_ps, "asin, SP, 512", -1.0, 1, abufsp, vfloat);
  callFuncSVML1_1(_mm512_acos_ps, "acos, SP, 512", -1.0, 1, abufsp, vfloat);

  fillSP(abufsp, -10, 10);
  fillSP(bbufsp, -10, 10);

  callFuncSVML1_1(_mm512_atan_ps, "atan, SP, 512", -10, 10, abufsp, vfloat);
  callFuncSVML1_2(_mm512_atan2_ps, "atan2, SP, 512", -10, 10, -10, 10, abufsp, bbufsp, vfloat);
}
#else // #ifdef ENABLED
// Empty stand-ins so the entry points exist when ENABLED is not defined.
void benchSVML512_DPTrig() {}
void benchSVML512_DPNontrig() {}
void benchSVML512_SPTrig() {}
void benchSVML512_SPNontrig() {}
#endif // #ifdef ENABLED
sleef-3.5.1/src/libm-benchmarks/measure.sh000066400000000000000000000005071373003144100204360ustar00rootroot00000000000000
#!/bin/sh
# Ask for a free-text label for this measurement run.
echo
read -p "Enter label of measurement(e.g. My desktop PC) : " label

# Pick up the run counter from counter.txt when it exists, else start at 0.
if [ -f counter.txt ]
then
    counter=`cat counter.txt`
else
    counter=0
fi

echo Measurement in progress. This may take several minutes.
for i in $*; do
    # Each command-line argument is run as a program, with the label and
    # the current counter value as its arguments.
    $i "$label" $counter
done

# Store the incremented counter back in counter.txt.
counter=$((counter+1))
echo $counter > counter.txt
sleef-3.5.1/src/libm-tester/000077500000000000000000000000001373003144100156305ustar00rootroot00000000000000
sleef-3.5.1/src/libm-tester/CMakeLists.txt000066400000000000000000000342621373003144100203770ustar00rootroot00000000000000
# Directory-wide search paths for every target declared in this file.
link_directories(${sleef_BINARY_DIR}/lib)                  # libsleef
link_directories(${sleef_BINARY_DIR}/src/common)           # common.a
include_directories(${sleef_BINARY_DIR}/include)           # sleef.h
include_directories(${sleef_SOURCE_DIR}/src/libm)          # rename.h
include_directories(${sleef_BINARY_DIR}/src/libm/include)  # rename headers

# Without MPFR, look for an already installed `tester` program instead.
if(NOT LIB_MPFR)
  find_program(TESTER_COMMAND tester)
endif()

# ENFORCE_TESTER turns "no tester available" into a configuration error.
if(ENFORCE_TESTER AND NOT LIB_MPFR AND NOT TESTER_COMMAND)
  message(FATAL_ERROR "ENFORCE_TESTER is specified and tester is not available")
endif()

# librt is optional: link against nothing when it is not found.
find_library(LIBRT rt)
if(NOT LIBRT)
  set(LIBRT "")
endif()

set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${SLEEF_C_FLAGS}")

# Property list applied to the targets below through set_target_properties().
set(COMMON_TARGET_PROPERTIES
  C_STANDARD 99  # -std=gnu99
)

if(ENABLE_LTO)
  list(APPEND COMMON_TARGET_PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)  # -flto
endif()

# Per-architecture extension lists: TEST3_CINZ holds the non-FMA variants,
# TEST3_FINZ the FMA variants.
if(SLEEF_ARCH_X86)
  set(TEST3_CINZ purec_scalar sse2 sse4 avx avx512fnofma)
  set(TEST3_FINZ purecfma_scalar avx2128 avx2 avx512f)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
  set(TEST3_CINZ purec_scalar advsimdnofma svenofma)
  set(TEST3_FINZ purecfma_scalar advsimd sve)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
  set(TEST3_CINZ purec_scalar)
  set(TEST3_FINZ purecfma_scalar)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
  set(TEST3_CINZ purec_scalar vsxnofma)
  set(TEST3_FINZ purecfma_scalar vsx)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
  set(TEST3_CINZ purec_scalar zvector2nofma)
  set(TEST3_FINZ purecfma_scalar zvector2)
endif()

# add_test_iut(<IUT>): register a ctest entry that runs the tester program
# against the implementation-under-test executable <IUT>.
function(add_test_iut IUT)
  # Prefer the tester built by this project, else the one found on the system.
  if(LIB_MPFR)
    set(TESTER ${TARGET_TESTER})
  elseif(TESTER_COMMAND)
    set(TESTER ${TESTER_COMMAND})
  endif()
  # When we are crosscompiling
# using the mkrename* tools from a native
  # build, we use the tester executable from the native build.
  if(CMAKE_CROSSCOMPILING AND NATIVE_BUILD_DIR)
    set(TESTER ${NATIVE_BUILD_DIR}/bin/${TARGET_TESTER})
  endif()

  # No tester program: no test is registered.
  if(NOT TESTER)
    return()
  endif()

  # With an emulator, the emulator command is passed to the tester directly.
  if(EMULATOR)
    add_test(NAME ${IUT}
      COMMAND ${TESTER} ${EMULATOR} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${IUT}
      WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
    return()
  endif()

  # Otherwise optionally prepend the SDE and/or ARMIE arguments.
  if(SDE_COMMAND)
    set(FLAGS_SDE "--sde" ${SDE_COMMAND})
  else()
    set(FLAGS_SDE)
  endif()

  if(ARMIE_COMMAND)
    set(FLAGS_ARMIE ${ARMIE_COMMAND} -msve-vector-bits=${SVE_VECTOR_BITS})
  else()
    set(FLAGS_ARMIE)
  endif()

  add_test(NAME ${IUT}
    COMMAND ${TESTER} ${FLAGS_SDE} ${FLAGS_ARMIE} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${IUT}
    WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
endfunction()

# Scalar implementation-under-test executable 'iut'.
add_executable(${TARGET_IUT} iut.c testerutil.c)
target_compile_definitions(${TARGET_IUT} PRIVATE ${COMMON_TARGET_DEFINITIONS})
target_link_libraries(${TARGET_IUT} ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
set_target_properties(${TARGET_IUT} PROPERTIES ${COMMON_TARGET_PROPERTIES})
add_test_iut(${TARGET_IUT})
set(IUT_LIST ${TARGET_IUT})

# Sources shared by all vector-extension iut programs.
set(IUT_SRC iutsimd.c iutsimdmain.c testerutil.c)

# test_extension(<SIMD>): declare the iut / tester executables and tests for
# one vector extension.  Nothing is declared unless COMPILER_SUPPORTS_<SIMD>
# is true.
macro(test_extension SIMD)
  if(COMPILER_SUPPORTS_${SIMD})
    string(TOLOWER ${SIMD} LCSIMD)
    string(CONCAT TARGET_IUT${SIMD} "iut" ${LCSIMD})

    # iut<simd>: the regular vector functions.
    add_executable(${TARGET_IUT${SIMD}} ${IUT_SRC})
    target_compile_options(${TARGET_IUT${SIMD}}
      PRIVATE ${FLAGS_ENABLE_${SIMD}})
    target_compile_definitions(${TARGET_IUT${SIMD}}
      PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS})
    target_link_libraries(${TARGET_IUT${SIMD}}
      ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
    if(FORCE_AAVPCS)
      target_compile_definitions(${TARGET_IUT${SIMD}} PRIVATE ENABLE_AAVPCS=1)
    endif()

    add_dependencies(${TARGET_IUT${SIMD}} ${TARGET_HEADERS})
    add_dependencies(${TARGET_IUT${SIMD}} ${TARGET_LIBSLEEF})
    set_target_properties(${TARGET_IUT${SIMD}} PROPERTIES
      ${COMMON_TARGET_PROPERTIES})
    add_test_iut(${TARGET_IUT${SIMD}})
    list(APPEND IUT_LIST ${TARGET_IUT${SIMD}})

    # iuty<simd>: same sources built with DETERMINISTIC=1, so the
    # deterministic versions of the functions go through the same accuracy
    # and nonnumber tests (e.g. iutysse2 for SSE2).
    string(CONCAT IUTYNAME "iuty" ${LCSIMD})
    add_executable(${IUTYNAME} ${IUT_SRC})
    target_compile_options(${IUTYNAME}
      PRIVATE ${FLAGS_ENABLE_${SIMD}})
    target_compile_definitions(${IUTYNAME}
      PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} DETERMINISTIC=1)
    target_link_libraries(${IUTYNAME} ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
    add_dependencies(${IUTYNAME} ${TARGET_HEADERS})
    add_dependencies(${IUTYNAME} ${TARGET_LIBSLEEF})
    set_target_properties(${IUTYNAME} PROPERTIES ${COMMON_TARGET_PROPERTIES})
    add_test_iut(${IUTYNAME})
    list(APPEND IUT_LIST ${IUTYNAME})

    # The iut programs whose names begin with "iuti" are the iut for the
    # inline version of functions.
if(BUILD_INLINE_HEADERS AND SED_COMMAND)
      string(CONCAT IUTINAME "iuti" ${LCSIMD})
      add_executable(${IUTINAME} ${IUT_SRC})
      target_compile_options(${IUTINAME} PRIVATE ${FLAGS_ENABLE_${SIMD}})
      target_compile_definitions(${IUTINAME}
        PRIVATE
          ENABLE_${SIMD}=1
          ${COMMON_TARGET_DEFINITIONS}
          USE_INLINE_HEADER="sleefinline_${LCSIMD}.h"
          MACRO_ONLY_HEADER="macroonly${SIMD}.h"
          SIMD_SUFFIX=_${LCSIMD}_sleef
      )
      # The generated inline headers are looked up under <build>/inline.
      target_include_directories(${IUTINAME} PRIVATE ${PROJECT_BINARY_DIR}/inline)
      # Unlike the other iut programs, this one does not link libsleef.
      target_link_libraries(${IUTINAME} ${LIBM} ${LIBRT} ${TARGET_LIBINLINE})
      add_dependencies(${IUTINAME} ${TARGET_INLINE_HEADERS})
      set_target_properties(${IUTINAME} PROPERTIES C_STANDARD 99)
      add_test_iut(${IUTINAME})
      list(APPEND IUT_LIST ${IUTINAME})
    endif()

    # tester2<simd>{dp,sp}: MPFR-based testers.  Not built for NEON32,
    # NEON32VFPV4 or under MinGW.
    if(LIB_MPFR AND NOT ${SIMD} STREQUAL NEON32 AND NOT ${SIMD} STREQUAL NEON32VFPV4 AND NOT MINGW)
      string(TOLOWER ${SIMD} SCSIMD)
      foreach(P dp sp)
        set(T "tester2${SCSIMD}${P}")
        add_executable(${T} tester2simd${P}.c testerutil.c)
        if(FORCE_AAVPCS)
          target_compile_definitions(${T} PRIVATE ENABLE_AAVPCS=1)
        endif()
        target_compile_options(${T} PRIVATE ${FLAGS_ENABLE_${SIMD}})
        target_compile_definitions(${T}
          PRIVATE ENABLE_${SIMD}=1 USEMPFR=1 ${COMMON_TARGET_DEFINITIONS})
        set_target_properties(${T} PROPERTIES ${COMMON_TARGET_PROPERTIES})
        target_link_libraries(${T}
          ${TARGET_LIBSLEEF} ${LIB_MPFR} ${LIBM} ${LIBGMP})
        add_dependencies(${T} ${TARGET_HEADERS})
        add_dependencies(${T} ${TARGET_LIBSLEEF})
        if(MPFR_INCLUDE_DIR)
          target_include_directories(${T} PRIVATE ${MPFR_INCLUDE_DIR})
        endif()

        # The tester2 programs whose name begins with "tester2y" are the
        # testing program for the deterministic version of functions.
set(T "tester2y${SCSIMD}${P}")
        add_executable(${T} tester2simd${P}.c testerutil.c)
        target_compile_options(${T} PRIVATE ${FLAGS_ENABLE_${SIMD}})
        target_compile_definitions(${T}
          PRIVATE ENABLE_${SIMD}=1 USEMPFR=1 ${COMMON_TARGET_DEFINITIONS} DETERMINISTIC=1)
        set_target_properties(${T} PROPERTIES ${COMMON_TARGET_PROPERTIES})
        target_link_libraries(${T}
          ${TARGET_LIBSLEEF} ${LIB_MPFR} ${LIBM} ${LIBGMP})
        add_dependencies(${T} ${TARGET_HEADERS})
        add_dependencies(${T} ${TARGET_LIBSLEEF})
        if(MPFR_INCLUDE_DIR)
          target_include_directories(${T} PRIVATE ${MPFR_INCLUDE_DIR})
        endif()
      endforeach()
    endif()

    # tester3<simd>: needs OpenSSL; not built for NEON32 / NEON32VFPV4.
    if(NOT ${SIMD} STREQUAL NEON32 AND NOT ${SIMD} STREQUAL NEON32VFPV4 AND SLEEF_OPENSSL_FOUND)
      string(TOLOWER ${SIMD} SCSIMD)
      set(T "tester3${SCSIMD}")
      add_executable(${T} tester3.c tester3main.c testerutil.c)
      target_compile_options(${T} PRIVATE ${FLAGS_ENABLE_${SIMD}})
      target_compile_definitions(${T}
        PRIVATE ${COMMON_TARGET_DEFINITIONS} ${TESTER3_DEFINITIONS_${SIMD}})
      set_target_properties(${T} PROPERTIES ${COMMON_TARGET_PROPERTIES})

      # Enable Vector PCS for Advanced SIMD (if supported)
      if(FORCE_AAVPCS)
        host_target_AAVPCS_definitions(${T})
      endif()

      target_link_libraries(${T}
        ${TARGET_LIBSLEEF} ${LIBM} ${SLEEF_OPENSSL_LIBRARIES})
      target_include_directories(${T} PRIVATE ${SLEEF_OPENSSL_INCLUDE_DIR})
      add_dependencies(${T} ${TARGET_HEADERS})
      add_dependencies(${T} ${TARGET_LIBSLEEF})

      # Extensions listed in TEST3_CINZ are run against hash_cinz.txt.
      list(FIND TEST3_CINZ ${SCSIMD} INDEX_TEST3_CINZ)
      if(NOT INDEX_TEST3_CINZ EQUAL -1)
        if(SDE_COMMAND)
          add_test(NAME tester3${SCSIMD}
            COMMAND ${SDE_COMMAND} "--" ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tester3${SCSIMD} ${sleef_SOURCE_DIR}/src/libm-tester/hash_cinz.txt)
        elseif(EMULATOR)
          add_test(NAME tester3${SCSIMD}
            COMMAND ${EMULATOR} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tester3${SCSIMD} ${sleef_SOURCE_DIR}/src/libm-tester/hash_cinz.txt)
        else()
          add_test(NAME tester3${SCSIMD}
            COMMAND tester3${SCSIMD} ${sleef_SOURCE_DIR}/src/libm-tester/hash_cinz.txt)
        endif()
      endif()

      # Extensions listed in TEST3_FINZ are run against hash_finz.txt.
      list(FIND TEST3_FINZ ${SCSIMD} INDEX_TEST3_FINZ)
      if(NOT INDEX_TEST3_FINZ EQUAL -1)
        if(SDE_COMMAND)
          add_test(NAME tester3${SCSIMD}
            COMMAND ${SDE_COMMAND} "--" ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tester3${SCSIMD} ${sleef_SOURCE_DIR}/src/libm-tester/hash_finz.txt)
        elseif(EMULATOR)
          add_test(NAME tester3${SCSIMD}
            COMMAND ${EMULATOR} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tester3${SCSIMD} ${sleef_SOURCE_DIR}/src/libm-tester/hash_finz.txt)
        else()
          add_test(NAME tester3${SCSIMD}
            COMMAND tester3${SCSIMD} ${sleef_SOURCE_DIR}/src/libm-tester/hash_finz.txt)
        endif()
      endif()
    endif()
  endif()
endmacro()

# Instantiate the per-extension targets for every supported extension.
foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS})
  test_extension(${SIMD})
endforeach()

# add_gnuabi_compatibility_test(<SIMD> <MASKED>): build gnuabi_compatibility.c
# for one extension and link it against the GNUABI library.  A true <MASKED>
# appends "_masked" to the target name and defines MASKED_GNUABI=1.
function(add_gnuabi_compatibility_test SIMD MASKED)
  if(MASKED)
    set(GNUABI_COMPATIBILITY_TEST gnuabi_compatibility_${SIMD}_masked)
  else()
    set(GNUABI_COMPATIBILITY_TEST gnuabi_compatibility_${SIMD})
  endif()

  add_executable(${GNUABI_COMPATIBILITY_TEST} gnuabi_compatibility.c)
  set_target_properties(${GNUABI_COMPATIBILITY_TEST}
    PROPERTIES ${COMMON_TARGET_PROPERTIES})
  target_compile_options(${GNUABI_COMPATIBILITY_TEST}
    PRIVATE ${FLAGS_ENABLE_${SIMD}})
  target_compile_definitions(${GNUABI_COMPATIBILITY_TEST}
    PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS})
  if(MASKED)
    target_compile_definitions(${GNUABI_COMPATIBILITY_TEST}
      PRIVATE MASKED_GNUABI=1)
  endif()
  if(FORCE_AAVPCS)
    target_compile_definitions(${GNUABI_COMPATIBILITY_TEST}
      PRIVATE ENABLE_AAVPCS=1)
  endif()
  target_link_libraries(${GNUABI_COMPATIBILITY_TEST}
    ${TARGET_LIBSLEEFGNUABI} ${LIBM})
  # These are linker tests that don't really need to be executed,
  # but seeing them in the report of ctest gives an idea of what
  # has been built for testing.
if(EMULATOR)
    # $<TARGET_FILE:...> expands at generate time to the full path of the
    # executable built for this target.  In this copy of the file the
    # generator expression had been reduced to a bare "$".
    add_test(NAME ${GNUABI_COMPATIBILITY_TEST}
      COMMAND ${EMULATOR} $<TARGET_FILE:${GNUABI_COMPATIBILITY_TEST}>
      WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
  elseif(SDE_COMMAND)
    add_test(NAME ${GNUABI_COMPATIBILITY_TEST}
      COMMAND ${SDE_COMMAND} "--" $<TARGET_FILE:${GNUABI_COMPATIBILITY_TEST}>)
  else()
    add_test(NAME ${GNUABI_COMPATIBILITY_TEST}
      COMMAND $<TARGET_FILE:${GNUABI_COMPATIBILITY_TEST}>)
  endif()
endfunction()

# One GNUABI link test per supported extension, plus a masked variant where
# MKMASKED_PARAMS_GNUABI_<SIMD>_sp is set.
if(ENABLE_GNUABI)
  foreach(SIMD ${SLEEF_SUPPORTED_GNUABI_EXTENSIONS})
    if(COMPILER_SUPPORTS_${SIMD})
      # GNUABI compatibility for the unmasked symbols.
      add_gnuabi_compatibility_test(${SIMD} OFF)
      # GNUABI compatibility for the masked symbols.
      if(MKMASKED_PARAMS_GNUABI_${SIMD}_sp)
        add_gnuabi_compatibility_test(${SIMD} ON)
      endif()
    endif()
  endforeach()
endif()

# x86 only: iut programs built with ENABLE_DSP128 / ENABLE_DSP256.
if(SLEEF_ARCH_X86)
  # iutdsp128 (compiled with the SSE2 flags)
  add_executable(iutdsp128 ${IUT_SRC})
  target_compile_definitions(iutdsp128 PRIVATE ENABLE_DSP128=1 ${COMMON_TARGET_DEFINITIONS})
  target_compile_options(iutdsp128 PRIVATE ${FLAGS_ENABLE_SSE2})
  target_link_libraries(iutdsp128 ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
  add_dependencies(iutdsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF})
  add_test_iut(iutdsp128)
  list(APPEND IUT_LIST iutdsp128)

  # iutdsp256 (compiled with the AVX flags)
  add_executable(iutdsp256 ${IUT_SRC})
  target_compile_definitions(iutdsp256 PRIVATE ENABLE_DSP256=1 ${COMMON_TARGET_DEFINITIONS})
  target_compile_options(iutdsp256 PRIVATE ${FLAGS_ENABLE_AVX})
  target_link_libraries(iutdsp256 ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
  add_dependencies(iutdsp256 ${TARGET_HEADERS} ${TARGET_LIBSLEEF})
  add_test_iut(iutdsp256)
  list(APPEND IUT_LIST iutdsp256)
endif()

if(LIB_MPFR AND NOT MINGW)
  # Build tester2 scalar
  foreach(P dp sp)
    set(T "tester2${P}")
    add_executable(${T} tester2${P}.c testerutil.c)
    target_compile_definitions(${T} PRIVATE USEMPFR=1 ${COMMON_TARGET_DEFINITIONS})
    set_target_properties(${T} PROPERTIES ${COMMON_TARGET_PROPERTIES})
    if (FORCE_AAVPCS)
      target_compile_definitions(${T} PRIVATE ENABLE_AAVPCS=1)
endif()
    if(MPFR_INCLUDE_DIR)
      target_include_directories(${T} PRIVATE ${MPFR_INCLUDE_DIR})
    endif()
    target_link_libraries(${T}
      ${TARGET_LIBSLEEF} ${LIB_MPFR} ${LIBM} ${LIBGMP})
    add_dependencies(${T} ${TARGET_HEADERS})
    add_dependencies(${T} ${TARGET_LIBSLEEF})
  endforeach()

  # No test defined with tester2

  # Executable 'tester'.  Its link and compile settings are applied only
  # when not cross-compiling.
  add_host_executable(${TARGET_TESTER} tester.c testerutil.c)
  if(NOT CMAKE_CROSSCOMPILING)
    target_link_libraries(${TARGET_TESTER}
      ${LIB_MPFR} ${TARGET_LIBSLEEF} ${LIBM} ${LIBGMP})
    target_compile_definitions(${TARGET_TESTER}
      PRIVATE USEMPFR=1 ${COMMON_TARGET_DEFINITIONS})
    target_compile_options(${TARGET_TESTER} PRIVATE -Wno-unused-result)
    set_target_properties(${TARGET_TESTER}
      PROPERTIES ${COMMON_TARGET_PROPERTIES})
    if(MPFR_INCLUDE_DIR)
      target_include_directories(${TARGET_TESTER} PRIVATE ${MPFR_INCLUDE_DIR})
    endif()
  endif()
endif()

# The scalar iut depends on the generated headers.
add_dependencies(${TARGET_IUT} ${TARGET_HEADERS})
sleef-3.5.1/src/libm-tester/gnuabi_compatibility.c000066400000000000000000000264521373003144100222030ustar00rootroot00000000000000
/// This program makes sure that all the symbols that a
/// GNUABI-compatible compiler (clang or gcc) can generate when
/// vectorizing function calls from `#include <math.h>` are present in
/// `libsleefgnuabi.so`.
///
/// The header `math.h` is not the same on all systems, and different
/// macros can activate different sets of functions. The list provided
/// here should cover the union of all possible systems that we want
/// to support. In particular, the test is checking that the "finite"
/// symbols (header name lost in this copy; presumably glibc's
/// finite-math header -- TODO confirm against upstream) are present for
/// those systems supporting them.
// NOTE(review): the header names of the three #include directives below
// (and of the one inside the ENABLE_SVE section) are missing in this copy
// of the file -- restore them from the upstream source.
#include
#include
#include

// Per-extension parameters.  Each section defines:
//   ISA_TOKEN - letter pasted into the mangled symbol name,
//   VLEN_SP   - vector length value used for single precision names,
//   VLEN_DP   - vector length value used for double precision names,
//   VECTOR_CC - calling-convention attribute for the declarations
//               (empty everywhere except AdvSIMD with ENABLE_AAVPCS).
#if defined(ENABLE_SSE4) || defined(ENABLE_SSE2)
#define ISA_TOKEN b
#define VLEN_SP 4
#define VLEN_DP 2
#define VECTOR_CC
#endif /* defined(ENABLE_SSE4) || defined(ENABLE_SSE2) */

#ifdef ENABLE_AVX
#define ISA_TOKEN c
#define VLEN_SP 8
#define VLEN_DP 4
#define VECTOR_CC
#endif /* ENABLE_AVX */

#ifdef ENABLE_AVX2
#define ISA_TOKEN d
#define VLEN_SP 8
#define VLEN_DP 4
#define VECTOR_CC
#endif /* ENABLE_AVX2 */

#ifdef ENABLE_AVX512F
#define ISA_TOKEN e
#define VLEN_SP 16
#define VLEN_DP 8
#define VECTOR_CC
#endif /* ENABLE_AVX512F */

#ifdef ENABLE_ADVSIMD
#define ISA_TOKEN n
#define VLEN_SP 4
#ifdef ENABLE_AAVPCS
#define VECTOR_CC __attribute__((aarch64_vector_pcs))
#else
#define VECTOR_CC
#endif
#define VLEN_DP 2
#endif /* ENABLE_ADVSIMD */

// SVE additionally defines VLA_TOKEN, which replaces the numeric vector
// length in the mangled names.
#ifdef ENABLE_SVE
#include
#define ISA_TOKEN s
#define VLEN_SP (svcntw())
#define VLEN_DP (svcntd())
#define VLA_TOKEN x
#define VECTOR_CC
#endif /* ENABLE_SVE */

// GNUABI name mangling macro.  Builds _ZGV<t><N|M><vl><p>_<name>: the mask
// letter is N by default and M when MASKED_GNUABI is defined.
#ifndef MASKED_GNUABI
#define __MAKE_FN_NAME(name, t, vl, p) _ZGV##t##N##vl##p##_##name
#else /* MASKED_GNUABI */
#define __MAKE_FN_NAME(name, t, vl, p) _ZGV##t##M##vl##p##_##name
#endif /* MASKED_GNUABI */

// Level-1 expansion macros for declaration and call. The signature of
// each function has three input parameters to avoid segfaults of
// sincos-like functions that are effectively loading data from
// memory.
#define __DECLARE(name, t, vl, p) \
  extern void VECTOR_CC __MAKE_FN_NAME(name, t, vl, p)(int *, int *, int *)
#define __CALL(name, t, vl, p) __MAKE_FN_NAME(name, t, vl, p)(b0, b1, b2)

// Make sure that the architectural macros are defined for each vector
// extension.
#ifndef ISA_TOKEN #error "Missing ISA token" #endif #ifndef VLEN_DP #error "Missing VLEN_DP" #endif #ifndef VLEN_DP #error "Missing VLEN_SP" #endif #if defined(ENABLE_SVE) && !defined(VLA_TOKEN) #error "Missing VLA_TOKEN" #endif /* defined(ENABLE_SVE) && !defined(VLA_TOKEN) */ // Declaration and call, first level expantion to pick up the // ISA_TOKEN and VLEN_* architectural macros. #ifndef ENABLE_SVE #define DECLARE_DP(name, p) __DECLARE(name, ISA_TOKEN, VLEN_DP, p) #define CALL_DP(name, p) __CALL(name, ISA_TOKEN, VLEN_DP, p) #else /* ENABLE_SVE */ #define DECLARE_DP(name, p) __DECLARE(name, ISA_TOKEN, VLA_TOKEN, p) #define CALL_DP(name, p) __CALL(name, ISA_TOKEN, VLA_TOKEN, p) #endif /* ENABLE_SVE */ // Douple precision function declarations. DECLARE_DP(__acos_finite, v); DECLARE_DP(__acosh_finite, v); DECLARE_DP(__asin_finite, v); DECLARE_DP(__atan2_finite, vv); DECLARE_DP(__atanh_finite, v); DECLARE_DP(__cosh_finite, v); DECLARE_DP(__exp10_finite, v); DECLARE_DP(__exp2_finite, v); DECLARE_DP(__exp_finite, v); DECLARE_DP(__fmod_finite, vv); DECLARE_DP(__modf_finite, vl8); DECLARE_DP(__hypot_finite, vv); DECLARE_DP(__log10_finite, v); // DECLARE_DP(__log2_finite,v); DECLARE_DP(__log_finite, v); DECLARE_DP(__pow_finite, vv); DECLARE_DP(__sinh_finite, v); DECLARE_DP(__sqrt_finite, v); DECLARE_DP(acos, v); DECLARE_DP(acosh, v); DECLARE_DP(asin, v); DECLARE_DP(asinh, v); DECLARE_DP(atan, v); DECLARE_DP(atan2, vv); DECLARE_DP(__atan2_finite, vv); DECLARE_DP(atanh, v); DECLARE_DP(cbrt, v); DECLARE_DP(ceil, v); DECLARE_DP(copysign, vv); DECLARE_DP(cos, v); DECLARE_DP(cosh, v); DECLARE_DP(cospi, v); DECLARE_DP(erf, v); DECLARE_DP(erfc, v); DECLARE_DP(exp, v); DECLARE_DP(exp10, v); DECLARE_DP(exp2, v); DECLARE_DP(expfrexp, v); DECLARE_DP(expm1, v); DECLARE_DP(fabs, v); DECLARE_DP(fdim, vv); DECLARE_DP(floor, v); DECLARE_DP(fma, vvv); DECLARE_DP(fmax, vv); DECLARE_DP(fmin, vv); DECLARE_DP(fmod, vv); DECLARE_DP(frfrexp, v); DECLARE_DP(hypot, vv); DECLARE_DP(ilogb, v); 
DECLARE_DP(ldexp, vv);
DECLARE_DP(lgamma, v);
DECLARE_DP(log, v);
DECLARE_DP(log10, v);
DECLARE_DP(log1p, v);
DECLARE_DP(log2, v);
DECLARE_DP(modf, vl8);
DECLARE_DP(nextafter, vv);
DECLARE_DP(pow, vv);
DECLARE_DP(rint, v);
DECLARE_DP(round, v);
DECLARE_DP(sin, v);
DECLARE_DP(sincos, vl8l8);
DECLARE_DP(sincospi, vl8l8);
DECLARE_DP(sinh, v);
DECLARE_DP(sinpi, v);
DECLARE_DP(sqrt, v);
DECLARE_DP(tan, v);
DECLARE_DP(tanh, v);
DECLARE_DP(tgamma, v);
DECLARE_DP(trunc, v);

// Single precision counterparts of DECLARE_DP / CALL_DP. With SVE the
// vector-length slot of the symbol name is filled with VLA_TOKEN; for
// every other extension it is filled with VLEN_SP.
#ifdef ENABLE_SVE
#define DECLARE_SP(name, p) __DECLARE(name, ISA_TOKEN, VLA_TOKEN, p)
#define CALL_SP(name, p) __CALL(name, ISA_TOKEN, VLA_TOKEN, p)
#else /* !ENABLE_SVE */
#define DECLARE_SP(name, p) __DECLARE(name, ISA_TOKEN, VLEN_SP, p)
#define CALL_SP(name, p) __CALL(name, ISA_TOKEN, VLEN_SP, p)
#endif /* ENABLE_SVE */

// Single precision function declarations.

// "__*f_finite" entry points.
DECLARE_SP(__acosf_finite, v);
DECLARE_SP(__acoshf_finite, v);
DECLARE_SP(__asinf_finite, v);
DECLARE_SP(__atan2f_finite, vv);
DECLARE_SP(__atanhf_finite, v);
DECLARE_SP(__coshf_finite, v);
DECLARE_SP(__exp10f_finite, v);
DECLARE_SP(__exp2f_finite, v);
DECLARE_SP(__expf_finite, v);
DECLARE_SP(__fmodf_finite, vv);
DECLARE_SP(__modff_finite, vl4);
DECLARE_SP(__hypotf_finite, vv);
DECLARE_SP(__log10f_finite, v);
// DECLARE_SP(__log2f_finite,v);
DECLARE_SP(__logf_finite, v);
DECLARE_SP(__powf_finite, vv);
DECLARE_SP(__sinhf_finite, v);
DECLARE_SP(__sqrtf_finite, v);

// Regular entry points.
DECLARE_SP(acosf, v);
DECLARE_SP(acoshf, v);
DECLARE_SP(asinf, v);
DECLARE_SP(asinhf, v);
DECLARE_SP(atanf, v);
DECLARE_SP(atan2f, vv);
DECLARE_SP(atanhf, v);
DECLARE_SP(cbrtf, v);
DECLARE_SP(ceilf, v);
DECLARE_SP(copysignf, vv);
DECLARE_SP(cosf, v);
DECLARE_SP(coshf, v);
DECLARE_SP(cospif, v);
DECLARE_SP(erff, v);
DECLARE_SP(erfcf, v);
DECLARE_SP(expf, v);
DECLARE_SP(exp10f, v);
DECLARE_SP(exp2f, v);
DECLARE_SP(expm1f, v);
DECLARE_SP(fabsf, v);
DECLARE_SP(fdimf, vv);
DECLARE_SP(floorf, v);
DECLARE_SP(fmaf, vvv);
DECLARE_SP(fmaxf, vv);
DECLARE_SP(fminf, vv);
DECLARE_SP(fmodf, vv);
DECLARE_SP(frfrexpf, v); DECLARE_SP(hypotf, vv); #ifndef ENABLE_AVX // These two functions are not checked in some configurations due to // the issue in https://github.com/shibatch/sleef/issues/221 DECLARE_SP(expfrexpf, v); DECLARE_SP(ilogbf, v); #endif DECLARE_SP(ldexpf, vv); DECLARE_SP(lgammaf, v); DECLARE_SP(logf, v); DECLARE_SP(log10f, v); DECLARE_SP(log1pf, v); DECLARE_SP(log2f, v); DECLARE_SP(modff, vl4); DECLARE_SP(nextafterf, vv); DECLARE_SP(powf, vv); DECLARE_SP(rintf, v); DECLARE_SP(roundf, v); DECLARE_SP(sinf, v); DECLARE_SP(sincosf, vl4l4); DECLARE_SP(sincospif, vl4l4); DECLARE_SP(sinhf, v); DECLARE_SP(sinpif, v); DECLARE_SP(sqrtf, v); DECLARE_SP(tanf, v); DECLARE_SP(tanhf, v); DECLARE_SP(tgammaf, v); DECLARE_SP(truncf, v); static jmp_buf sigjmp; static void sighandler(int signum) { longjmp(sigjmp, 1); } int detectFeature() { signal(SIGILL, sighandler); if (setjmp(sigjmp) == 0) { int b0[VLEN_SP]; int b1[VLEN_SP]; int b2[VLEN_SP]; CALL_DP(__acos_finite, v); signal(SIGILL, SIG_DFL); return 1; } else { signal(SIGILL, SIG_DFL); return 0; } } int main(void) { if (!detectFeature()) { return 0; } // Allocate enough memory to make sure that sincos-like functions can // load a full vector when invoked. All functions must operate on // these variables, which are printed at the end of the execution to // make sure that the compiler doesn't optimize out the calls. int b0[VLEN_SP]; int b1[VLEN_SP]; int b2[VLEN_SP]; // Double precision function call. 
CALL_DP(__acos_finite, v);
CALL_DP(__acosh_finite, v);
CALL_DP(__asin_finite, v);
CALL_DP(__atan2_finite, vv);
CALL_DP(__atanh_finite, v);
CALL_DP(__cosh_finite, v);
CALL_DP(__exp10_finite, v);
CALL_DP(__exp2_finite, v);
CALL_DP(__exp_finite, v);
CALL_DP(__fmod_finite, vv);
CALL_DP(__modf_finite, vl8);
CALL_DP(__hypot_finite, vv);
CALL_DP(__log10_finite, v);
// CALL_DP(__log2_finite,v);
CALL_DP(__log_finite, v);
CALL_DP(__pow_finite, vv);
CALL_DP(__sinh_finite, v);
CALL_DP(__sqrt_finite, v);

// Regular double precision entry points, in the same order as their
// declarations.
CALL_DP(acos, v);
CALL_DP(acosh, v);
CALL_DP(asin, v);
CALL_DP(asinh, v);
CALL_DP(atan, v);
CALL_DP(atan2, vv);
CALL_DP(atanh, v);
CALL_DP(cbrt, v);
CALL_DP(ceil, v);
CALL_DP(copysign, vv);
CALL_DP(cos, v);
CALL_DP(cosh, v);
CALL_DP(cospi, v);
CALL_DP(erf, v);
CALL_DP(erfc, v);
CALL_DP(exp, v);
CALL_DP(exp10, v);
CALL_DP(exp2, v);
CALL_DP(expfrexp, v);
CALL_DP(expm1, v);
CALL_DP(fabs, v);
CALL_DP(fdim, vv);
CALL_DP(floor, v);
CALL_DP(fma, vvv);
CALL_DP(fmax, vv);
CALL_DP(fmin, vv);
CALL_DP(fmod, vv);
CALL_DP(frfrexp, v);
CALL_DP(hypot, vv);
CALL_DP(ilogb, v);
CALL_DP(ldexp, vv);
CALL_DP(lgamma, v);
CALL_DP(log, v);
CALL_DP(log10, v);
CALL_DP(log1p, v);
CALL_DP(log2, v);
CALL_DP(modf, vl8);
CALL_DP(nextafter, vv);
CALL_DP(pow, vv);
CALL_DP(rint, v);
CALL_DP(round, v);
CALL_DP(sin, v);
CALL_DP(sincos, vl8l8);
CALL_DP(sincospi, vl8l8);
CALL_DP(sinh, v);
CALL_DP(sinpi, v);
CALL_DP(sqrt, v);
CALL_DP(tan, v);
CALL_DP(tanh, v);
CALL_DP(tgamma, v);
CALL_DP(trunc, v);

// Single precision function call.
CALL_SP(__acosf_finite, v);
CALL_SP(__acoshf_finite, v);
CALL_SP(__asinf_finite, v);
CALL_SP(__atan2f_finite, vv);
CALL_SP(__atanhf_finite, v);
CALL_SP(__coshf_finite, v);
CALL_SP(__exp10f_finite, v);
CALL_SP(__exp2f_finite, v);
CALL_SP(__expf_finite, v);
CALL_SP(__fmodf_finite, vv);
CALL_SP(__modff_finite, vl4);
CALL_SP(__hypotf_finite, vv);
CALL_SP(__log10f_finite, v);
// CALL_SP(__log2f_finite,v);
CALL_SP(__logf_finite, v);
CALL_SP(__powf_finite, vv);
CALL_SP(__sinhf_finite, v);
CALL_SP(__sqrtf_finite, v);

// Regular single precision entry points, in the same order as their
// declarations.
CALL_SP(acosf, v);
CALL_SP(acoshf, v);
CALL_SP(asinf, v);
CALL_SP(asinhf, v);
CALL_SP(atanf, v);
CALL_SP(atan2f, vv);
CALL_SP(atanhf, v);
CALL_SP(cbrtf, v);
CALL_SP(ceilf, v);
CALL_SP(copysignf, vv);
CALL_SP(cosf, v);
CALL_SP(coshf, v);
CALL_SP(cospif, v);
CALL_SP(erff, v);
CALL_SP(erfcf, v);
CALL_SP(expf, v);
CALL_SP(exp10f, v);
CALL_SP(exp2f, v);
CALL_SP(expm1f, v);
CALL_SP(fabsf, v);
CALL_SP(fdimf, vv);
CALL_SP(floorf, v);
CALL_SP(fmaf, vvv);
CALL_SP(fmaxf, vv);
CALL_SP(fminf, vv);
CALL_SP(fmodf, vv);
CALL_SP(frfrexpf, v);
CALL_SP(hypotf, vv);
#if !defined(ENABLE_AVX)
// These two functions are not checked in some configurations due to
// the issue in https://github.com/shibatch/sleef/issues/221
CALL_SP(expfrexpf, v);
CALL_SP(ilogbf, v);
#endif
CALL_SP(ldexpf, vv);
CALL_SP(lgammaf, v);
CALL_SP(logf, v);
CALL_SP(log10f, v);
CALL_SP(log1pf, v);
CALL_SP(log2f, v);
CALL_SP(modff, vl4);
CALL_SP(nextafterf, vv);
CALL_SP(powf, vv);
CALL_SP(rintf, v);
CALL_SP(roundf, v);
CALL_SP(sinf, v);
CALL_SP(sincosf, vl4l4);
CALL_SP(sincospif, vl4l4);
CALL_SP(sinhf, v);
CALL_SP(sinpif, v);
CALL_SP(sqrtf, v);
CALL_SP(tanf, v);
CALL_SP(tanhf, v);
CALL_SP(tgammaf, v);
CALL_SP(truncf, v);

// Print the vars to make sure the compiler does not remove the
// calls.
for (int i = 0; i < VLEN_SP; ++i) printf("%d %d %d\n", b0[i], b1[i], b2[i]); return 0; } sleef-3.5.1/src/libm-tester/hash_cinz.txt000066400000000000000000000125531373003144100203450ustar00rootroot00000000000000sin u35 bc50dfbcbd8ef534541d1babe90860c7 sin u10 dbc2cf81f292ef50fa0119e222c6c9f9 cos u35 506e34a809b80ad3603ed46ba2a574b0 cos u10 a0f69df5937152b8f8f0e671f3676289 tan u35 970b5cd7f0e05defa22ebb155ab61a40 tan u10 5fd08e0552e3ab853439bf5fd2bd344d sincos u10 7c164edcaa45988f6165b653fc76c495 sincos u35 38fe7e261e184ed8dbf432ce6bedc5c4 sincospi u05 0c6fc00c7aaf0b6e67d66542d1be833d sincospi u35 c428b0fc3e6c5be4d2c03dcd8bb27a7c log u10 4855b27222d900bea47a27cadba71727 log u35 c95484de57c167da3d8d6d1baadf9ffa log2 u10 2662df9af919680ca62e1752fb1b7539 log2 u35 1cd6d7f194a5e8364191497adc5c5cec log10 u10 36645e8031d873d66fd0ec2c5959f273 log1p u10 1383924fb56cf2e7eda27de21320c591 exp u10 13692a48edf2cf7a3e047b16ddfb7b81 exp2 u10 436146f8d6dcaa4a754837108a9aa3e1 exp2 u35 8881d075d9101a1dfa3f6a10b9ee8373 exp10 u10 9d704b310f683872a6446cfc97726a4d exp10 u35 bc07745ebc22a7ee97679154c24b23cc expm1 u10 cd3f0b8e86943d52c278394b60e2d22e pow u10 a0ea63b27d33262346a35c9439741075 cbrt u10 5d8bf28ac74624594fd1be9217817690 cbrt u10 3c896e03746bcf1b3f70182dfec3d93b cbrt u35 73daa306764e208aab1627ac110b10d7 cbrt u35 c29b7bf200215425b4ba948c8cc94c42 hypot u05 cc2f18e409e19a02cadf7b91fd869120 hypot u35 5194e0a554174a6145511ce3df9c1f46 asin u10 86c061caec3fa2e1bc71bda4dad29f4c asin u35 31303b88bdc00206265002d6cc5e89e4 acos u10 0a1a403590f2ac8364f132b334920945 acos u35 493f960c1cce57931d95a5a22a0587a3 atan u10 c97624a24ec034cc0c8985acb61d13cd atan u10 0be0f550406923016cfeb5ef62c25b15 atan u35 9d6d83e066b5a4851d44771418c9948c atan u35 f32c1aa4caa08c6945afd1125ba8b113 atan2 u10 6b1d9d25fcd96053acc19d1633fab36a atan2 u35 afb07894347062a96dab705b34eb1763 sinh u10 61d459b1f368087f6f23ebf8e9f0ea01 cosh u10 f77eb95f79e274c12b4e92dc0389259b tanh u10 2bb9dd54ed0fa22bb5f3b6d557eb58a3 asinh u10 
01136e54e2a434839530dda54f33cfdb acosh u10 2f3c28c9ee2eb2b3d5659c6cb2a58e3e atanh u10 601a77ba8c1d5175f2808b48a41260c1 lgamma u10 90cdc41063f4198c6ad592c0cdd0f5da tgamma u10 6f864c3a1f17fbdf914cac7ffcd82cb7 erf u10 4031f3e285101359aea99feb5e2de3f0 erfc u15 5e116a4316dafa742769f71e18f6f9fe fabs bef2f2ac8a4789357e580b4da4f9b9fe copysign 3219022f267464e3704f90558e8df3bc fmax 4e4f5220ccfef191864c316df0d18fc0 fmin c0f8effb6c611e2b3b91b820ad943f62 fdim e876d103931f18ceede5bfd7e3df7ab0 fmod 618aa751e13012afdb41ec80dd35e6ba remainder 8d692dbb44bbc9be5af0c0657d3008b8 modf f03ce73cd4f9ea7f69c017f6e53355d5 nextafter 9eba4e30d12d74dc4e8003fcff0f1582 trunc 1bc7e909eba121dcef7f0e4046937ae5 floor 2cff66b499dc8a30cec9467de659b774 ceil b080e632dcb8f8134d8715752be12917 round 8907e21687ca9c2a539297536e754950 rint e49f837096bc661fe1c742801dd99a30 sinf u35 833d845950b9cbb025629fe4c040f8f6 sinf u10 9c21afa4d7d6af3fc666309c3cd647fe cosf u35 74d7f871a6553cd0019087895e2052ad cosf u10 35349e94c323c1614f22093959288010 tanf u35 bbb7c092d017e96d2454a38a20687735 tanf u10 227423bc04f42d76a8f68082ba696126 sincosf u10 83ecc4e3d5295056e9d8c52bc196b666 sincosf u35 533319caa49a961e4909bd6dcab40721 sincospif u05 8b3762b67a661957c1414c351ec49034 sincospif u35 cec15ed76a358091632634166fa77b66 logf u10 c5a90119943acc4199e1cc7030b5def8 logf u35 af2fbe4bfa2caaf59c734e3749dd15be log2f u10 ba8acae369bbb7b6404cccbc633fe25b log2f u35 ba32ebaa8c470899ebd433d190c00f03 log10f u10 7e235a82d960e4434575dd39648d8bb7 log1pf u10 350fc4f13502b36bb1107e1b1122acb1 expf u10 ee4adaabefa3fac6c0f1925b2a948eea exp2f u10 b0d283dbae0f36f1b3c7eed9871f0d0d exp2f u35 522cc30f722f77fceb07015830b351a3 exp10f u10 b0564be151965600f5744ff2e4992bc9 exp10f u35 d142f1fb40e44f0c9e042718f27ee3e0 expm1f u10 ebfd6498cb40f61b609882de8a7f3c74 powf u10 a7cba3239c87969662e8b41a4dd8b4ab cbrtf u10 01c5cac23fe21638be1c3eab6e368fd6 cbrtf u10 2a245b03f83e9114644d03b40dac707b cbrtf u35 3ce62350fd585f0524a12c974fbe6cf5 cbrtf u35 
2aca0404626a28f7af7f60105ad6e217 hypotf u05 bc5971cbeebee27b4c0d91fbe3f6bf30 hypotf u35 a6f0f774b346a6bba08889ff9ba3f193 asinf u10 7f77f7453b961512c89e87e49c549cfe asinf u35 22ed8760aa328e1f714031eec592a4d8 acosf u10 15617dd0429b90e59d2923415934c2a6 acosf u35 af0b132d9e263721f9296187dbf9b9bf atanf u10 26b77fb423104b45633cf24500237d6e atanf u10 4313d0bc2708de53f74d804aac6564d4 atanf u35 97a1797897955643c722c7d291987331 atanf u35 7d3f47169415058e8578f11d899bfd10 atan2f u10 098a33f730fe95ce4774a991db4cee14 atan2f u35 56fc6bd8349979f0d0b1dcdb57f68363 sinhf u10 0780a2f57df3a831718195d1ee5c19ef coshf u10 cfbb6aed408e43a7b7f053474100ff2d tanhf u10 d19f254d41e8726c748df87b95bc9acd asinhf u10 260d129221468a86bbfd609c27bfea6a acoshf u10 24ced7e5631c78b20a5716faeedbaa92 atanhf u10 164fd77b8372b8c131baaacab1c9e650 lgammaf u10 3bf6d824175c4f4d86f3073064e41e84 tgammaf u10 f3a8d25c852068622bdfcae4cb813583 erff u10 afd2eb2e4f76145816d623ddc53959cb erfcf u15 915ab9830de89a5a504b3ce7cd2fecda fabsf a3c72220bc0ade68fe22e0a15eb730d4 copysignf 6b35517b8e1da78d9c9b52915d9a9b19 fmaxf 9833a60a2080e8fd9ae8de32c758966f fminf 2dcfa19e1f1ab4973a7dec9f2cc09fa0 fdimf c5c0fe7b095eb8ccbb19fbf934a36b24 fmodf 77aa84a9703e202a56e5f4609bd2482b remainderf 5a453b1217c173e4dc0b0211066750be modff 5fa4f044f20478216aa085a01b189697 nextafterf 517c1c8f072e9024518d3d9ead98b85b truncf 6937050850be63c44d4b7dbd666febe6 floorf 9341be69ee345c8554bf3ab4e9316133 ceilf c70874771cbe9741f1f05fedd4b629e9 roundf 0cf52f6b8015099771e9a7dfa6b090bc rintf bed68e788e2b11543c09c9d52198abf8 fastsinf u3500 8eb51f86fb40414dd21284f020f24b6c fastcosf u3500 69cbc3703f1d2c68695b00b1b09287b2 fastpowf u3500 e02e6a692cfa22a6b7149168c67ea1d2 sleef-3.5.1/src/libm-tester/hash_finz.txt000066400000000000000000000125531373003144100203500ustar00rootroot00000000000000sin u35 c163e4a7e9ccebb2181dcc8653367d8c sin u10 0d6bf6f2c935db82588222da95659019 cos u35 52f902bd939d751b5b544ac70181fcff cos u10 afcdba92a75a76d56b8cf2f22d4bec9e tan u35 
906cc42b6755fe514c5e185fcb4d2f55 tan u10 c98f29a62067fa63646d9bcc29a310c6 sincos u10 3fe37f4eb805505152f2b14a22a9f94e sincos u35 95a7b7f48c71febf10ec6eff796dd391 sincospi u05 0c6fc00c7aaf0b6e67d66542d1be833d sincospi u35 c428b0fc3e6c5be4d2c03dcd8bb27a7c log u10 4855b27222d900bea47a27cadba71727 log u35 015f8ae899c9b921d48919dd12ef19a9 log2 u10 2662df9af919680ca62e1752fb1b7539 log2 u35 908b1949db34ea855944f00089b21e23 log10 u10 36645e8031d873d66fd0ec2c5959f273 log1p u10 1383924fb56cf2e7eda27de21320c591 exp u10 084e5be89c2ad03e356078ea4f287bab exp2 u10 6e36db9ae2cf9eca82e3d9157c622351 exp2 u35 6e36db9ae2cf9eca82e3d9157c622351 exp10 u10 0cc08bc6a3d08d6e61450b5370c6161e exp10 u35 6904d5509ca794747aa249c13886f90f expm1 u10 cd3f0b8e86943d52c278394b60e2d22e pow u10 7e19796027d7c1d1999be948f90e6181 cbrt u10 5d8bf28ac74624594fd1be9217817690 cbrt u10 3c896e03746bcf1b3f70182dfec3d93b cbrt u35 fc7ee3e3e6c54365d708b752c242a947 cbrt u35 2408714a56d74f8c82389ca6772cdbc1 hypot u05 cc2f18e409e19a02cadf7b91fd869120 hypot u35 be7bbd41dffd746b70261ee773cbd4b2 asin u10 8a21b7c28cdaffc9d3e53f415367932e asin u35 9c9e8107782898e9faed6924ad1b3cb1 acos u10 28261e4eb8331865660c814676d5c6bc acos u35 310911130bfc45b10dabe3a072939331 atan u10 f931de72f2f6a7928f307a8a382ae255 atan u10 453f9ef62f58f9829320baf482a1d457 atan u35 6161b6189609f105b017d8768d0a41f1 atan u35 6face71d8d93c69448d49ed6140e361d atan2 u10 469babaeee9bd30e17af2f473b3ea500 atan2 u35 6a3e764125aab2a0a13e7a0d9ec02f7f sinh u10 61d459b1f368087f6f23ebf8e9f0ea01 cosh u10 f77eb95f79e274c12b4e92dc0389259b tanh u10 2bb9dd54ed0fa22bb5f3b6d557eb58a3 asinh u10 01136e54e2a434839530dda54f33cfdb acosh u10 2f3c28c9ee2eb2b3d5659c6cb2a58e3e atanh u10 601a77ba8c1d5175f2808b48a41260c1 lgamma u10 90cdc41063f4198c6ad592c0cdd0f5da tgamma u10 cb9a93844ad1713d2ab92ff5b6398150 erf u10 3f3c9bf4f8e5768c09c472cee4475e43 erfc u15 3e247a54183eeddedc33e99c50118995 fabs bef2f2ac8a4789357e580b4da4f9b9fe copysign 3219022f267464e3704f90558e8df3bc fmax 
4e4f5220ccfef191864c316df0d18fc0 fmin c0f8effb6c611e2b3b91b820ad943f62 fdim e876d103931f18ceede5bfd7e3df7ab0 fmod 618aa751e13012afdb41ec80dd35e6ba remainder 8d692dbb44bbc9be5af0c0657d3008b8 modf f03ce73cd4f9ea7f69c017f6e53355d5 nextafter 9eba4e30d12d74dc4e8003fcff0f1582 trunc 1bc7e909eba121dcef7f0e4046937ae5 floor 2cff66b499dc8a30cec9467de659b774 ceil b080e632dcb8f8134d8715752be12917 round 8907e21687ca9c2a539297536e754950 rint e49f837096bc661fe1c742801dd99a30 sinf u35 f8f804eae1d9443103e81fec96293477 sinf u10 3f12a7381f1cbb1830d92b4ec72d21fe cosf u35 f2f3d1c9f090cde9c02439608dc7066e cosf u10 dc35f27fae65f63f0aa6ad241f8b387b tanf u35 68d42ad1fb412e6b8be3853461e61213 tanf u10 97df301d4f59e67d5318b5356b703f06 sincosf u10 a97124d810ec461c135dc4fb0c059b6f sincosf u35 0cc521e52ae1227d311012c2919c1ff2 sincospif u05 8b3762b67a661957c1414c351ec49034 sincospif u35 8720757f221c00cc8de24b7dc4949144 logf u10 c5a90119943acc4199e1cc7030b5def8 logf u35 b6234302d534d6ccd48155dd6b9a4293 log2f u10 ba8acae369bbb7b6404cccbc633fe25b log2f u35 74174c90717c86642b71284452a8aef6 log10f u10 7e235a82d960e4434575dd39648d8bb7 log1pf u10 e53dbfa80bcc1a7bcfd21000e6950475 expf u10 9597388315e4b3e89c4c97ce46374dcf exp2f u10 42d66e5e4cb88feb29c5b36c632159a5 exp2f u35 42d66e5e4cb88feb29c5b36c632159a5 exp10f u10 954f0824b6d949d0da03b49950dc6642 exp10f u35 6fb0e9a829e12a06679d379d05b53ede expm1f u10 ebfd6498cb40f61b609882de8a7f3c74 powf u10 2ed84af40d03e307a620365f172d010d cbrtf u10 01c5cac23fe21638be1c3eab6e368fd6 cbrtf u10 2a245b03f83e9114644d03b40dac707b cbrtf u35 6c22a6dc132c5212250970f22f42256d cbrtf u35 5ab696ae11f9637413d30e6496d5324b hypotf u05 bc5971cbeebee27b4c0d91fbe3f6bf30 hypotf u35 2a7cd97768287084b7fffc7e9fb39072 asinf u10 e2e571a01984c4ffb3f6e38e0328d90e asinf u35 70df2dfc3a3569868cce60c38e7b1962 acosf u10 5180fde4b02a0ca4cd75f0a786a1bfeb acosf u35 72b0e2f9791f90f1c43570b9e9ba893f atanf u10 fa672e387a204055f735b7af98dd8a35 atanf u10 d017670c13bc221b68bc9ee5f41c4b5e atanf u35 
f592e46eaa5d29583f86d3e336f20b6b atanf u35 e7087fe40de46921826b373d10c40954 atan2f u10 275b2fa8ee554c45551bb142db9f8197 atan2f u35 44b187851195d24bab2561eb8f4ff5d0 sinhf u10 45bc228a14c3e39eeb35e9764394a23e coshf u10 838d441e85d415ef4fb1e5c5ea966a71 tanhf u10 d19f254d41e8726c748df87b95bc9acd asinhf u10 927eeb621a3e2d5039f1a07fcf150901 acoshf u10 932520013273174fcabe2be4a55f919f atanhf u10 164fd77b8372b8c131baaacab1c9e650 lgammaf u10 3bf6d824175c4f4d86f3073064e41e84 tgammaf u10 c3059747811d98846f74a63d3747ac3d erff u10 dbaf91aa6ca4902e8c4f1650f508d45d erfcf u15 687a9c577512d349ddbc0643013d2c56 fabsf a3c72220bc0ade68fe22e0a15eb730d4 copysignf 6b35517b8e1da78d9c9b52915d9a9b19 fmaxf 9833a60a2080e8fd9ae8de32c758966f fminf 2dcfa19e1f1ab4973a7dec9f2cc09fa0 fdimf c5c0fe7b095eb8ccbb19fbf934a36b24 fmodf 77aa84a9703e202a56e5f4609bd2482b remainderf 5a453b1217c173e4dc0b0211066750be modff 5fa4f044f20478216aa085a01b189697 nextafterf 517c1c8f072e9024518d3d9ead98b85b truncf 6937050850be63c44d4b7dbd666febe6 floorf 9341be69ee345c8554bf3ab4e9316133 ceilf c70874771cbe9741f1f05fedd4b629e9 roundf 0cf52f6b8015099771e9a7dfa6b090bc rintf bed68e788e2b11543c09c9d52198abf8 fastsinf u3500 5c48081c74cd0316379b580b047dbfc2 fastcosf u3500 6f73d116f109283e5632c31f5988f55b fastpowf u3500 6dbb3110412df4fed5a71f50d40def89 sleef-3.5.1/src/libm-tester/iut.c000066400000000000000000000621271373003144100166050ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER) #define STDIN_FILENO 0 #else #include #include #include #endif #include "sleef.h" #include "testerutil.h" #define DORENAME #include "rename.h" #define BUFSIZE 1024 int main(int argc, char **argv) { char buf[BUFSIZE]; printf("3\n"); fflush(stdout); for(;;) { if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; if (startsWith(buf, "sin ")) { uint64_t u; sscanf(buf, "sin %" PRIx64, &u); u = d2u(xsin(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sin_u1 ")) { uint64_t u; sscanf(buf, "sin_u1 %" PRIx64, &u); u = d2u(xsin_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cos ")) { uint64_t u; sscanf(buf, "cos %" PRIx64, &u); u = d2u(xcos(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cos_u1 ")) { uint64_t u; sscanf(buf, "cos_u1 %" PRIx64, &u); u = d2u(xcos_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sincos ")) { uint64_t u; sscanf(buf, "sincos %" PRIx64, &u); Sleef_double2 x = xsincos(u2d(u)); printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y)); } else if (startsWith(buf, "sincos_u1 ")) { uint64_t u; sscanf(buf, "sincos_u1 %" PRIx64, &u); Sleef_double2 x = xsincos_u1(u2d(u)); printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y)); } else if (startsWith(buf, "sincospi_u05 ")) { uint64_t u; sscanf(buf, "sincospi_u05 %" PRIx64, &u); Sleef_double2 x = xsincospi_u05(u2d(u)); printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y)); } else if (startsWith(buf, "sincospi_u35 ")) { uint64_t u; sscanf(buf, "sincospi_u35 %" PRIx64, &u); Sleef_double2 x = xsincospi_u35(u2d(u)); printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y)); } else if (startsWith(buf, "sinpi_u05 ")) { uint64_t u; sscanf(buf, "sinpi_u05 %" PRIx64, &u); u = d2u(xsinpi_u05(u2d(u))); printf("%" PRIx64 "\n", u); } 
else if (startsWith(buf, "cospi_u05 ")) { uint64_t u; sscanf(buf, "cospi_u05 %" PRIx64, &u); u = d2u(xcospi_u05(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "tan ")) { uint64_t u; sscanf(buf, "tan %" PRIx64, &u); u = d2u(xtan(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "tan_u1 ")) { uint64_t u; sscanf(buf, "tan_u1 %" PRIx64, &u); u = d2u(xtan_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "asin ")) { uint64_t u; sscanf(buf, "asin %" PRIx64, &u); u = d2u(xasin(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "acos ")) { uint64_t u; sscanf(buf, "acos %" PRIx64, &u); u = d2u(xacos(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "atan ")) { uint64_t u; sscanf(buf, "atan %" PRIx64, &u); u = d2u(xatan(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "log ")) { uint64_t u; sscanf(buf, "log %" PRIx64, &u); u = d2u(xlog(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "exp ")) { uint64_t u; sscanf(buf, "exp %" PRIx64, &u); u = d2u(xexp(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "atan2 ")) { uint64_t u, v; sscanf(buf, "atan2 %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xatan2(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "asin_u1 ")) { uint64_t u; sscanf(buf, "asin_u1 %" PRIx64, &u); u = d2u(xasin_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "acos_u1 ")) { uint64_t u; sscanf(buf, "acos_u1 %" PRIx64, &u); u = d2u(xacos_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "atan_u1 ")) { uint64_t u; sscanf(buf, "atan_u1 %" PRIx64, &u); u = d2u(xatan_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "atan2_u1 ")) { uint64_t u, v; sscanf(buf, "atan2_u1 %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xatan2_u1(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "log_u1 ")) { uint64_t u; sscanf(buf, "log_u1 %" PRIx64, &u); u = 
d2u(xlog_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "pow ")) { uint64_t u, v; sscanf(buf, "pow %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xpow(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sinh ")) { uint64_t u; sscanf(buf, "sinh %" PRIx64, &u); u = d2u(xsinh(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cosh ")) { uint64_t u; sscanf(buf, "cosh %" PRIx64, &u); u = d2u(xcosh(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "tanh ")) { uint64_t u; sscanf(buf, "tanh %" PRIx64, &u); u = d2u(xtanh(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sinh_u35 ")) { uint64_t u; sscanf(buf, "sinh_u35 %" PRIx64, &u); u = d2u(xsinh_u35(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cosh_u35 ")) { uint64_t u; sscanf(buf, "cosh_u35 %" PRIx64, &u); u = d2u(xcosh_u35(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "tanh_u35 ")) { uint64_t u; sscanf(buf, "tanh_u35 %" PRIx64, &u); u = d2u(xtanh_u35(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "asinh ")) { uint64_t u; sscanf(buf, "asinh %" PRIx64, &u); u = d2u(xasinh(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "acosh ")) { uint64_t u; sscanf(buf, "acosh %" PRIx64, &u); u = d2u(xacosh(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "atanh ")) { uint64_t u; sscanf(buf, "atanh %" PRIx64, &u); u = d2u(xatanh(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "fma ")) { uint64_t u, v, w; sscanf(buf, "fma %" PRIx64 " %" PRIx64 " %" PRIx64, &u, &v, &w); u = d2u(xfma(u2d(u), u2d(v), u2d(w))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sqrt ")) { uint64_t u; sscanf(buf, "sqrt %" PRIx64, &u); u = d2u(xsqrt(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sqrt_u05 ")) { uint64_t u; sscanf(buf, "sqrt_u05 %" PRIx64, &u); u = d2u(xsqrt_u05(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sqrt_u35 
")) { uint64_t u; sscanf(buf, "sqrt_u35 %" PRIx64, &u); u = d2u(xsqrt_u35(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cbrt ")) { uint64_t u; sscanf(buf, "cbrt %" PRIx64, &u); u = d2u(xcbrt(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cbrt_u1 ")) { uint64_t u; sscanf(buf, "cbrt_u1 %" PRIx64, &u); u = d2u(xcbrt_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "exp2 ")) { uint64_t u; sscanf(buf, "exp2 %" PRIx64, &u); u = d2u(xexp2(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "exp2_u35 ")) { uint64_t u; sscanf(buf, "exp2_u35 %" PRIx64, &u); u = d2u(xexp2_u35(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "exp10 ")) { uint64_t u; sscanf(buf, "exp10 %" PRIx64, &u); u = d2u(xexp10(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "exp10_u35 ")) { uint64_t u; sscanf(buf, "exp10_u35 %" PRIx64, &u); u = d2u(xexp10_u35(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "expm1 ")) { uint64_t u; sscanf(buf, "expm1 %" PRIx64, &u); u = d2u(xexpm1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "log10 ")) { uint64_t u; sscanf(buf, "log10 %" PRIx64, &u); u = d2u(xlog10(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "log2 ")) { uint64_t u; sscanf(buf, "log2 %" PRIx64, &u); u = d2u(xlog2(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "log2_u35 ")) { uint64_t u; sscanf(buf, "log2_u35 %" PRIx64, &u); u = d2u(xlog2_u35(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "log1p ")) { uint64_t u; sscanf(buf, "log1p %" PRIx64, &u); u = d2u(xlog1p(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "ldexp ")) { uint64_t u, v; sscanf(buf, "ldexp %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xldexp(u2d(u), (int)u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "hypot_u05 ")) { uint64_t u, v; sscanf(buf, "hypot_u05 %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xhypot_u05(u2d(u), 
u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "hypot_u35 ")) { uint64_t u, v; sscanf(buf, "hypot_u35 %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xhypot_u35(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "copysign ")) { uint64_t u, v; sscanf(buf, "copysign %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xcopysign(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "fmax ")) { uint64_t u, v; sscanf(buf, "fmax %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xfmax(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "fmin ")) { uint64_t u, v; sscanf(buf, "fmin %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xfmin(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "fdim ")) { uint64_t u, v; sscanf(buf, "fdim %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xfdim(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "nextafter ")) { uint64_t u, v; sscanf(buf, "nextafter %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xnextafter(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "fmod ")) { uint64_t u, v; sscanf(buf, "fmod %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xfmod(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "remainder ")) { uint64_t u, v; sscanf(buf, "remainder %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xremainder(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "fabs ")) { uint64_t u; sscanf(buf, "fabs %" PRIx64, &u); u = d2u(xfabs(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "trunc ")) { uint64_t u; sscanf(buf, "trunc %" PRIx64, &u); u = d2u(xtrunc(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "floor ")) { uint64_t u; sscanf(buf, "floor %" PRIx64, &u); u = d2u(xfloor(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "ceil ")) { uint64_t u; sscanf(buf, "ceil %" PRIx64, &u); u = d2u(xceil(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "round ")) 
{ uint64_t u; sscanf(buf, "round %" PRIx64, &u); u = d2u(xround(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "rint ")) { uint64_t u; sscanf(buf, "rint %" PRIx64, &u); u = d2u(xrint(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "frfrexp ")) { uint64_t u; sscanf(buf, "frfrexp %" PRIx64, &u); u = d2u(xfrfrexp(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "modf ")) { uint64_t u; sscanf(buf, "modf %" PRIx64, &u); Sleef_double2 x = xmodf(u2d(u)); printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y)); } else if (startsWith(buf, "tgamma_u1 ")) { uint64_t u; sscanf(buf, "tgamma_u1 %" PRIx64, &u); u = d2u(xtgamma_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "lgamma_u1 ")) { uint64_t u; sscanf(buf, "lgamma_u1 %" PRIx64, &u); u = d2u(xlgamma_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "erf_u1 ")) { uint64_t u; sscanf(buf, "erf_u1 %" PRIx64, &u); u = d2u(xerf_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "erfc_u15 ")) { uint64_t u; sscanf(buf, "erfc_u15 %" PRIx64, &u); u = d2u(xerfc_u15(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sinf ")) { uint32_t u; sscanf(buf, "sinf %x", &u); u = f2u(xsinf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "cosf ")) { uint32_t u; sscanf(buf, "cosf %x", &u); u = f2u(xcosf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sincosf ")) { uint32_t u; sscanf(buf, "sincosf %x", &u); Sleef_float2 x = xsincosf(u2f(u)); printf("%x %x\n", f2u(x.x), f2u(x.y)); } else if (startsWith(buf, "tanf ")) { uint32_t u; sscanf(buf, "tanf %x", &u); u = f2u(xtanf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "asinf ")) { uint32_t u; sscanf(buf, "asinf %x", &u); u = f2u(xasinf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "acosf ")) { uint32_t u; sscanf(buf, "acosf %x", &u); u = f2u(xacosf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "atanf ")) { uint32_t u; sscanf(buf, 
"atanf %x", &u); u = f2u(xatanf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "atan2f ")) { uint32_t u, v; sscanf(buf, "atan2f %x %x", &u, &v); u = f2u(xatan2f(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "logf ")) { uint32_t u; sscanf(buf, "logf %x", &u); u = f2u(xlogf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "expf ")) { uint32_t u; sscanf(buf, "expf %x", &u); u = f2u(xexpf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "cbrtf ")) { uint32_t u; sscanf(buf, "cbrtf %x", &u); u = f2u(xcbrtf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sqrtf ")) { uint32_t u; sscanf(buf, "sqrtf %x", &u); u = f2u(xsqrtf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sqrtf_u05 ")) { uint32_t u; sscanf(buf, "sqrtf_u05 %x", &u); u = f2u(xsqrtf_u05(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sqrtf_u35 ")) { uint32_t u; sscanf(buf, "sqrtf_u35 %x", &u); u = f2u(xsqrtf_u35(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "ldexpf ")) { uint32_t u, v; sscanf(buf, "ldexpf %x %x", &u, &v); u = f2u(xldexpf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "powf ")) { uint32_t u, v; sscanf(buf, "powf %x %x", &u, &v); u = f2u(xpowf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "fastpowf_u3500 ")) { uint32_t u, v; sscanf(buf, "fastpowf_u3500 %x %x", &u, &v); u = f2u(xfastpowf_u3500(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "sinhf ")) { uint32_t u; sscanf(buf, "sinhf %x", &u); u = f2u(xsinhf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "coshf ")) { uint32_t u; sscanf(buf, "coshf %x", &u); u = f2u(xcoshf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "tanhf ")) { uint32_t u; sscanf(buf, "tanhf %x", &u); u = f2u(xtanhf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sinhf_u35 ")) { uint32_t u; sscanf(buf, "sinhf_u35 %x", &u); u = f2u(xsinhf_u35(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "coshf_u35 ")) { uint32_t u; 
sscanf(buf, "coshf_u35 %x", &u); u = f2u(xcoshf_u35(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "tanhf_u35 ")) { uint32_t u; sscanf(buf, "tanhf_u35 %x", &u); u = f2u(xtanhf_u35(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "asinhf ")) { uint32_t u; sscanf(buf, "asinhf %x", &u); u = f2u(xasinhf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "acoshf ")) { uint32_t u; sscanf(buf, "acoshf %x", &u); u = f2u(xacoshf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "atanhf ")) { uint32_t u; sscanf(buf, "atanhf %x", &u); u = f2u(xatanhf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "exp2f ")) { uint32_t u; sscanf(buf, "exp2f %x", &u); u = f2u(xexp2f(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "exp10f ")) { uint32_t u; sscanf(buf, "exp10f %x", &u); u = f2u(xexp10f(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "exp2f_u35 ")) { uint32_t u; sscanf(buf, "exp2f_u35 %x", &u); u = f2u(xexp2f_u35(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "exp10f_u35 ")) { uint32_t u; sscanf(buf, "exp10f_u35 %x", &u); u = f2u(xexp10f_u35(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "expm1f ")) { uint32_t u; sscanf(buf, "expm1f %x", &u); u = f2u(xexpm1f(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "log10f ")) { uint32_t u; sscanf(buf, "log10f %x", &u); u = f2u(xlog10f(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "log2f ")) { uint32_t u; sscanf(buf, "log2f %x", &u); u = f2u(xlog2f(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "log2f_u35 ")) { uint32_t u; sscanf(buf, "log2f_u35 %x", &u); u = f2u(xlog2f_u35(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "log1pf ")) { uint32_t u; sscanf(buf, "log1pf %x", &u); u = f2u(xlog1pf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sinf_u1 ")) { uint32_t u; sscanf(buf, "sinf_u1 %x", &u); u = f2u(xsinf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "cosf_u1 ")) { uint32_t u; sscanf(buf, "cosf_u1 %x", &u); u = 
f2u(xcosf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sincosf_u1 ")) { uint32_t u; sscanf(buf, "sincosf_u1 %x", &u); Sleef_float2 x = xsincosf_u1(u2f(u)); printf("%x %x\n", f2u(x.x), f2u(x.y)); } else if (startsWith(buf, "sincospif_u05 ")) { uint32_t u; sscanf(buf, "sincospif_u05 %x", &u); Sleef_float2 x = xsincospif_u05(u2f(u)); printf("%x %x\n", f2u(x.x), f2u(x.y)); } else if (startsWith(buf, "sincospif_u35 ")) { uint32_t u; sscanf(buf, "sincospif_u35 %x", &u); Sleef_float2 x = xsincospif_u35(u2f(u)); printf("%x %x\n", f2u(x.x), f2u(x.y)); } else if (startsWith(buf, "sinpif_u05 ")) { uint32_t u; sscanf(buf, "sinpif_u05 %x", &u); u = f2u(xsinpif_u05(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "cospif_u05 ")) { uint32_t u; sscanf(buf, "cospif_u05 %x", &u); u = f2u(xcospif_u05(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "fastsinf_u3500 ")) { uint32_t u; sscanf(buf, "fastsinf_u3500 %x", &u); u = f2u(xfastsinf_u3500(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "fastcosf_u3500 ")) { uint32_t u; sscanf(buf, "fastcosf_u3500 %x", &u); u = f2u(xfastcosf_u3500(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "tanf_u1 ")) { uint32_t u; sscanf(buf, "tanf_u1 %x", &u); u = f2u(xtanf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "asinf_u1 ")) { uint32_t u; sscanf(buf, "asinf_u1 %x", &u); u = f2u(xasinf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "acosf_u1 ")) { uint32_t u; sscanf(buf, "acosf_u1 %x", &u); u = f2u(xacosf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "atanf_u1 ")) { uint32_t u; sscanf(buf, "atanf_u1 %x", &u); u = f2u(xatanf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "atan2f_u1 ")) { uint32_t u, v; sscanf(buf, "atan2f_u1 %x %x", &u, &v); u = f2u(xatan2f_u1(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "logf_u1 ")) { uint32_t u; sscanf(buf, "logf_u1 %x", &u); u = f2u(xlogf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, 
"cbrtf_u1 ")) { uint32_t u; sscanf(buf, "cbrtf_u1 %x", &u); u = f2u(xcbrtf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "ilogb ")) { uint64_t u; int i; sscanf(buf, "ilogb %" PRIx64, &u); i = xilogb(u2d(u)); printf("%d\n", i); } else if (startsWith(buf, "ilogbf ")) { uint32_t u; int i; sscanf(buf, "ilogbf %x", &u); i = xilogbf(u2f(u)); printf("%d\n", i); } else if (startsWith(buf, "hypotf_u05 ")) { uint32_t u, v; sscanf(buf, "hypotf_u05 %x %x", &u, &v); u = f2u(xhypotf_u05(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "hypotf_u35 ")) { uint32_t u, v; sscanf(buf, "hypotf_u35 %x %x", &u, &v); u = f2u(xhypotf_u35(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "copysignf ")) { uint32_t u, v; sscanf(buf, "copysignf %x %x", &u, &v); u = f2u(xcopysignf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "fmaxf ")) { uint32_t u, v; sscanf(buf, "fmaxf %x %x", &u, &v); u = f2u(xfmaxf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "fminf ")) { uint32_t u, v; sscanf(buf, "fminf %x %x", &u, &v); u = f2u(xfminf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "fdimf ")) { uint32_t u, v; sscanf(buf, "fdimf %x %x", &u, &v); u = f2u(xfdimf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "nextafterf ")) { uint32_t u, v; sscanf(buf, "nextafterf %x %x", &u, &v); u = f2u(xnextafterf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "fmodf ")) { uint32_t u, v; sscanf(buf, "fmodf %x %x", &u, &v); u = f2u(xfmodf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "remainderf ")) { uint32_t u, v; sscanf(buf, "remainderf %x %x", &u, &v); u = f2u(xremainderf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "fabsf ")) { uint32_t u; sscanf(buf, "fabsf %x", &u); u = f2u(xfabsf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "truncf ")) { uint32_t u; sscanf(buf, "truncf %x", &u); u = f2u(xtruncf(u2f(u))); printf("%x\n", u); } else if 
(startsWith(buf, "floorf ")) { uint32_t u; sscanf(buf, "floorf %x", &u); u = f2u(xfloorf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "ceilf ")) { uint32_t u; sscanf(buf, "ceilf %x", &u); u = f2u(xceilf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "roundf ")) { uint32_t u; sscanf(buf, "roundf %x", &u); u = f2u(xroundf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "rintf ")) { uint32_t u; sscanf(buf, "rintf %x", &u); u = f2u(xrintf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "frfrexpf ")) { uint32_t u; sscanf(buf, "frfrexpf %x", &u); u = f2u(xfrfrexpf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "modff ")) { uint32_t u; sscanf(buf, "modff %x", &u); Sleef_float2 x = xmodff(u2f(u)); printf("%x %x\n", f2u(x.x), f2u(x.y)); } else if (startsWith(buf, "tgammaf_u1 ")) { uint32_t u; sscanf(buf, "tgammaf_u1 %x", &u); u = f2u(xtgammaf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "lgammaf_u1 ")) { uint32_t u; sscanf(buf, "lgammaf_u1 %x", &u); u = f2u(xlgammaf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "erff_u1 ")) { uint32_t u; sscanf(buf, "erff_u1 %x", &u); u = f2u(xerff_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "erfcf_u15 ")) { uint32_t u; sscanf(buf, "erfcf_u15 %x", &u); u = f2u(xerfcf_u15(u2f(u))); printf("%x\n", u); } else { break; } fflush(stdout); } return 0; } sleef-3.5.1/src/libm-tester/iutsimd.c000066400000000000000000000467011373003144100174620ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
//    (See accompanying file LICENSE.txt or copy at
//          http://www.boost.org/LICENSE_1_0.txt)

// ---------------------------------------------------------------------------
// Preamble of the SIMD tester: system headers, the SLEEF API (either the
// regular "sleef.h" or an inlinable header selected on the command line), and
// then one ENABLE_* section per vector extension that picks the rename
// header, the helper header and the vdouble2/vfloat2 pair types.
// ---------------------------------------------------------------------------

// NOTE(review): every bare `#include` below has no header name in this copy
// of the file (the operand appears to have been lost); the names must be
// restored from the upstream source before this can compile.
#include
#include
#include
#include
#include
#include
#include
#include

// When _MSC_VER is defined, STDIN_FILENO is supplied by hand; otherwise three
// further headers are included.
// NOTE(review): presumably the non-MSVC branch pulls in the POSIX header that
// declares STDIN_FILENO — confirm once the header names are restored.
#if defined(_MSC_VER)
#define STDIN_FILENO 0
#else
#include
#include
#include
#endif

#include "misc.h"

// Two ways of obtaining the SLEEF declarations:
//  - USE_INLINE_HEADER not defined: include the regular "sleef.h".
//  - USE_INLINE_HEADER defined: it and MACRO_ONLY_HEADER are macros that
//    expand to the header names to include (computed includes), after the
//    environment those headers expect has been set up here.
#if !defined(USE_INLINE_HEADER)
#include "sleef.h"
#else // #if !defined(USE_INLINE_HEADER)
#include
#include
#include
#include

// Define FP_FAST_FMA on these targets unless it is already defined.
#if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__)
#ifndef FP_FAST_FMA
#define FP_FAST_FMA
#endif
#endif

// Force __STDC__ to 1 for MSVC builds in which it is not defined.
#if defined(_MSC_VER) && !defined(__STDC__)
#define __STDC__ 1
#endif

// Per-compiler / per-architecture intrinsic headers (names missing, see the
// note at the top of this preamble).
// NOTE(review): Clang predefines `__clang__` (lowercase); confirm whether the
// `__CLANG__` spelling tested here is intentional.
#if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__))
#include
#endif

#if (defined(_MSC_VER))
#include
#endif

#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#include
#endif

#if defined(__ARM_FEATURE_SVE)
#include
#endif

#if defined(__VSX__)
#include
#endif

#if defined(__VX__)
#include
#endif

// Qualifier macros defined before the inlinable headers are included:
// SLEEF_ALWAYS_INLINE becomes plain `inline`, the other two expand to nothing.
#define SLEEF_ALWAYS_INLINE inline
#define SLEEF_INLINE
#define SLEEF_CONST
#include USE_INLINE_HEADER
#include MACRO_ONLY_HEADER
#endif // #if !defined(USE_INLINE_HEADER)

#include "testerutil.h"

// ---------------------------------------------------------------------------
// Per-extension configuration.
//
// Most sections follow one pattern: include the rename*.h header for the
// extension, and — only when the inlinable header is NOT in use — define
// CONFIG, include the matching helper*.h header and typedef the
// vdouble2/vfloat2 pair types.
// CONFIG is defined immediately before the helper header is included; its
// numeric meaning is not visible in this file.
// ---------------------------------------------------------------------------

#define DORENAME

#ifdef ENABLE_SSE2
#include "renamesse2.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 2
#include "helpersse2.h"
typedef Sleef___m128d_2 vdouble2;
typedef Sleef___m128_2 vfloat2;
#endif
#endif

#ifdef ENABLE_SSE4
#include "renamesse4.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 4
#include "helpersse2.h"
typedef Sleef___m128d_2 vdouble2;
typedef Sleef___m128_2 vfloat2;
#endif
#endif

#ifdef ENABLE_AVX
#include "renameavx.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 1
#include "helperavx.h"
typedef Sleef___m256d_2 vdouble2;
typedef Sleef___m256_2 vfloat2;
#endif
#endif

#ifdef ENABLE_FMA4
#include "renamefma4.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 4
#include "helperavx.h"
typedef Sleef___m256d_2 vdouble2;
typedef Sleef___m256_2 vfloat2;
#endif
#endif

#ifdef ENABLE_AVX2
#include "renameavx2.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 1
#include "helperavx2.h"
typedef Sleef___m256d_2 vdouble2;
typedef Sleef___m256_2 vfloat2;
#endif
#endif

#ifdef ENABLE_AVX2128
#include "renameavx2128.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 1
#include "helperavx2_128.h"
typedef Sleef___m128d_2 vdouble2;
typedef Sleef___m128_2 vfloat2;
#endif
#endif

#ifdef ENABLE_AVX512F
#include "renameavx512f.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 1
#include "helperavx512f.h"
typedef Sleef___m512d_2 vdouble2;
typedef Sleef___m512_2 vfloat2;
#endif
#endif

#ifdef ENABLE_AVX512FNOFMA
#include "renameavx512fnofma.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 2
#include "helperavx512f.h"
typedef Sleef___m512d_2 vdouble2;
typedef Sleef___m512_2 vfloat2;
#endif
#endif

// VECEXT and PUREC differ from the pattern above: there is no
// USE_INLINE_HEADER guard, "norename.h" is used instead of a rename header,
// and no pair typedefs are declared here.
#ifdef ENABLE_VECEXT
#define CONFIG 1
#include "helpervecext.h"
#include "norename.h"
#endif

#ifdef ENABLE_PUREC
#define CONFIG 1
#include "helperpurec.h"
#include "norename.h"
#endif

// The two NEON32 sections declare only vfloat2 (no vdouble2 typedef).
#ifdef ENABLE_NEON32
#include "renameneon32.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 1
#include "helperneon32.h"
typedef Sleef_float32x4_t_2 vfloat2;
#endif
#endif

#ifdef ENABLE_NEON32VFPV4
#include "renameneon32vfpv4.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 4
#include "helperneon32.h"
typedef Sleef_float32x4_t_2 vfloat2;
#endif
#endif

#ifdef ENABLE_ADVSIMD
#include "renameadvsimd.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 1
#include "helperadvsimd.h"
typedef Sleef_float64x2_t_2 vdouble2;
typedef Sleef_float32x4_t_2 vfloat2;
#endif
#endif

#ifdef ENABLE_ADVSIMDNOFMA
#include "renameadvsimdnofma.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 2
#include "helperadvsimd.h"
typedef Sleef_float64x2_t_2 vdouble2;
typedef Sleef_float32x4_t_2 vfloat2;
#endif
#endif

// DSP128 (and DSP256 further down) are not guarded by USE_INLINE_HEADER and
// include the helper header before the rename header.
#ifdef ENABLE_DSP128
#define CONFIG 2
#include "helpersse2.h"
#include "renamedsp128.h"
typedef Sleef___m128d_2 vdouble2;
typedef Sleef___m128_2 vfloat2;
#endif

#ifdef ENABLE_SVE
#include "renamesve.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 1
#include "helpersve.h"
typedef Sleef_svfloat64_t_2 vdouble2;
typedef Sleef_svfloat32_t_2 vfloat2;
#endif
#endif

#ifdef ENABLE_SVENOFMA
#include "renamesvenofma.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 2
#include "helpersve.h"
typedef Sleef_svfloat64_t_2 vdouble2;
typedef Sleef_svfloat32_t_2 vfloat2;
#endif
#endif

#ifdef ENABLE_DSP256
#define CONFIG 1
#include "helperavx.h"
#include "renamedsp256.h"
typedef Sleef___m256d_2 vdouble2;
typedef Sleef___m256_2 vfloat2;
#endif

// NOTE(review): in the two VSX sections the rename header is included a
// second time after the helper header; confirm whether the repeat is needed.
#ifdef ENABLE_VSX
#include "renamevsx.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 1
#include "helperpower_128.h"
#include "renamevsx.h"
typedef Sleef___vector_double_2 vdouble2;
typedef Sleef___vector_float_2 vfloat2;
#endif
#endif

#ifdef ENABLE_VSXNOFMA
#include "renamevsxnofma.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 2
#include "helperpower_128.h"
#include "renamevsxnofma.h"
typedef Sleef___vector_double_2 vdouble2;
typedef Sleef___vector_float_2 vfloat2;
#endif
#endif

#ifdef ENABLE_ZVECTOR2
#include "renamezvector2.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 140
#include "helpers390x_128.h"
typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2;
typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
#endif
#endif

#ifdef ENABLE_ZVECTOR2NOFMA
#include "renamezvector2nofma.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 141
#include "helpers390x_128.h"
typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2;
typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
#endif
#endif

#ifdef ENABLE_PUREC_SCALAR
#include "renamepurec_scalar.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 1
#include "helperpurec_scalar.h"
typedef Sleef_double_2 vdouble2;
typedef Sleef_float_2 vfloat2;
#endif
#endif

#ifdef ENABLE_PURECFMA_SCALAR
#include "renamepurecfma_scalar.h"
#if !defined(USE_INLINE_HEADER)
#define CONFIG 2
#include "helperpurec_scalar.h"
typedef Sleef_double_2 vdouble2;
typedef Sleef_float_2 vfloat2;
#endif
#endif

// With the inlinable header, names are built by token-pasting a suffix onto a
// keyword. The two-level macro lets the suffix argument be macro-expanded
// before `##` is applied. This #ifdef stays open here; its remaining
// definitions and the closing #endif follow below.
#ifdef USE_INLINE_HEADER
#define CONCAT_SIMD_SUFFIX_(keyword, suffix) keyword ## suffix
#define CONCAT_SIMD_SUFFIX(keyword, suffix) CONCAT_SIMD_SUFFIX_(keyword, suffix) #define vmask CONCAT_SIMD_SUFFIX(vmask, SIMD_SUFFIX) #define vopmask CONCAT_SIMD_SUFFIX(vopmask, SIMD_SUFFIX) #define vdouble CONCAT_SIMD_SUFFIX(vdouble, SIMD_SUFFIX) #define vint CONCAT_SIMD_SUFFIX(vint, SIMD_SUFFIX) #define vfloat CONCAT_SIMD_SUFFIX(vfloat, SIMD_SUFFIX) #define vint2 CONCAT_SIMD_SUFFIX(vint2, SIMD_SUFFIX) #define vdouble2 CONCAT_SIMD_SUFFIX(vdouble2, SIMD_SUFFIX) #define vfloat2 CONCAT_SIMD_SUFFIX(vfloat2, SIMD_SUFFIX) #define vd2getx_vd_vd2 CONCAT_SIMD_SUFFIX(vd2getx_vd_vd2, SIMD_SUFFIX) #define vd2gety_vd_vd2 CONCAT_SIMD_SUFFIX(vd2gety_vd_vd2, SIMD_SUFFIX) #define vf2getx_vf_vf2 CONCAT_SIMD_SUFFIX(vf2getx_vf_vf2, SIMD_SUFFIX) #define vf2gety_vf_vf2 CONCAT_SIMD_SUFFIX(vf2gety_vf_vf2, SIMD_SUFFIX) #define vloadu_vd_p CONCAT_SIMD_SUFFIX(vloadu_vd_p, SIMD_SUFFIX) #define vstoreu_v_p_vd CONCAT_SIMD_SUFFIX(vstoreu_v_p_vd, SIMD_SUFFIX) #define vloadu_vf_p CONCAT_SIMD_SUFFIX(vloadu_vf_p, SIMD_SUFFIX) #define vstoreu_v_p_vf CONCAT_SIMD_SUFFIX(vstoreu_v_p_vf, SIMD_SUFFIX) #define vloadu_vi_p CONCAT_SIMD_SUFFIX(vloadu_vi_p, SIMD_SUFFIX) #define vstoreu_v_p_vi CONCAT_SIMD_SUFFIX(vstoreu_v_p_vi, SIMD_SUFFIX) #endif // #ifdef ENABLE_DP int check_featureDP(double d) { double s[VECTLENDP]; int i; for(i=0;i #include #include #include #include static jmp_buf sigjmp; int do_test(int argc, char **argv); int check_featureDP(double d); int check_featureSP(float d); #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) #define SETJMP(x) setjmp(x) #define LONGJMP longjmp #else #define SETJMP(x) sigsetjmp(x, 1) #define LONGJMP siglongjmp #endif static void sighandler(int signum) { LONGJMP(sigjmp, 1); } int detectFeatureDP() { signal(SIGILL, sighandler); if (SETJMP(sigjmp) == 0) { int r = check_featureDP(1.0); signal(SIGILL, SIG_DFL); return r; } else { signal(SIGILL, SIG_DFL); return 0; } } int detectFeatureSP() { signal(SIGILL, sighandler); if (SETJMP(sigjmp) == 0) { int r 
= check_featureSP(1.0); signal(SIGILL, SIG_DFL); return r; } else { signal(SIGILL, SIG_DFL); return 0; } } int main(int argc, char **argv) { if (!detectFeatureDP() && !detectFeatureSP()) { fprintf(stderr, "\n\n***** This host does not support the necessary CPU features to execute this program *****\n\n\n"); printf("0\n"); fclose(stdout); exit(-1); } return do_test(argc, argv); } sleef-3.5.1/src/libm-tester/tester.c000066400000000000000000005625271373003144100173230ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // This define is needed to prevent the `execvpe` function to raise a // warning at compile time. For more information, see // https://linux.die.net/man/3/execvp. #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "misc.h" #include "testerutil.h" #ifndef NANf #define NANf ((float)NAN) #endif void stop(char *mes) { fprintf(stderr, "%s\n", mes); exit(-1); } int ptoc[2], ctop[2]; int pid; FILE *fpctop; extern char **environ; void startChild(const char *path, char *const argv[]) { pipe(ptoc); pipe(ctop); pid = fork(); assert(pid != -1); if (pid == 0) { // child process char buf0[1], buf1[1]; int i; close(ptoc[1]); close(ctop[0]); fflush(stdin); fflush(stdout); i = dup2(ptoc[0], fileno(stdin)); assert(i != -1); i = dup2(ctop[1], fileno(stdout)); assert(i != -1); setvbuf(stdin, buf0, _IONBF,0); setvbuf(stdout, buf1, _IONBF,0); fflush(stdin); fflush(stdout); #if !defined(__APPLE__) && !defined(__FreeBSD__) execvpe(path, argv, environ); #else execvp(path, argv); #endif fprintf(stderr, "execvp in startChild : %s\n", strerror(errno)); exit(-1); } // parent process close(ptoc[0]); close(ctop[1]); } // #define child_d_d(funcStr, arg) do { \ char str[256]; \ uint64_t u; \ 
sprintf(str, funcStr " %" PRIx64 "\n", d2u(arg)); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ sscanf(str, "%" PRIx64, &u); \ return u2d(u); \ } while(0) #define child_d2_d(funcStr, arg) do { \ char str[256]; \ uint64_t u, v; \ sprintf(str, funcStr " %" PRIx64 "\n", d2u(arg)); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ sscanf(str, "%" PRIx64 " %" PRIx64, &u, &v); \ Sleef_double2 ret; \ ret.x = u2d(u); \ ret.y = u2d(v); \ return ret; \ } while(0) #define child_d_d_d(funcStr, arg1, arg2) do { \ char str[256]; \ uint64_t u; \ sprintf(str, funcStr " %" PRIx64 " %" PRIx64 "\n", d2u(arg1), d2u(arg2)); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ sscanf(str, "%" PRIx64, &u); \ return u2d(u); \ } while(0) double child_sin(double x) { child_d_d("sin", x); } double child_cos(double x) { child_d_d("cos", x); } double child_tan(double x) { child_d_d("tan", x); } double child_asin(double x) { child_d_d("asin", x); } double child_acos(double x) { child_d_d("acos", x); } double child_atan(double x) { child_d_d("atan", x); } double child_log(double x) { child_d_d("log", x); } double child_exp(double x) { child_d_d("exp", x); } double child_cbrt(double x) { child_d_d("cbrt", x); } double child_atan2(double y, double x) { child_d_d_d("atan2", y, x); } Sleef_double2 child_sincos(double x) { child_d2_d("sincos", x); } double child_sin_u1(double x) { child_d_d("sin_u1", x); } double child_cos_u1(double x) { child_d_d("cos_u1", x); } double child_tan_u1(double x) { child_d_d("tan_u1", x); } double child_asin_u1(double x) { child_d_d("asin_u1", x); } double child_acos_u1(double x) { child_d_d("acos_u1", x); } double child_atan_u1(double x) { child_d_d("atan_u1", x); } double child_log_u1(double x) { child_d_d("log_u1", x); } double child_exp_u1(double x) { child_d_d("exp_u1", x); } double child_cbrt_u1(double x) { 
child_d_d("cbrt_u1", x); }
// Double-precision request wrappers. Each body is a single expansion of
// child_d_d / child_d_d_d / child_d2_d with the command name passed as the
// string argument; the macro performs the `return`.
double child_atan2_u1(double y, double x) { child_d_d_d("atan2_u1", y, x); }
Sleef_double2 child_sincos_u1(double x) { child_d2_d("sincos_u1", x); }

double child_pow(double x, double y) { child_d_d_d("pow", x, y); }
double child_sqrt(double x) { child_d_d("sqrt", x); }
double child_sqrt_u05(double x) { child_d_d("sqrt_u05", x); }
double child_sqrt_u35(double x) { child_d_d("sqrt_u35", x); }

// Hyperbolic functions and their inverses.
double child_sinh(double x) { child_d_d("sinh", x); }
double child_cosh(double x) { child_d_d("cosh", x); }
double child_tanh(double x) { child_d_d("tanh", x); }
double child_sinh_u35(double x) { child_d_d("sinh_u35", x); }
double child_cosh_u35(double x) { child_d_d("cosh_u35", x); }
double child_tanh_u35(double x) { child_d_d("tanh_u35", x); }
double child_asinh(double x) { child_d_d("asinh", x); }
double child_acosh(double x) { child_d_d("acosh", x); }
double child_atanh(double x) { child_d_d("atanh", x); }

// Logarithm / exponential variants.
double child_log10(double x) { child_d_d("log10", x); }
double child_log2(double x) { child_d_d("log2", x); }
double child_log2_u35(double x) { child_d_d("log2_u35", x); }
double child_log1p(double x) { child_d_d("log1p", x); }
double child_exp2(double x) { child_d_d("exp2", x); }
double child_exp10(double x) { child_d_d("exp10", x); }
double child_exp2_u35(double x) { child_d_d("exp2_u35", x); }
double child_exp10_u35(double x) { child_d_d("exp10_u35", x); }
double child_expm1(double x) { child_d_d("expm1", x); }

// The sincospi wrappers return both reply words as a Sleef_double2 (.x, .y).
Sleef_double2 child_sincospi_u05(double x) { child_d2_d("sincospi_u05", x); }
Sleef_double2 child_sincospi_u35(double x) { child_d2_d("sincospi_u35", x); }
double child_sinpi_u05(double x) { child_d_d("sinpi_u05", x); }
double child_cospi_u05(double x) { child_d_d("cospi_u05", x); }

double child_hypot_u05(double x, double y) { child_d_d_d("hypot_u05", x, y); }
double child_hypot_u35(double x, double y) { child_d_d_d("hypot_u35", x, y); }

double child_copysign(double x, double y) { child_d_d_d("copysign", x, y); }
double
child_fmax(double x, double y) { child_d_d_d("fmax", x, y); } double child_fmin(double x, double y) { child_d_d_d("fmin", x, y); } double child_fdim(double x, double y) { child_d_d_d("fdim", x, y); } double child_nextafter(double x, double y) { child_d_d_d("nextafter", x, y); } double child_fmod(double x, double y) { child_d_d_d("fmod", x, y); } double child_remainder(double x, double y) { child_d_d_d("remainder", x, y); } double child_fabs(double x) { child_d_d("fabs", x); } double child_trunc(double x) { child_d_d("trunc", x); } double child_floor(double x) { child_d_d("floor", x); } double child_ceil(double x) { child_d_d("ceil", x); } double child_round(double x) { child_d_d("round", x); } double child_rint(double x) { child_d_d("rint", x); } double child_frfrexp(double x) { child_d_d("frfrexp", x); } Sleef_double2 child_modf(double x) { child_d2_d("modf", x); } double child_tgamma_u1(double x) { child_d_d("tgamma_u1", x); } double child_lgamma_u1(double x) { child_d_d("lgamma_u1", x); } double child_erf_u1(double x) { child_d_d("erf_u1", x); } double child_erfc_u15(double x) { child_d_d("erfc_u15", x); } // double child_ldexp(double x, int q) { char str[256]; uint64_t u; sprintf(str, "ldexp %" PRIx64 " %" PRIx64 "\n", d2u(x), d2u(q)); write(ptoc[1], str, strlen(str)); if (fgets(str, 255, fpctop) == NULL) stop("child_ldexp"); sscanf(str, "%" PRIx64, &u); return u2d(u); } int child_ilogb(double x) { char str[256]; int i; sprintf(str, "ilogb %" PRIx64 "\n", d2u(x)); write(ptoc[1], str, strlen(str)); if (fgets(str, 255, fpctop) == NULL) stop("child_ilogb"); sscanf(str, "%d", &i); return i; } // #define child_f_f(funcStr, arg) do { \ char str[256]; \ uint32_t u; \ sprintf(str, funcStr " %x\n", f2u(arg)); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ sscanf(str, "%x", &u); \ return u2f(u); \ } while(0) #define child_f2_f(funcStr, arg) do { \ char str[256]; \ uint32_t u, v; \ sprintf(str, funcStr " %x\n", 
f2u(arg)); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ sscanf(str, "%x %x", &u, &v); \ Sleef_float2 ret; \ ret.x = u2f(u); \ ret.y = u2f(v); \ return ret; \ } while(0) #define child_f_f_f(funcStr, arg1, arg2) do { \ char str[256]; \ uint32_t u; \ sprintf(str, funcStr " %x %x\n", f2u(arg1), f2u(arg2)); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ sscanf(str, "%x", &u); \ return u2f(u); \ } while(0) float child_sinf(float x) { child_f_f("sinf", x); } float child_cosf(float x) { child_f_f("cosf", x); } float child_tanf(float x) { child_f_f("tanf", x); } float child_asinf(float x) { child_f_f("asinf", x); } float child_acosf(float x) { child_f_f("acosf", x); } float child_atanf(float x) { child_f_f("atanf", x); } float child_logf(float x) { child_f_f("logf", x); } float child_expf(float x) { child_f_f("expf", x); } float child_cbrtf(float x) { child_f_f("cbrtf", x); } float child_atan2f(float y, float x) { child_f_f_f("atan2f", y, x); } Sleef_float2 child_sincosf(float x) { child_f2_f("sincosf", x); } float child_sinf_u1(float x) { child_f_f("sinf_u1", x); } float child_cosf_u1(float x) { child_f_f("cosf_u1", x); } float child_tanf_u1(float x) { child_f_f("tanf_u1", x); } float child_asinf_u1(float x) { child_f_f("asinf_u1", x); } float child_acosf_u1(float x) { child_f_f("acosf_u1", x); } float child_atanf_u1(float x) { child_f_f("atanf_u1", x); } float child_logf_u1(float x) { child_f_f("logf_u1", x); } float child_expf_u1(float x) { child_f_f("expf_u1", x); } float child_cbrtf_u1(float x) { child_f_f("cbrtf_u1", x); } float child_atan2f_u1(float y, float x) { child_f_f_f("atan2f_u1", y, x); } Sleef_float2 child_sincosf_u1(float x) { child_f2_f("sincosf_u1", x); } float child_powf(float x, float y) { child_f_f_f("powf", x, y); } float child_sqrtf(float x) { child_f_f("sqrtf", x); } float child_sqrtf_u05(float x) { child_f_f("sqrtf_u05", x); } float 
child_sqrtf_u35(float x) { child_f_f("sqrtf_u35", x); }
// Single-precision request wrappers. Each body is a single expansion of
// child_f_f / child_f_f_f / child_f2_f with the command name passed as the
// string argument; the macro performs the `return`.

// Hyperbolic functions and their inverses.
float child_sinhf(float x) { child_f_f("sinhf", x); }
float child_coshf(float x) { child_f_f("coshf", x); }
float child_tanhf(float x) { child_f_f("tanhf", x); }
float child_sinhf_u35(float x) { child_f_f("sinhf_u35", x); }
float child_coshf_u35(float x) { child_f_f("coshf_u35", x); }
float child_tanhf_u35(float x) { child_f_f("tanhf_u35", x); }
float child_asinhf(float x) { child_f_f("asinhf", x); }
float child_acoshf(float x) { child_f_f("acoshf", x); }
float child_atanhf(float x) { child_f_f("atanhf", x); }

// Logarithm / exponential variants.
float child_log10f(float x) { child_f_f("log10f", x); }
float child_log2f(float x) { child_f_f("log2f", x); }
float child_log2f_u35(float x) { child_f_f("log2f_u35", x); }
float child_log1pf(float x) { child_f_f("log1pf", x); }
float child_exp2f(float x) { child_f_f("exp2f", x); }
float child_exp10f(float x) { child_f_f("exp10f", x); }
float child_exp2f_u35(float x) { child_f_f("exp2f_u35", x); }
float child_exp10f_u35(float x) { child_f_f("exp10f_u35", x); }
float child_expm1f(float x) { child_f_f("expm1f", x); }

// The sincospif wrappers return both reply words as a Sleef_float2 (.x, .y).
Sleef_float2 child_sincospif_u05(float x) { child_f2_f("sincospif_u05", x); }
Sleef_float2 child_sincospif_u35(float x) { child_f2_f("sincospif_u35", x); }
float child_sinpif_u05(float x) { child_f_f("sinpif_u05", x); }
float child_cospif_u05(float x) { child_f_f("cospif_u05", x); }

float child_hypotf_u05(float x, float y) { child_f_f_f("hypotf_u05", x, y); }
float child_hypotf_u35(float x, float y) { child_f_f_f("hypotf_u35", x, y); }

// Two-argument helpers.
float child_copysignf(float x, float y) { child_f_f_f("copysignf", x, y); }
float child_fmaxf(float x, float y) { child_f_f_f("fmaxf", x, y); }
float child_fminf(float x, float y) { child_f_f_f("fminf", x, y); }
float child_fdimf(float x, float y) { child_f_f_f("fdimf", x, y); }
float child_nextafterf(float x, float y) { child_f_f_f("nextafterf", x, y); }
float child_fmodf(float x, float y) { child_f_f_f("fmodf", x, y); }
float child_remainderf(float x, float y)
{ child_f_f_f("remainderf", x, y); } float child_fabsf(float x) { child_f_f("fabsf", x); } float child_truncf(float x) { child_f_f("truncf", x); } float child_floorf(float x) { child_f_f("floorf", x); } float child_ceilf(float x) { child_f_f("ceilf", x); } float child_roundf(float x) { child_f_f("roundf", x); } float child_rintf(float x) { child_f_f("rintf", x); } float child_frfrexpf(float x) { child_f_f("frfrexpf", x); } Sleef_float2 child_modff(float x) { child_f2_f("modff", x); } float child_tgammaf_u1(float x) { child_f_f("tgammaf_u1", x); } float child_lgammaf_u1(float x) { child_f_f("lgammaf_u1", x); } float child_erff_u1(float x) { child_f_f("erff_u1", x); } float child_erfcf_u15(float x) { child_f_f("erfcf_u15", x); } float child_fastsinf_u3500(float x) { child_f_f("fastsinf_u3500", x); } float child_fastcosf_u3500(float x) { child_f_f("fastcosf_u3500", x); } float child_fastpowf_u3500(float x, float y) { child_f_f_f("fastpowf_u3500", x, y); } float child_ldexpf(float x, int q) { char str[256]; uint32_t u; sprintf(str, "ldexpf %x %x\n", f2u(x), f2u(q)); write(ptoc[1], str, strlen(str)); if (fgets(str, 255, fpctop) == NULL) stop("child_powf"); sscanf(str, "%x", &u); return u2f(u); } int child_ilogbf(float x) { char str[256]; int i; sprintf(str, "ilogbf %x\n", f2u(x)); write(ptoc[1], str, strlen(str)); if (fgets(str, 255, fpctop) == NULL) stop("child_ilogbf"); sscanf(str, "%d", &i); return i; } // int allTestsPassed = 1; void showResult(int success) { if (!success) allTestsPassed = 0; fprintf(stderr, "%s\n", success ? 
"OK" : "NG **************"); if (!success) { fprintf(stderr, "\n\n*** Test failed\n"); exit(-1); } } int enableDP = 0, enableSP = 0, deterministicMode = 0; void do_test() { mpfr_t frc, frt, frx, fry, frz; mpfr_inits(frc, frt, frx, fry, frz, NULL); int i, j; int64_t i64; double d, x, y; int success = 1; if (enableDP) { fprintf(stderr, "Denormal/nonnumber test atan2(y, x)\n\n"); fprintf(stderr, "If y is +0 and x is -0, +pi is returned : "); showResult(child_atan2(+0.0, -0.0) == M_PI); fprintf(stderr, "If y is -0 and x is -0, -pi is returned : "); showResult(child_atan2(-0.0, -0.0) == -M_PI); fprintf(stderr, "If y is +0 and x is +0, +0 is returned : "); showResult(isPlusZero(child_atan2(+0.0, +0.0))); fprintf(stderr, "If y is -0 and x is +0, -0 is returned : "); showResult(isMinusZero(child_atan2(-0.0, +0.0))); fprintf(stderr, "If y is positive infinity and x is negative infinity, +3*pi/4 is returned : "); showResult(child_atan2(POSITIVE_INFINITY, NEGATIVE_INFINITY) == 3*M_PI/4); fprintf(stderr, "If y is negative infinity and x is negative infinity, -3*pi/4 is returned : "); showResult(child_atan2(NEGATIVE_INFINITY, NEGATIVE_INFINITY) == -3*M_PI/4); fprintf(stderr, "If y is positive infinity and x is positive infinity, +pi/4 is returned : "); showResult(child_atan2(POSITIVE_INFINITY, POSITIVE_INFINITY) == M_PI/4); fprintf(stderr, "If y is negative infinity and x is positive infinity, -pi/4 is returned : "); showResult(child_atan2(NEGATIVE_INFINITY, POSITIVE_INFINITY) == -M_PI/4); { fprintf(stderr, "If y is +0 and x is less than 0, +pi is returned : "); double ya[] = { +0.0 }; double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 }; for(i=0;i 1e+300) continue; cmpDenorm_d_d(mpfr_fmod, child_fmod, xa[i], ya[j]); } } showResult(success); } { fprintf(stderr, "remainder denormal/nonnumber test : "); double xa[] = { +0.0, -0.0, +1, -1, +1e+100, -1e+100, 1.7e+308, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN }; double ya[] = { 
+0.0, -0.0, +1, -1, +1e+100, -1e+100, 1.0e+308, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN }; for(i=0;i 1e+300) continue; cmpDenorm_d_d(mpfr_remainder, child_remainder, xa[i], ya[j]); } } showResult(success); } { fprintf(stderr, "trunc denormal/nonnumber test : "); double xa[] = { +0.0, -0.0, +1, -1, +1e+10, -1e+10, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN }; for(i=0;i 1e+38) continue; cmpDenorm_f_f(mpfr_fmod, child_fmodf, xa[i], ya[j]); } } } else { float xa[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN }; float ya[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN }; for(i=0;i 1e+38) continue; cmpDenorm_f_f(mpfr_fmod, child_fmodf, xa[i], ya[j]); } } } showResult(success); } { fprintf(stderr, "remainderf denormal/nonnumber test : "); if (enableFlushToZero) { float xa[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN }; float ya[] = { +0.0, -0.0, +1, -1, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN }; for(i=0;i 1e+38) continue; cmpDenorm_f_f(mpfr_remainder, child_remainderf, xa[i], ya[j]); } } } else { float xa[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN }; float ya[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN }; for(i=0;i 1e+38) continue; cmpDenorm_f_f(mpfr_remainder, child_remainderf, xa[i], ya[j]); } } } showResult(success); } { fprintf(stderr, "truncf denormal/nonnumber test : "); float xa[] = { +0.0, -0.0, +1, -1, +1e+10, -1e+10, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN }; for(i=0;i bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", argx, 
childFunc(argx), mpfr_get_d(frc, GMP_RNDN), countULPdp(childFunc(argx), frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracyNR_d(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, argx, GMP_RNDN); \ mpfrFunc(frc, frx); \ if (countULPdp(childFunc(argx), frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", argx, childFunc(argx), mpfr_get_d(frc, GMP_RNDN), countULPdp(childFunc(argx), frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracy_d_d(mpfrFunc, childFunc, argx, argy, bound) do { \ mpfr_set_d(frx, argx, GMP_RNDN); \ mpfr_set_d(fry, argy, GMP_RNDN); \ mpfrFunc(frc, frx, fry, GMP_RNDN); \ if (countULPdp(childFunc(argx, argy), frc) > bound) { \ fprintf(stderr, "\narg = %.20g, %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", \ argx, argy, childFunc(argx, argy), mpfr_get_d(frc, GMP_RNDN), countULPdp(childFunc(argx, argy), frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracyX_d(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, argx, GMP_RNDN); \ mpfrFunc(frc, frx, GMP_RNDN); \ Sleef_double2 d2 = childFunc(argx); \ if (countULPdp(d2.x, frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", argx, d2.x, mpfr_get_d(frc, GMP_RNDN), countULPdp(d2.x, frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracyY_d(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, argx, GMP_RNDN); \ mpfrFunc(frc, frx, GMP_RNDN); \ Sleef_double2 d2 = childFunc(argx); \ if (countULPdp(d2.y, frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", argx, d2.y, mpfr_get_d(frc, GMP_RNDN), countULPdp(d2.y, frc)); \ success = 0; \ break; \ } \ } while(0) // fprintf(stderr, "\nAccuracy test\n"); // if (enableDP) { // 64 > 53(=number of bits in DP mantissa) mpfr_set_default_prec(64); fprintf(stderr, "hypot_u35 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) 
checkAccuracy_d_d(mpfr_hypot, child_hypot_u35, y, x, 3.5); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_hypot, child_hypot_u35, y, x, 3.5); } showResult(success); // fprintf(stderr, "hypot_u05 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_hypot, child_hypot_u05, y, x, 0.5); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_hypot, child_hypot_u05, y, x, 0.5); } showResult(success); // fprintf(stderr, "copysign : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_copysign, child_copysign, y, x, 0); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_copysign, child_copysign, y, x, 0); } showResult(success); // fprintf(stderr, "fmax : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_max, child_fmax, y, x, 0); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_max, child_fmax, y, x, 0); } showResult(success); // fprintf(stderr, "fmin : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_min, child_fmin, y, x, 0); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_min, child_fmin, y, x, 0); } showResult(success); // fprintf(stderr, "fdim : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_dim, child_fdim, y, x, 0.5); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_dim, child_fdim, y, x, 0.5); } 
showResult(success); // fprintf(stderr, "fmod : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_fmod, child_fmod, y, x, 0.5); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_fmod, child_fmod, y, x, 0.5); } showResult(success); // fprintf(stderr, "remainder : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_remainder, child_remainder, y, x, 0.5); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_remainder, child_remainder, y, x, 0.5); } showResult(success); // fprintf(stderr, "trunc : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_trunc, child_trunc, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_d(mpfr_trunc, child_trunc, d, 0); { double start = u2d(d2u((double)(INT64_C(1) << 52))-20), end = u2d(d2u((double)(INT64_C(1) << 52))+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_trunc, child_trunc, d, 0); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_trunc, child_trunc, -d, 0); } showResult(success); // fprintf(stderr, "floor : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_floor, child_floor, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_d(mpfr_floor, child_floor, d, 0); { double start = u2d(d2u((double)(INT64_C(1) << 52))-20), end = u2d(d2u((double)(INT64_C(1) << 52))+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_floor, child_floor, d, 0); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_floor, child_floor, -d, 0); } showResult(success); // fprintf(stderr, "ceil : "); for(x = -100.5;x <= 
100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_ceil, child_ceil, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_d(mpfr_ceil, child_ceil, d, 0); { double start = u2d(d2u((double)(INT64_C(1) << 52))-20), end = u2d(d2u((double)(INT64_C(1) << 52))+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_ceil, child_ceil, d, 0); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_ceil, child_ceil, -d, 0); } showResult(success); // fprintf(stderr, "round : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_round, child_round, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_d(mpfr_round, child_round, d, 0); { double start = u2d(d2u((double)(INT64_C(1) << 52))-20), end = u2d(d2u((double)(INT64_C(1) << 52))+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_round, child_round, d, 0); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_round, child_round, -d, 0); } showResult(success); // fprintf(stderr, "rint : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_rint, child_rint, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracy_d(mpfr_rint, child_rint, d, 0); { double start = u2d(d2u((double)(INT64_C(1) << 52))-20), end = u2d(d2u((double)(INT64_C(1) << 52))+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_rint, child_rint, d, 0); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_rint, child_rint, -d, 0); } showResult(success); // fprintf(stderr, "sin : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_sin, child_sin, d, 3.5); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_sin, child_sin, d, 3.5); for(i = 0;i < 920 && success;i++) 
checkAccuracy_d(mpfr_sin, child_sin, pow(2.16, i), 3.5); for(i64=(int64_t)-1e+14;i64<(int64_t)1e+14 && success;i64+=(int64_t)1e+12) { double start = u2d(d2u(M_PI_4 * i64)-20), end = u2d(d2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_sin, child_sin, d, 3.5); } showResult(success); // fprintf(stderr, "sin_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_sin, child_sin_u1, d, 1.0); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_sin, child_sin_u1, d, 1.0); for(i = 0;i < 920 && success;i++) checkAccuracy_d(mpfr_sin, child_sin_u1, pow(2.16, i), 1.0); for(i64=(int64_t)-1e+14;i64<(int64_t)1e+14 && success;i64+=(int64_t)1e+12) { double start = u2d(d2u(M_PI_4 * i64)-20), end = u2d(d2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_sin, child_sin_u1, d, 1.0); } showResult(success); // fprintf(stderr, "sin in sincos : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyX_d(mpfr_sin, child_sincos, d, 3.5); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracyX_d(mpfr_sin, child_sincos, d, 3.5); for(i = 0;i < 920 && success;i++) checkAccuracyX_d(mpfr_sin, child_sincos, pow(2.16, i), 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(M_PI_4 * i)-20), end = u2d(d2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyX_d(mpfr_sin, child_sincos, d, 3.5); } showResult(success); // fprintf(stderr, "sin in sincos_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyX_d(mpfr_sin, child_sincos_u1, d, 1.0); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracyX_d(mpfr_sin, child_sincos_u1, d, 1.0); for(i = 0;i < 920 && success;i++) checkAccuracyX_d(mpfr_sin, child_sincos_u1, pow(2.16, i), 1.0); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(M_PI_4 * i)-20), end = u2d(d2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyX_d(mpfr_sin, 
child_sincos_u1, d, 1.0); } showResult(success); // // 1280 > 1024(=maximum DP exponent) + 53(=number of bits in DP mantissa) mpfr_set_default_prec(1280); fprintf(stderr, "sin in sincospi_u35 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u35, d, 3.5); for(d = -1e+8-0.1;d < 1e+8 && success;d += (1e+10 + 0.1)) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u35, d, 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u35, d, 3.5); } for(i=1;i<=20 && success;i++) { double start = u2d(d2u(0.25 * i)-20), end = u2d(d2u(0.25 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u35, d, 3.5); } showResult(success); // fprintf(stderr, "sin in sincospi_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u05, d, 0.506); for(d = -1e+8-0.1;d < 1e+8 && success;d += (1e+10 + 0.1)) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2d(d2u(0.25 * i)-20), end = u2d(d2u(0.25 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u05, d, 0.506); } showResult(success); // fprintf(stderr, "sinpi_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracy_d(mpfr_sinpi, child_sinpi_u05, d, 0.506); for(d = -1e+8-0.1;d < 1e+8 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_sinpi, child_sinpi_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_sinpi, child_sinpi_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double 
start = u2d(d2u(0.25 * i)-20), end = u2d(d2u(0.25 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_sinpi, child_sinpi_u05, d, 0.506); } showResult(success); // fprintf(stderr, "cospi_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracy_d(mpfr_cospi, child_cospi_u05, d, 0.506); for(d = -1e+8-0.1;d < 1e+8 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_cospi, child_cospi_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_cospi, child_cospi_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2d(d2u(0.25 * i)-20), end = u2d(d2u(0.25 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_cospi, child_cospi_u05, d, 0.506); } showResult(success); mpfr_set_default_prec(64); // fprintf(stderr, "cos : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_cos, child_cos, d, 3.5); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_cos, child_cos, d, 3.5); for(i = 0;i < 920 && success;i++) checkAccuracy_d(mpfr_cos, child_cos, pow(2.16, i), 3.5); for(i64=(int64_t)-1e+14;i64<(int64_t)1e+14 && success;i64+=(int64_t)1e+12) { double start = u2d(d2u(M_PI_4 * i64)-20), end = u2d(d2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_cos, child_cos, d, 3.5); } showResult(success); // fprintf(stderr, "cos_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_cos, child_cos_u1, d, 1.0); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_cos, child_cos_u1, d, 1.0); for(i = 0;i < 920 && success;i++) checkAccuracy_d(mpfr_cos, child_cos_u1, pow(2.16, i), 1.0); for(i64=(int64_t)-1e+14;i64<(int64_t)1e+14 && success;i64+=(int64_t)1e+12) { double start = u2d(d2u(M_PI_4 * i64)-20), end = u2d(d2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_cos, child_cos_u1, 
d, 1.0); } showResult(success); // fprintf(stderr, "cos in sincos : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyY_d(mpfr_cos, child_sincos, d, 3.5); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracyY_d(mpfr_cos, child_sincos, d, 3.5); for(i = 0;i < 920 && success;i++) checkAccuracyY_d(mpfr_cos, child_sincos, pow(2.16, i), 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(M_PI_4 * i)-20), end = u2d(d2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyY_d(mpfr_cos, child_sincos, d, 3.5); } showResult(success); // fprintf(stderr, "cos in sincos_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyY_d(mpfr_cos, child_sincos_u1, d, 1.0); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracyY_d(mpfr_cos, child_sincos_u1, d, 1.0); for(i = 0;i < 920 && success;i++) checkAccuracyY_d(mpfr_cos, child_sincos_u1, pow(2.16, i), 1.0); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(M_PI_4 * i)-20), end = u2d(d2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyY_d(mpfr_cos, child_sincos_u1, d, 1.0); } showResult(success); // mpfr_set_default_prec(1280); fprintf(stderr, "cos in sincospi_u35 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyY_d(mpfr_cospi, child_sincospi_u35, d, 3.5); for(d = -1e+8-0.1;d < 1e+8 && success;d += (1e+10 + 0.1)) checkAccuracyY_d(mpfr_cospi, child_sincospi_u35, d, 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyY_d(mpfr_cospi, child_sincospi_u35, d, 3.5); } for(i=1;i<=20 && success;i++) { double start = u2d(d2u(0.25 * i)-20), end = u2d(d2u(0.25 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyY_d(mpfr_cospi, child_sincospi_u35, d, 3.5); } showResult(success); // fprintf(stderr, "cos in sincospi_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyY_d(mpfr_cospi, 
child_sincospi_u05, d, 0.506); for(d = -1e+8-0.1;d < 1e+8 && success;d += (1e+10 + 0.1)) checkAccuracyY_d(mpfr_cospi, child_sincospi_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyY_d(mpfr_cospi, child_sincospi_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2d(d2u(0.25 * i)-20), end = u2d(d2u(0.25 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyY_d(mpfr_cospi, child_sincospi_u05, d, 0.506); } showResult(success); mpfr_set_default_prec(64); // fprintf(stderr, "tan : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_tan, child_tan, d, 3.5); for(d = -1e+7;d < 1e+7 && success;d += 1000.1) checkAccuracy_d(mpfr_tan, child_tan, d, 3.5); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_tan, child_tan, d, 3.5); for(i = 0;i < 920 && success;i++) checkAccuracy_d(mpfr_tan, child_tan, pow(2.16, i), 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(M_PI_4 * i)-20), end = u2d(d2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_tan, child_tan, d, 3.5); } showResult(success); // fprintf(stderr, "tan_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_tan, child_tan_u1, d, 1.0); for(d = -1e+7;d < 1e+7 && success;d += 1000.1) checkAccuracy_d(mpfr_tan, child_tan_u1, d, 1.0); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_tan, child_tan_u1, d, 1.0); for(i = 0;i < 920 && success;i++) checkAccuracy_d(mpfr_tan, child_tan_u1, pow(2.16, i), 1.0); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(M_PI_4 * i)-20), end = u2d(d2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_tan, child_tan_u1, d, 1.0); } showResult(success); // fprintf(stderr, "log : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log, child_log, d, 3.5); for(d = 0.0001;d < 
10000 && success;d += 1.1) checkAccuracy_d(mpfr_log, child_log, d, 3.5); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log, pow(2.1, i), 3.5); for(i=0;i<10000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log, DBL_MAX * pow(0.9314821319758632, i), 3.5); for(i=0;i<10000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log, pow(0.933254300796991, i), 3.5); for(i=0;i<10000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log, DBL_MIN * pow(0.996323, i), 3.5); showResult(success); // fprintf(stderr, "log_u1 : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log, child_log_u1, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_d(mpfr_log, child_log_u1, d, 1.0); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log_u1, pow(2.1, i), 1.0); for(i=0;i<10000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log_u1, DBL_MAX * pow(0.9314821319758632, i), 1.0); for(i=0;i<10000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log_u1, pow(0.933254300796991, i), 1.0); for(i=0;i<10000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log_u1, DBL_MIN * pow(0.996323, i), 1.0); showResult(success); // fprintf(stderr, "exp : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_exp, child_exp, d, 1.0); for(d = -1000;d < 1000 && success;d += 1.1) checkAccuracy_d(mpfr_exp, child_exp, d, 1.0); showResult(success); // fprintf(stderr, "pow : "); for(y = 0.1;y < 100 && success;y += 0.6) { for(x = -100;x < 100 && success;x += 0.6) { checkAccuracy_d_d(mpfr_pow, child_pow, x, y, 1.0); } } for(y = -1000;y < 1000 && success;y += 0.1) checkAccuracy_d_d(mpfr_pow, child_pow, 2.1, y, 1.0); showResult(success); // if (!deterministicMode) { fprintf(stderr, "sqrt : "); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_sqrt, child_sqrt, d, 1.0); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_sqrt, child_sqrt, pow(2.1, d), 1.0); showResult(success); // 
fprintf(stderr, "sqrt_u05 : "); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_sqrt, child_sqrt_u05, d, 0.506); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_sqrt, child_sqrt_u05, pow(2.1, d), 0.506); showResult(success); // fprintf(stderr, "sqrt_u35 : "); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_sqrt, child_sqrt_u35, d, 3.5); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_sqrt, child_sqrt_u35, pow(2.1, d), 3.5); showResult(success); } // fprintf(stderr, "cbrt : "); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_cbrt, child_cbrt, d, 3.5); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_cbrt, child_cbrt, pow(2.1, d), 3.5); showResult(success); // fprintf(stderr, "cbrt_u1 : "); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_cbrt, child_cbrt_u1, d, 1.0); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_cbrt, child_cbrt_u1, pow(2.1, d), 1.0); showResult(success); // fprintf(stderr, "asin : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_d(mpfr_asin, child_asin, d, 3.5); showResult(success); // fprintf(stderr, "asin_u1 : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_d(mpfr_asin, child_asin_u1, d, 1.0); showResult(success); // fprintf(stderr, "acos : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_d(mpfr_acos, child_acos, d, 3.5); showResult(success); // fprintf(stderr, "acos_u1 : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_d(mpfr_acos, child_acos_u1, d, 1.0); showResult(success); // fprintf(stderr, "atan : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_atan, child_atan, d, 3.5); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_atan, child_atan, d, 3.5); showResult(success); // fprintf(stderr, "atan_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_atan, child_atan_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 
2.1) checkAccuracy_d(mpfr_atan, child_atan_u1, d, 1.0); showResult(success); // fprintf(stderr, "atan2 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_atan2, child_atan2, y, x, 3.5); } for(y = -100;y < 100 && success;y += 1.51) { for(x = -100;x < 100 && success;x += 1.51) checkAccuracy_d_d(mpfr_atan2, child_atan2, y, x, 3.5); } showResult(success); // fprintf(stderr, "atan2_u1 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_atan2, child_atan2_u1, y, x, 1.0); } for(y = -100;y < 100 && success;y += 1.51) { for(x = -100;x < 100 && success;x += 1.51) checkAccuracy_d_d(mpfr_atan2, child_atan2_u1, y, x, 1.0); } showResult(success); // fprintf(stderr, "sinh : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_sinh, child_sinh, d, 1.0); for(d = -709;d < 709 && success;d += 0.2) checkAccuracy_d(mpfr_sinh, child_sinh, d, 1.0); showResult(success); // fprintf(stderr, "cosh : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_cosh, child_cosh, d, 1.0); for(d = -709;d < 709 && success;d += 0.2) checkAccuracy_d(mpfr_cosh, child_cosh, d, 1.0); showResult(success); // fprintf(stderr, "tanh : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_tanh, child_tanh, d, 1.0); for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_d(mpfr_tanh, child_tanh, d, 1.0); showResult(success); // fprintf(stderr, "sinh_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_sinh, child_sinh_u35, d, 3.5); for(d = -709;d < 709 && success;d += 0.2) checkAccuracy_d(mpfr_sinh, child_sinh_u35, d, 3.5); showResult(success); // fprintf(stderr, "cosh_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_cosh, child_cosh_u35, d, 3.5); for(d = -709;d < 709 && success;d += 0.2) checkAccuracy_d(mpfr_cosh, child_cosh_u35, d, 3.5); showResult(success); // fprintf(stderr, "tanh_u35 : "); for(d = -10;d 
< 10 && success;d += 0.002) checkAccuracy_d(mpfr_tanh, child_tanh_u35, d, 3.5); for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_d(mpfr_tanh, child_tanh_u35, d, 3.5); showResult(success); // fprintf(stderr, "asinh : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_asinh, child_asinh, d, 1.0); for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_d(mpfr_asinh, child_asinh, d, 1.0); showResult(success); // fprintf(stderr, "acosh : "); for(d = 1;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_acosh, child_acosh, d, 1.0); for(d = 1;d < 1000 && success;d += 0.2) checkAccuracy_d(mpfr_acosh, child_acosh, d, 1.0); showResult(success); // fprintf(stderr, "atanh : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_atanh, child_atanh, d, 1.0); for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_d(mpfr_atanh, child_atanh, d, 1.0); showResult(success); // fprintf(stderr, "exp2 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_exp2, child_exp2, d, 1.0); for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_d(mpfr_exp2, child_exp2, d, 1.0); showResult(success); // fprintf(stderr, "exp10 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_exp10, child_exp10, d, 1.0); for(d = -300;d < 300 && success;d += 0.1) checkAccuracy_d(mpfr_exp10, child_exp10, d, 1.0); showResult(success); // fprintf(stderr, "exp2_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_exp2, child_exp2_u35, d, 3.5); for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_d(mpfr_exp2, child_exp2_u35, d, 3.5); showResult(success); // fprintf(stderr, "exp10_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_exp10, child_exp10_u35, d, 3.5); for(d = -300;d < 300 && success;d += 0.1) checkAccuracy_d(mpfr_exp10, child_exp10_u35, d, 3.5); showResult(success); // fprintf(stderr, "expm1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_expm1, child_expm1, d, 1.0); 
for(d = -1000;d < 1000 && success;d += 0.21) checkAccuracy_d(mpfr_expm1, child_expm1, d, 1.0); for(d = 0;d < 300 && success;d += 0.21) checkAccuracy_d(mpfr_expm1, child_expm1, pow(10, -d), 1.0); for(d = 0;d < 300 && success;d += 0.21) checkAccuracy_d(mpfr_expm1, child_expm1, (-pow(10, -d)), 1.0); showResult(success); // fprintf(stderr, "log10 : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log10, child_log10, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_d(mpfr_log10, child_log10, d, 1.0); for(i=0;i<10000 && success;i++) checkAccuracy_d(mpfr_log10, child_log10, (DBL_MIN * pow(0.996323, i)), 1.0); showResult(success); // fprintf(stderr, "log2 : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log2, child_log2, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_d(mpfr_log2, child_log2, d, 1.0); for(i=0;i<10000 && success;i++) checkAccuracy_d(mpfr_log2, child_log2, (DBL_MIN * pow(0.996323, i)), 1.0); showResult(success); // fprintf(stderr, "log2_u35 : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log2, child_log2_u35, d, 3.5); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_d(mpfr_log2, child_log2_u35, d, 3.5); for(i=0;i<10000 && success;i++) checkAccuracy_d(mpfr_log2, child_log2_u35, (DBL_MIN * pow(0.996323, i)), 3.5); showResult(success); // fprintf(stderr, "log1p : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log1p, child_log1p, d, 1.0); showResult(success); // fprintf(stderr, "lgamma_u1 : "); for(d = -5000;d < 5000 && success;d += 1.1) checkAccuracy_d(mpfr_lgamma_nosign, child_lgamma_u1, d, 1.0); showResult(success); // fprintf(stderr, "tgamma_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_gamma, child_tgamma_u1, d, 1.0); showResult(success); // fprintf(stderr, "erf_u1 : "); for(d = -100;d < 100 && success;d += 0.02) checkAccuracy_d(mpfr_erf, child_erf_u1, d, 1.0); showResult(success); // 
fprintf(stderr, "erfc_u15 : "); for(d = -1;d < 100 && success;d += 0.01) checkAccuracy_d(mpfr_erfc, child_erfc_u15, d, 1.5); showResult(success); // { fprintf(stderr, "ilogb : "); for(d = 0.0001;d < 10;d += 0.001) { int q = child_ilogb(d); int c = ilogb(d); if (q != c) { fprintf(stderr, "ilogb : arg = %.20g, test = %d, correct = %d\n", d, ilogb(d), child_ilogb(d)); success = 0; showResult(success); } } for(d = 0.0001;d < 10000;d += 1.1) { int q = child_ilogb(d); int c = ilogb(d); if (q != c) { fprintf(stderr, "ilogb : arg = %.20g, test = %d, correct = %d\n", d, ilogb(d), child_ilogb(d)); success = 0; showResult(success); } } for(i=0;i<10000;i+=10) { d = DBL_MIN * pow(0.996323, i); if (d == 0) continue; int q = child_ilogb(d); int c = ilogb(d); if (q != c) { fprintf(stderr, "ilogb : arg = %.20g, test = %d, correct = %d\n", d, ilogb(d), child_ilogb(d)); success = 0; showResult(success); } } for(i=0;i<10000;i+=10) { d = pow(0.933254300796991, i); if (d == 0) continue; int q = child_ilogb(d); int c = ilogb(d); if (q != c) { fprintf(stderr, "ilogb : arg = %.20g, test = %d, correct = %d\n", d, ilogb(d), child_ilogb(d)); success = 0; showResult(success); } } showResult(success); } } // #define checkAccuracy_f(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \ mpfrFunc(frc, frx, GMP_RNDN); \ if (countULPsp(childFunc((float)flushToZero(argx)), frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", \ (float)flushToZero(argx), (double)childFunc((float)flushToZero(argx)), mpfr_get_d(frc, GMP_RNDN), countULPsp(childFunc((float)flushToZero(argx)), frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracyNR_f(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \ mpfrFunc(frc, frx); \ if (countULPsp(childFunc((float)flushToZero(argx)), frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", \ 
(float)flushToZero(argx), (double)childFunc((float)flushToZero(argx)), mpfr_get_d(frc, GMP_RNDN), countULPsp(childFunc((float)flushToZero(argx)), frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracy_f_f(mpfrFunc, childFunc, argx, argy, bound) do { \ mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \ mpfr_set_d(fry, (float)flushToZero(argy), GMP_RNDN); \ mpfrFunc(frc, frx, fry, GMP_RNDN); \ if (countULPsp(childFunc((float)flushToZero(argx), (float)flushToZero(argy)), frc) > bound) { \ fprintf(stderr, "\narg = %.20g, %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", \ (float)flushToZero(argx), (float)flushToZero(argy), childFunc((float)flushToZero(argx), (float)flushToZero(argy)), mpfr_get_d(frc, GMP_RNDN), countULPsp(childFunc((float)flushToZero(argx), (float)flushToZero(argy)), frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracyX_f(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \ mpfrFunc(frc, frx, GMP_RNDN); \ Sleef_float2 d2 = childFunc((float)flushToZero(argx)); \ if (countULPsp(d2.x, frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", (float)flushToZero(argx), (double)d2.x, mpfr_get_d(frc, GMP_RNDN), countULPsp(d2.x, frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracyY_f(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \ mpfrFunc(frc, frx, GMP_RNDN); \ Sleef_float2 d2 = childFunc((float)flushToZero(argx)); \ if (countULPsp(d2.y, frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", (float)flushToZero(argx), (double)d2.y, mpfr_get_d(frc, GMP_RNDN), countULPsp(d2.y, frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracy2_f(mpfrFunc, childFunc, argx, bound, abound) do { \ mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \ mpfrFunc(frc, frx, GMP_RNDN); \ double t = childFunc((float)flushToZero(argx)); \ double ae = 
fabs(mpfr_get_d(frc, GMP_RNDN) - t); \ if (countULPsp(t, frc) > bound && ae > abound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf, abserror = %g\n", \ (float)flushToZero(argx), (double)childFunc((float)flushToZero(argx)), mpfr_get_d(frc, GMP_RNDN), countULPsp(childFunc((float)flushToZero(argx)), frc), ae); \ success = 0; \ break; \ } \ } while(0) // if (enableSP) { // 53 > 24(=number of bits in SP mantissa) mpfr_set_default_prec(53); fprintf(stderr, "hypotf_u35 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_hypot, child_hypotf_u35, y, x, 3.5); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_hypot, child_hypotf_u35, y, x, 3.5); } showResult(success); // fprintf(stderr, "hypotf_u05 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_hypot, child_hypotf_u05, y, x, 0.5); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_hypot, child_hypotf_u05, y, x, 0.5); } showResult(success); // fprintf(stderr, "copysignf : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_copysign, child_copysignf, y, x, 0); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_copysign, child_copysignf, y, x, 0); } showResult(success); // fprintf(stderr, "fmaxf : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_max, child_fmaxf, y, x, 0); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_max, child_fmaxf, y, x, 0); } showResult(success); // fprintf(stderr, "fminf : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && 
success;x += 0.15) checkAccuracy_f_f(mpfr_min, child_fminf, y, x, 0); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_min, child_fminf, y, x, 0); } showResult(success); // fprintf(stderr, "fdimf : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_dim, child_fdimf, y, x, 0.5); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_dim, child_fdimf, y, x, 0.5); } showResult(success); // fprintf(stderr, "fmodf : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_fmod, child_fmodf, y, x, 0.5); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_fmod, child_fmodf, y, x, 0.5); } showResult(success); // fprintf(stderr, "remainderf : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_remainder, child_remainderf, y, x, 0.5); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_remainder, child_remainderf, y, x, 0.5); } checkAccuracy_f_f(mpfr_remainder, child_remainderf, 11114942644092928.0, 224544296009728.0, 0.5); showResult(success); // fprintf(stderr, "truncf : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_f(mpfr_trunc, child_truncf, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_f(mpfr_trunc, child_truncf, d, 0); { double start = u2f(f2u((double)(INT64_C(1) << 23))-20), end = u2f(f2u((double)(INT64_C(1) << 23))+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_trunc, child_truncf, d, 0); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_trunc, child_truncf, -d, 0); } 
showResult(success); // fprintf(stderr, "floorf : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_f(mpfr_floor, child_floorf, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_f(mpfr_floor, child_floorf, d, 0); { double start = u2f(f2u((double)(INT64_C(1) << 23))-20), end = u2f(f2u((double)(INT64_C(1) << 23))+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_floor, child_floorf, d, 0); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_floor, child_floorf, -d, 0); } showResult(success); // fprintf(stderr, "ceilf : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_f(mpfr_ceil, child_ceilf, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_f(mpfr_ceil, child_ceilf, d, 0); { double start = u2f(f2u((double)(INT64_C(1) << 23))-20), end = u2f(f2u((double)(INT64_C(1) << 23))+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_ceil, child_ceilf, d, 0); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_ceil, child_ceilf, -d, 0); } showResult(success); // fprintf(stderr, "roundf : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_f(mpfr_round, child_roundf, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_f(mpfr_round, child_roundf, d, 0); { double start = u2f(f2u((double)(INT64_C(1) << 23))-20), end = u2f(f2u((double)(INT64_C(1) << 23))+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_round, child_roundf, d, 0); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_round, child_roundf, -d, 0); } showResult(success); // fprintf(stderr, "rintf : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracy_f(mpfr_rint, 
child_rintf, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracy_f(mpfr_rint, child_rintf, d, 0); { double start = u2f(f2u((double)(INT64_C(1) << 23))-20), end = u2f(f2u((double)(INT64_C(1) << 23))+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_rint, child_rintf, d, 0); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_rint, child_rintf, -d, 0); } showResult(success); // fprintf(stderr, "sinf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_sin, child_sinf, d, 3.5); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_sin, child_sinf, d, 3.5); for(i = 0;i < 1000 && success;i++) checkAccuracy_f(mpfr_sin, child_sinf, pow(1.092, i), 3.5); for(i64=(int64_t)-1000;i64<(int64_t)1000 && success;i64+=(int64_t)1) { double start = u2f(f2u(M_PI_4 * i64)-20), end = u2f(f2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_sin, child_sinf, d, 3.5); } showResult(success); // fprintf(stderr, "sinf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_sin, child_sinf_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_sin, child_sinf_u1, d, 1.0); for(i = 0;i < 1000 && success;i++) checkAccuracy_f(mpfr_sin, child_sinf_u1, pow(1.092, i), 1.0); for(i64=(int64_t)-1000;i64<(int64_t)1000 && success;i64+=(int64_t)1) { double start = u2f(f2u(M_PI_4 * i64)-20), end = u2f(f2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_sin, child_sinf_u1, d, 1.0); } showResult(success); // fprintf(stderr, "sin in sincosf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyX_f(mpfr_sin, child_sincosf, d, 3.5); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracyX_f(mpfr_sin, child_sincosf, d, 3.5); for(i = 0;i < 1000 && success;i++) checkAccuracyX_f(mpfr_sin, child_sincosf, pow(1.092, i), 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(M_PI_4 * i)-20), end = u2f(f2u(M_PI_4 * 
i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyX_f(mpfr_sin, child_sincosf, d, 3.5); } showResult(success); // fprintf(stderr, "sin in sincosf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyX_f(mpfr_sin, child_sincosf_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracyX_f(mpfr_sin, child_sincosf_u1, d, 1.0); for(i = 0;i < 1000 && success;i++) checkAccuracyX_f(mpfr_sin, child_sincosf_u1, pow(1.092, i), 1.0); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(M_PI_4 * i)-20), end = u2f(f2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyX_f(mpfr_sin, child_sincosf_u1, d, 1.0); } showResult(success); // // 256 > 128(=maximum SP exponent) + 24(=number of bits in SP mantissa) mpfr_set_default_prec(256); fprintf(stderr, "sin in sincospif_u35 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u35, d, 3.5); for(d = -10000-0.1;d < 10000 && success;d += 1.1) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u35, d, 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(i)-20), end = u2f(f2u(i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u35, d, 3.5); } for(i=1;i<=20 && success;i++) { double start = u2f(f2u(0.25 * i)-20), end = u2f(f2u(0.25 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u35, d, 3.5); } showResult(success); // fprintf(stderr, "sin in sincospif_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u05, d, 0.506); for(d = -10000-0.1;d < 10000 && success;d += 1.1) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(i)-20), end = u2f(f2u(i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2f(f2u(0.25 * i)-20), end = 
u2f(f2u(0.25 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u05, d, 0.506); } showResult(success); // fprintf(stderr, "sinpif_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracy_f(mpfr_sinpi, child_sinpif_u05, d, 0.506); for(d = -10000-0.1;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_sinpi, child_sinpif_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(i)-20), end = u2f(f2u(i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_sinpi, child_sinpif_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2f(f2u(0.25 * i)-20), end = u2f(f2u(0.25 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_sinpi, child_sinpif_u05, d, 0.506); } showResult(success); // fprintf(stderr, "cospif_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracy_f(mpfr_cospi, child_cospif_u05, d, 0.506); for(d = -10000-0.1;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_cospi, child_cospif_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(i)-20), end = u2f(f2u(i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_cospi, child_cospif_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2f(f2u(0.25 * i)-20), end = u2f(f2u(0.25 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_cospi, child_cospif_u05, d, 0.506); } showResult(success); mpfr_set_default_prec(53); // fprintf(stderr, "cosf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_cos, child_cosf, d, 3.5); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_cos, child_cosf, d, 3.5); for(i = 0;i < 1000 && success;i++) checkAccuracy_f(mpfr_cos, child_cosf, pow(1.092, i), 3.5); for(i64=(int64_t)-1000;i64<(int64_t)1000 && success;i64+=(int64_t)1) { double start = u2f(f2u(M_PI_4 * i64)-20), end = u2f(f2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) 
checkAccuracy_f(mpfr_cos, child_cosf, d, 3.5); } showResult(success); // fprintf(stderr, "cosf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_cos, child_cosf_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_cos, child_cosf_u1, d, 1.0); for(i = 0;i < 1000 && success;i++) checkAccuracy_f(mpfr_cos, child_cosf_u1, pow(1.092, i), 1.0); for(i64=(int64_t)-1000;i64<(int64_t)1000 && success;i64+=(int64_t)1) { double start = u2f(f2u(M_PI_4 * i64)-20), end = u2f(f2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_cos, child_cosf_u1, d, 1.0); } showResult(success); // fprintf(stderr, "cos in sincosf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyY_f(mpfr_cos, child_sincosf, d, 3.5); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracyY_f(mpfr_cos, child_sincosf, d, 3.5); for(i = 0;i < 1000 && success;i++) checkAccuracyY_f(mpfr_cos, child_sincosf, pow(1.092, i), 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(M_PI_4 * i)-20), end = u2f(f2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyY_f(mpfr_cos, child_sincosf, d, 3.5); } showResult(success); // fprintf(stderr, "cos in sincosf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyY_f(mpfr_cos, child_sincosf_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracyY_f(mpfr_cos, child_sincosf_u1, d, 1.0); for(i = 0;i < 1000 && success;i++) checkAccuracyY_f(mpfr_cos, child_sincosf_u1, pow(1.092, i), 1.0); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(M_PI_4 * i)-20), end = u2f(f2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyY_f(mpfr_cos, child_sincosf_u1, d, 1.0); } showResult(success); // mpfr_set_default_prec(256); fprintf(stderr, "cos in sincospif_u35 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyY_f(mpfr_cospi, child_sincospif_u35, d, 3.5); for(d = -10000-0.1;d < 10000 && success;d += 1.1) 
checkAccuracyY_f(mpfr_cospi, child_sincospif_u35, d, 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(i)-20), end = u2f(f2u(i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyY_f(mpfr_cospi, child_sincospif_u35, d, 3.5); } for(i=1;i<=20 && success;i++) { double start = u2f(f2u(0.25 * i)-20), end = u2f(f2u(0.25 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyY_f(mpfr_cospi, child_sincospif_u35, d, 3.5); } showResult(success); // fprintf(stderr, "cos in sincospif_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyY_f(mpfr_cospi, child_sincospif_u05, d, 0.506); for(d = -10000-0.1;d < 10000 && success;d += 1.1) checkAccuracyY_f(mpfr_cospi, child_sincospif_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(i)-20), end = u2f(f2u(i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyY_f(mpfr_cospi, child_sincospif_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2f(f2u(0.25 * i)-20), end = u2f(f2u(0.25 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyY_f(mpfr_cospi, child_sincospif_u05, d, 0.506); } showResult(success); mpfr_set_default_prec(53); // fprintf(stderr, "fastsinf_u3500 : "); for(d = -32;d < 32 && success;d += 0.001) checkAccuracy2_f(mpfr_sin, child_fastsinf_u3500, d, 350, 2e-6); showResult(success); fprintf(stderr, "fastcosf_u3500 : "); for(d = -32;d < 32 && success;d += 0.001) checkAccuracy2_f(mpfr_cos, child_fastcosf_u3500, d, 350, 2e-6); showResult(success); // fprintf(stderr, "tanf : "); checkAccuracy_f(mpfr_tan, child_tanf, 70.936981201171875, 3.5); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_tan, child_tanf, d, 3.5); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_tan, child_tanf, d, 3.5); for(i = 0;i < 1000 && success;i++) checkAccuracy_f(mpfr_tan, child_tanf, pow(1.092, i), 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(M_PI_4 * i)-20), end = u2f(f2u(M_PI_4 * i)+20); 
for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_tan, child_tanf, d, 3.5); } showResult(success); // fprintf(stderr, "tanf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_tan, child_tanf_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_tan, child_tanf_u1, d, 1.0); for(i = 0;i < 1000 && success;i++) checkAccuracy_f(mpfr_tan, child_tanf_u1, pow(1.092, i), 1.0); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(M_PI_4 * i)-20), end = u2f(f2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_tan, child_tanf_u1, d, 1.0); } showResult(success); // fprintf(stderr, "logf : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log, child_logf, d, 3.5); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_log, child_logf, d, 3.5); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf, pow(2.1, i), 3.5); for(i=0;i<10000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf, FLT_MAX * pow(0.9314821319758632, i), 3.5); for(i=0;i<10000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf, pow(0.933254300796991, i), 3.5); for(i=0;i<10000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf, FLT_MIN * pow(0.996323, i), 3.5); showResult(success); // fprintf(stderr, "logf_u1 : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log, child_logf_u1, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_log, child_logf_u1, d, 1.0); if (!enableFlushToZero) { for(i=0;i<10000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf_u1, FLT_MAX * pow(0.9314821319758632, i), 1.0); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf_u1, pow(2.1, i), 1.0); for(i=0;i<10000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf_u1, pow(0.933254300796991, i), 1.0); for(i=0;i<10000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf_u1, FLT_MIN * pow(0.996323, i), 
1.0); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log, child_logf_u1, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_log, child_logf_u1, d, 1.0); } showResult(success); // fprintf(stderr, "expf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_exp, child_expf, d, 1.0); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 1.1) checkAccuracy_f(mpfr_exp, child_expf, d, 1.0); } showResult(success); // fprintf(stderr, "powf : "); if (!enableFlushToZero) { for(y = 0.1;y < 100 && success;y += 0.6) { for(x = -100;x < 100 && success;x += 0.6) { checkAccuracy_f_f(mpfr_pow, child_powf, x, y, 1.0); } } for(y = -1000;y < 1000 && success;y += 0.1) checkAccuracy_f_f(mpfr_pow, child_powf, 2.1, y, 1.0); } else { for(y = 0.1;y < 10 && success;y += 0.06) { for(x = -100;x < 10 && success;x += 0.06) { checkAccuracy_f_f(mpfr_pow, child_powf, x, y, 1.0); } } } showResult(success); // fprintf(stderr, "fastpowf_u3500 : "); for(y = -25;y < 25 && success;y += 0.121) { for(x = 0.1;x < 25 && success;x += 0.251) { checkAccuracy_f_f(mpfr_pow, child_fastpowf_u3500, x, y, 350); } } showResult(success); // if (!deterministicMode) { fprintf(stderr, "sqrtf : "); if (!enableFlushToZero) { for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_sqrt, child_sqrtf, d, 1.0); } for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_sqrt, child_sqrtf, pow(2.1, d), 1.0); showResult(success); // fprintf(stderr, "sqrtf_u05 : "); if (!enableFlushToZero) { for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_sqrt, child_sqrtf_u05, d, 0.506); } for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_sqrt, child_sqrtf_u05, pow(2.1, d), 0.506); showResult(success); // fprintf(stderr, "sqrtf_u35 : "); if (!enableFlushToZero) { for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_sqrt, child_sqrtf_u35, d, 3.5); } for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_sqrt, 
child_sqrtf_u35, pow(2.1, d), 3.5); showResult(success); } // fprintf(stderr, "cbrtf : "); if (!enableFlushToZero) { for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_cbrt, child_cbrtf, d, 3.5); } for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_cbrt, child_cbrtf, pow(2.1, d), 3.5); showResult(success); // fprintf(stderr, "cbrtf_u1 : "); if (!enableFlushToZero) { for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_cbrt, child_cbrtf_u1, d, 1.0); } for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_cbrt, child_cbrtf_u1, pow(2.1, d), 1.0); showResult(success); // fprintf(stderr, "asinf : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_f(mpfr_asin, child_asinf, d, 3.5); showResult(success); // fprintf(stderr, "asinf_u1 : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_f(mpfr_asin, child_asinf_u1, d, 1.0); showResult(success); // fprintf(stderr, "acosf : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_f(mpfr_acos, child_acosf, d, 3.5); showResult(success); // fprintf(stderr, "acosf_u1 : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_f(mpfr_acos, child_acosf_u1, d, 1.0); showResult(success); // fprintf(stderr, "atanf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_atan, child_atanf, d, 3.5); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_atan, child_atanf, d, 3.5); showResult(success); // fprintf(stderr, "atanf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_atan, child_atanf_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_atan, child_atanf_u1, d, 1.0); showResult(success); // fprintf(stderr, "atan2f : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_atan2, child_atan2f, y, x, 3.5); } for(y = -100;y < 100 && success;y += 1.51) { for(x = -100;x < 100 && success;x += 1.51) checkAccuracy_f_f(mpfr_atan2, child_atan2f, 
y, x, 3.5); } showResult(success); // fprintf(stderr, "atan2f_u1 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_atan2, child_atan2f_u1, y, x, 1.0); } for(y = -100;y < 100 && success;y += 1.51) { for(x = -100;x < 100 && success;x += 1.51) checkAccuracy_f_f(mpfr_atan2, child_atan2f_u1, y, x, 1.0); } showResult(success); // fprintf(stderr, "sinhf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_sinh, child_sinhf, d, 1.0); if (!enableFlushToZero) { for(d = -88;d < 88 && success;d += 0.2) checkAccuracy_f(mpfr_sinh, child_sinhf, d, 1.0); } showResult(success); // fprintf(stderr, "coshf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_cosh, child_coshf, d, 1.0); if (!enableFlushToZero) { for(d = -88;d < 88 && success;d += 0.2) checkAccuracy_f(mpfr_cosh, child_coshf, d, 1.0); } showResult(success); // fprintf(stderr, "tanhf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_tanh, child_tanhf, d, 1.0); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_tanh, child_tanhf, d, 1.0); } showResult(success); // fprintf(stderr, "sinhf_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_sinh, child_sinhf_u35, d, 3.5); if (!enableFlushToZero) { for(d = -88;d < 88 && success;d += 0.2) checkAccuracy_f(mpfr_sinh, child_sinhf_u35, d, 3.5); } showResult(success); // fprintf(stderr, "coshf_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_cosh, child_coshf_u35, d, 3.5); if (!enableFlushToZero) { for(d = -88;d < 88 && success;d += 0.2) checkAccuracy_f(mpfr_cosh, child_coshf_u35, d, 3.5); } showResult(success); // fprintf(stderr, "tanhf_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_tanh, child_tanhf_u35, d, 3.5); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_tanh, child_tanhf_u35, d, 3.5); } showResult(success); // 
fprintf(stderr, "asinhf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_asinh, child_asinhf, d, 1.0); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_asinh, child_asinhf, d, 1.0); } showResult(success); // fprintf(stderr, "acoshf : "); for(d = 1;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_acosh, child_acoshf, d, 1.0); if (!enableFlushToZero) { for(d = 1;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_acosh, child_acoshf, d, 1.0); } showResult(success); // fprintf(stderr, "atanhf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_atanh, child_atanhf, d, 1.0); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_atanh, child_atanhf, d, 1.0); } showResult(success); // fprintf(stderr, "exp2f : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_exp2, child_exp2f, d, 1.0); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_exp2, child_exp2f, d, 1.0); } showResult(success); // fprintf(stderr, "exp10f : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_exp10, child_exp10f, d, 1.0); if (!enableFlushToZero) { for(d = -300;d < 300 && success;d += 0.1) checkAccuracy_f(mpfr_exp10, child_exp10f, d, 1.0); } showResult(success); // fprintf(stderr, "exp2f_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_exp2, child_exp2f_u35, d, 3.5); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_exp2, child_exp2f_u35, d, 3.5); } showResult(success); // fprintf(stderr, "exp10f_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_exp10, child_exp10f_u35, d, 3.5); if (!enableFlushToZero) { for(d = -300;d < 300 && success;d += 0.1) checkAccuracy_f(mpfr_exp10, child_exp10f_u35, d, 3.5); } showResult(success); // fprintf(stderr, "expm1f : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_expm1, 
child_expm1f, d, 1.0); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 0.21) checkAccuracy_f(mpfr_expm1, child_expm1f, d, 1.0); for(d = 0;d < 300 && success;d += 0.21) checkAccuracy_f(mpfr_expm1, child_expm1f, pow(10, -d), 1.0); for(d = 0;d < 300 && success;d += 0.21) checkAccuracy_f(mpfr_expm1, child_expm1f, (-pow(10, -d)), 1.0); } showResult(success); // fprintf(stderr, "log10f : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log10, child_log10f, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_log10, child_log10f, d, 1.0); for(i=0;i<10000 && success;i++) checkAccuracy_f(mpfr_log10, child_log10f, (FLT_MIN * pow(0.996323, i)), 1.0); showResult(success); // fprintf(stderr, "log2f : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log2, child_log2f, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_log2, child_log2f, d, 1.0); for(i=0;i<10000 && success;i++) checkAccuracy_f(mpfr_log2, child_log2f, (FLT_MIN * pow(0.996323, i)), 1.0); showResult(success); // fprintf(stderr, "log2f_u35 : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log2, child_log2f_u35, d, 3.5); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_log2, child_log2f_u35, d, 3.5); for(i=0;i<10000 && success;i++) checkAccuracy_f(mpfr_log2, child_log2f_u35, (FLT_MIN * pow(0.996323, i)), 3.5); showResult(success); // fprintf(stderr, "log1pf : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log1p, child_log1pf, d, 1.0); showResult(success); // fprintf(stderr, "lgammaf_u1 : "); for(d = -5000;d < 5000 && success;d += 1.1) checkAccuracy_f(mpfr_lgamma_nosign, child_lgammaf_u1, d, 1.0); showResult(success); // fprintf(stderr, "tgammaf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_gamma, child_tgammaf_u1, d, 1.0); showResult(success); // fprintf(stderr, "erff_u1 : "); for(d = -100;d < 100 && success;d += 0.02) 
checkAccuracy_f(mpfr_erf, child_erff_u1, d, 1.0); showResult(success); // fprintf(stderr, "erfcf_u15 : "); for(d = -1;d < 8 && success;d += 0.001) checkAccuracy_f(mpfr_erfc, child_erfcf_u15, d, 1.5); showResult(success); } } int main(int argc, char **argv) { char *argv2[argc+2], *commandSde = NULL; int i, a2s; // BUGFIX: this flush is to prevent incorrect syncing with the // `iut*` executable that causes failures in the CPU detection on // some CI systems. fflush(stdout); for(a2s=1;a2s #include #include #include #include #include #include #include #ifdef ENABLE_SYS_getrandom #define _GNU_SOURCE #include #include #include #endif #include "sleef.h" #include "testerutil.h" #define DORENAME #include "rename.h" #define DENORMAL_DBL_MIN (4.9406564584124654418e-324) #define POSITIVE_INFINITY INFINITY #define NEGATIVE_INFINITY (-INFINITY) typedef union { double d; uint64_t u64; int64_t i64; } conv_t; double nexttoward0(double x, int n) { union { double f; uint64_t u; } cx; cx.f = x; cx.u -=n ; return cx.f; } double rnd() { conv_t c; switch(random() & 63) { case 0: return nexttoward0( 0.0, -(random() & ((1 << (random() & 31)) - 1))); case 1: return nexttoward0(-0.0, -(random() & ((1 << (random() & 31)) - 1))); case 2: return nexttoward0( INFINITY, (random() & ((1 << (random() & 31)) - 1))); case 3: return nexttoward0(-INFINITY, (random() & ((1 << (random() & 31)) - 1))); } #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u64, sizeof(c.u64), 0); #else c.u64 = random() | ((uint64_t)random() << 31) | ((uint64_t)random() << 62); #endif return c.d; } double rnd_fr() { conv_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u64, sizeof(c.u64), 0); #else c.u64 = random() | ((uint64_t)random() << 31) | ((uint64_t)random() << 62); #endif } while(!isnumber(c.d)); return c.d; } double rnd_zo() { conv_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u64, sizeof(c.u64), 0); #else c.u64 = random() | ((uint64_t)random() << 31) | ((uint64_t)random() << 62); 
#endif } while(!isnumber(c.d) || c.d < -1 || 1 < c.d); return c.d; } int main(int argc,char **argv) { mpfr_t frw, frx, fry, frz; mpfr_set_default_prec(1280); mpfr_inits(frw, frx, fry, frz, NULL); conv_t cd; double d, t; double d2, d3, zo; int cnt, ecnt = 0; srandom(time(NULL)); for(cnt = 0;ecnt < 1000;cnt++) { switch(cnt & 7) { case 0: d = rnd(); d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 1: cd.d = rint(rnd_zo() * 1e+10) * M_PI_4; cd.i64 += (random() & 0xff) - 0x7f; d = cd.d; d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 2: cd.d = rnd_fr() * M_PI_4; cd.i64 += (random() & 0xf) - 0x7; d = cd.d; d2 = rnd(); d3 = rnd(); zo = rnd(); break; default: d = rnd_fr(); d2 = rnd_fr(); d3 = rnd_fr(); zo = rnd_zo(); break; } Sleef_double2 sc = xsincospi_u05(d); Sleef_double2 sc2 = xsincospi_u35(d); { const double rangemax2 = 1e+9/4; mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinpi(frx, frx, GMP_RNDN); double u0 = countULP2dp(t = sc.x, frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospi_u05 sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2dp(t = sc2.x, frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 1.5) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospi_u35 sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2dp(t = xsinpi_u05(d), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sinpi_u05 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } } { const double rangemax2 = 1e+9/4; mpfr_set_d(frx, d, GMP_RNDN); mpfr_cospi(frx, frx, GMP_RNDN); double u0 = countULP2dp(t = sc.y, frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospi_u05 cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2dp(t = sc.y, frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 1.5) || fabs(t) > 1 || !isnumber(t))) { 
printf("Pure C sincospi_u35 cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2dp(t = xcospi_u05(d), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C cospi_u05 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } } sc = xsincos(d); sc2 = xsincos_u1(d); { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sin(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xsin(d), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sin arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } double u1 = countULPdp(sc.x, frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincos sin arg=%.20g ulp=%.20g\n", d, u1); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } double u2 = countULPdp(t = xsin_u1(d), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sin_u1 arg=%.20g ulp=%.20g\n", d, u2); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } double u3 = countULPdp(t = sc2.x, frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincos_u1 sin arg=%.20g ulp=%.20g\n", d, u3); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cos(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xcos(d), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = sc.y, frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincos cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULPdp(t = xcos_u1(d), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C cos_u1 arg=%.20g 
ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULPdp(t = sc2.y, frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincos_u1 cos arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tan(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xtan(d), frx); if (u0 != 0 && (u0 > 3.5 || isnan(t))) { printf("Pure C tan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xtan_u1(d), frx); if (u1 != 0 && (u1 > 1 || isnan(t))) { printf("Pure C tan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xlog(fabs(d)), frx); if (u0 > 3.5) { printf("Pure C log arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xlog_u1(fabs(d)), frx); if (u1 > 1) { printf("Pure C log_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log10(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xlog10(fabs(d)), frx); if (u0 > 1) { printf("Pure C log10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log2(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xlog2(fabs(d)), frx); if (u0 > 1) { printf("Pure C log2 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xlog2_u35(fabs(d)), frx); if (u1 > 3.5) { printf("Pure C log2_u35 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_log1p(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xlog1p(d), frx); if ((-1 <= d && d <= 1e+307 && u0 > 1) || (d < -1 && !isnan(t)) || (d > 1e+307 && !(u0 <= 1 || isinf(t)))) { printf("Pure C log1p arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xexp(d), frx); if (u0 > 1) { printf("Pure C exp 
arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp2(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xexp2(d), frx); if (u0 > 1) { printf("Pure C exp2 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xexp2_u35(d), frx); if (u1 > 3.5) { printf("Pure C exp2_u35 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp10(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xexp10(d), frx); if (u0 > 1.09) { printf("Pure C exp10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xexp10_u35(d), frx); if (u1 > 3.5) { printf("Pure C exp10_u35 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_expm1(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xexpm1(d), frx); if (u0 > 1) { printf("Pure C expm1 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_pow(frx, fry, frx, GMP_RNDN); double u0 = countULPdp(t = xpow(d2, d), frx); if (u0 > 1) { printf("Pure C pow arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cbrt(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xcbrt(d), frx); if (u0 > 3.5) { printf("Pure C cbrt arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xcbrt_u1(d), frx); if (u1 > 1) { printf("Pure C cbrt_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_asin(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xasin(zo), frx); if (u0 > 3.5) { printf("Pure C asin arg=%.20g ulp=%.20g\n", zo, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xasin_u1(zo), frx); if (u1 > 1) { printf("Pure C asin_u1 arg=%.20g 
ulp=%.20g\n", zo, u1); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_acos(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xacos(zo), frx); if (u0 > 3.5) { printf("Pure C acos arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xacos_u1(zo), frx); if (u1 > 1) { printf("Pure C acos_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atan(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xatan(d), frx); if (u0 > 3.5) { printf("Pure C atan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xatan_u1(d), frx); if (u1 > 1) { printf("Pure C atan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_atan2(frx, fry, frx, GMP_RNDN); double u0 = countULPdp(t = xatan2(d2, d), frx); if (u0 > 3.5) { printf("Pure C atan2 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } double u1 = countULP2dp(t = xatan2_u1(d2, d), frx); if (u1 > 1) { printf("Pure C atan2_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xsinh(d), frx); if ((fabs(d) <= 709 && u0 > 1) || (d > 709 && !(u0 <= 1 || (isinf(t) && t > 0))) || (d < -709 && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C sinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xcosh(d), frx); if ((fabs(d) <= 709 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { printf("Pure C cosh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xtanh(d), frx); if (u0 > 1) { printf("Pure C tanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); 
ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xsinh_u35(d), frx); if ((fabs(d) <= 709 && u0 > 3.5) || (d > 709 && !(u0 <= 3.5 || (isinf(t) && t > 0))) || (d < -709 && !(u0 <= 3.5 || (isinf(t) && t < 0)))) { printf("Pure C sinh_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xcosh_u35(d), frx); if ((fabs(d) <= 709 && u0 > 3.5) || !(u0 <= 3.5 || (isinf(t) && t > 0))) { printf("Pure C cosh_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xtanh_u35(d), frx); if (u0 > 3.5) { printf("Pure C tanh_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_asinh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xasinh(d), frx); if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C asinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_acosh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xacosh(d), frx); if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !isnan(t))) { printf("Pure C acosh arg=%.20g ulp=%.20g\n", d, u0); printf("%.20g\n", t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atanh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xatanh(d), frx); if (u0 > 1) { printf("Pure C atanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } // { mpfr_set_d(frx, d, GMP_RNDN); mpfr_abs(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xfabs(d), frx); if (u0 != 0) { printf("Pure C fabs arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); 
fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_copysign(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = xcopysign(d, d2), frx); if (u0 != 0 && !isnan(d2)) { printf("Pure C copysign arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_max(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = xfmax(d, d2), frx); if (u0 != 0) { printf("Pure C fmax arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_min(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = xfmin(d, d2), frx); if (u0 != 0) { printf("Pure C fmin arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_dim(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = xfdim(d, d2), frx); if (u0 > 0.5) { printf("Pure C fdim arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_trunc(frx, frx); double u0 = countULPdp(t = xtrunc(d), frx); if (u0 != 0) { printf("Pure C trunc arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_floor(frx, frx); double u0 = countULPdp(t = xfloor(d), frx); if (u0 != 0) { printf("Pure C floor arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_ceil(frx, frx); double u0 = countULPdp(t = xceil(d), frx); if (u0 != 0) { 
printf("Pure C ceil arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_round(frx, frx); double u0 = countULPdp(t = xround(d), frx); if (u0 != 0) { printf("Pure C round arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_rint(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xrint(d), frx); if (u0 != 0) { printf("Pure C rint arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_set_d(frz, d3, GMP_RNDN); mpfr_fma(frx, frx, fry, frz, GMP_RNDN); double u0 = countULP2dp(t = xfma(d, d2, d3), frx); double c = mpfr_get_d(frx, GMP_RNDN); if ((-1e+303 < c && c < 1e+303 && u0 > 0.5) || !(u0 <= 0.5 || isinf(t))) { printf("Pure C fma arg=%.20g, %.20g, %.20g ulp=%.20g\n", d, d2, d3, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xsqrt_u05(d), frx); if (u0 > 0.50001) { printf("Pure C sqrt_u05 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2dp(t = xhypot_u05(d, d2), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (u0 > 0.5) { printf("Pure C hypot arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2dp(t = xhypot_u35(d, d2), frx); double c = 
mpfr_get_d(frx, GMP_RNDN); if ((-1e+308 < c && c < 1e+308 && u0 > 3.5) || !(u0 <= 3.5 || isinf(t))) { printf("Pure C hypot arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { t = xnextafter(d, d2); double c = nextafter(d, d2); if (!(isnan(t) && isnan(c)) && t != c) { printf("Pure C nextafter arg=%.20g, %.20g\n", d, d2); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, 0); double u0 = countULPdp(t = xfrfrexp(d), frx); if (d != 0 && isnumber(d) && u0 != 0) { printf("Pure C frfrexp arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); int cexp = mpfr_get_exp(frx); int texp = xexpfrexp(d); if (d != 0 && isnumber(d) && cexp != texp) { printf("Pure C expfrexp arg=%.20g\n", d); printf("correct = %d, test = %d\n", cexp, texp); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_fmod(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = xfmod(d, d2), frx); long double c = mpfr_get_ld(frx, GMP_RNDN); if (fabsl((long double)d / d2) < 1e+300 && u0 > 0.5) { printf("Pure C fmod arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_remainder(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = xremainder(d, d2), frx); long double c = mpfr_get_ld(frx, GMP_RNDN); if (fabsl((long double)d / d2) < 1e+300 && u0 > 0.5) { printf("Pure C remainder arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { int exp = (random() & 8191) - 4096; mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, mpfr_get_exp(frx) + exp); double u0 = countULPdp(t = xldexp(d, exp), frx); if (u0 > 0.5) 
{ printf("Pure C ldexp arg=%.20g %d ulp=%.20g\n", d, exp, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_modf(fry, frz, frx, GMP_RNDN); Sleef_double2 t2 = xmodf(d); double u0 = countULPdp(t2.x, frz); double u1 = countULPdp(t2.y, fry); if (u0 != 0 || u1 != 0) { printf("Pure C modf arg=%.20g ulp=%.20g %.20g\n", d, u0, u1); printf("correct = %.20g, %.20g\n", mpfr_get_d(frz, GMP_RNDN), mpfr_get_d(fry, GMP_RNDN)); printf("test = %.20g, %.20g\n", t2.x, t2.y); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); int s; mpfr_lgamma(frx, &s, frx, GMP_RNDN); double u0 = countULPdp(t = xlgamma_u1(d), frx); if (((d < 0 && fabsl(t - mpfr_get_ld(frx, GMP_RNDN)) > 1e-15 && u0 > 1) || (0 <= d && d < 2e+305 && u0 > 1) || (2e+305 <= d && !(u0 <= 1 || isinf(t))))) { printf("Pure C xlgamma_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); printf("Diff = %.20Lg\n", fabsl(t - mpfr_get_ld(frx, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_gamma(frx, frx, GMP_RNDN); double u0 = countULP2dp(t = xtgamma_u1(d), frx); if (u0 > 1.0) { printf("Pure C xtgamma_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); printf("Diff = %.20Lg\n", fabsl(t - mpfr_get_ld(frx, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erfc(frx, frx, GMP_RNDN); static double ebz = 9.8813129168249308835e-324; // nextafter(nextafter(0, 1), 1); double u0 = countULP2dp(t = xerfc_u15(d), frx); if ((d > 26.2 && u0 > 2.5 && !(mpfr_get_d(frx, GMP_RNDN) == 0 && t <= ebz)) || (d <= 26.2 && u0 > 1.5)) { printf("Pure C xerfc_u15 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erf(frx, frx, GMP_RNDN); double u0 = countULP2dp(t = 
xerf_u1(d), frx); if (u0 > 1) { printf("Pure C xerf_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } } exit(0); } sleef-3.5.1/src/libm-tester/tester2ld.c000066400000000000000000000370371373003144100177160ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #include "misc.h" #ifdef ENABLE_SYS_getrandom #define _GNU_SOURCE #include #include #include #endif #include "sleef.h" #define DORENAME #include "rename.h" #define DENORMAL_LDBL_MIN (3.6451995318824746025284059336194e-4951L) #define XLDBL_MIN (3.3621031431120935062626778173218e-4932L) #ifndef M_PIl #define M_PIl 3.141592653589793238462643383279502884L #endif #ifndef M_PI_4l #define M_PI_4l .785398163397448309615660845819875721049292L #endif #define POSITIVE_INFINITY INFINITY #define NEGATIVE_INFINITY (-INFINITY) int isnumberl(long double x) { return x != INFINITYl && x != -INFINITYl && x == x; } int isPlusZerol(long double x) { return x == 0 && copysignl(1, x) == 1; } int isMinusZerol(long double x) { return x == 0 && copysignl(1, x) == -1; } mpfr_t fra, frb, frc, frd; double countULP(long double d, mpfr_t c) { long double c2 = mpfr_get_ld(c, GMP_RNDN); if (c2 == 0 && d != 0) return 10000; //if (isPlusZerol(c2) && !isPlusZerol(d)) return 10003; //if (isMinusZerol(c2) && !isMinusZerol(d)) return 10004; if (isnanl(c2) && isnanl(d)) return 0; if (isnanl(c2) || isnanl(d)) return 10001; if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) return 0; if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) return 0; if (!isnumberl(c2) && !isnumberl(d)) return 0; int e; frexpl(mpfr_get_ld(c, GMP_RNDN), &e); mpfr_set_ld(frb, fmaxl(ldexpl(1.0, e-64), DENORMAL_LDBL_MIN), GMP_RNDN); 
mpfr_set_ld(frd, d, GMP_RNDN); mpfr_sub(fra, frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); double u = fabs(mpfr_get_d(fra, GMP_RNDN)); return u; } double countULP2(long double d, mpfr_t c) { long double c2 = mpfr_get_ld(c, GMP_RNDN); if (c2 == 0 && d != 0) return 10000; //if (isPlusZerol(c2) && !isPlusZerol(d)) return 10003; //if (isMinusZerol(c2) && !isMinusZerol(d)) return 10004; if (isnanl(c2) && isnanl(d)) return 0; if (isnanl(c2) || isnanl(d)) return 10001; if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) return 0; if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) return 0; if (!isnumberl(c2) && !isnumberl(d)) return 0; int e; frexpl(mpfr_get_ld(c, GMP_RNDN), &e); mpfr_set_ld(frb, fmaxl(ldexpl(1.0, e-64), LDBL_MIN), GMP_RNDN); mpfr_set_ld(frd, d, GMP_RNDN); mpfr_sub(fra, frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); double u = fabs(mpfr_get_d(fra, GMP_RNDN)); return u; } typedef union { long double d; __int128 u128; } conv_t; long double rnd() { conv_t c; switch(random() & 15) { case 0: return INFINITY; case 1: return -INFINITY; } #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); #else c.u128 = random() | ((__int128)random() << 31) | ((__int128)random() << (31*2)) | ((__int128)random() << (31*3)) | ((__int128)random() << (31*4)); #endif return c.d; } long double rnd_fr() { conv_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); #else c.u128 = random() | ((__int128)random() << 31) | ((__int128)random() << (31*2)) | ((__int128)random() << (31*3)) | ((__int128)random() << (31*4)); #endif } while(!isnumberl(c.d)); return c.d; } long double rnd_zo() { conv_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); #else c.u128 = random() | ((__int128)random() << 31) | ((__int128)random() << (31*2)) | ((__int128)random() << (31*3)) | ((__int128)random() << (31*4)); #endif } while(!isnumberl(c.d) || c.d < -1 || 1 < c.d); return c.d; } void 
sinpifr(mpfr_t ret, long double d) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_set_ld(frd, d, GMP_RNDN); mpfr_mul(frd, frpi, frd, GMP_RNDN); mpfr_sin(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } void cospifr(mpfr_t ret, long double d) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_set_ld(frd, d, GMP_RNDN); mpfr_mul(frd, frpi, frd, GMP_RNDN); mpfr_cos(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } int main(int argc,char **argv) { mpfr_t frw, frx, fry, frz; mpfr_set_default_prec(256); mpfr_inits(fra, frb, frc, frd, frw, frx, fry, frz, NULL); conv_t cd; long double d, t, d2, zo; int cnt, ecnt = 0; srandom(time(NULL)); #if 0 cd.d = M_PIl; mpfr_set_ld(frx, cd.d, GMP_RNDN); cd.u128 += 3; printf("%g\n", countULP2(cd.d, frx)); #endif const long double rangemax = 1e+9; for(cnt = 0;ecnt < 1000;cnt++) { switch(cnt & 7) { case 0: d = rnd(); d2 = rnd(); zo = rnd(); break; case 1: cd.d = rint((2 * (double)random() / RAND_MAX - 1) * 1e+10) * M_PI_4; cd.u128 += (random() & 0xff) - 0x7f; d = cd.d; d2 = rnd(); zo = rnd(); break; default: d = rnd_fr(); d2 = rnd_fr(); zo = rnd_zo(); break; } Sleef_longdouble2 sc = xsincospil_u05(d); Sleef_longdouble2 sc2 = xsincospil_u35(d); { const double rangemax2 = 1e+9; sinpifr(frx, d); double u0 = countULP2(t = sc.x, frx); if (u0 != 0 && ((fabsl(d) <= rangemax2 && u0 > 0.505) || fabsl(t) > 1 || !isnumberl(t))) { printf("Pure C sincospil_u05 sin arg=%.30Lg ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2(t = sc2.x, frx); if (u1 != 0 && ((fabsl(d) <= rangemax2 && u1 > 1.5) || fabsl(t) > 1 || !isnumberl(t))) { printf("Pure C sincospil_u35 sin arg=%.30Lg ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { const double rangemax2 = 1e+9; cospifr(frx, d); double u0 = countULP2(t = sc.y, frx); if (u0 != 0 
&& ((fabsl(d) <= rangemax2 && u0 > 0.505) || fabsl(t) > 1 || !isnumberl(t))) { printf("Pure C sincospil_u05 cos arg=%.30Lg ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2(t = sc.y, frx); if (u1 != 0 && ((fabsl(d) <= rangemax2 && u1 > 1.5) || fabsl(t) > 1 || !isnumberl(t))) { printf("Pure C sincospil_u35 cos arg=%.30Lg ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } #if 0 double2 sc = xsincos(d); double2 sc2 = xsincos_u1(d); { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sin(frx, frx, GMP_RNDN); double u0 = countULP(t = xsin(d), frx); if ((fabsl(d) <= rangemax && u0 > 3.5) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(sc.x, frx); if ((fabsl(d) <= rangemax && u1 > 3.5) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C sincos sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP(t = xsin_u1(d), frx); if ((fabsl(d) <= rangemax && u2 > 1) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C sin_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULP(t = sc2.x, frx); if ((fabsl(d) <= rangemax && u3 > 1) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C sincos_u1 sin arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cos(frx, frx, GMP_RNDN); double u0 = countULP(t = xcos(d), frx); if ((fabsl(d) <= rangemax && u0 > 3.5) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = sc.y, frx); if ((fabsl(d) <= rangemax && u1 > 3.5) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C sincos cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP(t = xcos_u1(d), frx); if ((fabsl(d) <= rangemax && u2 > 1) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C cos_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULP(t = sc2.y, frx); if ((fabsl(d) <= 
rangemax && u3 > 1) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C sincos_u1 cos arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tan(frx, frx, GMP_RNDN); double u0 = countULP(t = xtan(d), frx); if ((fabsl(d) < 1e+7 && u0 > 3.5) || (fabsl(d) <= rangemax && u0 > 5) || isnan(t)) { printf("Pure C tan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xtan_u1(d), frx); if ((fabsl(d) <= rangemax && u1 > 1) || isnan(t)) { printf("Pure C tan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } d = rnd_fr(); double d2 = rnd_fr(), zo = rnd_zo(); { mpfr_set_d(frx, fabsl(d), GMP_RNDN); mpfr_log(frx, frx, GMP_RNDN); double u0 = countULP(t = xlog(fabsl(d)), frx); if (u0 > 3.5) { printf("Pure C log arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xlog_u1(fabsl(d)), frx); if (u1 > 1) { printf("Pure C log_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsl(d), GMP_RNDN); mpfr_log10(frx, frx, GMP_RNDN); double u0 = countULP(t = xlog10(fabsl(d)), frx); if (u0 > 1) { printf("Pure C log10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_log1p(frx, frx, GMP_RNDN); double u0 = countULP(t = xlog1p(d), frx); if ((-1 <= d && d <= 1e+307 && u0 > 1) || (d < -1 && !isnan(t)) || (d > 1e+307 && !(u0 <= 1 || isinf(t)))) { printf("Pure C log1p arg=%.20g ulp=%.20g\n", d, u0); printf("%g\n", t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp(frx, frx, GMP_RNDN); double u0 = countULP(t = xexp(d), frx); if (u0 > 1) { printf("Pure C exp arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp2(frx, frx, GMP_RNDN); double u0 = countULP(t = xexp2(d), frx); if (u0 > 1) { printf("Pure C exp2 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp10(frx, frx, GMP_RNDN); double u0 = 
countULP(t = xexp10(d), frx); if (u0 > 1) { printf("Pure C exp10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_expm1(frx, frx, GMP_RNDN); double u0 = countULP(t = xexpm1(d), frx); if (u0 > 1) { printf("Pure C expm1 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_pow(frx, fry, frx, GMP_RNDN); double u0 = countULP(t = xpow(d2, d), frx); if (u0 > 1) { printf("Pure C pow arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cbrt(frx, frx, GMP_RNDN); double u0 = countULP(t = xcbrt(d), frx); if (u0 > 3.5) { printf("Pure C cbrt arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xcbrt_u1(d), frx); if (u1 > 1) { printf("Pure C cbrt_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_asin(frx, frx, GMP_RNDN); double u0 = countULP(t = xasin(zo), frx); if (u0 > 3.5) { printf("Pure C asin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xasin_u1(zo), frx); if (u1 > 1) { printf("Pure C asin_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_acos(frx, frx, GMP_RNDN); double u0 = countULP(t = xacos(zo), frx); if (u0 > 3.5) { printf("Pure C acos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xacos_u1(zo), frx); if (u1 > 1) { printf("Pure C acos_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atan(frx, frx, GMP_RNDN); double u0 = countULP(t = xatan(d), frx); if (u0 > 3.5) { printf("Pure C atan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xatan_u1(d), frx); if (u1 > 1) { printf("Pure C atan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); 
mpfr_atan2(frx, fry, frx, GMP_RNDN); double u0 = countULP(t = xatan2(d2, d), frx); if (u0 > 3.5) { printf("Pure C atan2 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } double u1 = countULP2(t = xatan2_u1(d2, d), frx); if (u1 > 1) { printf("Pure C atan2_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULP(t = xsinh(d), frx); if ((fabsl(d) <= 709 && u0 > 1) || (d > 709 && !(u0 <= 1 || (isinf(t) && t > 0))) || (d < -709 && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C sinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULP(t = xcosh(d), frx); if ((fabsl(d) <= 709 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { printf("Pure C cosh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULP(t = xtanh(d), frx); if (u0 > 1) { printf("Pure C tanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_asinh(frx, frx, GMP_RNDN); double u0 = countULP(t = xasinh(d), frx); if ((fabsl(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C asinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_acosh(frx, frx, GMP_RNDN); double u0 = countULP(t = xacosh(d), frx); if ((fabsl(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !isnan(t))) { printf("Pure C acosh arg=%.20g ulp=%.20g\n", d, u0); printf("%.20g\n", t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atanh(frx, frx, GMP_RNDN); double u0 = countULP(t = xatanh(d), frx); if (u0 > 1) { printf("Pure C atanh arg=%.20g ulp=%.20g\n", d, 
u0); fflush(stdout); ecnt++; } } #endif } } sleef-3.5.1/src/libm-tester/tester2qp.c000066400000000000000000000353361373003144100177370ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #include #include #define _GNU_SOURCE #include #include #include #include "sleef.h" #include "f128util.h" #define DORENAME #include "rename.h" #define POSITIVE_INFINITY INFINITY #define NEGATIVE_INFINITY (-INFINITY) int isnumberq(Sleef_quad x) { return !isinfq(x) && !isnanq(x); } int isPlusZeroq(Sleef_quad x) { return x == 0 && copysignq(1, x) == 1; } int isMinusZeroq(Sleef_quad x) { return x == 0 && copysignq(1, x) == -1; } mpfr_t fra, frb, frc, frd; double countULP(Sleef_quad d, mpfr_t c) { Sleef_quad c2 = mpfr_get_f128(c, GMP_RNDN); if (c2 == 0 && d != 0) return 10000; //if (isPlusZeroq(c2) && !isPlusZeroq(d)) return 10003; //if (isMinusZeroq(c2) && !isMinusZeroq(d)) return 10004; if (isnanq(c2) && isnanq(d)) return 0; if (isnanq(c2) || isnanq(d)) return 10001; if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) return 0; if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) return 0; if (!isnumberq(c2) && !isnumberq(d)) return 0; int e; frexpq(mpfr_get_f128(c, GMP_RNDN), &e); mpfr_set_f128(frb, fmaxq(ldexpq(1.0, e-113), FLT128_DENORM_MIN), GMP_RNDN); mpfr_set_f128(frd, d, GMP_RNDN); mpfr_sub(fra, frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); double u = fabs(mpfr_get_d(fra, GMP_RNDN)); return u; } double countULP2(Sleef_quad d, mpfr_t c) { Sleef_quad c2 = mpfr_get_f128(c, GMP_RNDN); if (c2 == 0 && d != 0) return 10000; //if (isPlusZeroq(c2) && !isPlusZeroq(d)) return 10003; //if (isMinusZeroq(c2) && !isMinusZeroq(d)) return 10004; if (isnanq(c2) && isnanq(d)) return 0; if (isnanq(c2) || isnanq(d)) return 10001; if 
(c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) return 0; if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) return 0; if (!isnumberq(c2) && !isnumberq(d)) return 0; int e; frexpq(mpfr_get_f128(c, GMP_RNDN), &e); mpfr_set_f128(frb, fmaxq(ldexpq(1.0, e-113), FLT128_MIN), GMP_RNDN); mpfr_set_f128(frd, d, GMP_RNDN); mpfr_sub(fra, frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); double u = fabs(mpfr_get_d(fra, GMP_RNDN)); return u; } typedef union { Sleef_quad d; __int128 u128; uint64_t u[2]; } conv_t; Sleef_quad rnd() { conv_t c; switch(random() & 15) { case 0: return INFINITY; case 1: return -INFINITY; } syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); return c.d; } Sleef_quad rnd_fr() { conv_t c; do { syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); } while(!isnumberq(c.d)); return c.d; } Sleef_quad rnd_zo() { conv_t c; do { syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); } while(!isnumberq(c.d) || c.d < -1 || 1 < c.d); return c.d; } void sinpifr(mpfr_t ret, Sleef_quad d) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_set_f128(frd, d, GMP_RNDN); mpfr_mul(frd, frpi, frd, GMP_RNDN); mpfr_sin(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } void cospifr(mpfr_t ret, Sleef_quad d) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_set_f128(frd, d, GMP_RNDN); mpfr_mul(frd, frpi, frd, GMP_RNDN); mpfr_cos(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } int main(int argc,char **argv) { mpfr_t frw, frx, fry, frz; mpfr_set_default_prec(2048); mpfr_inits(fra, frb, frc, frd, frw, frx, fry, frz, NULL); conv_t cd; Sleef_quad d, t, d2, zo; int cnt, ecnt = 0; srandom(time(NULL)); #if 0 cd.d = M_PIq; mpfr_set_f128(frx, cd.d, GMP_RNDN); cd.u128 += 3; printf("%g\n", countULP2(cd.d, frx)); #endif const Sleef_quad rangemax = 1e+9; for(cnt = 0;ecnt < 
1000;cnt++) { switch(cnt & 7) { case 0: d = rnd(); d2 = rnd(); zo = rnd(); break; case 1: cd.d = rint((2 * (double)random() / RAND_MAX - 1) * 1e+10) * M_PI_4; cd.u128 += (random() & 0xff) - 0x7f; d = cd.d; d2 = rnd(); zo = rnd(); break; default: d = rnd_fr(); d2 = rnd_fr(); zo = rnd_zo(); break; } Sleef_quad2 sc = xsincospiq_u05(d); Sleef_quad2 sc2 = xsincospiq_u35(d); { const double rangemax2 = 1e+9; sinpifr(frx, d); double u0 = countULP2(t = sc.x, frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumberq(t))) { printf("Pure C sincospiq_u05 sin arg="); printf128(d); printf(" ulp=%.20g\n", u0); fflush(stdout); ecnt++; } double u1 = countULP2(t = sc2.x, frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumberq(t))) { printf("Pure C sincospiq_u35 sin arg=%.30Lg ulp=%.20g\n", (long double)d, u1); fflush(stdout); ecnt++; } } { const double rangemax2 = 1e+9; cospifr(frx, d); double u0 = countULP2(t = sc.y, frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumberq(t))) { printf("Pure C sincospiq_u05 cos arg=%.30Lg ulp=%.20g\n", (long double)d, u0); fflush(stdout); ecnt++; } double u1 = countULP2(t = sc.y, frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumberq(t))) { printf("Pure C sincospiq_u35 cos arg=%.30Lg ulp=%.20g\n", (long double)d, u1); fflush(stdout); ecnt++; } } #if 0 double2 sc = xsincos(d); double2 sc2 = xsincos_u1(d); { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sin(frx, frx, GMP_RNDN); double u0 = countULP(t = xsin(d), frx); if ((fabs(d) <= rangemax && u0 > 3.5) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(sc.x, frx); if ((fabs(d) <= rangemax && u1 > 3.5) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C sincos sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP(t = xsin_u1(d), frx); if ((fabs(d) <= rangemax && u2 > 1) || fabs(t) > 
1 || !isnumberq(t)) { printf("Pure C sin_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULP(t = sc2.x, frx); if ((fabs(d) <= rangemax && u3 > 1) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C sincos_u1 sin arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cos(frx, frx, GMP_RNDN); double u0 = countULP(t = xcos(d), frx); if ((fabs(d) <= rangemax && u0 > 3.5) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = sc.y, frx); if ((fabs(d) <= rangemax && u1 > 3.5) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C sincos cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP(t = xcos_u1(d), frx); if ((fabs(d) <= rangemax && u2 > 1) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C cos_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULP(t = sc2.y, frx); if ((fabs(d) <= rangemax && u3 > 1) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C sincos_u1 cos arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tan(frx, frx, GMP_RNDN); double u0 = countULP(t = xtan(d), frx); if ((fabs(d) < 1e+7 && u0 > 3.5) || (fabs(d) <= rangemax && u0 > 5) || isnan(t)) { printf("Pure C tan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xtan_u1(d), frx); if ((fabs(d) <= rangemax && u1 > 1) || isnan(t)) { printf("Pure C tan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } d = rnd_fr(); double d2 = rnd_fr(), zo = rnd_zo(); { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log(frx, frx, GMP_RNDN); double u0 = countULP(t = xlog(fabs(d)), frx); if (u0 > 3.5) { printf("Pure C log arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xlog_u1(fabs(d)), frx); if (u1 > 1) { printf("Pure C log_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, 
fabs(d), GMP_RNDN); mpfr_log10(frx, frx, GMP_RNDN); double u0 = countULP(t = xlog10(fabs(d)), frx); if (u0 > 1) { printf("Pure C log10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_log1p(frx, frx, GMP_RNDN); double u0 = countULP(t = xlog1p(d), frx); if ((-1 <= d && d <= 1e+307 && u0 > 1) || (d < -1 && !isnan(t)) || (d > 1e+307 && !(u0 <= 1 || isinf(t)))) { printf("Pure C log1p arg=%.20g ulp=%.20g\n", d, u0); printf("%g\n", t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp(frx, frx, GMP_RNDN); double u0 = countULP(t = xexp(d), frx); if (u0 > 1) { printf("Pure C exp arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp2(frx, frx, GMP_RNDN); double u0 = countULP(t = xexp2(d), frx); if (u0 > 1) { printf("Pure C exp2 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp10(frx, frx, GMP_RNDN); double u0 = countULP(t = xexp10(d), frx); if (u0 > 1) { printf("Pure C exp10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_expm1(frx, frx, GMP_RNDN); double u0 = countULP(t = xexpm1(d), frx); if (u0 > 1) { printf("Pure C expm1 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_pow(frx, fry, frx, GMP_RNDN); double u0 = countULP(t = xpow(d2, d), frx); if (u0 > 1) { printf("Pure C pow arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cbrt(frx, frx, GMP_RNDN); double u0 = countULP(t = xcbrt(d), frx); if (u0 > 3.5) { printf("Pure C cbrt arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xcbrt_u1(d), frx); if (u1 > 1) { printf("Pure C cbrt_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_asin(frx, frx, GMP_RNDN); double u0 = countULP(t = xasin(zo), frx); if 
(u0 > 3.5) { printf("Pure C asin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xasin_u1(zo), frx); if (u1 > 1) { printf("Pure C asin_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_acos(frx, frx, GMP_RNDN); double u0 = countULP(t = xacos(zo), frx); if (u0 > 3.5) { printf("Pure C acos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xacos_u1(zo), frx); if (u1 > 1) { printf("Pure C acos_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atan(frx, frx, GMP_RNDN); double u0 = countULP(t = xatan(d), frx); if (u0 > 3.5) { printf("Pure C atan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xatan_u1(d), frx); if (u1 > 1) { printf("Pure C atan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_atan2(frx, fry, frx, GMP_RNDN); double u0 = countULP(t = xatan2(d2, d), frx); if (u0 > 3.5) { printf("Pure C atan2 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } double u1 = countULP2(t = xatan2_u1(d2, d), frx); if (u1 > 1) { printf("Pure C atan2_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULP(t = xsinh(d), frx); if ((fabs(d) <= 709 && u0 > 1) || (d > 709 && !(u0 <= 1 || (isinf(t) && t > 0))) || (d < -709 && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C sinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULP(t = xcosh(d), frx); if ((fabs(d) <= 709 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { printf("Pure C cosh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULP(t = xtanh(d), 
frx); if (u0 > 1) { printf("Pure C tanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_asinh(frx, frx, GMP_RNDN); double u0 = countULP(t = xasinh(d), frx); if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C asinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_acosh(frx, frx, GMP_RNDN); double u0 = countULP(t = xacosh(d), frx); if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !isnan(t))) { printf("Pure C acosh arg=%.20g ulp=%.20g\n", d, u0); printf("%.20g\n", t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atanh(frx, frx, GMP_RNDN); double u0 = countULP(t = xatanh(d), frx); if (u0 > 1) { printf("Pure C atanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } #endif } } sleef-3.5.1/src/libm-tester/tester2simddp.c000066400000000000000000000756741373003144100206100ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #ifdef ENABLE_SYS_getrandom #define _GNU_SOURCE #include #include #endif #include "sleef.h" #include "testerutil.h" #ifdef ENABLE_SSE2 #define CONFIG 2 #include "helpersse2.h" #include "renamesse2.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_SSE4 #define CONFIG 4 #include "helpersse2.h" #include "renamesse4.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_AVX #define CONFIG 1 #include "helperavx.h" #include "renameavx.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_FMA4 #define CONFIG 4 #include "helperavx.h" #include "renamefma4.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_AVX2 #define CONFIG 1 #include "helperavx2.h" #include "renameavx2.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_AVX2128 #define CONFIG 1 #include "helperavx2_128.h" #include "renameavx2128.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_AVX512F #define CONFIG 1 #include "helperavx512f.h" #include "renameavx512f.h" typedef Sleef___m512d_2 vdouble2; typedef Sleef___m512_2 vfloat2; #endif #ifdef ENABLE_AVX512FNOFMA #define CONFIG 2 #include "helperavx512f.h" #include "renameavx512fnofma.h" typedef Sleef___m512d_2 vdouble2; typedef Sleef___m512_2 vfloat2; #endif #ifdef ENABLE_VECEXT #define CONFIG 1 #include "helpervecext.h" #include "norename.h" #endif #ifdef ENABLE_PUREC #define CONFIG 1 #include "helperpurec.h" #include "norename.h" #endif #ifdef ENABLE_ADVSIMD #define CONFIG 1 #include "helperadvsimd.h" #include "renameadvsimd.h" typedef Sleef_float64x2_t_2 vdouble2; typedef Sleef_float32x4_t_2 vfloat2; #endif #ifdef ENABLE_ADVSIMDNOFMA #define CONFIG 2 #include "helperadvsimd.h" 
#include "renameadvsimdnofma.h"
typedef Sleef_float64x2_t_2 vdouble2;
typedef Sleef_float32x4_t_2 vfloat2;
#endif

#ifdef ENABLE_SVE
#define CONFIG 1
#include "helpersve.h"
#include "renamesve.h"
#endif /* ENABLE_SVE */

#ifdef ENABLE_SVENOFMA
#define CONFIG 2
#include "helpersve.h"
#include "renamesvenofma.h"
#endif

#ifdef ENABLE_VSX
#define CONFIG 1
#include "helperpower_128.h"
#include "renamevsx.h"
typedef Sleef___vector_double_2 vdouble2;
typedef Sleef___vector_float_2 vfloat2;
#endif

#ifdef ENABLE_VSXNOFMA
#define CONFIG 2
#include "helperpower_128.h"
#include "renamevsxnofma.h"
typedef Sleef___vector_double_2 vdouble2;
typedef Sleef___vector_float_2 vfloat2;
#endif

#ifdef ENABLE_ZVECTOR2
#define CONFIG 140
#include "helpers390x_128.h"
#include "renamezvector2.h"
typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2;
typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
#endif

#ifdef ENABLE_ZVECTOR2NOFMA
#define CONFIG 141
#include "helpers390x_128.h"
#include "renamezvector2nofma.h"
typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2;
typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2;
#endif

#ifdef ENABLE_PUREC_SCALAR
#define CONFIG 1
#include "helperpurec_scalar.h"
#include "renamepurec_scalar.h"
typedef Sleef_double_2 vdouble2;
typedef Sleef_float_2 vfloat2;
#endif

#ifdef ENABLE_PURECFMA_SCALAR
#define CONFIG 2
#include "helperpurec_scalar.h"
#include "renamepurecfma_scalar.h"
typedef Sleef_double_2 vdouble2;
typedef Sleef_float_2 vfloat2;
#endif

//

/* Accessors for the two halves of a vdouble2; compiled only when neither
   SVE configuration is selected. */
#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; }
static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; }
#endif

//

#define DENORMAL_DBL_MIN (4.9406564584124654418e-324)

#define POSITIVE_INFINITY INFINITY
#define NEGATIVE_INFINITY (-INFINITY)

/* Overlay giving integer access to the bit pattern of a double. */
typedef union {
  double d;
  uint64_t u64;
  int64_t i64;
} conv_t;

/*
 * Returns the double whose 64-bit pattern is that of x minus n.
 * Assuming the usual IEEE 754 layout, a positive n moves a value toward
 * zero by n representable steps and a negative n moves it away from zero.
 */
double nexttoward0(double x, int n) {
  union {
    double f;
    uint64_t u;
  } bits;

  bits.f = x;
  bits.u = bits.u - n;

  return bits.f;
}

/* Returns a random double; selector values 0..3 produce values next to
   +0, -0, +INFINITY and -INFINITY respectively. */
double rnd() {
  conv_t c;
  switch(random() & 63) {
  case 0: return
nexttoward0( 0.0, -(random() & ((1 << (random() & 31)) - 1))); case 1: return nexttoward0(-0.0, -(random() & ((1 << (random() & 31)) - 1))); case 2: return nexttoward0( INFINITY, (random() & ((1 << (random() & 31)) - 1))); case 3: return nexttoward0(-INFINITY, (random() & ((1 << (random() & 31)) - 1))); } #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u64, sizeof(c.u64), 0); #else c.u64 = random() | ((uint64_t)random() << 31) | ((uint64_t)random() << 62); #endif return c.d; } double rnd_fr() { conv_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u64, sizeof(c.u64), 0); #else c.u64 = random() | ((uint64_t)random() << 31) | ((uint64_t)random() << 62); #endif } while(!isnumber(c.d)); return c.d; } double rnd_zo() { conv_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u64, sizeof(c.u64), 0); #else c.u64 = random() | ((uint64_t)random() << 31) | ((uint64_t)random() << 62); #endif } while(!isnumber(c.d) || c.d < -1 || 1 < c.d); return c.d; } void sinpifr(mpfr_t ret, double d) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_set_d(frd, d, GMP_RNDN); mpfr_mul(frd, frpi, frd, GMP_RNDN); mpfr_sin(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } void cospifr(mpfr_t ret, double d) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_set_d(frd, d, GMP_RNDN); mpfr_mul(frd, frpi, frd, GMP_RNDN); mpfr_cos(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } vdouble vset(vdouble v, int idx, double d) { double a[VECTLENDP]; vstoreu_v_p_vd(a, v); a[idx] = d; return vloadu_vd_p(a); } double vget(vdouble v, int idx) { double a[VECTLENDP]; vstoreu_v_p_vd(a, v); return a[idx]; } int vgeti(vint v, int idx) { int a[VECTLENDP*2]; vstoreu_v_p_vi(a, v); return a[idx]; } int main(int argc,char **argv) { mpfr_t frw, frx, fry, frz; 
mpfr_set_default_prec(256); mpfr_inits(frw, frx, fry, frz, NULL); conv_t cd; double d, t; double d2, d3, zo; vdouble vd = vcast_vd_d(0); vdouble vd2 = vcast_vd_d(0); vdouble vd3 = vcast_vd_d(0); vdouble vzo = vcast_vd_d(0); vdouble vad = vcast_vd_d(0); vdouble2 sc, sc2; int cnt, ecnt = 0; srandom(time(NULL)); for(cnt = 0;ecnt < 1000;cnt++) { int e = cnt % VECTLENDP; switch(cnt & 7) { case 0: d = rnd(); d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 1: cd.d = rint(rnd_zo() * 1e+10) * M_PI_4; cd.i64 += (random() & 0xff) - 0x7f; d = cd.d; d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 2: cd.d = rnd_fr() * M_PI_4; cd.i64 += (random() & 0xf) - 0x7; d = cd.d; d2 = rnd(); d3 = rnd(); zo = rnd(); break; default: d = rnd_fr(); d2 = rnd_fr(); d3 = rnd_fr(); zo = rnd_zo(); break; } vd = vset(vd, e, d); vd2 = vset(vd2, e, d2); vd3 = vset(vd3, e, d3); vzo = vset(vzo, e, zo); vad = vset(vad, e, fabs(d)); // sc = xsincospi_u05(vd); sc2 = xsincospi_u35(vd); { const double rangemax2 = 1e+9/4; sinpifr(frx, d); double u0 = countULP2dp(t = vget(vd2getx_vd_vd2(sc), e), frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospi_u05 sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2dp(t = vget(vd2getx_vd_vd2(sc2), e), frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 1.5) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospi_u35 sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2dp(t = vget(xsinpi_u05(vd), e), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sinpi_u05 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } } { const double rangemax2 = 1e+9/4; cospifr(frx, d); double u0 = countULP2dp(t = vget(vd2gety_vd_vd2(sc), e), frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospi_u05 cos arg=%.20g ulp=%.20g\n", d, u0); 
fflush(stdout); ecnt++; } double u1 = countULP2dp(t = vget(vd2gety_vd_vd2(sc), e), frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 1.5) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospi_u35 cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2dp(t = vget(xcospi_u05(vd), e), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " cospi_u05 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } } sc = xsincos(vd); sc2 = xsincos_u1(vd); { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sin(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xsin(vd), e), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(vd2getx_vd_vd2(sc), e), frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincos sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULPdp(t = vget(xsin_u1(vd), e), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sin_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULPdp(t = vget(vd2getx_vd_vd2(sc2), e), frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincos_u1 sin arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cos(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xcos(vd), e), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(vd2gety_vd_vd2(sc), e), frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincos cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULPdp(t = vget(xcos_u1(vd), e), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " 
cos_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULPdp(t = vget(vd2gety_vd_vd2(sc2), e), frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincos_u1 cos arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tan(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xtan(vd), e), frx); if (u0 != 0 && (u0 > 3.5 || isnan(t))) { printf(ISANAME " tan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xtan_u1(vd), e), frx); if (u1 != 0 && (u1 > 1 || isnan(t))) { printf(ISANAME " tan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xlog(vad), e), frx); if (u0 > 3.5) { printf(ISANAME " log arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xlog_u1(vad), e), frx); if (u1 > 1) { printf(ISANAME " log_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log10(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xlog10(vad), e), frx); if (u0 > 1) { printf(ISANAME " log10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log2(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xlog2(vad), e), frx); if (u0 > 1) { printf(ISANAME " log2 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xlog2_u35(vad), e), frx); if (u1 > 3.5) { printf(ISANAME " log2_u35 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_log1p(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xlog1p(vd), e), frx); if ((-1 <= d && d <= 1e+307 && u0 > 1) || (d < -1 && !isnan(t)) || (d > 1e+307 && !(u0 <= 1 || isinf(t)))) { printf(ISANAME " log1p arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); 
mpfr_exp(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xexp(vd), e), frx); if (u0 > 1) { printf(ISANAME " exp arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp2(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xexp2(vd), e), frx); if (u0 > 1) { printf(ISANAME " exp2 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xexp2_u35(vd), e), frx); if (u1 > 3.5) { printf(ISANAME " exp2_u35 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp10(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xexp10(vd), e), frx); if (u0 > 1.09) { printf(ISANAME " exp10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xexp10_u35(vd), e), frx); if (u1 > 3.5) { printf(ISANAME " exp10_u35 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_expm1(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xexpm1(vd), e), frx); if (u0 > 1) { printf(ISANAME " expm1 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_pow(frx, fry, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xpow(vd2, vd), e), frx); if (u0 > 1) { printf(ISANAME " pow arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cbrt(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xcbrt(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " cbrt arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xcbrt_u1(vd), e), frx); if (u1 > 1) { printf(ISANAME " cbrt_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_asin(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xasin(vzo), e), frx); if (u0 > 3.5) { printf(ISANAME " asin arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = 
countULPdp(t = vget(xasin_u1(vzo), e), frx); if (u1 > 1) { printf(ISANAME " asin_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_acos(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xacos(vzo), e), frx); if (u0 > 3.5) { printf(ISANAME " acos arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xacos_u1(vzo), e), frx); if (u1 > 1) { printf(ISANAME " acos_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atan(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xatan(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " atan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xatan_u1(vd), e), frx); if (u1 > 1) { printf(ISANAME " atan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_atan2(frx, fry, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xatan2(vd2, vd), e), frx); if (u0 > 3.5) { printf(ISANAME " atan2 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } double u1 = countULP2dp(t = vget(xatan2_u1(vd2, vd), e), frx); if (u1 > 1) { printf(ISANAME " atan2_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xsinh(vd), e), frx); if ((fabs(d) <= 709 && u0 > 1) || (d > 709 && !(u0 <= 1 || (isinf(t) && t > 0))) || (d < -709 && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf(ISANAME " sinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xcosh(vd), e), frx); if ((fabs(d) <= 709 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { printf(ISANAME " cosh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 
= countULPdp(t = vget(xtanh(vd), e), frx); if (u0 > 1) { printf(ISANAME " tanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xsinh_u35(vd), e), frx); if ((fabs(d) <= 709 && u0 > 3.5) || (d > 709 && !(u0 <= 3.5 || (isinf(t) && t > 0))) || (d < -709 && !(u0 <= 3.5 || (isinf(t) && t < 0)))) { printf(ISANAME " sinh_u35 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xcosh_u35(vd), e), frx); if ((fabs(d) <= 709 && u0 > 3.5) || !(u0 <= 3.5 || (isinf(t) && t > 0))) { printf(ISANAME " cosh_u35 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xtanh_u35(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " tanh_u35 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_asinh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xasinh(vd), e), frx); if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf(ISANAME " asinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_acosh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xacosh(vd), e), frx); if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !isnan(t))) { printf(ISANAME " acosh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atanh(frx, frx, GMP_RNDN); double u0 = 
countULPdp(t = vget(xatanh(vd), e), frx); if (u0 > 1) { printf(ISANAME " atanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } // { mpfr_set_d(frx, d, GMP_RNDN); mpfr_abs(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xfabs(vd), e), frx); if (u0 != 0) { printf(ISANAME " fabs arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_copysign(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = vget(xcopysign(vd, vd2), e), frx); if (u0 != 0 && !isnan(d2)) { printf(ISANAME " copysign arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_max(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = vget(xfmax(vd, vd2), e), frx); if (u0 != 0) { printf(ISANAME " fmax arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_min(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = vget(xfmin(vd, vd2), e), frx); if (u0 != 0) { printf(ISANAME " fmin arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_dim(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = vget(xfdim(vd, vd2), e), frx); if (u0 > 0.5) { printf(ISANAME " fdim arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_trunc(frx, frx); double u0 = countULPdp(t = vget(xtrunc(vd), e), frx); if (u0 != 0) { printf(ISANAME " trunc arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, 
test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_floor(frx, frx); double u0 = countULPdp(t = vget(xfloor(vd), e), frx); if (u0 != 0) { printf(ISANAME " floor arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_ceil(frx, frx); double u0 = countULPdp(t = vget(xceil(vd), e), frx); if (u0 != 0) { printf(ISANAME " ceil arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_round(frx, frx); double u0 = countULPdp(t = vget(xround(vd), e), frx); if (u0 != 0) { printf(ISANAME " round arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_rint(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xrint(vd), e), frx); if (u0 != 0) { printf(ISANAME " rint arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_set_d(frz, d3, GMP_RNDN); mpfr_fma(frx, frx, fry, frz, GMP_RNDN); double u0 = countULP2dp(t = vget(xfma(vd, vd2, vd3), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if ((-1e+303 < c && c < 1e+303 && u0 > 0.5) || !(u0 <= 0.5 || isinf(t))) { printf(ISANAME " fma arg=%.20g, %.20g, %.20g ulp=%.20g\n", d, d2, d3, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } #ifndef DETERMINISTIC { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xsqrt(vd), e), frx); if (u0 > 1.0) { printf(ISANAME " sqrt arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { 
mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xsqrt_u05(vd), e), frx); if (u0 > 0.50001) { printf(ISANAME " sqrt_u05 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xsqrt_u35(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " sqrt_u35 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } #endif // #ifndef DETERMINISTIC { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2dp(t = vget(xhypot_u05(vd, vd2), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (u0 > 0.5) { printf(ISANAME " hypot_u05 arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2dp(t = vget(xhypot_u35(vd, vd2), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if ((-1e+308 < c && c < 1e+308 && u0 > 3.5) || !(u0 <= 3.5 || isinf(t))) { if (!(isinf(c) && t == 1.7976931348623157081e+308)) { printf(ISANAME " hypot_u35 arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } } { t = vget(xnextafter(vd, vd2), e); double c = nextafter(d, d2); if (!(isnan(t) && isnan(c)) && t != c) { printf(ISANAME " nextafter arg=%.20g, %.20g\n", d, d2); printf("correct = %.20g, test = %.20g\n", c, t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, 0); double u0 = countULPdp(t = vget(xfrfrexp(vd), e), frx); if (d != 0 && isnumber(d) && u0 != 0) { printf(ISANAME " frfrexp arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { 
mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_fmod(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = vget(xfmod(vd, vd2), e), frx); long double c = mpfr_get_ld(frx, GMP_RNDN); if (fabsl((long double)d / d2) < 1e+300 && u0 > 0.5) { printf(ISANAME " fmod arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_remainder(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = vget(xremainder(vd, vd2), e), frx); long double c = mpfr_get_ld(frx, GMP_RNDN); if (fabsl((long double)d / d2) < 1e+300 && u0 > 0.5) { printf(ISANAME " remainder arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } /* { mpfr_set_d(frx, d, GMP_RNDN); int cexp = mpfr_get_exp(frx); int texp = vgeti(xexpfrexp(vd), e); if (isnumber(d) && cexp != texp) { printf(ISANAME " expfrexp arg=%.20g\n", d); fflush(stdout); ecnt++; } } { int exp = (random() & 8191) - 4096; mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, mpfr_get_exp(frx) + exp); double u0 = countULPdp(t = vget(xldexp(d, exp), e), frx); if (u0 > 0.5) { printf(ISANAME " ldexp arg=%.20g %d ulp=%.20g\n", d, exp, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } */ { mpfr_set_d(frx, d, GMP_RNDN); mpfr_modf(fry, frz, frx, GMP_RNDN); vdouble2 t2 = xmodf(vd); double u0 = countULPdp(vget(vd2getx_vd_vd2(t2), e), frz); double u1 = countULPdp(vget(vd2gety_vd_vd2(t2), e), fry); if (u0 != 0 || u1 != 0) { printf(ISANAME " modf arg=%.20g ulp=%.20g %.20g\n", d, u0, u1); printf("correct = %.20g, %.20g\n", mpfr_get_d(frz, GMP_RNDN), mpfr_get_d(fry, GMP_RNDN)); printf("test = %.20g, %.20g\n", vget(vd2getx_vd_vd2(t2), e), vget(vd2gety_vd_vd2(t2), e)); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); int s; mpfr_lgamma(frx, &s, frx, 
GMP_RNDN); double u0 = countULPdp(t = vget(xlgamma_u1(vd), e), frx); if (((d < 0 && fabsl(t - mpfr_get_ld(frx, GMP_RNDN)) > 1e-15 && u0 > 1) || (0 <= d && d < 2e+305 && u0 > 1) || (2e+305 <= d && !(u0 <= 1 || isinf(t))))) { printf("Pure C xlgamma_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); printf("Diff = %.20Lg\n", fabsl(t - mpfr_get_ld(frx, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_gamma(frx, frx, GMP_RNDN); double u0 = countULP2dp(t = vget(xtgamma_u1(vd), e), frx); if (u0 > 1.0) { printf("Pure C xtgamma_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); printf("Diff = %.20Lg\n", fabsl(t - mpfr_get_ld(frx, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erfc(frx, frx, GMP_RNDN); static double ebz = 9.8813129168249308835e-324; // nextafter(nextafter(0, 1), 1); double u0 = countULP2dp(t = vget(xerfc_u15(vd), e), frx); if ((d > 26.2 && u0 > 2.5 && !(mpfr_get_d(frx, GMP_RNDN) == 0 && t <= ebz)) || (d <= 26.2 && u0 > 1.5)) { printf("Pure C xerfc_u15 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erf(frx, frx, GMP_RNDN); double u0 = countULP2dp(t = vget(xerf_u1(vd), e), frx); if (u0 > 1) { printf("Pure C xerf_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } } } sleef-3.5.1/src/libm-tester/tester2simdsp.c000066400000000000000000000761211373003144100206130ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #ifdef ENABLE_SYS_getrandom #define _GNU_SOURCE #include #include #include #endif #include "sleef.h" #include "testerutil.h" #ifdef ENABLE_SSE2 #define CONFIG 2 #include "helpersse2.h" #include "renamesse2.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_SSE4 #define CONFIG 4 #include "helpersse2.h" #include "renamesse4.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_AVX #define CONFIG 1 #include "helperavx.h" #include "renameavx.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_FMA4 #define CONFIG 4 #include "helperavx.h" #include "renamefma4.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_AVX2 #define CONFIG 1 #include "helperavx2.h" #include "renameavx2.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_AVX2128 #define CONFIG 1 #include "helperavx2_128.h" #include "renameavx2128.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_AVX512F #define CONFIG 1 #include "helperavx512f.h" #include "renameavx512f.h" typedef Sleef___m512d_2 vdouble2; typedef Sleef___m512_2 vfloat2; #endif #ifdef ENABLE_AVX512FNOFMA #define CONFIG 2 #include "helperavx512f.h" #include "renameavx512fnofma.h" typedef Sleef___m512d_2 vdouble2; typedef Sleef___m512_2 vfloat2; #endif #ifdef ENABLE_VECEXT #define CONFIG 1 #include "helpervecext.h" #include "norename.h" #endif #ifdef ENABLE_PUREC #define CONFIG 1 #include "helperpurec.h" #include "norename.h" #endif #ifdef ENABLE_ADVSIMD #define CONFIG 1 #include "helperadvsimd.h" #include "renameadvsimd.h" typedef Sleef_float64x2_t_2 vdouble2; typedef Sleef_float32x4_t_2 vfloat2; #endif #ifdef ENABLE_ADVSIMDNOFMA #define CONFIG 2 #include 
"helperadvsimd.h" #include "renameadvsimdnofma.h" typedef Sleef_float64x2_t_2 vdouble2; typedef Sleef_float32x4_t_2 vfloat2; #endif #ifdef ENABLE_SVE #define CONFIG 1 #include "helpersve.h" #include "renamesve.h" #endif /* ENABLE_SVE */ #ifdef ENABLE_SVENOFMA #define CONFIG 2 #include "helpersve.h" #include "renamesvenofma.h" #endif #ifdef ENABLE_VSX #define CONFIG 1 #include "helperpower_128.h" #include "renamevsx.h" typedef Sleef___vector_double_2 vdouble2; typedef Sleef___vector_float_2 vfloat2; #endif #ifdef ENABLE_VSXNOFMA #define CONFIG 2 #include "helperpower_128.h" #include "renamevsxnofma.h" typedef Sleef___vector_double_2 vdouble2; typedef Sleef___vector_float_2 vfloat2; #endif #ifdef ENABLE_ZVECTOR2 #define CONFIG 140 #include "helpers390x_128.h" #include "renamezvector2.h" typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2; typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; #endif #ifdef ENABLE_ZVECTOR2NOFMA #define CONFIG 141 #include "helpers390x_128.h" #include "renamezvector2nofma.h" typedef Sleef_SLEEF_VECTOR_DOUBLE_2 vdouble2; typedef Sleef_SLEEF_VECTOR_FLOAT_2 vfloat2; #endif #ifdef ENABLE_PUREC_SCALAR #define CONFIG 1 #include "helperpurec_scalar.h" #include "renamepurec_scalar.h" typedef Sleef_double_2 vdouble2; typedef Sleef_float_2 vfloat2; #endif #ifdef ENABLE_PURECFMA_SCALAR #define CONFIG 2 #include "helperpurec_scalar.h" #include "renamepurecfma_scalar.h" typedef Sleef_double_2 vdouble2; typedef Sleef_float_2 vfloat2; #endif // #if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; } static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; } #endif // #define DENORMAL_FLT_MIN (1.4012984643248170709e-45f) #define POSITIVE_INFINITYf ((float)INFINITY) #define NEGATIVE_INFINITYf (-(float)INFINITY) typedef union { double d; uint64_t u64; int64_t i64; } conv64_t; typedef union { float f; uint32_t u32; int32_t i32; } conv32_t; static float nexttoward0f(float x, int n) { union { float f; int32_t u; } cx; cx.f 
= x; cx.u -= n; return x == 0 ? 0 : cx.f; } float rnd() { conv32_t c; switch(random() & 63) { case 0: return nexttoward0f( 0.0, -(random() & ((1 << (random() & 31)) - 1))); case 1: return nexttoward0f(-0.0, -(random() & ((1 << (random() & 31)) - 1))); case 2: return nexttoward0f( INFINITY, (random() & ((1 << (random() & 31)) - 1))); case 3: return nexttoward0f(-INFINITY, (random() & ((1 << (random() & 31)) - 1))); } #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u32, sizeof(c.u32), 0); #else c.u32 = (uint32_t)random() | ((uint32_t)random() << 31); #endif return c.f; } float rnd_fr() { conv32_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u32, sizeof(c.u32), 0); #else c.u32 = (uint32_t)random() | ((uint32_t)random() << 31); #endif } while(!isnumber(c.f)); return c.f; } float rnd_zo() { conv32_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u32, sizeof(c.u32), 0); #else c.u32 = (uint32_t)random() | ((uint32_t)random() << 31); #endif } while(!isnumber(c.f) || c.f < -1 || 1 < c.f); return c.f; } vfloat vset(vfloat v, int idx, float d) { float a[VECTLENSP]; vstoreu_v_p_vf(a, v); a[idx] = d; return vloadu_vf_p(a); } float vget(vfloat v, int idx) { float a[VECTLENSP]; vstoreu_v_p_vf(a, v); return a[idx]; } int main(int argc,char **argv) { mpfr_t frw, frx, fry, frz; mpfr_set_default_prec(256); mpfr_inits(frw, frx, fry, frz, NULL); conv32_t cd; float d, t; float d2, d3, zo; vfloat vd = vcast_vf_f(0); vfloat vd2 = vcast_vf_f(0); vfloat vd3 = vcast_vf_f(0); vfloat vzo = vcast_vf_f(0); vfloat vad = vcast_vf_f(0); vfloat2 sc, sc2; int cnt, ecnt = 0; srandom(time(NULL)); for(cnt = 0;ecnt < 1000;cnt++) { int e = cnt % VECTLENSP; switch(cnt & 7) { case 0: d = rnd(); d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 1: cd.f = rint(rnd_zo() * 1e+10) * M_PI_4; cd.i32 += (random() & 0xff) - 0x7f; d = cd.f; d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 2: cd.f = rnd_fr() * M_PI_4; cd.i32 += (random() & 0xf) - 0x7; d = cd.f; d2 = rnd(); d3 = 
rnd(); zo = rnd(); break; default: d = rnd_fr(); d2 = rnd_fr(); d3 = rnd_fr(); zo = rnd_zo(); break; } vd = vset(vd, e, d); vd2 = vset(vd2, e, d2); vd3 = vset(vd3, e, d3); vzo = vset(vzo, e, zo); vad = vset(vad, e, fabs(d)); sc = xsincospif_u05(vd); sc2 = xsincospif_u35(vd); { const double rangemax2 = 1e+7/4; mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinpi(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = vget(vf2getx_vf_vf2(sc), e), frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospif_u05 sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2sp(t = vget(vf2getx_vf_vf2(sc2), e), frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospif_u35 sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2sp(t = vget(xsinpif_u05(vd), e), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sinpif_u05 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } } { const double rangemax2 = 1e+7/4; mpfr_set_d(frx, d, GMP_RNDN); mpfr_cospi(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = vget(vf2gety_vf_vf2(sc), e), frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospif_u05 cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2sp(t = vget(vf2gety_vf_vf2(sc), e), frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospif_u35 cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2sp(t = vget(xcospif_u05(vd), e), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " cospif_u05 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } } sc = xsincosf(vd); sc2 = xsincosf_u1(vd); { mpfr_set_d(frx, d, GMP_RNDN); 
mpfr_sin(frx, frx, GMP_RNDN); float u0 = countULPsp(t = vget(xsinf(vd), e), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sinf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } float u1 = countULPsp(t = vget(vf2getx_vf_vf2(sc), e), frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincosf sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } float u2 = countULPsp(t = vget(xsinf_u1(vd), e), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sinf_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } float u3 = countULPsp(t = vget(vf2getx_vf_vf2(sc2), e), frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincosf_u1 sin arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } float u4 = countULPsp(t = vget(xfastsinf_u3500(vd), e), frx); double ae4 = fabs(mpfr_get_d(frx, GMP_RNDN) - t); if (u4 > 350 && ae4 > 2e-6) { printf(ISANAME " fastsinf_u3500 arg=%.20g ulp=%.20g\n", d, u4); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cos(frx, frx, GMP_RNDN); float u0 = countULPsp(t = vget(xcosf(vd), e), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " cosf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } float u1 = countULPsp(t = vget(vf2gety_vf_vf2(sc), e), frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincosf cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } float u2 = countULPsp(t = vget(xcosf_u1(vd), e), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " cosf_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } float u3 = countULPsp(t = vget(vf2gety_vf_vf2(sc2), e), frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincosf_u1 cos arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } float u4 = countULPsp(t = vget(xfastcosf_u3500(vd), e), frx); 
double ae4 = fabs(mpfr_get_d(frx, GMP_RNDN) - t); if (u4 > 350 && ae4 > 2e-6) { printf(ISANAME " fastcosf_u3500 arg=%.20g ulp=%.20g\n", d, u4); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tan(frx, frx, GMP_RNDN); float u0 = countULPsp(t = vget(xtanf(vd), e), frx); if (u0 != 0 && (u0 > 3.5 || isnan(t))) { printf(ISANAME " tanf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } float u1 = countULPsp(t = vget(xtanf_u1(vd), e), frx); if (u1 != 0 && (u1 > 1 || isnan(t))) { printf(ISANAME " tanf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsf(d), GMP_RNDN); mpfr_log(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xlogf(vad), e), frx); if (u0 > 3.5) { printf(ISANAME " logf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = vget(xlogf_u1(vad), e), frx); if (u1 > 1) { printf(ISANAME " logf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsf(d), GMP_RNDN); mpfr_log10(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xlog10f(vad), e), frx); if (u0 > 1) { printf(ISANAME " log10f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsf(d), GMP_RNDN); mpfr_log2(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xlog2f(vad), e), frx); if (u0 > 1) { printf(ISANAME " log2f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = vget(xlog2f_u35(vad), e), frx); if (u1 > 3.5) { printf(ISANAME " log2f_u35 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_log1p(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xlog1pf(vd), e), frx); if ((-1 <= d && d <= 1e+38 && u0 > 1) || (d < -1 && !isnan(t)) || (d > 1e+38 && !(u0 <= 1 || isinf(t)))) { printf(ISANAME " log1pf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xexpf(vd), e), frx); if (u0 
> 1) { printf(ISANAME " expf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp2(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xexp2f(vd), e), frx); if (u0 > 1) { printf(ISANAME " exp2f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = vget(xexp2f_u35(vd), e), frx); if (u1 > 3.5) { printf(ISANAME " exp2f_u35 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp10(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xexp10f(vd), e), frx); if (u0 > 1) { printf(ISANAME " exp10f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = vget(xexp10f_u35(vd), e), frx); if (u1 > 3.5) { printf(ISANAME " exp10f_u35 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_expm1(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xexpm1f(vd), e), frx); if (u0 > 1) { printf(ISANAME " expm1f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_pow(frx, fry, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xpowf(vd2, vd), e), frx); if (u0 > 1) { printf(ISANAME " powf arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } if (isnumber(d) && isnumber(d2)) { double u1 = countULPsp(t = vget(xfastpowf_u3500(vd2, vd), e), frx); if (isnumber((float)mpfr_get_d(frx, GMP_RNDN)) && u1 > 350) { printf(ISANAME " fastpowf_u3500 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cbrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xcbrtf(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " cbrtf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = vget(xcbrtf_u1(vd), e), frx); if (u1 
> 1) { printf(ISANAME " cbrtf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_asin(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xasinf(vzo), e), frx); if (u0 > 3.5) { printf(ISANAME " asinf arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = vget(xasinf_u1(vzo), e), frx); if (u1 > 1) { printf(ISANAME " asinf_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_acos(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xacosf(vzo), e), frx); if (u0 > 3.5) { printf(ISANAME " acosf arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = vget(xacosf_u1(vzo), e), frx); if (u1 > 1) { printf(ISANAME " acosf_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atan(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xatanf(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " atanf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = vget(xatanf_u1(vd), e), frx); if (u1 > 1) { printf(ISANAME " atanf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_atan2(frx, fry, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xatan2f(vd2, vd), e), frx); if (u0 > 3.5) { printf(ISANAME " atan2f arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } double u1 = countULP2sp(t = vget(xatan2f_u1(vd2, vd), e), frx); if (u1 > 1) { printf(ISANAME " atan2f_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xsinhf(vd), e), frx); if ((fabs(d) <= 88.5 && u0 > 1) || (d > 88.5 && !(u0 <= 1 || (isinf(t) && t > 0))) || (d < -88.5 && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf(ISANAME " sinhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } 
{
      /* coshf: at most 1 ULP for |d| <= 88.5; beyond that the result may
         instead be +infinity. */
      mpfr_set_d(frx, d, GMP_RNDN);
      mpfr_cosh(frx, frx, GMP_RNDN);

      double u0 = countULPsp(t = vget(xcoshf(vd), e), frx);

      if ((fabs(d) <= 88.5 && u0 > 1) ||
          !(u0 <= 1 || (isinf(t) && t > 0))) {
        printf(ISANAME " coshf arg=%.20g ulp=%.20g\n", d, u0);
        fflush(stdout); ecnt++;
      }
    }

    {
      mpfr_set_d(frx, d, GMP_RNDN);
      mpfr_tanh(frx, frx, GMP_RNDN);

      double u0 = countULPsp(t = vget(xtanhf(vd), e), frx);

      if (u0 > 1.0001) {
        printf(ISANAME " tanhf arg=%.20g ulp=%.20g\n", d, u0);
        fflush(stdout); ecnt++;
      }
    }

    {
      /* sinhf_u35: at most 3.5 ULP for |d| <= 88; beyond that an infinity
         with the sign of d is also accepted. */
      mpfr_set_d(frx, d, GMP_RNDN);
      mpfr_sinh(frx, frx, GMP_RNDN);

      double u0 = countULPsp(t = vget(xsinhf_u35(vd), e), frx);

      if ((fabs(d) <= 88 && u0 > 3.5) ||
          (d > 88 && !(u0 <= 3.5 || (isinf(t) && t > 0))) ||
          (d < -88 && !(u0 <= 3.5 || (isinf(t) && t < 0)))) {
        printf(ISANAME " sinhf_u35 arg=%.20g ulp=%.20g\n", d, u0);
        fflush(stdout); ecnt++;
      }
    }

    {
      mpfr_set_d(frx, d, GMP_RNDN);
      mpfr_cosh(frx, frx, GMP_RNDN);

      double u0 = countULPsp(t = vget(xcoshf_u35(vd), e), frx);

      if ((fabs(d) <= 88 && u0 > 3.5) ||
          !(u0 <= 3.5 || (isinf(t) && t > 0))) {
        printf(ISANAME " coshf_u35 arg=%.20g ulp=%.20g\n", d, u0);
        fflush(stdout); ecnt++;
      }
    }

    {
      mpfr_set_d(frx, d, GMP_RNDN);
      mpfr_tanh(frx, frx, GMP_RNDN);

      double u0 = countULPsp(t = vget(xtanhf_u35(vd), e), frx);

      if (u0 > 3.5) {
        printf(ISANAME " tanhf_u35 arg=%.20g ulp=%.20g\n", d, u0);
        fflush(stdout); ecnt++;
      }
    }

    {
      /* asinhf: at most 1.0001 ULP below sqrt(FLT_MAX) in magnitude; at or
         above it an infinity with the sign of d is also accepted. */
      mpfr_set_d(frx, d, GMP_RNDN);
      mpfr_asinh(frx, frx, GMP_RNDN);

      double u0 = countULPsp(t = vget(xasinhf(vd), e), frx);

      if ((fabs(d) < sqrt(FLT_MAX) && u0 > 1.0001) ||
          (d >= sqrt(FLT_MAX) && !(u0 <= 1.0001 || (isinf(t) && t > 0))) ||
          (d <= -sqrt(FLT_MAX) && !(u0 <= 1.0001 || (isinf(t) && t < 0)))) {
        printf(ISANAME " asinhf arg=%.20g ulp=%.20g\n", d, u0);
        fflush(stdout); ecnt++;
      }
    }

    {
      /* acoshf: same bounds as asinhf on the positive side; for
         d <= -sqrt(FLT_MAX) the result must be NaN. */
      mpfr_set_d(frx, d, GMP_RNDN);
      mpfr_acosh(frx, frx, GMP_RNDN);

      double u0 = countULPsp(t = vget(xacoshf(vd), e), frx);

      /* isinf() is the C99 type-generic macro used by every other check in
         this file; the non-standard isinff() is not declared on every
         platform. */
      if ((fabs(d) < sqrt(FLT_MAX) && u0 > 1.0001) ||
          (d >= sqrt(FLT_MAX) && !(u0 <= 1.0001 || (isinf(t) && t > 0))) ||
          (d <= -sqrt(FLT_MAX) && !isnan(t))) {
printf(ISANAME " acoshf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atanh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xatanhf(vd), e), frx); if (u0 > 1.0001) { printf(ISANAME " atanhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } // /* { int exp = (random() & 8191) - 4096; mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, mpfr_get_exp(frx) + exp); double u0 = countULPsp(t = vget(xldexpf(d, exp)), frx); if (u0 > 0.5001) { printf("Pure C ldexpf arg=%.20g %d ulp=%.20g\n", d, exp, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } */ { mpfr_set_d(frx, d, GMP_RNDN); mpfr_abs(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xfabsf(vd), e), frx); if (u0 != 0) { printf(ISANAME " fabsf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_copysign(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = vget(xcopysignf(vd, vd2), e), frx); if (u0 != 0 && !isnan(d2)) { printf(ISANAME " copysignf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_max(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = vget(xfmaxf(vd, vd2), e), frx); if (u0 != 0) { printf(ISANAME " fmaxf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_min(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = vget(xfminf(vd, vd2), e), frx); if (u0 != 0) { printf(ISANAME " fminf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { 
mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_dim(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = vget(xfdimf(vd, vd2), e), frx); if (u0 > 0.5) { printf(ISANAME " fdimf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_trunc(frx, frx); double u0 = countULPsp(t = vget(xtruncf(vd), e), frx); if (u0 != 0) { printf(ISANAME " truncf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_floor(frx, frx); double u0 = countULPsp(t = vget(xfloorf(vd), e), frx); if (u0 != 0) { printf(ISANAME " floorf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_ceil(frx, frx); double u0 = countULPsp(t = vget(xceilf(vd), e), frx); if (u0 != 0) { printf(ISANAME " ceilf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_round(frx, frx); double u0 = countULPsp(t = vget(xroundf(vd), e), frx); if (u0 != 0) { printf(ISANAME " roundf arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_rint(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xrintf(vd), e), frx); if (u0 != 0) { printf(ISANAME " rintf arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_modf(fry, frz, frx, GMP_RNDN); vfloat2 t2 = xmodff(vd); double u0 = countULPsp(vget(vf2getx_vf_vf2(t2), e), frz); double u1 = countULPsp(vget(vf2gety_vf_vf2(t2), e), fry); if (u0 != 0 || u1 != 0) { printf(ISANAME " 
modff arg=%.20g ulp=%.20g %.20g\n", d, u0, u1); printf("correct = %.20g, %.20g\n", mpfr_get_d(frz, GMP_RNDN), mpfr_get_d(fry, GMP_RNDN)); printf("test = %.20g, %.20g\n", vget(vf2getx_vf_vf2(t2), e), vget(vf2gety_vf_vf2(t2), e)); fflush(stdout); ecnt++; } } { t = vget(xnextafterf(vd, vd2), e); double c = nextafterf(d, d2); if (!(isnan(t) && isnan(c)) && t != c) { printf(ISANAME " nextafterf arg=%.20g, %.20g\n", d, d2); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, 0); double u0 = countULPsp(t = vget(xfrfrexpf(vd), e), frx); if (d != 0 && isnumber(d) && u0 != 0) { printf(ISANAME " frfrexpf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } /* { mpfr_set_d(frx, d, GMP_RNDN); int cexp = mpfr_get_exp(frx); int texp = xexpfrexpf(d); if (d != 0 && isnumber(d) && cexp != texp) { printf(ISANAME " expfrexpf arg=%.20g\n", d); fflush(stdout); ecnt++; } } */ { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2sp(t = vget(xhypotf_u05(vd, vd2), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (u0 > 0.5001) { printf(ISANAME " hypotf_u05 arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2sp(t = vget(xhypotf_u35(vd, vd2), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (u0 >= 3.5) { printf(ISANAME " hypotf_u35 arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_fmod(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = vget(xfmodf(vd, vd2), e), frx); long double c = mpfr_get_ld(frx, GMP_RNDN); if (fabs((double)d / d2) < 1e+38 && u0 > 0.5) { printf(ISANAME " fmodf arg=%.20g, %.20g ulp=%.20g\n", d, d2, 
u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_remainder(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = vget(xremainderf(vd, vd2), e), frx); long double c = mpfr_get_ld(frx, GMP_RNDN); if (fabs((double)d / d2) < 1e+38 && u0 > 0.5) { printf(ISANAME " remainderf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_set_d(frz, d3, GMP_RNDN); mpfr_fma(frx, frx, fry, frz, GMP_RNDN); double u0 = countULP2sp(t = vget(xfmaf(vd, vd2, vd3), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if ((-1e+34 < c && c < 1e+33 && u0 > 0.5001) || !(u0 <= 0.5001 || isinf(t))) { printf(ISANAME " fmaf arg=%.20g, %.20g, %.20g ulp=%.20g\n", d, d2, d3, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } #ifndef DETERMINISTIC { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xsqrtf(vd), e), frx); if (u0 > 1.0) { printf(ISANAME " sqrtf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xsqrtf_u05(vd), e), frx); if (u0 > 0.5001) { printf(ISANAME " sqrtf_u05 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xsqrtf_u35(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " sqrtf_u35 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } #endif // #ifndef DETERMINISTIC { mpfr_set_d(frx, d, 
GMP_RNDN); mpfr_erfc(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = vget(xerfcf_u15(vd), e), frx); if (u0 > 1.5) { printf(ISANAME " erfcf_u15 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erf(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = vget(xerff_u1(vd), e), frx); if (u0 > 1.0) { printf(ISANAME " erff_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); int s; mpfr_lgamma(frx, &s, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xlgammaf_u1(vd), e), frx); if (((d < 0 && fabsl(t - mpfr_get_ld(frx, GMP_RNDN)) > 1e-8 && u0 > 1) || (0 <= d && d < 4e+36 && u0 > 1) || (4e+36 <= d && !(u0 <= 1 || isinf(t))))) { printf(ISANAME " xlgammaf_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", (float)mpfr_get_d(frx, GMP_RNDN), t); printf("Diff = %.20Lg\n", fabsl(t - mpfr_get_ld(frx, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_gamma(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = vget(xtgammaf_u1(vd), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (isnumber(c) || isnumber(t)) { if (u0 > 1.0) { printf(ISANAME " xtgammaf_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", (float)mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } } #if 0 if (cnt % 1000 == 0) { printf("cnt = %d \r", cnt); fflush(stdout); } #endif } } sleef-3.5.1/src/libm-tester/tester2sp.c000066400000000000000000000637321373003144100177420ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #ifdef ENABLE_SYS_getrandom #define _GNU_SOURCE #include #include #include #endif #include "sleef.h" #include "testerutil.h" #define DORENAME #include "rename.h" #if defined(__APPLE__) static int isinff(float x) { return x == __builtin_inff() || x == -__builtin_inff(); } #endif #if defined(__FreeBSD__) #define isinff(x) ((x) == (float)(1e+300) || (x) == -(float)(1e+300)) #endif #define DENORMAL_FLT_MIN (1.4012984643248170709e-45f) #define POSITIVE_INFINITYf ((float)INFINITY) #define NEGATIVE_INFINITYf (-(float)INFINITY) typedef union { double d; uint64_t u64; int64_t i64; } conv64_t; typedef union { float f; uint32_t u32; int32_t i32; } conv32_t; static float nexttoward0f(float x, int n) { union { float f; int32_t u; } cx; cx.f = x; cx.u -= n; return x == 0 ? 0 : cx.f; } float rnd() { conv32_t c; switch(random() & 63) { case 0: return nexttoward0f( 0.0, -(random() & ((1 << (random() & 31)) - 1))); case 1: return nexttoward0f(-0.0, -(random() & ((1 << (random() & 31)) - 1))); case 2: return nexttoward0f( INFINITY, (random() & ((1 << (random() & 31)) - 1))); case 3: return nexttoward0f(-INFINITY, (random() & ((1 << (random() & 31)) - 1))); } #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u32, sizeof(c.u32), 0); #else c.u32 = (uint32_t)random() | ((uint32_t)random() << 31); #endif return c.f; } float rnd_fr() { conv32_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u32, sizeof(c.u32), 0); #else c.u32 = (uint32_t)random() | ((uint32_t)random() << 31); #endif } while(!isnumber(c.f)); return c.f; } float rnd_zo() { conv32_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u32, sizeof(c.u32), 0); #else c.u32 = (uint32_t)random() | ((uint32_t)random() << 31); #endif } while(!isnumber(c.f) || c.f < -1 || 1 < c.f); return c.f; } int main(int argc,char **argv) { mpfr_t frw, frx, 
fry, frz; mpfr_set_default_prec(256); mpfr_inits(frw, frx, fry, frz, NULL); conv32_t cd; float d, t; float d2, d3, zo; int cnt, ecnt = 0; srandom(time(NULL)); for(cnt = 0;ecnt < 1000;cnt++) { switch(cnt & 7) { case 0: d = rnd(); d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 1: cd.f = rint(rnd_zo() * 1e+10) * M_PI_4; cd.i32 += (random() & 0xff) - 0x7f; d = cd.f; d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 2: cd.f = rnd_fr() * M_PI_4; cd.i32 += (random() & 0xf) - 0x7; d = cd.f; d2 = rnd(); d3 = rnd(); zo = rnd(); break; default: d = rnd_fr(); d2 = rnd_fr(); d3 = rnd_fr(); zo = rnd_zo(); break; } Sleef_float2 sc = xsincospif_u05(d); Sleef_float2 sc2 = xsincospif_u35(d); { const float rangemax2 = 1e+7/4; mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinpi(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = sc.x, frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospif_u05 sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2sp(t = sc2.x, frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospif_u35 sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2sp(t = xsinpif_u05(d), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sinpif_u05 arg=%.20g ulp=%.20g\n", d, u2); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { const float rangemax2 = 1e+7/4; mpfr_set_d(frx, d, GMP_RNDN); mpfr_cospi(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = sc.y, frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospif_u05 cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2sp(t = sc.y, frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospif_u35 cos arg=%.20g 
ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2sp(t = xcospif_u05(d), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C cospif_u05 arg=%.20g ulp=%.20g\n", d, u2); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } sc = xsincosf(d); sc2 = xsincosf_u1(d); { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sin(frx, frx, GMP_RNDN); float u0 = countULPsp(t = xsinf(d), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sinf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } float u1 = countULPsp(t = sc.x, frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincosf sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } float u2 = countULPsp(t = xsinf_u1(d), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sinf_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } float u3 = countULPsp(t = sc2.x, frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincosf_u1 sin arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } float u4 = countULPsp(t = xfastsinf_u3500(d), frx); double ae4 = fabs(mpfr_get_d(frx, GMP_RNDN) - t); if (u4 > 350 && ae4 > 2e-6) { printf("Pure C fastsinf_u3500 arg=%.20g ulp=%.20g\n", d, u4); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cos(frx, frx, GMP_RNDN); float u0 = countULPsp(t = xcosf(d), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C cosf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } float u1 = countULPsp(t = sc.y, frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincosf cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } float u2 = countULPsp(t = xcosf_u1(d), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C cosf_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); 
ecnt++; } float u3 = countULPsp(t = sc2.y, frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincosf_u1 cos arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } float u4 = countULPsp(t = xfastcosf_u3500(d), frx); double ae4 = fabs(mpfr_get_d(frx, GMP_RNDN) - t); if (u4 > 350 && ae4 > 2e-6) { printf("Pure C fastcosf_u3500 arg=%.20g ulp=%.20g\n", d, u4); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tan(frx, frx, GMP_RNDN); float u0 = countULPsp(t = xtanf(d), frx); if (u0 != 0 && (u0 > 3.5 || isnan(t))) { printf("Pure C tanf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } float u1 = countULPsp(t = xtanf_u1(d), frx); if (u1 != 0 && (u1 > 1 || isnan(t))) { printf("Pure C tanf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsf(d), GMP_RNDN); mpfr_log(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xlogf(fabsf(d)), frx); if (u0 > 3.5) { printf("Pure C logf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xlogf_u1(fabsf(d)), frx); if (u1 > 1) { printf("Pure C logf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsf(d), GMP_RNDN); mpfr_log10(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xlog10f(fabsf(d)), frx); if (u0 > 1) { printf("Pure C log10f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsf(d), GMP_RNDN); mpfr_log2(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xlog2f(fabsf(d)), frx); if (u0 > 1) { printf("Pure C log2f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xlog2f_u35(fabsf(d)), frx); if (u1 > 3.5) { printf("Pure C log2f_u35 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_log1p(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xlog1pf(d), frx); if ((-1 <= d && d <= 1e+38 && u0 > 1) || (d < -1 && !isnan(t)) || (d > 1e+38 && !(u0 <= 1 || isinf(t)))) { printf("Pure C log1pf 
arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xexpf(d), frx); if (u0 > 1) { printf("Pure C expf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp2(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xexp2f(d), frx); if (u0 > 1) { printf("Pure C exp2f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xexp2f_u35(d), frx); if (u1 > 3.5) { printf("Pure C exp2f_u35 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp10(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xexp10f(d), frx); if (u0 > 1) { printf("Pure C exp10f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xexp10f_u35(d), frx); if (u1 > 3.5) { printf("Pure C exp10f_u35 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_expm1(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xexpm1f(d), frx); if (u0 > 1) { printf("Pure C expm1f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_pow(frx, fry, frx, GMP_RNDN); double u0 = countULPsp(t = xpowf(d2, d), frx); if (u0 > 1) { printf("Pure C powf arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } if (isnumber(d) && isnumber(d2)) { double u1 = countULPsp(t = xfastpowf_u3500(d2, d), frx); if (isnumber((float)mpfr_get_d(frx, GMP_RNDN)) && u1 > 350) { printf("Pure C fastpowf_u3500 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cbrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xcbrtf(d), frx); if (u0 > 3.5) { printf("Pure C cbrtf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xcbrtf_u1(d), frx); if (u1 > 1) 
{ printf("Pure C cbrtf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_asin(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xasinf(zo), frx); if (u0 > 3.5) { printf("Pure C asinf arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xasinf_u1(zo), frx); if (u1 > 1) { printf("Pure C asinf_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_acos(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xacosf(zo), frx); if (u0 > 3.5) { printf("Pure C acosf arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xacosf_u1(zo), frx); if (u1 > 1) { printf("Pure C acosf_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atan(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xatanf(d), frx); if (u0 > 3.5) { printf("Pure C atanf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xatanf_u1(d), frx); if (u1 > 1) { printf("Pure C atanf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_atan2(frx, fry, frx, GMP_RNDN); double u0 = countULPsp(t = xatan2f(d2, d), frx); if (u0 > 3.5) { printf("Pure C atan2f arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } double u1 = countULP2sp(t = xatan2f_u1(d2, d), frx); if (u1 > 1) { printf("Pure C atan2f_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xsinhf(d), frx); if ((fabs(d) <= 88.5 && u0 > 1) || (d > 88.5 && !(u0 <= 1 || (isinf(t) && t > 0))) || (d < -88.5 && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C sinhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xcoshf(d), frx); if 
((fabs(d) <= 88.5 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { printf("Pure C coshf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xtanhf(d), frx); if (u0 > 1.0001) { printf("Pure C tanhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xsinhf_u35(d), frx); if ((fabs(d) <= 88 && u0 > 3.5) || (d > 88 && !(u0 <= 3.5 || (isinf(t) && t > 0))) || (d < -88 && !(u0 <= 3.5 || (isinf(t) && t < 0)))) { printf("Pure C sinhf_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xcoshf_u35(d), frx); if ((fabs(d) <= 88 && u0 > 3.5) || !(u0 <= 3.5 || (isinf(t) && t > 0))) { printf("Pure C coshf_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xtanhf_u35(d), frx); if (u0 > 3.5) { printf("Pure C tanhf_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_asinh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xasinhf(d), frx); if ((fabs(d) < sqrt(FLT_MAX) && u0 > 1.0001) || (d >= sqrt(FLT_MAX) && !(u0 <= 1.0001 || (isinf(t) && t > 0))) || (d <= -sqrt(FLT_MAX) && !(u0 <= 1.0001 || (isinf(t) && t < 0)))) { printf("Pure C asinhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_acosh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xacoshf(d), frx); if ((fabs(d) < sqrt(FLT_MAX) && u0 > 1.0001) || (d >= sqrt(FLT_MAX) && !(u0 <= 1.0001 || (isinff(t) && t > 0))) || (d <= -sqrt(FLT_MAX) && !isnan(t))) { printf("Pure C acoshf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atanh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xatanhf(d), frx); 
if (u0 > 1.0001) { printf("Pure C atanhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } // { int exp = (random() & 8191) - 4096; mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, mpfr_get_exp(frx) + exp); double u0 = countULPsp(t = xldexpf(d, exp), frx); if (u0 > 0.5002) { printf("Pure C ldexpf arg=%.20g %d ulp=%.20g\n", d, exp, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_abs(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xfabsf(d), frx); if (u0 != 0) { printf("Pure C fabsf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_copysign(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = xcopysignf(d, d2), frx); if (u0 != 0 && !isnan(d2)) { printf("Pure C copysignf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_max(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = xfmaxf(d, d2), frx); if (u0 != 0) { printf("Pure C fmaxf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_min(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = xfminf(d, d2), frx); if (u0 != 0) { printf("Pure C fminf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_dim(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = xfdimf(d, d2), frx); if (u0 > 0.5) { printf("Pure C fdimf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", 
mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_trunc(frx, frx); double u0 = countULPsp(t = xtruncf(d), frx); if (u0 != 0) { printf("Pure C truncf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_floor(frx, frx); double u0 = countULPsp(t = xfloorf(d), frx); if (u0 != 0) { printf("Pure C floorf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_ceil(frx, frx); double u0 = countULPsp(t = xceilf(d), frx); if (u0 != 0) { printf("Pure C ceilf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_round(frx, frx); double u0 = countULPsp(t = xroundf(d), frx); if (u0 != 0) { printf("Pure C roundf arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_rint(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xrintf(d), frx); if (u0 != 0) { printf("Pure C rintf arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_modf(fry, frz, frx, GMP_RNDN); Sleef_float2 t2 = xmodff(d); double u0 = countULPsp(t2.x, frz); double u1 = countULPsp(t2.y, fry); if (u0 != 0 || u1 != 0) { printf("Pure C modff arg=%.20g ulp=%.20g %.20g\n", d, u0, u1); printf("correct = %.20g, %.20g\n", mpfr_get_d(frz, GMP_RNDN), mpfr_get_d(fry, GMP_RNDN)); printf("test = %.20g, %.20g\n", t2.x, t2.y); fflush(stdout); ecnt++; } } { t = xnextafterf(d, d2); double c = nextafterf(d, d2); if (!(isnan(t) && isnan(c)) && t != c) { printf("Pure C nextafterf arg=%.20g, %.20g\n", d, d2); fflush(stdout); 
ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, 0); double u0 = countULPsp(t = xfrfrexpf(d), frx); if (d != 0 && isnumber(d) && u0 != 0) { printf("Pure C frfrexpf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); int cexp = mpfr_get_exp(frx); int texp = xexpfrexpf(d); if (d != 0 && isnumber(d) && cexp != texp) { printf("Pure C expfrexpf arg=%.20g\n", d); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2sp(t = xhypotf_u05(d, d2), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (u0 > 0.5001) { printf("Pure C hypotf_u05 arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2sp(t = xhypotf_u35(d, d2), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (u0 >= 3.5) { printf("Pure C hypotf_u35 arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_fmod(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = xfmodf(d, d2), frx); long double c = mpfr_get_ld(frx, GMP_RNDN); if (fabs((double)d / d2) < 1e+38 && u0 > 0.5) { printf("Pure C fmodf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_remainder(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = xremainderf(d, d2), frx); long double c = mpfr_get_ld(frx, GMP_RNDN); if (fabs((double)d / d2) < 1e+38 && u0 > 0.5) { printf("Pure C remainderf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); 
fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_set_d(frz, d3, GMP_RNDN); mpfr_fma(frx, frx, fry, frz, GMP_RNDN); double u0 = countULP2sp(t = xfmaf(d, d2, d3), frx); double c = mpfr_get_d(frx, GMP_RNDN); if ((-1e+34 < c && c < 1e+33 && u0 > 0.5001) || !(u0 <= 0.5001 || isinf(t))) { printf("Pure C fmaf arg=%.20g, %.20g, %.20g ulp=%.20g\n", d, d2, d3, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xsqrtf_u05(d), frx); if (u0 > 0.5001) { printf("Pure C sqrtf_u05 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xsqrtf_u35(d), frx); if (u0 > 3.5) { printf("Pure C sqrtf_u35 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erfc(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = xerfcf_u15(d), frx); if (u0 > 1.5) { printf("Pure C erfcf arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erf(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = xerff_u1(d), frx); if (u0 > 1.0) { printf("Pure C erff arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); int s; mpfr_lgamma(frx, &s, frx, GMP_RNDN); double u0 = countULPsp(t = xlgammaf_u1(d), frx); if (((d < 0 && fabsl(t - mpfr_get_ld(frx, GMP_RNDN)) > 1e-8 && u0 > 1) || (0 <= d && d < 4e+36 && u0 > 1) || (4e+36 <= d && !(u0 <= 1 || isinf(t))))) { printf("Pure C xlgammaf arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = 
%.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); printf("Diff = %.20Lg\n", fabsl(t - mpfr_get_ld(frx, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_gamma(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = xtgammaf_u1(d), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (isnumber(c) || isnumber(t)) { if (u0 > 1.0) { printf("Pure C xtgamma arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } } } exit(0); } sleef-3.5.1/src/libm-tester/tester3.c000066400000000000000000000462351373003144100173770ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #include "sleef.h" #include "misc.h" #include "testerutil.h" #ifdef __VSX__ #include #undef vector #undef bool typedef __vector double __vector_double; typedef __vector float __vector_float; #endif #if defined(__VX__) && defined(__VEC__) #ifndef SLEEF_VECINTRIN_H_INCLUDED #include #define SLEEF_VECINTRIN_H_INCLUDED #endif typedef __attribute__((vector_size(16))) double vector_double; typedef __attribute__((vector_size(16))) float vector_float; #endif // #define XNAN (((union { int64_t u; double d; }) { .u = INT64_C(0xffffffffffffffff) }).d) #define XNANf (((union { int32_t u; float d; }) { .u = 0xffffffff }).d) static INLINE double unifyValue(double x) { x = !(x == x) ? XNAN : x; return x; } static INLINE float unifyValuef(float x) { x = !(x == x) ? 
XNANf : x; return x; } static INLINE double setdouble(double d, int r) { return d; } static INLINE double getdouble(double v, int r) { return unifyValue(v); } static INLINE float setfloat(float d, int r) { return d; } static INLINE float getfloat(float v, int r) { return unifyValuef(v); } #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) static INLINE __m128d set__m128d(double d, int r) { static double a[2]; memrand(a, sizeof(a)); a[r & 1] = d; return _mm_loadu_pd(a); } static INLINE double get__m128d(__m128d v, int r) { static double a[2]; _mm_storeu_pd(a, v); return unifyValue(a[r & 1]); } static INLINE __m128 set__m128(float d, int r) { static float a[4]; memrand(a, sizeof(a)); a[r & 3] = d; return _mm_loadu_ps(a); } static INLINE float get__m128(__m128 v, int r) { static float a[4]; _mm_storeu_ps(a, v); return unifyValuef(a[r & 3]); } #if defined(__AVX__) static INLINE __m256d set__m256d(double d, int r) { static double a[4]; memrand(a, sizeof(a)); a[r & 3] = d; return _mm256_loadu_pd(a); } static INLINE double get__m256d(__m256d v, int r) { static double a[4]; _mm256_storeu_pd(a, v); return unifyValue(a[r & 3]); } static INLINE __m256 set__m256(float d, int r) { static float a[8]; memrand(a, sizeof(a)); a[r & 7] = d; return _mm256_loadu_ps(a); } static INLINE float get__m256(__m256 v, int r) { static float a[8]; _mm256_storeu_ps(a, v); return unifyValuef(a[r & 7]); } #endif #if defined(__AVX512F__) static INLINE __m512d set__m512d(double d, int r) { static double a[8]; memrand(a, sizeof(a)); a[r & 7] = d; return _mm512_loadu_pd(a); } static INLINE double get__m512d(__m512d v, int r) { static double a[8]; _mm512_storeu_pd(a, v); return unifyValue(a[r & 7]); } static INLINE __m512 set__m512(float d, int r) { static float a[16]; memrand(a, sizeof(a)); a[r & 15] = d; return _mm512_loadu_ps(a); } static INLINE float get__m512(__m512 v, int r) { static float a[16]; _mm512_storeu_ps(a, v); return unifyValuef(a[r & 15]); } #endif #endif // #if 
defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) #if defined(__aarch64__) && defined(__ARM_NEON) static INLINE VECTOR_CC float64x2_t setfloat64x2_t(double d, int r) { double a[2]; memrand(a, sizeof(a)); a[r & 1] = d; return vld1q_f64(a); } static INLINE VECTOR_CC double getfloat64x2_t(float64x2_t v, int r) { double a[2]; vst1q_f64(a, v); return unifyValue(a[r & 1]); } static INLINE VECTOR_CC float32x4_t setfloat32x4_t(float d, int r) { float a[4]; memrand(a, sizeof(a)); a[r & 3] = d; return vld1q_f32(a); } static INLINE VECTOR_CC float getfloat32x4_t(float32x4_t v, int r) { float a[4]; vst1q_f32(a, v); return unifyValuef(a[r & 3]); } #endif #ifdef __ARM_FEATURE_SVE static INLINE svfloat64_t setsvfloat64_t(double d, int r) { double a[svcntd()]; memrand(a, sizeof(a)); a[r & (svcntd()-1)] = d; return svld1_f64(svptrue_b8(), a); } static INLINE double getsvfloat64_t(svfloat64_t v, int r) { double a[svcntd()]; svst1_f64(svptrue_b8(), a, v); return unifyValue(a[r & (svcntd()-1)]); } static INLINE svfloat32_t setsvfloat32_t(float d, int r) { float a[svcntw()]; memrand(a, sizeof(a)); a[r & (svcntw()-1)] = d; return svld1_f32(svptrue_b8(), a); } static INLINE float getsvfloat32_t(svfloat32_t v, int r) { float a[svcntw()]; svst1_f32(svptrue_b8(), a, v); return unifyValuef(a[r & (svcntw()-1)]); } static svfloat64_t vd2getx_vd_vd2(svfloat64x2_t v) { return svget2_f64(v, 0); } static svfloat64_t vd2gety_vd_vd2(svfloat64x2_t v) { return svget2_f64(v, 1); } static svfloat32_t vf2getx_vf_vf2(svfloat32x2_t v) { return svget2_f32(v, 0); } static svfloat32_t vf2gety_vf_vf2(svfloat32x2_t v) { return svget2_f32(v, 1); } #endif #ifdef __VSX__ static INLINE __vector double set__vector_double(double d, int r) { double a[2]; memrand(a, sizeof(a)); a[r & 1] = d; return vec_vsx_ld(0, a); } static INLINE double get__vector_double(__vector double v, int r) { double a[2]; vec_vsx_st(v, 0, a); return unifyValue(a[r & 1]); } static INLINE __vector float set__vector_float(float d, int 
r) { float a[4]; memrand(a, sizeof(a)); a[r & 3] = d; return vec_vsx_ld(0, a); } static INLINE float get__vector_float(__vector float v, int r) { float a[4]; vec_vsx_st(v, 0, a); return unifyValuef(a[r & 3]); } #endif #ifdef __VX__ static INLINE __attribute__((vector_size(16))) double setSLEEF_VECTOR_DOUBLE(double d, int r) { double a[2]; memrand(a, sizeof(a)); a[r & 1] = d; return (__attribute__((vector_size(16))) double) { a[0], a[1] }; } static INLINE double getSLEEF_VECTOR_DOUBLE(__attribute__((vector_size(16))) double v, int r) { return unifyValue(v[r & 1]); } static INLINE __attribute__((vector_size(16))) float setSLEEF_VECTOR_FLOAT(float d, int r) { float a[4]; memrand(a, sizeof(a)); a[r & 3] = d; return (__attribute__((vector_size(16))) float) { a[0], a[1], a[2], a[3] }; } static INLINE float getSLEEF_VECTOR_FLOAT(__attribute__((vector_size(16))) float v, int r) { return unifyValuef(v[r & 3]); } #endif // // ATR = cinz_, NAME = sin, TYPE = d2, ULP = u35, EXT = sse2 #define FUNC(ATR, NAME, TYPE, ULP, EXT) Sleef_ ## ATR ## NAME ## TYPE ## _ ## ULP ## EXT #define _TYPE2(TYPE) Sleef_ ## TYPE ## _2 #define TYPE2(TYPE) _TYPE2(TYPE) #define SET(TYPE) set ## TYPE #define GET(TYPE) get ## TYPE #ifndef __ARM_FEATURE_SVE static DPTYPE vd2getx_vd_vd2(TYPE2(DPTYPE) v) { return v.x; } static DPTYPE vd2gety_vd_vd2(TYPE2(DPTYPE) v) { return v.y; } static SPTYPE vf2getx_vf_vf2(TYPE2(SPTYPE) v) { return v.x; } static SPTYPE vf2gety_vf_vf2(TYPE2(SPTYPE) v) { return v.y; } #endif // #define checkDigest(NAME, ULP) do { \ unsigned char d[16], mes[64], buf[64]; \ MD5_Final(d, &ctx); \ snprintf((char *)mes, 60, "%s %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", \ #NAME " " #ULP, d[0],d[1],d[2],d[3],d[4],d[5],d[6],d[7], \ d[8],d[9],d[10],d[11],d[12],d[13],d[14],d[15]); \ if (fp != NULL) { \ fgets((char *)buf, 60, fp); \ if (strncmp((char *)mes, (char *)buf, strlen((char *)mes)) != 0) { \ puts((char *)mes); \ puts((char *)buf); \ success = 0; \ } \ } else 
puts((char *)mes); \ } while(0) #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) #define convertEndianness(ptr, len) do { \ for(int k=0;k #include #include #include #include int do_test(int argc, char **argv); static jmp_buf sigjmp; static void sighandler(int signum) { longjmp(sigjmp, 1); } int main(int argc, char **argv) { signal(SIGILL, sighandler); if (setjmp(sigjmp) == 0) do_test(argc, argv); signal(SIGILL, SIG_DFL); exit(0); } sleef-3.5.1/src/libm-tester/testerutil.c000066400000000000000000000172161373003144100202070ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER) #define STDIN_FILENO 0 #else #include #include #include #endif #if defined(__MINGW32__) || defined(__MINGW64__) #include #endif #if defined(_MSC_VER) #include #endif #include "misc.h" #define DENORMAL_DBL_MIN (4.9406564584124654418e-324) #define POSITIVE_INFINITY INFINITY #define NEGATIVE_INFINITY (-INFINITY) #define DENORMAL_FLT_MIN (1.4012984643248170709e-45f) #define POSITIVE_INFINITYf ((float)INFINITY) #define NEGATIVE_INFINITYf (-(float)INFINITY) int isnumber(double x) { return !isinf(x) && !isnan(x); } int isPlusZero(double x) { return x == 0 && copysign(1, x) == 1; } int isMinusZero(double x) { return x == 0 && copysign(1, x) == -1; } double sign(double d) { return d < 0 ? -1 : 1; } int xisnan(double x) { return x != x; } int isnumberf(float x) { return !isinff(x) && !isnanf(x); } int isPlusZerof(float x) { return x == 0 && copysignf(1, x) == 1; } int isMinusZerof(float x) { return x == 0 && copysignf(1, x) == -1; } float signf(float d) { return d < 0 ? 
-1 : 1; }

// Returns nonzero exactly when x is NaN: a NaN is the only value that
// compares unequal to itself.
int xisnanf(float x) { return x != x; }

// Switch read by flushToZero(); the initial value 0 disables flushing.
int enableFlushToZero = 0;

// Returns y, except that when enableFlushToZero is nonzero and |y| < FLT_MIN
// the result is a zero that carries the sign of y.
double flushToZero(double y) {
  if (enableFlushToZero && fabs(y) < FLT_MIN) y = copysign(0.0, y);
  return y;
}

//

// Reads from fd one byte at a time into buf until a '\n' has been stored or
// cnt has dropped below 2, then writes a '\0'.
// Returns -1 when cnt < 1, the value returned by read() when a read does not
// deliver exactly one byte, and otherwise the number of non-newline bytes
// read plus one.
// NOTE(review): the terminator is written through *++buf. After a newline
// this keeps the '\n' in the buffer; when the loop ends because cnt ran out,
// it appears to leave one slot unwritten and to store the '\0' one element
// past the cnt bytes the caller offered -- confirm against callers.
int readln(int fd, char *buf, int cnt) {
  int i, rcnt = 0;

  if (cnt < 1) return -1;

  while(cnt >= 2) {
    i = read(fd, buf, 1);
    if (i != 1) return i;

    if (*buf == '\n') break;

    rcnt++;
    buf++;
    cnt--;
  }

  *++buf = '\0';
  rcnt++;
  return rcnt;
}

// State of the generator used by xrand().
static uint64_t xseed;

// Advances xseed by one linear congruential step
// (xseed * 6364136223846793005 + 1, wrapping in 64 bits) and returns the new state.
uint64_t xrand() {
  xseed = xseed * UINT64_C(6364136223846793005) + 1;
  return xseed;
}

// Fill memory with random bits
// NOTE(review): in this copy the text between "for(i=0;i" and
// "int cmpDenormsp" is missing (the rest of memrand and whatever followed it),
// so the line below is not valid C as shown. It is kept untouched; restore it
// from the upstream file rather than from this copy.
void memrand(void *p, int size) {
  uint64_t *q = (uint64_t *)p;
  int i;
  for(i=0;i int cmpDenormsp(float x, mpfr_t fry) {
  // The reference is read with mpfr_get_d and stored in a float; both values
  // then go through flushToZero() before being compared.
  float y = mpfr_get_d(fry, GMP_RNDN);
  x = flushToZero(x);
  y = flushToZero(y);

  // Result is 1 for "x matches the reference class", 0 otherwise:
  // both NaN, both the same infinity, or both zeros of the same sign.
  if (xisnanf(x) && xisnanf(y)) return 1;
  if (xisnanf(x) || xisnanf(y)) return 0;
  if (isinf(x) != isinf(y)) return 0;
  if (x == POSITIVE_INFINITYf && y == POSITIVE_INFINITYf) return 1;
  if (x == NEGATIVE_INFINITYf && y == NEGATIVE_INFINITYf) return 1;
  if (y == 0) {
    if (isPlusZerof(x) && isPlusZerof(y)) return 1;
    if (isMinusZerof(x) && isMinusZerof(y)) return 1;
    return 0;
  }
  // Remaining finite case: only the signs (as reported by signf) are compared.
  if (!xisnanf(x) && !xisnanf(y) && !isinf(x) && !isinf(y)) return signf(x) == signf(y);
  return 0;
}

// Double-precision counterpart of cmpDenormsp. Unlike the float version it
// does not pass x or the reference through flushToZero().
int cmpDenormdp(double x, mpfr_t fry) {
  double y = mpfr_get_d(fry, GMP_RNDN);

  if (xisnan(x) && xisnan(y)) return 1;
  if (xisnan(x) || xisnan(y)) return 0;
  if (isinf(x) != isinf(y)) return 0;
  if (x == POSITIVE_INFINITY && y == POSITIVE_INFINITY) return 1;
  if (x == NEGATIVE_INFINITY && y == NEGATIVE_INFINITY) return 1;
  if (y == 0) {
    if (isPlusZero(x) && isPlusZero(y)) return 1;
    if (isMinusZero(x) && isMinusZero(y)) return 1;
    return 0;
  }
  if (!xisnan(x) && !xisnan(y) && !isinf(x) && !isinf(y)) return sign(x) == sign(y);
  return 0;
}

// Returns the distance between d and the reference c, expressed in units of
// max(2^(e-53), DENORMAL_DBL_MIN), where e is the frexp() exponent of c
// converted to double.
// Special results: 10000 when the reference converts to 0 but d is nonzero,
// 10001 when exactly one of them is NaN, and 0 when both are NaN or both are
// the same infinity. When d is infinite but the reference is not, d is
// replaced by DBL_MAX with the reference's sign and 1 is added to the result.
// All MPFR temporaries are cleared on every return path.
double countULPdp(double d, mpfr_t c) {
  mpfr_t fra, frb, frc, frd;
  mpfr_inits(fra, frb, frc, frd, NULL);

  double c2 = mpfr_get_d(c, GMP_RNDN);
  if (c2 == 0 && d != 0) {
    mpfr_clears(fra, frb, frc, frd, NULL);
    return 10000;
  }
  if (isnan(c2) && isnan(d)) {
    mpfr_clears(fra, frb, frc, frd, NULL);
    return 0;
  }
  if (isnan(c2) || isnan(d)) {
    mpfr_clears(fra, frb, frc, frd, NULL);
    return 10001;
  }
  if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) {
    mpfr_clears(fra, frb, frc, frd, NULL);
    return 0;
  }
  if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) {
    mpfr_clears(fra, frb, frc, frd, NULL);
    return 0;
  }

  double v = 0;
  if (isinf(d) && !isinf(mpfr_get_d(c, GMP_RNDN))) {
    d = copysign(DBL_MAX, c2);
    v = 1;
  }

  //

  int e;
  frexp(mpfr_get_d(c, GMP_RNDN), &e);
  // frb = size of the unit the error is measured in.
  mpfr_set_ld(frb, fmaxl(ldexpl(1.0, e-53), DENORMAL_DBL_MIN), GMP_RNDN);

  // fra = (d - c) / frb, computed in MPFR arithmetic.
  mpfr_set_d(frd, d, GMP_RNDN);
  mpfr_sub(fra, frd, c, GMP_RNDN);
  mpfr_div(fra, fra, frb, GMP_RNDN);
  double u = fabs(mpfr_get_d(fra, GMP_RNDN));

  mpfr_clears(fra, frb, frc, frd, NULL);

  return u + v;
}

// Same computation as countULPdp, except that the lower bound on the unit is
// DBL_MIN instead of DENORMAL_DBL_MIN.
double countULP2dp(double d, mpfr_t c) {
  mpfr_t fra, frb, frc, frd;
  mpfr_inits(fra, frb, frc, frd, NULL);

  double c2 = mpfr_get_d(c, GMP_RNDN);
  if (c2 == 0 && d != 0) {
    mpfr_clears(fra, frb, frc, frd, NULL);
    return 10000;
  }
  if (isnan(c2) && isnan(d)) {
    mpfr_clears(fra, frb, frc, frd, NULL);
    return 0;
  }
  if (isnan(c2) || isnan(d)) {
    mpfr_clears(fra, frb, frc, frd, NULL);
    return 10001;
  }
  if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) {
    mpfr_clears(fra, frb, frc, frd, NULL);
    return 0;
  }
  if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) {
    mpfr_clears(fra, frb, frc, frd, NULL);
    return 0;
  }

  double v = 0;
  if (isinf(d) && !isinf(mpfr_get_d(c, GMP_RNDN))) {
    d = copysign(DBL_MAX, c2);
    v = 1;
  }

  //

  int e;
  frexp(mpfr_get_d(c, GMP_RNDN), &e);
  mpfr_set_ld(frb, fmaxl(ldexpl(1.0, e-53), DBL_MIN), GMP_RNDN);

  mpfr_set_d(frd, d, GMP_RNDN);
  mpfr_sub(fra, frd, c, GMP_RNDN);
  mpfr_div(fra, fra, frb, GMP_RNDN);
  double u = fabs(mpfr_get_d(fra, GMP_RNDN));

  mpfr_clears(fra, frb, frc, frd, NULL);

  return u + v;
}

// Single-precision variant of countULPdp. The reference is converted to
// double once; d and the float copy of the reference (c2) go through
// flushToZero() before the special-case checks. The result is
// |d - c| * min(2^(24-e), 1 / DENORMAL_FLT_MIN), plus 1 when an infinite d was
// replaced by FLT_MAX, with the same 10000 / 10001 / 0 special results as the
// double version.
double countULPsp(float d, mpfr_t c0) {
  double c = mpfr_get_d(c0, GMP_RNDN);

  d = flushToZero(d);
  float c2 = flushToZero(c);
  if (c2 == 0 && d != 0) return 10000;
  if (isnan(c2) && isnan(d)) return 0;
  if (isnan(c2) || isnan(d)) return 10001;
  if (c2 == POSITIVE_INFINITYf && d == POSITIVE_INFINITYf) return 0;
  if (c2 == NEGATIVE_INFINITYf && d == NEGATIVE_INFINITYf) return 0;

  double v = 0;
  if (isinf(d) && !isinf(c)) {
    d = copysign(FLT_MAX, c2);
    v = 1;
  }

  //

  int e;
  frexp(c, &e);

  double u = fabs(d - c) * fmin(ldexp(1.0, 24-e), 1.0 / DENORMAL_FLT_MIN);

  return u + v;
}

// Same computation as countULPsp, except that the scale factor is capped at
// 1 / FLT_MIN instead of 1 / DENORMAL_FLT_MIN.
double countULP2sp(float d, mpfr_t c0) {
  double c = mpfr_get_d(c0, GMP_RNDN);

  d = flushToZero(d);
  float c2 = flushToZero(c);
  if (c2 == 0 && d != 0) return 10000;
  if (isnan(c2) && isnan(d)) return 0;
  if (isnan(c2) || isnan(d)) return 10001;
  if (c2 == POSITIVE_INFINITYf && d == POSITIVE_INFINITYf) return 0;
  if (c2 == NEGATIVE_INFINITYf && d == NEGATIVE_INFINITYf) return 0;

  double v = 0;
  if (isinf(d) && !isinf(c)) {
    d = copysign(FLT_MAX, c2);
    v = 1;
  }

  //

  int e;
  frexp(c, &e);

  double u = fabs(d - c) * fmin(ldexp(1.0, 24-e), 1.0 / FLT_MIN);

  return u + v;
}

//

// Stores sin(pi * arg) in ret.
// The rnd parameter is not used: every MPFR call below is given GMP_RNDN.
void mpfr_sinpi(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd) {
  mpfr_t frpi, frd;
  mpfr_inits(frpi, frd, NULL);

  mpfr_const_pi(frpi, GMP_RNDN);
  // frd holds 1.0 here, so this multiplication leaves the value of pi as it is.
  mpfr_set_d(frd, 1.0, GMP_RNDN);
  mpfr_mul(frpi, frpi, frd, GMP_RNDN);
  mpfr_mul(frd, frpi, arg, GMP_RNDN);
  mpfr_sin(ret, frd, GMP_RNDN);

  mpfr_clears(frpi, frd, NULL);
}

// Stores cos(pi * arg) in ret. As in mpfr_sinpi, rnd is not used and all
// MPFR calls are given GMP_RNDN.
void mpfr_cospi(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd) {
  mpfr_t frpi, frd;
  mpfr_inits(frpi, frd, NULL);

  mpfr_const_pi(frpi, GMP_RNDN);
  mpfr_set_d(frd, 1.0, GMP_RNDN);
  mpfr_mul(frpi, frpi, frd, GMP_RNDN);
  mpfr_mul(frd, frpi, arg, GMP_RNDN);
  mpfr_cos(ret, frd, GMP_RNDN);

  mpfr_clears(frpi, frd, NULL);
}

// Calls mpfr_lgamma with the caller's rounding mode and discards the sign
// value that mpfr_lgamma writes through its second argument.
void mpfr_lgamma_nosign(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd) {
  int s;
  mpfr_lgamma(ret, &s, arg, rnd);
}
#endif // #define USEMPFR
sleef-3.5.1/src/libm-tester/testerutil.h000066400000000000000000000040661373003144100202130ustar00rootroot00000000000000
// Copyright Naoki Shibata and contributors 2010 - 2020.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
//          http://www.boost.org/LICENSE_1_0.txt)

// Shared declarations for the libm tester utilities.
//
// The includer is expected to have pulled in the headers that provide
// uint32_t/uint64_t, INT64_C, INFINITY and M_PI (and <mpfr.h> when USEMPFR is
// defined) before including this file; this header includes nothing itself.

// Include guard: this header defines static functions and object-like macros,
// so a second inclusion in one translation unit would otherwise redefine them.
#ifndef SLEEF_TESTERUTIL_H_INCLUDED
#define SLEEF_TESTERUTIL_H_INCLUDED

#define DENORMAL_DBL_MIN (4.9406564584124654418e-324)
#define POSITIVE_INFINITY INFINITY
#define NEGATIVE_INFINITY (-INFINITY)

#define DENORMAL_FLT_MIN (1.4012984643248170709e-45f)
#define POSITIVE_INFINITYf ((float)INFINITY)
#define NEGATIVE_INFINITYf (-(float)INFINITY)

#define M_PIf ((float)M_PI)

// Switch consulted by flushToZero().
extern int enableFlushToZero;
double flushToZero(double y);

// Classification helpers for double values.
int isnumber(double x);
int isPlusZero(double x);
int isMinusZero(double x);
int xisnan(double x);
double sign(double d);

// Classification helpers for float values.
int isnumberf(float x);
int isPlusZerof(float x);
int isMinusZerof(float x);
int xisnanf(float x);
float signf(float d);

int readln(int fd, char *buf, int cnt);

// 2^64 as a double (0x100000000 * 0x100000000).
#define XRAND_MAX (INT64_C(0x100000000) * (double)INT64_C(0x100000000))

void xsrand(uint64_t s);
// Declared with (void) so the compiler can reject calls that pass arguments.
uint64_t xrand(void);
void memrand(void *p, int size);

// The following functions are meant to be inlined

// Reinterprets the 64-bit pattern u as a double.
static double u2d(uint64_t u) {
  union {
    double f;
    uint64_t i;
  } tmp;
  tmp.i = u;
  return tmp.f;
}

// Returns the 64-bit pattern of the double d.
static uint64_t d2u(double d) {
  union {
    double f;
    uint64_t i;
  } tmp;
  tmp.f = d;
  return tmp.i;
}

// Reinterprets the 32-bit pattern u as a float.
static float u2f(uint32_t u) {
  union {
    float f;
    uint32_t i;
  } tmp;
  tmp.i = u;
  return tmp.f;
}

// Returns the 32-bit pattern of the float d.
static uint32_t f2u(float d) {
  union {
    float f;
    uint32_t i;
  } tmp;
  tmp.f = d;
  return tmp.i;
}

// Returns 1 when str begins with prefix (an empty prefix always matches) and
// 0 otherwise. Neither string is modified, so both are taken as const; callers
// that pass char * or string literals are unaffected.
static int startsWith(const char *str, const char *prefix) {
  // A str shorter than prefix mismatches on its terminating '\0', so str is
  // never read past its end.
  while(*prefix != '\0') if (*str++ != *prefix++) return 0;
  return *prefix == '\0';
}

//

#ifdef USEMPFR
int cmpDenormdp(double x, mpfr_t fry);
double countULPdp(double d, mpfr_t c);
double countULP2dp(double d, mpfr_t c);

int cmpDenormsp(float x, mpfr_t fry);
double countULPsp(float d, mpfr_t c);
double countULP2sp(float d, mpfr_t c);

void mpfr_sinpi(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd);
void mpfr_cospi(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd);
void mpfr_lgamma_nosign(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd);
#endif

#endif // SLEEF_TESTERUTIL_H_INCLUDED
sleef-3.5.1/src/libm/000077500000000000000000000000001373003144100143245ustar00rootroot00000000000000sleef-3.5.1/src/libm/CMakeLists.txt000066400000000000000000000663341373003144100171000ustar00rootroot00000000000000
# Generated headers for this directory are written to <build>/include, which
# is put on the include path together with the source directory.
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/)
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include/)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${SLEEF_C_FLAGS}")

# Extensions for which an alias_<ext>.h header is generated with mkalias.
set(EXT_ENABLE_ALIAS AVX512F ADVSIMD NEON32 VSX ZVECTOR2)

# --------------------------------------------------------------------
# sleef.h
# --------------------------------------------------------------------
# File generated for the headers
set(SLEEF_ORG_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/sleeflibm_header.h.org)
set(SLEEF_ORG_FOOTER ${CMAKE_CURRENT_SOURCE_DIR}/sleeflibm_footer.h.org)
set(SLEEF_INCLUDE_HEADER ${sleef_BINARY_DIR}/include/sleef.h)

# The header template is configured into an intermediate file, and the build
# rule below starts by copying it over sleef.h. Configuring straight into
# sleef.h (as before) meant the rule could only append to a file written at
# configure time, so re-running the rule without a reconfigure (for example
# after mkrename is rebuilt) appended the generated declarations and the
# footer a second time.
set(SLEEF_CONFIGURED_HEADER ${CMAKE_CURRENT_BINARY_DIR}/sleeflibm_header.h)
configure_file(${SLEEF_ORG_HEADER} ${SLEEF_CONFIGURED_HEADER})

# configure_file() used to create this directory as a side effect.
file(MAKE_DIRECTORY ${sleef_BINARY_DIR}/include)

# Command list for the rule: start from the configured template, append the
# mkrename output for every entry of SLEEF_HEADER_LIST, then append the footer.
set(SLEEF_HEADER_COMMANDS "")
list(APPEND SLEEF_HEADER_COMMANDS
  COMMAND ${CMAKE_COMMAND} -E copy ${SLEEF_CONFIGURED_HEADER} ${SLEEF_INCLUDE_HEADER})
foreach(SIMD ${SLEEF_HEADER_LIST})
  list(APPEND SLEEF_HEADER_COMMANDS
    COMMAND echo Generating sleef.h: ${TARGET_MKRENAME} ${HEADER_PARAMS_${SIMD}})
  # Run the built mkrename helper; TARGET_FILE resolves to its executable path.
  list(APPEND SLEEF_HEADER_COMMANDS
    COMMAND $<TARGET_FILE:${TARGET_MKRENAME}> ${HEADER_PARAMS_${SIMD}} >> ${SLEEF_INCLUDE_HEADER})
endforeach()

if((MSVC OR MINGW AND WIN32) OR SLEEF_CLANG_ON_WINDOWS)
  # "type" needs a path with backslashes.
  string(REPLACE "/" "\\" sleef_footer_input_file "${SLEEF_ORG_FOOTER}")
  list(APPEND SLEEF_HEADER_COMMANDS COMMAND type ${sleef_footer_input_file} >> ${SLEEF_INCLUDE_HEADER})
else()
  list(APPEND SLEEF_HEADER_COMMANDS COMMAND cat ${SLEEF_ORG_FOOTER} >> ${SLEEF_INCLUDE_HEADER})
endif()

add_custom_command(OUTPUT ${SLEEF_INCLUDE_HEADER}
  ${SLEEF_HEADER_COMMANDS}
  DEPENDS
    ${SLEEF_ORG_HEADER}
    ${SLEEF_CONFIGURED_HEADER}
    ${SLEEF_ORG_FOOTER}
    ${TARGET_MKRENAME}
)

# --------------------------------------------------------------------
# TARGET_MKRENAME
# renameXXX.h for each SIMD
# --------------------------------------------------------------------
# Helper executable: generates parts of the sleef header file
add_host_executable(${TARGET_MKRENAME} mkrename.c)
# Enable Vector PCS for Advanced SIMD (if supported)
if(FORCE_AAVPCS)
  host_target_AAVPCS_definitions(${TARGET_MKRENAME})
endif()

# For every supported extension: record the object-library names used further
# down and add a rule that writes rename<ext>.h by running mkrename.
set(HEADER_FILES_GENERATED "")
foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS})
  if(COMPILER_SUPPORTS_${SIMD})
    # Need lowercase string for rename header
    string(TOLOWER ${SIMD} vecarch)
    set(OBJECT_${SIMD} "sleef${vecarch}")
    set(OBJECTDET_${SIMD} "sleefdet${vecarch}")
    set(HEADER_${SIMD} ${CMAKE_CURRENT_BINARY_DIR}/include/rename${vecarch}.h)
    list(APPEND HEADER_FILES_GENERATED ${HEADER_${SIMD}})

    # Generate mkrename commands
    add_custom_command(OUTPUT ${HEADER_${SIMD}}
      COMMAND echo Generating rename${vecarch}.h: ${TARGET_MKRENAME} ${RENAME_PARAMS_${SIMD}}
      COMMAND $<TARGET_FILE:${TARGET_MKRENAME}> ${RENAME_PARAMS_${SIMD}} > ${HEADER_${SIMD}}
      DEPENDS ${TARGET_MKRENAME}
    )
    add_custom_target(rename${SIMD}.h_generated DEPENDS ${HEADER_${SIMD}})
  endif()
endforeach()

# --------------------------------------------------------------------
# TARGET_MKRENAME_GNUABI
# renameXXX_gnuabi.h for each SIMD GNU Abi
# --------------------------------------------------------------------
# Helper executable: generates parts of the sleef header file gnu_abi
add_host_executable(${TARGET_MKRENAME_GNUABI} mkrename_gnuabi.c)

set(HEADER_GNUABI_FILES_GENERATED "")
if(ENABLE_GNUABI)
  foreach(SIMD ${SLEEF_SUPPORTED_GNUABI_EXTENSIONS})
    if(COMPILER_SUPPORTS_${SIMD})
      string(TOLOWER ${SIMD} vecarch)
      set(OBJECT_${SIMD}_dp_GNUABI "sleefgnuabi${vecarch}dp")
      set(OBJECT_${SIMD}_sp_GNUABI "sleefgnuabi${vecarch}sp")
      set(HEADER_${SIMD}_GNUABI ${CMAKE_CURRENT_BINARY_DIR}/include/rename${vecarch}_gnuabi.h)
      list(APPEND HEADER_GNUABI_FILES_GENERATED ${HEADER_${SIMD}_GNUABI})

      # Generate mkrename_gnuabi commands
      add_custom_command(OUTPUT ${HEADER_${SIMD}_GNUABI}
        COMMAND echo Generating rename${vecarch}_gnuabi.h: ${TARGET_MKRENAME_GNUABI} ${RENAME_PARAMS_GNUABI_${SIMD}}
        COMMAND $<TARGET_FILE:${TARGET_MKRENAME_GNUABI}> ${RENAME_PARAMS_GNUABI_${SIMD}} > ${HEADER_${SIMD}_GNUABI}
        DEPENDS ${TARGET_MKRENAME_GNUABI}
      )
      # set_source_files_properties(${HEADER_${SIMD}_GNUABI} PROPERTIES GENERATED TRUE)
    endif()
  endforeach()
endif()

# --------------------------------------------------------------------
# TARGET_MKMASKED_GNUABI
add_host_executable(${TARGET_MKMASKED_GNUABI} mkmasked_gnuabi.c)

# maskedXXX_YY_gnuabi.h
# Only generated for extensions that define MKMASKED_PARAMS_GNUABI_<ext>_sp.
if(ENABLE_GNUABI)
  foreach(SIMD ${SLEEF_SUPPORTED_GNUABI_EXTENSIONS})
    if(COMPILER_SUPPORTS_${SIMD} AND MKMASKED_PARAMS_GNUABI_${SIMD}_sp)
      string(TOLOWER ${SIMD} vecarch)

      set(HEADER_GENERATED "")
      foreach(T dp sp)
        set(HEADER_MASKED_${SIMD}_${T}_GNUABI ${CMAKE_CURRENT_BINARY_DIR}/include/masked_${vecarch}_${T}_gnuabi.h)
        list(APPEND HEADER_GENERATED ${HEADER_MASKED_${SIMD}_${T}_GNUABI})

        add_custom_command(OUTPUT ${HEADER_MASKED_${SIMD}_${T}_GNUABI}
          COMMAND echo Generating ${HEADER_MASKED_${SIMD}_${T}_GNUABI}
          COMMAND $<TARGET_FILE:${TARGET_MKMASKED_GNUABI}> ${MKMASKED_PARAMS_GNUABI_${SIMD}_${T}} > ${HEADER_MASKED_${SIMD}_${T}_GNUABI}
          DEPENDS ${TARGET_MKMASKED_GNUABI}
        )
      endforeach()
      add_custom_target(masked${SIMD}_generated DEPENDS ${HEADER_GENERATED})
    endif()
  endforeach()
endif()

# --------------------------------------------------------------------
# TARGET_HEADERS
# --------------------------------------------------------------------
add_custom_target(${TARGET_HEADERS} ALL
  DEPENDS
    ${SLEEF_INCLUDE_HEADER}            # Output only
    ${HEADER_FILES_GENERATED}          # Output only
    ${HEADER_GNUABI_FILES_GENERATED}   # Output only
)

# --------------------------------------------------------------------
# TARGET_MKALIAS
# --------------------------------------------------------------------
add_host_executable(${TARGET_MKALIAS} mkalias.c)
if(FORCE_AAVPCS)
  host_target_AAVPCS_definitions(${TARGET_MKALIAS})
endif()

# --------------------------------------------------------------------
# TARGET_MKDISP
# --------------------------------------------------------------------
# Helper executable: dispatcher for the vector extensions
add_host_executable(${TARGET_MKDISP} mkdisp.c)

# Set C standard requirement (-std=gnu99 for gcc)
set_target_properties(
  ${TARGET_MKRENAME} ${TARGET_MKRENAME_GNUABI} ${TARGET_MKDISP}
  ${TARGET_MKALIAS} ${TARGET_MKMASKED_GNUABI}
  PROPERTIES C_STANDARD 99
)

# --------------------------------------------------------------------
# TARGET_LIBSLEEF
# --------------------------------------------------------------------
# Build main library

# Property list shared by the library and all object libraries below.
set(COMMON_TARGET_PROPERTIES
  C_STANDARD 99                  # -std=gnu99
)

if (BUILD_SHARED_LIBS)
  list(APPEND COMMON_TARGET_PROPERTIES POSITION_INDEPENDENT_CODE ON)   # -fPIC
endif()

if (ENABLE_LTO)
  list(APPEND COMMON_TARGET_PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)  # -flto
endif()

# Original sleef sources
set(STANDARD_SOURCES sleefdp.c sleefsp.c rempitab.c)

# Check for different precision support and add sources accordingly
if(COMPILER_SUPPORTS_LONG_DOUBLE)
  list(APPEND STANDARD_SOURCES sleefld.c)
endif()

add_library(${TARGET_LIBSLEEF} ${STANDARD_SOURCES})
add_dependencies(${TARGET_LIBSLEEF} ${TARGET_HEADERS})
set_target_properties(${TARGET_LIBSLEEF} PROPERTIES
  VERSION ${SLEEF_VERSION}
  SOVERSION ${SLEEF_SOVERSION}
  PUBLIC_HEADER ${SLEEF_INCLUDE_HEADER}
  ${COMMON_TARGET_PROPERTIES}
)

target_compile_definitions(${TARGET_LIBSLEEF}
  PRIVATE DORENAME=1 ${COMMON_TARGET_DEFINITIONS}
)

if(COMPILER_SUPPORTS_FLOAT128)
  # TODO: Not supported for LLVM bitcode gen as it has a specific compilation flags
  target_sources(${TARGET_LIBSLEEF} PRIVATE sleefqp.c)
  target_compile_definitions(${TARGET_LIBSLEEF}
    PRIVATE ENABLEFLOAT128=1 ${COMMON_TARGET_DEFINITIONS})
endif()

if(COMPILER_SUPPORTS_BUILTIN_MATH)
  target_compile_definitions(${TARGET_LIBSLEEF} PRIVATE ENABLE_BUILTIN_MATH=1)
endif()

# Compile SIMD versions
# Single precision and double precision

# Include symbols for each SIMD architecture (if supported by the platform)
# Note: adds object file as sources via cmake conditional generator expression
foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS})
  if(COMPILER_SUPPORTS_${SIMD})
    # INDEX_ALIAS is -1 when the extension is not listed in EXT_ENABLE_ALIAS.
    list(FIND EXT_ENABLE_ALIAS ${SIMD} INDEX_ALIAS)
    string(TOLOWER ${SIMD} SIMDLC)

    # Only the single-precision source is built for the two NEON32 variants.
    if (${SIMD} STREQUAL "NEON32" OR ${SIMD} STREQUAL "NEON32VFPV4")
      set(SIMD_SOURCES sleefsimdsp.c)
    else()
      set(SIMD_SOURCES sleefsimdsp.c sleefsimddp.c)
    endif()

    # Create a library
    # Two object libraries per extension: the regular build and a second one
    # compiled with DETERMINISTIC=1.
    add_library(${OBJECT_${SIMD}} OBJECT ${SIMD_SOURCES} ${HEADER_${SIMD}})
    add_library(${OBJECTDET_${SIMD}} OBJECT ${SIMD_SOURCES} ${HEADER_${SIMD}})
    target_compile_definitions(${OBJECTDET_${SIMD}} PRIVATE DETERMINISTIC=1)

    if(COMPILER_SUPPORTS_BUILTIN_MATH)
      target_compile_definitions(${OBJECT_${SIMD}} PRIVATE ENABLE_BUILTIN_MATH=1)
      target_compile_definitions(${OBJECTDET_${SIMD}} PRIVATE ENABLE_BUILTIN_MATH=1)
    endif()
    if(ENABLE_ALTDIV)
      target_compile_definitions(${OBJECT_${SIMD}} PRIVATE ENABLE_ALTDIV=1)
      target_compile_definitions(${OBJECTDET_${SIMD}} PRIVATE ENABLE_ALTDIV=1)
    endif()
    if(ENABLE_ALTSQRT)
      target_compile_definitions(${OBJECT_${SIMD}} PRIVATE ENABLE_ALTSQRT=1)
      target_compile_definitions(${OBJECTDET_${SIMD}} PRIVATE ENABLE_ALTSQRT=1)
    endif()

    if (INDEX_ALIAS EQUAL -1)
      target_compile_definitions(${OBJECT_${SIMD}} PRIVATE
        ENABLE_${SIMD}=1
        DORENAME=1
        ${COMMON_TARGET_DEFINITIONS}
      )
      target_compile_definitions(${OBJECTDET_${SIMD}} PRIVATE
        ENABLE_${SIMD}=1
        DORENAME=1
        ${COMMON_TARGET_DEFINITIONS}
      )
    else()
      # alias_<ext>.h = mkalias output for the SP parameters followed by the
      # output for the DP parameters.
      add_custom_command(
        OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include/alias_${SIMDLC}.h
        COMMENT "Generating alias_${SIMDLC}.h"
        COMMAND $<TARGET_FILE:${TARGET_MKALIAS}> ${ALIAS_PARAMS_${SIMD}_SP} >  ${CMAKE_CURRENT_BINARY_DIR}/include/alias_${SIMDLC}.h
        COMMAND $<TARGET_FILE:${TARGET_MKALIAS}> ${ALIAS_PARAMS_${SIMD}_DP} >> ${CMAKE_CURRENT_BINARY_DIR}/include/alias_${SIMDLC}.h
        DEPENDS ${TARGET_MKALIAS}
      )
      add_custom_target(alias_${SIMDLC}.h_generated SOURCES ${CMAKE_CURRENT_BINARY_DIR}/include/alias_${SIMDLC}.h)
      add_dependencies(${OBJECT_${SIMD}} alias_${SIMDLC}.h_generated)
      add_dependencies(${OBJECTDET_${SIMD}} alias_${SIMDLC}.h_generated)
      target_compile_definitions(${OBJECT_${SIMD}} PRIVATE
        ENABLE_${SIMD}=1
        DORENAME=1
        ${COMMON_TARGET_DEFINITIONS}
        ALIAS_NO_EXT_SUFFIX=\"alias_${SIMDLC}.h\"
      )
      target_compile_definitions(${OBJECTDET_${SIMD}} PRIVATE
        ENABLE_${SIMD}=1
        DORENAME=1
        ${COMMON_TARGET_DEFINITIONS}
        ALIAS_NO_EXT_SUFFIX=\"alias_${SIMDLC}.h\"
      )
    endif()

    # Enable Vector PCS for Advanced SIMD (if supported)
    if(FORCE_AAVPCS AND ${SIMD} STREQUAL "ADVSIMD")
      target_compile_definitions(${OBJECT_${SIMD}} PRIVATE
        ENABLE_AAVPCS=1
      )
    endif()

    add_dependencies(${OBJECT_${SIMD}} rename${SIMD}.h_generated)
    add_dependencies(${OBJECTDET_${SIMD}} rename${SIMD}.h_generated)
    set_target_properties(${OBJECT_${SIMD}} PROPERTIES
      ${COMMON_TARGET_PROPERTIES}
    )
    set_target_properties(${OBJECTDET_${SIMD}} PROPERTIES
      ${COMMON_TARGET_PROPERTIES}
    )

    target_compile_options(${OBJECT_${SIMD}} PRIVATE
      ${FLAGS_ENABLE_${SIMD}})
    target_compile_options(${OBJECTDET_${SIMD}} PRIVATE
      ${FLAGS_ENABLE_${SIMD}})

    # Link the objects of both object libraries into the main library.
    target_sources(${TARGET_LIBSLEEF} PRIVATE
      $<TARGET_OBJECTS:${OBJECT_${SIMD}}>
      $<TARGET_OBJECTS:${OBJECTDET_${SIMD}}>)
  endif(COMPILER_SUPPORTS_${SIMD})
endforeach()

#

if(BUILD_INLINE_HEADERS)
  if(CMAKE_C_COMPILER_ID MATCHES "Intel")
    message(FATAL_ERROR "BUILD_INLINE_HEADERS is not supported with Intel Compiler")
  endif()

  add_host_executable("addSuffix" addSuffix.c)
  set_target_properties("addSuffix" PROPERTIES C_STANDARD 99)

  file(MAKE_DIRECTORY ${PROJECT_BINARY_DIR}/include/)
  set(INLINE_HEADER_FILES_GENERATED "")

  if (SED_COMMAND)
    foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS})
      if(COMPILER_SUPPORTS_${SIMD})
        string(TOLOWER ${SIMD} SIMDLC)

        set(INLINE_HEADER_FILE ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h)
        # NOTE(review): MAIN_DEPENDENCY below is given three paths although
        # add_custom_command documents a single main dependency -- confirm
        # that both sleefsimd*.c files really end up as dependencies.
        add_custom_command(
          OUTPUT ${INLINE_HEADER_FILE}

          COMMAND echo Generating sleefinline_${SIMDLC}.h

          # Preprocess sleefsimddp.c with SLEEF_GENHEADER defined, comments are preserved
          COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS}                     # gcc -E -C
          ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch  # -I/sleef/src/common -I/sleef/src/arch
          ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/                                            # -I/build/src/libm/include
          ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME              # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME
          ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1    # /sleef/libm/sleefsimddp.c > /build/libm/sleefSSE2.h.tmp1

          # Remove all lines except those begin with "//@"
          COMMAND ${SED_COMMAND} -n -e "/^\\/\\/@#.*$/p" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1 # sed -n -e "/^\\/\\/@#.*$/p" /build/src/libm/sleefSSE2.h.tmp1
          > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp2                                              # > /build/src/libm/sleefSSE2.h.tmp2

          # Remove "//@"
          COMMAND ${SED_COMMAND} -e "s/^\\/\\/@#/#/g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp2    # sed -e "s/^\\/\\/@#/#/g" /build/src/libm/sleefSSE2.h.tmp2
          > ${CMAKE_CURRENT_BINARY_DIR}/include/macroonly${SIMD}.h                                       # > /build/src/libm/include/macroonlySSE2.h

          # Preprocess sleefsimdsp.c with SLEEF_GENHEADER defined. Include macroonly*.h instead of helper*.h.
          COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS} ${FLAG_PRESERVE_COMMENTS}                     # gcc -E -C
          ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/common ${FLAG_INCLUDE}${PROJECT_SOURCE_DIR}/src/arch  # -I/sleef/src/common -I/sleef/src/arch
          ${FLAG_INCLUDE}${CMAKE_CURRENT_BINARY_DIR}/include/                                            # -I/build/src/libm/include
          ${FLAG_DEFINE}SLEEF_GENHEADER ${FLAG_DEFINE}ENABLE_${SIMD} ${FLAG_DEFINE}DORENAME              # -DSLEEF_GENHEADER -DENABLE_SSE2 -DDORENAME
          ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c >> ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1   # /sleef/libm/sleefsimdsp.c >> /build/libm/sleefSSE2.h.tmp1

          # Remove lines beginning with "#" so that the resulting file can be preprocessed again.
          COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp1          # sed -e "s/^#.*//g" /build/src/libm/sleefSSE2.h.tmp1
          > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c                                                 # > /build/src/libm/sleefSSE2.h.c

          # Preprocess the intermediate file again to remove comments
          COMMAND "${CMAKE_C_COMPILER}" ${FLAG_PREPROCESS}                                               # gcc -E
          ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.c                                                   # /build/src/libm/sleefSSE2.h.c
          > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3                                              # > /build/src/libm/sleefSSE2.h.tmp3

          # Embed version number into the header
          COMMAND ${SED_COMMAND} -e                                                                      # sed -e
          "s/%VERSION%/${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}.${SLEEF_VERSION_PATCHLEVEL}/g"      # "s/%VERSION%/3.5.0/g"
          ${CMAKE_CURRENT_SOURCE_DIR}/sleefinline_header.h.org                                           # /sleef/libm/sleefinline_header.h.org
          > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp4                                              # > /build/src/libm/sleefSSE2.h.tmp4

          # Remove lines beginning with "#"
          COMMAND ${SED_COMMAND} -e "s/^#.*//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp3          # sed -e "s/^#.*//g" /build/src/libm/sleefSSE2.h.tmp3
          >> ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp4                                             # >> /build/src/libm/sleefSSE2.h.tmp4

          # Replace multiple empty lines with a single empty line
          COMMAND ${SED_COMMAND} -e "s/^[[:space:]]*$//g" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp4 # sed -e "s/^[[:space:]]*$//g" /build/src/libm/sleefSSE2.h.tmp4
          > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp5                                              # > /build/src/libm/sleefSSE2.h.tmp5
          COMMAND ${SED_COMMAND} "/^$/N;/^\\n$/D" ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp5        # sed -e "/^$/N;/^\n$/D" /build/src/libm/sleefSSE2.h.tmp5
          > ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp6                                              # > /build/src/libm/sleefSSE2.h.tmp6

          # Append the per-extension suffix to every keyword listed in keywords.txt
          COMMAND $<TARGET_FILE:addSuffix> ${CMAKE_CURRENT_BINARY_DIR}/sleef${SIMD}.h.tmp6               # addSuffix /build/src/libm/sleefSSE2.h.tmp6
          ${CMAKE_CURRENT_SOURCE_DIR}/keywords.txt "_${SIMDLC}_sleef"                                    # keywords.txt "_sse2_sleef"
          > ${PROJECT_BINARY_DIR}/include/sleefinline_${SIMDLC}.h                                        # > /build/include/sleefinline_sse2.h

          MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimddp.c ${CMAKE_CURRENT_SOURCE_DIR}/sleefsimdsp.c ${HEADER_${SIMD}}
          DEPENDS ${HEADER_${SIMD}} addSuffix
          VERBATIM
        )
        list(APPEND INLINE_HEADER_FILES_GENERATED ${INLINE_HEADER_FILE})
      endif()
    endforeach()

    add_custom_target(${TARGET_INLINE_HEADERS} ALL
      DEPENDS
        ${INLINE_HEADER_FILES_GENERATED}
    )
    install(FILES ${INLINE_HEADER_FILES_GENERATED} DESTINATION include)
  endif(SED_COMMAND)

  add_library(${TARGET_LIBINLINE} STATIC rempitab.c)
  install(TARGETS ${TARGET_LIBINLINE}
    PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
endif(BUILD_INLINE_HEADERS)

# On some systems we need to explicitly link libsleef against libm to
# use some of the math functions used in the scalar code (for example
# sqrt).
if(LIBM AND NOT COMPILER_SUPPORTS_BUILTIN_MATH)
  target_link_libraries(${TARGET_LIBSLEEF} ${LIBM})
endif()

# NOTE(review): the generator expression on this line was stripped to a bare
# "$" in this copy. Upstream SLEEF adds the common object library here;
# confirm TARGET_LIBCOMMON_OBJ is the variable defined by the parent project.
target_sources(${TARGET_LIBSLEEF} PRIVATE $<TARGET_OBJECTS:${TARGET_LIBCOMMON_OBJ}>)

# --------------------------------------------------------------------

if (SLEEF_ARCH_X86)
  # Target dispsse.c
  # The dispatcher source is the .org template followed by mkdisp output. The
  # template is listed in DEPENDS so that editing it regenerates the file.
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/dispsse.c
    COMMENT "Generating dispsse.c"
    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/dispsse.c.org ${CMAKE_CURRENT_BINARY_DIR}/dispsse.c
    COMMAND $<TARGET_FILE:${TARGET_MKDISP}> 2 4 __m128d __m128 __m128i sse2 sse4 avx2128 >> ${CMAKE_CURRENT_BINARY_DIR}/dispsse.c
    DEPENDS ${TARGET_MKDISP} ${CMAKE_CURRENT_SOURCE_DIR}/dispsse.c.org
  )
  add_custom_target(dispsse.c_generated SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dispsse.c)

  # Target renamedsp128.h
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include/renamedsp128.h
    COMMENT "Generating renamedsp128.h"
    COMMAND $<TARGET_FILE:${TARGET_MKRENAME}> - 2 4 > ${CMAKE_CURRENT_BINARY_DIR}/include/renamedsp128.h
    DEPENDS ${TARGET_MKRENAME}
  )
  add_custom_target(renamedsp128.h_generated SOURCES ${CMAKE_CURRENT_BINARY_DIR}/include/renamedsp128.h)

  # Target dispavx.c
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/dispavx.c
    COMMENT "Generating dispavx.c"
    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/dispavx.c.org ${CMAKE_CURRENT_BINARY_DIR}/dispavx.c
    COMMAND $<TARGET_FILE:${TARGET_MKDISP}> 4 8 __m256d __m256 __m128i avx fma4 avx2 >> ${CMAKE_CURRENT_BINARY_DIR}/dispavx.c
    DEPENDS ${TARGET_MKDISP} ${CMAKE_CURRENT_SOURCE_DIR}/dispavx.c.org
  )
  add_custom_target(dispavx.c_generated SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dispavx.c)

  # Target renamedsp256.h
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include/renamedsp256.h
    COMMENT "Generating renamedsp256.h"
    COMMAND $<TARGET_FILE:${TARGET_MKRENAME}> - 4 8 > ${CMAKE_CURRENT_BINARY_DIR}/include/renamedsp256.h
    DEPENDS ${TARGET_MKRENAME}
  )
  add_custom_target(renamedsp256.h_generated SOURCES ${CMAKE_CURRENT_BINARY_DIR}/include/renamedsp256.h)

  # Target dispsse_obj

  if (COMPILER_SUPPORTS_FMA4)
    set(DISPATCHER_DEFINITIONS ${DISPATCHER_DEFINITIONS} ENABLE_FMA4=1)
  endif()

  if (COMPILER_SUPPORTS_AVX2)
    set(DISPATCHER_DEFINITIONS ${DISPATCHER_DEFINITIONS} ENABLE_AVX2=1)
  endif()

  if(COMPILER_SUPPORTS_SSE2)
    add_library(dispsse_obj OBJECT dispsse.c)
    target_compile_options(dispsse_obj PRIVATE ${FLAGS_ENABLE_SSE2})
    set_target_properties(dispsse_obj PROPERTIES ${COMMON_TARGET_PROPERTIES})
    target_compile_definitions(dispsse_obj PRIVATE ${COMMON_TARGET_DEFINITIONS} ${DISPATCHER_DEFINITIONS})
    target_include_directories(dispsse_obj PRIVATE ${sleef_BINARY_DIR}/include)
    add_dependencies(dispsse_obj dispsse.c_generated renamedsp128.h_generated ${TARGET_HEADERS})
    target_sources(${TARGET_LIBSLEEF} PRIVATE $<TARGET_OBJECTS:dispsse_obj>)
  endif()

  # Target dispavx_obj

  if(COMPILER_SUPPORTS_AVX)
    add_library(dispavx_obj OBJECT dispavx.c)
    target_compile_options(dispavx_obj PRIVATE ${FLAGS_ENABLE_AVX})
    set_target_properties(dispavx_obj PROPERTIES ${COMMON_TARGET_PROPERTIES})
    target_compile_definitions(dispavx_obj PRIVATE ${COMMON_TARGET_DEFINITIONS} ${DISPATCHER_DEFINITIONS})
    target_include_directories(dispavx_obj PRIVATE ${sleef_BINARY_DIR}/include)
    add_dependencies(dispavx_obj dispavx.c_generated renamedsp256.h_generated ${TARGET_HEADERS})
    target_sources(${TARGET_LIBSLEEF} PRIVATE $<TARGET_OBJECTS:dispavx_obj>)
  endif()
endif(SLEEF_ARCH_X86)

# --------------------------------------------------------------------
# TARGET_LIBSLEEFGNUABI
# Compile SIMD versions for GNU Abi
# --------------------------------------------------------------------
# Build gnuabi version from just simd object files
if(ENABLE_GNUABI)
  set(TARGET_LIBSLEEFGNUABI_OBJECTS "")
  foreach(SIMD ${SLEEF_SUPPORTED_GNUABI_EXTENSIONS})
    if(COMPILER_SUPPORTS_${SIMD})
      # Need lowercase string for rename header
      string(TOLOWER ${SIMD} vecarch)

      # One object library per precision (dp, sp).
      foreach(T dp sp)
        add_library(${OBJECT_${SIMD}_${T}_GNUABI} OBJECT sleefsimd${T}.c ${HEADER_${SIMD}_GNUABI})
        target_compile_definitions(${OBJECT_${SIMD}_${T}_GNUABI} PRIVATE
          ENABLE_${SIMD}=1
          DORENAME=1
          ENABLE_GNUABI=1
        )
        if(FORCE_AAVPCS AND ${SIMD} STREQUAL "ADVSIMD")
          target_compile_definitions(${OBJECT_${SIMD}_${T}_GNUABI} PRIVATE
            ENABLE_AAVPCS=1
          )
        endif()
        if(ENABLE_ALTDIV)
          target_compile_definitions(${OBJECT_${SIMD}_${T}_GNUABI} PRIVATE ENABLE_ALTDIV=1)
        endif()
        if(ENABLE_ALTSQRT)
          target_compile_definitions(${OBJECT_${SIMD}_${T}_GNUABI} PRIVATE ENABLE_ALTSQRT=1)
        endif()
        set_target_properties(${OBJECT_${SIMD}_${T}_GNUABI} PROPERTIES
          ${COMMON_TARGET_PROPERTIES}
        )
        target_compile_options(${OBJECT_${SIMD}_${T}_GNUABI} PRIVATE ${FLAGS_ENABLE_${SIMD}})
        if (COMPILER_SUPPORTS_WEAK_ALIASES)
          target_compile_options(${OBJECT_${SIMD}_${T}_GNUABI} PRIVATE -DENABLE_GNUABI=1)
        endif(COMPILER_SUPPORTS_WEAK_ALIASES)
        list(APPEND TARGET_LIBSLEEFGNUABI_OBJECTS $<TARGET_OBJECTS:${OBJECT_${SIMD}_${T}_GNUABI}>)

        if(MKMASKED_PARAMS_GNUABI_${SIMD}_${T})
          target_compile_definitions(${OBJECT_${SIMD}_${T}_GNUABI} PRIVATE
            HEADER_MASKED=\"masked_${vecarch}_${T}_gnuabi.h\")
          add_dependencies(${OBJECT_${SIMD}_${T}_GNUABI} masked${SIMD}_generated)
        endif()
      endforeach()
    endif(COMPILER_SUPPORTS_${SIMD})
  endforeach()

  # Create library
  add_library(${TARGET_LIBSLEEFGNUABI} ${TARGET_LIBSLEEFGNUABI_OBJECTS} rempitab.c)

  # Library properties
  set_target_properties(${TARGET_LIBSLEEFGNUABI} PROPERTIES
    VERSION ${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}
    SOVERSION ${SLEEF_SOVERSION}
    POSITION_INDEPENDENT_CODE ON   # -fPIC
    C_STANDARD 99                  # -std=gnu99
    LINKER_LANGUAGE C
  )

  # On some systems we need to explicitly link libsleefgnuabi against
  # libm to use some of the math functions used in the scalar code (for
  # example sqrt).
  if(LIBM AND NOT COMPILER_SUPPORTS_BUILTIN_MATH)
    target_link_libraries(${TARGET_LIBSLEEFGNUABI} ${LIBM})
  endif()
endif(ENABLE_GNUABI)

# --------------------------------------------------------------------
# TARGET_LLVM_BITCODE
# Generate LLVM bitcode
# --------------------------------------------------------------------
if(CLANG_EXE_PATH AND SLEEF_ENABLE_LLVM_BITCODE)
  # Turn this directory's INCLUDE_DIRECTORIES property into "-I <dir>" arguments.
  set(SLEEP_LLVM_BITCODE_INCLUDES "")
  get_property(SLEEP_LLVM_BITCODE_INCLUDES_LIST DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
  foreach(INCLUDE_DIRECTORY ${SLEEP_LLVM_BITCODE_INCLUDES_LIST})
    set(SLEEP_LLVM_BITCODE_INCLUDES "${SLEEP_LLVM_BITCODE_INCLUDES} -I ${INCLUDE_DIRECTORY}")
  endforeach()

  separate_arguments(SLEEP_LLVM_BITCODE_INCLUDES_CLANG WINDOWS_COMMAND "${SLEEP_LLVM_BITCODE_INCLUDES}")
  set(SLEEF_CLANG_LLVM_BITCODE_OPTIONS -O3 -S -emit-llvm -D NDEBUG -D DORENAME=1)
  set(LLVM_BITCODE_OUTPUTS "")

  # Generate LLVM bitcode for regular SLEEF
  foreach(STANDARD_SOURCE ${STANDARD_SOURCES})
    get_filename_component(SRC_WITHOUT_EXT ${STANDARD_SOURCE} NAME_WE)
    set(LLVM_BITCODE_INPUT ${CMAKE_CURRENT_SOURCE_DIR}/${SRC_WITHOUT_EXT}.c)
    set(LLVM_BITCODE_OUTPUT ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${SRC_WITHOUT_EXT}.ll)
    add_custom_command(OUTPUT ${LLVM_BITCODE_OUTPUT}
      COMMAND ${CLANG_EXE_PATH} ${SLEEF_CLANG_LLVM_BITCODE_OPTIONS} -o ${LLVM_BITCODE_OUTPUT} ${LLVM_BITCODE_INPUT} ${SLEEP_LLVM_BITCODE_INCLUDES_CLANG}
      DEPENDS ${LLVM_BITCODE_INPUT}
    )
    list(APPEND LLVM_BITCODE_OUTPUTS ${LLVM_BITCODE_OUTPUT})
  endforeach()

  # Generate LLVM bitcode for SIMD SLEEF
  foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS})
    if (${SIMD} STREQUAL "NEON32" OR ${SIMD} STREQUAL "NEON32VFPV4")
      set(SIMD_SOURCES sleefsimdsp.c)
    else()
      set(SIMD_SOURCES sleefsimdsp.c sleefsimddp.c)
    endif()

    if(COMPILER_SUPPORTS_${SIMD})
      foreach(SIMD_SOURCE ${SIMD_SOURCES})
        get_filename_component(SIMD_SOURCE_WITHOUT_EXT ${SIMD_SOURCE} NAME_WE)
        set(LLVM_BITCODE_INPUT ${CMAKE_CURRENT_SOURCE_DIR}/${SIMD_SOURCE})
        set(LLVM_BITCODE_OUTPUT ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${SIMD_SOURCE_WITHOUT_EXT}_${SIMD}.ll)
        add_custom_command(OUTPUT ${LLVM_BITCODE_OUTPUT}
          COMMAND ${CLANG_EXE_PATH} ${CLANG_FLAGS_ENABLE_${SIMD}} ${SLEEF_CLANG_LLVM_BITCODE_OPTIONS} -D ENABLE_${SIMD}=1 -o ${LLVM_BITCODE_OUTPUT} ${LLVM_BITCODE_INPUT} ${SLEEP_LLVM_BITCODE_INCLUDES_CLANG}
          DEPENDS ${LLVM_BITCODE_INPUT}
        )
        list(APPEND LLVM_BITCODE_OUTPUTS ${LLVM_BITCODE_OUTPUT})
      endforeach()
    endif()
  endforeach()

  file(MAKE_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
  add_custom_target(${TARGET_LLVM_BITCODE} ALL
    DEPENDS
      ${LLVM_BITCODE_OUTPUTS}
  )
  add_dependencies(${TARGET_LLVM_BITCODE} ${TARGET_HEADERS})

  install(FILES ${LLVM_BITCODE_OUTPUTS} DESTINATION "${CMAKE_INSTALL_LIBDIR}")
endif()

# --------------------------------------------------------------------
# Install
# --------------------------------------------------------------------
# Install libsleef and sleef.h
install(TARGETS ${TARGET_LIBSLEEF}
  PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
  LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
  ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")

configure_file("sleef.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/sleef.pc" @ONLY)
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/sleef.pc" DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")

if(ENABLE_GNUABI)
  install(TARGETS ${TARGET_LIBSLEEFGNUABI}
    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
endif()
sleef-3.5.1/src/libm/addSuffix.c000066400000000000000000000065101373003144100164070ustar00rootroot00000000000000
// Copyright Naoki Shibata and contributors 2010 - 2020.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #define N 1000 int nkeywords = 0, nalloc = 0; char **keywords = NULL, *suffix = NULL; void insert(char *buf) { for(int i=0;i \n", argv[0]); fprintf(stderr, "Add the suffix to keywords\n"); exit(-1); } FILE *fp = fopen(argv[2], "r"); if (fp == NULL) { fprintf(stderr, "Cannot open %s\n", argv[2]); exit(-1); } char buf[N]; while(fgets(buf, N, fp) != NULL) { if (strlen(buf) >= 1) buf[strlen(buf)-1] = '\0'; keywords[nkeywords] = malloc(sizeof(char) * (strlen(buf) + 1)); strcpy(keywords[nkeywords], buf); nkeywords++; if (nkeywords >= nalloc) { nalloc *= 2; keywords = realloc(keywords, sizeof(char *) * nalloc); } } fclose(fp); suffix = argv[3]; fp = fopen(argv[1], "r"); if (fp == NULL) { fprintf(stderr, "Cannot open %s\n", argv[1]); exit(-1); } doit(fp); fclose(fp); exit(0); } // cat sleefinline*.h | egrep -o '[a-zA-Z_][0-9a-zA-Z_]*' | sort | uniq > cand.txt sleef-3.5.1/src/libm/dd.h000066400000000000000000000346461373003144100151010ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) typedef struct { vdouble x, y; } vdouble2; static vdouble vd2getx_vd_vd2(vdouble2 v) { return v.x; } static vdouble vd2gety_vd_vd2(vdouble2 v) { return v.y; } static vdouble2 vd2setxy_vd2_vd_vd(vdouble x, vdouble y) { vdouble2 v; v.x = x; v.y = y; return v; } static vdouble2 vd2setx_vd2_vd2_vd(vdouble2 v, vdouble d) { v.x = d; return v; } static vdouble2 vd2sety_vd2_vd2_vd(vdouble2 v, vdouble d) { v.y = d; return v; } #endif static INLINE CONST VECTOR_CC vdouble vupper_vd_vd(vdouble d) { return vreinterpret_vd_vm(vand_vm_vm_vm(vreinterpret_vm_vd(d), vcast_vm_i_i(0xffffffff, 0xf8000000))); } static INLINE CONST VECTOR_CC vdouble2 vcast_vd2_vd_vd(vdouble h, vdouble l) { return vd2setxy_vd2_vd_vd(h, l); } static INLINE CONST VECTOR_CC vdouble2 vcast_vd2_d_d(double h, double l) { return vd2setxy_vd2_vd_vd(vcast_vd_d(h), vcast_vd_d(l)); } static INLINE CONST VECTOR_CC vdouble2 vsel_vd2_vo_vd2_vd2(vopmask m, vdouble2 x, vdouble2 y) { return vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(m, vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)), vsel_vd_vo_vd_vd(m, vd2gety_vd_vd2(x), vd2gety_vd_vd2(y))); } static INLINE CONST VECTOR_CC vdouble2 vsel_vd2_vo_d_d_d_d(vopmask o, double x1, double y1, double x0, double y0) { return vd2setxy_vd2_vd_vd(vsel_vd_vo_d_d(o, x1, x0), vsel_vd_vo_d_d(o, y1, y0)); } static INLINE CONST VECTOR_CC vdouble vadd_vd_3vd(vdouble v0, vdouble v1, vdouble v2) { return vadd_vd_vd_vd(vadd_vd_vd_vd(v0, v1), v2); } static INLINE CONST VECTOR_CC vdouble vadd_vd_4vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3) { return vadd_vd_3vd(vadd_vd_vd_vd(v0, v1), v2, v3); } static INLINE CONST VECTOR_CC vdouble vadd_vd_5vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4) { return vadd_vd_4vd(vadd_vd_vd_vd(v0, v1), v2, v3, v4); } static INLINE CONST VECTOR_CC vdouble vadd_vd_6vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble 
v4, vdouble v5) { return vadd_vd_5vd(vadd_vd_vd_vd(v0, v1), v2, v3, v4, v5); } static INLINE CONST VECTOR_CC vdouble vadd_vd_7vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5, vdouble v6) { return vadd_vd_6vd(vadd_vd_vd_vd(v0, v1), v2, v3, v4, v5, v6); } static INLINE CONST VECTOR_CC vdouble vsub_vd_3vd(vdouble v0, vdouble v1, vdouble v2) { return vsub_vd_vd_vd(vsub_vd_vd_vd(v0, v1), v2); } static INLINE CONST VECTOR_CC vdouble vsub_vd_4vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3) { return vsub_vd_3vd(vsub_vd_vd_vd(v0, v1), v2, v3); } static INLINE CONST VECTOR_CC vdouble vsub_vd_5vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4) { return vsub_vd_4vd(vsub_vd_vd_vd(v0, v1), v2, v3, v4); } static INLINE CONST VECTOR_CC vdouble vsub_vd_6vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5) { return vsub_vd_5vd(vsub_vd_vd_vd(v0, v1), v2, v3, v4, v5); } // static INLINE CONST VECTOR_CC vdouble2 ddneg_vd2_vd2(vdouble2 x) { return vcast_vd2_vd_vd(vneg_vd_vd(vd2getx_vd_vd2(x)), vneg_vd_vd(vd2gety_vd_vd2(x))); } static INLINE CONST VECTOR_CC vdouble2 ddabs_vd2_vd2(vdouble2 x) { return vcast_vd2_vd_vd(vabs_vd_vd(vd2getx_vd_vd2(x)), vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(vd2gety_vd_vd2(x)), vand_vm_vm_vm(vreinterpret_vm_vd(vd2getx_vd_vd2(x)), vreinterpret_vm_vd(vcast_vd_d(-0.0)))))); } static INLINE CONST VECTOR_CC vdouble2 ddnormalize_vd2_vd2(vdouble2 t) { vdouble s = vadd_vd_vd_vd(vd2getx_vd_vd2(t), vd2gety_vd_vd2(t)); return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(vsub_vd_vd_vd(vd2getx_vd_vd2(t), s), vd2gety_vd_vd2(t))); } static INLINE CONST VECTOR_CC vdouble2 ddscale_vd2_vd2_vd(vdouble2 d, vdouble s) { return vd2setxy_vd2_vd_vd(vmul_vd_vd_vd(vd2getx_vd_vd2(d), s), vmul_vd_vd_vd(vd2gety_vd_vd2(d), s)); } static INLINE CONST VECTOR_CC vdouble2 ddadd_vd2_vd_vd(vdouble x, vdouble y) { vdouble s = vadd_vd_vd_vd(x, y); return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(vsub_vd_vd_vd(x, s), y)); } static INLINE 
CONST VECTOR_CC vdouble2 ddadd2_vd2_vd_vd(vdouble x, vdouble y) { vdouble s = vadd_vd_vd_vd(x, y); vdouble v = vsub_vd_vd_vd(s, x); return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(vsub_vd_vd_vd(x, vsub_vd_vd_vd(s, v)), vsub_vd_vd_vd(y, v))); } static INLINE CONST VECTOR_CC vdouble2 ddadd_vd2_vd2_vd(vdouble2 x, vdouble y) { vdouble s = vadd_vd_vd_vd(vd2getx_vd_vd2(x), y); return vd2setxy_vd2_vd_vd(s, vadd_vd_3vd(vsub_vd_vd_vd(vd2getx_vd_vd2(x), s), y, vd2gety_vd_vd2(x))); } static INLINE CONST VECTOR_CC vdouble2 ddsub_vd2_vd2_vd(vdouble2 x, vdouble y) { vdouble s = vsub_vd_vd_vd(vd2getx_vd_vd2(x), y); return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(vsub_vd_vd_vd(vsub_vd_vd_vd(vd2getx_vd_vd2(x), s), y), vd2gety_vd_vd2(x))); } static INLINE CONST VECTOR_CC vdouble2 ddadd2_vd2_vd2_vd(vdouble2 x, vdouble y) { vdouble s = vadd_vd_vd_vd(vd2getx_vd_vd2(x), y); vdouble v = vsub_vd_vd_vd(s, vd2getx_vd_vd2(x)); vdouble w = vadd_vd_vd_vd(vsub_vd_vd_vd(vd2getx_vd_vd2(x), vsub_vd_vd_vd(s, v)), vsub_vd_vd_vd(y, v)); return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(w, vd2gety_vd_vd2(x))); } static INLINE CONST VECTOR_CC vdouble2 ddadd_vd2_vd_vd2(vdouble x, vdouble2 y) { vdouble s = vadd_vd_vd_vd(x, vd2getx_vd_vd2(y)); return vd2setxy_vd2_vd_vd(s, vadd_vd_3vd(vsub_vd_vd_vd(x, s), vd2getx_vd_vd2(y), vd2gety_vd_vd2(y))); } static INLINE CONST VECTOR_CC vdouble2 ddadd2_vd2_vd_vd2(vdouble x, vdouble2 y) { vdouble s = vadd_vd_vd_vd(x, vd2getx_vd_vd2(y)); vdouble v = vsub_vd_vd_vd(s, x); return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(vadd_vd_vd_vd(vsub_vd_vd_vd(x, vsub_vd_vd_vd(s, v)), vsub_vd_vd_vd(vd2getx_vd_vd2(y), v)), vd2gety_vd_vd2(y))); } static INLINE CONST VECTOR_CC vdouble2 ddadd_vd2_vd2_vd2(vdouble2 x, vdouble2 y) { // |x| >= |y| vdouble s = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)); return vd2setxy_vd2_vd_vd(s, vadd_vd_4vd(vsub_vd_vd_vd(vd2getx_vd_vd2(x), s), vd2getx_vd_vd2(y), vd2gety_vd_vd2(x), vd2gety_vd_vd2(y))); } static INLINE CONST VECTOR_CC vdouble2 ddadd2_vd2_vd2_vd2(vdouble2 
x, vdouble2 y) { vdouble s = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)); vdouble v = vsub_vd_vd_vd(s, vd2getx_vd_vd2(x)); vdouble t = vadd_vd_vd_vd(vsub_vd_vd_vd(vd2getx_vd_vd2(x), vsub_vd_vd_vd(s, v)), vsub_vd_vd_vd(vd2getx_vd_vd2(y), v)); return vd2setxy_vd2_vd_vd(s, vadd_vd_vd_vd(t, vadd_vd_vd_vd(vd2gety_vd_vd2(x), vd2gety_vd_vd2(y)))); } static INLINE CONST VECTOR_CC vdouble2 ddsub_vd2_vd_vd(vdouble x, vdouble y) { // |x| >= |y| vdouble s = vsub_vd_vd_vd(x, y); return vd2setxy_vd2_vd_vd(s, vsub_vd_vd_vd(vsub_vd_vd_vd(x, s), y)); } static INLINE CONST VECTOR_CC vdouble2 ddsub_vd2_vd2_vd2(vdouble2 x, vdouble2 y) { // |x| >= |y| vdouble s = vsub_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)); vdouble t = vsub_vd_vd_vd(vd2getx_vd_vd2(x), s); t = vsub_vd_vd_vd(t, vd2getx_vd_vd2(y)); t = vadd_vd_vd_vd(t, vd2gety_vd_vd2(x)); return vd2setxy_vd2_vd_vd(s, vsub_vd_vd_vd(t, vd2gety_vd_vd2(y))); } #ifdef ENABLE_FMA_DP static INLINE CONST VECTOR_CC vdouble2 dddiv_vd2_vd2_vd2(vdouble2 n, vdouble2 d) { vdouble t = vrec_vd_vd(vd2getx_vd_vd2(d)); vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(n), t); vdouble u = vfmapn_vd_vd_vd_vd(t, vd2getx_vd_vd2(n), s); vdouble v = vfmanp_vd_vd_vd_vd(vd2gety_vd_vd2(d), t, vfmanp_vd_vd_vd_vd(vd2getx_vd_vd2(d), t, vcast_vd_d(1))); return vd2setxy_vd2_vd_vd(s, vfma_vd_vd_vd_vd(s, v, vfma_vd_vd_vd_vd(vd2gety_vd_vd2(n), t, u))); } static INLINE CONST VECTOR_CC vdouble2 ddmul_vd2_vd_vd(vdouble x, vdouble y) { vdouble s = vmul_vd_vd_vd(x, y); return vd2setxy_vd2_vd_vd(s, vfmapn_vd_vd_vd_vd(x, y, s)); } static INLINE CONST VECTOR_CC vdouble2 ddsqu_vd2_vd2(vdouble2 x) { vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x)); return vd2setxy_vd2_vd_vd(s, vfma_vd_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x)), vd2gety_vd_vd2(x), vfmapn_vd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x), s))); } static INLINE CONST VECTOR_CC vdouble2 ddmul_vd2_vd2_vd2(vdouble2 x, vdouble2 y) { vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(x), 
vd2getx_vd_vd2(y)); return vd2setxy_vd2_vd_vd(s, vfma_vd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(y), vfma_vd_vd_vd_vd(vd2gety_vd_vd2(x), vd2getx_vd_vd2(y), vfmapn_vd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y), s)))); } static INLINE CONST VECTOR_CC vdouble ddmul_vd_vd2_vd2(vdouble2 x, vdouble2 y) { return vfma_vd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y), vfma_vd_vd_vd_vd(vd2gety_vd_vd2(x), vd2getx_vd_vd2(y), vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(y)))); } static INLINE CONST VECTOR_CC vdouble ddsqu_vd_vd2(vdouble2 x) { return vfma_vd_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x), vadd_vd_vd_vd(vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)), vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)))); } static INLINE CONST VECTOR_CC vdouble2 ddmul_vd2_vd2_vd(vdouble2 x, vdouble y) { vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(x), y); return vd2setxy_vd2_vd_vd(s, vfma_vd_vd_vd_vd(vd2gety_vd_vd2(x), y, vfmapn_vd_vd_vd_vd(vd2getx_vd_vd2(x), y, s))); } static INLINE CONST VECTOR_CC vdouble2 ddrec_vd2_vd(vdouble d) { vdouble s = vrec_vd_vd(d); return vd2setxy_vd2_vd_vd(s, vmul_vd_vd_vd(s, vfmanp_vd_vd_vd_vd(d, s, vcast_vd_d(1)))); } static INLINE CONST VECTOR_CC vdouble2 ddrec_vd2_vd2(vdouble2 d) { vdouble s = vrec_vd_vd(vd2getx_vd_vd2(d)); return vd2setxy_vd2_vd_vd(s, vmul_vd_vd_vd(s, vfmanp_vd_vd_vd_vd(vd2gety_vd_vd2(d), s, vfmanp_vd_vd_vd_vd(vd2getx_vd_vd2(d), s, vcast_vd_d(1))))); } #else static INLINE CONST VECTOR_CC vdouble2 dddiv_vd2_vd2_vd2(vdouble2 n, vdouble2 d) { vdouble t = vrec_vd_vd(vd2getx_vd_vd2(d)); vdouble dh = vupper_vd_vd(vd2getx_vd_vd2(d)), dl = vsub_vd_vd_vd(vd2getx_vd_vd2(d), dh); vdouble th = vupper_vd_vd(t ), tl = vsub_vd_vd_vd(t , th); vdouble nhh = vupper_vd_vd(vd2getx_vd_vd2(n)), nhl = vsub_vd_vd_vd(vd2getx_vd_vd2(n), nhh); vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(n), t); vdouble u = vadd_vd_5vd(vsub_vd_vd_vd(vmul_vd_vd_vd(nhh, th), s), vmul_vd_vd_vd(nhh, tl), vmul_vd_vd_vd(nhl, th), vmul_vd_vd_vd(nhl, tl), vmul_vd_vd_vd(s, 
vsub_vd_5vd(vcast_vd_d(1), vmul_vd_vd_vd(dh, th), vmul_vd_vd_vd(dh, tl), vmul_vd_vd_vd(dl, th), vmul_vd_vd_vd(dl, tl)))); return vd2setxy_vd2_vd_vd(s, vmla_vd_vd_vd_vd(t, vsub_vd_vd_vd(vd2gety_vd_vd2(n), vmul_vd_vd_vd(s, vd2gety_vd_vd2(d))), u)); } static INLINE CONST VECTOR_CC vdouble2 ddmul_vd2_vd_vd(vdouble x, vdouble y) { vdouble xh = vupper_vd_vd(x), xl = vsub_vd_vd_vd(x, xh); vdouble yh = vupper_vd_vd(y), yl = vsub_vd_vd_vd(y, yh); vdouble s = vmul_vd_vd_vd(x, y); return vd2setxy_vd2_vd_vd(s, vadd_vd_5vd(vmul_vd_vd_vd(xh, yh), vneg_vd_vd(s), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yl))); } static INLINE CONST VECTOR_CC vdouble2 ddmul_vd2_vd2_vd(vdouble2 x, vdouble y) { vdouble xh = vupper_vd_vd(vd2getx_vd_vd2(x)), xl = vsub_vd_vd_vd(vd2getx_vd_vd2(x), xh); vdouble yh = vupper_vd_vd(y ), yl = vsub_vd_vd_vd(y , yh); vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(x), y); return vd2setxy_vd2_vd_vd(s, vadd_vd_6vd(vmul_vd_vd_vd(xh, yh), vneg_vd_vd(s), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yl), vmul_vd_vd_vd(vd2gety_vd_vd2(x), y))); } static INLINE CONST VECTOR_CC vdouble2 ddmul_vd2_vd2_vd2(vdouble2 x, vdouble2 y) { vdouble xh = vupper_vd_vd(vd2getx_vd_vd2(x)), xl = vsub_vd_vd_vd(vd2getx_vd_vd2(x), xh); vdouble yh = vupper_vd_vd(vd2getx_vd_vd2(y)), yl = vsub_vd_vd_vd(vd2getx_vd_vd2(y), yh); vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)); return vd2setxy_vd2_vd_vd(s, vadd_vd_7vd(vmul_vd_vd_vd(xh, yh), vneg_vd_vd(s), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yl), vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(y)), vmul_vd_vd_vd(vd2gety_vd_vd2(x), vd2getx_vd_vd2(y)))); } static INLINE CONST VECTOR_CC vdouble ddmul_vd_vd2_vd2(vdouble2 x, vdouble2 y) { vdouble xh = vupper_vd_vd(vd2getx_vd_vd2(x)), xl = vsub_vd_vd_vd(vd2getx_vd_vd2(x), xh); vdouble yh = vupper_vd_vd(vd2getx_vd_vd2(y)), yl = vsub_vd_vd_vd(vd2getx_vd_vd2(y), yh); return vadd_vd_6vd(vmul_vd_vd_vd(vd2gety_vd_vd2(x), yh), 
vmul_vd_vd_vd(xh, vd2gety_vd_vd2(y)), vmul_vd_vd_vd(xl, yl), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yh)); } static INLINE CONST VECTOR_CC vdouble2 ddsqu_vd2_vd2(vdouble2 x) { vdouble xh = vupper_vd_vd(vd2getx_vd_vd2(x)), xl = vsub_vd_vd_vd(vd2getx_vd_vd2(x), xh); vdouble s = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x)); return vd2setxy_vd2_vd_vd(s, vadd_vd_5vd(vmul_vd_vd_vd(xh, xh), vneg_vd_vd(s), vmul_vd_vd_vd(vadd_vd_vd_vd(xh, xh), xl), vmul_vd_vd_vd(xl, xl), vmul_vd_vd_vd(vd2getx_vd_vd2(x), vadd_vd_vd_vd(vd2gety_vd_vd2(x), vd2gety_vd_vd2(x))))); } static INLINE CONST VECTOR_CC vdouble ddsqu_vd_vd2(vdouble2 x) { vdouble xh = vupper_vd_vd(vd2getx_vd_vd2(x)), xl = vsub_vd_vd_vd(vd2getx_vd_vd2(x), xh); return vadd_vd_5vd(vmul_vd_vd_vd(xh, vd2gety_vd_vd2(x)), vmul_vd_vd_vd(xh, vd2gety_vd_vd2(x)), vmul_vd_vd_vd(xl, xl), vadd_vd_vd_vd(vmul_vd_vd_vd(xh, xl), vmul_vd_vd_vd(xh, xl)), vmul_vd_vd_vd(xh, xh)); } static INLINE CONST VECTOR_CC vdouble2 ddrec_vd2_vd(vdouble d) { vdouble t = vrec_vd_vd(d); vdouble dh = vupper_vd_vd(d), dl = vsub_vd_vd_vd(d, dh); vdouble th = vupper_vd_vd(t), tl = vsub_vd_vd_vd(t, th); return vd2setxy_vd2_vd_vd(t, vmul_vd_vd_vd(t, vsub_vd_5vd(vcast_vd_d(1), vmul_vd_vd_vd(dh, th), vmul_vd_vd_vd(dh, tl), vmul_vd_vd_vd(dl, th), vmul_vd_vd_vd(dl, tl)))); } static INLINE CONST VECTOR_CC vdouble2 ddrec_vd2_vd2(vdouble2 d) { vdouble t = vrec_vd_vd(vd2getx_vd_vd2(d)); vdouble dh = vupper_vd_vd(vd2getx_vd_vd2(d)), dl = vsub_vd_vd_vd(vd2getx_vd_vd2(d), dh); vdouble th = vupper_vd_vd(t ), tl = vsub_vd_vd_vd(t , th); return vd2setxy_vd2_vd_vd(t, vmul_vd_vd_vd(t, vsub_vd_6vd(vcast_vd_d(1), vmul_vd_vd_vd(dh, th), vmul_vd_vd_vd(dh, tl), vmul_vd_vd_vd(dl, th), vmul_vd_vd_vd(dl, tl), vmul_vd_vd_vd(vd2gety_vd_vd2(d), t)))); } #endif static INLINE CONST VECTOR_CC vdouble2 ddsqrt_vd2_vd2(vdouble2 d) { vdouble t = vsqrt_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d))); return 
ddscale_vd2_vd2_vd(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd2(d, ddmul_vd2_vd_vd(t, t)), ddrec_vd2_vd(t)), vcast_vd_d(0.5)); } static INLINE CONST VECTOR_CC vdouble2 ddsqrt_vd2_vd(vdouble d) { vdouble t = vsqrt_vd_vd(d); return ddscale_vd2_vd2_vd(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd_vd2(d, ddmul_vd2_vd_vd(t, t)), ddrec_vd2_vd(t)), vcast_vd_d(0.5)); } sleef-3.5.1/src/libm/df.h000066400000000000000000000373731373003144100151030ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) typedef struct { vfloat x, y; } vfloat2; static vfloat vf2getx_vf_vf2(vfloat2 v) { return v.x; } static vfloat vf2gety_vf_vf2(vfloat2 v) { return v.y; } static vfloat2 vf2setxy_vf2_vf_vf(vfloat x, vfloat y) { vfloat2 v; v.x = x; v.y = y; return v; } static vfloat2 vf2setx_vf2_vf2_vf(vfloat2 v, vfloat d) { v.x = d; return v; } static vfloat2 vf2sety_vf2_vf2_vf(vfloat2 v, vfloat d) { v.y = d; return v; } #endif static INLINE CONST VECTOR_CC vfloat vupper_vf_vf(vfloat d) { return vreinterpret_vf_vi2(vand_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vcast_vi2_i(0xfffff000))); } static INLINE CONST VECTOR_CC vfloat2 vcast_vf2_vf_vf(vfloat h, vfloat l) { return vf2setxy_vf2_vf_vf(h, l); } static INLINE CONST VECTOR_CC vfloat2 vcast_vf2_f_f(float h, float l) { return vf2setxy_vf2_vf_vf(vcast_vf_f(h), vcast_vf_f(l)); } static INLINE CONST VECTOR_CC vfloat2 vcast_vf2_d(double d) { return vf2setxy_vf2_vf_vf(vcast_vf_f(d), vcast_vf_f(d - (float)d)); } static INLINE CONST VECTOR_CC vfloat2 vsel_vf2_vo_vf2_vf2(vopmask m, vfloat2 x, vfloat2 y) { return vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(m, vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)), vsel_vf_vo_vf_vf(m, vf2gety_vf_vf2(x), vf2gety_vf_vf2(y))); } static INLINE CONST VECTOR_CC vfloat2 vsel_vf2_vo_f_f_f_f(vopmask o, float x1, float y1, float x0, 
float y0) { return vf2setxy_vf2_vf_vf(vsel_vf_vo_f_f(o, x1, x0), vsel_vf_vo_f_f(o, y1, y0)); } static INLINE CONST VECTOR_CC vfloat2 vsel_vf2_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vf2_vo_vf2_vf2(o0, vcast_vf2_d(d0), vsel_vf2_vo_vf2_vf2(o1, vcast_vf2_d(d1), vcast_vf2_d(d2))); } static INLINE CONST VECTOR_CC vfloat2 vsel_vf2_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vf2_vo_vf2_vf2(o0, vcast_vf2_d(d0), vsel_vf2_vo_vf2_vf2(o1, vcast_vf2_d(d1), vsel_vf2_vo_vf2_vf2(o2, vcast_vf2_d(d2), vcast_vf2_d(d3)))); } static INLINE CONST VECTOR_CC vfloat2 vabs_vf2_vf2(vfloat2 x) { return vcast_vf2_vf_vf(vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0)), vreinterpret_vm_vf(vf2getx_vf_vf2(x))), vreinterpret_vm_vf(vf2getx_vf_vf2(x)))), vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0)), vreinterpret_vm_vf(vf2getx_vf_vf2(x))), vreinterpret_vm_vf(vf2gety_vf_vf2(x))))); } static INLINE CONST VECTOR_CC vfloat vadd_vf_3vf(vfloat v0, vfloat v1, vfloat v2) { return vadd_vf_vf_vf(vadd_vf_vf_vf(v0, v1), v2); } static INLINE CONST VECTOR_CC vfloat vadd_vf_4vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3) { return vadd_vf_3vf(vadd_vf_vf_vf(v0, v1), v2, v3); } static INLINE CONST VECTOR_CC vfloat vadd_vf_5vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4) { return vadd_vf_4vf(vadd_vf_vf_vf(v0, v1), v2, v3, v4); } static INLINE CONST VECTOR_CC vfloat vadd_vf_6vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4, vfloat v5) { return vadd_vf_5vf(vadd_vf_vf_vf(v0, v1), v2, v3, v4, v5); } static INLINE CONST VECTOR_CC vfloat vadd_vf_7vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4, vfloat v5, vfloat v6) { return vadd_vf_6vf(vadd_vf_vf_vf(v0, v1), v2, v3, v4, v5, v6); } static INLINE CONST VECTOR_CC vfloat vsub_vf_3vf(vfloat v0, vfloat v1, vfloat v2) { return vsub_vf_vf_vf(vsub_vf_vf_vf(v0, v1), v2); } 
static INLINE CONST VECTOR_CC vfloat vsub_vf_4vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3) { return vsub_vf_3vf(vsub_vf_vf_vf(v0, v1), v2, v3); } static INLINE CONST VECTOR_CC vfloat vsub_vf_5vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4) { return vsub_vf_4vf(vsub_vf_vf_vf(v0, v1), v2, v3, v4); } // static INLINE CONST VECTOR_CC vfloat2 dfneg_vf2_vf2(vfloat2 x) { return vcast_vf2_vf_vf(vneg_vf_vf(vf2getx_vf_vf2(x)), vneg_vf_vf(vf2gety_vf_vf2(x))); } static INLINE CONST VECTOR_CC vfloat2 dfabs_vf2_vf2(vfloat2 x) { return vcast_vf2_vf_vf(vabs_vf_vf(vf2getx_vf_vf2(x)), vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2gety_vf_vf2(x)), vand_vm_vm_vm(vreinterpret_vm_vf(vf2getx_vf_vf2(x)), vreinterpret_vm_vf(vcast_vf_f(-0.0f)))))); } static INLINE CONST VECTOR_CC vfloat2 dfnormalize_vf2_vf2(vfloat2 t) { vfloat s = vadd_vf_vf_vf(vf2getx_vf_vf2(t), vf2gety_vf_vf2(t)); return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(vsub_vf_vf_vf(vf2getx_vf_vf2(t), s), vf2gety_vf_vf2(t))); } static INLINE CONST VECTOR_CC vfloat2 dfscale_vf2_vf2_vf(vfloat2 d, vfloat s) { return vf2setxy_vf2_vf_vf(vmul_vf_vf_vf(vf2getx_vf_vf2(d), s), vmul_vf_vf_vf(vf2gety_vf_vf2(d), s)); } static INLINE CONST VECTOR_CC vfloat2 dfadd_vf2_vf_vf(vfloat x, vfloat y) { vfloat s = vadd_vf_vf_vf(x, y); return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(vsub_vf_vf_vf(x, s), y)); } static INLINE CONST VECTOR_CC vfloat2 dfadd2_vf2_vf_vf(vfloat x, vfloat y) { vfloat s = vadd_vf_vf_vf(x, y); vfloat v = vsub_vf_vf_vf(s, x); return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(vsub_vf_vf_vf(x, vsub_vf_vf_vf(s, v)), vsub_vf_vf_vf(y, v))); } static INLINE CONST VECTOR_CC vfloat2 dfadd2_vf2_vf_vf2(vfloat x, vfloat2 y) { vfloat s = vadd_vf_vf_vf(x, vf2getx_vf_vf2(y)); vfloat v = vsub_vf_vf_vf(s, x); return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(vadd_vf_vf_vf(vsub_vf_vf_vf(x, vsub_vf_vf_vf(s, v)), vsub_vf_vf_vf(vf2getx_vf_vf2(y), v)), vf2gety_vf_vf2(y))); } static INLINE CONST VECTOR_CC vfloat2 dfadd_vf2_vf2_vf(vfloat2 x, vfloat y) { 
vfloat s = vadd_vf_vf_vf(vf2getx_vf_vf2(x), y); return vf2setxy_vf2_vf_vf(s, vadd_vf_3vf(vsub_vf_vf_vf(vf2getx_vf_vf2(x), s), y, vf2gety_vf_vf2(x))); } static INLINE CONST VECTOR_CC vfloat2 dfsub_vf2_vf2_vf(vfloat2 x, vfloat y) { vfloat s = vsub_vf_vf_vf(vf2getx_vf_vf2(x), y); return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(vsub_vf_vf_vf(vsub_vf_vf_vf(vf2getx_vf_vf2(x), s), y), vf2gety_vf_vf2(x))); } static INLINE CONST VECTOR_CC vfloat2 dfadd2_vf2_vf2_vf(vfloat2 x, vfloat y) { vfloat s = vadd_vf_vf_vf(vf2getx_vf_vf2(x), y); vfloat v = vsub_vf_vf_vf(s, vf2getx_vf_vf2(x)); vfloat t = vadd_vf_vf_vf(vsub_vf_vf_vf(vf2getx_vf_vf2(x), vsub_vf_vf_vf(s, v)), vsub_vf_vf_vf(y, v)); return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(t, vf2gety_vf_vf2(x))); } static INLINE CONST VECTOR_CC vfloat2 dfadd_vf2_vf_vf2(vfloat x, vfloat2 y) { vfloat s = vadd_vf_vf_vf(x, vf2getx_vf_vf2(y)); return vf2setxy_vf2_vf_vf(s, vadd_vf_3vf(vsub_vf_vf_vf(x, s), vf2getx_vf_vf2(y), vf2gety_vf_vf2(y))); } static INLINE CONST VECTOR_CC vfloat2 dfadd_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { // |x| >= |y| vfloat s = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)); return vf2setxy_vf2_vf_vf(s, vadd_vf_4vf(vsub_vf_vf_vf(vf2getx_vf_vf2(x), s), vf2getx_vf_vf2(y), vf2gety_vf_vf2(x), vf2gety_vf_vf2(y))); } static INLINE CONST VECTOR_CC vfloat2 dfadd2_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { vfloat s = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)); vfloat v = vsub_vf_vf_vf(s, vf2getx_vf_vf2(x)); vfloat t = vadd_vf_vf_vf(vsub_vf_vf_vf(vf2getx_vf_vf2(x), vsub_vf_vf_vf(s, v)), vsub_vf_vf_vf(vf2getx_vf_vf2(y), v)); return vf2setxy_vf2_vf_vf(s, vadd_vf_vf_vf(t, vadd_vf_vf_vf(vf2gety_vf_vf2(x), vf2gety_vf_vf2(y)))); } static INLINE CONST VECTOR_CC vfloat2 dfsub_vf2_vf_vf(vfloat x, vfloat y) { // |x| >= |y| vfloat s = vsub_vf_vf_vf(x, y); return vf2setxy_vf2_vf_vf(s, vsub_vf_vf_vf(vsub_vf_vf_vf(x, s), y)); } static INLINE CONST VECTOR_CC vfloat2 dfsub_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { // |x| >= |y| vfloat s = 
vsub_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)); vfloat t = vsub_vf_vf_vf(vf2getx_vf_vf2(x), s); t = vsub_vf_vf_vf(t, vf2getx_vf_vf2(y)); t = vadd_vf_vf_vf(t, vf2gety_vf_vf2(x)); return vf2setxy_vf2_vf_vf(s, vsub_vf_vf_vf(t, vf2gety_vf_vf2(y))); } #ifdef ENABLE_FMA_SP static INLINE CONST VECTOR_CC vfloat2 dfdiv_vf2_vf2_vf2(vfloat2 n, vfloat2 d) { vfloat t = vrec_vf_vf(vf2getx_vf_vf2(d)); vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(n), t); vfloat u = vfmapn_vf_vf_vf_vf(t, vf2getx_vf_vf2(n), s); vfloat v = vfmanp_vf_vf_vf_vf(vf2gety_vf_vf2(d), t, vfmanp_vf_vf_vf_vf(vf2getx_vf_vf2(d), t, vcast_vf_f(1))); return vf2setxy_vf2_vf_vf(s, vfma_vf_vf_vf_vf(s, v, vfma_vf_vf_vf_vf(vf2gety_vf_vf2(n), t, u))); } static INLINE CONST VECTOR_CC vfloat2 dfmul_vf2_vf_vf(vfloat x, vfloat y) { vfloat s = vmul_vf_vf_vf(x, y); return vf2setxy_vf2_vf_vf(s, vfmapn_vf_vf_vf_vf(x, y, s)); } static INLINE CONST VECTOR_CC vfloat2 dfsqu_vf2_vf2(vfloat2 x) { vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x)); return vf2setxy_vf2_vf_vf(s, vfma_vf_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x)), vf2gety_vf_vf2(x), vfmapn_vf_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x), s))); } static INLINE CONST VECTOR_CC vfloat dfsqu_vf_vf2(vfloat2 x) { return vfma_vf_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x), vadd_vf_vf_vf(vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)), vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)))); } static INLINE CONST VECTOR_CC vfloat2 dfmul_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)); return vf2setxy_vf2_vf_vf(s, vfma_vf_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(y), vfma_vf_vf_vf_vf(vf2gety_vf_vf2(x), vf2getx_vf_vf2(y), vfmapn_vf_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y), s)))); } static INLINE CONST VECTOR_CC vfloat dfmul_vf_vf2_vf2(vfloat2 x, vfloat2 y) { return vfma_vf_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y), vfma_vf_vf_vf_vf(vf2gety_vf_vf2(x), vf2getx_vf_vf2(y), 
vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(y)))); } static INLINE CONST VECTOR_CC vfloat2 dfmul_vf2_vf2_vf(vfloat2 x, vfloat y) { vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(x), y); return vf2setxy_vf2_vf_vf(s, vfma_vf_vf_vf_vf(vf2gety_vf_vf2(x), y, vfmapn_vf_vf_vf_vf(vf2getx_vf_vf2(x), y, s))); } static INLINE CONST VECTOR_CC vfloat2 dfrec_vf2_vf(vfloat d) { vfloat s = vrec_vf_vf(d); return vf2setxy_vf2_vf_vf(s, vmul_vf_vf_vf(s, vfmanp_vf_vf_vf_vf(d, s, vcast_vf_f(1)))); } static INLINE CONST VECTOR_CC vfloat2 dfrec_vf2_vf2(vfloat2 d) { vfloat s = vrec_vf_vf(vf2getx_vf_vf2(d)); return vf2setxy_vf2_vf_vf(s, vmul_vf_vf_vf(s, vfmanp_vf_vf_vf_vf(vf2gety_vf_vf2(d), s, vfmanp_vf_vf_vf_vf(vf2getx_vf_vf2(d), s, vcast_vf_f(1))))); } #else static INLINE CONST VECTOR_CC vfloat2 dfdiv_vf2_vf2_vf2(vfloat2 n, vfloat2 d) { vfloat t = vrec_vf_vf(vf2getx_vf_vf2(d)); vfloat dh = vupper_vf_vf(vf2getx_vf_vf2(d)), dl = vsub_vf_vf_vf(vf2getx_vf_vf2(d), dh); vfloat th = vupper_vf_vf(t ), tl = vsub_vf_vf_vf(t , th); vfloat nhh = vupper_vf_vf(vf2getx_vf_vf2(n)), nhl = vsub_vf_vf_vf(vf2getx_vf_vf2(n), nhh); vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(n), t); vfloat u, w; w = vcast_vf_f(-1); w = vmla_vf_vf_vf_vf(dh, th, w); w = vmla_vf_vf_vf_vf(dh, tl, w); w = vmla_vf_vf_vf_vf(dl, th, w); w = vmla_vf_vf_vf_vf(dl, tl, w); w = vneg_vf_vf(w); u = vmla_vf_vf_vf_vf(nhh, th, vneg_vf_vf(s)); u = vmla_vf_vf_vf_vf(nhh, tl, u); u = vmla_vf_vf_vf_vf(nhl, th, u); u = vmla_vf_vf_vf_vf(nhl, tl, u); u = vmla_vf_vf_vf_vf(s, w, u); return vf2setxy_vf2_vf_vf(s, vmla_vf_vf_vf_vf(t, vsub_vf_vf_vf(vf2gety_vf_vf2(n), vmul_vf_vf_vf(s, vf2gety_vf_vf2(d))), u)); } static INLINE CONST VECTOR_CC vfloat2 dfmul_vf2_vf_vf(vfloat x, vfloat y) { vfloat xh = vupper_vf_vf(x), xl = vsub_vf_vf_vf(x, xh); vfloat yh = vupper_vf_vf(y), yl = vsub_vf_vf_vf(y, yh); vfloat s = vmul_vf_vf_vf(x, y), t; t = vmla_vf_vf_vf_vf(xh, yh, vneg_vf_vf(s)); t = vmla_vf_vf_vf_vf(xl, yh, t); t = vmla_vf_vf_vf_vf(xh, yl, t); t = vmla_vf_vf_vf_vf(xl, 
yl, t); return vf2setxy_vf2_vf_vf(s, t); } static INLINE CONST VECTOR_CC vfloat2 dfmul_vf2_vf2_vf(vfloat2 x, vfloat y) { vfloat xh = vupper_vf_vf(vf2getx_vf_vf2(x)), xl = vsub_vf_vf_vf(vf2getx_vf_vf2(x), xh); vfloat yh = vupper_vf_vf(y ), yl = vsub_vf_vf_vf(y , yh); vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(x), y), t; t = vmla_vf_vf_vf_vf(xh, yh, vneg_vf_vf(s)); t = vmla_vf_vf_vf_vf(xl, yh, t); t = vmla_vf_vf_vf_vf(xh, yl, t); t = vmla_vf_vf_vf_vf(xl, yl, t); t = vmla_vf_vf_vf_vf(vf2gety_vf_vf2(x), y, t); return vf2setxy_vf2_vf_vf(s, t); } static INLINE CONST VECTOR_CC vfloat2 dfmul_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { vfloat xh = vupper_vf_vf(vf2getx_vf_vf2(x)), xl = vsub_vf_vf_vf(vf2getx_vf_vf2(x), xh); vfloat yh = vupper_vf_vf(vf2getx_vf_vf2(y)), yl = vsub_vf_vf_vf(vf2getx_vf_vf2(y), yh); vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)), t; t = vmla_vf_vf_vf_vf(xh, yh, vneg_vf_vf(s)); t = vmla_vf_vf_vf_vf(xl, yh, t); t = vmla_vf_vf_vf_vf(xh, yl, t); t = vmla_vf_vf_vf_vf(xl, yl, t); t = vmla_vf_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(y), t); t = vmla_vf_vf_vf_vf(vf2gety_vf_vf2(x), vf2getx_vf_vf2(y), t); return vf2setxy_vf2_vf_vf(s, t); } static INLINE CONST VECTOR_CC vfloat dfmul_vf_vf2_vf2(vfloat2 x, vfloat2 y) { vfloat xh = vupper_vf_vf(vf2getx_vf_vf2(x)), xl = vsub_vf_vf_vf(vf2getx_vf_vf2(x), xh); vfloat yh = vupper_vf_vf(vf2getx_vf_vf2(y)), yl = vsub_vf_vf_vf(vf2getx_vf_vf2(y), yh); return vadd_vf_6vf(vmul_vf_vf_vf(vf2gety_vf_vf2(x), yh), vmul_vf_vf_vf(xh, vf2gety_vf_vf2(y)), vmul_vf_vf_vf(xl, yl), vmul_vf_vf_vf(xh, yl), vmul_vf_vf_vf(xl, yh), vmul_vf_vf_vf(xh, yh)); } static INLINE CONST VECTOR_CC vfloat2 dfsqu_vf2_vf2(vfloat2 x) { vfloat xh = vupper_vf_vf(vf2getx_vf_vf2(x)), xl = vsub_vf_vf_vf(vf2getx_vf_vf2(x), xh); vfloat s = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x)), t; t = vmla_vf_vf_vf_vf(xh, xh, vneg_vf_vf(s)); t = vmla_vf_vf_vf_vf(vadd_vf_vf_vf(xh, xh), xl, t); t = vmla_vf_vf_vf_vf(xl, xl, t); t = 
vmla_vf_vf_vf_vf(vf2getx_vf_vf2(x), vadd_vf_vf_vf(vf2gety_vf_vf2(x), vf2gety_vf_vf2(x)), t); return vf2setxy_vf2_vf_vf(s, t); } static INLINE CONST VECTOR_CC vfloat dfsqu_vf_vf2(vfloat2 x) { vfloat xh = vupper_vf_vf(vf2getx_vf_vf2(x)), xl = vsub_vf_vf_vf(vf2getx_vf_vf2(x), xh); return vadd_vf_5vf(vmul_vf_vf_vf(xh, vf2gety_vf_vf2(x)), vmul_vf_vf_vf(xh, vf2gety_vf_vf2(x)), vmul_vf_vf_vf(xl, xl), vadd_vf_vf_vf(vmul_vf_vf_vf(xh, xl), vmul_vf_vf_vf(xh, xl)), vmul_vf_vf_vf(xh, xh)); } static INLINE CONST VECTOR_CC vfloat2 dfrec_vf2_vf(vfloat d) { vfloat t = vrec_vf_vf(d); vfloat dh = vupper_vf_vf(d), dl = vsub_vf_vf_vf(d, dh); vfloat th = vupper_vf_vf(t), tl = vsub_vf_vf_vf(t, th); vfloat u = vcast_vf_f(-1); u = vmla_vf_vf_vf_vf(dh, th, u); u = vmla_vf_vf_vf_vf(dh, tl, u); u = vmla_vf_vf_vf_vf(dl, th, u); u = vmla_vf_vf_vf_vf(dl, tl, u); return vf2setxy_vf2_vf_vf(t, vmul_vf_vf_vf(vneg_vf_vf(t), u)); } static INLINE CONST VECTOR_CC vfloat2 dfrec_vf2_vf2(vfloat2 d) { vfloat t = vrec_vf_vf(vf2getx_vf_vf2(d)); vfloat dh = vupper_vf_vf(vf2getx_vf_vf2(d)), dl = vsub_vf_vf_vf(vf2getx_vf_vf2(d), dh); vfloat th = vupper_vf_vf(t ), tl = vsub_vf_vf_vf(t , th); vfloat u = vcast_vf_f(-1); u = vmla_vf_vf_vf_vf(dh, th, u); u = vmla_vf_vf_vf_vf(dh, tl, u); u = vmla_vf_vf_vf_vf(dl, th, u); u = vmla_vf_vf_vf_vf(dl, tl, u); u = vmla_vf_vf_vf_vf(vf2gety_vf_vf2(d), t, u); return vf2setxy_vf2_vf_vf(t, vmul_vf_vf_vf(vneg_vf_vf(t), u)); } #endif static INLINE CONST VECTOR_CC vfloat2 dfsqrt_vf2_vf2(vfloat2 d) { #ifdef ENABLE_RECSQRT_SP vfloat x = vrecsqrt_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d))); vfloat2 r = dfmul_vf2_vf2_vf(d, x); return dfscale_vf2_vf2_vf(dfmul_vf2_vf2_vf2(r, dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(r, x), vcast_vf_f(-3.0))), vcast_vf_f(-0.5)); #else vfloat t = vsqrt_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d))); return dfscale_vf2_vf2_vf(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf2(d, dfmul_vf2_vf_vf(t, t)), dfrec_vf2_vf(t)), vcast_vf_f(0.5)); #endif } static 
INLINE CONST VECTOR_CC vfloat2 dfsqrt_vf2_vf(vfloat d) { vfloat t = vsqrt_vf_vf(d); return dfscale_vf2_vf2_vf(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf_vf2(d, dfmul_vf2_vf_vf(t, t)), dfrec_vf2_vf(t)), vcast_vf_f(0.5f)); } sleef-3.5.1/src/libm/dispavx.c.org000066400000000000000000000132701373003144100167370ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include "misc.h" #if defined(_MSC_VER) #include #else #include #endif #if (defined(__GNUC__) || defined(__CLANG__)) && !defined(__INTEL_COMPILER) #define CONST const #else #define CONST #endif #define IMPORT_IS_EXPORT #include "sleef.h" static int cpuSupportsAVX2() { static int ret = -1; if (ret == -1) { int32_t reg[4]; Sleef_x86CpuID(reg, 7, 0); ret = (reg[1] & (1 << 5)) != 0; } return ret; } static int cpuSupportsFMA() { static int ret = -1; if (ret == -1) { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); ret = (reg[2] & (1 << 12)) != 0; } return ret; } static int cpuSupportsFMA4() { static int ret = -1; if (ret == -1) { int32_t reg[4]; Sleef_x86CpuID(reg, 0x80000001, 0); ret = (reg[2] & (1 << 16)) != 0; } return ret; } #ifdef ENABLE_FMA4 #define SUBST_IF_FMA4(funcfma4) if (cpuSupportsFMA4()) p = funcfma4; #else #define SUBST_IF_FMA4(funcfma4) #endif #ifdef ENABLE_AVX2 #define SUBST_IF_AVX2(funcavx2) if (cpuSupportsAVX2() && cpuSupportsFMA()) p = funcavx2; #else #define SUBST_IF_AVX2(funcavx2) #endif #define DISPATCH_vf_vf(fptype, funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0); \ static CONST VECTOR_CC fptype dfn(fptype arg0) { \ fptype CONST VECTOR_CC (*p)(fptype arg0) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0) = dfn; \ EXPORT CONST VECTOR_CC fptype 
funcName(fptype arg0) { return (*pfn)(arg0); } #define DISPATCH_vf_vf_vf(fptype, funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0, fptype arg1); \ static CONST VECTOR_CC fptype dfn(fptype arg0, fptype arg1) { \ fptype CONST VECTOR_CC (*p)(fptype arg0, fptype arg1) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0, arg1); \ } \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0, fptype arg1) = dfn; \ EXPORT CONST VECTOR_CC fptype funcName(fptype arg0, fptype arg1) { return (*pfn)(arg0, arg1); } #define DISPATCH_vf2_vf(fptype, fptype2, funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST VECTOR_CC fptype2 (*pfn)(fptype arg0); \ static CONST VECTOR_CC fptype2 dfn(fptype arg0) { \ fptype2 CONST VECTOR_CC (*p)(fptype arg0) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST VECTOR_CC fptype2 (*pfn)(fptype arg0) = dfn; \ EXPORT CONST VECTOR_CC fptype2 funcName(fptype arg0) { return (*pfn)(arg0); } #define DISPATCH_vf_vf_vi(fptype, itype, funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0, itype arg1); \ static CONST VECTOR_CC fptype dfn(fptype arg0, itype arg1) { \ fptype CONST VECTOR_CC (*p)(fptype arg0, itype arg1) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0, arg1); \ } \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0, itype arg1) = dfn; \ EXPORT CONST VECTOR_CC fptype funcName(fptype arg0, itype arg1) { return (*pfn)(arg0, arg1); } #define DISPATCH_vi_vf(fptype, itype, funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST VECTOR_CC itype (*pfn)(fptype arg0); \ static CONST VECTOR_CC itype dfn(fptype arg0) { \ itype CONST VECTOR_CC (*p)(fptype arg0) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST VECTOR_CC itype 
(*pfn)(fptype arg0) = dfn; \ EXPORT CONST VECTOR_CC itype funcName(fptype arg0) { return (*pfn)(arg0); } #define DISPATCH_vf_vf_vf_vf(fptype, funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0, fptype arg1, fptype arg2); \ static CONST VECTOR_CC fptype dfn(fptype arg0, fptype arg1, fptype arg2) { \ fptype CONST VECTOR_CC (*p)(fptype arg0, fptype arg1, fptype arg2) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0, arg1, arg2); \ } \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0, fptype arg1, fptype arg2) = dfn; \ EXPORT CONST VECTOR_CC fptype funcName(fptype arg0, fptype arg1, fptype arg2) { return (*pfn)(arg0, arg1, arg2); } #define DISPATCH_i_i(funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST int (*pfn)(int arg0); \ static CONST int dfn(int arg0) { \ int CONST (*p)(int) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST int (*pfn)(int arg0) = dfn; \ EXPORT CONST int funcName(int arg0) { return (*pfn)(arg0); } #define DISPATCH_p_i(funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST void *(*pfn)(int arg0); \ static CONST void *dfn(int arg0) { \ CONST void *(*p)(int) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST void *(*pfn)(int arg0) = dfn; \ EXPORT CONST void *funcName(int arg0) { return (*pfn)(arg0); } // sleef-3.5.1/src/libm/dispsse.c.org000066400000000000000000000145071373003144100167370ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include "misc.h" #if defined(_MSC_VER) #include #else #include #endif #if (defined(__GNUC__) || defined(__CLANG__)) && !defined(__INTEL_COMPILER) #define CONST const #else #define CONST #endif #define IMPORT_IS_EXPORT #include "sleef.h" static int cpuSupportsSSE4_1() { static int ret = -1; if (ret == -1) { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); ret = (reg[2] & (1 << 19)) != 0; } return ret; } static int cpuSupportsAVX2() { static int ret = -1; if (ret == -1) { int32_t reg[4]; Sleef_x86CpuID(reg, 7, 0); ret = (reg[1] & (1 << 5)) != 0; } return ret; } static int cpuSupportsFMA() { static int ret = -1; if (ret == -1) { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); ret = (reg[2] & (1 << 12)) != 0; } return ret; } #define SUBST_IF_SSE4(funcsse4) if (cpuSupportsSSE4_1()) p = funcsse4; #ifdef ENABLE_AVX2 #define SUBST_IF_AVX2(funcavx2) if (cpuSupportsAVX2() && cpuSupportsFMA()) p = funcavx2; #else #define SUBST_IF_AVX2(funcavx2) #endif /* * DISPATCH_R_X, DISPATCH_R_X_Y and DISPATCH_R_X_Y_Z are the macro for * defining dispatchers. R, X, Y and Z represent the data types of * return value, first argument, second argument and third argument, * respectively. vf, vi, i and p correspond to vector FP, vector * integer, scalar integer and scalar pointer types, respectively. 
* * The arguments for the macros are as follows: * fptype : FP type name * funcname : Fundamental function name * pfn : Name of pointer of the function to the dispatcher * dfn : Name of the dispatcher function * funcsse2 : Name of the SSE2 function * funcsse4 : Name of the SSE4 function * funcavx2 : Name of the AVX2 function */ #define DISPATCH_vf_vf(fptype, funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0); \ static CONST VECTOR_CC fptype dfn(fptype arg0) { \ fptype CONST VECTOR_CC (*p)(fptype arg0) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0) = dfn; \ EXPORT CONST VECTOR_CC fptype funcName(fptype arg0) { return (*pfn)(arg0); } #define DISPATCH_vf_vf_vf(fptype, funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0, fptype arg1); \ static CONST VECTOR_CC fptype dfn(fptype arg0, fptype arg1) { \ fptype CONST VECTOR_CC (*p)(fptype arg0, fptype arg1) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0, arg1); \ } \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0, fptype arg1) = dfn; \ EXPORT CONST VECTOR_CC fptype funcName(fptype arg0, fptype arg1) { return (*pfn)(arg0, arg1); } #define DISPATCH_vf2_vf(fptype, fptype2, funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST VECTOR_CC fptype2 (*pfn)(fptype arg0); \ static CONST VECTOR_CC fptype2 dfn(fptype arg0) { \ fptype2 CONST VECTOR_CC (*p)(fptype arg0) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST VECTOR_CC fptype2 (*pfn)(fptype arg0) = dfn; \ EXPORT CONST VECTOR_CC fptype2 funcName(fptype arg0) { return (*pfn)(arg0); } #define DISPATCH_vf_vf_vi(fptype, itype, funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0, itype arg1); \ static 
CONST VECTOR_CC fptype dfn(fptype arg0, itype arg1) { \ fptype CONST VECTOR_CC (*p)(fptype arg0, itype arg1) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0, arg1); \ } \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0, itype arg1) = dfn; \ EXPORT CONST VECTOR_CC fptype funcName(fptype arg0, itype arg1) { return (*pfn)(arg0, arg1); } #define DISPATCH_vi_vf(fptype, itype, funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST VECTOR_CC itype (*pfn)(fptype arg0); \ static CONST VECTOR_CC itype dfn(fptype arg0) { \ itype CONST VECTOR_CC (*p)(fptype arg0) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST VECTOR_CC itype (*pfn)(fptype arg0) = dfn; \ EXPORT CONST VECTOR_CC itype funcName(fptype arg0) { return (*pfn)(arg0); } #define DISPATCH_vf_vf_vf_vf(fptype, funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0, fptype arg1, fptype arg2); \ static CONST VECTOR_CC fptype dfn(fptype arg0, fptype arg1, fptype arg2) { \ fptype CONST VECTOR_CC (*p)(fptype arg0, fptype arg1, fptype arg2) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0, arg1, arg2); \ } \ static CONST VECTOR_CC fptype (*pfn)(fptype arg0, fptype arg1, fptype arg2) = dfn; \ EXPORT CONST VECTOR_CC fptype funcName(fptype arg0, fptype arg1, fptype arg2) { return (*pfn)(arg0, arg1, arg2); } #define DISPATCH_i_i(funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST int (*pfn)(int arg0); \ static CONST int dfn(int arg0) { \ int CONST (*p)(int) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST int (*pfn)(int arg0) = dfn; \ EXPORT CONST int funcName(int arg0) { return (*pfn)(arg0); } #define DISPATCH_p_i(funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST void *(*pfn)(int arg0); \ static CONST void 
*dfn(int arg0) { \ CONST void *(*p)(int) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST void *(*pfn)(int arg0) = dfn; \ EXPORT CONST void *funcName(int arg0) { return (*pfn)(arg0); } // sleef-3.5.1/src/libm/estrin.h000066400000000000000000000057051373003144100160100ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // These are macros for evaluating polynomials using Estrin's method #define POLY2(x, c1, c0) MLA(x, C2V(c1), C2V(c0)) #define POLY3(x, x2, c2, c1, c0) MLA(x2, C2V(c2), MLA(x, C2V(c1), C2V(c0))) #define POLY4(x, x2, c3, c2, c1, c0) MLA(x2, MLA(x, C2V(c3), C2V(c2)), MLA(x, C2V(c1), C2V(c0))) #define POLY5(x, x2, x4, c4, c3, c2, c1, c0) MLA(x4, C2V(c4), POLY4(x, x2, c3, c2, c1, c0)) #define POLY6(x, x2, x4, c5, c4, c3, c2, c1, c0) MLA(x4, POLY2(x, c5, c4), POLY4(x, x2, c3, c2, c1, c0)) #define POLY7(x, x2, x4, c6, c5, c4, c3, c2, c1, c0) MLA(x4, POLY3(x, x2, c6, c5, c4), POLY4(x, x2, c3, c2, c1, c0)) #define POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0) MLA(x4, POLY4(x, x2, c7, c6, c5, c4), POLY4(x, x2, c3, c2, c1, c0)) #define POLY9(x, x2, x4, x8, c8, c7, c6, c5, c4, c3, c2, c1, c0)\ MLA(x8, C2V(c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0)) #define POLY10(x, x2, x4, x8, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\ MLA(x8, POLY2(x, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0)) #define POLY11(x, x2, x4, x8, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\ MLA(x8, POLY3(x, x2, ca, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0)) #define POLY12(x, x2, x4, x8, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\ MLA(x8, POLY4(x, x2, cb, ca, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0)) #define POLY13(x, x2, x4, x8, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\ 
MLA(x8, POLY5(x, x2, x4, cc, cb, ca, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0)) #define POLY14(x, x2, x4, x8, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\ MLA(x8, POLY6(x, x2, x4, cd, cc, cb, ca, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0)) #define POLY15(x, x2, x4, x8, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\ MLA(x8, POLY7(x, x2, x4, ce, cd, cc, cb, ca, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0)) #define POLY16(x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\ MLA(x8, POLY8(x, x2, x4, cf, ce, cd, cc, cb, ca, c9, c8), POLY8(x, x2, x4, c7, c6, c5, c4, c3, c2, c1, c0)) #define POLY17(x, x2, x4, x8, x16, d0, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\ MLA(x16, C2V(d0), POLY16(x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)) #define POLY18(x, x2, x4, x8, x16, d1, d0, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\ MLA(x16, POLY2(x, d1, d0), POLY16(x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)) #define POLY19(x, x2, x4, x8, x16, d2, d1, d0, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)\ MLA(x16, POLY3(x, x2, d2, d1, d0), POLY16(x, x2, x4, x8, cf, ce, cd, cc, cb, ca, c9, c8, c7, c6, c5, c4, c3, c2, c1, c0)) sleef-3.5.1/src/libm/funcproto.h000066400000000000000000000060171373003144100165200ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// One row of the function table below.
typedef struct {
  char *name;     // base function name; a NULL name terminates funcList
  int ulp;        // error bound in ULP times 10; negative in rows with no error bound
  int ulpSuffix;  // index into the "ulpSuffix" legend below
  int funcType;   // prototype shape, see the "funcType" legend below
  int flags;      // bit mask, see the "flags" legend below
} funcSpec;

/*
  ulp : (error bound in ulp) * 10

  ulpSuffix:
  0 : ""
  1 : "_u1"
  2 : "_u05"
  3 : "_u35"
  4 : "_u15"
  5 : "_u3500"

  funcType:
  0 : vdouble func(vdouble);
  1 : vdouble func(vdouble, vdouble);
  2 : vdouble2 func(vdouble);   GNUABI : void func(vdouble, double *, double *);
  3 : vdouble func(vdouble, vint);
  4 : vint func(vdouble);
  5 : vdouble func(vdouble, vdouble, vdouble);
  6 : vdouble2 func(vdouble);   GNUABI : vdouble func(vdouble, double *);
  7 : int func(int);
  8 : void *func(int);

  flags:
  1 : No GNUABI
  2 : No double func
 */

// Table of functions; the last row has a NULL name and acts as the sentinel.
funcSpec funcList[] = {
  { "sin", 35, 0, 0, 0 },
  { "cos", 35, 0, 0, 0 },
  { "sincos", 35, 0, 2, 0 },
  { "tan", 35, 0, 0, 0 },
  { "asin", 35, 0, 0, 0 },
  { "acos", 35, 0, 0, 0 },
  { "atan", 35, 0, 0, 0 },
  { "atan2", 35, 0, 1, 0 },
  { "log", 35, 0, 0, 0 },
  { "cbrt", 35, 0, 0, 0 },
  { "sin", 10, 1, 0, 0 },
  { "cos", 10, 1, 0, 0 },
  { "sincos", 10, 1, 2, 0 },
  { "tan", 10, 1, 0, 0 },
  { "asin", 10, 1, 0, 0 },
  { "acos", 10, 1, 0, 0 },
  { "atan", 10, 1, 0, 0 },
  { "atan2", 10, 1, 1, 0 },
  { "log", 10, 1, 0, 0 },
  { "cbrt", 10, 1, 0, 0 },
  { "exp", 10, 0, 0, 0 },
  { "pow", 10, 0, 1, 0 },
  { "sinh", 10, 0, 0, 0 },
  { "cosh", 10, 0, 0, 0 },
  { "tanh", 10, 0, 0, 0 },
  { "sinh", 35, 3, 0, 0 },
  { "cosh", 35, 3, 0, 0 },
  { "tanh", 35, 3, 0, 0 },
  { "fastsin", 3500, 5, 0, 2 },
  { "fastcos", 3500, 5, 0, 2 },
  { "fastpow", 3500, 5, 1, 2 },
  { "asinh", 10, 0, 0, 0 },
  { "acosh", 10, 0, 0, 0 },
  { "atanh", 10, 0, 0, 0 },
  { "exp2", 10, 0, 0, 0 },
  { "exp2", 35, 3, 0, 0 },
  { "exp10", 10, 0, 0, 0 },
  { "exp10", 35, 3, 0, 0 },
  { "expm1", 10, 0, 0, 0 },
  { "log10", 10, 0, 0, 0 },
  { "log2", 10, 0, 0, 0 },
  { "log2", 35, 3, 0, 0 },
  { "log1p", 10, 0, 0, 0 },
  { "sincospi", 5, 2, 2, 0 },
  { "sincospi", 35, 3, 2, 0 },
  { "sinpi", 5, 2, 0, 0 },
  { "cospi", 5, 2, 0, 0 },
  { "ldexp", -1, 0, 3, 0 },
  { "ilogb", -1, 0, 4, 0 },
  { "fma", -1, 0, 5, 0 },
  { "sqrt", -1, 0, 0, 0 },
  { "sqrt", 5, 2, 0, 1 },
  { "sqrt", 35, 3, 0, 0 },
  { "hypot", 5, 2, 1, 0 },
  { "hypot", 35, 3, 1, 0 },
  { "fabs", -1, 0, 0, 0 },
  { "copysign", -1, 0, 1, 0 },
  { "fmax", -1, 0, 1, 0 },
  { "fmin", -1, 0, 1, 0 },
  { "fdim", -1, 0, 1, 0 },
  { "trunc", -1, 0, 0, 0 },
  { "floor", -1, 0, 0, 0 },
  { "ceil", -1, 0, 0, 0 },
  { "round", -1, 0, 0, 0 },
  { "rint", -1, 0, 0, 0 },
  { "nextafter", -1, 0, 1, 0 },
  { "frfrexp", -1, 0, 0, 0 },
  { "expfrexp", -1, 0, 4, 0 },
  { "fmod", -1, 0, 1, 0 },
  { "remainder", -1, 0, 1, 0 },
  { "modf", -1, 0, 6, 0 },
  { "lgamma", 10, 1, 0, 0 },
  { "tgamma", 10, 1, 0, 0 },
  { "erf", 10, 1, 0, 0 },
  { "erfc", 15, 4, 0, 0 },
  { "getInt", -1, 0, 7, 1},
  { "getPtr", -1, 0, 8, 1},
  { NULL, -1, 0, 0, 0 },
};
sleef-3.5.1/src/libm/keywords.txt000066400000000000000000000135711373003144100167430ustar00rootroot00000000000000vdouble vdouble2 vfloat vfloat2 vint vint2 vmask vmask2 vopmask versatileVector dd2 df2 ddi ddi_t dfi dfi_t di_t fi_t # atan2k atan2k_u1 atan2kf atan2kf_u1 cospifk cospik expk expk2 expk2f expk3f expkf expm1fk expm1k gammafk gammak logk logk2 logk2f logk3f logkf sinpifk sinpik # rempi rempif rempisub rempisubf # dd2geta_vd2_dd2 dd2getb_vd2_dd2 dd2setab_dd2_vd2_vd2 ddabs_vd2_vd2 ddadd2_vd2_vd2_vd ddadd2_vd2_vd2_vd2 ddadd2_vd2_vd_vd ddadd2_vd2_vd_vd2 ddadd_vd2_vd2_vd ddadd_vd2_vd2_vd2 ddadd_vd2_vd_vd ddadd_vd2_vd_vd2 dddiv_vd2_vd2_vd2 ddigetdd_vd2_ddi ddigeti_vi_ddi ddisetdd_ddi_ddi_vd2 ddisetddi_ddi_vd2_vi ddmul_vd2_vd2_vd ddmul_vd2_vd2_vd2 ddmul_vd2_vd_vd ddmul_vd_vd2_vd2 ddneg_vd2_vd2 ddnormalize_vd2_vd2 ddrec_vd2_vd ddrec_vd2_vd2 ddscale_vd2_vd2_vd ddsqrt_vd2_vd ddsqrt_vd2_vd2 ddsqu_vd2_vd2 ddsqu_vd_vd2 ddsub_vd2_vd2_vd ddsub_vd2_vd2_vd2 ddsub_vd2_vd_vd df2geta_vf2_df2 df2getb_vf2_df2 df2setab_df2_vf2_vf2 dfabs_vf2_vf2 dfadd2_vf2_vf2_vf dfadd2_vf2_vf2_vf2 dfadd2_vf2_vf_vf dfadd2_vf2_vf_vf2 dfadd_vf2_vf2_vf dfadd_vf2_vf2_vf2 dfadd_vf2_vf_vf dfadd_vf2_vf_vf2 dfdiv_vf2_vf2_vf2 dfigetdf_vf2_dfi dfigeti_vi2_dfi dfisetdf_dfi_dfi_vf2
dfisetdfi_dfi_vf2_vi2 dfmul_vf2_vf2_vf dfmul_vf2_vf2_vf2 dfmul_vf2_vf_vf dfmul_vf_vf2_vf2 dfneg_vf2_vf2 dfnormalize_vf2_vf2 dfrec_vf2_vf dfrec_vf2_vf2 dfscale_vf2_vf2_vf dfsqrt_vf2_vf dfsqrt_vf2_vf2 dfsqu_vf2_vf2 dfsqu_vf_vf2 dfsub_vf2_vf2_vf dfsub_vf2_vf2_vf2 dfsub_vf2_vf_vf digetd_vd_di digeti_vi_di disetdi_di_vd_vi figetd_vf_di figeti_vi2_di fisetdi_fi_vf_vi2 # vabs_vd_vd vabs_vf2_vf2 vabs_vf_vf vadd64_vm_vm_vm vadd_vd_3vd vadd_vd_4vd vadd_vd_5vd vadd_vd_6vd vadd_vd_7vd vadd_vd_vd_vd vadd_vf_3vf vadd_vf_4vf vadd_vf_5vf vadd_vf_6vf vadd_vf_7vf vadd_vf_vf_vf vadd_vi2_vi2_vi2 vadd_vi_vi_vi vand_vi2_vi2_vi2 vand_vi2_vo_vi2 vand_vi_vi_vi vand_vi_vo_vi vand_vm_vm_vm vand_vm_vo32_vm vand_vm_vo64_vm vand_vo_vo_vo vandnot_vi2_vi2_vi2 vandnot_vi2_vo_vi2 vandnot_vi_vi_vi vandnot_vi_vo_vi vandnot_vm_vm_vm vandnot_vm_vo32_vm vandnot_vm_vo64_vm vandnot_vo_vo_vo vavailability_i vcast_d_vd vcast_f_vf vcast_vd2_d_d vcast_vd2_vd_vd vcast_vd_d vcast_vd_vi vcast_vf2_d vcast_vf2_f_f vcast_vf2_vf_vf vcast_vf_f vcast_vf_vi2 vcast_vi2_i vcast_vi2_i_i vcast_vi2_vm vcast_vi_i vcast_vi_vm vcast_vm_i_i vcast_vm_vi vcast_vm_vi2 vcast_vm_vo vcast_vo32_vo64 vcast_vo64_vo32 vcastu_vi2_vi vcastu_vi_vi2 vcopysign_vd_vd_vd vcopysign_vf_vf_vf vd2getx_vd_vd2 vd2gety_vd_vd2 vd2setx_vd2_vd2_vd vd2setxy_vd2_vd_vd vd2sety_vd2_vd2_vd vdiv_vd_vd_vd vdiv_vf_vf_vf veq64_vo_vm_vm veq_vi2_vi2_vi2 veq_vi_vi_vi veq_vo_vd_vd veq_vo_vf_vf veq_vo_vi2_vi2 veq_vo_vi_vi vf2getx_vf_vf2 vf2gety_vf_vf2 vf2setx_vf2_vf2_vf vf2setxy_vf2_vf_vf vf2sety_vf2_vf2_vf vfma_vd_vd_vd_vd vfma_vf_vf_vf_vf vfmann_vd_vd_vd_vd vfmann_vf_vf_vf_vf vfmanp_vd_vd_vd_vd vfmanp_vf_vf_vf_vf vfmapn_vd_vd_vd_vd vfmapn_vf_vf_vf_vf vfmapp_vd_vd_vd_vd vfmapp_vf_vf_vf_vf vgather_vd_p_vi vgather_vf_p_vi2 vge_vo_vd_vd vge_vo_vf_vf vgetexp_vd_vd vgetexp_vf_vf vgetmant_vd_vd vgetmant_vf_vf vgt64_vo_vm_vm vgt_vi2_vi2_vi2 vgt_vi_vi_vi vgt_vo_vd_vd vgt_vo_vf_vf vgt_vo_vi2_vi2 vgt_vo_vi_vi vilogb2k_vi2_vf vilogb2k_vi_vd vilogbk_vi2_vf vilogbk_vi_vd 
vinterleave_vd_vd vinterleave_vm2_vm2 vinterleave_vm_vm visinf2_vd_vd_vd visinf2_vf_vf_vf visinf_vo_vd visinf_vo_vf visint_vo_vd visint_vo_vf visminf_vo_vd visminf_vo_vf visnan_vo_vd visnan_vo_vf visnegzero_vo_vd visnegzero_vo_vf visnumber_vo_vd visnumber_vo_vf visodd_vo_vd vispinf_vo_vd vispinf_vo_vf vldexp2_vd_vd_vi vldexp2_vf_vf_vi2 vldexp3_vd_vd_vi vldexp3_vf_vf_vi2 vldexp_vd_vd_vi vldexp_vf_vf_vi2 vle_vo_vd_vd vle_vo_vf_vf vload_vd_p vload_vf_p vloadu_vd_p vloadu_vf_p vloadu_vi2_p vloadu_vi_p vloadu_vm2_p vlt_vo_vd_vd vlt_vo_vf_vf vmax_vd_vd_vd vmax_vf_vf_vf vmin_vd_vd_vd vmin_vf_vf_vf vmla_vd_vd_vd_vd vmla_vf_vf_vf_vf vmlanp_vd_vd_vd_vd vmlanp_vf_vf_vf_vf vmlapn_vd_vd_vd_vd vmlapn_vf_vf_vf_vf vmlsubadd_vd_vd_vd_vd vmlsubadd_vf_vf_vf_vf vmul_vd_vd_vd vmul_vf_vf_vf vmulsign_vd_vd_vd vmulsign_vf_vf_vf vneg64_vm_vm vneg_vd_vd vneg_vf_vf vneg_vi2_vi2 vneg_vi_vi vnegpos_vd_vd vnegpos_vf_vf vneq_vo_vd_vd vneq_vo_vf_vf vnot_vo32_vo32 vnot_vo64_vo64 vor_vi2_vi2_vi2 vor_vi_vi_vi vor_vm_vm_vm vor_vm_vo32_vm vor_vm_vo64_vm vor_vo_vo_vo vorsign_vd_vd_vd vorsign_vf_vf_vf vposneg_vd_vd vposneg_vf_vf vpow2i_vd_vi vpow2i_vf_vi2 vprefetch_v_p vptrunc vptruncf vrec_vd_vd vrec_vf_vf vreinterpret_vd_vf vreinterpret_vd_vi2 vreinterpret_vd_vm vreinterpret_vf_vd vreinterpret_vf_vi2 vreinterpret_vf_vm vreinterpret_vi2_vd vreinterpret_vi2_vf vreinterpret_vm_vd vreinterpret_vm_vf vrev21_vd_vd vrev21_vf_vf vrev21_vi2_vi2 vreva2_vd_vd vreva2_vf_vf vrint_vd_vd vrint_vf_vf vrint_vi2_vf vrint_vi_vd vrintfk2_vf_vf vrintk2_vd_vd vscatter2_v_p_i_i_vd vscatter2_v_p_i_i_vf vsel_vd2_vo_d_d_d_d vsel_vd2_vo_vd2_vd2 vsel_vd_vo_d_d vsel_vd_vo_vd_vd vsel_vd_vo_vo_d_d_d vsel_vd_vo_vo_vo_d_d_d_d vsel_vf2_vo_f_f_f_f vsel_vf2_vo_vf2_vf2 vsel_vf2_vo_vo_d_d_d vsel_vf2_vo_vo_vo_d_d_d_d vsel_vf_vo_f_f vsel_vf_vo_vf_vf vsel_vf_vo_vo_f_f_f vsel_vf_vo_vo_vo_f_f_f_f vsel_vi2_vf_vf_vi2_vi2 vsel_vi2_vf_vi2 vsel_vi2_vo_vi2_vi2 vsel_vi_vd_vd_vi_vi vsel_vi_vd_vi vsel_vi_vo_vi_vi vsel_vm_vo64_vm_vm vsign_vd_vd 
vsign_vf_vf vsignbit_vm_vd vsignbit_vm_vf vsignbit_vo_vd vsignbit_vo_vf vsll_vi2_vi2_i vsll_vi_vi_i vsqrt_vd_vd vsqrt_vf_vf vsra_vi2_vi2_i vsra_vi_vi_i vsrl_vi2_vi2_i vsrl_vi_vi_i vsscatter2_v_p_i_i_vd vsscatter2_v_p_i_i_vf vstore_v_p_vd vstore_v_p_vf vstoreu_v_p_vd vstoreu_v_p_vf vstoreu_v_p_vi vstoreu_v_p_vi2 vstream_v_p_vd vstream_v_p_vf vsub64_vm_vm_vm vsub_vd_3vd vsub_vd_4vd vsub_vd_5vd vsub_vd_6vd vsub_vd_vd_vd vsub_vf_3vf vsub_vf_4vf vsub_vf_5vf vsub_vf_vf_vf vsub_vi2_vi2_vi2 vsub_vi_vi_vi vsubadd_vd_vd_vd vsubadd_vf_vf_vf vtestallones_i_vo32 vtestallones_i_vo64 vtestallzeros_i_vo64 vtoward0 vtoward0f vtruncate_vd_vd vtruncate_vf_vf vtruncate_vi2_vf vtruncate_vi_vd vuninterleave_vd_vd vuninterleave_vi_vi vuninterleave_vm2_vm2 vuninterleave_vm_vm vupper_vd_vd vupper_vf_vf vxor_vi2_vi2_vi2 vxor_vi_vi_vi vxor_vm_vm_vm vxor_vm_vo32_vm vxor_vm_vo64_vm vxor_vo_vo_vo sleef-3.5.1/src/libm/mkalias.c000066400000000000000000000112171373003144100161130ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include "funcproto.h" int main(int argc, char **argv) { if (argc == 2 && strcmp(argv[1], "0") == 0) exit(0); if (argc < 6) { fprintf(stderr, "Usage : %s \n", argv[0]); exit(-1); } int vw = atoi(argv[1]); int fptype = vw >= 0 ? 0 : 1; vw = vw < 0 ? -vw : vw; char *mangledisa = argv[4]; char *isaname = argc == 6 ? 
argv[5] : ""; char * vectorcc=""; #ifdef ENABLE_AAVPCS if (strcmp(isaname, "advsimd") == 0) vectorcc =" __attribute__((aarch64_vector_pcs))"; #endif static char *argType2[] = { "a0", "a0, a1", "a0", "a0, a1", "a0", "a0, a1, a2", "a0", "a0", "a0" }; static char *typeSpecS[] = { "", "f" }; static char *typeSpec[] = { "d", "f" }; static char *ulpSuffixStr[] = { "", "_u1", "_u05", "_u35", "_u15", "_u3500" }; static char *vparameterStr[7] = { "v", "vv", "", "vv", "v", "vvv", "" }; static char returnType[9][1000]; static char argType0[9][1000]; static char argType1[9][1000]; sprintf(returnType[0], "%s", argv[2]); sprintf(returnType[1], "%s", argv[2]); sprintf(returnType[2], "%s", fptype ? "vfloat2" : "vdouble2"); sprintf(returnType[3], "%s", argv[2]); sprintf(returnType[4], "%s", argv[3]); sprintf(returnType[5], "%s", argv[2]); sprintf(returnType[6], "%s", fptype ? "vfloat2" : "vdouble2"); sprintf(returnType[7], "int"); sprintf(returnType[8], "void *"); sprintf(argType0[0], "%s", argv[2]); sprintf(argType0[1], "%s, %s", argv[2], argv[2]); sprintf(argType0[2], "%s", argv[2]); sprintf(argType0[3], "%s, %s", argv[2], argv[3]); sprintf(argType0[4], "%s", argv[2]); sprintf(argType0[5], "%s, %s, %s", argv[2], argv[2], argv[2]); sprintf(argType0[6], "%s", argv[2]); sprintf(argType0[7], "int"); sprintf(argType0[8], "int"); sprintf(argType1[0], "%s a0", argv[2]); sprintf(argType1[1], "%s a0, %s a1", argv[2], argv[2]); sprintf(argType1[2], "%s a0", argv[2]); sprintf(argType1[3], "%s a0, %s a1", argv[2], argv[3]); sprintf(argType1[4], "%s a0", argv[2]); sprintf(argType1[5], "%s a0, %s a1, %s a2", argv[2], argv[2], argv[2]); sprintf(argType1[6], "%s a0", argv[2]); sprintf(argType1[7], "int a0"); sprintf(argType1[8], "int a0"); // if (fptype == 0) { printf("#ifdef __SLEEFSIMDDP_C__\n"); } else { printf("#ifdef __SLEEFSIMDSP_C__\n"); } printf("#ifdef ENABLE_ALIAS\n"); if (argc == 6) { for(int i=0;funcList[i].name != NULL;i++) { if (fptype == 0 && (funcList[i].flags & 2) != 0) 
continue; if (funcList[i].ulp >= 0) { printf("EXPORT CONST %s Sleef_%s%s%d_u%02d(%s) __attribute__((alias(\"Sleef_%s%s%d_u%02d%s\"))) %s;\n", returnType[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, funcList[i].ulp, argType0[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, funcList[i].ulp, isaname, vectorcc ); } else { printf("EXPORT CONST %s Sleef_%s%s%d(%s) __attribute__((alias(\"Sleef_%s%s%d_%s\"))) %s;\n", returnType[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, argType0[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, isaname, vectorcc ); } } printf("\n"); } printf("#else // #ifdef ENABLE_ALIAS\n"); if (argc == 6) { for(int i=0;funcList[i].name != NULL;i++) { if (fptype == 0 && (funcList[i].flags & 2) != 0) continue; if (funcList[i].ulp >= 0) { printf("EXPORT CONST %s %s Sleef_%s%s%d_u%02d(%s) { return Sleef_%s%s%d_u%02d%s(%s); }\n", returnType[funcList[i].funcType], vectorcc, funcList[i].name, typeSpec[fptype], vw, funcList[i].ulp, argType1[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, funcList[i].ulp, isaname, argType2[funcList[i].funcType] ); } else { printf("EXPORT CONST %s %s Sleef_%s%s%d(%s) { return Sleef_%s%s%d_%s(%s); }\n", returnType[funcList[i].funcType], vectorcc, funcList[i].name, typeSpec[fptype], vw, argType1[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, isaname, argType2[funcList[i].funcType] ); } } printf("\n"); } printf("#endif // #ifdef ENABLE_ALIAS\n"); if (fptype == 0) { printf("#endif // #ifdef __SLEEFSIMDDP_C__\n"); } else { printf("#endif // #ifdef __SLEEFSIMDSP_C__\n"); } exit(0); } sleef-3.5.1/src/libm/mkdisp.c000066400000000000000000000143121373003144100157600ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include "funcproto.h" int main(int argc, char **argv) { if (argc < 7) { fprintf(stderr, "Usage : %s [ ...]\n", argv[0]); fprintf(stderr, "\n"); exit(-1); } const int wdp = atoi(argv[1]), wsp = atoi(argv[2]); const char *vdoublename = argv[3], *vfloatname = argv[4], *vintname = argv[5]; const int isastart = 6, nisa = argc - isastart; for(int i=0;funcList[i].name != NULL;i++) { char ulpSuffix0[100] = "", ulpSuffix1[100] = "_"; if (funcList[i].ulp >= 0) { sprintf(ulpSuffix0, "_u%02d", funcList[i].ulp); sprintf(ulpSuffix1, "_u%02d", funcList[i].ulp); } switch(funcList[i].funcType) { case 0: if ((funcList[i].flags & 2) == 0) { printf("DISPATCH_vf_vf(%s, Sleef_%sd%d%s, pnt_%sd%d%s, disp_%sd%d%s", vdoublename, funcList[i].name, wdp, ulpSuffix0, funcList[i].name, wdp, ulpSuffix0, funcList[i].name, wdp, ulpSuffix0); for(int j=0;j #include #include #include #include #include "funcproto.h" int main(int argc, char **argv) { if (argc < 4) { fprintf(stderr, "\nUsage : %s \n\n", argv[0]); fprintf(stderr, "This program generates an include file defining masked functions.\n"); exit(-1); } // const char *isaname = argv[1]; const char *mangledisa = argv[2]; const int vw = atoi(argv[3]); int fptype = 0; // Remove the "-" sign in the SP value const char *cvw = (vw < 0) ? argv[3] + 1 : argv[3]; if (vw < 0) { fptype = 1; } // VLA SVE does not set the vector length in the mangled names. 
if (strcmp(isaname, "sve") == 0) cvw = "x"; // #define LEN 256 static char *vfpname[] = { "vdouble", "vfloat" }; static char *vintname[] = { "vint", "vint2" }; static int sizeoffp[] = { 8, 4 }; static char *ulpSuffixStr[] = { "", "_u1", "_u05", "_u35", "_u15", "_u3500" }; static char vparameterStr[7][LEN] = { "v", "vv", "vl8l8", "vv", "v", "vvv", "vl8" }; static char *typeSpecS[] = { "", "f" }; static char *typeSpec[] = { "d", "f" }; static char funcname[4][LEN]; snprintf(vparameterStr[2], LEN, "vl%dl%d", sizeoffp[fptype], sizeoffp[fptype]); snprintf(vparameterStr[6], LEN, "vl%d", sizeoffp[fptype]); // for(int i=0;funcList[i].name != NULL;i++) { if ((funcList[i].flags & 1) != 0) continue; if (fptype == 0 && (funcList[i].flags & 2) != 0) continue; if (funcList[i].ulp < 20) { snprintf(funcname[0], LEN, "_ZGV%sN%s%s_%s%s", mangledisa, cvw, vparameterStr[funcList[i].funcType], funcList[i].name, typeSpecS[fptype]); snprintf(funcname[1], LEN, "_ZGV%sM%s%s_%s%s", mangledisa, cvw, vparameterStr[funcList[i].funcType], funcList[i].name, typeSpecS[fptype]); } else { snprintf(funcname[0], LEN, "_ZGV%sN%s%s_%s%s_u%d", mangledisa, cvw, vparameterStr[funcList[i].funcType], funcList[i].name, typeSpecS[fptype], funcList[i].ulp); snprintf(funcname[1], LEN, "_ZGV%sM%s%s_%s%s_u%d", mangledisa, cvw, vparameterStr[funcList[i].funcType], funcList[i].name, typeSpecS[fptype], funcList[i].ulp); } snprintf(funcname[2], LEN, "_ZGV%sN%s%s___%s%s_finite", mangledisa, cvw, vparameterStr[funcList[i].funcType], funcList[i].name, typeSpecS[fptype]); snprintf(funcname[3], LEN, "_ZGV%sM%s%s___%s%s_finite", mangledisa, cvw, vparameterStr[funcList[i].funcType], funcList[i].name, typeSpecS[fptype]); switch(funcList[i].funcType) { case 0: { printf("EXPORT CONST %s %s(%s a0, vopmask m) { return %s(a0); }\n", vfpname[fptype], funcname[1], vfpname[fptype], funcname[0]); if (funcList[i].ulp < 20) printf("EXPORT CONST %s %s(%s) __attribute__((weak, alias(\"%s\")));\n", vfpname[fptype], funcname[3], 
vfpname[fptype], funcname[0]); else printf("EXPORT CONST %s %s_u%d(%s) __attribute__((weak, alias(\"%s\")));\n", vfpname[fptype], funcname[3],funcList[i].ulp, vfpname[fptype], funcname[0]); break; } case 1: { printf("EXPORT CONST %s %s(%s a0, %s a1, vopmask m) { return %s(a0, a1); }\n", vfpname[fptype], funcname[1], vfpname[fptype], vfpname[fptype], funcname[0]); if (funcList[i].ulp < 20) printf("EXPORT CONST %s %s(%s, %s, vopmask) __attribute__((weak, alias(\"%s\")));\n", vfpname[fptype], funcname[3], vfpname[fptype], vfpname[fptype], funcname[0]); else printf("EXPORT CONST %s %s_u%d(%s, %s, vopmask) __attribute__((weak, alias(\"%s\")));\n", vfpname[fptype], funcname[3],funcList[i].ulp, vfpname[fptype], vfpname[fptype], funcname[0]); break; } case 2: if (sizeoffp[fptype] == sizeof(double)) { printf("#ifndef ENABLE_SVE\n"); printf("EXPORT void %s(vdouble a0, double *a1, double *a2, vopmask m) {\n", funcname[1]); printf(" double s[VECTLENDP], c[VECTLENDP];\n"); printf(" int32_t mbuf[VECTLENSP];\n"); printf(" %s(a0, s, c);\n", funcname[0]); printf(" vstoreu_v_p_vi2(mbuf, vcast_vi2_vm(vand_vm_vo64_vm(m, vcast_vm_i_i(-1, -1))));\n"); printf(" for(int i=0;i #include #include #include #include "funcproto.h" // In VSX intrinsics, vector data types are like "vector float". // This function replaces space characters with '_'. char *escapeSpace(char *str) { char *ret = malloc(strlen(str) + 10); strcpy(ret, str); for(char *p = ret;*p != '\0';p++) if (*p == ' ') *p = '_'; return ret; } int main(int argc, char **argv) { if (argc < 4) { fprintf(stderr, "Generate a header for renaming functions\n"); fprintf(stderr, "Usage : %s []\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "Generate a part of header for library functions\n"); fprintf(stderr, "Usage : %s []\n", argv[0]); fprintf(stderr, "\n"); exit(-1); } static char *ulpSuffixStr[] = { "", "_u1", "_u05", "_u35", "_u15", "_u3500" }; if (argc == 4 || argc == 5) { char *atrPrefix = strcmp(argv[1], "-") == 0 ? 
"" : argv[1]; char *wdp = argv[2]; char *wsp = argv[3]; char *isaname = argc == 4 ? "" : argv[4]; char *isaub = argc == 5 ? "_" : ""; // printf("#ifndef DETERMINISTIC\n\n"); for(int i=0;funcList[i].name != NULL;i++) { if (funcList[i].ulp >= 0) { printf("#define x%s%s Sleef_%s%sd%s_u%02d%s\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], "", funcList[i].name, wdp, funcList[i].ulp, isaname); printf("#define y%s%s Sleef_%s%sd%s_u%02d%s\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], atrPrefix, funcList[i].name, wdp, funcList[i].ulp, isaname); } else { printf("#define x%s Sleef_%s%sd%s%s%s\n", funcList[i].name, "", funcList[i].name, wdp, isaub, isaname); printf("#define y%s Sleef_%s%sd%s%s%s\n", funcList[i].name, atrPrefix, funcList[i].name, wdp, isaub, isaname); } } printf("\n"); for(int i=0;funcList[i].name != NULL;i++) { if (funcList[i].ulp >= 0) { printf("#define x%sf%s Sleef_%s%sf%s_u%02d%s\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], "", funcList[i].name, wsp, funcList[i].ulp, isaname); printf("#define y%sf%s Sleef_%s%sf%s_u%02d%s\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], atrPrefix, funcList[i].name, wsp, funcList[i].ulp, isaname); } else { printf("#define x%sf Sleef_%s%sf%s%s%s\n", funcList[i].name, "", funcList[i].name, wsp, isaub, isaname); printf("#define y%sf Sleef_%s%sf%s%s%s\n", funcList[i].name, atrPrefix, funcList[i].name, wsp, isaub, isaname); } } // printf("\n#else //#ifndef DETERMINISTIC\n\n"); for(int i=0;funcList[i].name != NULL;i++) { if (funcList[i].ulp >= 0) { printf("#define x%s%s Sleef_%s%sd%s_u%02d%s\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], atrPrefix, funcList[i].name, wdp, funcList[i].ulp, isaname); } else { printf("#define x%s Sleef_%s%sd%s%s%s\n", funcList[i].name, atrPrefix, funcList[i].name, wdp, isaub, isaname); } } printf("\n"); for(int i=0;funcList[i].name != NULL;i++) { if (funcList[i].ulp >= 0) { printf("#define x%sf%s Sleef_%s%sf%s_u%02d%s\n", funcList[i].name, 
ulpSuffixStr[funcList[i].ulpSuffix], atrPrefix, funcList[i].name, wsp, funcList[i].ulp, isaname); } else { printf("#define x%sf Sleef_%s%sf%s%s%s\n", funcList[i].name, atrPrefix, funcList[i].name, wsp, isaub, isaname); } } printf("\n#endif // #ifndef DETERMINISTIC\n"); } else { char *atrPrefix = strcmp(argv[1], "-") == 0 ? "" : argv[1]; char *wdp = argv[2]; char *wsp = argv[3]; char *vdoublename = argv[4], *vdoublename_escspace = escapeSpace(vdoublename); char *vfloatname = argv[5], *vfloatname_escspace = escapeSpace(vfloatname); char *vintname = argv[6], *vintname_escspace = escapeSpace(vintname); char *vint2name = argv[7], *vint2name_escspace = escapeSpace(vint2name); char *architecture = argv[8]; char *isaname = argc == 10 ? argv[9] : ""; char *isaub = argc == 10 ? "_" : ""; if (strcmp(isaname, "sve") == 0) wdp = wsp = "x"; char * vectorcc = ""; #ifdef ENABLE_AAVPCS if (strcmp(isaname, "advsimd") == 0) vectorcc =" __attribute__((aarch64_vector_pcs))"; #endif printf("#ifdef %s\n", architecture); if (strcmp(vdoublename, "-") != 0) { printf("\n"); printf("#ifndef Sleef_%s_2_DEFINED\n", vdoublename_escspace); if (strcmp(architecture, "__ARM_FEATURE_SVE") == 0) { printf("typedef svfloat64x2_t Sleef_%s_2;\n", vdoublename_escspace); } else { printf("typedef struct {\n"); printf(" %s x, y;\n", vdoublename); printf("} Sleef_%s_2;\n", vdoublename_escspace); } printf("#define Sleef_%s_2_DEFINED\n", vdoublename_escspace); printf("#endif\n"); printf("\n"); for(int i=0;funcList[i].name != NULL;i++) { switch(funcList[i].funcType) { case 0: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sd%s_u%02d%s(%s)%s;\n", vdoublename, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vectorcc); printf("IMPORT CONST %s Sleef_%s%sd%s_u%02d%s(%s)%s;\n", vdoublename, atrPrefix, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vectorcc); } else { printf("IMPORT CONST %s Sleef_%sd%s%s%s(%s)%s;\n", vdoublename, funcList[i].name, wdp, isaub, isaname, 
vdoublename, vectorcc); printf("IMPORT CONST %s Sleef_%s%sd%s%s%s(%s)%s;\n", vdoublename, atrPrefix, funcList[i].name, wdp, isaub, isaname, vdoublename, vectorcc); } break; case 1: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sd%s_u%02d%s(%s, %s)%s;\n", vdoublename, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vdoublename, vectorcc); printf("IMPORT CONST %s Sleef_%s%sd%s_u%02d%s(%s, %s)%s;\n", vdoublename, atrPrefix, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vdoublename, vectorcc); } else { printf("IMPORT CONST %s Sleef_%sd%s%s%s(%s, %s)%s;\n", vdoublename, funcList[i].name, wdp, isaub, isaname, vdoublename, vdoublename, vectorcc); printf("IMPORT CONST %s Sleef_%s%sd%s%s%s(%s, %s)%s;\n", vdoublename, atrPrefix, funcList[i].name, wdp, isaub, isaname, vdoublename, vdoublename, vectorcc); } break; case 2: case 6: if (funcList[i].ulp >= 0) { printf("IMPORT CONST Sleef_%s_2 Sleef_%sd%s_u%02d%s(%s)%s;\n", vdoublename_escspace, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vectorcc); printf("IMPORT CONST Sleef_%s_2 Sleef_%s%sd%s_u%02d%s(%s)%s;\n", vdoublename_escspace, atrPrefix, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vectorcc); } else { printf("IMPORT CONST Sleef_%s_2 Sleef_%sd%s%s%s(%s)%s;\n", vdoublename_escspace, funcList[i].name, wdp, isaub, isaname, vdoublename, vectorcc); printf("IMPORT CONST Sleef_%s_2 Sleef_%s%sd%s%s%s(%s)%s;\n", vdoublename_escspace, atrPrefix, funcList[i].name, wdp, isaub, isaname, vdoublename, vectorcc); } break; case 3: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sd%s_u%02d%s(%s, %s)%s;\n", vdoublename, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vintname, vectorcc); printf("IMPORT CONST %s Sleef_%s%sd%s_u%02d%s(%s, %s)%s;\n", vdoublename, atrPrefix, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vintname, vectorcc); } else { printf("IMPORT CONST %s Sleef_%sd%s%s%s(%s, %s)%s;\n", vdoublename, 
funcList[i].name, wdp, isaub, isaname, vdoublename, vintname, vectorcc); printf("IMPORT CONST %s Sleef_%s%sd%s%s%s(%s, %s)%s;\n", vdoublename, atrPrefix, funcList[i].name, wdp, isaub, isaname, vdoublename, vintname, vectorcc); } break; case 4: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sd%s_u%02d%s(%s)%s;\n", vintname, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vectorcc); printf("IMPORT CONST %s Sleef_%s%sd%s_u%02d%s(%s)%s;\n", vintname, atrPrefix, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vectorcc); } else { printf("IMPORT CONST %s Sleef_%sd%s%s%s(%s)%s;\n", vintname, funcList[i].name, wdp, isaub, isaname, vdoublename, vectorcc); printf("IMPORT CONST %s Sleef_%s%sd%s%s%s(%s)%s;\n", vintname, atrPrefix, funcList[i].name, wdp, isaub, isaname, vdoublename, vectorcc); } break; case 5: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sd%s_u%02d%s(%s, %s, %s)%s;\n", vdoublename, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vdoublename, vdoublename, vectorcc); printf("IMPORT CONST %s Sleef_%s%sd%s_u%02d%s(%s, %s, %s)%s;\n", vdoublename, atrPrefix, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vdoublename, vdoublename, vectorcc); } else { printf("IMPORT CONST %s Sleef_%sd%s%s%s(%s, %s, %s)%s;\n", vdoublename, funcList[i].name, wdp, isaub, isaname, vdoublename, vdoublename, vdoublename, vectorcc); printf("IMPORT CONST %s Sleef_%s%sd%s%s%s(%s, %s, %s)%s;\n", vdoublename, atrPrefix, funcList[i].name, wdp, isaub, isaname, vdoublename, vdoublename, vdoublename, vectorcc); } break; // The two cases below should not use vector calling convention. // They do not have vector type as argument or return value. // Also, the corresponding definition (`getPtr` and `getInt`) in `sleefsimd*.c` // are not defined with `VECTOR_CC`. 
(Same for single precision case below) case 7: printf("IMPORT CONST int Sleef_%sd%s%s%s(int);\n", funcList[i].name, wdp, isaub, isaname); break; case 8: printf("IMPORT CONST void *Sleef_%sd%s%s%s(int);\n", funcList[i].name, wdp, isaub, isaname); break; } } } printf("\n"); printf("#ifndef Sleef_%s_2_DEFINED\n", vfloatname_escspace); if (strcmp(architecture, "__ARM_FEATURE_SVE") == 0) { printf("typedef svfloat32x2_t Sleef_%s_2;\n", vfloatname_escspace); } else { printf("typedef struct {\n"); printf(" %s x, y;\n", vfloatname); printf("} Sleef_%s_2;\n", vfloatname_escspace); } printf("#define Sleef_%s_2_DEFINED\n", vfloatname_escspace); printf("#endif\n"); printf("\n"); //printf("typedef %s vint2_%s;\n", vint2name, isaname); //printf("\n"); for(int i=0;funcList[i].name != NULL;i++) { switch(funcList[i].funcType) { case 0: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sf%s_u%02d%s(%s)%s;\n", vfloatname, funcList[i].name, wsp, funcList[i].ulp, isaname, vfloatname, vectorcc); printf("IMPORT CONST %s Sleef_%s%sf%s_u%02d%s(%s)%s;\n", vfloatname, atrPrefix, funcList[i].name, wsp, funcList[i].ulp, isaname, vfloatname, vectorcc); } else { printf("IMPORT CONST %s Sleef_%sf%s%s%s(%s)%s;\n", vfloatname, funcList[i].name, wsp, isaub, isaname, vfloatname, vectorcc); printf("IMPORT CONST %s Sleef_%s%sf%s%s%s(%s)%s;\n", vfloatname, atrPrefix, funcList[i].name, wsp, isaub, isaname, vfloatname, vectorcc); } break; case 1: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sf%s_u%02d%s(%s, %s)%s;\n", vfloatname, funcList[i].name, wsp, funcList[i].ulp, isaname, vfloatname, vfloatname, vectorcc); printf("IMPORT CONST %s Sleef_%s%sf%s_u%02d%s(%s, %s)%s;\n", vfloatname, atrPrefix, funcList[i].name, wsp, funcList[i].ulp, isaname, vfloatname, vfloatname, vectorcc); } else { printf("IMPORT CONST %s Sleef_%sf%s%s%s(%s, %s)%s;\n", vfloatname, funcList[i].name, wsp, isaub, isaname, vfloatname, vfloatname, vectorcc); printf("IMPORT CONST %s Sleef_%s%sf%s%s%s(%s, %s)%s;\n", 
vfloatname, atrPrefix, funcList[i].name, wsp, isaub, isaname, vfloatname, vfloatname, vectorcc); } break; case 2: case 6: if (funcList[i].ulp >= 0) { printf("IMPORT CONST Sleef_%s_2 Sleef_%sf%s_u%02d%s(%s)%s;\n", vfloatname_escspace, funcList[i].name, wsp, funcList[i].ulp, isaname, vfloatname, vectorcc); printf("IMPORT CONST Sleef_%s_2 Sleef_%s%sf%s_u%02d%s(%s)%s;\n", vfloatname_escspace, atrPrefix, funcList[i].name, wsp, funcList[i].ulp, isaname, vfloatname, vectorcc); } else { printf("IMPORT CONST Sleef_%s_2 Sleef_%sf%s%s%s(%s)%s;\n", vfloatname_escspace, funcList[i].name, wsp, isaub, isaname, vfloatname, vectorcc); printf("IMPORT CONST Sleef_%s_2 Sleef_%s%sf%s%s%s(%s)%s;\n", vfloatname_escspace, atrPrefix, funcList[i].name, wsp, isaub, isaname, vfloatname, vectorcc); } break; /* case 3: printf("IMPORT CONST %s Sleef_%sf%d_%s(%s, vint2_%s);\n", vfloatname, funcList[i].name, wsp, isaname, vfloatname, isaname); break; case 4: printf("IMPORT CONST vint2_%s Sleef_%sf%d_%s(%s);\n", isaname, funcList[i].name, wsp, isaname, vfloatname); break; */ case 5: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sf%s_u%02d%s(%s, %s, %s)%s;\n", vfloatname, funcList[i].name, wsp, funcList[i].ulp, isaname, vfloatname, vfloatname, vfloatname, vectorcc); printf("IMPORT CONST %s Sleef_%s%sf%s_u%02d%s(%s, %s, %s)%s;\n", vfloatname, atrPrefix, funcList[i].name, wsp, funcList[i].ulp, isaname, vfloatname, vfloatname, vfloatname, vectorcc); } else { printf("IMPORT CONST %s Sleef_%sf%s%s%s(%s, %s, %s)%s;\n", vfloatname, funcList[i].name, wsp, isaub, isaname, vfloatname, vfloatname, vfloatname, vectorcc); printf("IMPORT CONST %s Sleef_%s%sf%s%s%s(%s, %s, %s)%s;\n", vfloatname, atrPrefix, funcList[i].name, wsp, isaub, isaname, vfloatname, vfloatname, vfloatname, vectorcc); } break; // The two cases below should not use vector calling convention. // See comments for double precision case above. 
case 7: printf("IMPORT CONST int Sleef_%sf%s%s%s(int);\n", funcList[i].name, wsp, isaub, isaname); printf("IMPORT CONST int Sleef_%s%sf%s%s%s(int);\n", atrPrefix, funcList[i].name, wsp, isaub, isaname); break; case 8: printf("IMPORT CONST void *Sleef_%sf%s%s%s(int);\n", funcList[i].name, wsp, isaub, isaname); printf("IMPORT CONST void *Sleef_%s%sf%s%s%s(int);\n", atrPrefix, funcList[i].name, wsp, isaub, isaname); break; } } printf("#endif\n"); free(vdoublename_escspace); free(vfloatname_escspace); free(vintname_escspace); free(vint2name_escspace); } exit(0); } sleef-3.5.1/src/libm/mkrename_gnuabi.c000066400000000000000000000114621373003144100176200ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include "funcproto.h" int main(int argc, char **argv) { if (argc < 5) { fprintf(stderr, "Usage : %s \n", argv[0]); exit(-1); } char *isaname = argv[1]; char *mangledisa = argv[2]; char *wdp = argv[3]; char *wsp = argv[4]; // VLA SVE does not set the vector length in the mangled names. 
if (strcmp(isaname, "sve") == 0) wdp = wsp = "x"; static char *ulpSuffixStr[] = { "", "_u1", "_u05", "_u35", "_u15", "_u3500" }; static char *vparameterStrDP[] = { "v", "vv", "vl8l8", "vv", "v", "vvv", "vl8" }; static char *vparameterStrSP[] = { "v", "vv", "vl4l4", "vv", "v", "vvv", "vl4" }; for(int i=0;funcList[i].name != NULL;i++) { if ((funcList[i].flags & 1) != 0) continue; if ((funcList[i].flags & 2) != 0) continue; if (funcList[i].ulp < 0) { printf("#define x%s _ZGV%sN%s%s_%s\n", funcList[i].name, mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); printf("#define str_x%s \"_ZGV%sN%s%s_%s\"\n", funcList[i].name, mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); printf("#define __%s_finite _ZGV%sN%s%s___%s_finite\n", funcList[i].name, mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); } else if (funcList[i].ulp < 20) { printf("#define x%s%s _ZGV%sN%s%s_%s\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); printf("#define str_x%s%s \"_ZGV%sN%s%s_%s\"\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); printf("#define __%s%s_finite _ZGV%sN%s%s___%s_finite\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); } else { printf("#define x%s%s _ZGV%sN%s%s_%s_u%d\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name, funcList[i].ulp); printf("#define str_x%s%s \"_ZGV%sN%s%s_%s_u%d\"\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name, funcList[i].ulp); printf("#define __%s%s_finite _ZGV%sN%s%s___%s_finite\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wdp, 
vparameterStrDP[funcList[i].funcType], funcList[i].name); } } printf("\n"); for(int i=0;funcList[i].name != NULL;i++) { if ((funcList[i].flags & 1) != 0) continue; if (funcList[i].ulp < 0) { printf("#define x%sf _ZGV%sN%s%s_%sf\n", funcList[i].name, mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); printf("#define str_x%sf \"_ZGV%sN%s%s_%sf\"\n", funcList[i].name, mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); printf("#define __%sf_finite _ZGV%sN%s%s___%sf_finite\n", funcList[i].name, mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); } else if (funcList[i].ulp < 20) { printf("#define x%sf%s _ZGV%sN%s%s_%sf\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); printf("#define str_x%sf%s \"_ZGV%sN%s%s_%sf\"\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); printf("#define __%sf%s_finite _ZGV%sN%s%s___%sf_finite\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); } else { printf("#define x%sf%s _ZGV%sN%s%s_%sf_u%d\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name, funcList[i].ulp); printf("#define str_x%sf%s \"_ZGV%sN%s%s_%sf_u%d\"\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name, funcList[i].ulp); printf("#define __%sf%s_finite _ZGV%sN%s%s___%sf_finite\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); } } exit(0); } sleef-3.5.1/src/libm/norename.h000077500000000000000000000114231373003144100163050ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. 
// Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #ifdef ENABLE_DP #ifdef ENABLE_SVE typedef svfloat64x2_t vdouble2; #else typedef struct { vdouble x, y; } vdouble2; #endif vdouble xldexp(vdouble x, vint q); vint xilogb(vdouble d); vdouble xsin(vdouble d); vdouble xcos(vdouble d); vdouble2 xsincos(vdouble d); vdouble xtan(vdouble d); vdouble xasin(vdouble s); vdouble xacos(vdouble s); vdouble xatan(vdouble s); vdouble xatan2(vdouble y, vdouble x); vdouble xlog(vdouble d); vdouble xexp(vdouble d); vdouble xpow(vdouble x, vdouble y); vdouble xsinh(vdouble d); vdouble xcosh(vdouble d); vdouble xtanh(vdouble d); vdouble xsinh_u35(vdouble d); vdouble xcosh_u35(vdouble d); vdouble xtanh_u35(vdouble d); vdouble xasinh(vdouble s); vdouble xacosh(vdouble s); vdouble xatanh(vdouble s); vdouble xcbrt(vdouble d); vdouble xexp2(vdouble a); vdouble xexp10(vdouble a); vdouble xexp2_u35(vdouble a); vdouble xexp10_u35(vdouble a); vdouble xexpm1(vdouble a); vdouble xlog10(vdouble a); vdouble xlog2(vdouble a); vdouble xlog2_u35(vdouble a); vdouble xlog1p(vdouble a); vdouble xsin_u1(vdouble d); vdouble xcos_u1(vdouble d); vdouble2 xsincos_u1(vdouble d); vdouble xtan_u1(vdouble d); vdouble xasin_u1(vdouble s); vdouble xacos_u1(vdouble s); vdouble xatan_u1(vdouble s); vdouble xatan2_u1(vdouble y, vdouble x); vdouble xlog_u1(vdouble d); vdouble xcbrt_u1(vdouble d); vdouble2 xsincospi_u05(vdouble d); vdouble2 xsincospi_u35(vdouble d); vdouble xsinpi_u05(vdouble d); vdouble xcospi_u05(vdouble d); vdouble xldexp(vdouble, vint); vint xilogb(vdouble); vdouble xfma(vdouble, vdouble, vdouble); vdouble xsqrt(vdouble); vdouble xsqrt_u05(vdouble); vdouble xsqrt_u35(vdouble); vdouble xhypot_u05(vdouble, vdouble); vdouble xhypot_u35(vdouble, vdouble); vdouble xfabs(vdouble); vdouble xcopysign(vdouble, vdouble); vdouble xfmax(vdouble, vdouble); vdouble xfmin(vdouble, vdouble); vdouble 
xfdim(vdouble, vdouble); vdouble xtrunc(vdouble); vdouble xfloor(vdouble); vdouble xceil(vdouble); vdouble xround(vdouble); vdouble xrint(vdouble); vdouble xnextafter(vdouble, vdouble); vdouble xfrfrexp(vdouble); vint xexpfrexp(vdouble); vdouble xfmod(vdouble, vdouble); vdouble xremainder(vdouble, vdouble); vdouble2 xmodf(vdouble); vdouble xlgamma_u1(vdouble); vdouble xtgamma_u1(vdouble); vdouble xerf_u1(vdouble); vdouble xerfc_u15(vdouble); #endif // #ifdef ENABLE_SP #ifdef ENABLE_SVE typedef svfloat32x2_t vfloat2; #else typedef struct { vfloat x, y; } vfloat2; #endif vfloat xldexpf(vfloat x, vint2 q); vint2 xilogbf(vfloat d); vfloat xsinf(vfloat d); vfloat xcosf(vfloat d); vfloat2 xsincosf(vfloat d); vfloat xtanf(vfloat d); vfloat xasinf(vfloat s); vfloat xacosf(vfloat s); vfloat xatanf(vfloat s); vfloat xatan2f(vfloat y, vfloat x); vfloat xlogf(vfloat d); vfloat xexpf(vfloat d); vfloat xcbrtf(vfloat s); vfloat xpowf(vfloat x, vfloat y); vfloat xsinhf(vfloat x); vfloat xcoshf(vfloat x); vfloat xtanhf(vfloat x); vfloat xsinhf_u35(vfloat x); vfloat xcoshf_u35(vfloat x); vfloat xtanhf_u35(vfloat x); vfloat xasinhf(vfloat x); vfloat xacoshf(vfloat x); vfloat xatanhf(vfloat x); vfloat xexp2f(vfloat a); vfloat xexp10f(vfloat a); vfloat xexp2f_u35(vfloat a); vfloat xexp10f_u35(vfloat a); vfloat xexpm1f(vfloat a); vfloat xlog10f(vfloat a); vfloat xlog2f(vfloat a); vfloat xlog2f_u35(vfloat a); vfloat xlog1pf(vfloat a); vfloat xsinf_u1(vfloat d); vfloat xcosf_u1(vfloat d); vfloat2 xsincosf_u1(vfloat d); vfloat xtanf_u1(vfloat d); vfloat xasinf_u1(vfloat s); vfloat xacosf_u1(vfloat s); vfloat xatanf_u1(vfloat s); vfloat xatan2f_u1(vfloat y, vfloat x); vfloat xlogf_u1(vfloat d); vfloat xcbrtf_u1(vfloat s); vfloat2 xsincospif_u05(vfloat d); vfloat2 xsincospif_u35(vfloat d); vfloat xsinpif_u05(vfloat d); vfloat xcospif_u05(vfloat d); vfloat xldexpf(vfloat, vint2); vint2 xilogbf(vfloat); vfloat xfmaf(vfloat, vfloat, vfloat); vfloat xsqrtf(vfloat s); vfloat xsqrtf_u05(vfloat s); 
vfloat xsqrtf_u35(vfloat s); vfloat xhypotf_u05(vfloat, vfloat); vfloat xhypotf_u35(vfloat, vfloat); vfloat xfabsf(vfloat); vfloat xcopysignf(vfloat, vfloat); vfloat xfmaxf(vfloat, vfloat); vfloat xfminf(vfloat, vfloat); vfloat xfdimf(vfloat, vfloat); vfloat xtruncf(vfloat); vfloat xfloorf(vfloat); vfloat xceilf(vfloat); vfloat xroundf(vfloat); vfloat xrintf(vfloat); vfloat xnextafterf(vfloat, vfloat); vfloat xfrfrexpf(vfloat); vint2 xexpfrexpf(vfloat); vfloat xfmodf(vfloat, vfloat); vfloat xremainderf(vfloat, vfloat); vfloat2 xmodff(vfloat); vfloat xlgammaf_u1(vfloat); vfloat xtgammaf_u1(vfloat); vfloat xerff_u1(vfloat); vfloat xerfcf_u15(vfloat); vfloat xfastsinf_u3500(vfloat d); vfloat xfastcosf_u3500(vfloat d); vfloat xfastpowf_u3500(vfloat x, vfloat y); #endif sleef-3.5.1/src/libm/rempitab.c000066400000000000000000003462541373003144100163110ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include "misc.h" #if !defined(SLEEF_GENHEADER) #define FUNCATR NOEXPORT ALIGNED(64) #else #define FUNCATR EXPORT ALIGNED(64) #endif FUNCATR const double Sleef_rempitabdp[] = { 0.15915494309189531785, 1.7916237278037667488e-17, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 0.03415494309189533173, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 0.03415494309189533173, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 0.0029049430918953351999, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.496415728504571394e-51, 0.0029049430918953351999, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.496415728504571394e-51, 0.0029049430918953351999, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.496415728504571394e-51, 0.0029049430918953351999, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.496415728504571394e-51, 0.00095181809189533563356, 1.3532164927539732229e-19, -6.4410794381603004826e-36, 1.7634898158762436344e-52, 0.00095181809189533563356, 1.3532164927539732229e-19, -6.4410794381603004826e-36, 1.7634898158762436344e-52, 0.00046353684189533574198, 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.301187206862134399e-54, 0.00021939621689533574198, 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.301187206862134399e-54, 9.7325904395335769087e-05, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 3.6290748145335769087e-05, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.7731700203357690874e-06, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.7731700203357690874e-06, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.7731700203357690874e-06, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 
1.9584727547107690874e-06, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 2.1321799510573569745e-08, 1.5185066224124613304e-24, 2.6226236120327253511e-40, 2.6283399642369025999e-57, 6.4206383167259151492e-09, -1.3585460269359374382e-25, -1.3244127270701094468e-41, -2.4695541513869446866e-57, 6.4206383167259151492e-09, -1.3585460269359374382e-25, -1.3244127270701094468e-41, -2.4695541513869446866e-57, 2.6953480182640010867e-09, -1.3585460269359374382e-25, -1.3244127270701094468e-41, -2.4695541513869446866e-57, 8.3270286903304384868e-10, 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 8.3270286903304384868e-10, 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 3.6704158172530459087e-10, 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 1.3421093807143501366e-10, 1.9241762160098927996e-26, 3.9750282589222551507e-42, 7.9392906424978921242e-59, 1.7795616244500218596e-11, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, 1.7795616244500218596e-11, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, 1.7795616244500218596e-11, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, 3.2437010161333667893e-12, 
-1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, 3.2437010161333667893e-12, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, 3.2437010161333667893e-12, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, 1.4247116125875099096e-12, 2.5861333686050385673e-28, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 5.1521691081458187359e-13, 5.6664945123924856962e-29, 6.5510079543732854985e-45, -2.6168913164368963837e-61, 6.0469559928117805118e-14, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 6.0469559928117805118e-14, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 6.0469559928117805118e-14, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.6261410673097965595e-15, -1.3304005198798645927e-31, -1.7578597149294783985e-47, 8.4432539107728104262e-64, 3.6261410673097965595e-15, -1.3304005198798645927e-31, -1.7578597149294783985e-47, 8.4432539107728104262e-64, 3.6261410673097965595e-15, -1.3304005198798645927e-31, -1.7578597149294783985e-47, 8.4432539107728104262e-64, 3.6261410673097965595e-15, -1.3304005198798645927e-31, -1.7578597149294783985e-47, 8.4432539107728104262e-64, 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, 1.7916237278037667488e-17, 2.5454160968749269937e-33, 
2.1132476107887107169e-49, 8.7154294504188129325e-66, 1.7916237278037667488e-17, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188129325e-66, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 8.7154294504188129325e-66, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 8.7154294504188129325e-66, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 1.3532164927539732229e-19, -6.4410794381603004826e-36, 1.7634898158762432635e-52, 3.5887057810247033998e-68, 1.3532164927539732229e-19, -6.4410794381603004826e-36, 1.7634898158762432635e-52, 3.5887057810247033998e-68, 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 1.3348904870778067446e-20, -4.2254836195018827479e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 6.5726412927436632287e-21, 1.0820844071023395684e-36, 1.7634898158762432635e-52, 3.5887057810247033998e-68, 3.1845095037264626247e-21, 3.2976802257607573031e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 1.4904436092178623228e-21, -4.6390169687056261795e-38, -1.1392999419355048437e-54, -4.587677453735884283e-71, 6.4341066196356198368e-22, -4.6390169687056261795e-38, -1.1392999419355048437e-54, -4.587677453735884283e-71, 2.1989418833641172011e-22, 4.7649378378726728402e-38, 9.3011872068621332399e-54, 1.113250147552460308e-69, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 
5.3358074162805516304e-73, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 1.5185066224124613304e-24, 2.6226236120327253511e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 1.5185066224124613304e-24, 2.6226236120327253511e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 1.5185066224124613304e-24, 2.6226236120327253511e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 6.9132600985943383921e-25, 7.8591368887290111994e-41, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 2.7773570358292009361e-25, -1.3244127270701094468e-41, -2.4695541513869446866e-57, -3.2399200798614356002e-74, 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 2.9745456030524896742e-75, 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 2.9745456030524896742e-75, 1.9241762160098927996e-26, 3.9750282589222551507e-42, 7.9392906424978921242e-59, 2.9745456030524896742e-75, 1.9241762160098927996e-26, 3.9750282589222551507e-42, 7.9392906424978921242e-59, 2.9745456030524896742e-75, 6.317065088957874881e-27, -3.2976062348358281152e-43, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 6.317065088957874881e-27, -3.2976062348358281152e-43, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 3.0858908211726098086e-27, 3.8770419025072344914e-43, 7.9392906424978921242e-59, 2.9745456030524896742e-75, 1.4703036872799779898e-27, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 6.625101203336619011e-28, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 2.5861333686050385673e-28, 
2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 5.6664945123924856962e-29, 6.5510079543732854985e-45, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 5.6664945123924856962e-29, 6.5510079543732854985e-45, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, 3.0224035688960604996e-30, 2.451648649116083682e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, 1.4446817584540368888e-30, 2.451648649116083682e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, 6.5582085323302525856e-31, 7.0002556871006273225e-47, 1.0567786762735315635e-62, -6.1446417754639313137e-79, 2.6139040062251944343e-31, -1.7578597149294783985e-47, 8.4432539107728090768e-64, 1.9517662449371102229e-79, 6.4175174317266470186e-32, 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371102229e-79, 6.4175174317266470186e-32, 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371102229e-79, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659569668e-65, -7.2335760163150273591e-81, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659569668e-65, -7.2335760163150273591e-81, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 
8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.3430016361024414106e-34, 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.3430016361024414106e-34, 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 1.4185069655957361252e-83, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 1.4185069655957361252e-83, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 1.4185069655957361252e-83, 1.7633044866680145008e-35, 2.8491136916798196016e-51, 4.0680767287898916022e-67, 1.4185069655957361252e-83, 5.595982714259923599e-36, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085140685e-84, 5.595982714259923599e-36, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085140685e-84, 2.5867171761548675786e-36, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085140685e-84, 1.0820844071023395684e-36, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085140685e-84, 3.2976802257607573031e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280944778e-86, 3.2976802257607573031e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280944778e-86, 1.4168892644450972904e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280944778e-86, 4.7649378378726728402e-38, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280944778e-86, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 
6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 2.6226236120327253511e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 7.8591368887290111994e-41, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 7.8591368887290111994e-41, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 3.2673620808294506214e-41, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, 3.9750282589222551507e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, 1.1051690039850297894e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, 1.1051690039850297894e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, 3.8770419025072344914e-43, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 6.5510079543732854985e-45, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 6.5510079543732854985e-45, -2.6168913164368963837e-61, 
3.7036201000008285821e-78, 5.6554937751584084315e-94, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 2.451648649116083682e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 2.451648649116083682e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 7.0002556871006273225e-47, 1.0567786762735315635e-62, -6.1446417754639301152e-79, -1.5355611056488084652e-94, 7.0002556871006273225e-47, 1.0567786762735315635e-62, -6.1446417754639301152e-79, -1.5355611056488084652e-94, 2.6211979860855749482e-47, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 1.5797802926460750146e-48, 2.3660905534865399025e-64, -7.2335760163150273591e-81, 2.8738690232659205689e-99, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.8738690232659205689e-99, 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.8738690232659205689e-99, 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.8738690232659205689e-99, 
1.8885701952232994665e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.8738690232659205689e-99, 8.1946431118642097069e-51, 1.5937536410989638719e-66, 1.459625439463388979e-82, 2.8738690232659205689e-99, 2.8491136916798196016e-51, 4.0680767287898916022e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142808004e-99, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142808004e-99, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142808004e-99, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142808004e-99, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, 4.0809436324633147776e-54, -4.587677453735884283e-71, -2.8859500138942368532e-87, -5.6567402911297190423e-103, 1.470821845263904967e-54, -4.587677453735884283e-71, -2.8859500138942368532e-87, -5.6567402911297190423e-103, 1.6576095166419998917e-55, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630537605e-103, 1.6576095166419998917e-55, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630537605e-103, 1.6576095166419998917e-55, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630537605e-103, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, 2.6283399642369020339e-57, 
5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 3.9565608646667614317e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 1.9651959757511960854e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 9.6951353129341363331e-60, 7.6368645294831185015e-76, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.7167230906452229674e-60, 7.6368645294831185015e-76, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 2.2275169795007668372e-60, 2.1097166542226745549e-76, 4.4670685979800101779e-92, 1.0451839188820145747e-108, 9.8291392392853877215e-61, -6.5385728340754726503e-77, -1.3520652573660833788e-93, -2.3220403312043059402e-109, 3.6061239614242446325e-61, 7.2792968540756372162e-77, 1.3988851821689310822e-92, 1.0451839188820145747e-108, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 
-1.9306041120023063932e-110, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0567786762735315635e-62, -6.1446417754639301152e-79, -1.535561105648808199e-94, -1.9306041120023063932e-110, 1.0567786762735315635e-62, -6.1446417754639301152e-79, -1.535561105648808199e-94, -1.9306041120023063932e-110, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514358328e-112, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514358328e-112, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514358328e-112, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514358328e-112, 2.3660905534865399025e-64, -7.2335760163150273591e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 2.3660905534865399025e-64, -7.2335760163150273591e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 8.4679971416497210292e-65, -7.2335760163150273591e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 3.9676455775389135587e-66, 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 1.5937536410989638719e-66, 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 4.0680767287898916022e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894110579e-116, 
4.0680767287898916022e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894110579e-116, 1.1007118082399544936e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894110579e-116, 1.1007118082399544936e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894110579e-116, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142805974e-99, 1.8395411057335783574e-115, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142805974e-99, 1.8395411057335783574e-115, 1.7341027056809927069e-68, 1.830931441234090934e-84, 1.3069928418846076386e-100, 3.1677600334418876704e-116, 8.0680116800913756637e-69, -2.2809159455312046184e-85, -4.0748824503880445403e-101, -6.3915272253158644628e-117, 3.4315039917320989315e-69, -2.2809159455312046184e-85, -4.0748824503880445403e-101, -6.3915272253158644628e-117, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, 5.3368668650755071652e-70, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, 2.4390495598509592076e-70, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, 9.901409072386855505e-71, -2.8859500138942368532e-87, -5.6567402911297190423e-103, -4.6672632026740766185e-119, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, 8.4572999356014273536e-72, 1.1355793528776598461e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, 8.4572999356014273536e-72, 1.1355793528776598461e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, 3.9294603961880721752e-72, 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894729832e-121, 
1.6655406264813940833e-72, 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894729832e-121, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, 2.5059077041472040156e-73, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, 1.0909578480805302081e-73, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, 3.8348292004719330442e-74, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 7.6368645294831185015e-76, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 7.6368645294831185015e-76, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 2.1097166542226745549e-76, 4.4670685979800101779e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 2.1097166542226745549e-76, 4.4670685979800101779e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 7.2792968540756372162e-77, 1.3988851821689310822e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, 
3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, 1.5445779612272179051e-78, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251472933e-126, 4.6505689184041232695e-79, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251472933e-126, 4.6505689184041232695e-79, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251472933e-126, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514349095e-112, 9.9039323746573674262e-128, 6.0236490820360325022e-80, -3.7424672147304925625e-96, -1.784871512364483542e-112, 6.7095375687163151728e-129, 6.0236490820360325022e-80, -3.7424672147304925625e-96, -1.784871512364483542e-112, 6.7095375687163151728e-129, 2.6501457402022643213e-80, 3.7482149527770239293e-96, 6.5314563001514349095e-112, 9.9039323746573674262e-128, 9.6339406928538097998e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894107761e-116, -2.1796760241698337334e-132, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894107761e-116, -2.1796760241698337334e-132, 
1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894107761e-116, -2.1796760241698337334e-132, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894107761e-116, -2.1796760241698337334e-132, 5.9489775128085131541e-84, 1.0450891972142805974e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.830931441234090934e-84, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247800778e-132, 1.830931441234090934e-84, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247800778e-132, 8.0141992334048515034e-85, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247800778e-132, 2.8666416439368237283e-85, 1.6400545060233297363e-101, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 1.3200167453193350837e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 5.1571087196495574384e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 1.1355793528776598461e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 1.1355793528776598461e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894725532e-121, -3.1562414818576682143e-137, 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894725532e-121, -3.1562414818576682143e-137, 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894725532e-121, 
-3.1562414818576682143e-137, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534162772e-139, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534162772e-139, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534162772e-139, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 4.4670685979800101779e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 1.3988851821689310822e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 1.3988851821689310822e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 6.3183932821616130831e-93, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 2.4831640123977650651e-93, 1.9359195088038447797e-109, -4.8867691298577234423e-126, -2.0587960670007823264e-142, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251471293e-126, 
1.2214168761472102282e-142, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 2.62202614552995759e-95, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868987041e-145, 2.62202614552995759e-95, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868987041e-145, 1.1238897120284541253e-95, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868987041e-145, 3.7482149527770239293e-96, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868987041e-145, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 
-7.8150389500644475446e-132, -3.9681466199873824165e-148, 1.0450891972142805974e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247794521e-132, 8.5448727249069983612e-148, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247794521e-132, 8.5448727249069983612e-148, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247794521e-132, 8.5448727249069983612e-148, 1.6400545060233297363e-101, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 1.6400545060233297363e-101, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 1.6400545060233297363e-101, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 1.0404514546648604359e-103, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435652883e-152, 1.0404514546648604359e-103, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435652883e-152, 4.8235214251531210473e-104, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435652883e-152, 2.0330248644053793915e-104, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435652883e-152, 6.3777658403150887343e-105, 
-2.0152904854894725532e-121, -3.156241481857667737e-137, -7.0684085473731388916e-153, 6.3777658403150887343e-105, -2.0152904854894725532e-121, -3.156241481857667737e-137, -7.0684085473731388916e-153, 2.88964513938041089e-105, 5.7298933442091639924e-121, -3.156241481857667737e-137, -7.0684085473731388916e-153, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852681095e-137, 2.4782675885631257398e-153, 2.7355461367940366859e-106, -7.8994528064813712419e-123, -2.0037599452814940222e-138, 9.1598554579059548847e-155, 2.7355461367940366859e-106, -7.8994528064813712419e-123, -2.0037599452814940222e-138, 9.1598554579059548847e-155, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534147855e-139, 9.1598554579059548847e-155, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534147855e-139, 9.1598554579059548847e-155, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.9359195088038447797e-109, -4.8867691298577234423e-126, -2.0587960670007819622e-142, -2.8326669474241479263e-158, 1.9359195088038447797e-109, -4.8867691298577234423e-126, -2.0587960670007819622e-142, -2.8326669474241479263e-158, 1.9359195088038447797e-109, -4.8867691298577234423e-126, -2.0587960670007819622e-142, -2.8326669474241479263e-158, 8.7142954880180709975e-110, -4.8867691298577234423e-126, -2.0587960670007819622e-142, 
-2.8326669474241479263e-158, 3.3918456880078814158e-110, 6.931443500908017045e-126, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220312367e-159, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220312367e-159, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657616072e-160, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657616072e-160, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657616072e-160, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657616072e-160, 2.3732923938934761454e-112, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 2.9421044076449630171e-113, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 2.9421044076449630171e-113, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 2.9421044076449630171e-113, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 3.4325196623373878948e-114, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, 3.4325196623373878948e-114, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, 3.4325196623373878948e-114, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, 1.8395411057335783574e-115, 
-7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, 8.2436437080731844263e-116, 1.4726412753514008951e-131, -3.9681466199873824165e-148, 2.9106774506606945839e-164, 3.1677600334418871069e-116, 3.4556869017247794521e-132, 8.544872724906996972e-148, 1.6802919634942429241e-163, 6.2981819612623816536e-117, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254779927e-164, 6.2981819612623816536e-117, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254779927e-164, 6.2981819612623816536e-117, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254779927e-164, 3.1257546646178208289e-117, -6.6414926959353515111e-134, -5.7828074707888119584e-150, -1.2825052715093464343e-165, 1.5395410162955400644e-117, -6.6414926959353515111e-134, -5.7828074707888119584e-150, -1.2825052715093464343e-165, 7.4643419213439950602e-118, 1.0969016447485317626e-133, -5.7828074707888119584e-150, -1.2825052715093464343e-165, 3.4988078005382940294e-118, 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 1.5160407401354430737e-118, 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 5.2465720993401781599e-119, -3.755176715260116501e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 
1.3475077173907800538e-120, -3.156241481857667737e-137, -7.0684085473731388916e-153, -3.3573283875161501977e-170, 5.7298933442091639924e-121, -3.156241481857667737e-137, -7.0684085473731388916e-153, -3.3573283875161501977e-170, 1.8573014293598452896e-121, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 1.8573014293598452896e-121, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 8.8915345064751572143e-122, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 4.0507946129135104481e-122, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911825673e-172, 1.6304246661326865276e-122, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911825673e-172, 4.2023969274227456735e-123, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911825673e-172, 4.2023969274227456735e-123, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911825673e-172, 1.1769344939467164447e-123, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064369683e-172, 1.1769344939467164447e-123, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064369683e-172, 4.2056888557770896953e-124, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064369683e-172, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 1.8749656131673758844e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 6.931443500908017045e-126, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 
1.2381024895275844856e-174, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, 2.8369889610228834887e-127, 4.0136364036021218058e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, 2.8369889610228834887e-127, 4.0136364036021218058e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657612913e-160, -2.5389576707476506925e-176, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.197724948400014906e-177, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.197724948400014906e-177, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.197724948400014906e-177, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.197724948400014906e-177, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691155518e-177, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691155518e-177, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691155518e-177, 2.175994780857201024e-130, 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 2.175994780857201024e-130, 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 3.7267864457092460442e-131, 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 3.7267864457092460442e-131, 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 3.7267864457092460442e-131, 4.6083930759590139305e-147, 
1.6802919634942426156e-163, 2.8330093736631818036e-179, 1.4726412753514008951e-131, -3.9681466199873824165e-148, 2.9106774506606941983e-164, 5.1948630316441296498e-180, 3.4556869017247794521e-132, 8.544872724906996972e-148, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 3.4556869017247794521e-132, 8.544872724906996972e-148, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795152755e-180, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795152755e-180, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795152755e-180, 2.8579525590905986764e-133, -5.7828074707888119584e-150, -1.2825052715093464343e-165, -1.0696067158221530218e-181, 1.0969016447485317626e-133, -5.7828074707888119584e-150, -1.2825052715093464343e-165, -1.0696067158221530218e-181, 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 1.3535321672928907047e-182, 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 1.3535321672928907047e-182, 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 1.3535321672928907047e-182, 1.0631050543111905033e-134, 1.5490398016102376505e-150, 3.4549185946116918017e-166, 1.3535321672928907047e-182, 5.1277664357929471499e-135, 3.2706525621039604902e-151, 7.4159004299416557678e-167, 1.3535321672928907047e-182, 2.3761243821334675971e-135, 3.2706525621039604902e-151, 7.4159004299416557678e-167, 1.3535321672928907047e-182, 1.0003033553037281263e-135, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, 1.4041521353514076604e-136, 
2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, 5.4426399358282049106e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328578981e-188, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328578981e-188, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328578981e-188, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328578981e-188, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 
-8.4789520282639751913e-191, 4.5016298192952031469e-142, -2.8326669474241479263e-158, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 4.0136364036021218058e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, -6.2404128071707654958e-193, 4.0136364036021218058e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, -6.2404128071707654958e-193, 1.9635033141346264592e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, -6.2404128071707654958e-193, 9.3843676940087855824e-144, 1.2626949989038732076e-159, 2.2730883653953564668e-175, 2.7431118386590483722e-191, 4.2590349703400483539e-144, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896458822e-192, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896458822e-192, 4.1503542758849472122e-145, -1.7614040799531193879e-161, -1.6991004655691153326e-177, -1.856794109153959173e-193, 4.1503542758849472122e-145, -1.7614040799531193879e-161, -1.6991004655691153326e-177, -1.856794109153959173e-193, 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691153326e-177, -1.856794109153959173e-193, 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691153326e-177, -1.856794109153959173e-193, 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 4.6083930759590139305e-147, 
1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 2.105789206980137775e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 8.544872724906996972e-148, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 2.2883630524598079723e-148, 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091032843e-196, 2.2883630524598079723e-148, 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091032843e-196, 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795150614e-180, 1.1067843414450286726e-196, 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795150614e-180, 1.1067843414450286726e-196, 3.3320377982006123631e-149, 3.0588204110786950436e-165, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 1.3768785255608653665e-149, 3.0588204110786950436e-165, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.9929888924099219388e-150, -1.9717385086233606481e-166, 1.3535321672928907047e-182, 3.1205762277848031878e-199, 3.9929888924099219388e-150, -1.9717385086233606481e-166, 1.3535321672928907047e-182, 3.1205762277848031878e-199, 1.5490398016102376505e-150, 3.4549185946116918017e-166, 1.3535321672928907047e-182, 3.1205762277848031878e-199, 3.2706525621039604902e-151, 7.4159004299416557678e-167, 1.3535321672928907047e-182, 3.1205762277848031878e-199, 3.2706525621039604902e-151, 7.4159004299416557678e-167, 1.3535321672928907047e-182, 3.1205762277848031878e-199, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 
2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.4980560800565462618e-202, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.4980560800565462618e-202, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.4980560800565462618e-202, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 1.7015147267057481414e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 1.7015147267057481414e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 1.7015147267057481414e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 7.6922213530572229852e-156, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, -3.2905064432040069127e-204, 7.0002691755702864582e-157, 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, 7.0002691755702864582e-157, 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 
-8.4789520282639751913e-191, -1.3321093418096261919e-207, 4.4508689228885539715e-158, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 3.5387999583765925506e-159, 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.3321093418096261919e-207, 1.2626949989038732076e-159, 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.3321093418096261919e-207, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, 5.3514239183991277695e-161, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, 1.7950099192230045857e-161, -1.6991004655691153326e-177, -1.8567941091539589297e-193, -1.8074851186411640793e-209, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 
1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091013832e-196, 1.7562785002189357559e-211, 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091013832e-196, 1.7562785002189357559e-211, 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091013832e-196, 1.7562785002189357559e-211, 1.1741471776254777999e-164, 1.3389912474795150614e-180, 1.106784341445028435e-196, 3.3045982549756583552e-212, 3.0588204110786950436e-165, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 3.0588204110786950436e-165, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 8.8815756978467430465e-166, 1.3403131492807310959e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 8.8815756978467430465e-166, 1.3403131492807310959e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 3.4549185946116918017e-166, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, 7.4159004299416557678e-167, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, 7.4159004299416557678e-167, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 2.0862146470760309789e-168, -1.146150630053972131e-184, -1.4832196127821708615e-201, 
2.6911956484118910092e-218, 2.0862146470760309789e-168, -1.146150630053972131e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 1.026320681600434562e-168, 1.2072867382105631402e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 4.9637369886263658882e-169, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, 2.3140020749373754342e-169, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, 9.8913461809288020723e-170, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, 3.2670088967063259373e-170, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, 3.2670088967063259373e-170, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, 1.6109245756507072713e-170, -6.2044048008378732802e-187, -5.4322544592823556944e-203, 4.2491789852161138683e-219, 7.8288241512289757055e-171, 1.2181824638728806485e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, 3.6886133485899290404e-171, 2.9887099189454666024e-187, 4.774153170641553462e-203, 4.2491789852161138683e-219, 1.6185079472704052482e-171, 2.9887099189454666024e-187, 4.774153170641553462e-203, 4.2491789852161138683e-219, 5.8345524661064358191e-172, 6.9043123899963188689e-188, -3.2905064432040069127e-204, -9.1795828160190082842e-224, 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190082842e-224, 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190082842e-224, 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190082842e-224, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 1.2381024895275844856e-174, 
-8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 1.0095962991602958391e-175, -6.2404128071707654958e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, 3.7785026604276538491e-176, -6.2404128071707654958e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, -5.3441928036578162463e-225, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, -5.3441928036578162463e-225, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, -5.3441928036578162463e-225, 2.2493122414154495675e-177, 2.5268245888628466632e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, 2.7510588792316711745e-178, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450500218e-227, 2.7510588792316711745e-178, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450500218e-227, 2.7510588792316711745e-178, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450500218e-227, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 
9.9192633285681635836e-229, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, 1.2906606599973359683e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, 5.1948630316441287936e-180, 9.6685396110091013832e-196, 1.7562785002189355449e-211, 1.6821693549018732055e-227, 1.3389912474795150614e-180, 1.106784341445028435e-196, 3.3045982549756578275e-212, 6.2685154049107876715e-228, 1.3389912474795150614e-180, 1.106784341445028435e-196, 3.3045982549756578275e-212, 6.2685154049107876715e-228, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 2.5658818466966882188e-231, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 2.5658818466966882188e-231, 1.3403131492807310959e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 2.5658818466966882188e-231, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, 6.0043220944823941786e-183, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, 2.2388223052591377446e-183, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.5607241064750984115e-184, 
-1.4832196127821708615e-201, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 1.2072867382105631402e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 1.2181824638728806485e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 2.9887099189454666024e-187, 4.774153170641553462e-203, 4.2491789852161132393e-219, 7.4467067939231424594e-235, 2.9887099189454666024e-187, 4.774153170641553462e-203, 4.2491789852161132393e-219, 7.4467067939231424594e-235, 6.9043123899963188689e-188, -3.2905064432040069127e-204, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 6.9043123899963188689e-188, -3.2905064432040069127e-204, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 4.4040360264865697732e-189, -1.0100405885278530137e-205, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 8.129755890712020335e-190, 9.8339840169166049336e-206, -9.1795828160190063645e-224, 
-2.3569545504732004486e-239, 8.129755890712020335e-190, 9.8339840169166049336e-206, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 8.129755890712020335e-190, 9.8339840169166049336e-206, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 3.6409303439428119063e-190, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 1.3965175705582071936e-190, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 1.3403538552936701153e-191, 1.7826390804083638359e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 6.389748636109812983e-192, 2.2526486929936882202e-208, -5.3441928036578156465e-225, -7.741539335184153052e-241, 2.8828536776963681193e-192, 2.2526486929936882202e-208, -5.3441928036578156465e-225, -7.741539335184153052e-241, 1.1294061984896456875e-192, 2.2526486929936882202e-208, -5.3441928036578156465e-225, -7.741539335184153052e-241, 2.5268245888628466632e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, 4.2560351759808952526e-241, 2.5268245888628466632e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, 4.2560351759808952526e-241, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450490845e-227, 1.3186893776791012681e-242, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450490845e-227, 1.3186893776791012681e-242, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450490845e-227, 1.3186893776791012681e-242, 6.1039071228393547627e-195, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, 6.1039071228393547627e-195, 
1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, 6.1039071228393547627e-195, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, 2.6792050150137250131e-195, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, 9.6685396110091013832e-196, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, 2.0416567491425607157e-177, 6.0959078275963141821e-193, 1.156336993964950812e-208, 2.7126166236326293347e-224, 2.0416567491425607157e-177, 6.0959078275963141821e-193, 1.156336993964950812e-208, 2.7126166236326293347e-224, 2.0416567491425607157e-177, 6.0959078275963141821e-193, 1.156336993964950812e-208, 2.7126166236326293347e-224, 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, 5.756447103644822603e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 5.756447103644822603e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 5.756447103644822603e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 5.756447103644822603e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 1.9005753194802080146e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 1.9005753194802080146e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, 
-3.6162764918921697356e-230, 9.3660737343905436753e-181, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 4.5462340041847754398e-181, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 2.1363141390818913221e-181, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 9.3135420653044926323e-182, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 3.2887424025472810002e-182, 7.185309278132283136e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 2.7634257116867652192e-183, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749095611e-233, 2.7634257116867652192e-183, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749095611e-233, 2.7634257116867652192e-183, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749095611e-233, 2.7634257116867652192e-183, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749095611e-233, 8.806758170751374203e-184, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749095611e-233, 8.806758170751374203e-184, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749095611e-233, 4.0998834342223036605e-184, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749095611e-233, 1.7464460659577689118e-184, 2.612671019845610006e-200, 2.1334073625072069974e-216, -9.2331809177749095611e-233, 5.697273818255015375e-185, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 5.697273818255015375e-185, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 2.755477107924346286e-185, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 1.2845787527590117414e-185, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 5.4912957517634446918e-186, 
-1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 1.8140498638501083305e-186, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 1.8140498638501083305e-186, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 8.9473839187177424013e-187, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 4.3508265588260719497e-187, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 2.0525478788802367239e-187, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 9.0340853890731911095e-188, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 3.288388689208603045e-188, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 4.1554033927630885323e-189, -9.8582956929636044137e-206, -1.4280619485269765742e-221, 1.2171222696290252021e-237, 4.1554033927630885323e-189, -9.8582956929636044137e-206, -1.4280619485269765742e-221, 1.2171222696290252021e-237, 4.1554033927630885323e-189, -9.8582956929636044137e-206, -1.4280619485269765742e-221, 1.2171222696290252021e-237, 5.643429553477207926e-190, 1.0076094209231528444e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, 5.643429553477207926e-190, 1.0076094209231528444e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, 5.643429553477207926e-190, 1.0076094209231528444e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, 1.1546040067079994973e-190, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 1.1546040067079994973e-190, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, 
-1.8313007053436627876e-240, 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, 1.4863145223629928288e-192, -7.9038076992129241506e-209, -1.609965144193984205e-224, -1.8313007053436627876e-240, 6.0959078275963141821e-193, 1.156336993964950812e-208, 2.7126166236326293347e-224, -1.8313007053436627876e-240, 1.712289129579509076e-193, 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, 1.712289129579509076e-193, 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, 6.1638445507530779946e-194, -6.0361608463951204924e-210, 1.1003018740995688645e-226, 5.827891678485165325e-243, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 3.418509674495068119e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 1.7061586205822532442e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 8.499830936258458068e-196, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 4.218953301476420881e-196, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 2.0785144840854027628e-196, -1.9512340798794268979e-214, 
-3.6162764918921692779e-230, -2.8387319855193022476e-246, 1.008295075389893466e-196, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, 4.7318537104213881764e-197, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, 2.0563051886826149345e-197, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, 7.185309278132283136e-198, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 2.612671019845610006e-200, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 2.612671019845610006e-200, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 1.306250843215349634e-200, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 6.5304075490021959302e-201, 6.8298960257742791824e-217, 6.8696910062179237095e-233, 3.8349029251851101018e-249, 3.2643571074265457254e-201, -4.2219277387461470355e-218, -1.753154605289404553e-234, -7.5861268822635538093e-251, 1.6313318866387202604e-201, -4.2219277387461470355e-218, -1.753154605289404553e-234, -7.5861268822635538093e-251, 
8.1481927624480752786e-202, -4.2219277387461470355e-218, -1.753154605289404553e-234, -7.5861268822635538093e-251, 4.0656297104785107096e-202, 4.8431832608149701961e-218, 8.3111403472061145651e-234, 1.6001805286092554504e-249, 2.0243481844937293316e-202, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, 1.0037074215013384159e-202, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, 4.9338704000514295811e-203, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, 2.3822684925704522921e-203, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, 1.1064675388299639308e-203, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608782288e-251, 4.6856706195971960852e-204, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608782288e-251, 1.4961682352459748279e-204, -8.0675475439086544798e-221, -3.6970842501441777651e-237, -5.7032870362481275794e-253, 1.4961682352459748279e-204, -8.0675475439086544798e-221, -3.6970842501441777651e-237, -5.7032870362481275794e-253, 6.9879263915816924805e-205, 9.6377473771091526132e-221, 1.5959741828948633012e-236, 2.7031904319843495713e-252, 3.0010484111426663515e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, 1.0076094209231528444e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 1.0889925813396166947e-207, 2.4325525462765697993e-223, 
-1.1429360314275701698e-239, 8.3218722366085688343e-256, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436625212e-240, -2.3341145329525059632e-256, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436625212e-240, -2.3341145329525059632e-256, 1.156336993964950812e-208, 2.7126166236326293347e-224, -1.8313007053436625212e-240, -2.3341145329525059632e-256, 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.1174271110208206547e-259, 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.1174271110208206547e-259, 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.1174271110208206547e-259, 6.1308251778939023781e-210, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.1174271110208206547e-259, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 
2.3568521170701555846e-212, -7.7818310317651142243e-229, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 1.1686698881356804311e-212, 1.8601114328504743806e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 5.7457877366844311816e-213, 5.409641648369814791e-229, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 2.7753321643482446169e-213, -1.1860946916976500828e-229, 6.3146909508553973881e-246, 1.2573885592501532045e-261, 1.290104378180150675e-213, 2.1117734783360818049e-229, 4.2928382696354204061e-245, -2.8075477999879273582e-261, 5.4749048509610403382e-214, 4.6283939331921604413e-230, 6.3146909508553973881e-246, 1.2573885592501532045e-261, 1.7618353855408067201e-214, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 1.7618353855408067201e-214, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.3356801918574821257e-215, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 3.6943433600821895879e-215, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 1.3736749441945438342e-215, -9.2331809177749077733e-233, -1.4042876247421726117e-248, -9.9505977179164858712e-265, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421726117e-248, -9.9505977179164858712e-265, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421726117e-248, -9.9505977179164858712e-265, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421726117e-248, -9.9505977179164858712e-265, 6.8298960257742791824e-217, 6.8696910062179237095e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, 6.8298960257742791824e-217, 6.8696910062179237095e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, 3.2038516259498326923e-217, -1.1817449557784924788e-233, -6.3454186796659920093e-250, -2.6436684620390282645e-267, 1.3908294260376086421e-217, 2.8439730252197153919e-233, 
3.8349029251851101018e-249, -2.6436684620390282645e-267, 4.8431832608149701961e-218, 8.3111403472061145651e-234, 1.6001805286092554504e-249, -2.6436684620390282645e-267, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, 9.6377473771091526132e-221, 1.5959741828948633012e-236, 2.7031904319843490867e-252, 2.638005906844372114e-268, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, 2.318094503184431479e-222, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 2.318094503184431479e-222, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 9.3486833747991514629e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 
2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 7.0351983914592419146e-224, 7.766758903588374524e-240, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 7.0351983914592419146e-224, 7.766758903588374524e-240, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 2.7126166236326293347e-224, -1.8313007053436625212e-240, -2.3341145329525056675e-256, -2.0046830753539155726e-272, 5.5132573971932232487e-225, 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, 5.5132573971932232487e-225, 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 2.560476225709334075e-227, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 2.560476225709334075e-227, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 
-1.472095602234059958e-277, 1.8601114328504743806e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 5.409641648369814791e-229, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 5.409641648369814791e-229, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 2.1117734783360818049e-229, 4.2928382696354204061e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 4.6283939331921604413e-230, 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 4.6283939331921604413e-230, 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580820317e-280, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580820317e-280, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580820317e-280, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580820317e-280, 2.4841276986611042098e-231, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, 1.1958979447416775482e-231, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, 5.5178306778196421733e-232, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, 2.2972562930210755192e-232, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, 6.8696910062179237095e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 6.8696910062179237095e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 2.8439730252197153919e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 8.3111403472061145651e-234, 
1.6001805286092554504e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 8.3111403472061145651e-234, 1.6001805286092554504e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 3.2789928709583552854e-234, 4.8281933032132812475e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 1.3390069830350552605e-235, -6.026193929640082176e-252, -7.0535576022338457803e-268, -4.3807022524130141006e-284, 1.3390069830350552605e-235, -6.026193929640082176e-252, -7.0535576022338457803e-268, -4.3807022524130141006e-284, 1.3390069830350552605e-235, -6.026193929640082176e-252, -7.0535576022338457803e-268, -4.3807022524130141006e-284, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, -4.3807022524130141006e-284, 1.5959741828948633012e-236, 2.7031904319843490867e-252, 2.638005906844371576e-268, 6.3790946999826013345e-284, 1.5959741828948633012e-236, 2.7031904319843490867e-252, 2.638005906844371576e-268, 6.3790946999826013345e-284, 6.1313287894022281692e-237, 5.2084434157824127104e-253, 2.1511502957481757317e-269, 3.2670891426006739096e-285, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, -9.5347405022956042207e-287, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, -9.5347405022956042207e-287, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, -9.5347405022956042207e-287, 6.0284645465737476297e-238, -2.4742181023285720738e-254, -1.2030990169203137715e-270, -9.5347405022956042207e-287, 2.9570854717154947523e-238, 4.3456134301905148502e-254, 6.3684349745470443788e-270, -9.5347405022956042207e-287, 1.4213959342863689955e-238, 9.3569766393097138822e-255, 2.5826679788133653036e-270, 
-9.5347405022956042207e-287, 6.5355116557180594664e-239, 9.3569766393097138822e-255, 2.5826679788133653036e-270, -9.5347405022956042207e-287, 2.6962878121452450746e-239, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 7.766758903588374524e-240, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 7.766758903588374524e-240, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.9677290991223565342e-240, -2.3341145329525056675e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 2.6827483411022054912e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 1.1830515272065748694e-241, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, 4.3320312025875939195e-242, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, 1.1413391350613183311e-243, -5.1586784110844895013e-260, -1.9524039360882352712e-276, -2.9779654517181717279e-292, 1.1413391350613183311e-243, -5.1586784110844895013e-260, -1.9524039360882352712e-276, -2.9779654517181717279e-292, 1.1413391350613183311e-243, -5.1586784110844895013e-260, -1.9524039360882352712e-276, -2.9779654517181717279e-292, 5.5552006713333735927e-244, 
7.8491179384773690214e-260, -1.9524039360882352712e-276, -2.9779654517181717279e-292, 2.6261053316934700345e-244, 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997740506e-292, 1.1615576618735179302e-244, 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997740506e-292, 4.2928382696354204061e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 2.8287088295287585094e-294, 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, 1.7379794826680480784e-246, 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, 1.7379794826680480784e-246, 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580810531e-280, 8.8634899828990930877e-296, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 
-2.7456019707854725967e-300, 1.6001805286092554504e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 4.8281933032132812475e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 4.8281933032132812475e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 2.0347903074934629333e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 6.3808880963355377617e-251, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 6.3808880963355377617e-251, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 2.8891343516857640937e-251, 5.1095823452235464813e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 1.1432574793608780349e-251, 1.2329569415922591084e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 2.7031904319843490867e-252, 2.638005906844371576e-268, 6.3790946999826013345e-284, -2.7456019707854725967e-300, 2.7031904319843490867e-252, 2.638005906844371576e-268, 6.3790946999826013345e-284, -2.7456019707854725967e-300, 5.2084434157824127104e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, 5.2084434157824127104e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, 5.2084434157824127104e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, 2.4805108027747776379e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, 1.1165444962709601017e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, 4.3456134301905148502e-254, 6.3684349745470443788e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 9.3569766393097138822e-255, 2.5826679788133653036e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 9.3569766393097138822e-255, 
2.5826679788133653036e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, 2.9938788518280315834e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, 1.6338236616337094706e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, 8.0132469526175071002e-258, 2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, 3.850752120757712373e-258, 2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, 1.7695047048278150093e-258, 2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, 7.2888099686286655858e-259, 5.581381609158630475e-275, 6.1155422068568946933e-291, 1.0380272777574237546e-306, 2.0856914288039227544e-259, -1.9524039360882352712e-276, -2.9779654517181712829e-292, -3.000817432603284506e-308, 2.0856914288039227544e-259, -1.9524039360882352712e-276, -2.9779654517181712829e-292, -3.000817432603284506e-308, 7.8491179384773690214e-260, -1.9524039360882352712e-276, -2.9779654517181712829e-292, 
-3.000817432603284506e-308, 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997738281e-292, 1.4493302844111182601e-308, 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997738281e-292, 1.4493302844111182601e-308, 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997738281e-292, 1.4493302844111182601e-308, 5.3223249184882342185e-261, -1.472095602234059958e-277, 2.8287088295287585094e-294, -1.0874435234232647519e-310, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, -1.0874435234232647519e-310, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, -1.0874435234232647519e-310, 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, -1.0874435234232647519e-310, 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, -1.0874435234232647519e-310, 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, -1.0874435234232647519e-310, 1.1412520821444306741e-262, -6.1787496089661820348e-279, -3.028042329852615431e-295, -2.182740474438892116e-311, 5.0610577601348040988e-263, 7.9243314524777990283e-279, -3.028042329852615431e-295, -2.182740474438892116e-311, 1.8853262294800541881e-263, 8.7279092175580810531e-280, 8.8634899828990930877e-296, -9.8167844904532653004e-314, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 9.8977243486757054781e-265, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 9.8977243486757054781e-265, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 4.9356438320276576408e-265, 
-8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 2.4546035737036337221e-265, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 1.2140834445416214873e-265, 1.8893435613692150014e-281, 3.0075895258731974416e-297, -9.8167844904532653004e-314, 5.9382337996061564537e-266, 5.1208955146257653156e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 2.8369334767011265554e-266, 5.1208955146257653156e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 1.2862833152486119506e-266, 1.6777604898591683764e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 5.1095823452235464813e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, 1.2329569415922591084e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, 1.2329569415922591084e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, 2.638005906844371576e-268, 6.3790946999826013345e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, 2.638005906844371576e-268, 6.3790946999826013345e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482773317e-301, 5.7350888195772519812e-317, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482773317e-301, 5.7350888195772519812e-317, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482773317e-301, 5.7350888195772519812e-317, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482773317e-301, 5.7350888195772519812e-317, 6.3684349745470443788e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 3.6369654387311681856e-319, 6.3684349745470443788e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 3.6369654387311681856e-319, 2.5826679788133653036e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 
3.6369654387311681856e-319, 6.8978448094652555593e-271, 1.1480487920352081009e-286, 7.5257037990230704094e-303, 3.6369654387311681856e-319, 6.8978448094652555593e-271, 1.1480487920352081009e-286, 7.5257037990230704094e-303, 3.6369654387311681856e-319, 2.1656360647981577662e-271, 9.7287370902823839435e-288, 1.6928061833779524157e-303, 3.6369654387311681856e-319, 2.1656360647981577662e-271, 9.7287370902823839435e-288, 1.6928061833779524157e-303, 3.6369654387311681856e-319, 9.825838786313830552e-272, 9.7287370902823839435e-288, 1.6928061833779524157e-303, 3.6369654387311681856e-319, 3.9105778554799569972e-272, 9.7287370902823839435e-288, 1.6928061833779524157e-303, 3.6369654387311681856e-319, 9.5294739006302120482e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322, 9.5294739006302120482e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322, 2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, 6.4228533959362050743e-323, }; NOEXPORT ALIGNED(64) const float Sleef_rempitabsp[] = { 0.159154892, 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 0.03415493667, 6.420638243e-09, 7.342738037e-17, 8.135951656e-24, 0.03415493667, 6.420638243e-09, 7.342738037e-17, 8.135951656e-24, 0.002904943191, -9.861969574e-11, -9.839336547e-18, -1.790215892e-24, 0.002904943191, -9.861969574e-11, -9.839336547e-18, -1.790215892e-24, 0.002904943191, -9.861969574e-11, -9.839336547e-18, -1.790215892e-24, 0.002904943191, -9.861969574e-11, -9.839336547e-18, -1.790215892e-24, 0.0009518179577, 1.342109202e-10, 1.791623576e-17, 1.518506657e-24, 0.0009518179577, 1.342109202e-10, 1.791623576e-17, 1.518506657e-24, 0.0004635368241, 1.779561221e-11, 4.038449606e-18, 
-1.358546052e-25, 0.0002193961991, 1.779561221e-11, 4.038449606e-18, -1.358546052e-25, 9.73258866e-05, 1.779561221e-11, 4.038449606e-18, -1.358546052e-25, 3.62907449e-05, 3.243700447e-12, 5.690024473e-19, 7.09405479e-26, 5.773168596e-06, 1.424711477e-12, 1.3532163e-19, 1.92417627e-26, 5.773168596e-06, 1.424711477e-12, 1.3532163e-19, 1.92417627e-26, 5.773168596e-06, 1.424711477e-12, 1.3532163e-19, 1.92417627e-26, 1.958472239e-06, 5.152167755e-13, 1.3532163e-19, 1.92417627e-26, 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, 5.112411827e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, 2.132179588e-08, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, 6.420638243e-09, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.420638243e-09, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 2.695347945e-09, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 8.327027956e-10, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 8.327027956e-10, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 3.670415083e-10, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 1.342109202e-10, 1.791623576e-17, 1.518506361e-24, 2.613904e-31, 1.779561221e-11, 4.038449606e-18, -1.358545683e-25, -3.443243946e-32, 1.779561221e-11, 4.038449606e-18, -1.358545683e-25, -3.443243946e-32, 1.779561221e-11, 4.038449606e-18, -1.358545683e-25, -3.443243946e-32, 3.243700447e-12, 5.690024473e-19, 7.094053557e-26, 1.487136711e-32, 3.243700447e-12, 5.690024473e-19, 7.094053557e-26, 1.487136711e-32, 3.243700447e-12, 5.690024473e-19, 7.094053557e-26, 1.487136711e-32, 1.424711477e-12, 1.3532163e-19, 1.924175961e-26, 2.545416018e-33, 5.152167755e-13, 1.3532163e-19, 1.924175961e-26, 2.545416018e-33, 
6.046956013e-14, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, 6.046956013e-14, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, 6.046956013e-14, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, 3.626141271e-15, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, 3.626141271e-15, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, 3.626141271e-15, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, 3.626141271e-15, -2.036222915e-22, 6.177846108e-30, 1.082084378e-36, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.296048013e-40, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.296048013e-40, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.296048013e-40, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.296048013e-40, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.296048013e-40, 7.342738037e-17, 8.135951656e-24, -1.330400526e-31, 6.296048013e-40, 1.791623576e-17, 1.518506361e-24, 2.61390353e-31, 4.764937743e-38, 1.791623576e-17, 1.518506361e-24, 2.61390353e-31, 4.764937743e-38, 4.038449606e-18, -1.358545683e-25, -3.443243946e-32, 6.296048013e-40, 4.038449606e-18, -1.358545683e-25, -3.443243946e-32, 6.296048013e-40, 5.690024473e-19, 7.094053557e-26, 1.487136711e-32, 6.296048013e-40, 5.690024473e-19, 7.094053557e-26, 1.487136711e-32, 6.296048013e-40, 5.690024473e-19, 7.094053557e-26, 1.487136711e-32, 6.296048013e-40, 1.3532163e-19, 1.924175961e-26, 2.545415467e-33, 6.296048013e-40, 1.3532163e-19, 1.924175961e-26, 2.545415467e-33, 6.296048013e-40, 2.690143217e-20, -1.452834402e-28, -6.441077673e-36, -1.764234767e-42, 2.690143217e-20, -1.452834402e-28, -6.441077673e-36, -1.764234767e-42, 2.690143217e-20, -1.452834402e-28, -6.441077673e-36, -1.764234767e-42, 1.334890502e-20, -1.452834402e-28, -6.441077673e-36, -1.764234767e-42, 6.572641438e-21, -1.452834402e-28, -6.441077673e-36, -1.764234767e-42, 0.05874381959, 1.222115387e-08, 7.693612965e-16, 1.792054435e-22, 0.02749382704, 4.77057327e-09, 7.693612965e-16, 1.792054435e-22, 
0.01186883077, 1.045283415e-09, 3.252721926e-16, 7.332633139e-23, 0.00405633077, 1.045283415e-09, 3.252721926e-16, 7.332633139e-23, 0.000150081818, -2.454155802e-12, 1.161414894e-20, 1.291319272e-27, 0.000150081818, -2.454155802e-12, 1.161414894e-20, 1.291319272e-27, 0.000150081818, -2.454155802e-12, 1.161414894e-20, 1.291319272e-27, 0.000150081818, -2.454155802e-12, 1.161414894e-20, 1.291319272e-27, 0.000150081818, -2.454155802e-12, 1.161414894e-20, 1.291319272e-27, 2.801149822e-05, 4.821800945e-12, 8.789757674e-19, 1.208447639e-25, 2.801149822e-05, 4.821800945e-12, 8.789757674e-19, 1.208447639e-25, 2.801149822e-05, 4.821800945e-12, 8.789757674e-19, 1.208447639e-25, 1.275271279e-05, 1.183823005e-12, 1.161414894e-20, 1.291319272e-27, 5.12331826e-06, 1.183823005e-12, 1.161414894e-20, 1.291319272e-27, 1.308621904e-06, 2.743283031e-13, 1.161414894e-20, 1.291319272e-27, 1.308621904e-06, 2.743283031e-13, 1.161414894e-20, 1.291319272e-27, 3.549478151e-07, 4.695462769e-14, 1.161414894e-20, 1.291319272e-27, 3.549478151e-07, 4.695462769e-14, 1.161414894e-20, 1.291319272e-27, 1.165292645e-07, 1.853292503e-14, 4.837885366e-21, 1.291319272e-27, 1.165292645e-07, 1.853292503e-14, 4.837885366e-21, 1.291319272e-27, 5.69246339e-08, 4.322073705e-15, 1.449754789e-21, 7.962890365e-29, 2.712231151e-08, 4.322073705e-15, 1.449754789e-21, 7.962890365e-29, 1.222115387e-08, 7.693612965e-16, 1.792054182e-22, 2.91418027e-29, 4.77057327e-09, 7.693612965e-16, 1.792054182e-22, 2.91418027e-29, 1.045283415e-09, 3.252721926e-16, 7.332632508e-23, 3.898253736e-30, 1.045283415e-09, 3.252721926e-16, 7.332632508e-23, 3.898253736e-30, 1.139611461e-10, 1.996093359e-17, 5.344349223e-25, 1.511644828e-31, 1.139611461e-10, 1.996093359e-17, 5.344349223e-25, 1.511644828e-31, 1.139611461e-10, 1.996093359e-17, 5.344349223e-25, 1.511644828e-31, 1.139611461e-10, 1.996093359e-17, 5.344349223e-25, 1.511644828e-31, 5.575349904e-11, 6.083145782e-18, 5.344349223e-25, 1.511644828e-31, 2.664967552e-11, -8.557475018e-19, 
-8.595036458e-26, -2.139883875e-32, 1.209775682e-11, 2.61369883e-18, 5.344349223e-25, 1.511644828e-31, 4.821800945e-12, 8.789757674e-19, 1.208447639e-25, 3.253064536e-33, 1.183823005e-12, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34, 1.183823005e-12, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34, 2.743283031e-13, 1.161414894e-20, 1.29131908e-27, 1.715766248e-34, }; sleef-3.5.1/src/libm/rename.h000066400000000000000000000120121373003144100157400ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #define xsin Sleef_sin_u35 #define xcos Sleef_cos_u35 #define xsincos Sleef_sincos_u35 #define xtan Sleef_tan_u35 #define xasin Sleef_asin_u35 #define xacos Sleef_acos_u35 #define xatan Sleef_atan_u35 #define xatan2 Sleef_atan2_u35 #define xlog Sleef_log_u35 #define xcbrt Sleef_cbrt_u35 #define xsin_u1 Sleef_sin_u10 #define xcos_u1 Sleef_cos_u10 #define xsincos_u1 Sleef_sincos_u10 #define xtan_u1 Sleef_tan_u10 #define xasin_u1 Sleef_asin_u10 #define xacos_u1 Sleef_acos_u10 #define xatan_u1 Sleef_atan_u10 #define xatan2_u1 Sleef_atan2_u10 #define xlog_u1 Sleef_log_u10 #define xcbrt_u1 Sleef_cbrt_u10 #define xexp Sleef_exp_u10 #define xpow Sleef_pow_u10 #define xsinh Sleef_sinh_u10 #define xcosh Sleef_cosh_u10 #define xtanh Sleef_tanh_u10 #define xsinh_u35 Sleef_sinh_u35 #define xcosh_u35 Sleef_cosh_u35 #define xtanh_u35 Sleef_tanh_u35 #define xasinh Sleef_asinh_u10 #define xacosh Sleef_acosh_u10 #define xatanh Sleef_atanh_u10 #define xexp2 Sleef_exp2_u10 #define xexp10 Sleef_exp10_u10 #define xexp2_u35 Sleef_exp2_u35 #define xexp10_u35 Sleef_exp10_u35 #define xexpm1 Sleef_expm1_u10 #define xlog10 Sleef_log10_u10 #define xlog2 Sleef_log2_u10 #define xlog2_u35 Sleef_log2_u35 #define xlog1p Sleef_log1p_u10 #define xsincospi_u05 Sleef_sincospi_u05 #define xsincospi_u35 Sleef_sincospi_u35 
#define xsinpi_u05 Sleef_sinpi_u05 #define xcospi_u05 Sleef_cospi_u05 #define xldexp Sleef_ldexp #define xilogb Sleef_ilogb #define xfma Sleef_fma #define xsqrt Sleef_sqrt #define xsqrt_u05 Sleef_sqrt_u05 #define xsqrt_u35 Sleef_sqrt_u35 #define xhypot_u05 Sleef_hypot_u05 #define xhypot_u35 Sleef_hypot_u35 #define xfabs Sleef_fabs #define xcopysign Sleef_copysign #define xfmax Sleef_fmax #define xfmin Sleef_fmin #define xfdim Sleef_fdim #define xtrunc Sleef_trunc #define xfloor Sleef_floor #define xceil Sleef_ceil #define xround Sleef_round #define xrint Sleef_rint #define xnextafter Sleef_nextafter #define xfrfrexp Sleef_frfrexp #define xexpfrexp Sleef_expfrexp #define xfmod Sleef_fmod #define xremainder Sleef_remainder #define xmodf Sleef_modf #define xlgamma_u1 Sleef_lgamma_u10 #define xtgamma_u1 Sleef_tgamma_u10 #define xerf_u1 Sleef_erf_u10 #define xerfc_u15 Sleef_erfc_u15 // #define xsinf Sleef_sinf_u35 #define xcosf Sleef_cosf_u35 #define xsincosf Sleef_sincosf_u35 #define xtanf Sleef_tanf_u35 #define xasinf Sleef_asinf_u35 #define xacosf Sleef_acosf_u35 #define xatanf Sleef_atanf_u35 #define xatan2f Sleef_atan2f_u35 #define xlogf Sleef_logf_u35 #define xcbrtf Sleef_cbrtf_u35 #define xsinf_u1 Sleef_sinf_u10 #define xcosf_u1 Sleef_cosf_u10 #define xsincosf_u1 Sleef_sincosf_u10 #define xtanf_u1 Sleef_tanf_u10 #define xasinf_u1 Sleef_asinf_u10 #define xacosf_u1 Sleef_acosf_u10 #define xatanf_u1 Sleef_atanf_u10 #define xatan2f_u1 Sleef_atan2f_u10 #define xlogf_u1 Sleef_logf_u10 #define xcbrtf_u1 Sleef_cbrtf_u10 #define xexpf Sleef_expf_u10 #define xpowf Sleef_powf_u10 #define xsinhf Sleef_sinhf_u10 #define xcoshf Sleef_coshf_u10 #define xtanhf Sleef_tanhf_u10 #define xsinhf_u35 Sleef_sinhf_u35 #define xcoshf_u35 Sleef_coshf_u35 #define xtanhf_u35 Sleef_tanhf_u35 #define xasinhf Sleef_asinhf_u10 #define xacoshf Sleef_acoshf_u10 #define xatanhf Sleef_atanhf_u10 #define xexp2f Sleef_exp2f_u10 #define xexp10f Sleef_exp10f_u10 #define xexp2f_u35 Sleef_exp2f_u35 
#define xexp10f_u35 Sleef_exp10f_u35 #define xexpm1f Sleef_expm1f_u10 #define xlog10f Sleef_log10f_u10 #define xlog2f Sleef_log2f_u10 #define xlog10f_u35 Sleef_log10f_u35 #define xlog2f_u35 Sleef_log2f_u35 #define xlog1pf Sleef_log1pf_u10 #define xsincospif_u05 Sleef_sincospif_u05 #define xsincospif_u35 Sleef_sincospif_u35 #define xsinpif_u05 Sleef_sinpif_u05 #define xcospif_u05 Sleef_cospif_u05 #define xldexpf Sleef_ldexpf #define xilogbf Sleef_ilogbf #define xfmaf Sleef_fmaf #define xsqrtf Sleef_sqrtf #define xsqrtf_u05 Sleef_sqrtf_u05 #define xsqrtf_u35 Sleef_sqrtf_u35 #define xhypotf_u05 Sleef_hypotf_u05 #define xhypotf_u35 Sleef_hypotf_u35 #define xfabsf Sleef_fabsf #define xcopysignf Sleef_copysignf #define xfmaxf Sleef_fmaxf #define xfminf Sleef_fminf #define xfdimf Sleef_fdimf #define xtruncf Sleef_truncf #define xfloorf Sleef_floorf #define xceilf Sleef_ceilf #define xroundf Sleef_roundf #define xrintf Sleef_rintf #define xnextafterf Sleef_nextafterf #define xfrfrexpf Sleef_frfrexpf #define xexpfrexpf Sleef_expfrexpf #define xfmodf Sleef_fmodf #define xremainderf Sleef_remainderf #define xmodff Sleef_modff #define xlgammaf_u1 Sleef_lgammaf_u10 #define xtgammaf_u1 Sleef_tgammaf_u10 #define xerff_u1 Sleef_erff_u10 #define xerfcf_u15 Sleef_erfcf_u15 #define xfastsinf_u3500 Sleef_fastsinf_u3500 #define xfastcosf_u3500 Sleef_fastcosf_u3500 #define xfastpowf_u3500 Sleef_fastpowf_u3500 // #define xsincospil_u05 Sleef_sincospil_u05 #define xsincospil_u35 Sleef_sincospil_u35 #define xsincospiq_u05 Sleef_sincospiq_u05 #define xsincospiq_u35 Sleef_sincospiq_u35 sleef-3.5.1/src/libm/sleef.pc.in000066400000000000000000000005221373003144100163520ustar00rootroot00000000000000prefix=@CMAKE_INSTALL_PREFIX@ libdir=@CMAKE_INSTALL_FULL_LIBDIR@ includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: SLEEF Description: SIMD Library for Evaluating Elementary Functions, vectorized libm and DFT Version: @SLEEF_VERSION_MAJOR@.@SLEEF_VERSION_MINOR@.@SLEEF_VERSION_PATCHLEVEL@ Cflags: 
-I${includedir} Libs: -L${libdir} -lsleef sleef-3.5.1/src/libm/sleefdp.c000066400000000000000000002354451373003144100161270ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. #include #include #include #include #include #ifndef ENABLE_BUILTIN_MATH #include #define SQRT sqrt #else #define SQRT __builtin_sqrt #endif #include "misc.h" extern const double Sleef_rempitabdp[]; #ifdef DORENAME #include "rename.h" #endif #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif #define MLA mla #define C2V(x) (x) #include "estrin.h" static INLINE CONST int64_t doubleToRawLongBits(double d) { union { double f; int64_t i; } tmp; tmp.f = d; return tmp.i; } static INLINE CONST double longBitsToDouble(int64_t i) { union { double f; int64_t i; } tmp; tmp.i = i; return tmp.f; } static INLINE CONST double fabsk(double x) { return longBitsToDouble(INT64_C(0x7fffffffffffffff) & doubleToRawLongBits(x)); } static INLINE CONST double mulsign(double x, double y) { return longBitsToDouble(doubleToRawLongBits(x) ^ (doubleToRawLongBits(y) & (INT64_C(1) << 63))); } static INLINE CONST double copysignk(double x, double y) { return longBitsToDouble((doubleToRawLongBits(x) & ~(INT64_C(1) << 63)) ^ (doubleToRawLongBits(y) & (INT64_C(1) << 63))); } static INLINE CONST double sign(double d) { return mulsign(1, d); } static INLINE CONST double mla(double x, double y, double z) { return x * y + z; } static INLINE CONST double rintk(double x) { return x < 0 ? (int)(x - 0.5) : (int)(x + 0.5); } static INLINE CONST int ceilk(double x) { return (int)x + (x < 0 ? 0 : 1); } static INLINE CONST double trunck(double x) { return (double)(int)x; } static INLINE CONST double fmink(double x, double y) { return x < y ? 
x : y; } static INLINE CONST double fmaxk(double x, double y) { return x > y ? x : y; } static INLINE CONST int xisnan(double x) { return x != x; } static INLINE CONST int xisinf(double x) { return x == SLEEF_INFINITY || x == -SLEEF_INFINITY; } static INLINE CONST int xisminf(double x) { return x == -SLEEF_INFINITY; } static INLINE CONST int xispinf(double x) { return x == SLEEF_INFINITY; } static INLINE CONST int xisnegzero(double x) { return doubleToRawLongBits(x) == doubleToRawLongBits(-0.0); } static INLINE CONST int xisnumber(double x) { return !xisinf(x) && !xisnan(x); } static INLINE CONST int xisint(double d) { double x = d - (double)(INT64_C(1) << 31) * (int)(d * (1.0 / (INT64_C(1) << 31))); return (x == (int)x) || (fabsk(d) >= (double)(INT64_C(1) << 53)); } static INLINE CONST int xisodd(double d) { double x = d - (double)(INT64_C(1) << 31) * (int)(d * (1.0 / (INT64_C(1) << 31))); return (1 & (int)x) != 0 && fabsk(d) < (double)(INT64_C(1) << 53); } static INLINE CONST double pow2i(int q) { return longBitsToDouble(((int64_t)(q + 0x3ff)) << 52); } static INLINE CONST double ldexpk(double x, int q) { double u; int m; m = q >> 31; m = (((m + q) >> 9) - m) << 7; q = q - (m << 2); m += 0x3ff; m = m < 0 ? 0 : m; m = m > 0x7ff ? 
0x7ff : m; u = longBitsToDouble(((int64_t)m) << 52); x = x * u * u * u * u; u = longBitsToDouble(((int64_t)(q + 0x3ff)) << 52); return x * u; } static INLINE CONST double ldexp2k(double d, int e) { // faster than ldexpk, short reach return d * pow2i(e >> 1) * pow2i(e - (e >> 1)); } static INLINE CONST double ldexp3k(double d, int e) { // very fast, no denormal return longBitsToDouble(doubleToRawLongBits(d) + (((int64_t)e) << 52)); } EXPORT CONST double xldexp(double x, int exp) { if (exp > 2100) exp = 2100; if (exp < -2100) exp = -2100; int e0 = exp >> 2; if (exp < 0) e0++; if (-100 < exp && exp < 100) e0 = 0; int e1 = exp - (e0 << 2); double p = pow2i(e0); double ret = x * pow2i(e1) * p * p * p * p; return ret; } static INLINE CONST int ilogbk(double d) { int m = d < 4.9090934652977266E-91; d = m ? 2.037035976334486E90 * d : d; int q = (doubleToRawLongBits(d) >> 52) & 0x7ff; q = m ? q - (300 + 0x03ff) : q - 0x03ff; return q; } // ilogb2k is similar to ilogbk, but the argument has to be a // normalized FP value. static INLINE CONST int ilogb2k(double d) { return ((doubleToRawLongBits(d) >> 52) & 0x7ff) - 0x3ff; } EXPORT CONST int xilogb(double d) { int e = ilogbk(fabsk(d)); e = d == 0.0 ? SLEEF_FP_ILOGB0 : e; e = xisnan(d) ? SLEEF_FP_ILOGBNAN : e; e = xisinf(d) ? 
INT_MAX : e; return e; } // #ifndef NDEBUG static int checkfp(double x) { if (xisinf(x) || xisnan(x)) return 1; return 0; } #endif static INLINE CONST double upper(double d) { return longBitsToDouble(doubleToRawLongBits(d) & INT64_C(0xfffffffff8000000)); } static INLINE CONST Sleef_double2 dd(double h, double l) { Sleef_double2 ret; ret.x = h; ret.y = l; return ret; } static INLINE CONST Sleef_double2 ddnormalize_d2_d2(Sleef_double2 t) { Sleef_double2 s; s.x = t.x + t.y; s.y = t.x - s.x + t.y; return s; } static INLINE CONST Sleef_double2 ddscale_d2_d2_d(Sleef_double2 d, double s) { Sleef_double2 r; r.x = d.x * s; r.y = d.y * s; return r; } static INLINE CONST Sleef_double2 ddneg_d2_d2(Sleef_double2 d) { Sleef_double2 r; r.x = -d.x; r.y = -d.y; return r; } static INLINE CONST Sleef_double2 ddabs_d2_d2(Sleef_double2 x) { return dd(x.x < 0 ? -x.x : x.x, x.x < 0 ? -x.y : x.y); } /* * ddadd and ddadd2 are functions for double-double addition. ddadd * is simpler and faster than ddadd2, but it requires the absolute * value of first argument to be larger than the second argument. The * exact condition that should be met is checked if NDEBUG macro is * not defined. * * Please note that if the results won't be used, it is no problem to * feed arguments that do not meet this condition. You will see * warning messages if you turn off NDEBUG macro and run tester2, but * this is normal. * * Please see : * Jonathan Richard Shewchuk, Adaptive Precision Floating-Point * Arithmetic and Fast Robust Geometric Predicates, Discrete & * Computational Geometry 18:305-363, 1997. 
*/ static INLINE CONST Sleef_double2 ddadd_d2_d_d(double x, double y) { // |x| >= |y| Sleef_double2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y) || fabsk(x) >= fabsk(y) || (fabsk(x+y) <= fabsk(x) && fabsk(x+y) <= fabsk(y)))) { fprintf(stderr, "[ddadd_d2_d_d : %g, %g]\n", x, y); fflush(stderr); } #endif r.x = x + y; r.y = x - r.x + y; return r; } static INLINE CONST Sleef_double2 ddadd2_d2_d_d(double x, double y) { Sleef_double2 r; r.x = x + y; double v = r.x - x; r.y = (x - (r.x - v)) + (y - v); return r; } static INLINE CONST Sleef_double2 ddadd_d2_d2_d(Sleef_double2 x, double y) { // |x| >= |y| Sleef_double2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y) || fabsk(x.x) >= fabsk(y) || (fabsk(x.x+y) <= fabsk(x.x) && fabsk(x.x+y) <= fabsk(y)))) { fprintf(stderr, "[ddadd_d2_d2_d : %g %g]\n", x.x, y); fflush(stderr); } #endif r.x = x.x + y; r.y = x.x - r.x + y + x.y; return r; } static INLINE CONST Sleef_double2 ddadd2_d2_d2_d(Sleef_double2 x, double y) { Sleef_double2 r; r.x = x.x + y; double v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y - v); r.y += x.y; return r; } static INLINE CONST Sleef_double2 ddadd_d2_d_d2(double x, Sleef_double2 y) { // |x| >= |y| Sleef_double2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y.x) || fabsk(x) >= fabsk(y.x) || (fabsk(x+y.x) <= fabsk(x) && fabsk(x+y.x) <= fabsk(y.x)))) { fprintf(stderr, "[ddadd_d2_d_d2 : %g %g]\n", x, y.x); fflush(stderr); } #endif r.x = x + y.x; r.y = x - r.x + y.x + y.y; return r; } static INLINE CONST Sleef_double2 ddadd2_d2_d_d2(double x, Sleef_double2 y) { Sleef_double2 r; r.x = x + y.x; double v = r.x - x; r.y = (x - (r.x - v)) + (y.x - v) + y.y; return r; } static INLINE CONST double ddadd2_d_d_d2(double x, Sleef_double2 y) { return y.y + y.x + x; } static INLINE CONST Sleef_double2 ddadd_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) { // |x| >= |y| Sleef_double2 r; #ifndef NDEBUG if (!(x.x == 0 || checkfp(x.x) || checkfp(y.x) || fabsk(x.x) >= fabsk(y.x) || (fabsk(x.x+y.x) <= fabsk(x.x) && fabsk(x.x+y.x) 
<= fabsk(y.x)))) { fprintf(stderr, "[ddadd_d2_d2_d2 : %g %g]\n", x.x, y.x); fflush(stderr); } #endif r.x = x.x + y.x; r.y = x.x - r.x + y.x + x.y + y.y; return r; } static INLINE CONST Sleef_double2 ddadd2_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) { Sleef_double2 r; r.x = x.x + y.x; double v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y.x - v); r.y += x.y + y.y; return r; } static INLINE CONST Sleef_double2 ddsub_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) { // |x| >= |y| Sleef_double2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y.x) || fabsk(x.x) >= fabsk(y.x) || (fabsk(x.x-y.x) <= fabsk(x.x) && fabsk(x.x-y.x) <= fabsk(y.x)))) { fprintf(stderr, "[ddsub_d2_d2_d2 : %g %g]\n", x.x, y.x); fflush(stderr); } #endif r.x = x.x - y.x; r.y = x.x - r.x - y.x + x.y - y.y; return r; } static INLINE CONST Sleef_double2 dddiv_d2_d2_d2(Sleef_double2 n, Sleef_double2 d) { double t = 1.0 / d.x; double dh = upper(d.x), dl = d.x - dh; double th = upper(t ), tl = t - th; double nhh = upper(n.x), nhl = n.x - nhh; Sleef_double2 q; q.x = n.x * t; double u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); q.y = t * (n.y - q.x * d.y) + u; return q; } static INLINE CONST Sleef_double2 ddmul_d2_d_d(double x, double y) { double xh = upper(x), xl = x - xh; double yh = upper(y), yl = y - yh; Sleef_double2 r; r.x = x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; return r; } static INLINE CONST Sleef_double2 ddmul_d2_d2_d(Sleef_double2 x, double y) { double xh = upper(x.x), xl = x.x - xh; double yh = upper(y ), yl = y - yh; Sleef_double2 r; r.x = x.x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; return r; } static INLINE CONST Sleef_double2 ddmul_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) { double xh = upper(x.x), xl = x.x - xh; double yh = upper(y.x), yl = y.x - yh; Sleef_double2 r; r.x = x.x * y.x; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; return r; } static INLINE CONST double 
ddmul_d_d2_d2(Sleef_double2 x, Sleef_double2 y) { double xh = upper(x.x), xl = x.x - xh; double yh = upper(y.x), yl = y.x - yh; return x.y * yh + xh * y.y + xl * yl + xh * yl + xl * yh + xh * yh; } static INLINE CONST Sleef_double2 ddsqu_d2_d2(Sleef_double2 x) { double xh = upper(x.x), xl = x.x - xh; Sleef_double2 r; r.x = x.x * x.x; r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); return r; } static INLINE CONST double ddsqu_d_d2(Sleef_double2 x) { double xh = upper(x.x), xl = x.x - xh; return xh * x.y + xh * x.y + xl * xl + (xh * xl + xh * xl) + xh * xh; } static INLINE CONST Sleef_double2 ddrec_d2_d(double d) { double t = 1.0 / d; double dh = upper(d), dl = d - dh; double th = upper(t), tl = t - th; Sleef_double2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); return q; } static INLINE CONST Sleef_double2 ddrec_d2_d2(Sleef_double2 d) { double t = 1.0 / d.x; double dh = upper(d.x), dl = d.x - dh; double th = upper(t ), tl = t - th; Sleef_double2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); return q; } static INLINE CONST Sleef_double2 ddsqrt_d2_d2(Sleef_double2 d) { double t = SQRT(d.x + d.y); return ddscale_d2_d2_d(ddmul_d2_d2_d2(ddadd2_d2_d2_d2(d, ddmul_d2_d_d(t, t)), ddrec_d2_d(t)), 0.5); } static INLINE CONST Sleef_double2 ddsqrt_d2_d(double d) { double t = SQRT(d); return ddscale_d2_d2_d(ddmul_d2_d2_d2(ddadd2_d2_d_d2(d, ddmul_d2_d_d(t, t)), ddrec_d2_d(t)), 0.5); } // static INLINE CONST double atan2k(double y, double x) { double s, t, u; int q = 0; if (x < 0) { x = -x; q = -2; } if (y > x) { t = x; x = y; y = -t; q += 1; } s = y / x; t = s * s; double t2 = t * t, t4 = t2 * t2, t8 = t4 * t4, t16 = t8 * t8; u = POLY19(t, t2, t4, t8, t16, -1.88796008463073496563746e-05, 0.000209850076645816976906797, -0.00110611831486672482563471, 0.00370026744188713119232403, -0.00889896195887655491740809, 0.016599329773529201970117, -0.0254517624932312641616861, 0.0337852580001353069993897, 
-0.0407629191276836500001934, 0.0466667150077840625632675, -0.0523674852303482457616113, 0.0587666392926673580854313, -0.0666573579361080525984562, 0.0769219538311769618355029, -0.090908995008245008229153, 0.111111105648261418443745, -0.14285714266771329383765, 0.199999999996591265594148, -0.333333333333311110369124); t = u * t * s + s; t = q * (M_PI/2) + t; return t; } EXPORT CONST double xatan2(double y, double x) { double r = atan2k(fabsk(y), x); r = mulsign(r, x); if (xisinf(x) || x == 0) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI /2)) : 0); if (xisinf(y) ) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI*1/4)) : 0); if ( y == 0) r = (sign(x) == -1 ? M_PI : 0); return xisnan(x) || xisnan(y) ? SLEEF_NAN : mulsign(r, y); } EXPORT CONST double xasin(double d) { int o = fabsk(d) < 0.5; double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), x = o ? fabsk(d) : SQRT(x2), u; double x4 = x2 * x2, x8 = x4 * x4, x16 = x8 * x8; u = POLY12(x2, x4, x8, x16, +0.3161587650653934628e-1, -0.1581918243329996643e-1, +0.1929045477267910674e-1, +0.6606077476277170610e-2, +0.1215360525577377331e-1, +0.1388715184501609218e-1, +0.1735956991223614604e-1, +0.2237176181932048341e-1, +0.3038195928038132237e-1, +0.4464285681377102438e-1, +0.7500000000378581611e-1, +0.1666666666666497543e+0); u = mla(u, x * x2, x); double r = o ? u : (M_PI/2 - 2*u); r = mulsign(r, d); return r; } EXPORT CONST double xacos(double d) { int o = fabsk(d) < 0.5; double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), u; double x = o ? fabsk(d) : SQRT(x2); x = fabsk(d) == 1.0 ? 
0 : x; double x4 = x2 * x2, x8 = x4 * x4, x16 = x8 * x8; u = POLY12(x2, x4, x8, x16, +0.3161587650653934628e-1, -0.1581918243329996643e-1, +0.1929045477267910674e-1, +0.6606077476277170610e-2, +0.1215360525577377331e-1, +0.1388715184501609218e-1, +0.1735956991223614604e-1, +0.2237176181932048341e-1, +0.3038195928038132237e-1, +0.4464285681377102438e-1, +0.7500000000378581611e-1, +0.1666666666666497543e+0); u *= x * x2; double y = 3.1415926535897932/2 - (mulsign(x, d) + mulsign(u, d)); x += u; double r = o ? y : (x*2); if (!o && d < 0) r = ddadd_d2_d2_d(dd(3.141592653589793116, 1.2246467991473532072e-16), -r).x; return r; } EXPORT CONST double xatan(double s) { double t, u; int q = 0; if (sign(s) == -1) { s = -s; q = 2; } if (s > 1) { s = 1.0 / s; q |= 1; } t = s * s; double t2 = t * t, t4 = t2 * t2, t8 = t4 * t4, t16 = t8 * t8; u = POLY19(t, t2, t4, t8, t16, -1.88796008463073496563746e-05, 0.000209850076645816976906797, -0.00110611831486672482563471, 0.00370026744188713119232403, -0.00889896195887655491740809, 0.016599329773529201970117, -0.0254517624932312641616861, 0.0337852580001353069993897, -0.0407629191276836500001934, 0.0466667150077840625632675, -0.0523674852303482457616113, 0.0587666392926673580854313, -0.0666573579361080525984562, 0.0769219538311769618355029, -0.090908995008245008229153, 0.111111105648261418443745, -0.14285714266771329383765, 0.199999999996591265594148, -0.333333333333311110369124); t = s + s * (t * u); if ((q & 1) != 0) t = 1.570796326794896557998982 - t; if ((q & 2) != 0) t = -t; return t; } static Sleef_double2 atan2k_u1(Sleef_double2 y, Sleef_double2 x) { double u; Sleef_double2 s, t; int q = 0; if (x.x < 0) { x.x = -x.x; x.y = -x.y; q = -2; } if (y.x > x.x) { t = x; x = y; y.x = -t.x; y.y = -t.y; q += 1; } s = dddiv_d2_d2_d2(y, x); t = ddsqu_d2_d2(s); t = ddnormalize_d2_d2(t); double t2 = t.x * t.x, t4 = t2 * t2, t8 = t4 * t4, t16 = t8 * t8; u = POLY16(t.x, t2, t4, t8, 1.06298484191448746607415e-05, -0.000125620649967286867384336, 
0.00070557664296393412389774, -0.00251865614498713360352999, 0.00646262899036991172313504, -0.0128281333663399031014274, 0.0208024799924145797902497, -0.0289002344784740315686289, 0.0359785005035104590853656, -0.041848579703592507506027, 0.0470843011653283988193763, -0.0524914210588448421068719, 0.0587946590969581003860434, -0.0666620884778795497194182, 0.0769225330296203768654095, -0.0909090442773387574781907); u = mla(u, t.x, 0.111111108376896236538123); u = mla(u, t.x, -0.142857142756268568062339); u = mla(u, t.x, 0.199999999997977351284817); u = mla(u, t.x, -0.333333333333317605173818); t = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(ddmul_d2_d2_d2(s, t), u)); if (fabsk(s.x) < 1e-200) t = s; t = ddadd2_d2_d2_d2(ddmul_d2_d2_d(dd(1.570796326794896557998982, 6.12323399573676603586882e-17), q), t); return t; } EXPORT CONST double xatan2_u1(double y, double x) { if (fabsk(x) < 5.5626846462680083984e-309) { y *= (UINT64_C(1) << 53); x *= (UINT64_C(1) << 53); } // nexttoward((1.0 / DBL_MAX), 1) Sleef_double2 d = atan2k_u1(dd(fabsk(y), 0), dd(x, 0)); double r = d.x + d.y; r = mulsign(r, x); if (xisinf(x) || x == 0) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI /2)) : 0); if (xisinf(y) ) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI*1/4)) : 0); if ( y == 0) r = (sign(x) == -1 ? M_PI : 0); return xisnan(x) || xisnan(y) ? SLEEF_NAN : mulsign(r, y); } EXPORT CONST double xasin_u1(double d) { int o = fabsk(d) < 0.5; double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), u; Sleef_double2 x = o ? dd(fabsk(d), 0) : ddsqrt_d2_d(x2); x = fabsk(d) == 1.0 ? 
dd(0, 0) : x; double x4 = x2 * x2, x8 = x4 * x4, x16 = x8 * x8; u = POLY12(x2, x4, x8, x16, +0.3161587650653934628e-1, -0.1581918243329996643e-1, +0.1929045477267910674e-1, +0.6606077476277170610e-2, +0.1215360525577377331e-1, +0.1388715184501609218e-1, +0.1735956991223614604e-1, +0.2237176181932048341e-1, +0.3038195928038132237e-1, +0.4464285681377102438e-1, +0.7500000000378581611e-1, +0.1666666666666497543e+0); u *= x2 * x.x; Sleef_double2 y = ddadd_d2_d2_d(ddsub_d2_d2_d2(dd(3.141592653589793116/4, 1.2246467991473532072e-16/4), x), -u); double r = o ? (u + x.x) : ((y.x + y.y)*2); r = mulsign(r, d); return r; } EXPORT CONST double xacos_u1(double d) { int o = fabsk(d) < 0.5; double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), u; Sleef_double2 x = o ? dd(fabsk(d), 0) : ddsqrt_d2_d(x2), w; x = fabsk(d) == 1.0 ? dd(0, 0) : x; double x4 = x2 * x2, x8 = x4 * x4, x16 = x8 * x8; u = POLY12(x2, x4, x8, x16, +0.3161587650653934628e-1, -0.1581918243329996643e-1, +0.1929045477267910674e-1, +0.6606077476277170610e-2, +0.1215360525577377331e-1, +0.1388715184501609218e-1, +0.1735956991223614604e-1, +0.2237176181932048341e-1, +0.3038195928038132237e-1, +0.4464285681377102438e-1, +0.7500000000378581611e-1, +0.1666666666666497543e+0); u *= x.x * x2; Sleef_double2 y = ddsub_d2_d2_d2(dd(3.141592653589793116/2, 1.2246467991473532072e-16/2), ddadd_d2_d_d(mulsign(x.x, d), mulsign(u, d))); x = ddadd_d2_d2_d(x, u); y = o ? 
y : ddscale_d2_d2_d(x, 2); if (!o && d < 0) y = ddsub_d2_d2_d2(dd(3.141592653589793116, 1.2246467991473532072e-16), y); return y.x + y.y; } EXPORT CONST double xatan_u1(double d) { Sleef_double2 d2 = atan2k_u1(dd(fabsk(d), 0), dd(1, 0)); double r = d2.x + d2.y; if (xisinf(d)) r = 1.570796326794896557998982; return mulsign(r, d); } typedef struct { double d; int32_t i; } di_t; typedef struct { Sleef_double2 dd; int32_t i; } ddi_t; static INLINE CONST double orsign(double x, double y) { return longBitsToDouble(doubleToRawLongBits(x) | (doubleToRawLongBits(y) & (INT64_C(1) << 63))); } static CONST di_t rempisub(double x) { // This function is equivalent to : // di_t ret = { x - rint(4 * x) * 0.25, (int32_t)(rint(4 * x) - rint(x) * 4) }; di_t ret; double c = mulsign(INT64_C(1) << 52, x); double rint4x = fabsk(4*x) > INT64_C(1) << 52 ? (4*x) : orsign(mla(4, x, c) - c, x); double rintx = fabsk( x) > INT64_C(1) << 52 ? x : orsign(x + c - c , x); ret.d = mla(-0.25, rint4x, x); ret.i = mla(-4 , rintx , rint4x); return ret; } // Payne-Hanek like argument reduction static CONST ddi_t rempi(double a) { Sleef_double2 x, y, z; di_t di; double t; int ex = ilogb2k(a) - 55, q = ex > (700-55) ? -64 : 0; a = ldexp3k(a, q); if (ex < 0) ex = 0; ex *= 4; x = ddmul_d2_d_d(a, Sleef_rempitabdp[ex]); di = rempisub(x.x); q = di.i; x.x = di.d; x = ddnormalize_d2_d2(x); y = ddmul_d2_d_d(a, Sleef_rempitabdp[ex+1]); x = ddadd2_d2_d2_d2(x, y); di = rempisub(x.x); q += di.i; x.x = di.d; x = ddnormalize_d2_d2(x); y = ddmul_d2_d2_d(dd(Sleef_rempitabdp[ex+2], Sleef_rempitabdp[ex+3]), a); x = ddadd2_d2_d2_d2(x, y); x = ddnormalize_d2_d2(x); x = ddmul_d2_d2_d2(x, dd(3.141592653589793116*2, 1.2246467991473532072e-16*2)); ddi_t ret = { fabsk(a) < 0.7 ? 
dd(a, 0) : x, q }; return ret; } EXPORT CONST double xsin(double d) { double u, s, t = d; int ql; if (fabsk(d) < TRIGRANGEMAX2) { ql = rintk(d * M_1_PI); d = mla(ql, -PI_A2, d); d = mla(ql, -PI_B2, d); } else if (fabsk(d) < TRIGRANGEMAX) { double dqh = trunck(d * (M_1_PI / (1 << 24))) * (double)(1 << 24); ql = rintk(mla(d, M_1_PI, -dqh)); d = mla(dqh, -PI_A, d); d = mla( ql, -PI_A, d); d = mla(dqh, -PI_B, d); d = mla( ql, -PI_B, d); d = mla(dqh, -PI_C, d); d = mla( ql, -PI_C, d); d = mla(dqh + ql, -PI_D, d); } else { ddi_t ddi = rempi(t); ql = ((ddi.i & 3) * 2 + (ddi.dd.x > 0) + 1) >> 2; if ((ddi.i & 1) != 0) { ddi.dd = ddadd2_d2_d2_d2(ddi.dd, dd(mulsign(3.141592653589793116*-0.5, ddi.dd.x), mulsign(1.2246467991473532072e-16*-0.5, ddi.dd.x))); } d = ddi.dd.x + ddi.dd.y; if (xisinf(t) || xisnan(t)) d = SLEEF_NAN; } s = d * d; if ((ql & 1) != 0) d = -d; double s2 = s * s, s4 = s2 * s2; u = POLY8(s, s2, s4, -7.97255955009037868891952e-18, 2.81009972710863200091251e-15, -7.64712219118158833288484e-13, 1.60590430605664501629054e-10, -2.50521083763502045810755e-08, 2.75573192239198747630416e-06, -0.000198412698412696162806809, 0.00833333333333332974823815); u = mla(u, s, -0.166666666666666657414808); u = mla(s, u * d, d); if (xisnegzero(t)) u = t; return u; } EXPORT CONST double xsin_u1(double d) { double u; Sleef_double2 s, t, x; int ql; if (fabsk(d) < TRIGRANGEMAX2) { ql = rintk(d * M_1_PI); u = mla(ql, -PI_A2, d); s = ddadd_d2_d_d (u, ql * -PI_B2); } else if (fabsk(d) < TRIGRANGEMAX) { const double dqh = trunck(d * (M_1_PI / (1 << 24))) * (double)(1 << 24); ql = rintk(mla(d, M_1_PI, -dqh)); u = mla(dqh, -PI_A, d); s = ddadd_d2_d_d (u, ql * -PI_A); s = ddadd2_d2_d2_d(s, dqh * -PI_B); s = ddadd2_d2_d2_d(s, ql * -PI_B); s = ddadd2_d2_d2_d(s, dqh * -PI_C); s = ddadd2_d2_d2_d(s, ql * -PI_C); s = ddadd_d2_d2_d (s, (dqh + ql) * -PI_D); } else { ddi_t ddi = rempi(d); ql = ((ddi.i & 3) * 2 + (ddi.dd.x > 0) + 1) >> 2; if ((ddi.i & 1) != 0) { ddi.dd = ddadd2_d2_d2_d2(ddi.dd, 
dd(mulsign(3.141592653589793116*-0.5, ddi.dd.x), mulsign(1.2246467991473532072e-16*-0.5, ddi.dd.x))); } s = ddnormalize_d2_d2(ddi.dd); if (xisinf(d) || xisnan(d)) s.x = SLEEF_NAN; } t = s; s = ddsqu_d2_d2(s); double s2 = s.x * s.x, s4 = s2 * s2; u = POLY6(s.x, s2, s4, 2.72052416138529567917983e-15, -7.6429259411395447190023e-13, 1.60589370117277896211623e-10, -2.5052106814843123359368e-08, 2.75573192104428224777379e-06, -0.000198412698412046454654947); u = mla(u, s.x, 0.00833333333333318056201922); x = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(-0.166666666666666657414808, u * s.x), s)); u = ddmul_d_d2_d2(t, x); if ((ql & 1) != 0) u = -u; if (xisnegzero(d)) u = d; return u; } EXPORT CONST double xcos(double d) { double u, s, t = d; int ql; if (fabsk(d) < TRIGRANGEMAX2) { ql = mla(2, rintk(d * M_1_PI - 0.5), 1); d = mla(ql, -PI_A2*0.5, d); d = mla(ql, -PI_B2*0.5, d); } else if (fabsk(d) < TRIGRANGEMAX) { double dqh = trunck(d * (M_1_PI / (INT64_C(1) << 23)) - 0.5 * (M_1_PI / (INT64_C(1) << 23))); ql = 2*rintk(d * M_1_PI - 0.5 - dqh * (double)(INT64_C(1) << 23))+1; dqh *= 1 << 24; d = mla(dqh, -PI_A*0.5, d); d = mla( ql, -PI_A*0.5, d); d = mla(dqh, -PI_B*0.5, d); d = mla( ql, -PI_B*0.5, d); d = mla(dqh, -PI_C*0.5, d); d = mla( ql, -PI_C*0.5, d); d = mla(dqh + ql , -PI_D*0.5, d); } else { ddi_t ddi = rempi(t); ql = ((ddi.i & 3) * 2 + (ddi.dd.x > 0) + 7) >> 1; if ((ddi.i & 1) == 0) { ddi.dd = ddadd2_d2_d2_d2(ddi.dd, dd(mulsign(3.141592653589793116*-0.5, ddi.dd.x > 0 ? 1 : -1), mulsign(1.2246467991473532072e-16*-0.5, ddi.dd.x > 0 ? 
1 : -1))); } d = ddi.dd.x + ddi.dd.y; if (xisinf(t) || xisnan(t)) d = SLEEF_NAN; } s = d * d; if ((ql & 2) == 0) d = -d; double s2 = s * s, s4 = s2 * s2; u = POLY8(s, s2, s4, -7.97255955009037868891952e-18, 2.81009972710863200091251e-15, -7.64712219118158833288484e-13, 1.60590430605664501629054e-10, -2.50521083763502045810755e-08, 2.75573192239198747630416e-06, -0.000198412698412696162806809, 0.00833333333333332974823815); u = mla(u, s, -0.166666666666666657414808); u = mla(s, u * d, d); return u; } EXPORT CONST double xcos_u1(double d) { double u; Sleef_double2 s, t, x; int ql; d = fabsk(d); if (d < TRIGRANGEMAX2) { ql = mla(2, rintk(d * M_1_PI - 0.5), 1); s = ddadd2_d2_d_d(d, ql * (-PI_A2*0.5)); s = ddadd_d2_d2_d(s, ql * (-PI_B2*0.5)); } else if (d < TRIGRANGEMAX) { double dqh = trunck(d * (M_1_PI / (INT64_C(1) << 23)) - 0.5 * (M_1_PI / (INT64_C(1) << 23))); ql = 2*rintk(d * M_1_PI - 0.5 - dqh * (double)(INT64_C(1) << 23))+1; dqh *= 1 << 24; u = mla(dqh, -PI_A*0.5, d); s = ddadd2_d2_d_d (u, ql * (-PI_A*0.5)); s = ddadd2_d2_d2_d(s, dqh * (-PI_B*0.5)); s = ddadd2_d2_d2_d(s, ql * (-PI_B*0.5)); s = ddadd2_d2_d2_d(s, dqh * (-PI_C*0.5)); s = ddadd2_d2_d2_d(s, ql * (-PI_C*0.5)); s = ddadd_d2_d2_d(s, (dqh + ql) * (-PI_D*0.5)); } else { ddi_t ddi = rempi(d); ql = ((ddi.i & 3) * 2 + (ddi.dd.x > 0) + 7) >> 1; if ((ddi.i & 1) == 0) { ddi.dd = ddadd2_d2_d2_d2(ddi.dd, dd(mulsign(3.141592653589793116*-0.5, ddi.dd.x > 0 ? 1 : -1), mulsign(1.2246467991473532072e-16*-0.5, ddi.dd.x > 0 ? 
1 : -1))); } s = ddnormalize_d2_d2(ddi.dd); if (xisinf(d) || xisnan(d)) s.x = SLEEF_NAN; } t = s; s = ddsqu_d2_d2(s); double s2 = s.x * s.x, s4 = s2 * s2; u = POLY6(s.x, s2, s4, 2.72052416138529567917983e-15, -7.6429259411395447190023e-13, 1.60589370117277896211623e-10, -2.5052106814843123359368e-08, 2.75573192104428224777379e-06, -0.000198412698412046454654947); u = mla(u, s.x, 0.00833333333333318056201922); x = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(-0.166666666666666657414808, u * s.x), s)); u = ddmul_d_d2_d2(t, x); if ((((int)ql) & 2) == 0) u = -u; return u; } EXPORT CONST Sleef_double2 xsincos(double d) { double u, s, t; Sleef_double2 r; int ql; s = d; if (fabsk(d) < TRIGRANGEMAX2) { ql = rintk(s * (2 * M_1_PI)); s = mla(ql, -PI_A2*0.5, s); s = mla(ql, -PI_B2*0.5, s); } else if (fabsk(d) < TRIGRANGEMAX) { double dqh = trunck(d * ((2 * M_1_PI) / (1 << 24))) * (double)(1 << 24); ql = rintk(d * (2 * M_1_PI) - dqh); s = mla(dqh, -PI_A * 0.5, s); s = mla( ql, -PI_A * 0.5, s); s = mla(dqh, -PI_B * 0.5, s); s = mla( ql, -PI_B * 0.5, s); s = mla(dqh, -PI_C * 0.5, s); s = mla( ql, -PI_C * 0.5, s); s = mla(dqh + ql, -PI_D * 0.5, s); } else { ddi_t ddi = rempi(d); ql = ddi.i; s = ddi.dd.x + ddi.dd.y; if (xisinf(d) || xisnan(d)) s = SLEEF_NAN; } t = s; s = s * s; u = 1.58938307283228937328511e-10; u = mla(u, s, -2.50506943502539773349318e-08); u = mla(u, s, 2.75573131776846360512547e-06); u = mla(u, s, -0.000198412698278911770864914); u = mla(u, s, 0.0083333333333191845961746); u = mla(u, s, -0.166666666666666130709393); u = u * s * t; r.x = t + u; if (xisnegzero(d)) r.x = -0.0; u = -1.13615350239097429531523e-11; u = mla(u, s, 2.08757471207040055479366e-09); u = mla(u, s, -2.75573144028847567498567e-07); u = mla(u, s, 2.48015872890001867311915e-05); u = mla(u, s, -0.00138888888888714019282329); u = mla(u, s, 0.0416666666666665519592062); u = mla(u, s, -0.5); r.y = u * s + 1; if ((ql & 1) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((ql & 2) != 0) { r.x = -r.x; } if 
(((ql+1) & 2) != 0) { r.y = -r.y; } return r; } EXPORT CONST Sleef_double2 xsincos_u1(double d) { double u; Sleef_double2 r, s, t, x; int ql; if (fabsk(d) < TRIGRANGEMAX2) { ql = rintk(d * (2 * M_1_PI)); u = mla(ql, -PI_A2*0.5, d); s = ddadd_d2_d_d (u, ql * (-PI_B2*0.5)); } else if (fabsk(d) < TRIGRANGEMAX) { const double dqh = trunck(d * ((2 * M_1_PI) / (1 << 24))) * (double)(1 << 24); ql = rintk(d * (2 * M_1_PI) - dqh); u = mla(dqh, -PI_A*0.5, d); s = ddadd_d2_d_d(u, ql * (-PI_A*0.5)); s = ddadd2_d2_d2_d(s, dqh * (-PI_B*0.5)); s = ddadd2_d2_d2_d(s, ql * (-PI_B*0.5)); s = ddadd2_d2_d2_d(s, dqh * (-PI_C*0.5)); s = ddadd2_d2_d2_d(s, ql * (-PI_C*0.5)); s = ddadd_d2_d2_d(s, (dqh + ql) * (-PI_D*0.5)); } else { ddi_t ddi = rempi(d); ql = ddi.i; s = ddi.dd; if (xisinf(d) || xisnan(d)) s = dd(SLEEF_NAN, SLEEF_NAN); } t = s; s.x = ddsqu_d_d2(s); u = 1.58938307283228937328511e-10; u = mla(u, s.x, -2.50506943502539773349318e-08); u = mla(u, s.x, 2.75573131776846360512547e-06); u = mla(u, s.x, -0.000198412698278911770864914); u = mla(u, s.x, 0.0083333333333191845961746); u = mla(u, s.x, -0.166666666666666130709393); u *= s.x * t.x; x = ddadd_d2_d2_d(t, u); r.x = x.x + x.y; if (xisnegzero(d)) r.x = -0.0; u = -1.13615350239097429531523e-11; u = mla(u, s.x, 2.08757471207040055479366e-09); u = mla(u, s.x, -2.75573144028847567498567e-07); u = mla(u, s.x, 2.48015872890001867311915e-05); u = mla(u, s.x, -0.00138888888888714019282329); u = mla(u, s.x, 0.0416666666666665519592062); u = mla(u, s.x, -0.5); x = ddadd_d2_d_d2(1, ddmul_d2_d_d(s.x, u)); r.y = x.x + x.y; if ((ql & 1) != 0) { u = r.y; r.y = r.x; r.x = u; } if ((ql & 2) != 0) { r.x = -r.x; } if (((ql+1) & 2) != 0) { r.y = -r.y; } return r; } EXPORT CONST Sleef_double2 xsincospi_u05(double d) { double u, s, t; Sleef_double2 r, x, s2; u = d * 4; int q = ceilk(u) & ~(int)1; s = u - (double)q; t = s; s = s * s; s2 = ddmul_d2_d_d(t, t); // u = -2.02461120785182399295868e-14; u = mla(u, s, 6.94821830580179461327784e-12); u = mla(u, 
s, -1.75724749952853179952664e-09); u = mla(u, s, 3.13361688966868392878422e-07); u = mla(u, s, -3.6576204182161551920361e-05); u = mla(u, s, 0.00249039457019271850274356); x = ddadd2_d2_d_d2(u * s, dd(-0.0807455121882807852484731, 3.61852475067037104849987e-18)); x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), dd(0.785398163397448278999491, 3.06287113727155002607105e-17)); x = ddmul_d2_d2_d(x, t); r.x = x.x + x.y; if (xisnegzero(d)) r.x = -0.0; // u = 9.94480387626843774090208e-16; u = mla(u, s, -3.89796226062932799164047e-13); u = mla(u, s, 1.15011582539996035266901e-10); u = mla(u, s, -2.4611369501044697495359e-08); u = mla(u, s, 3.59086044859052754005062e-06); u = mla(u, s, -0.000325991886927389905997954); x = ddadd2_d2_d_d2(u * s, dd(0.0158543442438155018914259, -1.04693272280631521908845e-18)); x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), dd(-0.308425137534042437259529, -1.95698492133633550338345e-17)); x = ddadd2_d2_d2_d(ddmul_d2_d2_d2(x, s2), 1); r.y = x.x + x.y; // if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (fabsk(d) > TRIGRANGEMAX3/4) { r.x = 0; r.y = 1; } if (xisinf(d)) { r.x = r.y = SLEEF_NAN; } return r; } EXPORT CONST Sleef_double2 xsincospi_u35(double d) { double u, s, t; Sleef_double2 r; u = d * 4; int q = ceilk(u) & ~(int)1; s = u - (double)q; t = s; s = s * s; // u = +0.6880638894766060136e-11; u = mla(u, s, -0.1757159564542310199e-8); u = mla(u, s, +0.3133616327257867311e-6); u = mla(u, s, -0.3657620416388486452e-4); u = mla(u, s, +0.2490394570189932103e-2); u = mla(u, s, -0.8074551218828056320e-1); u = mla(u, s, +0.7853981633974482790e+0); r.x = u * t; // u = -0.3860141213683794352e-12; u = mla(u, s, +0.1150057888029681415e-9); u = mla(u, s, -0.2461136493006663553e-7); u = mla(u, s, +0.3590860446623516713e-5); u = mla(u, s, -0.3259918869269435942e-3); u = mla(u, s, +0.1585434424381541169e-1); u = mla(u, s, -0.3084251375340424373e+0); u = mla(u, s, 1); r.y = u; // if ((q 
& 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (fabsk(d) > TRIGRANGEMAX3/4) { r.x = 0; r.y = 1; } if (xisinf(d)) { r.x = r.y = SLEEF_NAN; } return r; } static INLINE CONST Sleef_double2 sinpik(double d) { double u, s, t; Sleef_double2 x, s2; u = d * 4; int q = ceilk(u) & ~1; int o = (q & 2) != 0; s = u - (double)q; t = s; s = s * s; s2 = ddmul_d2_d_d(t, t); // u = o ? 9.94480387626843774090208e-16 : -2.02461120785182399295868e-14; u = mla(u, s, o ? -3.89796226062932799164047e-13 : 6.94821830580179461327784e-12); u = mla(u, s, o ? 1.15011582539996035266901e-10 : -1.75724749952853179952664e-09); u = mla(u, s, o ? -2.4611369501044697495359e-08 : 3.13361688966868392878422e-07); u = mla(u, s, o ? 3.59086044859052754005062e-06 : -3.6576204182161551920361e-05); u = mla(u, s, o ? -0.000325991886927389905997954 : 0.00249039457019271850274356); x = ddadd2_d2_d_d2(u * s, o ? dd(0.0158543442438155018914259, -1.04693272280631521908845e-18) : dd(-0.0807455121882807852484731, 3.61852475067037104849987e-18)); x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), o ? dd(-0.308425137534042437259529, -1.95698492133633550338345e-17) : dd(0.785398163397448278999491, 3.06287113727155002607105e-17)); x = ddmul_d2_d2_d2(x, o ? s2 : dd(t, 0)); x = o ? ddadd2_d2_d2_d(x, 1) : x; // if ((q & 4) != 0) { x.x = -x.x; x.y = -x.y; } return x; } EXPORT CONST double xsinpi_u05(double d) { Sleef_double2 x = sinpik(d); double r = x.x + x.y; if (xisnegzero(d)) r = -0.0; if (fabsk(d) > TRIGRANGEMAX3/4) r = 0; if (xisinf(d)) r = SLEEF_NAN; return r; } static INLINE CONST Sleef_double2 cospik(double d) { double u, s, t; Sleef_double2 x, s2; u = d * 4; int q = ceilk(u) & ~1; int o = (q & 2) == 0; s = u - (double)q; t = s; s = s * s; s2 = ddmul_d2_d_d(t, t); // u = o ? 9.94480387626843774090208e-16 : -2.02461120785182399295868e-14; u = mla(u, s, o ? -3.89796226062932799164047e-13 : 6.94821830580179461327784e-12); u = mla(u, s, o ? 
1.15011582539996035266901e-10 : -1.75724749952853179952664e-09); u = mla(u, s, o ? -2.4611369501044697495359e-08 : 3.13361688966868392878422e-07); u = mla(u, s, o ? 3.59086044859052754005062e-06 : -3.6576204182161551920361e-05); u = mla(u, s, o ? -0.000325991886927389905997954 : 0.00249039457019271850274356); x = ddadd2_d2_d_d2(u * s, o ? dd(0.0158543442438155018914259, -1.04693272280631521908845e-18) : dd(-0.0807455121882807852484731, 3.61852475067037104849987e-18)); x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), o ? dd(-0.308425137534042437259529, -1.95698492133633550338345e-17) : dd(0.785398163397448278999491, 3.06287113727155002607105e-17)); x = ddmul_d2_d2_d2(x, o ? s2 : dd(t, 0)); x = o ? ddadd2_d2_d2_d(x, 1) : x; // if (((q+2) & 4) != 0) { x.x = -x.x; x.y = -x.y; } return x; } EXPORT CONST double xcospi_u05(double d) { Sleef_double2 x = cospik(d); double r = x.x + x.y; if (fabsk(d) > TRIGRANGEMAX3/4) r = 1; if (xisinf(d)) r = SLEEF_NAN; return r; } EXPORT CONST double xtan(double d) { double u, s, x, y; int ql; if (fabsk(d) < TRIGRANGEMAX2) { ql = rintk(d * (2 * M_1_PI)); x = mla(ql, -PI_A2*0.5, d); x = mla(ql, -PI_B2*0.5, x); } else if (fabsk(d) < 1e+6) { double dqh = trunck(d * ((2 * M_1_PI) / (1 << 24))) * (double)(1 << 24); ql = rintk(d * (2 * M_1_PI) - dqh); x = mla(dqh, -PI_A * 0.5, d); x = mla( ql, -PI_A * 0.5, x); x = mla(dqh, -PI_B * 0.5, x); x = mla( ql, -PI_B * 0.5, x); x = mla(dqh, -PI_C * 0.5, x); x = mla( ql, -PI_C * 0.5, x); x = mla(dqh + ql, -PI_D * 0.5, x); } else { ddi_t ddi = rempi(d); ql = ddi.i; x = ddi.dd.x + ddi.dd.y; if (xisinf(d) || xisnan(d)) x = SLEEF_NAN; } x *= 0.5; s = x * x; double s2 = s * s, s4 = s2 * s2; u = POLY8(s, s2, s4, +0.3245098826639276316e-3, +0.5619219738114323735e-3, +0.1460781502402784494e-2, +0.3591611540792499519e-2, +0.8863268409563113126e-2, +0.2186948728185535498e-1, +0.5396825399517272970e-1, +0.1333333333330500581e+0); u = mla(u, s, +0.3333333333333343695e+0); u = mla(s, u * x, x); y = mla(u, u, -1); x = -2 * 
u; if ((ql & 1) != 0) { double t = x; x = y; y = -t; } u = x / y; return u; } EXPORT CONST double xtan_u1(double d) { double u; Sleef_double2 s, t, x, y; int ql; if (fabsk(d) < TRIGRANGEMAX2) { ql = rintk(d * (2 * M_1_PI)); u = mla(ql, -PI_A2*0.5, d); s = ddadd_d2_d_d(u, ql * (-PI_B2*0.5)); } else if (fabsk(d) < TRIGRANGEMAX) { const double dqh = trunck(d * (M_2_PI / (1 << 24))) * (double)(1 << 24); s = ddadd2_d2_d2_d(ddmul_d2_d2_d(dd(M_2_PI_H, M_2_PI_L), d), (d < 0 ? -0.5 : 0.5) - dqh); ql = s.x + s.y; u = mla(dqh, -PI_A*0.5, d); s = ddadd_d2_d_d (u, ql * (-PI_A*0.5)); s = ddadd2_d2_d2_d(s, dqh * (-PI_B*0.5)); s = ddadd2_d2_d2_d(s, ql * (-PI_B*0.5)); s = ddadd2_d2_d2_d(s, dqh * (-PI_C*0.5)); s = ddadd2_d2_d2_d(s, ql * (-PI_C*0.5)); s = ddadd_d2_d2_d(s, (dqh + ql) * (-PI_D*0.5)); } else { ddi_t ddi = rempi(d); ql = ddi.i; s = ddi.dd; if (xisinf(d) || xisnan(d)) s.x = SLEEF_NAN; } t = ddscale_d2_d2_d(s, 0.5); s = ddsqu_d2_d2(t); double s2 = s.x * s.x, s4 = s2 * s2; u = POLY8(s.x, s2, s4, +0.3245098826639276316e-3, +0.5619219738114323735e-3, +0.1460781502402784494e-2, +0.3591611540792499519e-2, +0.8863268409563113126e-2, +0.2186948728185535498e-1, +0.5396825399517272970e-1, +0.1333333333330500581e+0); u = mla(u, s.x, +0.3333333333333343695e+0); x = ddadd_d2_d2_d2(t, ddmul_d2_d2_d(ddmul_d2_d2_d2(s, t), u)); y = ddadd_d2_d_d2(-1, ddsqu_d2_d2(x)); x = ddscale_d2_d2_d(x, -2); if ((ql & 1) != 0) { t = x; x = y; y = ddneg_d2_d2(t); } x = dddiv_d2_d2_d2(x, y); u = x.x + x.y; if (xisnegzero(d)) u = d; return u; } EXPORT CONST double xlog(double d) { double x, x2, t, m; int e; int o = d < DBL_MIN; if (o) d *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); e = ilogb2k(d * (1.0/0.75)); m = ldexp3k(d, -e); if (o) e -= 64; x = (m-1) / (m+1); x2 = x * x; double x4 = x2 * x2, x8 = x4 * x4; t = POLY7(x2, x4, x8, 0.153487338491425068243146, 0.152519917006351951593857, 0.181863266251982985677316, 0.222221366518767365905163, 0.285714294746548025383248, 
0.399999999950799600689777, 0.6666666666667778740063); x = x * 2 + 0.693147180559945286226764 * e + x * x2 * t; if (xisinf(d)) x = SLEEF_INFINITY; if (d < 0 || xisnan(d)) x = SLEEF_NAN; if (d == 0) x = -SLEEF_INFINITY; return x; } EXPORT CONST double xexp(double d) { int q = (int)rintk(d * R_LN2); double s, u; s = mla(q, -L2U, d); s = mla(q, -L2L, s); double s2 = s * s, s4 = s2 * s2, s8 = s4 * s4; u = POLY10(s, s2, s4, s8, 2.08860621107283687536341e-09, 2.51112930892876518610661e-08, 2.75573911234900471893338e-07, 2.75572362911928827629423e-06, 2.4801587159235472998791e-05, 0.000198412698960509205564975, 0.00138888888889774492207962, 0.00833333333331652721664984, 0.0416666666666665047591422, 0.166666666666666851703837); u = mla(u, s, +0.5); u = s * s * u + s + 1; u = ldexp2k(u, q); if (d > 709.78271114955742909217217426) u = SLEEF_INFINITY; if (d < -1000) u = 0; return u; } static INLINE CONST double expm1k(double d) { int q = (int)rintk(d * R_LN2); double s, u; s = mla(q, -L2U, d); s = mla(q, -L2L, s); double s2 = s * s, s4 = s2 * s2, s8 = s4 * s4; u = POLY10(s, s2, s4, s8, 2.08860621107283687536341e-09, 2.51112930892876518610661e-08, 2.75573911234900471893338e-07, 2.75572362911928827629423e-06, 2.4801587159235472998791e-05, 0.000198412698960509205564975, 0.00138888888889774492207962, 0.00833333333331652721664984, 0.0416666666666665047591422, 0.166666666666666851703837); u = mla(s2, 0.5, s2 * s * u) + s; if (q != 0) u = ldexp2k(u + 1, q) - 1; return u; } static INLINE CONST Sleef_double2 logk(double d) { Sleef_double2 x, x2, s; double m, t; int e; int o = d < DBL_MIN; if (o) d *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); e = ilogb2k(d * (1.0/0.75)); m = ldexp3k(d, -e); if (o) e -= 64; x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m)); x2 = ddsqu_d2_d2(x); double x4 = x2.x * x2.x, x8 = x4 * x4, x16 = x8 * x8; t = POLY9(x2.x, x4, x8, x16, 0.116255524079935043668677, 0.103239680901072952701192, 0.117754809412463995466069, 
0.13332981086846273921509, 0.153846227114512262845736, 0.181818180850050775676507, 0.222222222230083560345903, 0.285714285714249172087875, 0.400000000000000077715612); Sleef_double2 c = dd(0.666666666666666629659233, 3.80554962542412056336616e-17); s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e); s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2)); x = ddmul_d2_d2_d2(x2, x); s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d2(x, c)); x = ddmul_d2_d2_d2(x2, x); s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(x, t)); return s; } EXPORT CONST double xlog_u1(double d) { Sleef_double2 x, s; double m, t, x2; int e; int o = d < DBL_MIN; if (o) d *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); e = ilogb2k(d * (1.0/0.75)); m = ldexp3k(d, -e); if (o) e -= 64; x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m)); x2 = x.x * x.x; double x4 = x2 * x2, x8 = x4 * x4; t = POLY7(x2, x4, x8, 0.1532076988502701353e+0, 0.1525629051003428716e+0, 0.1818605932937785996e+0, 0.2222214519839380009e+0, 0.2857142932794299317e+0, 0.3999999999635251990e+0, 0.6666666666667333541e+0); s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), (double)e); s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2)); s = ddadd_d2_d2_d(s, x2 * x.x * t); double r = s.x + s.y; if (xisinf(d)) r = SLEEF_INFINITY; if (d < 0 || xisnan(d)) r = SLEEF_NAN; if (d == 0) r = -SLEEF_INFINITY; return r; } static INLINE CONST double expk(Sleef_double2 d) { int q = (int)rintk((d.x + d.y) * R_LN2); Sleef_double2 s, t; double u; s = ddadd2_d2_d2_d(d, q * -L2U); s = ddadd2_d2_d2_d(s, q * -L2L); s = ddnormalize_d2_d2(s); double s2 = s.x * s.x, s4 = s2 * s2, s8 = s4 * s4; u = POLY10(s.x, s2, s4, s8, 2.51069683420950419527139e-08, 2.76286166770270649116855e-07, 2.75572496725023574143864e-06, 2.48014973989819794114153e-05, 0.000198412698809069797676111, 0.0013888888939977128960529, 0.00833333333332371417601081, 0.0416666666665409524128449, 0.166666666666666740681535, 
0.500000000000000999200722); t = ddadd_d2_d_d2(1, s); t = ddadd_d2_d2_d2(t, ddmul_d2_d2_d(ddsqu_d2_d2(s), u)); u = ldexpk(t.x + t.y, q); if (d.x < -1000) u = 0; return u; } EXPORT CONST double xpow(double x, double y) { int yisint = xisint(y); int yisodd = yisint && xisodd(y); Sleef_double2 d = ddmul_d2_d2_d(logk(fabsk(x)), y); double result = expk(d); if (d.x > 709.78271114955742909217217426) result = SLEEF_INFINITY; result = xisnan(result) ? SLEEF_INFINITY : result; result *= (x > 0 ? 1 : (!yisint ? SLEEF_NAN : (yisodd ? -1 : 1))); double efx = mulsign(fabsk(x) - 1, y); if (xisinf(y)) result = efx < 0 ? 0.0 : (efx == 0 ? 1.0 : SLEEF_INFINITY); if (xisinf(x) || x == 0) result = (yisodd ? sign(x) : 1) * ((x == 0 ? -y : y) < 0 ? 0 : SLEEF_INFINITY); if (xisnan(x) || xisnan(y)) result = SLEEF_NAN; if (y == 0 || x == 1) result = 1; return result; } static INLINE CONST Sleef_double2 expk2(Sleef_double2 d) { int q = (int)rintk((d.x + d.y) * R_LN2); Sleef_double2 s, t; double u; s = ddadd2_d2_d2_d(d, q * -L2U); s = ddadd2_d2_d2_d(s, q * -L2L); u = +0.1602472219709932072e-9; u = mla(u, s.x, +0.2092255183563157007e-8); u = mla(u, s.x, +0.2505230023782644465e-7); u = mla(u, s.x, +0.2755724800902135303e-6); u = mla(u, s.x, +0.2755731892386044373e-5); u = mla(u, s.x, +0.2480158735605815065e-4); u = mla(u, s.x, +0.1984126984148071858e-3); u = mla(u, s.x, +0.1388888888886763255e-2); u = mla(u, s.x, +0.8333333333333347095e-2); u = mla(u, s.x, +0.4166666666666669905e-1); t = ddadd2_d2_d2_d(ddmul_d2_d2_d(s, u), +0.1666666666666666574e+0); t = ddadd2_d2_d2_d(ddmul_d2_d2_d2(s, t), 0.5); t = ddadd2_d2_d2_d2(s, ddmul_d2_d2_d2(ddsqu_d2_d2(s), t)); t = ddadd2_d2_d_d2(1, t); t.x = ldexp2k(t.x, q); t.y = ldexp2k(t.y, q); return d.x < -1000 ? dd(0, 0) : t; } EXPORT CONST double xsinh(double x) { double y = fabsk(x); Sleef_double2 d = expk2(dd(y, 0)); d = ddsub_d2_d2_d2(d, ddrec_d2_d2(d)); y = (d.x + d.y) * 0.5; y = fabsk(x) > 710 ? SLEEF_INFINITY : y; y = xisnan(y) ? 
SLEEF_INFINITY : y; y = mulsign(y, x); y = xisnan(x) ? SLEEF_NAN : y; return y; } EXPORT CONST double xcosh(double x) { double y = fabsk(x); Sleef_double2 d = expk2(dd(y, 0)); d = ddadd_d2_d2_d2(d, ddrec_d2_d2(d)); y = (d.x + d.y) * 0.5; y = fabsk(x) > 710 ? SLEEF_INFINITY : y; y = xisnan(y) ? SLEEF_INFINITY : y; y = xisnan(x) ? SLEEF_NAN : y; return y; } EXPORT CONST double xtanh(double x) { double y = fabsk(x); Sleef_double2 d = expk2(dd(y, 0)); Sleef_double2 e = ddrec_d2_d2(d); d = dddiv_d2_d2_d2(ddsub_d2_d2_d2(d, e), ddadd_d2_d2_d2(d, e)); y = d.x + d.y; y = fabsk(x) > 18.714973875 ? 1.0 : y; y = xisnan(y) ? 1.0 : y; y = mulsign(y, x); y = xisnan(x) ? SLEEF_NAN : y; return y; } EXPORT CONST double xsinh_u35(double x) { double e = expm1k(fabsk(x)); double y = (e + 2) / (e + 1) * (0.5 * e); y = fabsk(x) > 709 ? SLEEF_INFINITY : y; y = xisnan(y) ? SLEEF_INFINITY : y; y = mulsign(y, x); y = xisnan(x) ? SLEEF_NAN : y; return y; } EXPORT CONST double xcosh_u35(double x) { double e = xexp(fabsk(x)); double y = 0.5 / e + 0.5 * e; y = fabsk(x) > 709 ? SLEEF_INFINITY : y; y = xisnan(y) ? SLEEF_INFINITY : y; y = xisnan(x) ? SLEEF_NAN : y; return y; } EXPORT CONST double xtanh_u35(double x) { double y = fabsk(x); double d = expm1k(2*y); y = d / (d + 2); y = fabsk(x) > 18.714973875 ? 1.0 : y; y = xisnan(y) ? 1.0 : y; y = mulsign(y, x); y = xisnan(x) ? 
SLEEF_NAN : y; return y; } static INLINE CONST Sleef_double2 logk2(Sleef_double2 d) { Sleef_double2 x, x2, m, s; double t; int e; e = ilogbk(d.x * (1.0/0.75)); m.x = ldexp2k(d.x, -e); m.y = ldexp2k(d.y, -e); x = dddiv_d2_d2_d2(ddadd2_d2_d2_d(m, -1), ddadd2_d2_d2_d(m, 1)); x2 = ddsqu_d2_d2(x); double x4 = x2.x * x2.x, x8 = x4 * x4; t = POLY7(x2.x, x4, x8, 0.13860436390467167910856, 0.131699838841615374240845, 0.153914168346271945653214, 0.181816523941564611721589, 0.22222224632662035403996, 0.285714285511134091777308, 0.400000000000914013309483); t = mla(t, x2.x, 0.666666666666664853302393); s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e); s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2)); s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(ddmul_d2_d2_d2(x2, x), t)); return s; } EXPORT CONST double xasinh(double x) { double y = fabsk(x); Sleef_double2 d; d = y > 1 ? ddrec_d2_d(x) : dd(y, 0); d = ddsqrt_d2_d2(ddadd2_d2_d2_d(ddsqu_d2_d2(d), 1)); d = y > 1 ? ddmul_d2_d2_d(d, y) : d; d = logk2(ddnormalize_d2_d2(ddadd_d2_d2_d(d, x))); y = d.x + d.y; y = (fabsk(x) > SQRT_DBL_MAX || xisnan(y)) ? mulsign(SLEEF_INFINITY, x) : y; y = xisnan(x) ? SLEEF_NAN : y; y = xisnegzero(x) ? -0.0 : y; return y; } EXPORT CONST double xacosh(double x) { Sleef_double2 d = logk2(ddadd2_d2_d2_d(ddmul_d2_d2_d2(ddsqrt_d2_d2(ddadd2_d2_d_d(x, 1)), ddsqrt_d2_d2(ddadd2_d2_d_d(x, -1))), x)); double y = d.x + d.y; y = (x > SQRT_DBL_MAX || xisnan(y)) ? SLEEF_INFINITY : y; y = x == 1.0 ? 0.0 : y; y = x < 1.0 ? SLEEF_NAN : y; y = xisnan(x) ? SLEEF_NAN : y; return y; } EXPORT CONST double xatanh(double x) { double y = fabsk(x); Sleef_double2 d = logk2(dddiv_d2_d2_d2(ddadd2_d2_d_d(1, y), ddadd2_d2_d_d(1, -y))); y = y > 1.0 ? SLEEF_NAN : (y == 1.0 ? SLEEF_INFINITY : (d.x + d.y) * 0.5); y = mulsign(y, x); y = (xisinf(x) || xisnan(y)) ? 
SLEEF_NAN : y; return y; } // EXPORT CONST double xcbrt(double d) { // max error : 2 ulps double x, y, q = 1.0; int e, r; e = ilogbk(fabsk(d))+1; d = ldexp2k(d, -e); r = (e + 6144) % 3; q = (r == 1) ? 1.2599210498948731647672106 : q; q = (r == 2) ? 1.5874010519681994747517056 : q; q = ldexp2k(q, (e + 6144) / 3 - 2048); q = mulsign(q, d); d = fabsk(d); x = -0.640245898480692909870982; x = mla(x, d, 2.96155103020039511818595); x = mla(x, d, -5.73353060922947843636166); x = mla(x, d, 6.03990368989458747961407); x = mla(x, d, -3.85841935510444988821632); x = mla(x, d, 2.2307275302496609725722); y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0); y = d * x * x; y = (y - (2.0 / 3.0) * y * (y * x - 1)) * q; return y; } EXPORT CONST double xcbrt_u1(double d) { double x, y, z; Sleef_double2 q2 = dd(1, 0), u, v; int e, r; e = ilogbk(fabsk(d))+1; d = ldexp2k(d, -e); r = (e + 6144) % 3; q2 = (r == 1) ? dd(1.2599210498948731907, -2.5899333753005069177e-17) : q2; q2 = (r == 2) ? dd(1.5874010519681995834, -1.0869008194197822986e-16) : q2; q2.x = mulsign(q2.x, d); q2.y = mulsign(q2.y, d); d = fabsk(d); x = -0.640245898480692909870982; x = mla(x, d, 2.96155103020039511818595); x = mla(x, d, -5.73353060922947843636166); x = mla(x, d, 6.03990368989458747961407); x = mla(x, d, -3.85841935510444988821632); x = mla(x, d, 2.2307275302496609725722); y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0); z = x; u = ddmul_d2_d_d(x, x); u = ddmul_d2_d2_d2(u, u); u = ddmul_d2_d2_d(u, d); u = ddadd2_d2_d2_d(u, -x); y = u.x + u.y; y = -2.0 / 3.0 * y * z; v = ddadd2_d2_d2_d(ddmul_d2_d_d(z, z), y); v = ddmul_d2_d2_d(v, d); v = ddmul_d2_d2_d2(v, q2); z = ldexp2k(v.x + v.y, (e + 6144) / 3 - 2048); if (xisinf(d)) { z = mulsign(SLEEF_INFINITY, q2.x); } if (d == 0) { z = mulsign(0, q2.x); } return z; } EXPORT CONST double xexp2(double d) { int q = (int)rintk(d); double s, u; s = d - q; double s2 = s * s, s4 = s2 * s2, s8 = s4 * s4; u = POLY10(s, s2, s4, s8, +0.4434359082926529454e-9, 
+0.7073164598085707425e-8, +0.1017819260921760451e-6, +0.1321543872511327615e-5, +0.1525273353517584730e-4, +0.1540353045101147808e-3, +0.1333355814670499073e-2, +0.9618129107597600536e-2, +0.5550410866482046596e-1, +0.2402265069591012214e+0); u = mla(u, s, +0.6931471805599452862e+0); u = ddnormalize_d2_d2(ddadd_d2_d_d2(1, ddmul_d2_d_d(u, s))).x; u = ldexp2k(u, q); if (d >= 1024) u = SLEEF_INFINITY; if (d < -2000) u = 0; return u; } EXPORT CONST double xexp2_u35(double d) { int q = (int)rintk(d); double s, u; s = d - q; u = +0.4434359082926529454e-9; u = mla(u, s, +0.7073164598085707425e-8); u = mla(u, s, +0.1017819260921760451e-6); u = mla(u, s, +0.1321543872511327615e-5); u = mla(u, s, +0.1525273353517584730e-4); u = mla(u, s, +0.1540353045101147808e-3); u = mla(u, s, +0.1333355814670499073e-2); u = mla(u, s, +0.9618129107597600536e-2); u = mla(u, s, +0.5550410866482046596e-1); u = mla(u, s, +0.2402265069591012214e+0); u = mla(u, s, +0.6931471805599452862e+0); u = mla(u, s, +0.1000000000000000000e+1); u = ldexp2k(u, q); if (d >= 1024) u = SLEEF_INFINITY; if (d < -2000) u = 0; return u; } EXPORT CONST double xexp10(double d) { int q = (int)rintk(d * LOG10_2); double s, u; s = mla(q, -L10U, d); s = mla(q, -L10L, s); u = +0.2411463498334267652e-3; u = mla(u, s, +0.1157488415217187375e-2); u = mla(u, s, +0.5013975546789733659e-2); u = mla(u, s, +0.1959762320720533080e-1); u = mla(u, s, +0.6808936399446784138e-1); u = mla(u, s, +0.2069958494722676234e+0); u = mla(u, s, +0.5393829292058536229e+0); u = mla(u, s, +0.1171255148908541655e+1); u = mla(u, s, +0.2034678592293432953e+1); u = mla(u, s, +0.2650949055239205876e+1); u = mla(u, s, +0.2302585092994045901e+1); u = ddnormalize_d2_d2(ddadd_d2_d_d2(1, ddmul_d2_d_d(u, s))).x; u = ldexp2k(u, q); if (d > 308.25471555991671) u = SLEEF_INFINITY; // log10(DBL_MAX) if (d < -350) u = 0; return u; } EXPORT CONST double xexp10_u35(double d) { int q = (int)rintk(d * LOG10_2); double s, u; s = mla(q, -L10U, d); s = mla(q, -L10L, 
s); u = +0.2411463498334267652e-3; u = mla(u, s, +0.1157488415217187375e-2); u = mla(u, s, +0.5013975546789733659e-2); u = mla(u, s, +0.1959762320720533080e-1); u = mla(u, s, +0.6808936399446784138e-1); u = mla(u, s, +0.2069958494722676234e+0); u = mla(u, s, +0.5393829292058536229e+0); u = mla(u, s, +0.1171255148908541655e+1); u = mla(u, s, +0.2034678592293432953e+1); u = mla(u, s, +0.2650949055239205876e+1); u = mla(u, s, +0.2302585092994045901e+1); u = mla(u, s, +0.1000000000000000000e+1); u = ldexp2k(u, q); if (d > 308.25471555991671) u = SLEEF_INFINITY; if (d < -350) u = 0; return u; } EXPORT CONST double xexpm1(double a) { Sleef_double2 d = ddadd2_d2_d2_d(expk2(dd(a, 0)), -1.0); double x = d.x + d.y; if (a > 709.782712893383996732223) x = SLEEF_INFINITY; // log(DBL_MAX) if (a < -36.736800569677101399113302437) x = -1; // log(1 - nexttoward(1, 0)) if (xisnegzero(a)) x = -0.0; return x; } EXPORT CONST double xlog10(double d) { Sleef_double2 x, s; double m, t, x2; int e; int o = d < DBL_MIN; if (o) d *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); e = ilogb2k(d * (1.0/0.75)); m = ldexp3k(d, -e); if (o) e -= 64; x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m)); x2 = x.x * x.x; double x4 = x2 * x2, x8 = x4 * x4; t = POLY7(x2, x4, x8, +0.6653725819576758460e-1, +0.6625722782820833712e-1, +0.7898105214313944078e-1, +0.9650955035715275132e-1, +0.1240841409721444993e+0, +0.1737177927454605086e+0, +0.2895296546021972617e+0); s = ddmul_d2_d2_d(dd(0.30102999566398119802, -2.803728127785170339e-18), (double)e); s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d2(x, dd(0.86858896380650363334, 1.1430059694096389311e-17))); s = ddadd_d2_d2_d(s, x2 * x.x * t); double r = s.x + s.y; if (xisinf(d)) r = SLEEF_INFINITY; if (d < 0 || xisnan(d)) r = SLEEF_NAN; if (d == 0) r = -SLEEF_INFINITY; return r; } EXPORT CONST double xlog2(double d) { Sleef_double2 x, s; double m, t, x2; int e; int o = d < DBL_MIN; if (o) d *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); 
e = ilogb2k(d * (1.0/0.75)); m = ldexp3k(d, -e); if (o) e -= 64; x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m)); x2 = x.x * x.x; double x4 = x2 * x2, x8 = x4 * x4; t = POLY7(x2, x4, x8, +0.2211941750456081490e+0, +0.2200768693152277689e+0, +0.2623708057488514656e+0, +0.3205977477944495502e+0, +0.4121985945485324709e+0, +0.5770780162997058982e+0, +0.96179669392608091449); s = ddadd2_d2_d_d2(e, ddmul_d2_d2_d2(x, dd(2.885390081777926774, 6.0561604995516736434e-18))); s = ddadd2_d2_d2_d(s, x2 * x.x * t); double r = s.x + s.y; if (xisinf(d)) r = SLEEF_INFINITY; if (d < 0 || xisnan(d)) r = SLEEF_NAN; if (d == 0) r = -SLEEF_INFINITY; return r; } EXPORT CONST double xlog2_u35(double d) { double m, t, x, x2; int e; int o = d < DBL_MIN; if (o) d *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); e = ilogb2k(d * (1.0/0.75)); m = ldexp3k(d, -e); if (o) e -= 64; x = (m - 1) / (m + 1); x2 = x * x; t = +0.2211941750456081490e+0; t = mla(t, x2, +0.2200768693152277689e+0); t = mla(t, x2, +0.2623708057488514656e+0); t = mla(t, x2, +0.3205977477944495502e+0); t = mla(t, x2, +0.4121985945485324709e+0); t = mla(t, x2, +0.5770780162997058982e+0); t = mla(t, x2, +0.96179669392608091449 ); Sleef_double2 s = ddadd_d2_d_d2(e, ddmul_d2_d_d(2.885390081777926774, x)); double r = mla(t, x * x2, s.x + s.y); if (xisinf(d)) r = SLEEF_INFINITY; if (d < 0 || xisnan(d)) r = SLEEF_NAN; if (d == 0) r = -SLEEF_INFINITY; return r; } EXPORT CONST double xlog1p(double d) { Sleef_double2 x, s; double m, t, x2; int e; double dp1 = d + 1; int o = dp1 < DBL_MIN; if (o) dp1 *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); e = ilogb2k(dp1 * (1.0/0.75)); t = ldexp3k(1, -e); m = mla(d, t, t - 1); if (o) e -= 64; x = dddiv_d2_d2_d2(dd(m, 0), ddadd_d2_d_d(2, m)); x2 = x.x * x.x; double x4 = x2 * x2, x8 = x4 * x4; t = POLY7(x2, x4, x8, 0.1532076988502701353e+0, 0.1525629051003428716e+0, 0.1818605932937785996e+0, 0.2222214519839380009e+0, 0.2857142932794299317e+0, 
0.3999999999635251990e+0, 0.6666666666667333541e+0); s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), (double)e); s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2)); s = ddadd_d2_d2_d(s, x2 * x.x * t); double r = s.x + s.y; if (d > 1e+307) r = SLEEF_INFINITY; if (d < -1 || xisnan(d)) r = SLEEF_NAN; if (d == -1) r = -SLEEF_INFINITY; if (xisnegzero(d)) r = -0.0; return r; } // EXPORT CONST double xfma(double x, double y, double z) { double h2 = x * y + z, q = 1; if (fabsk(h2) < 1e-300) { const double c0 = UINT64_C(1) << 54, c1 = c0 * c0, c2 = c1 * c1; x *= c1; y *= c1; z *= c2; q = 1.0 / c2; } if (fabsk(h2) > 1e+299) { const double c0 = UINT64_C(1) << 54, c1 = c0 * c0, c2 = c1 * c1; x *= 1.0 / c1; y *= 1.0 / c1; z *= 1. / c2; q = c2; } Sleef_double2 d = ddmul_d2_d_d(x, y); d = ddadd2_d2_d2_d(d, z); double ret = (x == 0 || y == 0) ? z : (d.x + d.y); if ((xisinf(z) && !xisinf(x) && !xisnan(x) && !xisinf(y) && !xisnan(y))) h2 = z; return (xisinf(h2) || xisnan(h2)) ? h2 : ret*q; } EXPORT CONST double xsqrt_u05(double d) { double q = 0.5; d = d < 0 ? SLEEF_NAN : d; if (d < 8.636168555094445E-78) { d *= 1.157920892373162E77; q = 2.9387358770557188E-39 * 0.5; } if (d > 1.3407807929942597e+154) { d *= 7.4583407312002070e-155; q = 1.1579208923731620e+77 * 0.5; } // http://en.wikipedia.org/wiki/Fast_inverse_square_root double x = longBitsToDouble(0x5fe6ec85e7de30da - (doubleToRawLongBits(d + 1e-320) >> 1)); x = x * (1.5 - 0.5 * d * x * x); x = x * (1.5 - 0.5 * d * x * x); x = x * (1.5 - 0.5 * d * x * x) * d; Sleef_double2 d2 = ddmul_d2_d2_d2(ddadd2_d2_d_d2(d, ddmul_d2_d_d(x, x)), ddrec_d2_d(x)); double ret = (d2.x + d2.y) * q; ret = d == SLEEF_INFINITY ? SLEEF_INFINITY : ret; ret = d == 0 ? 
d : ret; return ret; } EXPORT CONST double xsqrt_u35(double d) { return xsqrt_u05(d); } EXPORT CONST double xsqrt(double d) { return SQRT(d); } EXPORT CONST double xfabs(double x) { return fabsk(x); } EXPORT CONST double xcopysign(double x, double y) { return copysignk(x, y); } EXPORT CONST double xfmax(double x, double y) { return y != y ? x : (x > y ? x : y); } EXPORT CONST double xfmin(double x, double y) { return y != y ? x : (x < y ? x : y); } EXPORT CONST double xfdim(double x, double y) { double ret = x - y; if (ret < 0 || x == y) ret = 0; return ret; } EXPORT CONST double xtrunc(double x) { double fr = x - (double)(INT64_C(1) << 31) * (int32_t)(x * (1.0 / (INT64_C(1) << 31))); fr = fr - (int32_t)fr; return (xisinf(x) || fabsk(x) >= (double)(INT64_C(1) << 52)) ? x : copysignk(x - fr, x); } EXPORT CONST double xfloor(double x) { double fr = x - (double)(INT64_C(1) << 31) * (int32_t)(x * (1.0 / (INT64_C(1) << 31))); fr = fr - (int32_t)fr; fr = fr < 0 ? fr+1.0 : fr; return (xisinf(x) || fabsk(x) >= (double)(INT64_C(1) << 52)) ? x : copysignk(x - fr, x); } EXPORT CONST double xceil(double x) { double fr = x - (double)(INT64_C(1) << 31) * (int32_t)(x * (1.0 / (INT64_C(1) << 31))); fr = fr - (int32_t)fr; fr = fr <= 0 ? fr : fr-1.0; return (xisinf(x) || fabsk(x) >= (double)(INT64_C(1) << 52)) ? x : copysignk(x - fr, x); } EXPORT CONST double xround(double d) { double x = d + 0.5; double fr = x - (double)(INT64_C(1) << 31) * (int32_t)(x * (1.0 / (INT64_C(1) << 31))); fr = fr - (int32_t)fr; if (fr == 0 && x <= 0) x--; fr = fr < 0 ? fr+1.0 : fr; x = d == 0.49999999999999994449 ? 0 : x; // nextafter(0.5, 0) return (xisinf(d) || fabsk(d) >= (double)(INT64_C(1) << 52)) ? d : copysignk(x - fr, d); } EXPORT CONST double xrint(double d) { double c = mulsign(INT64_C(1) << 52, d); return fabsk(d) > INT64_C(1) << 52 ? 
d : orsign(d + c - c, d); } EXPORT CONST double xhypot_u05(double x, double y) { x = fabsk(x); y = fabsk(y); double min = fmink(x, y), n = min; double max = fmaxk(x, y), d = max; if (max < DBL_MIN) { n *= UINT64_C(1) << 54; d *= UINT64_C(1) << 54; } Sleef_double2 t = dddiv_d2_d2_d2(dd(n, 0), dd(d, 0)); t = ddmul_d2_d2_d(ddsqrt_d2_d2(ddadd2_d2_d2_d(ddsqu_d2_d2(t), 1)), max); double ret = t.x + t.y; if (xisnan(ret)) ret = SLEEF_INFINITY; if (min == 0) ret = max; if (xisnan(x) || xisnan(y)) ret = SLEEF_NAN; if (x == SLEEF_INFINITY || y == SLEEF_INFINITY) ret = SLEEF_INFINITY; return ret; } EXPORT CONST double xhypot_u35(double x, double y) { x = fabsk(x); y = fabsk(y); double min = fmink(x, y); double max = fmaxk(x, y); double t = min / max; double ret = max * SQRT(1 + t*t); if (min == 0) ret = max; if (xisnan(x) || xisnan(y)) ret = SLEEF_NAN; if (x == SLEEF_INFINITY || y == SLEEF_INFINITY) ret = SLEEF_INFINITY; return ret; } EXPORT CONST double xnextafter(double x, double y) { union { double f; int64_t i; } cx; x = x == 0 ? mulsign(0, y) : x; cx.f = x; int c = (cx.i < 0) == (y < x); if (c) cx.i = -(cx.i ^ (UINT64_C(1) << 63)); if (x != y) cx.i--; if (c) cx.i = -(cx.i ^ (UINT64_C(1) << 63)); if (cx.f == 0 && x != 0) cx.f = mulsign(0, x); if (x == 0 && y == 0) cx.f = y; if (xisnan(x) || xisnan(y)) cx.f = SLEEF_NAN; return cx.f; } EXPORT CONST double xfrfrexp(double x) { union { double f; uint64_t u; } cx; if (fabsk(x) < DBL_MIN) x *= (UINT64_C(1) << 63); cx.f = x; cx.u &= ~UINT64_C(0x7ff0000000000000); cx.u |= UINT64_C(0x3fe0000000000000); if (xisinf(x)) cx.f = mulsign(SLEEF_INFINITY, x); if (x == 0) cx.f = x; return cx.f; } EXPORT CONST int xexpfrexp(double x) { union { double f; uint64_t u; } cx; int ret = 0; if (fabsk(x) < DBL_MIN) { x *= (UINT64_C(1) << 63); ret = -63; } cx.f = x; ret += (int32_t)(((cx.u >> 52) & 0x7ff)) - 0x3fe; if (x == 0 || xisnan(x) || xisinf(x)) ret = 0; return ret; } static INLINE CONST double toward0(double d) { return d == 0 ? 
0 : longBitsToDouble(doubleToRawLongBits(d)-1); } static INLINE CONST double removelsb(double d) { return longBitsToDouble(doubleToRawLongBits(d) & INT64_C(0xfffffffffffffffe)); } static INLINE CONST double ptrunc(double x) { double fr = mla(-(double)(INT64_C(1) << 31), (int32_t)(x * (1.0 / (INT64_C(1) << 31))), x); return fabsk(x) >= (double)(INT64_C(1) << 52) ? x : (x - (fr - (int32_t)fr)); } EXPORT CONST double xfmod(double x, double y) { double n = fabsk(x), d = fabsk(y), s = 1, q; if (d < DBL_MIN) { n *= UINT64_C(1) << 54; d *= UINT64_C(1) << 54; s = 1.0 / (UINT64_C(1) << 54); } Sleef_double2 r = dd(n, 0); double rd = toward0(1.0 / d); for(int i=0;i < 21;i++) { // ceil(log2(DBL_MAX) / 52) q = removelsb(ptrunc(toward0(r.x) * rd)); q = (3*d > r.x && r.x > d) ? 2 : q; q = (2*d > r.x && r.x > d) ? 1 : q; q = r.x == d ? (r.y >= 0 ? 1 : 0) : q; r = ddnormalize_d2_d2(ddadd2_d2_d2_d2(r, ddmul_d2_d_d(q, -d))); if (r.x < d) break; } double ret = r.x * s; if (r.x + r.y == d) ret = 0; ret = mulsign(ret, x); if (n < d) ret = x; if (d == 0) ret = SLEEF_NAN; return ret; } static INLINE CONST double rintk2(double d) { double c = mulsign(INT64_C(1) << 52, d); return fabsk(d) > INT64_C(1) << 52 ? d : orsign(d + c - c, d); } EXPORT CONST double xremainder(double x, double y) { double n = fabsk(x), d = fabsk(y), s = 1, q; if (d < DBL_MIN*2) { n *= UINT64_C(1) << 54; d *= UINT64_C(1) << 54; s = 1.0 / (UINT64_C(1) << 54); } double rd = 1.0 / d; Sleef_double2 r = dd(n, 0); int qisodd = 0; for(int i=0;i < 21;i++) { // ceil(log2(DBL_MAX) / 52) q = removelsb(rintk2(r.x * rd)); if (fabsk(r.x) < 1.5 * d) q = r.x < 0 ? -1 : 1; if (fabsk(r.x) < 0.5 * d || (fabsk(r.x) == 0.5 * d && !qisodd)) q = 0; if (q == 0) break; if (xisinf(q * -d)) q = q + mulsign(-1, r.x); qisodd ^= xisodd(q); r = ddnormalize_d2_d2(ddadd2_d2_d2_d2(r, ddmul_d2_d_d(q, -d))); } double ret = r.x * s; ret = mulsign(ret, x); if (xisinf(y)) ret = xisinf(x) ? 
SLEEF_NAN : x; if (d == 0) ret = SLEEF_NAN; return ret; } EXPORT CONST Sleef_double2 xmodf(double x) { double fr = x - (double)(INT64_C(1) << 31) * (int32_t)(x * (1.0 / (INT64_C(1) << 31))); fr = fr - (int32_t)fr; fr = fabsk(x) >= (double)(INT64_C(1) << 52) ? 0 : fr; Sleef_double2 ret = { copysignk(fr, x), copysignk(x - fr, x) }; return ret; } typedef struct { Sleef_double2 a, b; } dd2; static CONST dd2 gammak(double a) { Sleef_double2 clc = dd(0, 0), clln = dd(1, 0), clld = dd(1, 0), v = dd(1, 0), x, y, z; double t, u; int otiny = fabsk(a) < 1e-306, oref = a < 0.5; x = otiny ? dd(0, 0) : (oref ? ddadd2_d2_d_d(1, -a) : dd(a, 0)); int o0 = (0.5 <= x.x && x.x <= 1.1), o2 = 2.3 < x.x; y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 1), x)); y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 2), y)); y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 3), y)); y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 4), y)); clln = (o2 && x.x <= 7) ? y : clln; x = (o2 && x.x <= 7) ? ddadd2_d2_d2_d(x, 5) : x; t = o2 ? (1.0 / x.x) : ddnormalize_d2_d2(ddadd2_d2_d2_d(x, o0 ? -1 : -2)).x; u = o2 ? -156.801412704022726379848862 : (o0 ? +0.2947916772827614196e+2 : +0.7074816000864609279e-7); u = mla(u, t, o2 ? +1.120804464289911606838558160000 : (o0 ? +0.1281459691827820109e+3 : +0.4009244333008730443e-6)); u = mla(u, t, o2 ? +13.39798545514258921833306020000 : (o0 ? +0.2617544025784515043e+3 : +0.1040114641628246946e-5)); u = mla(u, t, o2 ? -0.116546276599463200848033357000 : (o0 ? +0.3287022855685790432e+3 : +0.1508349150733329167e-5)); u = mla(u, t, o2 ? -1.391801093265337481495562410000 : (o0 ? +0.2818145867730348186e+3 : +0.1288143074933901020e-5)); u = mla(u, t, o2 ? +0.015056113040026424412918973400 : (o0 ? +0.1728670414673559605e+3 : +0.4744167749884993937e-6)); u = mla(u, t, o2 ? +0.179540117061234856098844714000 : (o0 ? +0.7748735764030416817e+2 : -0.6554816306542489902e-7)); u = mla(u, t, o2 ? -0.002481743600264997730942489280 : (o0 ? 
+0.2512856643080930752e+2 : -0.3189252471452599844e-6)); u = mla(u, t, o2 ? -0.029527880945699120504851034100 : (o0 ? +0.5766792106140076868e+1 : +0.1358883821470355377e-6)); u = mla(u, t, o2 ? +0.000540164767892604515196325186 : (o0 ? +0.7270275473996180571e+0 : -0.4343931277157336040e-6)); u = mla(u, t, o2 ? +0.006403362833808069794787256200 : (o0 ? +0.8396709124579147809e-1 : +0.9724785897406779555e-6)); u = mla(u, t, o2 ? -0.000162516262783915816896611252 : (o0 ? -0.8211558669746804595e-1 : -0.2036886057225966011e-5)); u = mla(u, t, o2 ? -0.001914438498565477526465972390 : (o0 ? +0.6828831828341884458e-1 : +0.4373363141819725815e-5)); u = mla(u, t, o2 ? +7.20489541602001055898311517e-05 : (o0 ? -0.7712481339961671511e-1 : -0.9439951268304008677e-5)); u = mla(u, t, o2 ? +0.000839498720672087279971000786 : (o0 ? +0.8337492023017314957e-1 : +0.2050727030376389804e-4)); u = mla(u, t, o2 ? -5.17179090826059219329394422e-05 : (o0 ? -0.9094964931456242518e-1 : -0.4492620183431184018e-4)); u = mla(u, t, o2 ? -0.000592166437353693882857342347 : (o0 ? +0.1000996313575929358e+0 : +0.9945751236071875931e-4)); u = mla(u, t, o2 ? +6.97281375836585777403743539e-05 : (o0 ? -0.1113342861544207724e+0 : -0.2231547599034983196e-3)); u = mla(u, t, o2 ? +0.000784039221720066627493314301 : (o0 ? +0.1255096673213020875e+0 : +0.5096695247101967622e-3)); u = mla(u, t, o2 ? -0.000229472093621399176949318732 : (o0 ? -0.1440498967843054368e+0 : -0.1192753911667886971e-2)); u = mla(u, t, o2 ? -0.002681327160493827160473958490 : (o0 ? +0.1695571770041949811e+0 : +0.2890510330742210310e-2)); u = mla(u, t, o2 ? +0.003472222222222222222175164840 : (o0 ? -0.2073855510284092762e+0 : -0.7385551028674461858e-2)); u = mla(u, t, o2 ? +0.083333333333333333335592087900 : (o0 ? 
+0.2705808084277815939e+0 : +0.2058080842778455335e-1)); y = ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, -0.5), logk2(x)); y = ddadd2_d2_d2_d2(y, ddneg_d2_d2(x)); y = ddadd2_d2_d2_d2(y, dd(0.91893853320467278056, -3.8782941580672414498e-17)); // 0.5*log(2*M_PI) z = ddadd2_d2_d2_d(ddmul_d2_d_d (u, t), o0 ? -0.4006856343865314862e+0 : -0.6735230105319810201e-1); z = ddadd2_d2_d2_d(ddmul_d2_d2_d(z, t), o0 ? +0.8224670334241132030e+0 : +0.3224670334241132030e+0); z = ddadd2_d2_d2_d(ddmul_d2_d2_d(z, t), o0 ? -0.5772156649015328655e+0 : +0.4227843350984671345e+0); z = ddmul_d2_d2_d(z, t); clc = o2 ? y : z; clld = o2 ? ddadd2_d2_d2_d(ddmul_d2_d_d(u, t), 1) : clld; y = clln; clc = otiny ? dd(83.1776616671934334590333, 3.67103459631568507221878e-15) : // log(2^120) (oref ? ddadd2_d2_d2_d2(dd(1.1447298858494001639, 1.026595116270782638e-17), ddneg_d2_d2(clc)) : clc); // log(M_PI) clln = otiny ? dd(1, 0) : (oref ? clln : clld); if (oref) x = ddmul_d2_d2_d2(clld, sinpik(a - (double)(INT64_C(1) << 28) * (int32_t)(a * (1.0 / (INT64_C(1) << 28))))); clld = otiny ? dd(a*((INT64_C(1) << 60)*(double)(INT64_C(1) << 60)), 0) : (oref ? x : y); dd2 ret = { clc, dddiv_d2_d2_d2(clln, clld) }; return ret; } EXPORT CONST double xtgamma_u1(double a) { dd2 d = gammak(a); Sleef_double2 y = ddmul_d2_d2_d2(expk2(d.a), d.b); double r = y.x + y.y; r = (a == -SLEEF_INFINITY || (a < 0 && xisint(a)) || (xisnumber(a) && a < 0 && xisnan(r))) ? SLEEF_NAN : r; r = ((a == SLEEF_INFINITY || xisnumber(a)) && a >= -DBL_MIN && (a == 0 || a > 200 || xisnan(r))) ? mulsign(SLEEF_INFINITY, a) : r; return r; } EXPORT CONST double xlgamma_u1(double a) { dd2 d = gammak(a); Sleef_double2 y = ddadd2_d2_d2_d2(d.a, logk2(ddabs_d2_d2(d.b))); double r = y.x + y.y; r = (xisinf(a) || (a <= 0 && xisint(a)) || (xisnumber(a) && xisnan(r))) ? SLEEF_INFINITY : r; return r; } EXPORT CONST double xerf_u1(double a) { double s = a, t, u; Sleef_double2 d; a = fabsk(a); int o0 = a < 1.0, o1 = a < 3.7, o2 = a < 6.0; u = o0 ? (a*a) : a; t = o0 ? 
+0.6801072401395392157e-20 : o1 ? +0.2830954522087717660e-13 : -0.5846750404269610493e-17; t = mla(t, u, o0 ? -0.2161766247570056391e-18 : o1 ? -0.1509491946179481940e-11 : +0.6076691048812607898e-15); t = mla(t, u, o0 ? +0.4695919173301598752e-17 : o1 ? +0.3827857177807173152e-10 : -0.3007518609604893831e-13); t = mla(t, u, o0 ? -0.9049140419888010819e-16 : o1 ? -0.6139733921558987241e-09 : +0.9427906260824646063e-12); t = mla(t, u, o0 ? +0.1634018903557411517e-14 : o1 ? +0.6985387934608038824e-08 : -0.2100110908269393629e-10); t = mla(t, u, o0 ? -0.2783485786333455216e-13 : o1 ? -0.5988224513034371474e-07 : +0.3534639523461223473e-09); t = mla(t, u, o0 ? +0.4463221276786412722e-12 : o1 ? +0.4005716952355346640e-06 : -0.4664967728285395926e-08); t = mla(t, u, o0 ? -0.6711366622850138987e-11 : o1 ? -0.2132190104575784400e-05 : +0.4943823283769000532e-07); t = mla(t, u, o0 ? +0.9422759050232658346e-10 : o1 ? +0.9092461304042630325e-05 : -0.4271203394761148254e-06); t = mla(t, u, o0 ? -0.1229055530100228477e-08 : o1 ? -0.3079188080966205457e-04 : +0.3034067677404915895e-05); t = mla(t, u, o0 ? +0.1480719281585085023e-07 : o1 ? +0.7971413443082370762e-04 : -0.1776295289066871135e-04); t = mla(t, u, o0 ? -0.1636584469123402714e-06 : o1 ? -0.1387853215225442864e-03 : +0.8524547630559505050e-04); t = mla(t, u, o0 ? +0.1646211436588923363e-05 : o1 ? +0.6469678026257590965e-04 : -0.3290582944961784398e-03); t = mla(t, u, o0 ? -0.1492565035840624866e-04 : o1 ? +0.4996645280372945860e-03 : +0.9696966068789101157e-03); t = mla(t, u, o0 ? +0.1205533298178966496e-03 : o1 ? -0.1622802482842520535e-02 : -0.1812527628046986137e-02); t = mla(t, u, o0 ? -0.8548327023450851166e-03 : o1 ? +0.1615320557049377171e-03 : -0.4725409828123619017e-03); t = mla(t, u, o0 ? +0.5223977625442188799e-02 : o1 ? +0.1915262325574875607e-01 : +0.2090315427924229266e-01); t = mla(t, u, o0 ? -0.2686617064513125569e-01 : o1 ? -0.1027818298486033455e+00 : -0.1052041921842776645e+00); t = mla(t, u, o0 ? 
+0.1128379167095512753e+00 : o1 ? -0.6366172819842503827e+00 : -0.6345351808766568347e+00); t = mla(t, u, o0 ? -0.3761263890318375380e+00 : o1 ? -0.1128379590648910469e+01 : -0.1129442929103524396e+01); d = ddmul_d2_d_d(t, u); d = ddadd2_d2_d2_d2(d, o0 ? dd(1.1283791670955125586, 1.5335459613165822674e-17) : o1 ? dd(3.4110644736196137587e-08, -2.4875650708323294246e-24) : dd(0.00024963035690526438285, -5.4362665034856259795e-21)); d = o0 ? ddmul_d2_d2_d(d, a) : ddadd_d2_d_d2(1.0, ddneg_d2_d2(expk2(d))); u = mulsign(o2 ? (d.x + d.y) : 1, s); u = xisnan(a) ? SLEEF_NAN : u; return u; } EXPORT CONST double xerfc_u15(double a) { double s = a, r = 0, t; Sleef_double2 u, d, x; a = fabsk(a); int o0 = a < 1.0, o1 = a < 2.2, o2 = a < 4.2, o3 = a < 27.3; u = o0 ? ddmul_d2_d_d(a, a) : o1 ? dd(a, 0) : dddiv_d2_d2_d2(dd(1, 0), dd(a, 0)); t = o0 ? +0.6801072401395386139e-20 : o1 ? +0.3438010341362585303e-12 : o2 ? -0.5757819536420710449e+2 : +0.2334249729638701319e+5; t = mla(t, u.x, o0 ? -0.2161766247570055669e-18 : o1 ? -0.1237021188160598264e-10 : o2 ? +0.4669289654498104483e+3 : -0.4695661044933107769e+5); t = mla(t, u.x, o0 ? +0.4695919173301595670e-17 : o1 ? +0.2117985839877627852e-09 : o2 ? -0.1796329879461355858e+4 : +0.3173403108748643353e+5); t = mla(t, u.x, o0 ? -0.9049140419888007122e-16 : o1 ? -0.2290560929177369506e-08 : o2 ? +0.4355892193699575728e+4 : +0.3242982786959573787e+4); t = mla(t, u.x, o0 ? +0.1634018903557410728e-14 : o1 ? +0.1748931621698149538e-07 : o2 ? -0.7456258884965764992e+4 : -0.2014717999760347811e+5); t = mla(t, u.x, o0 ? -0.2783485786333451745e-13 : o1 ? -0.9956602606623249195e-07 : o2 ? +0.9553977358167021521e+4 : +0.1554006970967118286e+5); t = mla(t, u.x, o0 ? +0.4463221276786415752e-12 : o1 ? +0.4330010240640327080e-06 : o2 ? -0.9470019905444229153e+4 : -0.6150874190563554293e+4); t = mla(t, u.x, o0 ? -0.6711366622850136563e-11 : o1 ? -0.1435050600991763331e-05 : o2 ? 
+0.7387344321849855078e+4 : +0.1240047765634815732e+4); t = mla(t, u.x, o0 ? +0.9422759050232662223e-10 : o1 ? +0.3460139479650695662e-05 : o2 ? -0.4557713054166382790e+4 : -0.8210325475752699731e+2); t = mla(t, u.x, o0 ? -0.1229055530100229098e-08 : o1 ? -0.4988908180632898173e-05 : o2 ? +0.2207866967354055305e+4 : +0.3242443880839930870e+2); t = mla(t, u.x, o0 ? +0.1480719281585086512e-07 : o1 ? -0.1308775976326352012e-05 : o2 ? -0.8217975658621754746e+3 : -0.2923418863833160586e+2); t = mla(t, u.x, o0 ? -0.1636584469123399803e-06 : o1 ? +0.2825086540850310103e-04 : o2 ? +0.2268659483507917400e+3 : +0.3457461732814383071e+0); t = mla(t, u.x, o0 ? +0.1646211436588923575e-05 : o1 ? -0.6393913713069986071e-04 : o2 ? -0.4633361260318560682e+2 : +0.5489730155952392998e+1); t = mla(t, u.x, o0 ? -0.1492565035840623511e-04 : o1 ? -0.2566436514695078926e-04 : o2 ? +0.9557380123733945965e+1 : +0.1559934132251294134e-2); t = mla(t, u.x, o0 ? +0.1205533298178967851e-03 : o1 ? +0.5895792375659440364e-03 : o2 ? -0.2958429331939661289e+1 : -0.1541741566831520638e+1); t = mla(t, u.x, o0 ? -0.8548327023450850081e-03 : o1 ? -0.1695715579163588598e-02 : o2 ? +0.1670329508092765480e+0 : +0.2823152230558364186e-5); t = mla(t, u.x, o0 ? +0.5223977625442187932e-02 : o1 ? +0.2089116434918055149e-03 : o2 ? +0.6096615680115419211e+0 : +0.6249999184195342838e+0); t = mla(t, u.x, o0 ? -0.2686617064513125222e-01 : o1 ? +0.1912855949584917753e-01 : o2 ? +0.1059212443193543585e-2 : +0.1741749416408701288e-8); d = ddmul_d2_d2_d(u, t); d = ddadd2_d2_d2_d2(d, o0 ? dd(0.11283791670955126141, -4.0175691625932118483e-18) : o1 ? dd(-0.10277263343147646779, -6.2338714083404900225e-18) : o2 ? dd(-0.50005180473999022439, 2.6362140569041995803e-17) : dd(-0.5000000000258444377, -4.0074044712386992281e-17)); d = ddmul_d2_d2_d2(d, u); d = ddadd2_d2_d2_d2(d, o0 ? dd(-0.37612638903183753802, 1.3391897206042552387e-17) : o1 ? dd(-0.63661976742916359662, 7.6321019159085724662e-18) : o2 ? 
dd(1.601106273924963368e-06, 1.1974001857764476775e-23) : dd(2.3761973137523364792e-13, -1.1670076950531026582e-29)); d = ddmul_d2_d2_d2(d, u); d = ddadd2_d2_d2_d2(d, o0 ? dd(1.1283791670955125586, 1.5335459613165822674e-17) : o1 ? dd(-1.1283791674717296161, 8.0896847755965377194e-17) : o2 ? dd(-0.57236496645145429341, 3.0704553245872027258e-17) : dd(-0.57236494292470108114, -2.3984352208056898003e-17)); x = ddmul_d2_d2_d(o1 ? d : dd(-a, 0), a); x = o1 ? x : ddadd2_d2_d2_d2(x, d); x = o0 ? ddsub_d2_d2_d2(dd(1, 0), x) : expk2(x); x = o1 ? x : ddmul_d2_d2_d2(x, u); r = o3 ? (x.x + x.y) : 0; if (s < 0) r = 2 - r; r = xisnan(s) ? SLEEF_NAN : r; return r; } #ifdef ENABLE_MAIN // gcc -w -DENABLE_MAIN -I../common sleefdp.c rempitab.c -lm #include int main(int argc, char **argv) { double d1 = atof(argv[1]); printf("arg1 = %.20g\n", d1); //int i1 = atoi(argv[1]); //double d2 = atof(argv[2]); //printf("arg2 = %.20g\n", d2); //printf("%d\n", (int)d2); #if 0 double d3 = atof(argv[3]); printf("arg3 = %.20g\n", d3); #endif //printf("%g\n", pow2i(i1)); //int exp = xexpfrexp(d1); //double r = xnextafter(d1, d2); //double r = xfma(d1, d2, d3); printf("test = %.20g\n", xcos_u1(d1)); //printf("test = %.20g\n", xlog(d1)); //r = nextafter(d1, d2); printf("corr = %.20g\n", cos(d1)); //printf("%.20g %.20g\n", xround(d1), xrint(d1)); //Sleef_double2 r = xsincospi_u35(d); //printf("%g, %g\n", (double)r.x, (double)r.y); } #endif sleef-3.5.1/src/libm/sleefinline_header.h.org000066400000000000000000000004261373003144100210720ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See http://www.boost.org/LICENSE_1_0.txt) // This file is generated by SLEEF %VERSION% #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif sleef-3.5.1/src/libm/sleefld.c000066400000000000000000000243661373003144100161210ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. 
// Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. #include #include #include #include #include "misc.h" #ifdef DORENAME #include "rename.h" #endif #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif static INLINE CONST long double mlal(long double x, long double y, long double z) { return x * y + z; } static INLINE CONST long double xrintl(long double x) { return x < 0 ? (int)(x - 0.5) : (int)(x + 0.5); } static INLINE CONST int64_t xceill(long double x) { return (int64_t)x + (x < 0 ? 0 : 1); } static INLINE CONST long double xtruncl(long double x) { return (long double)(int)x; } static INLINE CONST int xisnanl(long double x) { return x != x; } static INLINE CONST int xisinfl(long double x) { return x == SLEEF_INFINITYl || x == -SLEEF_INFINITYl; } static INLINE CONST int xisminfl(long double x) { return x == -SLEEF_INFINITYl; } static INLINE CONST int xispinfl(long double x) { return x == SLEEF_INFINITYl; } static INLINE CONST long double xfabsl(long double x) { return x >= 0 ? 
x : -x; } // #ifndef NDEBUG static int checkfp(long double x) { if (xisinfl(x) || xisnanl(x)) return 1; return 0; } #endif static INLINE CONST long double upperl(long double d) { union { long double ld; uint32_t u[4]; } cnv; cnv.ld = d; cnv.u[0] = 0; return cnv.ld; } static INLINE CONST Sleef_longdouble2 dl(long double h, long double l) { Sleef_longdouble2 ret; ret.x = h; ret.y = l; return ret; } static INLINE CONST Sleef_longdouble2 dlnormalize_l2_l2(Sleef_longdouble2 t) { Sleef_longdouble2 s; s.x = t.x + t.y; s.y = t.x - s.x + t.y; return s; } static INLINE CONST Sleef_longdouble2 dlscale_l2_l2_l(Sleef_longdouble2 d, long double s) { Sleef_longdouble2 r; r.x = d.x * s; r.y = d.y * s; return r; } static INLINE CONST Sleef_longdouble2 dlneg_l2_l2(Sleef_longdouble2 d) { Sleef_longdouble2 r; r.x = -d.x; r.y = -d.y; return r; } static INLINE CONST Sleef_longdouble2 dladd_l2_l_l(long double x, long double y) { // |x| >= |y| Sleef_longdouble2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y) || xfabsl(x) >= xfabsl(y))) { fprintf(stderr, "[dladd_l2_l_l : %Lg, %Lg]\n", x, y); fflush(stderr); } #endif r.x = x + y; r.y = x - r.x + y; return r; } static INLINE CONST Sleef_longdouble2 dladd2_l2_l_l(long double x, long double y) { Sleef_longdouble2 r; r.x = x + y; long double v = r.x - x; r.y = (x - (r.x - v)) + (y - v); return r; } static INLINE CONST Sleef_longdouble2 dladd_l2_l2_l(Sleef_longdouble2 x, long double y) { // |x| >= |y| Sleef_longdouble2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y) || xfabsl(x.x) >= xfabsl(y))) { fprintf(stderr, "[dladd_l2_l2_l : %Lg %Lg]\n", x.x, y); fflush(stderr); } #endif r.x = x.x + y; r.y = x.x - r.x + y + x.y; return r; } static INLINE CONST Sleef_longdouble2 dladd2_l2_l2_l(Sleef_longdouble2 x, long double y) { // |x| >= |y| Sleef_longdouble2 r; r.x = x.x + y; long double v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y - v); r.y += x.y; return r; } static INLINE CONST Sleef_longdouble2 dladd_l2_l_l2(long double x, Sleef_longdouble2 y) { 
// |x| >= |y| Sleef_longdouble2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y.x) || xfabsl(x) >= xfabsl(y.x))) { fprintf(stderr, "[dladd_l2_l_l2 : %Lg %Lg]\n", x, y.x); fflush(stderr); } #endif r.x = x + y.x; r.y = x - r.x + y.x + y.y; return r; } static INLINE CONST Sleef_longdouble2 dladd2_l2_l_l2(long double x, Sleef_longdouble2 y) { Sleef_longdouble2 r; r.x = x + y.x; long double v = r.x - x; r.y = (x - (r.x - v)) + (y.x - v) + y.y; return r; } static INLINE CONST Sleef_longdouble2 dladd_l2_l2_l2(Sleef_longdouble2 x, Sleef_longdouble2 y) { // |x| >= |y| Sleef_longdouble2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y.x) || xfabsl(x.x) >= xfabsl(y.x))) { fprintf(stderr, "[dladd_l2_l2_l2 : %Lg %Lg]\n", x.x, y.x); fflush(stderr); } #endif r.x = x.x + y.x; r.y = x.x - r.x + y.x + x.y + y.y; return r; } static INLINE CONST Sleef_longdouble2 dladd2_l2_l2_l2(Sleef_longdouble2 x, Sleef_longdouble2 y) { Sleef_longdouble2 r; r.x = x.x + y.x; long double v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y.x - v); r.y += x.y + y.y; return r; } static INLINE CONST Sleef_longdouble2 dlsub_l2_l2_l2(Sleef_longdouble2 x, Sleef_longdouble2 y) { // |x| >= |y| Sleef_longdouble2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y.x) || xfabsl(x.x) >= xfabsl(y.x))) { fprintf(stderr, "[dlsub_l2_l2_l2 : %Lg %Lg]\n", x.x, y.x); fflush(stderr); } #endif r.x = x.x - y.x; r.y = x.x - r.x - y.x + x.y - y.y; return r; } static INLINE CONST Sleef_longdouble2 dldiv_l2_l2_l2(Sleef_longdouble2 n, Sleef_longdouble2 d) { long double t = 1.0 / d.x; long double dh = upperl(d.x), dl = d.x - dh; long double th = upperl(t ), tl = t - th; long double nhh = upperl(n.x), nhl = n.x - nhh; Sleef_longdouble2 q; q.x = n.x * t; long double u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); q.y = t * (n.y - q.x * d.y) + u; return q; } static INLINE CONST Sleef_longdouble2 dlmul_l2_l_l(long double x, long double y) { long double xh = upperl(x), xl = x - xh; long 
double yh = upperl(y), yl = y - yh; Sleef_longdouble2 r; r.x = x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; return r; } static INLINE CONST Sleef_longdouble2 dlmul_l2_l2_l(Sleef_longdouble2 x, long double y) { long double xh = upperl(x.x), xl = x.x - xh; long double yh = upperl(y ), yl = y - yh; Sleef_longdouble2 r; r.x = x.x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; return r; } static INLINE CONST Sleef_longdouble2 dlmul_l2_l2_l2(Sleef_longdouble2 x, Sleef_longdouble2 y) { long double xh = upperl(x.x), xl = x.x - xh; long double yh = upperl(y.x), yl = y.x - yh; Sleef_longdouble2 r; r.x = x.x * y.x; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; return r; } static INLINE CONST Sleef_longdouble2 dlsqu_l2_l2(Sleef_longdouble2 x) { long double xh = upperl(x.x), xl = x.x - xh; Sleef_longdouble2 r; r.x = x.x * x.x; r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); return r; } static INLINE CONST Sleef_longdouble2 dlrec_l2_l(long double d) { long double t = 1.0 / d; long double dh = upperl(d), dl = d - dh; long double th = upperl(t), tl = t - th; Sleef_longdouble2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); return q; } static INLINE CONST Sleef_longdouble2 dlrec_l2_l2(Sleef_longdouble2 d) { long double t = 1.0 / d.x; long double dh = upperl(d.x), dl = d.x - dh; long double th = upperl(t ), tl = t - th; Sleef_longdouble2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); return q; } /* static INLINE CONST Sleef_longdouble2 dlsqrt_l2_l2(Sleef_longdouble2 d) { long double t = sqrt(d.x + d.y); return dlscale_l2_l2_l(dlmul_l2_l2_l2(dladd2_l2_l2_l2(d, dlmul_l2_l_l(t, t)), dlrec_l2_l(t)), 0.5); } */ // EXPORT CONST Sleef_longdouble2 xsincospil_u05(long double d) { long double u, s, t; Sleef_longdouble2 r, x, s2; u = d * 4; int64_t q = xceill(u) & ~(int64_t)1; s = u - (long double)q; t = s; s = s * s; s2 = dlmul_l2_l_l(t, t); // u = 
4.59265607313529833157632e-17L; u = mlal(u, s, -2.04096140520547829627419e-14L); u = mlal(u, s, 6.94845264320316515640316e-12L); u = mlal(u, s, -1.75724767308629210422023e-09L); u = mlal(u, s, 3.13361689037693212744991e-07L); u = mlal(u, s, -3.65762041821772284521155e-05L); u = mlal(u, s, 0.00249039457019272015784594L); x = dladd2_l2_l_l2(u * s, dl(-0.0807455121882807817044873L, -2.40179063154839769223037e-21L)); x = dladd2_l2_l2_l2(dlmul_l2_l2_l2(s2, x), dl(0.785398163397448309628202L, -1.25420305812534448752181e-20L)); x = dlmul_l2_l2_l(x, t); r.x = x.x + x.y; // u = -2.00423964577657539380734e-18L; u = mlal(u, s, 1.00185574457758689324113e-15L); u = mlal(u, s, -3.89807283423502620989528e-13L); u = mlal(u, s, 1.15011591257563133685341e-10L); u = mlal(u, s, -2.461136950493305818105e-08L); u = mlal(u, s, 3.59086044859150791782134e-06L); u = mlal(u, s, -0.00032599188692739001335938L); x = dladd2_l2_l_l2(u * s, dl(0.0158543442438155008529635L, -6.97556143018517384674258e-22L)); x = dladd2_l2_l2_l2(dlmul_l2_l2_l2(s2, x), dl(-0.308425137534042456829379L, -9.19882299434302978226668e-21L)); x = dladd2_l2_l2_l(dlmul_l2_l2_l2(x, s2), 1); r.y = x.x + x.y; // if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (xisinfl(d)) { r.x = r.y = SLEEF_NAN; } if (!xisinfl(d) && xfabsl(d) > TRIGRANGEMAX3) { r.x = r.y = 0; } return r; } EXPORT CONST Sleef_longdouble2 xsincospil_u35(long double d) { long double u, s, t; Sleef_longdouble2 r; u = d * 4; int64_t q = xceill(u) & ~(int64_t)1; s = u - (long double)q; t = s; s = s * s; // u = -0.2023275819380976135024e-13L; u = mlal(u, s, +0.6948176964255957574946e-11L); u = mlal(u, s, -0.1757247450021535880723e-8L); u = mlal(u, s, +0.3133616889379195970541e-6L); u = mlal(u, s, -0.3657620418215300856408e-4L); u = mlal(u, s, +0.2490394570192717262476e-2L); u = mlal(u, s, -0.8074551218828078160284e-1L); u = mlal(u, s, +0.7853981633974483096282e+0L); r.x = u * t; // u = 
+0.9933418221428971922705e-15L; u = mlal(u, s, -0.3897923064055824005357e-12L); u = mlal(u, s, +0.1150115771521792692066e-9L); u = mlal(u, s, -0.2461136949725905367314e-7L); u = mlal(u, s, +0.3590860448589084195081e-5L); u = mlal(u, s, -0.3259918869273895914840e-3L); u = mlal(u, s, +0.1585434424381550079706e-1L); u = mlal(u, s, -0.3084251375340424568294e+0L); u = mlal(u, s, 1.0L); r.y = u; // if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (xisinfl(d)) { r.x = r.y = SLEEF_NAN; } if (!xisinfl(d) && xfabsl(d) > TRIGRANGEMAX3) { r.x = r.y = 0; } return r; } sleef-3.5.1/src/libm/sleeflibm_footer.h.org000066400000000000000000000001111373003144100205740ustar00rootroot00000000000000#ifdef __cplusplus } #endif #undef IMPORT #endif // #ifndef __SLEEF_H__ sleef-3.5.1/src/libm/sleeflibm_header.h.org000066400000000000000000000241441373003144100205420ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #ifndef __SLEEF_H__ #define __SLEEF_H__ #define SLEEF_VERSION_MAJOR @SLEEF_VERSION_MAJOR@ #define SLEEF_VERSION_MINOR @SLEEF_VERSION_MINOR@ #define SLEEF_VERSION_PATCHLEVEL @SLEEF_VERSION_PATCHLEVEL@ #include #include #if (defined(__GNUC__) || defined(__CLANG__)) && !defined(__INTEL_COMPILER) #define CONST const #else #define CONST #endif #if defined(__AVX2__) || defined(__aarch64__) || defined(__arm__) || defined(__powerpc64__) || defined(__zarch__) #ifndef FP_FAST_FMA #define FP_FAST_FMA #endif #ifndef FP_FAST_FMAF #define FP_FAST_FMAF #endif #endif #if defined(_MSC_VER) && !defined(__STDC__) #define __STDC__ 1 #endif #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) #ifdef IMPORT_IS_EXPORT #define IMPORT __declspec(dllexport) #else // #ifdef IMPORT_IS_EXPORT #define IMPORT __declspec(dllimport) #if (defined(_MSC_VER)) #pragma comment(lib,"sleef.lib") #endif // #if (defined(_MSC_VER)) #endif // #ifdef IMPORT_IS_EXPORT #else // #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) #define IMPORT #endif // #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) #if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) #include #endif #if (defined(_MSC_VER)) #include #endif #if defined(__ARM_NEON__) || defined(__ARM_NEON) #include #endif #if defined(__ARM_FEATURE_SVE) #include #endif #if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__) #include #endif #if defined(__VX__) && defined(__VEC__) #ifndef SLEEF_VECINTRIN_H_INCLUDED #include #define SLEEF_VECINTRIN_H_INCLUDED #endif typedef __vector double SLEEF_VECTOR_DOUBLE; typedef __vector float SLEEF_VECTOR_FLOAT; typedef __vector int SLEEF_VECTOR_INT; #endif // 
#ifndef SLEEF_FP_ILOGB0 #define SLEEF_FP_ILOGB0 ((int)-2147483648) #endif #ifndef SLEEF_FP_ILOGBNAN #define SLEEF_FP_ILOGBNAN ((int)2147483647) #endif // IMPORT void *Sleef_malloc(size_t z); IMPORT void Sleef_free(void *ptr); IMPORT uint64_t Sleef_currentTimeMicros(); #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) IMPORT void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif // #ifndef Sleef_double2_DEFINED #define Sleef_double2_DEFINED typedef struct { double x, y; } Sleef_double2; #endif #ifndef Sleef_float2_DEFINED #define Sleef_float2_DEFINED typedef struct { float x, y; } Sleef_float2; #endif #ifndef Sleef_longdouble2_DEFINED #define Sleef_longdouble2_DEFINED typedef struct { long double x, y; } Sleef_longdouble2; #endif #if !defined(Sleef_quad_DEFINED) #define Sleef_quad_DEFINED #if defined(ENABLEFLOAT128) typedef __float128 Sleef_quad; #else typedef struct { uint64_t x, y; } Sleef_quad; #endif #endif #if !defined(Sleef_quad2_DEFINED) #define Sleef_quad2_DEFINED typedef union { struct { Sleef_quad x, y; }; Sleef_quad s[2]; } Sleef_quad2; #endif #ifdef __cplusplus extern "C" { #endif IMPORT CONST double Sleef_sin_u35(double); IMPORT CONST double Sleef_cos_u35(double); IMPORT CONST Sleef_double2 Sleef_sincos_u35(double); IMPORT CONST double Sleef_tan_u35(double); IMPORT CONST double Sleef_asin_u35(double); IMPORT CONST double Sleef_acos_u35(double); IMPORT CONST double Sleef_atan_u35(double); IMPORT CONST double Sleef_atan2_u35(double, double); IMPORT CONST double Sleef_log_u35(double); IMPORT CONST double Sleef_cbrt_u35(double); IMPORT CONST double Sleef_sin_u10(double); IMPORT CONST double Sleef_cos_u10(double); IMPORT CONST Sleef_double2 Sleef_sincos_u10(double); IMPORT CONST double Sleef_tan_u10(double); IMPORT CONST double Sleef_asin_u10(double); IMPORT CONST double Sleef_acos_u10(double); IMPORT CONST double Sleef_atan_u10(double); IMPORT CONST double Sleef_atan2_u10(double, double); IMPORT CONST double 
Sleef_log_u10(double); IMPORT CONST double Sleef_cbrt_u10(double); IMPORT CONST double Sleef_exp_u10(double); IMPORT CONST double Sleef_pow_u10(double, double); IMPORT CONST double Sleef_sinh_u10(double); IMPORT CONST double Sleef_cosh_u10(double); IMPORT CONST double Sleef_tanh_u10(double); IMPORT CONST double Sleef_sinh_u35(double); IMPORT CONST double Sleef_cosh_u35(double); IMPORT CONST double Sleef_tanh_u35(double); IMPORT CONST double Sleef_asinh_u10(double); IMPORT CONST double Sleef_acosh_u10(double); IMPORT CONST double Sleef_atanh_u10(double); IMPORT CONST double Sleef_exp2_u10(double); IMPORT CONST double Sleef_exp10_u10(double); IMPORT CONST double Sleef_exp2_u35(double); IMPORT CONST double Sleef_exp10_u35(double); IMPORT CONST double Sleef_expm1_u10(double); IMPORT CONST double Sleef_log10_u10(double); IMPORT CONST double Sleef_log2_u10(double); IMPORT CONST double Sleef_log2_u35(double); IMPORT CONST double Sleef_log1p_u10(double); IMPORT CONST Sleef_double2 Sleef_sincospi_u05(double); IMPORT CONST Sleef_double2 Sleef_sincospi_u35(double); IMPORT CONST double Sleef_sinpi_u05(double); IMPORT CONST double Sleef_cospi_u05(double); IMPORT CONST double Sleef_ldexp(double, int); IMPORT CONST int Sleef_ilogb(double); IMPORT CONST double Sleef_fma(double, double, double); IMPORT CONST double Sleef_sqrt(double); IMPORT CONST double Sleef_sqrt_u05(double); IMPORT CONST double Sleef_sqrt_u35(double); IMPORT CONST double Sleef_hypot_u05(double, double); IMPORT CONST double Sleef_hypot_u35(double, double); IMPORT CONST double Sleef_fabs(double); IMPORT CONST double Sleef_copysign(double, double); IMPORT CONST double Sleef_fmax(double, double); IMPORT CONST double Sleef_fmin(double, double); IMPORT CONST double Sleef_fdim(double, double); IMPORT CONST double Sleef_trunc(double); IMPORT CONST double Sleef_floor(double); IMPORT CONST double Sleef_ceil(double); IMPORT CONST double Sleef_round(double); IMPORT CONST double Sleef_rint(double); IMPORT CONST double 
Sleef_nextafter(double, double); IMPORT CONST double Sleef_frfrexp(double); IMPORT CONST int Sleef_expfrexp(double); IMPORT CONST double Sleef_fmod(double, double); IMPORT CONST double Sleef_remainder(double, double); IMPORT CONST Sleef_double2 Sleef_modf(double); IMPORT CONST double Sleef_lgamma_u10(double); IMPORT CONST double Sleef_tgamma_u10(double); IMPORT CONST double Sleef_erf_u10(double); IMPORT CONST double Sleef_erfc_u15(double); IMPORT CONST float Sleef_sinf_u35(float); IMPORT CONST float Sleef_cosf_u35(float); IMPORT CONST Sleef_float2 Sleef_sincosf_u35(float); IMPORT CONST float Sleef_tanf_u35(float); IMPORT CONST float Sleef_asinf_u35(float); IMPORT CONST float Sleef_acosf_u35(float); IMPORT CONST float Sleef_atanf_u35(float); IMPORT CONST float Sleef_atan2f_u35(float, float); IMPORT CONST float Sleef_logf_u35(float); IMPORT CONST float Sleef_cbrtf_u35(float); IMPORT CONST float Sleef_sinf_u10(float); IMPORT CONST float Sleef_cosf_u10(float); IMPORT CONST Sleef_float2 Sleef_sincosf_u10(float); IMPORT CONST float Sleef_fastsinf_u3500(float); IMPORT CONST float Sleef_fastcosf_u3500(float); IMPORT CONST float Sleef_tanf_u10(float); IMPORT CONST float Sleef_asinf_u10(float); IMPORT CONST float Sleef_acosf_u10(float); IMPORT CONST float Sleef_atanf_u10(float); IMPORT CONST float Sleef_atan2f_u10(float, float); IMPORT CONST float Sleef_logf_u10(float); IMPORT CONST float Sleef_cbrtf_u10(float); IMPORT CONST float Sleef_expf_u10(float); IMPORT CONST float Sleef_powf_u10(float, float); IMPORT CONST float Sleef_fastpowf_u3500(float, float); IMPORT CONST float Sleef_sinhf_u10(float); IMPORT CONST float Sleef_coshf_u10(float); IMPORT CONST float Sleef_tanhf_u10(float); IMPORT CONST float Sleef_sinhf_u35(float); IMPORT CONST float Sleef_coshf_u35(float); IMPORT CONST float Sleef_tanhf_u35(float); IMPORT CONST float Sleef_asinhf_u10(float); IMPORT CONST float Sleef_acoshf_u10(float); IMPORT CONST float Sleef_atanhf_u10(float); IMPORT CONST float 
Sleef_exp2f_u10(float); IMPORT CONST float Sleef_exp10f_u10(float); IMPORT CONST float Sleef_exp2f_u35(float); IMPORT CONST float Sleef_exp10f_u35(float); IMPORT CONST float Sleef_expm1f_u10(float); IMPORT CONST float Sleef_log10f_u10(float); IMPORT CONST float Sleef_log2f_u10(float); IMPORT CONST float Sleef_log2f_u35(float); IMPORT CONST float Sleef_log1pf_u10(float); IMPORT CONST Sleef_float2 Sleef_sincospif_u05(float); IMPORT CONST Sleef_float2 Sleef_sincospif_u35(float); IMPORT CONST float Sleef_sinpif_u05(float d); IMPORT CONST float Sleef_cospif_u05(float d); IMPORT CONST float Sleef_ldexpf(float, int); IMPORT CONST int Sleef_ilogbf(float); IMPORT CONST float Sleef_fmaf(float, float, float); IMPORT CONST float Sleef_sqrtf(float); IMPORT CONST float Sleef_sqrtf_u05(float); IMPORT CONST float Sleef_sqrtf_u35(float); IMPORT CONST float Sleef_hypotf_u05(float, float); IMPORT CONST float Sleef_hypotf_u35(float, float); IMPORT CONST float Sleef_fabsf(float); IMPORT CONST float Sleef_copysignf(float, float); IMPORT CONST float Sleef_fmaxf(float, float); IMPORT CONST float Sleef_fminf(float, float); IMPORT CONST float Sleef_fdimf(float, float); IMPORT CONST float Sleef_truncf(float); IMPORT CONST float Sleef_floorf(float); IMPORT CONST float Sleef_ceilf(float); IMPORT CONST float Sleef_roundf(float); IMPORT CONST float Sleef_rintf(float); IMPORT CONST float Sleef_nextafterf(float, float); IMPORT CONST float Sleef_frfrexpf(float); IMPORT CONST int Sleef_expfrexpf(float); IMPORT CONST float Sleef_fmodf(float, float); IMPORT CONST float Sleef_remainderf(float, float); IMPORT CONST Sleef_float2 Sleef_modff(float); IMPORT CONST float Sleef_lgammaf_u10(float); IMPORT CONST float Sleef_tgammaf_u10(float); IMPORT CONST float Sleef_erff_u10(float); IMPORT CONST float Sleef_erfcf_u15(float); IMPORT CONST Sleef_longdouble2 Sleef_sincospil_u05(long double); IMPORT CONST Sleef_longdouble2 Sleef_sincospil_u35(long double); #if defined(Sleef_quad2_DEFINED) IMPORT CONST Sleef_quad2 
Sleef_sincospiq_u05(Sleef_quad); IMPORT CONST Sleef_quad2 Sleef_sincospiq_u35(Sleef_quad); #endif sleef-3.5.1/src/libm/sleefqp.c000066400000000000000000000303261373003144100161330ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. #include #include #include #include #include "misc.h" #ifdef DORENAME #include "rename.h" #endif #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif static INLINE CONST Sleef_quad mlaq(Sleef_quad x, Sleef_quad y, Sleef_quad z) { return x * y + z; } static INLINE CONST int64_t xrintq(Sleef_quad x) { return x < 0 ? (int64_t)(x - 0.5) : (int64_t)(x + 0.5); } static INLINE CONST int64_t xceilq(Sleef_quad x) { return (int64_t)x + (x < 0 ? 0 : 1); } static INLINE CONST Sleef_quad xtruncq(Sleef_quad x) { return (Sleef_quad)(int64_t)x; } static INLINE CONST int xisnanq(Sleef_quad x) { return x != x; } static INLINE CONST int xisinfq(Sleef_quad x) { return x == SLEEF_INFINITYq || x == -SLEEF_INFINITYq; } static INLINE CONST int xisminfq(Sleef_quad x) { return x == -SLEEF_INFINITYq; } static INLINE CONST int xispinfq(Sleef_quad x) { return x == SLEEF_INFINITYq; } static INLINE CONST Sleef_quad xfabsq(Sleef_quad x) { union { Sleef_quad q; uint64_t u[2]; } cnv; cnv.q = x; cnv.u[1] &= UINT64_C(0x7fffffffffffffff); return cnv.q; } // #ifndef NDEBUG static int checkfp(Sleef_quad x) { if (xisinfq(x) || xisnanq(x)) return 1; return 0; } #endif static INLINE CONST Sleef_quad upperq(Sleef_quad d) { union { Sleef_quad q; uint64_t u[2]; } cnv; cnv.q = d; cnv.u[0] &= ~((UINT64_C(1) << (112/2+1)) - 1); return cnv.q; } static INLINE CONST Sleef_quad2 dq(Sleef_quad h, Sleef_quad l) { Sleef_quad2 ret; ret.x = h; ret.y = l; return ret; } static INLINE CONST Sleef_quad2 dqnormalize_q2_q2(Sleef_quad2 t) { Sleef_quad2 s; s.x 
= t.x + t.y; s.y = t.x - s.x + t.y; return s; } static INLINE CONST Sleef_quad2 dqscale_q2_q2_q(Sleef_quad2 d, Sleef_quad s) { Sleef_quad2 r; r.x = d.x * s; r.y = d.y * s; return r; } static INLINE CONST Sleef_quad2 dqneg_q2_q2(Sleef_quad2 d) { Sleef_quad2 r; r.x = -d.x; r.y = -d.y; return r; } static INLINE CONST Sleef_quad2 dqadd_q2_q_q(Sleef_quad x, Sleef_quad y) { // |x| >= |y| Sleef_quad2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y) || xfabsq(x) >= xfabsq(y))) { fprintf(stderr, "[dqadd_q2_q_q : %g, %g]\n", (double)x, (double)y); fflush(stderr); } #endif r.x = x + y; r.y = x - r.x + y; return r; } static INLINE CONST Sleef_quad2 dqadd2_q2_q_q(Sleef_quad x, Sleef_quad y) { Sleef_quad2 r; r.x = x + y; Sleef_quad v = r.x - x; r.y = (x - (r.x - v)) + (y - v); return r; } static INLINE CONST Sleef_quad2 dqadd_q2_q2_q(Sleef_quad2 x, Sleef_quad y) { // |x| >= |y| Sleef_quad2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y) || xfabsq(x.x) >= xfabsq(y))) { fprintf(stderr, "[dqadd_q2_q2_q : %g %g]\n", (double)x.x, (double)y); fflush(stderr); } #endif r.x = x.x + y; r.y = x.x - r.x + y + x.y; return r; } static INLINE CONST Sleef_quad2 dqadd2_q2_q2_q(Sleef_quad2 x, Sleef_quad y) { // |x| >= |y| Sleef_quad2 r; r.x = x.x + y; Sleef_quad v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y - v); r.y += x.y; return r; } static INLINE CONST Sleef_quad2 dqadd_q2_q_q2(Sleef_quad x, Sleef_quad2 y) { // |x| >= |y| Sleef_quad2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y.x) || xfabsq(x) >= xfabsq(y.x))) { fprintf(stderr, "[dqadd_q2_q_q2 : %g %g]\n", (double)x, (double)y.x); fflush(stderr); } #endif r.x = x + y.x; r.y = x - r.x + y.x + y.y; return r; } static INLINE CONST Sleef_quad2 dqadd2_q2_q_q2(Sleef_quad x, Sleef_quad2 y) { Sleef_quad2 r; r.x = x + y.x; Sleef_quad v = r.x - x; r.y = (x - (r.x - v)) + (y.x - v) + y.y; return r; } static INLINE CONST Sleef_quad2 dqadd_q2_q2_q2(Sleef_quad2 x, Sleef_quad2 y) { // |x| >= |y| Sleef_quad2 r; #ifndef NDEBUG if (!(checkfp(x.x) || 
checkfp(y.x) || xfabsq(x.x) >= xfabsq(y.x))) { fprintf(stderr, "[dqadd_q2_q2_q2 : %g %g]\n", (double)x.x, (double)y.x); fflush(stderr); } #endif r.x = x.x + y.x; r.y = x.x - r.x + y.x + x.y + y.y; return r; } static INLINE CONST Sleef_quad2 dqadd2_q2_q2_q2(Sleef_quad2 x, Sleef_quad2 y) { Sleef_quad2 r; r.x = x.x + y.x; Sleef_quad v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y.x - v); r.y += x.y + y.y; return r; } static INLINE CONST Sleef_quad2 dqsub_q2_q2_q2(Sleef_quad2 x, Sleef_quad2 y) { // |x| >= |y| Sleef_quad2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y.x) || xfabsq(x.x) >= xfabsq(y.x))) { fprintf(stderr, "[dqsub_q2_q2_q2 : %g %g]\n", (double)x.x, (double)y.x); fflush(stderr); } #endif r.x = x.x - y.x; r.y = x.x - r.x - y.x + x.y - y.y; return r; } static INLINE CONST Sleef_quad2 dqdiv_q2_q2_q2(Sleef_quad2 n, Sleef_quad2 d) { Sleef_quad t = 1.0 / d.x; Sleef_quad dh = upperq(d.x), dl = d.x - dh; Sleef_quad th = upperq(t ), tl = t - th; Sleef_quad nhh = upperq(n.x), nhl = n.x - nhh; Sleef_quad2 q; q.x = n.x * t; Sleef_quad u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); q.y = t * (n.y - q.x * d.y) + u; return q; } static INLINE CONST Sleef_quad2 dqmul_q2_q_q(Sleef_quad x, Sleef_quad y) { Sleef_quad xh = upperq(x), xl = x - xh; Sleef_quad yh = upperq(y), yl = y - yh; Sleef_quad2 r; r.x = x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; return r; } static INLINE CONST Sleef_quad2 dqmul_q2_q2_q(Sleef_quad2 x, Sleef_quad y) { Sleef_quad xh = upperq(x.x), xl = x.x - xh; Sleef_quad yh = upperq(y ), yl = y - yh; Sleef_quad2 r; r.x = x.x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; return r; } static INLINE CONST Sleef_quad2 dqmul_q2_q2_q2(Sleef_quad2 x, Sleef_quad2 y) { Sleef_quad xh = upperq(x.x), xl = x.x - xh; Sleef_quad yh = upperq(y.x), yl = y.x - yh; Sleef_quad2 r; r.x = x.x * y.x; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; return r; } 
static INLINE CONST Sleef_quad2 dqsqu_q2_q2(Sleef_quad2 x) { Sleef_quad xh = upperq(x.x), xl = x.x - xh; Sleef_quad2 r; r.x = x.x * x.x; r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); return r; } static INLINE CONST Sleef_quad2 dqrec_q2_q(Sleef_quad d) { Sleef_quad t = 1.0 / d; Sleef_quad dh = upperq(d), dl = d - dh; Sleef_quad th = upperq(t), tl = t - th; Sleef_quad2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); return q; } static INLINE CONST Sleef_quad2 dqrec_q2_q2(Sleef_quad2 d) { Sleef_quad t = 1.0 / d.x; Sleef_quad dh = upperq(d.x), dl = d.x - dh; Sleef_quad th = upperq(t ), tl = t - th; Sleef_quad2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); return q; } /* static INLINE CONST Sleef_quad2 dqsqrt_q2_q2(Sleef_quad2 d) { Sleef_quad t = sqrt(d.x + d.y); return dqscale_q2_q2_q(dqmul_q2_q2_q2(dqadd2_q2_q2_q2(d, dqmul_q2_q_q(t, t)), dqrec_q2_q(t)), 0.5); } */ // EXPORT CONST Sleef_quad2 xsincospiq_u05(Sleef_quad d) { Sleef_quad u, s, t; Sleef_quad2 r, x, s2; u = d * 4; int64_t q = xceilq(u) & ~(int64_t)1; s = u - (Sleef_quad)q; t = s; s = s * s; s2 = dqmul_q2_q_q(t, t); // u = +0.1528321016188828732764080161368244291e-27Q; u = mlaq(u, s, -0.1494741498689376415859233754050616110e-24Q); u = mlaq(u, s, +0.1226149947504428931621181953791777769e-21Q); u = mlaq(u, s, -0.8348589834426964519785265770009675533e-19Q); u = mlaq(u, s, +0.4628704628834415551415078707261146069e-16Q); u = mlaq(u, s, -0.2041026339664143925641158896030605061e-13Q); u = mlaq(u, s, +0.6948453273886629408492386065037620114e-11Q); u = mlaq(u, s, -0.1757247673443401045145682042627557066e-8Q); u = mlaq(u, s, +0.3133616890378121520950407496603902388e-6Q); u = mlaq(u, s, -0.3657620418217725078660518698299784909e-4Q); u = mlaq(u, s, +0.2490394570192720160015798421577395304e-2Q); x = dqadd2_q2_q_q2(u * s, dq(-0.08074551218828078170696957048724322192457Q, 5.959584458773288360696286320980429277618e-36)); x = 
dqadd2_q2_q2_q2(dqmul_q2_q2_q2(s2, x), dq(0.7853981633974483096156608458198756993698Q, 2.167745574452451779709844565881105067311e-35Q)); x = dqmul_q2_q2_q(x, t); r.x = x.x + x.y; // u = -0.4616472554003168470361503708527464705e-29Q; u = mlaq(u, s, +0.4891528531228245577148587028696897180e-26Q); u = mlaq(u, s, -0.4377345071482935585011339656701961637e-23Q); u = mlaq(u, s, +0.3278483561449753435303463083506802784e-20Q); u = mlaq(u, s, -0.2019653396886554861865456720993185772e-17Q); u = mlaq(u, s, +0.1001886461636271957275884859852184250e-14Q); u = mlaq(u, s, -0.3898073171259675439843028673969857173e-12Q); u = mlaq(u, s, +0.1150115912797405152263176921581706121e-9Q); u = mlaq(u, s, -0.2461136950494199754009084018126527316e-7Q); u = mlaq(u, s, +0.3590860448591510079069203991167071234e-5Q); u = mlaq(u, s, -0.3259918869273900136414318317506198622e-3Q); x = dqadd2_q2_q_q2(u * s, dq(0.01585434424381550085228521039855226376329Q, 6.529088663284413499535484912972485728198e-38Q)); x = dqadd2_q2_q2_q2(dqmul_q2_q2_q2(s2, x), dq(-0.308425137534042456838577843746129712906Q, -1.006808646313642786855469666154064243572e-35Q)); x = dqadd2_q2_q2_q(dqmul_q2_q2_q2(x, s2), 1); r.y = x.x + x.y; // if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (xisinfq(d)) { r.x = r.y = SLEEF_NANq; } if (!xisinfq(d) && xfabsq(d) > TRIGRANGEMAX3) { r.x = r.y = 0; } return r; } EXPORT CONST Sleef_quad2 xsincospiq_u35(Sleef_quad d) { Sleef_quad u, s, t; Sleef_quad2 r; u = d * 4; int64_t q = xceilq(u) & ~(int64_t)1; s = u - (Sleef_quad)q; t = s; s = s * s; // u = -0.1485963032785725729464918728185622156e-24Q; u = mlaq(u, s, +0.1226127943866088943202201676879490635e-21Q); u = mlaq(u, s, -0.8348589518463078609690110857435995326e-19Q); u = mlaq(u, s, +0.4628704628547538824855302470312741438e-16Q); u = mlaq(u, s, -0.2041026339663972432248777826778586936e-13Q); u = mlaq(u, s, +0.6948453273886628726907826757576187848e-11Q); u = mlaq(u, s, 
-0.1757247673443401044967978719804318982e-8Q); u = mlaq(u, s, +0.3133616890378121520950114757196589206e-6Q); u = mlaq(u, s, -0.3657620418217725078660518414453815240e-4Q); u = mlaq(u, s, +0.2490394570192720160015798421435124000e-2Q); u = mlaq(u, s, -0.8074551218828078170696957048724041729e-1Q); u = mlaq(u, s, +0.7853981633974483096156608458198756994e+0Q); r.x = u * t; // u = +0.4862670988511544771355006256522366302e-26Q; u = mlaq(u, s, -0.4377265452147065611484052550741141029e-23Q); u = mlaq(u, s, +0.3278483433857326331665386021267750285e-20Q); u = mlaq(u, s, -0.2019653396755055912482006994709659430e-17Q); u = mlaq(u, s, +0.1001886461636180795663169552615123249e-14Q); u = mlaq(u, s, -0.3898073171259675007871885150022866077e-12Q); u = mlaq(u, s, +0.1150115912797405152123832255915284811e-9Q); u = mlaq(u, s, -0.2461136950494199754008784937314856168e-7Q); u = mlaq(u, s, +0.3590860448591510079069203583263258862e-5Q); u = mlaq(u, s, -0.3259918869273900136414318317180623832e-3Q); u = mlaq(u, s, +0.1585434424381550085228521039855096075e-1Q); u = mlaq(u, s, -0.3084251375340424568385778437461297129e+0Q); u = mlaq(u, s, 1.0Q); r.y = u; // if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (xisinfq(d)) { r.x = r.y = SLEEF_NANq; } if (!xisinfq(d) && xfabsq(d) > TRIGRANGEMAX3) { r.x = r.y = 0; } return r; } // #ifdef ENABLE_MAIN #include #include int main(int argc, char **argv) { Sleef_quad a = -8.3998726984803832684266802333309369056312711821029e-09Q; Sleef_quad2 q = xsincospiq_u05(a); printf(" "); printf128(q.x); printf("\n"); /* printf128(0.1Q); printf("\n"); Sleef_quad2 q2 = dqmul_q2_q_q(0.1Q, 0.1Q); printf128(q2.x); printf("\n"); printf128(q2.y); printf("\n"); */ /* printf("%s\n", toBCq(0.1Q)); printf("%s\n", toBCq(upperq(0.1Q))); printf("%s\n", toBCq(0.1Q-upperq(0.1Q))); Sleef_quad2 q2 = dqmul_q2_q_q(0.1Q, 0.1Q); printf("%s + ", toBCq(q2.x)); printf("%s\n", toBCq(q2.y)); */ } #endif 
sleef-3.5.1/src/libm/sleefsimddp.c000066400000000000000000005225301373003144100167760ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. #if !defined(SLEEF_GENHEADER) #include #include #include #include #endif #include "misc.h" extern const double Sleef_rempitabdp[]; #define __SLEEFSIMDDP_C__ #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif // Intel #ifdef ENABLE_SSE2 #define CONFIG 2 #include "helpersse2.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamesse2_gnuabi.h" #else #include "renamesse2.h" #endif #endif #endif #ifdef ENABLE_SSE4 #define CONFIG 4 #include "helpersse2.h" #ifdef DORENAME #include "renamesse4.h" #endif #endif #ifdef ENABLE_AVX #define CONFIG 1 #include "helperavx.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx_gnuabi.h" #else #include "renameavx.h" #endif #endif #endif #ifdef ENABLE_FMA4 #define CONFIG 4 #include "helperavx.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamefma4_gnuabi.h" #else #include "renamefma4.h" #endif #endif #endif #ifdef ENABLE_AVX2 #define CONFIG 1 #include "helperavx2.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx2_gnuabi.h" #else #include "renameavx2.h" #endif #endif #endif #ifdef ENABLE_AVX2128 #define CONFIG 1 #include "helperavx2_128.h" #ifdef DORENAME #include "renameavx2128.h" #endif #endif #ifdef ENABLE_AVX512F #define CONFIG 1 #include "helperavx512f.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx512f_gnuabi.h" #else #include "renameavx512f.h" #endif #endif #endif #ifdef ENABLE_AVX512FNOFMA #define CONFIG 2 #include "helperavx512f.h" #ifdef DORENAME #include "renameavx512fnofma.h" #endif #endif // Arm #ifdef ENABLE_ADVSIMD #define CONFIG 1 #include "helperadvsimd.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include 
"renameadvsimd_gnuabi.h" #else #include "renameadvsimd.h" #endif #endif #endif #ifdef ENABLE_ADVSIMDNOFMA #define CONFIG 2 #include "helperadvsimd.h" #ifdef DORENAME #include "renameadvsimdnofma.h" #endif #endif #ifdef ENABLE_SVE #define CONFIG 1 #include "helpersve.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamesve_gnuabi.h" #else #include "renamesve.h" #endif /* ENABLE_GNUABI */ #endif /* DORENAME */ #endif /* ENABLE_SVE */ #ifdef ENABLE_SVENOFMA #define CONFIG 2 #include "helpersve.h" #ifdef DORENAME #include "renamesvenofma.h" #endif /* DORENAME */ #endif /* ENABLE_SVE */ // IBM #ifdef ENABLE_VSX #define CONFIG 1 #include "helperpower_128.h" #ifdef DORENAME #include "renamevsx.h" #endif #endif #ifdef ENABLE_VSXNOFMA #define CONFIG 2 #include "helperpower_128.h" #ifdef DORENAME #include "renamevsxnofma.h" #endif #endif #ifdef ENABLE_ZVECTOR2 #define CONFIG 140 #include "helpers390x_128.h" #ifdef DORENAME #include "renamezvector2.h" #endif #endif #ifdef ENABLE_ZVECTOR2NOFMA #define CONFIG 141 #include "helpers390x_128.h" #ifdef DORENAME #include "renamezvector2nofma.h" #endif #endif // Generic #ifdef ENABLE_VECEXT #define CONFIG 1 #include "helpervecext.h" #ifdef DORENAME #include "renamevecext.h" #endif #endif #ifdef ENABLE_PUREC #define CONFIG 1 #include "helperpurec.h" #ifdef DORENAME #include "renamepurec.h" #endif #endif #ifdef ENABLE_PUREC_SCALAR #define CONFIG 1 #include "helperpurec_scalar.h" #ifdef DORENAME #include "renamepurec_scalar.h" #endif #endif #ifdef ENABLE_PURECFMA_SCALAR #define CONFIG 2 #include "helperpurec_scalar.h" #ifdef DORENAME #include "renamepurecfma_scalar.h" #endif #endif // #define MLA(x, y, z) vmla_vd_vd_vd_vd((x), (y), (z)) #define C2V(c) vcast_vd_d(c) #include "estrin.h" // #include "dd.h" // static INLINE VECTOR_CC vopmask vnot_vo64_vo64(vopmask x) { return vxor_vo_vo_vo(x, veq64_vo_vm_vm(vcast_vm_i_i(0, 0), vcast_vm_i_i(0, 0))); } static INLINE CONST VECTOR_CC vopmask vsignbit_vo_vd(vdouble d) { return 
veq64_vo_vm_vm(vand_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))); } // return d0 < d1 ? x : y static INLINE CONST VECTOR_CC vint vsel_vi_vd_vd_vi_vi(vdouble d0, vdouble d1, vint x, vint y) { return vsel_vi_vo_vi_vi(vcast_vo32_vo64(vlt_vo_vd_vd(d0, d1)), x, y); } // return d0 < 0 ? x : 0 static INLINE CONST VECTOR_CC vint vsel_vi_vd_vi(vdouble d, vint x) { return vand_vi_vo_vi(vcast_vo32_vo64(vsignbit_vo_vd(d)), x); } static INLINE CONST VECTOR_CC vopmask visnegzero_vo_vd(vdouble d) { return veq64_vo_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(vcast_vd_d(-0.0))); } static INLINE CONST VECTOR_CC vopmask visnumber_vo_vd(vdouble x) { return vandnot_vo_vo_vo(visinf_vo_vd(x), veq_vo_vd_vd(x, x)); } static INLINE CONST VECTOR_CC vmask vsignbit_vm_vd(vdouble d) { return vand_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(vcast_vd_d(-0.0))); } static INLINE CONST VECTOR_CC vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); } static INLINE CONST VECTOR_CC vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vandnot_vm_vm_vm(vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(x)), vand_vm_vm_vm (vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(y)))); } static INLINE CONST VECTOR_CC vdouble vsign_vd_vd(vdouble d) { return vmulsign_vd_vd_vd(vcast_vd_d(1.0), d); } static INLINE CONST VECTOR_CC vdouble vpow2i_vd_vi(vint q) { q = vadd_vi_vi_vi(vcast_vi_i(0x3ff), q); vint2 r = vcastu_vi2_vi(q); return vreinterpret_vd_vi2(vsll_vi2_vi2_i(r, 20)); } static INLINE CONST VECTOR_CC vdouble vldexp_vd_vd_vi(vdouble x, vint q) { vint m = vsra_vi_vi_i(q, 31); m = vsll_vi_vi_i(vsub_vi_vi_vi(vsra_vi_vi_i(vadd_vi_vi_vi(m, q), 9), m), 7); q = vsub_vi_vi_vi(q, vsll_vi_vi_i(m, 2)); m = vadd_vi_vi_vi(vcast_vi_i(0x3ff), m); m = vandnot_vi_vo_vi(vgt_vo_vi_vi(vcast_vi_i(0), m), m); m = 
vsel_vi_vo_vi_vi(vgt_vo_vi_vi(m, vcast_vi_i(0x7ff)), vcast_vi_i(0x7ff), m); vint2 r = vcastu_vi2_vi(m); vdouble y = vreinterpret_vd_vi2(vsll_vi2_vi2_i(r, 20)); return vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(x, y), y), y), y), vpow2i_vd_vi(q)); } static INLINE CONST VECTOR_CC vdouble vldexp2_vd_vd_vi(vdouble d, vint e) { return vmul_vd_vd_vd(vmul_vd_vd_vd(d, vpow2i_vd_vi(vsra_vi_vi_i(e, 1))), vpow2i_vd_vi(vsub_vi_vi_vi(e, vsra_vi_vi_i(e, 1)))); } static INLINE CONST VECTOR_CC vdouble vldexp3_vd_vd_vi(vdouble d, vint q) { return vreinterpret_vd_vi2(vadd_vi2_vi2_vi2(vreinterpret_vi2_vd(d), vsll_vi2_vi2_i(vcastu_vi2_vi(q), 20))); } #if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) static INLINE CONST VECTOR_CC vint vilogbk_vi_vd(vdouble d) { vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(4.9090934652977266E-91)); d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(vcast_vd_d(2.037035976334486E90), d), d); vint q = vcastu_vi_vi2(vreinterpret_vi2_vd(d)); q = vand_vi_vi_vi(q, vcast_vi_i(((1 << 12)-1) << 20)); q = vsrl_vi_vi_i(q, 20); q = vsub_vi_vi_vi(q, vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vcast_vi_i(300 + 0x3ff), vcast_vi_i(0x3ff))); return q; } static INLINE CONST VECTOR_CC vint vilogb2k_vi_vd(vdouble d) { vint q = vcastu_vi_vi2(vreinterpret_vi2_vd(d)); q = vsrl_vi_vi_i(q, 20); q = vand_vi_vi_vi(q, vcast_vi_i(0x7ff)); q = vsub_vi_vi_vi(q, vcast_vi_i(0x3ff)); return q; } #endif static INLINE CONST VECTOR_CC vopmask visint_vo_vd(vdouble d) { #ifdef FULL_FP_ROUNDING return veq_vo_vd_vd(vtruncate_vd_vd(d), d); #else vdouble x = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0 / (INT64_C(1) << 31)))); x = vmla_vd_vd_vd_vd(vcast_vd_d(-(double)(INT64_C(1) << 31)), x, d); return vor_vo_vo_vo(veq_vo_vd_vd(vtruncate_vd_vd(x), x), vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(INT64_C(1) << 53))); #endif } static INLINE CONST VECTOR_CC vopmask visodd_vo_vd(vdouble d) { #ifdef FULL_FP_ROUNDING vdouble x = vmul_vd_vd_vd(d, vcast_vd_d(0.5)); return 
vneq_vo_vd_vd(vtruncate_vd_vd(x), x); #else vdouble x = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0 / (INT64_C(1) << 31)))); x = vmla_vd_vd_vd_vd(vcast_vd_d(-(double)(INT64_C(1) << 31)), x, d); return vand_vo_vo_vo(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vtruncate_vi_vd(x), vcast_vi_i(1)), vcast_vi_i(1))), vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(INT64_C(1) << 53))); #endif } // EXPORT CONST VECTOR_CC vdouble xldexp(vdouble x, vint q) { return vldexp_vd_vd_vi(x, q); } EXPORT CONST VECTOR_CC vint xilogb(vdouble d) { vdouble e = vcast_vd_vi(vilogbk_vi_vd(vabs_vd_vd(d))); e = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_FP_ILOGB0), e); e = vsel_vd_vo_vd_vd(visnan_vo_vd(d), vcast_vd_d(SLEEF_FP_ILOGBNAN), e); e = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vcast_vd_d(INT_MAX), e); return vrint_vi_vd(e); } #if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) typedef struct { vdouble d; vint i; } di_t; static vdouble digetd_vd_di(di_t d) { return d.d; } static vint digeti_vi_di(di_t d) { return d.i; } static di_t disetdi_di_vd_vi(vdouble d, vint i) { di_t r = { d, i }; return r; } typedef struct { vdouble2 dd; vint i; } ddi_t; static vdouble2 ddigetdd_vd2_ddi(ddi_t d) { return d.dd; } static vint ddigeti_vi_ddi(ddi_t d) { return d.i; } static ddi_t ddisetddi_ddi_vd2_vi(vdouble2 v, vint i) { ddi_t r = { v, i }; return r; } static ddi_t ddisetdd_ddi_ddi_vd2(ddi_t ddi, vdouble2 v) { ddi.dd = v; return ddi; } #endif static INLINE CONST VECTOR_CC vdouble vorsign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); } static INLINE CONST di_t rempisub(vdouble x) { #ifdef FULL_FP_ROUNDING vdouble y = vrint_vd_vd(vmul_vd_vd_vd(x, vcast_vd_d(4))); vint vi = vtruncate_vi_vd(vsub_vd_vd_vd(y, vmul_vd_vd_vd(vrint_vd_vd(x), vcast_vd_d(4)))); return disetdi_di_vd_vi(vsub_vd_vd_vd(x, vmul_vd_vd_vd(y, vcast_vd_d(0.25))), vi); #else vdouble c = vmulsign_vd_vd_vd(vcast_vd_d(INT64_C(1) << 52), x); vdouble rint4x 
= vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(vmul_vd_vd_vd(vcast_vd_d(4), x)), vcast_vd_d(INT64_C(1) << 52)), vmul_vd_vd_vd(vcast_vd_d(4), x), vorsign_vd_vd_vd(vsub_vd_vd_vd(vmla_vd_vd_vd_vd(vcast_vd_d(4), x, c), c), x)); vdouble rintx = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(INT64_C(1) << 52)), x, vorsign_vd_vd_vd(vsub_vd_vd_vd(vadd_vd_vd_vd(x, c), c), x)); return disetdi_di_vd_vi(vmla_vd_vd_vd_vd(vcast_vd_d(-0.25), rint4x, x), vtruncate_vi_vd(vmla_vd_vd_vd_vd(vcast_vd_d(-4), rintx, rint4x))); #endif } static INLINE CONST ddi_t rempi(vdouble a) { vdouble2 x, y, z; vint ex = vilogb2k_vi_vd(a); #if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA) ex = vandnot_vi_vi_vi(vsra_vi_vi_i(ex, 31), ex); ex = vand_vi_vi_vi(ex, vcast_vi_i(1023)); #endif ex = vsub_vi_vi_vi(ex, vcast_vi_i(55)); vint q = vand_vi_vo_vi(vgt_vo_vi_vi(ex, vcast_vi_i(700-55)), vcast_vi_i(-64)); a = vldexp3_vd_vd_vi(a, q); ex = vandnot_vi_vi_vi(vsra_vi_vi_i(ex, 31), ex); ex = vsll_vi_vi_i(ex, 2); x = ddmul_vd2_vd_vd(a, vgather_vd_p_vi(Sleef_rempitabdp, ex)); di_t di = rempisub(vd2getx_vd_vd2(x)); q = digeti_vi_di(di); x = vd2setx_vd2_vd2_vd(x, digetd_vd_di(di)); x = ddnormalize_vd2_vd2(x); y = ddmul_vd2_vd_vd(a, vgather_vd_p_vi(Sleef_rempitabdp+1, ex)); x = ddadd2_vd2_vd2_vd2(x, y); di = rempisub(vd2getx_vd_vd2(x)); q = vadd_vi_vi_vi(q, digeti_vi_di(di)); x = vd2setx_vd2_vd2_vd(x, digetd_vd_di(di)); x = ddnormalize_vd2_vd2(x); y = vcast_vd2_vd_vd(vgather_vd_p_vi(Sleef_rempitabdp+2, ex), vgather_vd_p_vi(Sleef_rempitabdp+3, ex)); y = ddmul_vd2_vd2_vd(y, a); x = ddadd2_vd2_vd2_vd2(x, y); x = ddnormalize_vd2_vd2(x); x = ddmul_vd2_vd2_vd2(x, vcast_vd2_d_d(3.141592653589793116*2, 1.2246467991473532072e-16*2)); vopmask o = vlt_vo_vd_vd(vabs_vd_vd(a), vcast_vd_d(0.7)); x = vd2setx_vd2_vd2_vd(x, vsel_vd_vo_vd_vd(o, a, vd2getx_vd_vd2(x))); x = vd2sety_vd2_vd2_vd(x, vreinterpret_vd_vm(vandnot_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(x))))); return ddisetddi_ddi_vd2_vi(x, q); } EXPORT 
CONST VECTOR_CC vdouble xsin(vdouble d) { #if !defined(DETERMINISTIC) // The SIMD source files(sleefsimd?p.c) are compiled twice for each // vector extension, with DETERMINISTIC macro turned on and off. // Below is the normal(faster) implementation of sin function. The // function name xsin will be renamed to Sleef_sind2_u35sse2 with // renamesse2.h, for example. vdouble u, s, r = d; vint ql; if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI))); ql = vrint_vi_vd(dql); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2), d); } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 24)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); vdouble dql = vrint_vd_vd(vmlapn_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), dqh)); ql = vrint_vi_vd(dql); d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A), d); d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B), d); d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C), d); d = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D), d); } else { ddi_t ddi = rempi(d); ql = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3)); ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(2), vcast_vi_i(1))); ql = vsra_vi_vi_i(ql, 2); vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(1)); vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi))), vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)))); x = 
ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), x); ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddigetdd_vd2_ddi(ddi))); d = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi))); d = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(r), visnan_vo_vd(r)), vreinterpret_vm_vd(d))); } s = vmul_vd_vd_vd(d, d); d = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(d))); vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2); u = POLY8(s, s2, s4, -7.97255955009037868891952e-18, 2.81009972710863200091251e-15, -7.64712219118158833288484e-13, 1.60590430605664501629054e-10, -2.50521083763502045810755e-08, 2.75573192239198747630416e-06, -0.000198412698412696162806809, 0.00833333333333332974823815); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666657414808)); u = vadd_vd_vd_vd(vmul_vd_vd_vd(s, vmul_vd_vd_vd(u, d)), d); u = vsel_vd_vo_vd_vd(visnegzero_vo_vd(r), r, u); return u; #else // #if !defined(DETERMINISTIC) // This is the deterministic implementation of sin function. Returned // values from deterministic functions are bitwise consistent across // all platforms. The function name xsin will be renamed to // Sleef_cinz_sind2_u35sse2 with renamesse2.h, for example. The // renaming by rename*.h is switched according to DETERMINISTIC macro. 
vdouble u, s, r = d; vint ql; vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI))); ql = vrint_vi_vd(dql); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2), d); vopmask g = vlt_vo_vd_vd(vabs_vd_vd(r), vcast_vd_d(TRIGRANGEMAX2)); if (!LIKELY(vtestallones_i_vo64(g))) { vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(r, vcast_vd_d(M_1_PI / (1 << 24)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); vdouble dql = vrint_vd_vd(vmlapn_vd_vd_vd_vd(r, vcast_vd_d(M_1_PI), dqh)); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A), r); u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A), u); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B), u); u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B), u); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C), u); u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C), u); u = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D), u); ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, vrint_vi_vd(dql)); d = vsel_vd_vo_vd_vd(g, d, u); g = vlt_vo_vd_vd(vabs_vd_vd(r), vcast_vd_d(TRIGRANGEMAX)); if (!LIKELY(vtestallones_i_vo64(g))) { ddi_t ddi = rempi(r); vint ql2 = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3)); ql2 = vadd_vi_vi_vi(vadd_vi_vi_vi(ql2, ql2), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(2), vcast_vi_i(1))); ql2 = vsra_vi_vi_i(ql2, 2); vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(1)); vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi))), vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)))); x = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), x); ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddigetdd_vd2_ddi(ddi))); u = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi))); ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2); 
d = vsel_vd_vo_vd_vd(g, d, u);
      d = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(r), visnan_vo_vd(r)), vreinterpret_vm_vd(d)));
    }
  }

  s = vmul_vd_vd_vd(d, d);

  d = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(d)));

  vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2);
  u = POLY8(s, s2, s4, -7.97255955009037868891952e-18, 2.81009972710863200091251e-15, -7.64712219118158833288484e-13, 1.60590430605664501629054e-10, -2.50521083763502045810755e-08, 2.75573192239198747630416e-06, -0.000198412698412696162806809, 0.00833333333333332974823815);
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666657414808));

  u = vadd_vd_vd_vd(vmul_vd_vd_vd(s, vmul_vd_vd_vd(u, d)), d);
  u = vsel_vd_vo_vd_vd(visnegzero_vo_vd(r), r, u);

  return u;
#endif // #if !defined(DETERMINISTIC)
}

// xsin_u1: lane-wise sine of d, evaluated on a reduced argument t with s = t^2 as
// t * (1 + s * (-1/6 + s * (1/120 + s * POLY6(s)))).
// NOTE(review): this reading assumes the dd* helpers are add/multiply/square on
// hi/lo vdouble2 pairs and that vmla(a, b, c) is a*b + c -- confirm in the helper headers.
// NOTE(review): the "_u1" suffix presumably marks the 1.0-ULP accuracy variant; confirm
// against the SLEEF documentation.
//
// Both preprocessor branches perform the same three steps:
//   1. Reduce d to a vdouble2 remainder plus an integer ql. There are three reduction
//      paths, selected by |d| < TRIGRANGEMAX2, |d| < TRIGRANGEMAX, and rempi() otherwise.
//   2. Evaluate the polynomial in the squared remainder.
//   3. Flip the sign where ql is odd, and return d itself in lanes where d == 0.
// Without DETERMINISTIC a single path is picked for the whole vector by the
// if / else-if / else chain. With DETERMINISTIC the first path is always computed and
// the later ones are blended in per lane through the mask g.
EXPORT CONST VECTOR_CC vdouble xsin_u1(vdouble d) {
#if !defined(DETERMINISTIC)
  vdouble u;
  vdouble2 s, t, x;
  vint ql;

  // Path 1: taken when vtestallones reports the |d| < TRIGRANGEMAX2 mask as set
  // (presumably "true in every lane"). Two-constant reduction with PI_A2 / PI_B2.
  if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) {
    const vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI)));
    ql = vrint_vi_vd(dql);
    u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2), d);
    s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2)));
  } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) {
    // Path 2: the quotient d * M_1_PI is split into dqh (a multiple of 1 << 24) and
    // dql (the rounded rest); each part is multiplied by PI_A..PI_D in turn and
    // accumulated into the vdouble2 s.
    vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 24))));
    dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24));
    const vdouble dql = vrint_vd_vd(vmlapn_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), dqh));
    ql = vrint_vi_vd(dql);

    u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A), d);
    s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C)));
    s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D)));
  } else {
    // Path 3: rempi() (defined elsewhere) supplies an integer part and a vdouble2
    // remainder; they are converted below into the same (ql, s) form as paths 1 and 2.
    ddi_t ddi = rempi(d);
    ql = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3));
    ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(2), vcast_vi_i(1)));
    ql = vsra_vi_vi_i(ql, 2);
    // o: lanes whose rempi integer part is odd. Those lanes get a pi/2 hi/lo pair,
    // signed via vmulsign from the remainder's x component, added to the remainder.
    vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(1));
    vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi))), vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi))));
    x = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), x);
    ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddigetdd_vd2_ddi(ddi)));
    s = ddnormalize_vd2_vd2(ddigetdd_vd2_ddi(ddi));
    // OR the isinf|isnan mask of the input into the x component of s
    // (presumably forcing a NaN result in those lanes).
    s = vd2setx_vd2_vd2_vd(s, vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(vd2getx_vd_vd2(s)))));
  }

  // t keeps the reduced argument; s becomes its square.
  t = s;
  s = ddsqu_vd2_vd2(s);

  vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2);
  u = POLY6(vd2getx_vd_vd2(s), s2, s4, 2.72052416138529567917983e-15, -7.6429259411395447190023e-13, 1.60589370117277896211623e-10, -2.5052106814843123359368e-08, 2.75573192104428224777379e-06, -0.000198412698412046454654947);
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.00833333333333318056201922));

  x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(-0.166666666666666657414808), vmul_vd_vd_vd(u, vd2getx_vd_vd2(s))), s));
  u = ddmul_vd_vd2_vd2(t, x);

  // XOR the bit pattern of -0.0 into u where (ql & 1) == 1, i.e. negate those lanes.
  u = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(u)));
  // Lanes where d compares equal to 0 return d itself (presumably to keep the sign of zero).
  u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u);

  return u;

#else // #if !defined(DETERMINISTIC)

  vdouble u;
  vdouble2 s, t, x;
  vint ql;

  // Path 1 is computed unconditionally; g records which lanes it is valid for.
  vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2));
  vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI)));
  ql = vrint_vi_vd(dql);
  u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2), d);
  x = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2)));

  if (!LIKELY(vtestallones_i_vo64(g))) {
    // Path 2. This dql deliberately shadows the outer one inside this scope.
    vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 24))));
    dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24));
    const vdouble dql = vrint_vd_vd(vmlapn_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), dqh));

    u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A), d);
    s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C)));
    s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D)));

    // Keep the path-1 result where g holds, take the path-2 result elsewhere;
    // then re-purpose g as the validity mask of path 2.
    ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, vrint_vi_vd(dql));
    x = vsel_vd2_vo_vd2_vd2(g, x, s);
    g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX));

    if (!LIKELY(vtestallones_i_vo64(g))) {
      // Path 3 (rempi), same conversion as in the non-DETERMINISTIC branch, but
      // using the locals ql2 and a shadowing t so that ql and x stay intact for the blend.
      ddi_t ddi = rempi(d);
      vint ql2 = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3));
      ql2 = vadd_vi_vi_vi(vadd_vi_vi_vi(ql2, ql2), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(2), vcast_vi_i(1)));
      ql2 = vsra_vi_vi_i(ql2, 2);
      vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(1));
      vdouble2 t = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi))), vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi))));
      t = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), t);
      ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), t, ddigetdd_vd2_ddi(ddi)));
      s = ddnormalize_vd2_vd2(ddigetdd_vd2_ddi(ddi));
      ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2);
      x = vsel_vd2_vo_vd2_vd2(g, x, s);
      // OR the isinf|isnan mask of the input into the x component of the blended result.
      x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(vd2getx_vd_vd2(x)))));
    }
  }

  // From here on identical to the non-DETERMINISTIC branch, with x as the reduced argument.
  t = x;
  s = ddsqu_vd2_vd2(x);

  vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2);
  u = POLY6(vd2getx_vd_vd2(s), s2, s4, 2.72052416138529567917983e-15, -7.6429259411395447190023e-13, 1.60589370117277896211623e-10, -2.5052106814843123359368e-08, 2.75573192104428224777379e-06, -0.000198412698412046454654947);
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.00833333333333318056201922));

  x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(-0.166666666666666657414808), vmul_vd_vd_vd(u, vd2getx_vd_vd2(s))), s));
  u = ddmul_vd_vd2_vd2(t, x);

  u = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(u)));
  u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u);

  return u;
#endif // #if !defined(DETERMINISTIC)
}

// xcos: lane-wise cosine of d, computed with an odd (sine-type) polynomial kernel
// u = d + s * (d * P(s)), s = d^2, after shifting the argument by an odd multiple of pi/2.
// NOTE(review): assuming vmla(a, b, c) is a*b + c, path 1 uses
// dql = 2 * rint(d / pi - 0.5) + 1 and replaces d by d - dql * (PI_A2 + PI_B2) / 2.
// NOTE(review): no "_u" suffix; the accuracy class of this variant is not visible here.
//
// r holds the original input because d is overwritten with the reduced argument.
// The three reduction paths and the DETERMINISTIC / non-DETERMINISTIC split follow
// the same scheme as in xsin_u1; the sign is flipped in lanes where (ql & 2) == 0.
EXPORT CONST VECTOR_CC vdouble xcos(vdouble d) {
#if !defined(DETERMINISTIC)
  vdouble u, s, r = d;
  vint ql;

  if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) {
    // Path 1: two-constant reduction with PI_A2 / PI_B2, each halved.
    vdouble dql = vmla_vd_vd_vd_vd(vcast_vd_d(2), vrint_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), vcast_vd_d(-0.5))), vcast_vd_d(1));
    ql = vrint_vi_vd(dql);
    d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d);
    d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), d);
  } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) {
    // Path 2: quotient split into dqh (scaled by 1 << 24) and the odd integer
    // dql = 2 * ql + 1; PI_A..PI_D (halved) are subtracted piece by piece.
    vdouble dqh = vtruncate_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 23)), vcast_vd_d(-M_1_PI / (1 << 24))));
    ql = vrint_vi_vd(vadd_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI)), vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-(1 << 23)), vcast_vd_d(-0.5))));
    dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24));
    ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vcast_vi_i(1));
    vdouble dql = vcast_vd_vi(ql);

    d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d);
    d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), d);
    d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), d);
    d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), d);
    d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), d);
    d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), d);
    d = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), d);
  } else {
    // Path 3: rempi() fallback. Note the differences from the sine version:
    // offsets 8/7 with a shift by 1, o selects lanes whose integer part is EVEN,
    // and the pi/2 correction is signed through y (0 when the remainder is > 0, else -1).
    ddi_t ddi = rempi(d);
    ql = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3));
    ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(8), vcast_vi_i(7)));
    ql = vsra_vi_vi_i(ql, 1);
    vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(0));
    vdouble y = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0)), vcast_vd_d(0), vcast_vd_d(-1));
    vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), y), vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), y));
    x = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), x);
    ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddigetdd_vd2_ddi(ddi)));
    // Collapse the vdouble2 remainder to a single vdouble by adding its two components.
    d = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi)));
    // OR the isinf|isnan mask of the ORIGINAL input r into d (presumably forcing NaN).
    d = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(r), visnan_vo_vd(r)), vreinterpret_vm_vd(d)));
  }

  s = vmul_vd_vd_vd(d, d);

  // Negate d (XOR with the bits of -0.0) in lanes where (ql & 2) == 0.
  d = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(d)));

  vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2);
  u = POLY8(s, s2, s4, -7.97255955009037868891952e-18, 2.81009972710863200091251e-15, -7.64712219118158833288484e-13, 1.60590430605664501629054e-10, -2.50521083763502045810755e-08, 2.75573192239198747630416e-06, -0.000198412698412696162806809, 0.00833333333333332974823815);
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666657414808));

  u = vadd_vd_vd_vd(vmul_vd_vd_vd(s, vmul_vd_vd_vd(u, d)), d);

  return u;

#else // #if !defined(DETERMINISTIC)

  vdouble u, s, r = d;
  vint ql;

  // Path 1 is computed unconditionally and overwrites d; later paths therefore read r.
  vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2));
  vdouble dql = vmla_vd_vd_vd_vd(vcast_vd_d(2), vrint_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), vcast_vd_d(-0.5))), vcast_vd_d(1));
  ql = vrint_vi_vd(dql);
  d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d);
  d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), d);

  if (!LIKELY(vtestallones_i_vo64(g))) {
    // Path 2, accumulated in u starting from the original input r.
    vdouble dqh = vtruncate_vd_vd(vmla_vd_vd_vd_vd(r, vcast_vd_d(M_1_PI / (1 << 23)), vcast_vd_d(-M_1_PI / (1 << 24))));
    vint ql2 = vrint_vi_vd(vadd_vd_vd_vd(vmul_vd_vd_vd(r, vcast_vd_d(M_1_PI)), vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-(1 << 23)), vcast_vd_d(-0.5))));
    dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24));
    ql2 = vadd_vi_vi_vi(vadd_vi_vi_vi(ql2, ql2), vcast_vi_i(1));
    vdouble dql = vcast_vd_vi(ql2);

    u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), r);
    u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), u);
    u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), u);
    u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), u);
    u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), u);
    u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), u);
    u = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), u);

    // Blend: path-1 values where g holds, path-2 values elsewhere; then g becomes
    // the validity mask of path 2.
    ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2);
    d = vsel_vd_vo_vd_vd(g, d, u);
    g = vlt_vo_vd_vd(vabs_vd_vd(r), vcast_vd_d(TRIGRANGEMAX));

    if (!LIKELY(vtestallones_i_vo64(g))) {
      // Path 3 (rempi) on the original input r; the inner ql2 shadows the outer one.
      ddi_t ddi = rempi(r);
      vint ql2 = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3));
      ql2 = vadd_vi_vi_vi(vadd_vi_vi_vi(ql2, ql2), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(8), vcast_vi_i(7)));
      ql2 = vsra_vi_vi_i(ql2, 1);
      vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(0));
      vdouble y = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0)), vcast_vd_d(0), vcast_vd_d(-1));
      vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), y), vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), y));
      x = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), x);
      ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddigetdd_vd2_ddi(ddi)));
      u = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi)));
      ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2);
      d = vsel_vd_vo_vd_vd(g, d, u);
      d = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(r), visnan_vo_vd(r)), vreinterpret_vm_vd(d)));
    }
  }

  s = vmul_vd_vd_vd(d, d);

  d = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(d)));

  vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2);
  u = POLY8(s, s2, s4, -7.97255955009037868891952e-18, 2.81009972710863200091251e-15, -7.64712219118158833288484e-13, 1.60590430605664501629054e-10, -2.50521083763502045810755e-08, 2.75573192239198747630416e-06, -0.000198412698412696162806809, 0.00833333333333332974823815);
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666657414808));

  u = vadd_vd_vd_vd(vmul_vd_vd_vd(s, vmul_vd_vd_vd(u, d)), d);

  return u;
#endif // #if !defined(DETERMINISTIC)
}

// xcos_u1: lane-wise cosine of d. Uses the same odd-multiple-of-pi/2 reduction and
// (ql & 2) == 0 sign rule as xcos, but keeps the reduced argument as a vdouble2 and
// evaluates the same POLY6-based kernel as xsin_u1.
// NOTE(review): the "_u1" suffix presumably marks the 1.0-ULP accuracy variant; confirm
// against the SLEEF documentation.
// Unlike xsin_u1 there is no final "d == 0 returns d" select.
EXPORT CONST VECTOR_CC vdouble xcos_u1(vdouble d) {
#if !defined(DETERMINISTIC)
  vdouble u;
  vdouble2 s, t, x;
  vint ql;

  if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) {
    // Path 1: dql is built in two steps (round, then 2 * dql + 1 assuming vmla is a*b + c).
    vdouble dql = vrint_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), vcast_vd_d(-0.5)));
    dql = vmla_vd_vd_vd_vd(vcast_vd_d(2), dql, vcast_vd_d(1));
    ql = vrint_vi_vd(dql);
    s = ddadd2_vd2_vd_vd(d, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5)));
    s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5)));
  } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) {
    // Path 2: dqh / dql split, accumulated into the vdouble2 s with PI_A..PI_D halved.
    vdouble dqh = vtruncate_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 23)), vcast_vd_d(-M_1_PI / (1 << 24))));
    ql = vrint_vi_vd(vadd_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI)), vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-(1 << 23)), vcast_vd_d(-0.5))));
    dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24));
    ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vcast_vi_i(1));
    const vdouble dql = vcast_vd_vi(ql);

    u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d);
    s = ddadd2_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5)));
    s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5)));
  } else {
    // Path 3: rempi() fallback, same conversion as in xcos but the remainder is
    // normalized and kept as a vdouble2 instead of being summed.
    ddi_t ddi = rempi(d);
    ql = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3));
    ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(8), vcast_vi_i(7)));
    ql = vsra_vi_vi_i(ql, 1);
    vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(0));
    vdouble y = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0)), vcast_vd_d(0), vcast_vd_d(-1));
    vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), y), vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), y));
    x = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), x);
    ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddigetdd_vd2_ddi(ddi)));
    s = ddnormalize_vd2_vd2(ddigetdd_vd2_ddi(ddi));
    s = vd2setx_vd2_vd2_vd(s, vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(vd2getx_vd_vd2(s)))));
  }

  // t keeps the reduced argument; s becomes its square.
  t = s;
  s = ddsqu_vd2_vd2(s);

  vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2);
  u = POLY6(vd2getx_vd_vd2(s), s2, s4, 2.72052416138529567917983e-15, -7.6429259411395447190023e-13, 1.60589370117277896211623e-10, -2.5052106814843123359368e-08, 2.75573192104428224777379e-06, -0.000198412698412046454654947);
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.00833333333333318056201922));

  x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(-0.166666666666666657414808), vmul_vd_vd_vd(u, vd2getx_vd_vd2(s))), s));
  u = ddmul_vd_vd2_vd2(t, x);

  // Negate u in lanes where (ql & 2) == 0.
  u = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(u)));

  return u;

#else // #if !defined(DETERMINISTIC)

  vdouble u;
  vdouble2 s, t, x;
  vint ql;

  // Path 1 is computed unconditionally into x; g records which lanes it is valid for.
  vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2));
  vdouble dql = vrint_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), vcast_vd_d(-0.5)));
  dql = vmla_vd_vd_vd_vd(vcast_vd_d(2), dql, vcast_vd_d(1));
  ql = vrint_vi_vd(dql);
  x = ddadd2_vd2_vd_vd(d, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5)));
  x = ddadd_vd2_vd2_vd(x, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5)));

  if (!LIKELY(vtestallones_i_vo64(g))) {
    // Path 2; the const dql below shadows the outer dql inside this scope.
    vdouble dqh = vtruncate_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 23)), vcast_vd_d(-M_1_PI / (1 << 24))));
    vint ql2 = vrint_vi_vd(vadd_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI)), vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-(1 << 23)), vcast_vd_d(-0.5))));
    dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24));
    ql2 = vadd_vi_vi_vi(vadd_vi_vi_vi(ql2, ql2), vcast_vi_i(1));
    const vdouble dql = vcast_vd_vi(ql2);

    u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d);
    s = ddadd2_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5)));
    s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5)));

    // Blend per lane, then re-purpose g as the validity mask of path 2.
    ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2);
    x = vsel_vd2_vo_vd2_vd2(g, x, s);
    g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX));

    if (!LIKELY(vtestallones_i_vo64(g))) {
      // Path 3 (rempi); ql2 and t here shadow the outer names.
      ddi_t ddi = rempi(d);
      vint ql2 = vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(3));
      ql2 = vadd_vi_vi_vi(vadd_vi_vi_vi(ql2, ql2), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0))), vcast_vi_i(8), vcast_vi_i(7)));
      ql2 = vsra_vi_vi_i(ql2, 1);
      vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddigeti_vi_ddi(ddi), vcast_vi_i(1)), vcast_vi_i(0));
      vdouble y = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vcast_vd_d(0)), vcast_vd_d(0), vcast_vd_d(-1));
      vdouble2 t = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), y), vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), y));
      t = ddadd2_vd2_vd2_vd2(ddigetdd_vd2_ddi(ddi), t);
      ddi = ddisetdd_ddi_ddi_vd2(ddi, vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), t, ddigetdd_vd2_ddi(ddi)));
      s = ddnormalize_vd2_vd2(ddigetdd_vd2_ddi(ddi));
      ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2);
      x = vsel_vd2_vo_vd2_vd2(g, x, s);
      x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(vd2getx_vd_vd2(x)))));
    }
  }

  t = x;
  s = ddsqu_vd2_vd2(x);

  vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2);
  u = POLY6(vd2getx_vd_vd2(s), s2, s4, 2.72052416138529567917983e-15, -7.6429259411395447190023e-13, 1.60589370117277896211623e-10, -2.5052106814843123359368e-08, 2.75573192104428224777379e-06, -0.000198412698412046454654947);
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.00833333333333318056201922));

  x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(-0.166666666666666657414808), vmul_vd_vd_vd(u, vd2getx_vd_vd2(s))), s));
  u = ddmul_vd_vd2_vd2(t, x);

  u = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(u)));

  return u;
#endif // #if !defined(DETERMINISTIC)
}

// Linkage and naming of the shared implementations that follow.
// With ENABLE_GNUABI they are compiled as static INLINE CONST helpers under the
// internal names sincosk, sincosk_u1, sincospik_u05, sincospik_u35 and modfk
// (presumably wrapped by GNU-ABI entry points defined elsewhere -- confirm).
// Otherwise they are emitted under the x* names with the EXPORT attribute.
// TYPE6_FUNCATR, SQRTU05_FUNCATR, XSINCOSPI_* and XMODF are not used in this part of the file.
#ifdef ENABLE_GNUABI
#define TYPE2_FUNCATR static INLINE CONST
#define TYPE6_FUNCATR static INLINE CONST
#define SQRTU05_FUNCATR static INLINE CONST
#define XSINCOS sincosk
#define XSINCOS_U1 sincosk_u1
#define XSINCOSPI_U05 sincospik_u05
#define XSINCOSPI_U35 sincospik_u35
#define XMODF modfk
#else
#define TYPE2_FUNCATR EXPORT
#define TYPE6_FUNCATR EXPORT CONST
#define SQRTU05_FUNCATR EXPORT CONST
#define XSINCOS xsincos
#define XSINCOS_U1 xsincos_u1
#define XSINCOSPI_U05 xsincospi_u05
#define XSINCOSPI_U35 xsincospi_u35
#define XMODF xmodf
#endif

// XSINCOS (xsincos, or sincosk under ENABLE_GNUABI): reduces d by multiples of pi/2
// (quotient ql = rint(d * 2 / pi)) and evaluates two polynomials on the same reduced
// argument t with s = t^2:
//   rx -- odd, sine-type series:   t + t * (s * P(s))
//   ry -- even, cosine-type series: 1 + s * Q(s)
// The two are then placed in the x / y components according to ql & 1 and their signs
// set from ql & 2 and (ql + 1) & 2.
// NOTE(review): x / y presumably hold sin(d) / cos(d) respectively -- confirm with callers.
// NOTE(review): the formulas above assume vmla(a, b, c) is a*b + c.
TYPE2_FUNCATR VECTOR_CC vdouble2 XSINCOS(vdouble d) {
#if !defined(DETERMINISTIC)
  vopmask o;
  vdouble u, t, rx, ry, s;
  vdouble2 r;
  vint ql;

  if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) {
    // Path 1: two-constant reduction with PI_A2 / PI_B2, each halved.
    vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI)));
    ql = vrint_vi_vd(dql);
    s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d);
    s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), s);
  } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) {
    // Path 2: quotient split into dqh (a multiple of 1 << 24) and dql, with the
    // four-piece constant PI_A..PI_D (halved).
    vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24))));
    dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24));
    vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh));
    ql = vrint_vi_vd(dql);

    s = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d);
    s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), s);
    s = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), s);
    s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), s);
    s = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), s);
    s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), s);
    s = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), s);
  } else {
    // Path 3: rempi() result used directly -- integer part as ql, the two remainder
    // components summed into s, with the isinf|isnan mask OR'ed in.
    ddi_t ddi = rempi(d);
    ql = ddigeti_vi_ddi(ddi);
    s = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi)));
    s = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(s)));
  }

  // t keeps the reduced argument; s becomes its square.
  t = s;

  s = vmul_vd_vd_vd(s, s);

  // Sine-type polynomial (Horner form in s).
  u = vcast_vd_d(1.58938307283228937328511e-10);
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.50506943502539773349318e-08));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573131776846360512547e-06));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000198412698278911770864914));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0083333333333191845961746));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666130709393));

  rx = vmla_vd_vd_vd_vd(vmul_vd_vd_vd(u, s), t, t);
  // A negative-zero input yields -0.0 for rx.
  rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), rx);

  // Cosine-type polynomial (Horner form in s).
  u = vcast_vd_d(-1.13615350239097429531523e-11);
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.08757471207040055479366e-09));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.75573144028847567498567e-07));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.48015872890001867311915e-05));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.00138888888888714019282329));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0416666666666665519592062));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.5));

  ry = vmla_vd_vd_vd_vd(s, u, vcast_vd_d(1));

  // (ql & 1) == 0: x = rx, y = ry; otherwise the two are swapped.
  o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(0)));
  r = vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(o, rx, ry), vsel_vd_vo_vd_vd(o, ry, rx));

  // Negate x where (ql & 2) == 2.
  o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(2)));
  r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(r)))));

  // Negate y where ((ql + 1) & 2) == 2.
  o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2)));
  r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(r)))));

  return r;

#else // #if !defined(DETERMINISTIC)

  vopmask o;
  vdouble u, t, rx, ry, s = d;
  vdouble2 r;
  vint ql;

  // Path 1 is computed unconditionally into s; the mask g is evaluated afterwards.
  vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(s, vcast_vd_d(2 * M_1_PI)));
  ql = vrint_vi_vd(dql);
  s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), s);
  s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), s);
  vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2));

  if (!LIKELY(vtestallones_i_vo64(g))) {
    // Path 2 accumulated in u; the inner dql shadows the outer one.
    vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24))));
    dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24));
    vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh));

    u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d);
    u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), u);
    u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), u);
    u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), u);
    u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), u);
    u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), u);
    u = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), u);

    // Blend per lane, then re-purpose g as the validity mask of path 2.
    ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, vrint_vi_vd(dql));
    s = vsel_vd_vo_vd_vd(g, s, u);
    g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX));

    if (!LIKELY(vtestallones_i_vo64(g))) {
      // Path 3 (rempi), blended in for the lanes where g does not hold.
      ddi_t ddi = rempi(d);
      u = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi)));
      u = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(u)));

      ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ddigeti_vi_ddi(ddi));
      s = vsel_vd_vo_vd_vd(g, s, u);
    }
  }

  // From here on identical to the non-DETERMINISTIC branch.
  t = s;

  s = vmul_vd_vd_vd(s, s);

  u = vcast_vd_d(1.58938307283228937328511e-10);
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.50506943502539773349318e-08));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573131776846360512547e-06));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000198412698278911770864914));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0083333333333191845961746));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666130709393));

  rx = vmla_vd_vd_vd_vd(vmul_vd_vd_vd(u, s), t, t);
  rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), rx);

  u = vcast_vd_d(-1.13615350239097429531523e-11);
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.08757471207040055479366e-09));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.75573144028847567498567e-07));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.48015872890001867311915e-05));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.00138888888888714019282329));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0416666666666665519592062));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.5));

  ry = vmla_vd_vd_vd_vd(s, u, vcast_vd_d(1));

  o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(0)));
  r = vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(o, rx, ry), vsel_vd_vo_vd_vd(o, ry, rx));

  o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(2)));
  r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(r)))));

  o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2)));
  r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(r)))));

  return r;
#endif // #if !defined(DETERMINISTIC)
}

TYPE2_FUNCATR VECTOR_CC vdouble2 XSINCOS_U1(vdouble d) {
#if !defined(DETERMINISTIC)
  vopmask o;
  vdouble u, rx, ry;
  vdouble2 r, s, t, x;
  vint ql;

  if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) {
    const vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI)));
    ql = vrint_vi_vd(dql);
    u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5), d);
    s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5)));
  } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) {
    vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24))));
    dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24));
    const vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh));
    ql = vrint_vi_vd(dql);
    u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d);
    s = ddadd_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5)));
    s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5)));
    s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5)));
  } else {
    ddi_t ddi = rempi(d);
    ql = ddigeti_vi_ddi(ddi);
    s = ddigetdd_vd2_ddi(ddi);
    o = vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d));
    s = vd2setxy_vd2_vd_vd(vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(s)))), vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(s)))));
  }

  t = s;

  s = vd2setx_vd2_vd2_vd(s, ddsqu_vd_vd2(s));

  u = vcast_vd_d(1.58938307283228937328511e-10);
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-2.50506943502539773349318e-08));
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(2.75573131776846360512547e-06));
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.000198412698278911770864914));
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.0083333333333191845961746));
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.166666666666666130709393));

  u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(t)));

  x = ddadd_vd2_vd2_vd(t, u);
  rx = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x));

  rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), rx);

  u = vcast_vd_d(-1.13615350239097429531523e-11);
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(2.08757471207040055479366e-09));
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-2.75573144028847567498567e-07));
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(2.48015872890001867311915e-05));
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.00138888888888714019282329));
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.0416666666666665519592062));
  u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.5));

  x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd_vd(vd2getx_vd_vd2(s), u));
  ry = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x));

  o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(0)));
  r = vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(o, rx, ry), vsel_vd_vo_vd_vd(o, ry, rx));

  o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(2)));
  r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(r)))));

  o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2)));
  r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(r)))));

  return r;

#else // #if !defined(DETERMINISTIC)

  vopmask o;
  vdouble u, rx, ry;
  vdouble2 r, s, t, x;
  vint ql;

  const vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI)));
  ql = vrint_vi_vd(dql);
  u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5), d);
  s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5)));
  vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2));

  if (!LIKELY(vtestallones_i_vo64(g))) {
    vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 <<
24)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); const vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh)); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); x = ddadd_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5))); x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5))); x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5))); x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5))); x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5))); x = ddadd_vd2_vd2_vd(x, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5))); ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, vrint_vi_vd(dql)); s = vsel_vd2_vo_vd2_vd2(g, s, x); g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX)); if (!LIKELY(vtestallones_i_vo64(g))) { ddi_t ddi = rempi(d); x = ddigetdd_vd2_ddi(ddi); o = vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)); x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(x))))); x = vd2sety_vd2_vd2_vd(x, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(x))))); ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ddigeti_vi_ddi(ddi)); s = vsel_vd2_vo_vd2_vd2(g, s, x); } } t = s; s = vd2setx_vd2_vd2_vd(s, ddsqu_vd_vd2(s)); u = vcast_vd_d(1.58938307283228937328511e-10); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-2.50506943502539773349318e-08)); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(2.75573131776846360512547e-06)); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.000198412698278911770864914)); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.0083333333333191845961746)); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.166666666666666130709393)); u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(t))); x = ddadd_vd2_vd2_vd(t, u); rx = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), 
vcast_vd_d(-0.0), rx); u = vcast_vd_d(-1.13615350239097429531523e-11); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(2.08757471207040055479366e-09)); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-2.75573144028847567498567e-07)); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(2.48015872890001867311915e-05)); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.00138888888888714019282329)); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(0.0416666666666665519592062)); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(-0.5)); x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd_vd(vd2getx_vd_vd2(s), u)); ry = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(0))); r = vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(o, rx, ry), vsel_vd_vo_vd_vd(o, ry, rx)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(2))); r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2))); r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); return r; #endif // #if !defined(DETERMINISTIC) } #if !defined(DETERMINISTIC) /* XSINCOSPI_U05: returns sin(pi*d) in .x and cos(pi*d) in .y. u = 4*d is split into an even integer q and a remainder s = u - q; two polynomials in s*s (low-order coefficients given as double-double pairs) are evaluated, then bit 1 of q swaps the results and bit 2 of q and of q+2 flips their signs. A -0.0 input keeps -0.0 as its sine. Lanes with |d| > TRIGRANGEMAX3/4 get their sine bits cleared and their cosine set to 1; infinite lanes have the isinf mask ORed into both results (presumably yielding NaN). Only built when DETERMINISTIC is not defined. NOTE(review): _U05 presumably means a 0.5 ULP error bound; confirm. */ TYPE2_FUNCATR VECTOR_CC vdouble2 XSINCOSPI_U05(vdouble d) { vopmask o; vdouble u, s, t, rx, ry; vdouble2 r, x, s2; u = vmul_vd_vd_vd(d, vcast_vd_d(4.0)); vint q = vtruncate_vi_vd(u); q = vand_vi_vi_vi(vadd_vi_vi_vi(q, vxor_vi_vi_vi(vsrl_vi_vi_i(q, 31), vcast_vi_i(1))), vcast_vi_i(~1)); s = vsub_vd_vd_vd(u, vcast_vd_vi(q)); t = s; s = vmul_vd_vd_vd(s, s); s2 = ddmul_vd2_vd_vd(t, t); // u = vcast_vd_d(-2.02461120785182399295868e-14); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(6.94821830580179461327784e-12)); u = vmla_vd_vd_vd_vd(u, s, 
vcast_vd_d(-1.75724749952853179952664e-09)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(3.13361688966868392878422e-07)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-3.6576204182161551920361e-05)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00249039457019271850274356)); x = ddadd2_vd2_vd_vd2(vmul_vd_vd_vd(u, s), vcast_vd2_d_d(-0.0807455121882807852484731, 3.61852475067037104849987e-18)); x = ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(s2, x), vcast_vd2_d_d(0.785398163397448278999491, 3.06287113727155002607105e-17)); x = ddmul_vd2_vd2_vd(x, t); rx = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), rx); // u = vcast_vd_d(9.94480387626843774090208e-16); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-3.89796226062932799164047e-13)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1.15011582539996035266901e-10)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.4611369501044697495359e-08)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(3.59086044859052754005062e-06)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000325991886927389905997954)); x = ddadd2_vd2_vd_vd2(vmul_vd_vd_vd(u, s), vcast_vd2_d_d(0.0158543442438155018914259, -1.04693272280631521908845e-18)); x = ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(s2, x), vcast_vd2_d_d(-0.308425137534042437259529, -1.95698492133633550338345e-17)); x = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd2(x, s2), vcast_vd_d(1)); ry = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); // o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(0))); r = vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(o, rx, ry), vsel_vd_vo_vd_vd(o, ry, rx)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(4)), vcast_vi_i(4))); r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(4)), vcast_vi_i(4))); r = vd2sety_vd2_vd2_vd(r, 
vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); o = vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX3/4)); r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vandnot_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); r = vd2sety_vd2_vd2_vd(r, vsel_vd_vo_vd_vd(o, vcast_vd_d(1), vd2gety_vd_vd2(r))); o = visinf_vo_vd(d); r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); return r; } /* XSINCOSPI_U35: returns sin(pi*d) in .x and cos(pi*d) in .y using plain double-precision Horner evaluation. u = 4*d is split into an even integer q and a remainder s = u - q; bit 1 of q swaps the two polynomial results and bit 2 of q and of q+2 flips their signs. Lanes with |d| > TRIGRANGEMAX3/4 have the bits of both results cleared, and infinite lanes have the isinf mask ORed into both results (presumably yielding NaN). NOTE(review): XSINCOSPI_U05 sets the cosine to 1 for out-of-range lanes while this variant clears it to 0; confirm that is intended. */ TYPE2_FUNCATR VECTOR_CC vdouble2 XSINCOSPI_U35(vdouble d) { vopmask o; vdouble u, s, t, rx, ry; vdouble2 r; u = vmul_vd_vd_vd(d, vcast_vd_d(4.0)); vint q = vtruncate_vi_vd(u); q = vand_vi_vi_vi(vadd_vi_vi_vi(q, vxor_vi_vi_vi(vsrl_vi_vi_i(q, 31), vcast_vi_i(1))), vcast_vi_i(~1)); s = vsub_vd_vd_vd(u, vcast_vd_vi(q)); t = s; s = vmul_vd_vd_vd(s, s); // u = vcast_vd_d(+0.6880638894766060136e-11); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.1757159564542310199e-8)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.3133616327257867311e-6)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.3657620416388486452e-4)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2490394570189932103e-2)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.8074551218828056320e-1)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.7853981633974482790e+0)); rx = vmul_vd_vd_vd(u, t); // u = vcast_vd_d(-0.3860141213683794352e-12); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1150057888029681415e-9)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.2461136493006663553e-7)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.3590860446623516713e-5)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.3259918869269435942e-3)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1585434424381541169e-1)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.3084251375340424373e+0)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1)); ry = u; // o = 
vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(0))); r = vd2setxy_vd2_vd_vd(vsel_vd_vo_vd_vd(o, rx, ry), vsel_vd_vo_vd_vd(o, ry, rx)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(4)), vcast_vi_i(4))); r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(4)), vcast_vi_i(4))); r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); o = vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX3/4)); r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vandnot_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vandnot_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); o = visinf_vo_vd(d); r = vd2setx_vd2_vd2_vd(r, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(r))))); r = vd2sety_vd2_vd2_vd(r, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(r))))); return r; } /* XMODF: splits x into a fractional part (.x) and an integral part (.y), both passed through vcopysign with x. The fraction is obtained by first subtracting 2^31 * trunc(x / 2^31) and then subtracting trunc of the remainder; lanes with |x| > 2^52 get a fraction of 0. NOTE(review): the two-step truncation presumably keeps the value within the range of vtruncate_vi_vd; confirm. */ TYPE6_FUNCATR VECTOR_CC vdouble2 XMODF(vdouble x) { vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(INT64_C(1) << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (INT64_C(1) << 31))))))); fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr))); fr = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(INT64_C(1) << 52)), vcast_vd_d(0), fr); vdouble2 ret; ret = vd2setxy_vd2_vd_vd(vcopysign_vd_vd_vd(fr, x), vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), x)); return ret; } #ifdef ENABLE_GNUABI /* xsincos: GNU ABI entry point; evaluates sincosk(a) and stores the .x component to ps and the .y component to pc. */ EXPORT VECTOR_CC void xsincos(vdouble a, double *ps, double *pc) { vdouble2 r = sincosk(a); vstoreu_v_p_vd(ps, vd2getx_vd_vd2(r)); vstoreu_v_p_vd(pc, vd2gety_vd_vd2(r)); } /* xsincos_u1: GNU ABI entry point; evaluates sincosk_u1(a) and stores the .x component to ps and the .y component to pc. */ EXPORT VECTOR_CC void xsincos_u1(vdouble a, double *ps, double *pc) { vdouble2 r = sincosk_u1(a); 
vstoreu_v_p_vd(ps, vd2getx_vd_vd2(r)); vstoreu_v_p_vd(pc, vd2gety_vd_vd2(r)); } /* xsincospi_u05: GNU ABI entry point; evaluates sincospik_u05(a) and stores the .x component to ps and the .y component to pc. */ EXPORT VECTOR_CC void xsincospi_u05(vdouble a, double *ps, double *pc) { vdouble2 r = sincospik_u05(a); vstoreu_v_p_vd(ps, vd2getx_vd_vd2(r)); vstoreu_v_p_vd(pc, vd2gety_vd_vd2(r)); } /* xsincospi_u35: GNU ABI entry point; evaluates sincospik_u35(a) and stores the .x component to ps and the .y component to pc. */ EXPORT VECTOR_CC void xsincospi_u35(vdouble a, double *ps, double *pc) { vdouble2 r = sincospik_u35(a); vstoreu_v_p_vd(ps, vd2getx_vd_vd2(r)); vstoreu_v_p_vd(pc, vd2gety_vd_vd2(r)); } /* xmodf: GNU ABI entry point; evaluates modfk(a), stores the .y component to iptr and returns the .x component. */ EXPORT CONST VECTOR_CC vdouble xmodf(vdouble a, double *iptr) { vdouble2 r = modfk(a); vstoreu_v_p_vd(iptr, vd2gety_vd_vd2(r)); return vd2getx_vd_vd2(r); } #endif // #ifdef ENABLE_GNUABI #endif // #if !defined(DETERMINISTIC) /* sinpik: helper returning sin(pi*d) as an unsummed double-double. u = 4*d is split into an even integer q and a remainder s = u - q. When bit 1 of q is set the first coefficient of each pair is used and the result is x*s2 + 1; otherwise the second coefficient is used and the result is x*t. Both components have their sign bit flipped when bit 2 of q is set. */ static INLINE CONST VECTOR_CC vdouble2 sinpik(vdouble d) { vopmask o; vdouble u, s, t; vdouble2 x, s2; u = vmul_vd_vd_vd(d, vcast_vd_d(4.0)); vint q = vtruncate_vi_vd(u); q = vand_vi_vi_vi(vadd_vi_vi_vi(q, vxor_vi_vi_vi(vsrl_vi_vi_i(q, 31), vcast_vi_i(1))), vcast_vi_i(~1)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(2))); s = vsub_vd_vd_vd(u, vcast_vd_vi(q)); t = s; s = vmul_vd_vd_vd(s, s); s2 = ddmul_vd2_vd_vd(t, t); // u = vsel_vd_vo_d_d(o, 9.94480387626843774090208e-16, -2.02461120785182399295868e-14); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -3.89796226062932799164047e-13, 6.948218305801794613277840e-12)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 1.150115825399960352669010e-10, -1.75724749952853179952664e-09)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -2.46113695010446974953590e-08, 3.133616889668683928784220e-07)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 3.590860448590527540050620e-06, -3.65762041821615519203610e-05)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -0.000325991886927389905997954, 0.0024903945701927185027435600)); x = ddadd2_vd2_vd_vd2(vmul_vd_vd_vd(u, s), vsel_vd2_vo_d_d_d_d(o, 0.0158543442438155018914259, -1.04693272280631521908845e-18, -0.0807455121882807852484731, 3.61852475067037104849987e-18)); x = 
ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(s2, x), vsel_vd2_vo_d_d_d_d(o, -0.308425137534042437259529, -1.95698492133633550338345e-17, 0.785398163397448278999491, 3.06287113727155002607105e-17)); x = ddmul_vd2_vd2_vd2(x, vsel_vd2_vo_vd2_vd2(o, s2, vcast_vd2_vd_vd(t, vcast_vd_d(0)))); x = vsel_vd2_vo_vd2_vd2(o, ddadd2_vd2_vd2_vd(x, vcast_vd_d(1)), x); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(4)), vcast_vi_i(4))); x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(x))))); x = vd2sety_vd2_vd2_vd(x, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(x))))); return x; } /* xsinpi_u05: sin(pi*d) per lane. Sums the double-double returned by sinpik(d), returns -0.0 for a -0.0 input, clears the result bits where |d| > TRIGRANGEMAX3/4, and ORs the isinf mask into the result for infinite d (presumably yielding NaN). */ EXPORT CONST VECTOR_CC vdouble xsinpi_u05(vdouble d) { vdouble2 x = sinpik(d); vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); r = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), r); r = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX3/4)), vreinterpret_vm_vd(r))); r = vreinterpret_vd_vm(vor_vm_vo64_vm(visinf_vo_vd(d), vreinterpret_vm_vd(r))); return r; } /* cospik: helper returning cos(pi*d) as an unsummed double-double. u = 4*d is split into an even integer q and a remainder s = u - q. When bit 1 of q is clear the first coefficient of each pair is used and the result is x*s2 + 1; otherwise the second coefficient is used and the result is x*t. Both components have their sign bit flipped when bit 2 of q+2 is set. */ static INLINE CONST VECTOR_CC vdouble2 cospik(vdouble d) { vopmask o; vdouble u, s, t; vdouble2 x, s2; u = vmul_vd_vd_vd(d, vcast_vd_d(4.0)); vint q = vtruncate_vi_vd(u); q = vand_vi_vi_vi(vadd_vi_vi_vi(q, vxor_vi_vi_vi(vsrl_vi_vi_i(q, 31), vcast_vi_i(1))), vcast_vi_i(~1)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(0))); s = vsub_vd_vd_vd(u, vcast_vd_vi(q)); t = s; s = vmul_vd_vd_vd(s, s); s2 = ddmul_vd2_vd_vd(t, t); // u = vsel_vd_vo_d_d(o, 9.94480387626843774090208e-16, -2.02461120785182399295868e-14); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -3.89796226062932799164047e-13, 6.948218305801794613277840e-12)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 1.150115825399960352669010e-10, -1.75724749952853179952664e-09)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 
-2.46113695010446974953590e-08, 3.133616889668683928784220e-07)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 3.590860448590527540050620e-06, -3.65762041821615519203610e-05)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -0.000325991886927389905997954, 0.0024903945701927185027435600)); x = ddadd2_vd2_vd_vd2(vmul_vd_vd_vd(u, s), vsel_vd2_vo_d_d_d_d(o, 0.0158543442438155018914259, -1.04693272280631521908845e-18, -0.0807455121882807852484731, 3.61852475067037104849987e-18)); x = ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(s2, x), vsel_vd2_vo_d_d_d_d(o, -0.308425137534042437259529, -1.95698492133633550338345e-17, 0.785398163397448278999491, 3.06287113727155002607105e-17)); x = ddmul_vd2_vd2_vd2(x, vsel_vd2_vo_vd2_vd2(o, s2, vcast_vd2_vd_vd(t, vcast_vd_d(0)))); x = vsel_vd2_vo_vd2_vd2(o, ddadd2_vd2_vd2_vd(x, vcast_vd_d(1)), x); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(4)), vcast_vi_i(4))); x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2getx_vd_vd2(x))))); x = vd2sety_vd2_vd2_vd(x, vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vd2gety_vd_vd2(x))))); return x; } /* xcospi_u05: cos(pi*d) per lane. Sums the double-double returned by cospik(d), returns 1 where |d| > TRIGRANGEMAX3/4, and ORs the isinf mask into the result for infinite d (presumably yielding NaN). */ EXPORT CONST VECTOR_CC vdouble xcospi_u05(vdouble d) { vdouble2 x = cospik(d); vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); r = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX3/4)), vcast_vd_d(1), r); r = vreinterpret_vd_vm(vor_vm_vo64_vm(visinf_vo_vd(d), vreinterpret_vm_vd(r))); return r; } /* xtan: tangent of each lane of d. The argument is reduced by ql multiples of pi/2 (scheme chosen by |d| against TRIGRANGEMAX2 and 1e+6, with rempi() for larger inputs), halved, and fed to a polynomial giving u; the result is -2u / (u*u - 1), or the negated reciprocal of that when ql is odd. Lanes with d == 0 return d unchanged. When DETERMINISTIC is defined the reductions are blended per lane with selects instead of being chosen by whole-vector tests. */ EXPORT CONST VECTOR_CC vdouble xtan(vdouble d) { #if !defined(DETERMINISTIC) vdouble u, s, x, y; vopmask o; vint ql; if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); ql = vrint_vi_vd(dql); x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d); x = vmla_vd_vd_vd_vd(dql, 
vcast_vd_d(-PI_B2 * 0.5), x); } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1e+6))))) { vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh)); ql = vrint_vi_vd(dql); x = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), x); x = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), x); x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), x); x = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), x); x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), x); x = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), x); } else { ddi_t ddi = rempi(d); ql = ddigeti_vi_ddi(ddi); x = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi))); x = vreinterpret_vd_vm(vor_vm_vo64_vm(visinf_vo_vd(d), vreinterpret_vm_vd(x))); x = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(x))); } x = vmul_vd_vd_vd(x, vcast_vd_d(0.5)); s = vmul_vd_vd_vd(x, x); vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2); u = POLY8(s, s2, s4, +0.3245098826639276316e-3, +0.5619219738114323735e-3, +0.1460781502402784494e-2, +0.3591611540792499519e-2, +0.8863268409563113126e-2, +0.2186948728185535498e-1, +0.5396825399517272970e-1, +0.1333333333330500581e+0); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.3333333333333343695e+0)); u = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(u, x), x); y = vmla_vd_vd_vd_vd(u, u, vcast_vd_d(-1)); x = vmul_vd_vd_vd(u, vcast_vd_d(-2)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))); u = vdiv_vd_vd_vd(vsel_vd_vo_vd_vd(o, vneg_vd_vd(y), x), vsel_vd_vo_vd_vd(o, x, y)); u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u); return u; #else // #if !defined(DETERMINISTIC) vdouble u, s, x, y; vopmask o; vint ql; vdouble 
dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); ql = vrint_vi_vd(dql); s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d); s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), s); vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2)); if (!LIKELY(vtestallones_i_vo64(g))) { vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh)); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), u); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), u); u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), u); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), u); u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), u); u = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), u); ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, vrint_vi_vd(dql)); s = vsel_vd_vo_vd_vd(g, s, u); g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1e+6)); if (!LIKELY(vtestallones_i_vo64(g))) { ddi_t ddi = rempi(d); vint ql2 = ddigeti_vi_ddi(ddi); u = vadd_vd_vd_vd(vd2getx_vd_vd2(ddigetdd_vd2_ddi(ddi)), vd2gety_vd_vd2(ddigetdd_vd2_ddi(ddi))); u = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(u))); ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ql2); s = vsel_vd_vo_vd_vd(g, s, u); } } x = vmul_vd_vd_vd(s, vcast_vd_d(0.5)); s = vmul_vd_vd_vd(x, x); vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2); u = POLY8(s, s2, s4, +0.3245098826639276316e-3, +0.5619219738114323735e-3, +0.1460781502402784494e-2, +0.3591611540792499519e-2, +0.8863268409563113126e-2, +0.2186948728185535498e-1, +0.5396825399517272970e-1, +0.1333333333330500581e+0); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.3333333333333343695e+0)); u = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(u, x), x); y = vmla_vd_vd_vd_vd(u, u, vcast_vd_d(-1)); x = 
vmul_vd_vd_vd(u, vcast_vd_d(-2)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))); u = vdiv_vd_vd_vd(vsel_vd_vo_vd_vd(o, vneg_vd_vd(y), x), vsel_vd_vo_vd_vd(o, x, y)); u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u); return u; #endif // #if !defined(DETERMINISTIC) } /* xtan_u1: tangent of each lane of d, carried in double-double arithmetic. Reduction by ql multiples of pi/2 is chosen by |d| against TRIGRANGEMAX2 and TRIGRANGEMAX, with rempi() for larger inputs (an Inf/NaN lane mask is ORed into the reduced value there). The reduced value is halved, a polynomial gives x, and the result is -2x / (x*x - 1), or the negated reciprocal of that when ql is odd. Lanes with d == 0 return d unchanged. When DETERMINISTIC is defined the reductions are blended per lane with selects. NOTE(review): _u1 presumably means a 1.0 ULP error bound; confirm. */ EXPORT CONST VECTOR_CC vdouble xtan_u1(vdouble d) { #if !defined(DETERMINISTIC) vdouble u; vdouble2 s, t, x, y; vopmask o; vint ql; if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); ql = vrint_vi_vd(dql); u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5), d); s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5))); } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); s = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd(vcast_vd2_d_d(M_2_PI_H, M_2_PI_L), d), vsub_vd_vd_vd(vsel_vd_vo_vd_vd(vlt_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-0.5), vcast_vd_d(0.5)), dqh)); const vdouble dql = vtruncate_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s))); ql = vrint_vi_vd(dql); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); s = ddadd_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5 ))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5 ))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5 ))); s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5))); } else { ddi_t ddi = rempi(d); ql = ddigeti_vi_ddi(ddi); s = ddigetdd_vd2_ddi(ddi); o = vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)); s = vd2setx_vd2_vd2_vd(s, vreinterpret_vd_vm(vor_vm_vo64_vm(o, 
vreinterpret_vm_vd(vd2getx_vd_vd2(s))))); s = vd2sety_vd2_vd2_vd(s, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(s))))); } t = ddscale_vd2_vd2_vd(s, vcast_vd_d(0.5)); s = ddsqu_vd2_vd2(t); vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2); u = POLY8(vd2getx_vd_vd2(s), s2, s4, +0.3245098826639276316e-3, +0.5619219738114323735e-3, +0.1460781502402784494e-2, +0.3591611540792499519e-2, +0.8863268409563113126e-2, +0.2186948728185535498e-1, +0.5396825399517272970e-1, +0.1333333333330500581e+0); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(+0.3333333333333343695e+0)); x = ddadd_vd2_vd2_vd2(t, ddmul_vd2_vd2_vd(ddmul_vd2_vd2_vd2(s, t), u)); y = ddadd_vd2_vd_vd2(vcast_vd_d(-1), ddsqu_vd2_vd2(x)); x = ddscale_vd2_vd2_vd(x, vcast_vd_d(-2)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))); x = dddiv_vd2_vd2_vd2(vsel_vd2_vo_vd2_vd2(o, ddneg_vd2_vd2(y), x), vsel_vd2_vo_vd2_vd2(o, x, y)); u = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u); return u; #else // #if !defined(DETERMINISTIC) vdouble u; vdouble2 s, t, x, y; vopmask o; vint ql; const vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); ql = vrint_vi_vd(dql); u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5), d); s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5))); vopmask g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2)); if (!LIKELY(vtestallones_i_vo64(g))) { vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); x = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd(vcast_vd2_d_d(M_2_PI_H, M_2_PI_L), d), vsub_vd_vd_vd(vsel_vd_vo_vd_vd(vlt_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-0.5), vcast_vd_d(0.5)), dqh)); const vdouble dql = vtruncate_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x))); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), 
d); x = ddadd_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5 ))); x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5))); x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5 ))); x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5))); x = ddadd2_vd2_vd2_vd(x, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5 ))); x = ddadd_vd2_vd2_vd(x, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5))); ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, vrint_vi_vd(dql)); s = vsel_vd2_vo_vd2_vd2(g, s, x); g = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX)); if (!LIKELY(vtestallones_i_vo64(g))) { ddi_t ddi = rempi(d); x = ddigetdd_vd2_ddi(ddi); o = vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)); x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2getx_vd_vd2(x))))); x = vd2sety_vd2_vd2_vd(x, vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(vd2gety_vd_vd2(x))))); ql = vsel_vi_vo_vi_vi(vcast_vo32_vo64(g), ql, ddigeti_vi_ddi(ddi)); s = vsel_vd2_vo_vd2_vd2(g, s, x); } } t = ddscale_vd2_vd2_vd(s, vcast_vd_d(0.5)); s = ddsqu_vd2_vd2(t); vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2); u = POLY8(vd2getx_vd_vd2(s), s2, s4, +0.3245098826639276316e-3, +0.5619219738114323735e-3, +0.1460781502402784494e-2, +0.3591611540792499519e-2, +0.8863268409563113126e-2, +0.2186948728185535498e-1, +0.5396825399517272970e-1, +0.1333333333330500581e+0); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(s), vcast_vd_d(+0.3333333333333343695e+0)); x = ddadd_vd2_vd2_vd2(t, ddmul_vd2_vd2_vd(ddmul_vd2_vd2_vd2(s, t), u)); y = ddadd_vd2_vd_vd2(vcast_vd_d(-1), ddsqu_vd2_vd2(x)); x = ddscale_vd2_vd2_vd(x, vcast_vd_d(-2)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))); x = dddiv_vd2_vd2_vd2(vsel_vd2_vo_vd2_vd2(o, ddneg_vd2_vd2(y), x), vsel_vd2_vo_vd2_vd2(o, x, y)); u = vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)); u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, 
vcast_vd_d(0)), d, u); return u; #endif // #if !defined(DETERMINISTIC) } /* atan2k: arctangent kernel called by xatan2. Replaces x by |x|, forms s = -x/y when x < y and y/x otherwise, evaluates an odd polynomial in s, and adds q * pi/2. NOTE(review): q presumably starts at -2 for negative x and gains 1 when x < y, but that depends on vsel_vi_vd_vi and vsel_vi_vd_vd_vi_vi, which are defined elsewhere; confirm. */ static INLINE CONST VECTOR_CC vdouble atan2k(vdouble y, vdouble x) { vdouble s, t, u; vint q; vopmask p; q = vsel_vi_vd_vi(x, vcast_vi_i(-2)); x = vabs_vd_vd(x); q = vsel_vi_vd_vd_vi_vi(x, y, vadd_vi_vi_vi(q, vcast_vi_i(1)), q); p = vlt_vo_vd_vd(x, y); s = vsel_vd_vo_vd_vd(p, vneg_vd_vd(x), y); t = vmax_vd_vd_vd(x, y); s = vdiv_vd_vd_vd(s, t); t = vmul_vd_vd_vd(s, s); vdouble t2 = vmul_vd_vd_vd(t, t), t4 = vmul_vd_vd_vd(t2, t2), t8 = vmul_vd_vd_vd(t4, t4), t16 = vmul_vd_vd_vd(t8, t8); u = POLY19(t, t2, t4, t8, t16, -1.88796008463073496563746e-05, 0.000209850076645816976906797, -0.00110611831486672482563471, 0.00370026744188713119232403, -0.00889896195887655491740809, 0.016599329773529201970117, -0.0254517624932312641616861, 0.0337852580001353069993897, -0.0407629191276836500001934, 0.0466667150077840625632675, -0.0523674852303482457616113, 0.0587666392926673580854313, -0.0666573579361080525984562, 0.0769219538311769618355029, -0.090908995008245008229153, 0.111111105648261418443745, -0.14285714266771329383765, 0.199999999996591265594148, -0.333333333333311110369124); t = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(t, u), s); t = vmla_vd_vd_vd_vd(vcast_vd_vi(q), vcast_vd_d(M_PI/2), t); return t; } /* atan2k_u1: double-double variant of the arctangent kernel, called by xatan2_u1. Flips the sign bit of both components of x when x.x < 0, forms s = -x/y when x.x < y.x and y/x otherwise, evaluates a polynomial in s*s, and adds q times a double-double pi/2. NOTE(review): the initial value of q comes from vsel_vi_vd_vi, which is defined elsewhere; confirm its semantics. */ static INLINE CONST VECTOR_CC vdouble2 atan2k_u1(vdouble2 y, vdouble2 x) { vdouble u; vdouble2 s, t; vint q; vopmask p; q = vsel_vi_vd_vi(vd2getx_vd_vd2(x), vcast_vi_i(-2)); p = vlt_vo_vd_vd(vd2getx_vd_vd2(x), vcast_vd_d(0)); vmask b = vand_vm_vo64_vm(p, vreinterpret_vm_vd(vcast_vd_d(-0.0))); x = vd2setx_vd2_vd2_vd(x, vreinterpret_vd_vm(vxor_vm_vm_vm(b, vreinterpret_vm_vd(vd2getx_vd_vd2(x))))); x = vd2sety_vd2_vd2_vd(x, vreinterpret_vd_vm(vxor_vm_vm_vm(b, vreinterpret_vm_vd(vd2gety_vd_vd2(x))))); q = vsel_vi_vd_vd_vi_vi(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y), vadd_vi_vi_vi(q, vcast_vi_i(1)), q); p = vlt_vo_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(y)); s = vsel_vd2_vo_vd2_vd2(p, ddneg_vd2_vd2(x), y); t = 
vsel_vd2_vo_vd2_vd2(p, y, x); s = dddiv_vd2_vd2_vd2(s, t); t = ddsqu_vd2_vd2(s); t = ddnormalize_vd2_vd2(t); vdouble t2 = vmul_vd_vd_vd(vd2getx_vd_vd2(t), vd2getx_vd_vd2(t)), t4 = vmul_vd_vd_vd(t2, t2), t8 = vmul_vd_vd_vd(t4, t4), t16 = vmul_vd_vd_vd(t8, t8); u = POLY16(vd2getx_vd_vd2(t), t2, t4, t8, 1.06298484191448746607415e-05, -0.000125620649967286867384336, 0.00070557664296393412389774, -0.00251865614498713360352999, 0.00646262899036991172313504, -0.0128281333663399031014274, 0.0208024799924145797902497, -0.0289002344784740315686289, 0.0359785005035104590853656, -0.041848579703592507506027, 0.0470843011653283988193763, -0.0524914210588448421068719, 0.0587946590969581003860434, -0.0666620884778795497194182, 0.0769225330296203768654095, -0.0909090442773387574781907); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(t), vcast_vd_d(0.111111108376896236538123)); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(t), vcast_vd_d(-0.142857142756268568062339)); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(t), vcast_vd_d(0.199999999997977351284817)); u = vmla_vd_vd_vd_vd(u, vd2getx_vd_vd2(t), vcast_vd_d(-0.333333333333317605173818)); t = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd(ddmul_vd2_vd2_vd2(s, t), u)); t = ddadd_vd2_vd2_vd2(ddmul_vd2_vd2_vd(vcast_vd2_d_d(1.570796326794896557998982, 6.12323399573676603586882e-17), vcast_vd_vi(q)), t); return t; } /* visinf2_vd_vd_vd: for lanes where d is infinite, returns the bits of m ORed with the sign bit of d; all other lanes return zero bits. */ static INLINE CONST VECTOR_CC vdouble visinf2_vd_vd_vd(vdouble d, vdouble m) { return vreinterpret_vd_vm(vand_vm_vo64_vm(visinf_vo_vd(d), vor_vm_vm_vm(vand_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(m)))); } /* xatan2: atan2(y, x) per lane. Starts from atan2k(|y|, x) sign-adjusted by x via vmulsign, then overrides special cases in this order: x infinite or zero, y infinite, y zero (pi when x has its sign bit set, otherwise zero bits). Finally the result is sign-adjusted by y via vmulsign, and lanes where x or y is NaN have the NaN mask ORed into the result. */ EXPORT CONST VECTOR_CC vdouble xatan2(vdouble y, vdouble x) { vdouble r = atan2k(vabs_vd_vd(y), x); r = vmulsign_vd_vd_vd(r, x); r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), veq_vo_vd_vd(x, vcast_vd_d(0))), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2_vd_vd_vd(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/2), x))), r); r = vsel_vd_vo_vd_vd(visinf_vo_vd(y), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2_vd_vd_vd(x, 
vmulsign_vd_vd_vd(vcast_vd_d(M_PI/4), x))), r); r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(y, vcast_vd_d(0.0)), vreinterpret_vd_vm(vand_vm_vo64_vm(vsignbit_vo_vd(x), vreinterpret_vm_vd(vcast_vd_d(M_PI)))), r); r = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vreinterpret_vm_vd(vmulsign_vd_vd_vd(r, y)))); return r; } EXPORT CONST VECTOR_CC vdouble xatan2_u1(vdouble y, vdouble x) { vopmask o = vlt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(5.5626846462680083984e-309)); // nexttoward((1.0 / DBL_MAX), 1) x = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(x, vcast_vd_d(UINT64_C(1) << 53)), x); y = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(y, vcast_vd_d(UINT64_C(1) << 53)), y); vdouble2 d = atan2k_u1(vcast_vd2_vd_vd(vabs_vd_vd(y), vcast_vd_d(0)), vcast_vd2_vd_vd(x, vcast_vd_d(0))); vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)); r = vmulsign_vd_vd_vd(r, x); r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), veq_vo_vd_vd(x, vcast_vd_d(0))), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2_vd_vd_vd(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/2), x))), r); r = vsel_vd_vo_vd_vd(visinf_vo_vd(y), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2_vd_vd_vd(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/4), x))), r); r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(y, vcast_vd_d(0.0)), vreinterpret_vd_vm(vand_vm_vo64_vm(vsignbit_vo_vd(x), vreinterpret_vm_vd(vcast_vd_d(M_PI)))), r); r = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vreinterpret_vm_vd(vmulsign_vd_vd_vd(r, y)))); return r; } EXPORT CONST VECTOR_CC vdouble xasin(vdouble d) { vopmask o = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(0.5)); vdouble x2 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, d), vmul_vd_vd_vd(vsub_vd_vd_vd(vcast_vd_d(1), vabs_vd_vd(d)), vcast_vd_d(0.5))); vdouble x = vsel_vd_vo_vd_vd(o, vabs_vd_vd(d), vsqrt_vd_vd(x2)), u; vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4), x16 = vmul_vd_vd_vd(x8, x8); u = POLY12(x2, x4, x8, x16, +0.3161587650653934628e-1, -0.1581918243329996643e-1, 
+0.1929045477267910674e-1, +0.6606077476277170610e-2, +0.1215360525577377331e-1, +0.1388715184501609218e-1, +0.1735956991223614604e-1, +0.2237176181932048341e-1, +0.3038195928038132237e-1, +0.4464285681377102438e-1, +0.7500000000378581611e-1, +0.1666666666666497543e+0); u = vmla_vd_vd_vd_vd(u, vmul_vd_vd_vd(x, x2), x); vdouble r = vsel_vd_vo_vd_vd(o, u, vmla_vd_vd_vd_vd(u, vcast_vd_d(-2), vcast_vd_d(M_PI/2))); return vmulsign_vd_vd_vd(r, d); } EXPORT CONST VECTOR_CC vdouble xasin_u1(vdouble d) { vopmask o = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(0.5)); vdouble x2 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, d), vmul_vd_vd_vd(vsub_vd_vd_vd(vcast_vd_d(1), vabs_vd_vd(d)), vcast_vd_d(0.5))), u; vdouble2 x = vsel_vd2_vo_vd2_vd2(o, vcast_vd2_vd_vd(vabs_vd_vd(d), vcast_vd_d(0)), ddsqrt_vd2_vd(x2)); x = vsel_vd2_vo_vd2_vd2(veq_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1.0)), vcast_vd2_d_d(0, 0), x); vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4), x16 = vmul_vd_vd_vd(x8, x8); u = POLY12(x2, x4, x8, x16, +0.3161587650653934628e-1, -0.1581918243329996643e-1, +0.1929045477267910674e-1, +0.6606077476277170610e-2, +0.1215360525577377331e-1, +0.1388715184501609218e-1, +0.1735956991223614604e-1, +0.2237176181932048341e-1, +0.3038195928038132237e-1, +0.4464285681377102438e-1, +0.7500000000378581611e-1, +0.1666666666666497543e+0); u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x))); vdouble2 y = ddsub_vd2_vd2_vd(ddsub_vd2_vd2_vd2(vcast_vd2_d_d(3.141592653589793116/4, 1.2246467991473532072e-16/4), x), u); vdouble r = vsel_vd_vo_vd_vd(o, vadd_vd_vd_vd(u, vd2getx_vd_vd2(x)), vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(y), vd2gety_vd_vd2(y)), vcast_vd_d(2))); return vmulsign_vd_vd_vd(r, d); } EXPORT CONST VECTOR_CC vdouble xacos(vdouble d) { vopmask o = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(0.5)); vdouble x2 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, d), vmul_vd_vd_vd(vsub_vd_vd_vd(vcast_vd_d(1), vabs_vd_vd(d)), vcast_vd_d(0.5))), u; vdouble x = vsel_vd_vo_vd_vd(o, vabs_vd_vd(d), 
vsqrt_vd_vd(x2)); x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1.0)), vcast_vd_d(0), x); vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4), x16 = vmul_vd_vd_vd(x8, x8); u = POLY12(x2, x4, x8, x16, +0.3161587650653934628e-1, -0.1581918243329996643e-1, +0.1929045477267910674e-1, +0.6606077476277170610e-2, +0.1215360525577377331e-1, +0.1388715184501609218e-1, +0.1735956991223614604e-1, +0.2237176181932048341e-1, +0.3038195928038132237e-1, +0.4464285681377102438e-1, +0.7500000000378581611e-1, +0.1666666666666497543e+0); u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(x2, x)); vdouble y = vsub_vd_vd_vd(vcast_vd_d(M_PI/2), vadd_vd_vd_vd(vmulsign_vd_vd_vd(x, d), vmulsign_vd_vd_vd(u, d))); x = vadd_vd_vd_vd(x, u); vdouble r = vsel_vd_vo_vd_vd(o, y, vmul_vd_vd_vd(x, vcast_vd_d(2))); return vsel_vd_vo_vd_vd(vandnot_vo_vo_vo(o, vlt_vo_vd_vd(d, vcast_vd_d(0))), vd2getx_vd_vd2(ddadd_vd2_vd2_vd(vcast_vd2_d_d(3.141592653589793116, 1.2246467991473532072e-16), vneg_vd_vd(r))), r); } EXPORT CONST VECTOR_CC vdouble xacos_u1(vdouble d) { vopmask o = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(0.5)); vdouble x2 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, d), vmul_vd_vd_vd(vsub_vd_vd_vd(vcast_vd_d(1), vabs_vd_vd(d)), vcast_vd_d(0.5))), u; vdouble2 x = vsel_vd2_vo_vd2_vd2(o, vcast_vd2_vd_vd(vabs_vd_vd(d), vcast_vd_d(0)), ddsqrt_vd2_vd(x2)); x = vsel_vd2_vo_vd2_vd2(veq_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1.0)), vcast_vd2_d_d(0, 0), x); vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4), x16 = vmul_vd_vd_vd(x8, x8); u = POLY12(x2, x4, x8, x16, +0.3161587650653934628e-1, -0.1581918243329996643e-1, +0.1929045477267910674e-1, +0.6606077476277170610e-2, +0.1215360525577377331e-1, +0.1388715184501609218e-1, +0.1735956991223614604e-1, +0.2237176181932048341e-1, +0.3038195928038132237e-1, +0.4464285681377102438e-1, +0.7500000000378581611e-1, +0.1666666666666497543e+0); u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x))); vdouble2 y = 
ddsub_vd2_vd2_vd2(vcast_vd2_d_d(3.141592653589793116/2, 1.2246467991473532072e-16/2), ddadd_vd2_vd_vd(vmulsign_vd_vd_vd(vd2getx_vd_vd2(x), d), vmulsign_vd_vd_vd(u, d))); x = ddadd_vd2_vd2_vd(x, u); y = vsel_vd2_vo_vd2_vd2(o, y, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); y = vsel_vd2_vo_vd2_vd2(vandnot_vo_vo_vo(o, vlt_vo_vd_vd(d, vcast_vd_d(0))), ddsub_vd2_vd2_vd2(vcast_vd2_d_d(3.141592653589793116, 1.2246467991473532072e-16), y), y); return vadd_vd_vd_vd(vd2getx_vd_vd2(y), vd2gety_vd_vd2(y)); } EXPORT CONST VECTOR_CC vdouble xatan_u1(vdouble d) { vdouble2 d2 = atan2k_u1(vcast_vd2_vd_vd(vabs_vd_vd(d), vcast_vd_d(0)), vcast_vd2_d_d(1, 0)); vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(d2), vd2gety_vd_vd2(d2)); r = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vcast_vd_d(1.570796326794896557998982), r); return vmulsign_vd_vd_vd(r, d); } EXPORT CONST VECTOR_CC vdouble xatan(vdouble s) { vdouble t, u; vint q; #if defined(__INTEL_COMPILER) && defined(ENABLE_PURECFMA_SCALAR) vdouble w = s; #endif q = vsel_vi_vd_vi(s, vcast_vi_i(2)); s = vabs_vd_vd(s); q = vsel_vi_vd_vd_vi_vi(vcast_vd_d(1), s, vadd_vi_vi_vi(q, vcast_vi_i(1)), q); s = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(vcast_vd_d(1), s), vrec_vd_vd(s), s); t = vmul_vd_vd_vd(s, s); vdouble t2 = vmul_vd_vd_vd(t, t), t4 = vmul_vd_vd_vd(t2, t2), t8 = vmul_vd_vd_vd(t4, t4), t16 = vmul_vd_vd_vd(t8, t8); u = POLY19(t, t2, t4, t8, t16, -1.88796008463073496563746e-05, 0.000209850076645816976906797, -0.00110611831486672482563471, 0.00370026744188713119232403, -0.00889896195887655491740809, 0.016599329773529201970117, -0.0254517624932312641616861, 0.0337852580001353069993897, -0.0407629191276836500001934, 0.0466667150077840625632675, -0.0523674852303482457616113, 0.0587666392926673580854313, -0.0666573579361080525984562, 0.0769219538311769618355029, -0.090908995008245008229153, 0.111111105648261418443745, -0.14285714266771329383765, 0.199999999996591265594148, -0.333333333333311110369124); t = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(t, u), s); t = 
vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(1)), vcast_vi_i(1))), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), t), t); t = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(2))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(t))); #if defined(__INTEL_COMPILER) && defined(ENABLE_PURECFMA_SCALAR) t = vsel_vd_vo_vd_vd(veq_vo_vd_vd(w, vcast_vd_d(0)), w, t); #endif return t; } #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vdouble xlog(vdouble d) { vdouble x, x2; vdouble t, m; #if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), d); vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); #else vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); m = vgetmant_vd_vd(d); #endif x = vdiv_vd_vd_vd(vsub_vd_vd_vd(m, vcast_vd_d(1)), vadd_vd_vd_vd(vcast_vd_d(1), m)); x2 = vmul_vd_vd_vd(x, x); vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4), x3 = vmul_vd_vd_vd(x, x2); t = POLY7(x2, x4, x8, 0.153487338491425068243146, 0.152519917006351951593857, 0.181863266251982985677316, 0.222221366518767365905163, 0.285714294746548025383248, 0.399999999950799600689777, 0.6666666666667778740063); #if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) x = vmla_vd_vd_vd_vd(x, vcast_vd_d(2), vmul_vd_vd_vd(vcast_vd_d(0.693147180559945286226764), vcast_vd_vi(e))); x = vmla_vd_vd_vd_vd(x3, t, x); x = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), x); x = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), x); x = 
vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), x); #else x = vmla_vd_vd_vd_vd(x, vcast_vd_d(2), vmul_vd_vd_vd(vcast_vd_d(0.693147180559945286226764), e)); x = vmla_vd_vd_vd_vd(x3, t, x); x = vfixup_vd_vd_vd_vi2_i(x, d, vcast_vi2_i((5 << (5*4))), 0); #endif return x; } #endif // #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vdouble xexp(vdouble d) { vdouble u = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(R_LN2))), s; vint q = vrint_vi_vd(u); s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2U), d); s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2L), s); #ifdef ENABLE_FMA_DP vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); u = POLY10(s, s2, s4, s8, +0.2081276378237164457e-8, +0.2511210703042288022e-7, +0.2755762628169491192e-6, +0.2755723402025388239e-5, +0.2480158687479686264e-4, +0.1984126989855865850e-3, +0.1388888888914497797e-2, +0.8333333333314938210e-2, +0.4166666666666602598e-1, +0.1666666666666669072e+0); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.5000000000000000000e+0)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1000000000000000000e+1)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1000000000000000000e+1)); #else // #ifdef ENABLE_FMA_DP vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); u = POLY10(s, s2, s4, s8, 2.08860621107283687536341e-09, 2.51112930892876518610661e-08, 2.75573911234900471893338e-07, 2.75572362911928827629423e-06, 2.4801587159235472998791e-05, 0.000198412698960509205564975, 0.00138888888889774492207962, 0.00833333333331652721664984, 0.0416666666666665047591422, 0.166666666666666851703837); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.5000000000000000000e+0)); u = vadd_vd_vd_vd(vcast_vd_d(1), vmla_vd_vd_vd_vd(vmul_vd_vd_vd(s, s), u, s)); #endif // #ifdef ENABLE_FMA_DP u = vldexp2_vd_vd_vi(u, q); u = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(709.78271114955742909217217426)), vcast_vd_d(SLEEF_INFINITY), u); u = 
vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-1000)), vreinterpret_vm_vd(u))); return u; } static INLINE CONST VECTOR_CC vdouble expm1k(vdouble d) { vdouble u = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(R_LN2))), s; vint q = vrint_vi_vd(u); s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2U), d); s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2L), s); vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); u = POLY10(s, s2, s4, s8, 2.08860621107283687536341e-09, 2.51112930892876518610661e-08, 2.75573911234900471893338e-07, 2.75572362911928827629423e-06, 2.4801587159235472998791e-05, 0.000198412698960509205564975, 0.00138888888889774492207962, 0.00833333333331652721664984, 0.0416666666666665047591422, 0.166666666666666851703837); u = vadd_vd_vd_vd(vmla_vd_vd_vd_vd(s2, vcast_vd_d(0.5), vmul_vd_vd_vd(vmul_vd_vd_vd(s2, s), u)), s); u = vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(q, vcast_vi_i(0))), u, vsub_vd_vd_vd(vldexp2_vd_vd_vi(vadd_vd_vd_vd(u, vcast_vd_d(1)), q), vcast_vd_d(1))); return u; } static INLINE CONST VECTOR_CC vdouble2 logk(vdouble d) { vdouble2 x, x2, s; vdouble t, m; #if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), d); vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); #else vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); m = vgetmant_vd_vd(d); #endif x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m)); x2 = ddsqu_vd2_vd2(x); vdouble x4 = vmul_vd_vd_vd(vd2getx_vd_vd2(x2), vd2getx_vd_vd2(x2)), x8 = vmul_vd_vd_vd(x4, x4), x16 = vmul_vd_vd_vd(x8, x8); t = POLY9(vd2getx_vd_vd2(x2), x4, x8, x16, 
0.116255524079935043668677, 0.103239680901072952701192, 0.117754809412463995466069, 0.13332981086846273921509, 0.153846227114512262845736, 0.181818180850050775676507, 0.222222222230083560345903, 0.285714285714249172087875, 0.400000000000000077715612); vdouble2 c = vcast_vd2_d_d(0.666666666666666629659233, 3.80554962542412056336616e-17); #if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), vcast_vd_vi(e)); #else s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), e); #endif s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); x = ddmul_vd2_vd2_vd2(x2, x); s = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd2(x, c)); x = ddmul_vd2_vd2_vd2(x2, x); s = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd(x, t)); return s; } #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vdouble xlog_u1(vdouble d) { vdouble2 x; vdouble t, m, x2; #if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), d); vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); #else vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); m = vgetmant_vd_vd(d); #endif x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m)); x2 = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x)); vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4); t = POLY7(x2, x4, x8, 0.1532076988502701353e+0, 0.1525629051003428716e+0, 0.1818605932937785996e+0, 0.2222214519839380009e+0, 0.2857142932794299317e+0, 0.3999999999635251990e+0, 0.6666666666667333541e+0); #if 
!defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), vcast_vd_vi(e)); #else vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), e); #endif s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x)), t)); vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s)); #if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) r = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), r); r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r); r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), r); #else r = vfixup_vd_vd_vd_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); #endif return r; } #endif // #if !defined(DETERMINISTIC) static INLINE CONST VECTOR_CC vdouble expk(vdouble2 d) { vdouble u = vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(R_LN2)); vdouble dq = vrint_vd_vd(u); vint q = vrint_vi_vd(dq); vdouble2 s, t; s = ddadd2_vd2_vd2_vd(d, vmul_vd_vd_vd(dq, vcast_vd_d(-L2U))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dq, vcast_vd_d(-L2L))); s = ddnormalize_vd2_vd2(s); vdouble s2 = vmul_vd_vd_vd(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s)), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); u = POLY10(vd2getx_vd_vd2(s), s2, s4, s8, 2.51069683420950419527139e-08, 2.76286166770270649116855e-07, 2.75572496725023574143864e-06, 2.48014973989819794114153e-05, 0.000198412698809069797676111, 0.0013888888939977128960529, 0.00833333333332371417601081, 0.0416666666665409524128449, 0.166666666666666740681535, 0.500000000000000999200722); t = ddadd_vd2_vd_vd2(vcast_vd_d(1), s); t = ddadd_vd2_vd2_vd2(t, ddmul_vd2_vd2_vd(ddsqu_vd2_vd2(s), u)); u = vadd_vd_vd_vd(vd2getx_vd_vd2(t), 
vd2gety_vd_vd2(t)); u = vldexp2_vd_vd_vi(u, q); u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(vd2getx_vd_vd2(d), vcast_vd_d(-1000)), vreinterpret_vm_vd(u))); return u; } #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vdouble xpow(vdouble x, vdouble y) { #if 1 vopmask yisint = visint_vo_vd(y); vopmask yisodd = vand_vo_vo_vo(visodd_vo_vd(y), yisint); vdouble2 d = ddmul_vd2_vd2_vd(logk(vabs_vd_vd(x)), y); vdouble result = expk(d); result = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vd2getx_vd_vd2(d), vcast_vd_d(709.78271114955742909217217426)), vcast_vd_d(SLEEF_INFINITY), result); result = vmul_vd_vd_vd(result, vsel_vd_vo_vd_vd(vgt_vo_vd_vd(x, vcast_vd_d(0)), vcast_vd_d(1), vsel_vd_vo_vd_vd(yisint, vsel_vd_vo_vd_vd(yisodd, vcast_vd_d(-1.0), vcast_vd_d(1)), vcast_vd_d(SLEEF_NAN)))); vdouble efx = vmulsign_vd_vd_vd(vsub_vd_vd_vd(vabs_vd_vd(x), vcast_vd_d(1)), y); result = vsel_vd_vo_vd_vd(visinf_vo_vd(y), vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(efx, vcast_vd_d(0.0)), vreinterpret_vm_vd(vsel_vd_vo_vd_vd(veq_vo_vd_vd(efx, vcast_vd_d(0.0)), vcast_vd_d(1.0), vcast_vd_d(SLEEF_INFINITY))))), result); result = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), veq_vo_vd_vd(x, vcast_vd_d(0.0))), vmul_vd_vd_vd(vsel_vd_vo_vd_vd(yisodd, vsign_vd_vd(x), vcast_vd_d(1.0)), vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0.0)), vneg_vd_vd(y), y), vcast_vd_d(0.0)), vreinterpret_vm_vd(vcast_vd_d(SLEEF_INFINITY))))), result); result = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vreinterpret_vm_vd(result))); result = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(y, vcast_vd_d(0)), veq_vo_vd_vd(x, vcast_vd_d(1))), vcast_vd_d(1), result); return result; #else return expk(ddmul_vd2_vd2_vd(logk(x), y)); #endif } #endif // #if !defined(DETERMINISTIC) static INLINE CONST VECTOR_CC vdouble2 expk2(vdouble2 d) { vdouble u = vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(R_LN2)); 
vdouble dq = vrint_vd_vd(u); vint q = vrint_vi_vd(dq); vdouble2 s, t; s = ddadd2_vd2_vd2_vd(d, vmul_vd_vd_vd(dq, vcast_vd_d(-L2U))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dq, vcast_vd_d(-L2L))); vdouble2 s2 = ddsqu_vd2_vd2(s), s4 = ddsqu_vd2_vd2(s2); vdouble s8 = vmul_vd_vd_vd(vd2getx_vd_vd2(s4), vd2getx_vd_vd2(s4)); u = POLY10(vd2getx_vd_vd2(s), vd2getx_vd_vd2(s2), vd2getx_vd_vd2(s4), s8, +0.1602472219709932072e-9, +0.2092255183563157007e-8, +0.2505230023782644465e-7, +0.2755724800902135303e-6, +0.2755731892386044373e-5, +0.2480158735605815065e-4, +0.1984126984148071858e-3, +0.1388888888886763255e-2, +0.8333333333333347095e-2, +0.4166666666666669905e-1); t = ddadd_vd2_vd_vd2(vcast_vd_d(0.5), ddmul_vd2_vd2_vd(s, vcast_vd_d(+0.1666666666666666574e+0))); t = ddadd_vd2_vd_vd2(vcast_vd_d(1.0), ddmul_vd2_vd2_vd2(t, s)); t = ddadd_vd2_vd_vd2(vcast_vd_d(1.0), ddmul_vd2_vd2_vd2(t, s)); t = ddadd_vd2_vd2_vd2(t, ddmul_vd2_vd2_vd(s4, u)); t = vd2setx_vd2_vd2_vd(t, vldexp2_vd_vd_vi(vd2getx_vd_vd2(t), q)); t = vd2sety_vd2_vd2_vd(t, vldexp2_vd_vd_vi(vd2gety_vd_vd2(t), q)); t = vd2setx_vd2_vd2_vd(t, vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(vd2getx_vd_vd2(d), vcast_vd_d(-1000)), vreinterpret_vm_vd(vd2getx_vd_vd2(t))))); t = vd2sety_vd2_vd2_vd(t, vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(vd2getx_vd_vd2(d), vcast_vd_d(-1000)), vreinterpret_vm_vd(vd2gety_vd_vd2(t))))); return t; } #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vdouble xsinh(vdouble x) { vdouble y = vabs_vd_vd(x); vdouble2 d = expk2(vcast_vd2_vd_vd(y, vcast_vd_d(0))); d = ddsub_vd2_vd2_vd2(d, ddrec_vd2_vd2(d)); y = vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(0.5)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(710)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); y = vmulsign_vd_vd_vd(y, x); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; } EXPORT CONST VECTOR_CC vdouble xcosh(vdouble x) { 
vdouble y = vabs_vd_vd(x); vdouble2 d = expk2(vcast_vd2_vd_vd(y, vcast_vd_d(0))); d = ddadd_vd2_vd2_vd2(d, ddrec_vd2_vd2(d)); y = vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(0.5)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(710)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; } EXPORT CONST VECTOR_CC vdouble xtanh(vdouble x) { vdouble y = vabs_vd_vd(x); vdouble2 d = expk2(vcast_vd2_vd_vd(y, vcast_vd_d(0))); vdouble2 e = ddrec_vd2_vd2(d); d = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd2_vd2(d, ddneg_vd2_vd2(e)), ddadd2_vd2_vd2_vd2(d, e)); y = vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(18.714973875)), visnan_vo_vd(y)), vcast_vd_d(1.0), y); y = vmulsign_vd_vd_vd(y, x); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; } EXPORT CONST VECTOR_CC vdouble xsinh_u35(vdouble x) { vdouble e = expm1k(vabs_vd_vd(x)); vdouble y = vdiv_vd_vd_vd(vadd_vd_vd_vd(e, vcast_vd_d(2)), vadd_vd_vd_vd(e, vcast_vd_d(1))); y = vmul_vd_vd_vd(y, vmul_vd_vd_vd(vcast_vd_d(0.5), e)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(709)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); y = vmulsign_vd_vd_vd(y, x); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; } EXPORT CONST VECTOR_CC vdouble xcosh_u35(vdouble x) { vdouble e = xexp(vabs_vd_vd(x)); vdouble y = vmla_vd_vd_vd_vd(vcast_vd_d(0.5), e, vdiv_vd_vd_vd(vcast_vd_d(0.5), e)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(709)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; } EXPORT CONST VECTOR_CC vdouble xtanh_u35(vdouble x) { vdouble d = expm1k(vmul_vd_vd_vd(vcast_vd_d(2), vabs_vd_vd(x))); 
vdouble y = vdiv_vd_vd_vd(d, vadd_vd_vd_vd(vcast_vd_d(2), d)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(18.714973875)), visnan_vo_vd(y)), vcast_vd_d(1.0), y); y = vmulsign_vd_vd_vd(y, x); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; } static INLINE CONST VECTOR_CC vdouble2 logk2(vdouble2 d) { vdouble2 x, x2, m, s; vdouble t; vint e; e = vilogbk_vi_vd(vmul_vd_vd_vd(vd2getx_vd_vd2(d), vcast_vd_d(1.0/0.75))); m = vd2setxy_vd2_vd_vd(vldexp2_vd_vd_vi(vd2getx_vd_vd2(d), vneg_vi_vi(e)), vldexp2_vd_vd_vi(vd2gety_vd_vd2(d), vneg_vi_vi(e))); x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(m, vcast_vd_d(-1)), ddadd2_vd2_vd2_vd(m, vcast_vd_d(1))); x2 = ddsqu_vd2_vd2(x); vdouble x4 = vmul_vd_vd_vd(vd2getx_vd_vd2(x2), vd2getx_vd_vd2(x2)), x8 = vmul_vd_vd_vd(x4, x4); t = POLY7(vd2getx_vd_vd2(x2), x4, x8, 0.13860436390467167910856, 0.131699838841615374240845, 0.153914168346271945653214, 0.181816523941564611721589, 0.22222224632662035403996, 0.285714285511134091777308, 0.400000000000914013309483); t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(x2), vcast_vd_d(0.666666666666664853302393)); s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), vcast_vd_vi(e)); s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); s = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd(ddmul_vd2_vd2_vd2(x2, x), t)); return s; } EXPORT CONST VECTOR_CC vdouble xasinh(vdouble x) { vdouble y = vabs_vd_vd(x); vopmask o = vgt_vo_vd_vd(y, vcast_vd_d(1)); vdouble2 d; d = vsel_vd2_vo_vd2_vd2(o, ddrec_vd2_vd(x), vcast_vd2_vd_vd(y, vcast_vd_d(0))); d = ddsqrt_vd2_vd2(ddadd2_vd2_vd2_vd(ddsqu_vd2_vd2(d), vcast_vd_d(1))); d = vsel_vd2_vo_vd2_vd2(o, ddmul_vd2_vd2_vd(d, y), d); d = logk2(ddnormalize_vd2_vd2(ddadd2_vd2_vd2_vd(d, x))); y = vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(SQRT_DBL_MAX)), visnan_vo_vd(y)), 
vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), x), y); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); y = vsel_vd_vo_vd_vd(visnegzero_vo_vd(x), vcast_vd_d(-0.0), y); return y; } EXPORT CONST VECTOR_CC vdouble xacosh(vdouble x) { vdouble2 d = logk2(ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd2(ddsqrt_vd2_vd2(ddadd2_vd2_vd_vd(x, vcast_vd_d(1))), ddsqrt_vd2_vd2(ddadd2_vd2_vd_vd(x, vcast_vd_d(-1)))), x)); vdouble y = vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(SQRT_DBL_MAX)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); y = vreinterpret_vd_vm(vandnot_vm_vo64_vm(veq_vo_vd_vd(x, vcast_vd_d(1.0)), vreinterpret_vm_vd(y))); y = vreinterpret_vd_vm(vor_vm_vo64_vm(vlt_vo_vd_vd(x, vcast_vd_d(1.0)), vreinterpret_vm_vd(y))); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; } EXPORT CONST VECTOR_CC vdouble xatanh(vdouble x) { vdouble y = vabs_vd_vd(x); vdouble2 d = logk2(dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(1), y), ddadd2_vd2_vd_vd(vcast_vd_d(1), vneg_vd_vd(y)))); y = vreinterpret_vd_vm(vor_vm_vo64_vm(vgt_vo_vd_vd(y, vcast_vd_d(1.0)), vreinterpret_vm_vd(vsel_vd_vo_vd_vd(veq_vo_vd_vd(y, vcast_vd_d(1.0)), vcast_vd_d(SLEEF_INFINITY), vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(0.5)))))); y = vmulsign_vd_vd_vd(y, x); y = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(x), visnan_vo_vd(y)), vreinterpret_vm_vd(y))); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; } EXPORT CONST VECTOR_CC vdouble xcbrt(vdouble d) { vdouble x, y, q = vcast_vd_d(1.0); vint e, qu, re; vdouble t; #if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA) vdouble s = d; #endif e = vadd_vi_vi_vi(vilogbk_vi_vd(vabs_vd_vd(d)), vcast_vi_i(1)); d = vldexp2_vd_vd_vi(d, vneg_vi_vi(e)); t = vadd_vd_vd_vd(vcast_vd_vi(e), vcast_vd_d(6144)); qu = 
vtruncate_vi_vd(vmul_vd_vd_vd(t, vcast_vd_d(1.0/3.0))); re = vtruncate_vi_vd(vsub_vd_vd_vd(t, vmul_vd_vd_vd(vcast_vd_vi(qu), vcast_vd_d(3)))); q = vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(re, vcast_vi_i(1))), vcast_vd_d(1.2599210498948731647672106), q); q = vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(re, vcast_vi_i(2))), vcast_vd_d(1.5874010519681994747517056), q); q = vldexp2_vd_vd_vi(q, vsub_vi_vi_vi(qu, vcast_vi_i(2048))); q = vmulsign_vd_vd_vd(q, d); d = vabs_vd_vd(d); x = vcast_vd_d(-0.640245898480692909870982); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.96155103020039511818595)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-5.73353060922947843636166)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(6.03990368989458747961407)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-3.85841935510444988821632)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.2307275302496609725722)); y = vmul_vd_vd_vd(x, x); y = vmul_vd_vd_vd(y, y); x = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vmlapn_vd_vd_vd_vd(d, y, x), vcast_vd_d(1.0 / 3.0))); y = vmul_vd_vd_vd(vmul_vd_vd_vd(d, x), x); y = vmul_vd_vd_vd(vsub_vd_vd_vd(y, vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(2.0 / 3.0), y), vmla_vd_vd_vd_vd(y, x, vcast_vd_d(-1.0)))), q); #if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA) y = vsel_vd_vo_vd_vd(visinf_vo_vd(s), vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), s), y); y = vsel_vd_vo_vd_vd(veq_vo_vd_vd(s, vcast_vd_d(0)), vmulsign_vd_vd_vd(vcast_vd_d(0), s), y); #endif return y; } EXPORT CONST VECTOR_CC vdouble xcbrt_u1(vdouble d) { vdouble x, y, z, t; vdouble2 q2 = vcast_vd2_d_d(1, 0), u, v; vint e, qu, re; #if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA) vdouble s = d; #endif e = vadd_vi_vi_vi(vilogbk_vi_vd(vabs_vd_vd(d)), vcast_vi_i(1)); d = vldexp2_vd_vd_vi(d, vneg_vi_vi(e)); t = vadd_vd_vd_vd(vcast_vd_vi(e), vcast_vd_d(6144)); qu = vtruncate_vi_vd(vmul_vd_vd_vd(t, vcast_vd_d(1.0/3.0))); re = vtruncate_vi_vd(vsub_vd_vd_vd(t, vmul_vd_vd_vd(vcast_vd_vi(qu), vcast_vd_d(3)))); q2 = 
vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(veq_vo_vi_vi(re, vcast_vi_i(1))), vcast_vd2_d_d(1.2599210498948731907, -2.5899333753005069177e-17), q2); q2 = vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(veq_vo_vi_vi(re, vcast_vi_i(2))), vcast_vd2_d_d(1.5874010519681995834, -1.0869008194197822986e-16), q2); q2 = vd2setxy_vd2_vd_vd(vmulsign_vd_vd_vd(vd2getx_vd_vd2(q2), d), vmulsign_vd_vd_vd(vd2gety_vd_vd2(q2), d)); d = vabs_vd_vd(d); x = vcast_vd_d(-0.640245898480692909870982); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.96155103020039511818595)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-5.73353060922947843636166)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(6.03990368989458747961407)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-3.85841935510444988821632)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.2307275302496609725722)); y = vmul_vd_vd_vd(x, x); y = vmul_vd_vd_vd(y, y); x = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vmlapn_vd_vd_vd_vd(d, y, x), vcast_vd_d(1.0 / 3.0))); z = x; u = ddmul_vd2_vd_vd(x, x); u = ddmul_vd2_vd2_vd2(u, u); u = ddmul_vd2_vd2_vd(u, d); u = ddadd2_vd2_vd2_vd(u, vneg_vd_vd(x)); y = vadd_vd_vd_vd(vd2getx_vd_vd2(u), vd2gety_vd_vd2(u)); y = vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(-2.0 / 3.0), y), z); v = ddadd2_vd2_vd2_vd(ddmul_vd2_vd_vd(z, z), y); v = ddmul_vd2_vd2_vd(v, d); v = ddmul_vd2_vd2_vd2(v, q2); z = vldexp2_vd_vd_vi(vadd_vd_vd_vd(vd2getx_vd_vd2(v), vd2gety_vd_vd2(v)), vsub_vi_vi_vi(qu, vcast_vi_i(2048))); #if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) z = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), vd2getx_vd_vd2(q2)), z); z = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vreinterpret_vd_vm(vsignbit_vm_vd(vd2getx_vd_vd2(q2))), z); #else z = vsel_vd_vo_vd_vd(visinf_vo_vd(s), vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), s), z); z = vsel_vd_vo_vd_vd(veq_vo_vd_vd(s, vcast_vd_d(0)), vmulsign_vd_vd_vd(vcast_vd_d(0), s), z); #endif return z; } #endif // #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vdouble xexp2(vdouble d) { vdouble u 
= vrint_vd_vd(d), s; vint q = vrint_vi_vd(u); s = vsub_vd_vd_vd(d, u); vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); u = POLY10(s, s2, s4, s8, +0.4434359082926529454e-9, +0.7073164598085707425e-8, +0.1017819260921760451e-6, +0.1321543872511327615e-5, +0.1525273353517584730e-4, +0.1540353045101147808e-3, +0.1333355814670499073e-2, +0.9618129107597600536e-2, +0.5550410866482046596e-1, +0.2402265069591012214e+0); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.6931471805599452862e+0)); #ifdef ENABLE_FMA_DP u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(1)); #else u = vd2getx_vd_vd2(ddnormalize_vd2_vd2(ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd_vd(u, s)))); #endif u = vldexp2_vd_vd_vi(u, q); u = vsel_vd_vo_vd_vd(vge_vo_vd_vd(d, vcast_vd_d(1024)), vcast_vd_d(SLEEF_INFINITY), u); u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-2000)), vreinterpret_vm_vd(u))); return u; } EXPORT CONST VECTOR_CC vdouble xexp2_u35(vdouble d) { vdouble u = vrint_vd_vd(d), s; vint q = vrint_vi_vd(u); s = vsub_vd_vd_vd(d, u); vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4); u = POLY10(s, s2, s4, s8, +0.4434359082926529454e-9, +0.7073164598085707425e-8, +0.1017819260921760451e-6, +0.1321543872511327615e-5, +0.1525273353517584730e-4, +0.1540353045101147808e-3, +0.1333355814670499073e-2, +0.9618129107597600536e-2, +0.5550410866482046596e-1, +0.2402265069591012214e+0); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.6931471805599452862e+0)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1)); u = vldexp2_vd_vd_vi(u, q); u = vsel_vd_vo_vd_vd(vge_vo_vd_vd(d, vcast_vd_d(1024)), vcast_vd_d(SLEEF_INFINITY), u); u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-2000)), vreinterpret_vm_vd(u))); return u; } EXPORT CONST VECTOR_CC vdouble xexp10(vdouble d) { vdouble u = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(LOG10_2))), s; vint q = vrint_vi_vd(u); s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L10U), d); s = 
vmla_vd_vd_vd_vd(u, vcast_vd_d(-L10L), s);
  // The statement above completes the second reduction step of xexp10
  // (the "s =" it belongs to is on the preceding line).

  // Polynomial in s evaluated with a Horner chain of multiply-adds; the last
  // constant is ~ln(10).
  u = vcast_vd_d(+0.2411463498334267652e-3);
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1157488415217187375e-2));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.5013975546789733659e-2));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1959762320720533080e-1));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.6808936399446784138e-1));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2069958494722676234e+0));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.5393829292058536229e+0));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1171255148908541655e+1));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2034678592293432953e+1));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2650949055239205876e+1));
  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2302585092994045901e+1));

  // Final step combining u, s and the constant 1.
#ifdef ENABLE_FMA_DP
  u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(1));
#else
  // Without FMA the product and the addition of 1 go through the
  // double-double helpers; only the high part of the normalized sum is kept.
  u = vd2getx_vd_vd2(ddnormalize_vd2_vd2(ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd_vd(u, s))));
#endif

  // Apply the integer exponent q (presumably ldexp-style scaling by 2^q).
  u = vldexp2_vd_vd_vi(u, q);

  // Lanes with d > 308.25471555991671 are replaced by +infinity.
  u = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(308.25471555991671)), vcast_vd_d(SLEEF_INFINITY), u);
  // Lanes with d < -350 have their bits masked off with the comparison mask.
  u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-350)), vreinterpret_vm_vd(u)));

  return u;
}

// Base-10 exponential variant: same reduction and the same eleven
// coefficients as the Horner chain above, but evaluated through the POLY11
// macro and finished with a plain vmla instead of the FMA / double-double step.
EXPORT CONST VECTOR_CC vdouble xexp10_u35(vdouble d) {
  // u = rounded d * LOG10_2; q is the same value as a vector int.
  vdouble u = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(LOG10_2))), s;
  vint q = vrint_vi_vd(u);

  // Two-step reduction with L10U and L10L (constants defined elsewhere).
  s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L10U), d);
  s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L10L), s);

  // Powers of s consumed by the POLY11 macro (defined elsewhere).
  vdouble s2 = vmul_vd_vd_vd(s, s), s4 = vmul_vd_vd_vd(s2, s2), s8 = vmul_vd_vd_vd(s4, s4);
  u = POLY11(s, s2, s4, s8,
             +0.2411463498334267652e-3,
             +0.1157488415217187375e-2,
             +0.5013975546789733659e-2,
             +0.1959762320720533080e-1,
             +0.6808936399446784138e-1,
             +0.2069958494722676234e+0,
             +0.5393829292058536229e+0,
             +0.1171255148908541655e+1,
             +0.2034678592293432953e+1,
             +0.2650949055239205876e+1,
             +0.2302585092994045901e+1);

  u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1));

  u = vldexp2_vd_vd_vi(u, q);

  // Lanes with d > 308.25471555991671 are replaced by +infinity
  // (the call is completed on the next line).
  u = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(308.25471555991671)), vcast_vd_d(SLEEF_INFINITY),
u);
  // The "u);" above closes the +infinity selection begun on the preceding
  // line (tail of xexp10_u35).
  // Lanes with d < -350 have their bits masked off with the comparison mask.
  u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-350)), vreinterpret_vm_vd(u)));

  return u;
}

#if !defined(DETERMINISTIC)
// exp(a) - 1, built from the double-double helper expk2 (defined elsewhere):
// -1 is added to expk2's double-double result before the two parts are summed.
// a: vector of arguments. Returns the per-lane result with the explicit
// overrides below.
EXPORT CONST VECTOR_CC vdouble xexpm1(vdouble a) {
  vdouble2 d = ddadd2_vd2_vd2_vd(expk2(vcast_vd2_vd_vd(a, vcast_vd_d(0))), vcast_vd_d(-1.0));
  vdouble x = vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d));
  // a > 709.78... -> +infinity.
  x = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(a, vcast_vd_d(709.782712893383996732223)), vcast_vd_d(SLEEF_INFINITY), x);
  // a < -36.73... -> exactly -1.
  x = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(a, vcast_vd_d(-36.736800569677101399113302437)), vcast_vd_d(-1), x);
  // a == -0.0 -> -0.0 (keeps the sign of a negative zero argument).
  x = vsel_vd_vo_vd_vd(visnegzero_vo_vd(a), vcast_vd_d(-0.0), x);
  return x;
}

// Base-10 logarithm evaluated with double-double arithmetic.
// The argument is split into an exponent e and a mantissa-like value m, the
// ratio x = (m - 1) / (m + 1) is formed in double-double, and a polynomial in
// x^2 supplies the higher-order terms.
EXPORT CONST VECTOR_CC vdouble xlog10(vdouble d) {
  vdouble2 x;
  vdouble t, m, x2;

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  // Lanes below DBL_MIN are multiplied by 2^32 * 2^32 = 2^64 first; the
  // exponent is corrected by -64 after it has been extracted.
  vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN));
  d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), d);
  // Exponent is taken from d * (1.0/0.75); m is d scaled by -e through
  // vldexp3 (presumably ldexp-style scaling by 2^-e).
  vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75)));
  m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e));
  e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e);
#else
  // AVX-512 builds use the getexp / getmant helpers instead; an exponent
  // result of +infinity is replaced by 1024.
  vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75)));
  e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e);
  m = vgetmant_vd_vd(d);
#endif

  // x = (m - 1) / (m + 1) in double-double; x2 is the square of its high part.
  x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m));
  x2 = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x));

  vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4);
  t = POLY7(x2, x4, x8,
            +0.6653725819576758460e-1,
            +0.6625722782820833712e-1,
            +0.7898105214313944078e-1,
            +0.9650955035715275132e-1,
            +0.1240841409721444993e+0,
            +0.1737177927454605086e+0,
            +0.2895296546021972617e+0);

  // s starts as e times a double-double constant whose high part is
  // ~log10(2).
#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.30102999566398119802, -2.803728127785170339e-18), vcast_vd_vi(e));
#else
  // Same product with e already held as a vdouble; the call is completed on
  // the next line.
  vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.30102999566398119802,
-2.803728127785170339e-18), e);
#endif
  // (Tail of xlog10: the line above completes the AVX-512 variant of the
  // "s = e * constant" product started on the preceding line.)

  // Add x times a double-double constant whose high part is ~2/ln(10).
  s = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd2(x, vcast_vd2_d_d(0.86858896380650363334, 1.1430059694096389311e-17)));
  // Add the higher-order contribution x^3 * t.
  s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x)), t));

  // Collapse the double-double sum into a single double.
  vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s));

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  // Special cases, applied in this order: +inf -> +inf; d < 0 or NaN -> NaN;
  // d == 0 -> -inf.
  r = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), r);
  r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r);
  r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), r);
#else
  // AVX-512 builds delegate special-case handling to vfixup with a table made
  // of 4-bit fields (values 4, 3, 5, 2 at nibble positions 2, 4, 5, 6).
  // NOTE(review): presumably encodes the same cases as the branch above --
  // confirm against the fixupimm documentation.
  r = vfixup_vd_vd_vd_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0);
#endif

  return r;
}

// Base-2 logarithm evaluated with double-double arithmetic.
// Same decomposition as the base-10 routine above: exponent e, mantissa-like
// m, x = (m - 1) / (m + 1), and a polynomial in x^2.
EXPORT CONST VECTOR_CC vdouble xlog2(vdouble d) {
  vdouble2 x;
  vdouble t, m, x2;

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  // Lanes below DBL_MIN are multiplied by 2^64 first; the exponent is
  // corrected by -64 after it has been extracted.
  vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN));
  d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), d);
  vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75)));
  m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e));
  e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e);
#else
  // AVX-512 builds use getexp / getmant; an exponent of +infinity becomes 1024.
  vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75)));
  e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e);
  m = vgetmant_vd_vd(d);
#endif

  x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m));
  x2 = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x));

  vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4);
  t = POLY7(x2, x4, x8,
            +0.2211941750456081490e+0,
            +0.2200768693152277689e+0,
            +0.2623708057488514656e+0,
            +0.3205977477944495502e+0,
            +0.4121985945485324709e+0,
            +0.5770780162997058982e+0,
            +0.96179669392608091449);

  // s = e + x * (double-double constant); the constant and the closing
  // parentheses are on the next line.
#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vdouble2 s = ddadd2_vd2_vd_vd2(vcast_vd_vi(e), ddmul_vd2_vd2_vd2(x,
vcast_vd2_d_d(2.885390081777926774, 6.0561604995516736434e-18)));
  // (Tail of xlog2: the line above completes "s = e + x * constant" started on
  // the preceding line; the constant's high part is ~2/ln(2).)
#else
  vdouble2 s = ddadd2_vd2_vd_vd2(e, ddmul_vd2_vd2_vd2(x, vcast_vd2_d_d(2.885390081777926774, 6.0561604995516736434e-18)));
#endif

  // Add the higher-order contribution x^3 * t.
  s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x)), t));

  // Collapse the double-double sum into a single double.
  vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s));

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  // Special cases, applied in this order: +inf -> +inf; d < 0 or NaN -> NaN;
  // d == 0 -> -inf.
  r = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), r);
  r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r);
  r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), r);
#else
  // AVX-512 builds delegate special-case handling to vfixup.
  // NOTE(review): the nibble table presumably encodes the same cases as the
  // branch above -- confirm against the fixupimm documentation.
  r = vfixup_vd_vd_vd_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0);
#endif

  return r;
}

// Base-2 logarithm variant that keeps x = (m - 1) / (m + 1) and the
// polynomial in plain double precision; only the "e + x * constant" sum uses
// the double-double helpers.
EXPORT CONST VECTOR_CC vdouble xlog2_u35(vdouble d) {
  vdouble m, t, x, x2;

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  // Lanes below DBL_MIN are multiplied by 2^64 first; the exponent is
  // corrected by -64 after it has been extracted.
  vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN));
  d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), d);
  vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75)));
  m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e));
  e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e);
#else
  // AVX-512 builds use getexp / getmant; an exponent of +infinity becomes 1024.
  vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75)));
  e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e);
  m = vgetmant_vd_vd(d);
#endif

  // x = (m - 1) / (m + 1) and its square.
  x = vdiv_vd_vd_vd(vsub_vd_vd_vd(m, vcast_vd_d(1)), vadd_vd_vd_vd(m, vcast_vd_d(1)));
  x2 = vmul_vd_vd_vd(x, x);

  // Horner chain in x2 (assuming vmla(a, b, c) = a*b + c); the last step's
  // constant is on the next line.
  t = vcast_vd_d(+0.2211941750456081490e+0);
  t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.2200768693152277689e+0));
  t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.2623708057488514656e+0));
  t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.3205977477944495502e+0));
  t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.4121985945485324709e+0));
  t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.5770780162997058982e+0));
  t = vmla_vd_vd_vd_vd(t, x2,
vcast_vd_d(+0.96179669392608091449 ));
  // (Tail of xlog2_u35: the line above supplies the last constant of the
  // Horner chain in x2 started on the preceding line.)

  // s = e + x * 2.885390081777926774 (~2/ln(2)), kept as a double-double.
#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vdouble2 s = ddadd_vd2_vd_vd2(vcast_vd_vi(e), ddmul_vd2_vd_vd(x, vcast_vd_d(2.885390081777926774)));
#else
  vdouble2 s = ddadd_vd2_vd_vd2(e, ddmul_vd2_vd_vd(x, vcast_vd_d(2.885390081777926774)));
#endif

  // r combines t, x^3 and the collapsed sum s in one multiply-add
  // (assuming vmla(a, b, c) = a*b + c).
  vdouble r = vmla_vd_vd_vd_vd(t, vmul_vd_vd_vd(x, x2), vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s)));

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  // Special cases, applied in this order: +inf -> +inf; d < 0 or NaN -> NaN;
  // d == 0 -> -inf.
  r = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), r);
  r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r);
  r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), r);
#else
  // AVX-512 builds delegate special-case handling to vfixup.
  // NOTE(review): the nibble table presumably encodes the same cases as the
  // branch above -- confirm against the fixupimm documentation.
  r = vfixup_vd_vd_vd_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0);
#endif

  return r;
}

// log(1 + d), evaluated with double-double arithmetic.
// The exponent e is taken from dp1 = d + 1, but the reduced argument m is
// rebuilt from d itself as d*t + (t - 1) with t = 2^-e, so the rounding
// committed when forming d + 1 does not enter m.
EXPORT CONST VECTOR_CC vdouble xlog1p(vdouble d) {
  vdouble2 x;
  vdouble t, m, x2;

  vdouble dp1 = vadd_vd_vd_vd(d, vcast_vd_d(1));

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  // Lanes with dp1 below DBL_MIN are multiplied by 2^64 before the exponent
  // is extracted; e is corrected by -64 afterwards.
  vopmask o = vlt_vo_vd_vd(dp1, vcast_vd_d(DBL_MIN));
  dp1 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(dp1, vcast_vd_d((double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32))), dp1);
  vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(dp1, vcast_vd_d(1.0/0.75)));
  // t = 1 scaled by -e (presumably 2^-e); m = d*t + (t - 1).
  t = vldexp3_vd_vd_vi(vcast_vd_d(1), vneg_vi_vi(e));
  m = vmla_vd_vd_vd_vd(d, t, vsub_vd_vd_vd(t, vcast_vd_d(1)));
  e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e);
  // s = e times a double-double constant whose high part is ~ln(2).
  vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), vcast_vd_vi(e));
#else
  // AVX-512 builds take the exponent with getexp; +infinity becomes 1024.
  vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(dp1, vcast_vd_d(1.0/0.75)));
  e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e);
  t = vldexp3_vd_vd_vi(vcast_vd_d(1), vneg_vi_vi(vrint_vi_vd(e)));
  m = vmla_vd_vd_vd_vd(d, t, vsub_vd_vd_vd(t, vcast_vd_d(1)));
  vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), e);
#endif

  // The assignment to x is completed on the next line.
  x
= dddiv_vd2_vd2_vd2(vcast_vd2_vd_vd(m, vcast_vd_d(0)), ddadd_vd2_vd_vd(vcast_vd_d(2), m));
  // (Tail of xlog1p: the line above completes "x = m / (2 + m)" in
  // double-double; the "x" it assigns to is on the preceding line.)
  x2 = vmul_vd_vd_vd(vd2getx_vd_vd2(x), vd2getx_vd_vd2(x));

  // Polynomial in x2 through the POLY7 macro (defined elsewhere).
  vdouble x4 = vmul_vd_vd_vd(x2, x2), x8 = vmul_vd_vd_vd(x4, x4);
  t = POLY7(x2, x4, x8,
            0.1532076988502701353e+0,
            0.1525629051003428716e+0,
            0.1818605932937785996e+0,
            0.2222214519839380009e+0,
            0.2857142932794299317e+0,
            0.3999999999635251990e+0,
            0.6666666666667333541e+0);

  // s += 2*x, then s += x^3 * t.
  s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2)));
  s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x)), t));

  // Collapse the double-double sum into a single double.
  vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s));

  // Special cases, applied in this order: d > 1e+307 -> +inf; d < -1 or NaN
  // -> NaN; d == -1 -> -inf; d == -0.0 -> -0.0.
  r = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(1e+307)), vcast_vd_d(SLEEF_INFINITY), r);
  r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(-1)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r);
  r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(-1)), vcast_vd_d(-SLEEF_INFINITY), r);
  r = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), r);

  return r;
}

// ---- section separator ----

// Builds a vint2 from two 32-bit integers by going through the mask
// constructor vcast_vm_i_i (which of i0/i1 is the upper half is decided by
// that helper -- see the helper header).
static INLINE CONST VECTOR_CC vint2 vcast_vi2_i_i(int i0, int i1) { return vcast_vi2_vm(vcast_vm_i_i(i0, i1)); }

// Absolute value; thin wrapper around vabs_vd_vd.
EXPORT CONST VECTOR_CC vdouble xfabs(vdouble x) { return vabs_vd_vd(x); }

// copysign(x, y); thin wrapper around vcopysign_vd_vd_vd.
EXPORT CONST VECTOR_CC vdouble xcopysign(vdouble x, vdouble y) { return vcopysign_vd_vd_vd(x, y); }

// Per-lane maximum. Lanes where y is NaN return x.
EXPORT CONST VECTOR_CC vdouble xfmax(vdouble x, vdouble y) {
#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC)
  // x86 SIMD builds use the vmax helper for the non-NaN-y lanes.
  return vsel_vd_vo_vd_vd(visnan_vo_vd(y), x, vmax_vd_vd_vd(x, y));
#else
  // Elsewhere the maximum is written as an explicit compare-and-select:
  // x where x > y, otherwise y.
  return vsel_vd_vo_vd_vd(visnan_vo_vd(y), x, vsel_vd_vo_vd_vd(vgt_vo_vd_vd(x, y), x, y));
#endif
}

// Per-lane minimum. Lanes where y is NaN return x.
EXPORT CONST VECTOR_CC vdouble xfmin(vdouble x, vdouble y) {
#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC)
  // x86 SIMD builds use the vmin helper for the non-NaN-y lanes.
  return vsel_vd_vo_vd_vd(visnan_vo_vd(y), x, vmin_vd_vd_vd(x, y));
#else
  // Explicit compare-and-select: x where y > x, otherwise y.
  return vsel_vd_vo_vd_vd(visnan_vo_vd(y), x, vsel_vd_vo_vd_vd(vgt_vo_vd_vd(y, x), x, y));
#endif
}

// Positive difference; the parameter list and body continue on the next line.
EXPORT CONST VECTOR_CC vdouble xfdim(vdouble x,
vdouble y) {
  // (Body of xfdim; the start of its signature is on the preceding line.)
  // ret = x - y, replaced by 0 where the difference is negative or x == y.
  // Comparisons with NaN are false, so a NaN difference is left untouched.
  vdouble ret = vsub_vd_vd_vd(x, y);
  ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(ret, vcast_vd_d(0)), veq_vo_vd_vd(x, y)), vcast_vd_d(0), ret);
  return ret;
}

// Round toward zero.
EXPORT CONST VECTOR_CC vdouble xtrunc(vdouble x) {
#ifdef FULL_FP_ROUNDING
  return vtruncate_vd_vd(x);
#else
  // The conversion helper vtruncate_vi_vd yields a vint, so the value is
  // reduced in two steps: first remove the multiple of 2^31 obtained by
  // truncating x / 2^31, then remove the integer part of what is left.
  // fr ends up as the fractional part of x.
  vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(INT64_C(1) << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (INT64_C(1) << 31)))))));
  fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr)));
  // Infinite lanes and lanes with |x| >= 2^52 are returned unchanged;
  // otherwise x - fr is returned with the sign of x copied onto it.
  return vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), vge_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(INT64_C(1) << 52))), x, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), x));
#endif
}

// Round toward negative infinity.
EXPORT CONST VECTOR_CC vdouble xfloor(vdouble x) {
  // Fractional part of x via the same two-step reduction as in xtrunc.
  vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(INT64_C(1) << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (INT64_C(1) << 31)))))));
  fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr)));
  // A negative fraction is shifted up by 1 so that x - fr rounds downward.
  fr = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(fr, vcast_vd_d(0)), vadd_vd_vd_vd(fr, vcast_vd_d(1.0)), fr);
  // Infinite lanes and lanes with |x| >= 2^52 are returned unchanged.
  return vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), vge_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(INT64_C(1) << 52))), x, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), x));
}

// Round toward positive infinity.
EXPORT CONST VECTOR_CC vdouble xceil(vdouble x) {
  // Fractional part of x via the same two-step reduction as in xtrunc.
  vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(INT64_C(1) << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (INT64_C(1) << 31)))))));
  fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr)));
  // A positive fraction is shifted down by 1 so that x - fr rounds upward.
  fr = vsel_vd_vo_vd_vd(vle_vo_vd_vd(fr, vcast_vd_d(0)), fr, vsub_vd_vd_vd(fr, vcast_vd_d(1.0)));
  // Infinite lanes and lanes with |x| >= 2^52 are returned unchanged.
  return vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), vge_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(INT64_C(1) << 52))), x, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), x));
}

// Round to nearest integer, computed from x = d + 0.5; the function continues
// on the next line.
EXPORT CONST VECTOR_CC vdouble xround(vdouble d) {
  vdouble x = vadd_vd_vd_vd(d, vcast_vd_d(0.5));
  // First reduction step for the fractional part of x (multiple of 2^31 removed).
  vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(INT64_C(1) << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (INT64_C(1) << 31)))))));
  fr
= vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr)));
  // (Tail of xround: the line above completes the second reduction step of
  // the fractional part fr of x = d + 0.5; the "fr" it assigns to is on the
  // preceding line.)

  // Where x <= 0 and x is already an integer (fr == 0), step x down by 1.
  x = vsel_vd_vo_vd_vd(vand_vo_vo_vo(vle_vo_vd_vd(x, vcast_vd_d(0)), veq_vo_vd_vd(fr, vcast_vd_d(0))), vsub_vd_vd_vd(x, vcast_vd_d(1.0)), x);
  // A negative fraction is shifted up by 1.
  fr = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(fr, vcast_vd_d(0)), vadd_vd_vd_vd(fr, vcast_vd_d(1.0)), fr);
  // Explicit special case: for d == 0.49999999999999994449 x is forced to 0.
  // NOTE(review): presumably guards against d + 0.5 rounding up to 1.0 for
  // the largest double below 0.5 -- confirm.
  x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0.49999999999999994449)), vcast_vd_d(0), x);
  // Infinite lanes and lanes with |d| >= 2^52 are returned unchanged;
  // otherwise x - fr is returned with the sign of d copied onto it.
  return vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(d), vge_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(INT64_C(1) << 52))), d, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), d));
}

// Round to integer in the current rounding mode.
EXPORT CONST VECTOR_CC vdouble xrint(vdouble d) {
#ifdef FULL_FP_ROUNDING
  return vrint_vd_vd(d);
#else
  // c = 2^52 carrying the sign of d. Adding and then subtracting c makes the
  // floating-point addition itself discard the fraction bits. Lanes with
  // |d| > 2^52 are returned unchanged, and the sign of d is OR-ed back onto
  // the result.
  vdouble c = vmulsign_vd_vd_vd(vcast_vd_d(INT64_C(1) << 52), d);
  return vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(INT64_C(1) << 52)), d, vorsign_vd_vd_vd(vsub_vd_vd_vd(vadd_vd_vd_vd(d, c), c), d));
#endif
}

// nextafter(x, y), implemented on the bit pattern of x held as pairs of
// 32-bit integers (vint2). The function continues on the next line.
// NOTE(review): the sequences that pair veq/vand/vrev21 with an add appear to
// propagate a carry or borrow between the two 32-bit halves -- confirm
// against the vint2 helper definitions.
EXPORT CONST VECTOR_CC vdouble xnextafter(vdouble x, vdouble y) {
  // A zero x is replaced by a zero carrying the sign of y.
  x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), vmulsign_vd_vd_vd(vcast_vd_d(0), y), x);
  vint2 t, xi2 = vreinterpret_vi2_vd(x);
  // c = signbit(x) XOR (y >= x); it selects the lanes whose bit pattern is
  // transformed (XOR with 0x7fffffff'ffffffff, then +1) before and after the
  // step below.
  vopmask c = vxor_vo_vo_vo(vsignbit_vo_vd(x), vge_vo_vd_vd(y, x));

  t = vadd_vi2_vi2_vi2(vxor_vi2_vi2_vi2(xi2, vcast_vi2_i_i(0x7fffffff, 0xffffffff)), vcast_vi2_i_i(0, 1));
  t = vadd_vi2_vi2_vi2(t, vrev21_vi2_vi2(vand_vi2_vi2_vi2(vcast_vi2_i_i(0, 1), veq_vi2_vi2_vi2(t, vcast_vi2_i_i(-1, 0)))));
  xi2 = vreinterpret_vi2_vd(vsel_vd_vo_vd_vd(c, vreinterpret_vd_vi2(t), vreinterpret_vd_vi2(xi2)));

  // Subtract (0, 1) from the pattern in lanes where x != y.
  xi2 = vsub_vi2_vi2_vi2(xi2, vcast_vi2_vm(vand_vm_vo64_vm(vneq_vo_vd_vd(x, y), vcast_vm_i_i(0, 1))));

  xi2 = vreinterpret_vi2_vd(vsel_vd_vo_vd_vd(vneq_vo_vd_vd(x, y),
                                             vreinterpret_vd_vi2(vadd_vi2_vi2_vi2(xi2, vrev21_vi2_vi2(vand_vi2_vi2_vi2(vcast_vi2_i_i(0, -1), veq_vi2_vi2_vi2(xi2, vcast_vi2_i_i(0, -1)))))),
                                             vreinterpret_vd_vi2(xi2)));

  // Same transform as before the step; the second statement is completed on
  // the next line.
  t = vadd_vi2_vi2_vi2(vxor_vi2_vi2_vi2(xi2, vcast_vi2_i_i(0x7fffffff, 0xffffffff)), vcast_vi2_i_i(0, 1));
  t = vadd_vi2_vi2_vi2(t, vrev21_vi2_vi2(vand_vi2_vi2_vi2(vcast_vi2_i_i(0, 1), veq_vi2_vi2_vi2(t,
vcast_vi2_i_i(-1, 0))))); xi2 = vreinterpret_vi2_vd(vsel_vd_vo_vd_vd(c, vreinterpret_vd_vi2(t), vreinterpret_vd_vi2(xi2))); vdouble ret = vreinterpret_vd_vi2(xi2); ret = vsel_vd_vo_vd_vd(vand_vo_vo_vo(veq_vo_vd_vd(ret, vcast_vd_d(0)), vneq_vo_vd_vd(x, vcast_vd_d(0))), vmulsign_vd_vd_vd(vcast_vd_d(0), x), ret); ret = vsel_vd_vo_vd_vd(vand_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(0)), veq_vo_vd_vd(y, vcast_vd_d(0))), y, ret); ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vcast_vd_d(SLEEF_NAN), ret); return ret; } EXPORT CONST VECTOR_CC vdouble xfrfrexp(vdouble x) { x = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(DBL_MIN)), vmul_vd_vd_vd(x, vcast_vd_d(UINT64_C(1) << 63)), x); vmask xm = vreinterpret_vm_vd(x); xm = vand_vm_vm_vm(xm, vcast_vm_i_i(~0x7ff00000, ~0)); xm = vor_vm_vm_vm (xm, vcast_vm_i_i( 0x3fe00000, 0)); vdouble ret = vreinterpret_vd_vm(xm); ret = vsel_vd_vo_vd_vd(visinf_vo_vd(x), vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), x), ret); ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), x, ret); return ret; } EXPORT CONST VECTOR_CC vint xexpfrexp(vdouble x) { x = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(DBL_MIN)), vmul_vd_vd_vd(x, vcast_vd_d(UINT64_C(1) << 63)), x); vint ret = vcastu_vi_vi2(vreinterpret_vi2_vd(x)); ret = vsub_vi_vi_vi(vand_vi_vi_vi(vsrl_vi_vi_i(ret, 20), vcast_vi_i(0x7ff)), vcast_vi_i(0x3fe)); ret = vsel_vi_vo_vi_vi(vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(0)), visnan_vo_vd(x)), visinf_vo_vd(x)), vcast_vi_i(0), ret); return ret; } EXPORT CONST VECTOR_CC vdouble xfma(vdouble x, vdouble y, vdouble z) { #ifdef ENABLE_FMA_DP return vfma_vd_vd_vd_vd(x, y, z); #else vdouble h2 = vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z), q = vcast_vd_d(1); vopmask o = vlt_vo_vd_vd(vabs_vd_vd(h2), vcast_vd_d(1e-300)); { const double c0 = UINT64_C(1) << 54, c1 = c0 * c0, c2 = c1 * c1; x = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(x, vcast_vd_d(c1)), x); y = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(y, vcast_vd_d(c1)), y); 
    z = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(z, vcast_vd_d(c2)), z);
    q = vsel_vd_vo_vd_vd(o, vcast_vd_d(1.0 / c2), q);
    // (Continuation of the non-FMA path of xfma: the two statements above
    // finish the scale-up block opened on the preceding line; q = 1/c2 undoes
    // the scaling at the end.)
  }
  // Huge estimate (|h2| > 1e+300): scale x and y down by 1/c1 each and z by
  // 1/c2, with c1 = 2^108 and c2 = 2^216; q = c2 undoes it at the end.
  o = vgt_vo_vd_vd(vabs_vd_vd(h2), vcast_vd_d(1e+300));
  {
    const double c0 = UINT64_C(1) << 54, c1 = c0 * c0, c2 = c1 * c1;
    x = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(x, vcast_vd_d(1.0 / c1)), x);
    y = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(y, vcast_vd_d(1.0 / c1)), y);
    z = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(z, vcast_vd_d(1.0 / c2)), z);
    q = vsel_vd_vo_vd_vd(o, vcast_vd_d(c2), q);
  }
  // Product in double-double, then z added, then collapsed to one double.
  vdouble2 d = ddmul_vd2_vd_vd(x, y);
  d = ddadd2_vd2_vd2_vd(d, z);
  // If either factor is zero the result is z itself.
  vdouble ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(0)), veq_vo_vd_vd(y, vcast_vd_d(0))), z, vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)));
  // o = z is infinite while x and y are both neither infinite nor NaN
  // (assuming vandnot(a, b) computes ~a & b -- TODO confirm). In those lanes
  // the estimate h2 is replaced by z.
  o = visinf_vo_vd(z);
  o = vandnot_vo_vo_vo(visinf_vo_vd(x), o);
  o = vandnot_vo_vo_vo(visnan_vo_vd(x), o);
  o = vandnot_vo_vo_vo(visinf_vo_vd(y), o);
  o = vandnot_vo_vo_vo(visnan_vo_vd(y), o);
  h2 = vsel_vd_vo_vd_vd(o, z, h2);

  // Lanes whose estimate is infinite or NaN return the estimate; all other
  // lanes return the double-double result multiplied by q.
  o = vor_vo_vo_vo(visinf_vo_vd(h2), visnan_vo_vd(h2));

  return vsel_vd_vo_vd_vd(o, h2, vmul_vd_vd_vd(ret, q));
#endif
}

// Software square root. Two implementations are selected at compile time;
// this line holds the start of the FMA one, which continues on the next line.
SQRTU05_FUNCATR VECTOR_CC vdouble xsqrt_u05(vdouble d) {
#if defined(ENABLE_FMA_DP)
  vdouble q, w, x, y, z;

  // Negative lanes become NaN up front.
  d = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_NAN), d);

  // Very small lanes (d < 8.636168555094445E-78) are multiplied by
  // 1.157920892373162E77; q records the factor (2.9387358770557188E-39, else
  // 1) that is multiplied back onto the result at the end.
  vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(8.636168555094445E-78));
  d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d(1.157920892373162E77)), d);
  q = vsel_vd_vo_vd_vd(o, vcast_vd_d(2.9387358770557188E-39), vcast_vd_d(1));

  // Initial estimate y from integer arithmetic on the bit pattern of d:
  // a magic constant minus the pattern shifted right by one.
  // NOTE(review): presumably a reciprocal-square-root seed -- confirm.
  y = vreinterpret_vd_vi2(vsub_vi2_vi2_vi2(vcast_vi2_i_i(0x5fe6ec85, 0xe7de30da), vsrl_vi2_vi2_i(vreinterpret_vi2_vd(d), 1)));

  // Three identical refinement rounds on the pair (x, w), starting from
  // x = d*y and w = 0.5*y; a fourth step begins on the last line.
  x = vmul_vd_vd_vd(d, y);         w = vmul_vd_vd_vd(vcast_vd_d(0.5), y);
  y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5));
  x = vfma_vd_vd_vd_vd(x, y, x);   w = vfma_vd_vd_vd_vd(w, y, w);
  y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5));
  x = vfma_vd_vd_vd_vd(x, y, x);   w = vfma_vd_vd_vd_vd(w, y, w);
  y = vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(0.5));
  x = vfma_vd_vd_vd_vd(x, y, x);   w = vfma_vd_vd_vd_vd(w, y, w);

  y =
vfmanp_vd_vd_vd_vd(x, w, vcast_vd_d(1.5));
  // (Continuation of the FMA path of xsqrt_u05: the line above completes a
  // "y =" assignment started on the preceding line.)
  w = vadd_vd_vd_vd(w, w);
  w = vmul_vd_vd_vd(w, y);
  // x = w*d is the working estimate; y, z and the final w form a correction
  // built from fused operations on w, d and x.
  x = vmul_vd_vd_vd(w, d);
  y = vfmapn_vd_vd_vd_vd(w, d, x); z = vfmanp_vd_vd_vd_vd(w, x, vcast_vd_d(1));

  z = vfmanp_vd_vd_vd_vd(w, y, z); w = vmul_vd_vd_vd(vcast_vd_d(0.5), x);
  w = vfma_vd_vd_vd_vd(w, z, y);
  w = vadd_vd_vd_vd(w, x);

  // Undo the input scaling.
  w = vmul_vd_vd_vd(w, q);

  // d == 0 or d == +inf returns d itself; d < 0 returns NaN.
  w = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(d, vcast_vd_d(0)),
                                    veq_vo_vd_vd(d, vcast_vd_d(SLEEF_INFINITY))), d, w);

  w = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_NAN), w);

  return w;
#else
  // Non-FMA implementation.
  vdouble q;
  vopmask o;

  // Negative lanes become NaN up front.
  d = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_NAN), d);

  // Very small lanes are scaled up, very large lanes (d > 1.34e+154) are
  // scaled down. q holds the matching factor for the result, already
  // multiplied by 0.5; it is applied in the final multiplication below.
  o = vlt_vo_vd_vd(d, vcast_vd_d(8.636168555094445E-78));
  d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d(1.157920892373162E77)), d);
  q = vsel_vd_vo_vd_vd(o, vcast_vd_d(2.9387358770557188E-39*0.5), vcast_vd_d(0.5));

  o = vgt_vo_vd_vd(d, vcast_vd_d(1.3407807929942597e+154));
  d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d(7.4583407312002070e-155)), d);
  q = vsel_vd_vo_vd_vd(o, vcast_vd_d(1.1579208923731620e+77*0.5), q);

  // Initial estimate from integer arithmetic on the bit pattern of
  // d + 1e-320: a magic constant minus the pattern shifted right by one.
  // NOTE(review): presumably a reciprocal-square-root seed -- confirm.
  vdouble x = vreinterpret_vd_vi2(vsub_vi2_vi2_vi2(vcast_vi2_i_i(0x5fe6ec86, 0), vsrl_vi2_vi2_i(vreinterpret_vi2_vd(vadd_vd_vd_vd(d, vcast_vd_d(1e-320))), 1)));

  // Three rounds of x = x * (1.5 - 0.5*d*x*x).
  x = vmul_vd_vd_vd(x, vsub_vd_vd_vd(vcast_vd_d(1.5), vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(0.5), d), x), x)));
  x = vmul_vd_vd_vd(x, vsub_vd_vd_vd(vcast_vd_d(1.5), vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(0.5), d), x), x)));
  x = vmul_vd_vd_vd(x, vsub_vd_vd_vd(vcast_vd_d(1.5), vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(0.5), d), x), x)));
  x = vmul_vd_vd_vd(x, d);

  // Final correction in double-double: (d + x*x) * (1/x), then multiplied by
  // q (which carries the 0.5 and the scaling factor).
  vdouble2 d2 = ddmul_vd2_vd2_vd2(ddadd2_vd2_vd_vd2(d, ddmul_vd2_vd_vd(x, x)), ddrec_vd2_vd(x));

  x = vmul_vd_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(d2), vd2gety_vd_vd2(d2)), q);

  // +inf returns +inf; zero returns d itself.
  x = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), x);
  x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, x);

  return x;
#endif
}

// Specifiers of the definition that starts on the next line.
EXPORT CONST VECTOR_CC
vdouble xsqrt(vdouble d) { #if defined(ACCURATE_SQRT) return vsqrt_vd_vd(d); #else // fall back to approximation if ACCURATE_SQRT is undefined return xsqrt_u05(d); #endif } EXPORT CONST VECTOR_CC vdouble xsqrt_u35(vdouble d) { return xsqrt_u05(d); } EXPORT CONST VECTOR_CC vdouble xhypot_u05(vdouble x, vdouble y) { x = vabs_vd_vd(x); y = vabs_vd_vd(y); vdouble min = vmin_vd_vd_vd(x, y), n = min; vdouble max = vmax_vd_vd_vd(x, y), d = max; vopmask o = vlt_vo_vd_vd(max, vcast_vd_d(DBL_MIN)); n = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(n, vcast_vd_d(UINT64_C(1) << 54)), n); d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d(UINT64_C(1) << 54)), d); vdouble2 t = dddiv_vd2_vd2_vd2(vcast_vd2_vd_vd(n, vcast_vd_d(0)), vcast_vd2_vd_vd(d, vcast_vd_d(0))); t = ddmul_vd2_vd2_vd(ddsqrt_vd2_vd2(ddadd2_vd2_vd2_vd(ddsqu_vd2_vd2(t), vcast_vd_d(1))), max); vdouble ret = vadd_vd_vd_vd(vd2getx_vd_vd2(t), vd2gety_vd_vd2(t)); ret = vsel_vd_vo_vd_vd(visnan_vo_vd(ret), vcast_vd_d(SLEEF_INFINITY), ret); ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(min, vcast_vd_d(0)), max, ret); ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vcast_vd_d(SLEEF_NAN), ret); ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(SLEEF_INFINITY)), veq_vo_vd_vd(y, vcast_vd_d(SLEEF_INFINITY))), vcast_vd_d(SLEEF_INFINITY), ret); return ret; } EXPORT CONST VECTOR_CC vdouble xhypot_u35(vdouble x, vdouble y) { x = vabs_vd_vd(x); y = vabs_vd_vd(y); vdouble min = vmin_vd_vd_vd(x, y); vdouble max = vmax_vd_vd_vd(x, y); vdouble t = vdiv_vd_vd_vd(min, max); vdouble ret = vmul_vd_vd_vd(max, vsqrt_vd_vd(vmla_vd_vd_vd_vd(t, t, vcast_vd_d(1)))); ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(min, vcast_vd_d(0)), max, ret); ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vcast_vd_d(SLEEF_NAN), ret); ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(SLEEF_INFINITY)), veq_vo_vd_vd(y, vcast_vd_d(SLEEF_INFINITY))), vcast_vd_d(SLEEF_INFINITY), ret); return ret; } static INLINE CONST 
VECTOR_CC vdouble vtoward0(vdouble x) { // returns nextafter(x, 0)
  // Adds -1 (all bits set in both 32-bit halves) to the 64-bit pattern of x,
  // i.e. decrements the bit pattern by one. Zero lanes are special-cased and
  // return 0.
  vdouble t = vreinterpret_vd_vm(vadd64_vm_vm_vm(vreinterpret_vm_vd(x), vcast_vm_i_i(-1, -1)));
  return vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), vcast_vd_d(0), t);
}

static INLINE CONST VECTOR_CC vdouble vptrunc(vdouble x) { // round to integer toward 0, positive argument only
#ifdef FULL_FP_ROUNDING
  return vtruncate_vd_vd(x);
#else
  // Fractional part in two steps: first remove the multiple of 2^31 obtained
  // by truncating x / 2^31, then remove the integer part of what is left
  // (assuming vmla(a, b, c) = a*b + c).
  vdouble fr = vmla_vd_vd_vd_vd(vcast_vd_d(-(double)(INT64_C(1) << 31)), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (INT64_C(1) << 31))))), x);
  fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr)));
  // Lanes with |x| >= 2^52 are returned unchanged.
  return vsel_vd_vo_vd_vd(vge_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(INT64_C(1) << 52)), x, vsub_vd_vd_vd(x, fr));
#endif
}

/* TODO AArch64: potential optimization by using `vfmad_lane_f64` */
// fmod(x, y) by iterated quotient subtraction: the remainder r is held in
// double-double and reduced by q*d per iteration, where q is a truncated
// estimate of r/d. The function continues on the next line.
EXPORT CONST VECTOR_CC vdouble xfmod(vdouble x, vdouble y) {
  // n = |x|, d = |y|; s is the factor that undoes the scaling below.
  vdouble n = vabs_vd_vd(x), d = vabs_vd_vd(y), s = vcast_vd_d(1), q;
  // When d is below DBL_MIN, n and d are both multiplied by 2^54 and s
  // becomes 2^-54.
  vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN));
  n = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(n, vcast_vd_d(UINT64_C(1) << 54)), n);
  d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d(UINT64_C(1) << 54)), d);
  s = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(s , vcast_vd_d(1.0 / (UINT64_C(1) << 54))), s);
  vdouble2 r = vcast_vd2_vd_vd(n, vcast_vd_d(0));
  // Reciprocal of d nudged toward zero, so the quotient estimate is not
  // rounded upward.
  vdouble rd = vtoward0(vrec_vd_vd(d));

  // At most 21 iterations.
  // NOTE(review): the original note here read "ceil(log2(DBL_MAX) / 52)";
  // that expression evaluates to 20, not 21 -- confirm the intended bound.
  for(int i=0;i<21;i++) {
    q = vptrunc(vmul_vd_vd_vd(vtoward0(vd2getx_vd_vd2(r)), rd));
#ifndef ENABLE_FMA_DP
    // Without FMA the lowest bit of q's pattern is cleared.
    // NOTE(review): presumably so that q*d is exact in the double-double
    // product -- confirm.
    q = vreinterpret_vd_vm(vand_vm_vm_vm(vreinterpret_vm_vd(q), vcast_vm_i_i(0xffffffff, 0xfffffffe)));
#endif
    // Explicit small quotients: d <= r < 3d gives q = 2, then d <= r < 2d
    // overrides that with q = 1.
    q = vsel_vd_vo_vd_vd(vand_vo_vo_vo(vgt_vo_vd_vd(vmul_vd_vd_vd(vcast_vd_d(3), d), vd2getx_vd_vd2(r)),
                                       vge_vo_vd_vd(vd2getx_vd_vd2(r), d)),
                         vcast_vd_d(2), q);
    q = vsel_vd_vo_vd_vd(vand_vo_vo_vo(vgt_vo_vd_vd(vadd_vd_vd_vd(d, d), vd2getx_vd_vd2(r)),
                                       vge_vo_vd_vd(vd2getx_vd_vd2(r), d)),
                         vcast_vd_d(1), q);
    // r -= q*d, in double-double.
    r = ddnormalize_vd2_vd2(ddadd2_vd2_vd2_vd2(r, ddmul_vd2_vd_vd(q, vneg_vd_vd(d))));
    // Loop-exit test; its condition is on the next line.
    if
(vtestallones_i_vo64(vlt_vo_vd_vd(vd2getx_vd_vd2(r), d))) break;
    // (Tail of xfmod: the condition above belongs to the "if" on the
    // preceding line; the loop stops once every lane has r < d.)
  }

  // Undo the input scaling.
  vdouble ret = vmul_vd_vd_vd(vd2getx_vd_vd2(r), s);
  // A remainder whose two parts sum exactly to d is reported as 0.
  ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(vadd_vd_vd_vd(vd2getx_vd_vd2(r), vd2gety_vd_vd2(r)), d), vcast_vd_d(0), ret);

  // Apply the sign of x to the result.
  ret = vmulsign_vd_vd_vd(ret, x);

  // |x| < |y|: the result is x itself. d == 0: NaN.
  ret = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(n, d), x, ret);
  ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_NAN), ret);

  return ret;
}

// Round to integer, used for the quotient in the remainder routine below.
static INLINE VECTOR_CC vdouble vrintk2_vd_vd(vdouble d) {
#ifdef FULL_FP_ROUNDING
  return vrint_vd_vd(d);
#else
  // c = 2^52 carrying the sign of d; adding and then subtracting c discards
  // the fraction bits. Lanes with |d| > 2^52 are returned unchanged and the
  // sign of d is OR-ed back onto the result.
  vdouble c = vmulsign_vd_vd_vd(vcast_vd_d(INT64_C(1) << 52), d);
  return vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(INT64_C(1) << 52)), d, vorsign_vd_vd_vd(vsub_vd_vd_vd(vadd_vd_vd_vd(d, c), c), d));
#endif
}

// remainder(x, y) by iterated quotient subtraction with a rounded (not
// truncated) quotient estimate; qisodd tracks the parity of the accumulated
// quotient for the tie case. The function continues on the next line.
EXPORT CONST VECTOR_CC vdouble xremainder(vdouble x, vdouble y) {
  // n = |x|, d = |y|; s is the factor that undoes the scaling below.
  vdouble n = vabs_vd_vd(x), d = vabs_vd_vd(y), s = vcast_vd_d(1), q;
  // When d is below 2*DBL_MIN, n and d are both multiplied by 2^54 and s
  // becomes 2^-54.
  vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN*2));
  n = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(n, vcast_vd_d(UINT64_C(1) << 54)), n);
  d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d(UINT64_C(1) << 54)), d);
  s = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(s , vcast_vd_d(1.0 / (UINT64_C(1) << 54))), s);
  vdouble rd = vrec_vd_vd(d);
  vdouble2 r = vcast_vd2_vd_vd(n, vcast_vd_d(0));
  // 0 != 0 is false in every lane: an all-false mask.
  vopmask qisodd = vneq_vo_vd_vd(vcast_vd_d(0), vcast_vd_d(0));

  // At most 21 iterations.
  // NOTE(review): the original note here read "ceil(log2(DBL_MAX) / 52)";
  // that expression evaluates to 20, not 21 -- confirm the intended bound.
  for(int i=0;i<21;i++) {
    q = vrintk2_vd_vd(vmul_vd_vd_vd(vd2getx_vd_vd2(r), rd));
#ifndef ENABLE_FMA_DP
    // Without FMA the lowest bit of q's pattern is cleared.
    // NOTE(review): presumably so that q*d is exact in the double-double
    // product -- confirm.
    q = vreinterpret_vd_vm(vand_vm_vm_vm(vreinterpret_vm_vd(q), vcast_vm_i_i(0xffffffff, 0xfffffffe)));
#endif
    // |r| < 1.5*d: q is forced to +/-1 with the sign of r.
    q = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(vabs_vd_vd(vd2getx_vd_vd2(r)), vmul_vd_vd_vd(d, vcast_vd_d(1.5))), vmulsign_vd_vd_vd(vcast_vd_d(1.0), vd2getx_vd_vd2(r)), q);
    // |r| < 0.5*d, or |r| == 0.5*d while the accumulated quotient is even:
    // q = 0, i.e. this lane is finished.
    q = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(vabs_vd_vd(vd2getx_vd_vd2(r)), vmul_vd_vd_vd(d, vcast_vd_d(0.5))),
                                      vandnot_vo_vo_vo(qisodd, veq_vo_vd_vd(vabs_vd_vd(vd2getx_vd_vd2(r)), vmul_vd_vd_vd(d, vcast_vd_d(0.5))))),
                         vcast_vd_d(0.0), q);
    // Loop-exit test (all lanes have q == 0); completed on the next line.
    if (vtestallones_i_vo64(veq_vo_vd_vd(q,
vcast_vd_d(0)))) break; q = vsel_vd_vo_vd_vd(visinf_vo_vd(vmul_vd_vd_vd(q, vneg_vd_vd(d))), vadd_vd_vd_vd(q, vmulsign_vd_vd_vd(vcast_vd_d(-1), vd2getx_vd_vd2(r))), q); qisodd = vxor_vo_vo_vo(qisodd, visodd_vo_vd(q)); r = ddnormalize_vd2_vd2(ddadd2_vd2_vd2_vd2(r, ddmul_vd2_vd_vd(q, vneg_vd_vd(d)))); } vdouble ret = vmul_vd_vd_vd(vd2getx_vd_vd2(r), s); ret = vmulsign_vd_vd_vd(ret, x); ret = vsel_vd_vo_vd_vd(visinf_vo_vd(y), vsel_vd_vo_vd_vd(visinf_vo_vd(x), vcast_vd_d(SLEEF_NAN), x), ret); ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_NAN), ret); return ret; } #if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) typedef struct { vdouble2 a, b; } dd2; static dd2 dd2setab_dd2_vd2_vd2(vdouble2 a, vdouble2 b) { dd2 r = { a, b }; return r; } static vdouble2 dd2geta_vd2_dd2(dd2 d) { return d.a; } static vdouble2 dd2getb_vd2_dd2(dd2 d) { return d.b; } #endif /* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ static CONST dd2 gammak(vdouble a) { vdouble2 clc = vcast_vd2_d_d(0, 0), clln = vcast_vd2_d_d(1, 0), clld = vcast_vd2_d_d(1, 0); vdouble2 v = vcast_vd2_d_d(1, 0), x, y, z; vdouble t, u; vopmask otiny = vlt_vo_vd_vd(vabs_vd_vd(a), vcast_vd_d(1e-306)), oref = vlt_vo_vd_vd(a, vcast_vd_d(0.5)); x = vsel_vd2_vo_vd2_vd2(otiny, vcast_vd2_d_d(0, 0), vsel_vd2_vo_vd2_vd2(oref, ddadd2_vd2_vd_vd(vcast_vd_d(1), vneg_vd_vd(a)), vcast_vd2_vd_vd(a, vcast_vd_d(0)))); vopmask o0 = vand_vo_vo_vo(vle_vo_vd_vd(vcast_vd_d(0.5), vd2getx_vd_vd2(x)), vle_vo_vd_vd(vd2getx_vd_vd2(x), vcast_vd_d(1.1))); vopmask o2 = vle_vo_vd_vd(vcast_vd_d(2.3), vd2getx_vd_vd2(x)); y = ddnormalize_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(1)), x)); y = ddnormalize_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(2)), y)); y = ddnormalize_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(3)), y)); y = ddnormalize_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(4)), y)); vopmask o = vand_vo_vo_vo(o2, 
vle_vo_vd_vd(vd2getx_vd_vd2(x), vcast_vd_d(7))); clln = vsel_vd2_vo_vd2_vd2(o, y, clln); x = vsel_vd2_vo_vd2_vd2(o, ddadd2_vd2_vd2_vd(x, vcast_vd_d(5)), x); t = vsel_vd_vo_vd_vd(o2, vrec_vd_vd(vd2getx_vd_vd2(x)), vd2getx_vd_vd2(ddnormalize_vd2_vd2(ddadd2_vd2_vd2_vd(x, vsel_vd_vo_d_d(o0, -1, -2))))); u = vsel_vd_vo_vo_d_d_d(o2, o0, -156.801412704022726379848862, +0.2947916772827614196e+2, +0.7074816000864609279e-7); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +1.120804464289911606838558160000, +0.1281459691827820109e+3, +0.4009244333008730443e-6)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +13.39798545514258921833306020000, +0.2617544025784515043e+3, +0.1040114641628246946e-5)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.116546276599463200848033357000, +0.3287022855685790432e+3, +0.1508349150733329167e-5)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -1.391801093265337481495562410000, +0.2818145867730348186e+3, +0.1288143074933901020e-5)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.015056113040026424412918973400, +0.1728670414673559605e+3, +0.4744167749884993937e-6)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.179540117061234856098844714000, +0.7748735764030416817e+2, -0.6554816306542489902e-7)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.002481743600264997730942489280, +0.2512856643080930752e+2, -0.3189252471452599844e-6)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.029527880945699120504851034100, +0.5766792106140076868e+1, +0.1358883821470355377e-6)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.000540164767892604515196325186, +0.7270275473996180571e+0, -0.4343931277157336040e-6)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.006403362833808069794787256200, +0.8396709124579147809e-1, +0.9724785897406779555e-6)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.000162516262783915816896611252, -0.8211558669746804595e-1, 
-0.2036886057225966011e-5)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.001914438498565477526465972390, +0.6828831828341884458e-1, +0.4373363141819725815e-5)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +7.20489541602001055898311517e-05, -0.7712481339961671511e-1, -0.9439951268304008677e-5)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.000839498720672087279971000786, +0.8337492023017314957e-1, +0.2050727030376389804e-4)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -5.17179090826059219329394422e-05, -0.9094964931456242518e-1, -0.4492620183431184018e-4)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.000592166437353693882857342347, +0.1000996313575929358e+0, +0.9945751236071875931e-4)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +6.97281375836585777403743539e-05, -0.1113342861544207724e+0, -0.2231547599034983196e-3)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.000784039221720066627493314301, +0.1255096673213020875e+0, +0.5096695247101967622e-3)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.000229472093621399176949318732, -0.1440498967843054368e+0, -0.1192753911667886971e-2)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.002681327160493827160473958490, +0.1695571770041949811e+0, +0.2890510330742210310e-2)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.003472222222222222222175164840, -0.2073855510284092762e+0, -0.7385551028674461858e-2)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.083333333333333333335592087900, +0.2705808084277815939e+0, +0.2058080842778455335e-1)); y = ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(-0.5)), logk2(x)); y = ddadd2_vd2_vd2_vd2(y, ddneg_vd2_vd2(x)); y = ddadd2_vd2_vd2_vd2(y, vcast_vd2_d_d(0.91893853320467278056, -3.8782941580672414498e-17)); // 0.5*log(2*M_PI) z = ddadd2_vd2_vd2_vd(ddmul_vd2_vd_vd (u, t), vsel_vd_vo_d_d(o0, -0.4006856343865314862e+0, -0.6735230105319810201e-1)); z = 
ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd(z, t), vsel_vd_vo_d_d(o0, +0.8224670334241132030e+0, +0.3224670334241132030e+0)); z = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd(z, t), vsel_vd_vo_d_d(o0, -0.5772156649015328655e+0, +0.4227843350984671345e+0)); z = ddmul_vd2_vd2_vd(z, t); clc = vsel_vd2_vo_vd2_vd2(o2, y, z); clld = vsel_vd2_vo_vd2_vd2(o2, ddadd2_vd2_vd2_vd(ddmul_vd2_vd_vd(u, t), vcast_vd_d(1)), clld); y = clln; clc = vsel_vd2_vo_vd2_vd2(otiny, vcast_vd2_d_d(83.1776616671934334590333, 3.67103459631568507221878e-15), // log(2^120) vsel_vd2_vo_vd2_vd2(oref, ddadd2_vd2_vd2_vd2(vcast_vd2_d_d(1.1447298858494001639, 1.026595116270782638e-17), ddneg_vd2_vd2(clc)), clc)); // log(M_PI) clln = vsel_vd2_vo_vd2_vd2(otiny, vcast_vd2_d_d(1, 0), vsel_vd2_vo_vd2_vd2(oref, clln, clld)); if (!vtestallones_i_vo64(vnot_vo64_vo64(oref))) { t = vsub_vd_vd_vd(a, vmul_vd_vd_vd(vcast_vd_d(INT64_C(1) << 28), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(a, vcast_vd_d(1.0 / (INT64_C(1) << 28))))))); x = ddmul_vd2_vd2_vd2(clld, sinpik(t)); } clld = vsel_vd2_vo_vd2_vd2(otiny, vcast_vd2_vd_vd(vmul_vd_vd_vd(a, vcast_vd_d((INT64_C(1) << 60)*(double)(INT64_C(1) << 60))), vcast_vd_d(0)), vsel_vd2_vo_vd2_vd2(oref, x, y)); return dd2setab_dd2_vd2_vd2(clc, dddiv_vd2_vd2_vd2(clln, clld)); } EXPORT CONST VECTOR_CC vdouble xtgamma_u1(vdouble a) { dd2 d = gammak(a); vdouble2 y = ddmul_vd2_vd2_vd2(expk2(dd2geta_vd2_dd2(d)), dd2getb_vd2_dd2(d)); vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(y), vd2gety_vd_vd2(y)); vopmask o; o = vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vd_vd(a, vcast_vd_d(-SLEEF_INFINITY)), vand_vo_vo_vo(vlt_vo_vd_vd(a, vcast_vd_d(0)), visint_vo_vd(a))), vand_vo_vo_vo(vand_vo_vo_vo(visnumber_vo_vd(a), vlt_vo_vd_vd(a, vcast_vd_d(0))), visnan_vo_vd(r))); r = vsel_vd_vo_vd_vd(o, vcast_vd_d(SLEEF_NAN), r); o = vand_vo_vo_vo(vand_vo_vo_vo(vor_vo_vo_vo(veq_vo_vd_vd(a, vcast_vd_d(SLEEF_INFINITY)), visnumber_vo_vd(a)), vge_vo_vd_vd(a, vcast_vd_d(-DBL_MIN))), vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vd_vd(a, vcast_vd_d(0)), 
vgt_vo_vd_vd(a, vcast_vd_d(200))), visnan_vo_vd(r)));
  r = vsel_vd_vo_vd_vd(o, vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), a), r);

  return r;
}

// xlgamma_u1: vectorized log-gamma built on gammak().
//
// gammak(a) returns a pair of double-double values; the result is
// first + logk2(|second|), collapsed to one vdouble by adding the two halves.
//
// The result is forced to SLEEF_INFINITY when a is infinite, when a <= 0 and
// visint_vo_vd(a) holds (presumably "a is an integer"), or when a is a number
// but the computed value is NaN.
EXPORT CONST VECTOR_CC vdouble xlgamma_u1(vdouble a) {
  dd2 d = gammak(a);
  vdouble2 y = ddadd2_vd2_vd2_vd2(dd2geta_vd2_dd2(d), logk2(ddabs_vd2_vd2(dd2getb_vd2_dd2(d))));
  vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(y), vd2gety_vd_vd2(y));
  vopmask o;

  o = vor_vo_vo_vo(visinf_vo_vd(a),
		   vor_vo_vo_vo(vand_vo_vo_vo(vle_vo_vd_vd(a, vcast_vd_d(0)), visint_vo_vd(a)),
				vand_vo_vo_vo(visnumber_vo_vd(a), visnan_vo_vd(r))));
  r = vsel_vd_vo_vd_vd(o, vcast_vd_d(SLEEF_INFINITY), r);

  return r;
}

/* TODO AArch64: potential optimization by using `vfmad_lane_f64` */
// xerf_u1: vectorized erf().
//
// The sign of the input is kept in s and the computation runs on |a|.
// Three masks pick the coefficient set per lane:
//   o0: |a| < 1.0, o1: |a| < 3.7, o2: |a| < 6.0.
// The polynomial variable u is a*a where o0 holds and |a| elsewhere.
// Each step below feeds t, u and a mask-selected constant to
// vmla_vd_vd_vd_vd (presumably t*u + c, i.e. one Horner step).
EXPORT CONST VECTOR_CC vdouble xerf_u1(vdouble a) {
  vdouble s = a, t, u;
  vdouble2 d;

  a = vabs_vd_vd(a);
  vopmask o0 = vlt_vo_vd_vd(a, vcast_vd_d(1.0));
  vopmask o1 = vlt_vo_vd_vd(a, vcast_vd_d(3.7));
  vopmask o2 = vlt_vo_vd_vd(a, vcast_vd_d(6.0));
  u = vsel_vd_vo_vd_vd(o0, vmul_vd_vd_vd(a, a), a);

  t = vsel_vd_vo_vo_d_d_d(o0, o1, +0.6801072401395392157e-20, +0.2830954522087717660e-13, -0.5846750404269610493e-17);
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.2161766247570056391e-18, -0.1509491946179481940e-11, +0.6076691048812607898e-15));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.4695919173301598752e-17, +0.3827857177807173152e-10, -0.3007518609604893831e-13));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.9049140419888010819e-16, -0.6139733921558987241e-09, +0.9427906260824646063e-12));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1634018903557411517e-14, +0.6985387934608038824e-08, -0.2100110908269393629e-10));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.2783485786333455216e-13, -0.5988224513034371474e-07, +0.3534639523461223473e-09));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.4463221276786412722e-12, +0.4005716952355346640e-06, -0.4664967728285395926e-08));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0,
o1, -0.6711366622850138987e-11, -0.2132190104575784400e-05, +0.4943823283769000532e-07));
  // Remaining polynomial steps of xerf_u1; each constant triple is selected
  // per lane by the masks o0 (|a| < 1.0) and o1 (|a| < 3.7).
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.9422759050232658346e-10, +0.9092461304042630325e-05, -0.4271203394761148254e-06));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.1229055530100228477e-08, -0.3079188080966205457e-04, +0.3034067677404915895e-05));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1480719281585085023e-07, +0.7971413443082370762e-04, -0.1776295289066871135e-04));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.1636584469123402714e-06, -0.1387853215225442864e-03, +0.8524547630559505050e-04));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1646211436588923363e-05, +0.6469678026257590965e-04, -0.3290582944961784398e-03));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.1492565035840624866e-04, +0.4996645280372945860e-03, +0.9696966068789101157e-03));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1205533298178966496e-03, -0.1622802482842520535e-02, -0.1812527628046986137e-02));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.8548327023450851166e-03, +0.1615320557049377171e-03, -0.4725409828123619017e-03));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.5223977625442188799e-02, +0.1915262325574875607e-01, +0.2090315427924229266e-01));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.2686617064513125569e-01, -0.1027818298486033455e+00, -0.1052041921842776645e+00));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1128379167095512753e+00, -0.6366172819842503827e+00, -0.6345351808766568347e+00));
  t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.3761263890318375380e+00, -0.1128379590648910469e+01, -0.1129442929103524396e+01));
  // Final step is carried out on a vdouble2 (two-component) value: the last
  // constant is supplied as a pair of doubles.
  d = ddmul_vd2_vd_vd(t, u);

  d = ddadd2_vd2_vd2_vd2(d, vcast_vd2_vd_vd(vsel_vd_vo_vo_d_d_d(o0, o1, 1.1283791670955125586, 3.4110644736196137587e-08, 0.00024963035690526438285),
					    vsel_vd_vo_vo_d_d_d(o0, o1,
1.5335459613165822674e-17, -2.4875650708323294246e-24, -5.4362665034856259795e-21)));
  // Where o0 holds the result is d * |a|; elsewhere it is 1 - expk2(d).
  d = vsel_vd2_vo_vd2_vd2(o0, ddmul_vd2_vd2_vd(d, a), ddadd_vd2_vd_vd2(vcast_vd_d(1.0), ddneg_vd2_vd2(expk2(d))));

  // Lanes outside o2 (|a| >= 6.0) get the constant 1; vmulsign_vd_vd_vd then
  // combines the value with the original input s (presumably copying its sign).
  u = vmulsign_vd_vd_vd(vsel_vd_vo_vd_vd(o2, vadd_vd_vd_vd(vd2getx_vd_vd2(d), vd2gety_vd_vd2(d)), vcast_vd_d(1)), s);
  // NaN in, NaN out.
  u = vsel_vd_vo_vd_vd(visnan_vo_vd(a), vcast_vd_d(SLEEF_NAN), u);

  return u;
}

/* TODO AArch64: potential optimization by using `vfmad_lane_f64` */
// xerfc_u15: vectorized erfc().
//
// The original input is kept in s and the computation runs on |a|.
// Four masks select the evaluation range per lane:
//   o0: |a| < 1.0, o1: |a| < 2.2, o2: |a| < 4.2, o3: |a| < 27.3.
// The polynomial variable u (a vdouble2) is a*a where o0 holds, |a| where o1
// holds, and 1/|a| otherwise.
EXPORT CONST VECTOR_CC vdouble xerfc_u15(vdouble a) {
  vdouble s = a, r = vcast_vd_d(0), t;
  vdouble2 u, d, x;
  a = vabs_vd_vd(a);
  vopmask o0 = vlt_vo_vd_vd(a, vcast_vd_d(1.0));
  vopmask o1 = vlt_vo_vd_vd(a, vcast_vd_d(2.2));
  vopmask o2 = vlt_vo_vd_vd(a, vcast_vd_d(4.2));
  vopmask o3 = vlt_vo_vd_vd(a, vcast_vd_d(27.3));

  u = vsel_vd2_vo_vd2_vd2(o0, ddmul_vd2_vd_vd(a, a), vsel_vd2_vo_vd2_vd2(o1, vcast_vd2_vd_vd(a, vcast_vd_d(0)), dddiv_vd2_vd2_vd2(vcast_vd2_d_d(1, 0), vcast_vd2_vd_vd(a, vcast_vd_d(0)))));

  // Polynomial in the first component of u; each constant quadruple is
  // selected per lane by o0/o1/o2.
  t = vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.6801072401395386139e-20, +0.3438010341362585303e-12, -0.5757819536420710449e+2, +0.2334249729638701319e+5);
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.2161766247570055669e-18, -0.1237021188160598264e-10, +0.4669289654498104483e+3, -0.4695661044933107769e+5));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.4695919173301595670e-17, +0.2117985839877627852e-09, -0.1796329879461355858e+4, +0.3173403108748643353e+5));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.9049140419888007122e-16, -0.2290560929177369506e-08, +0.4355892193699575728e+4, +0.3242982786959573787e+4));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.1634018903557410728e-14, +0.1748931621698149538e-07, -0.7456258884965764992e+4, -0.2014717999760347811e+5));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.2783485786333451745e-13,
-0.9956602606623249195e-07, +0.9553977358167021521e+4, +0.1554006970967118286e+5));
  // Remaining polynomial steps of xerfc_u15; constants are selected per lane
  // by o0 (|a| < 1.0), o1 (|a| < 2.2) and o2 (|a| < 4.2).
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.4463221276786415752e-12, +0.4330010240640327080e-06, -0.9470019905444229153e+4, -0.6150874190563554293e+4));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.6711366622850136563e-11, -0.1435050600991763331e-05, +0.7387344321849855078e+4, +0.1240047765634815732e+4));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.9422759050232662223e-10, +0.3460139479650695662e-05, -0.4557713054166382790e+4, -0.8210325475752699731e+2));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.1229055530100229098e-08, -0.4988908180632898173e-05, +0.2207866967354055305e+4, +0.3242443880839930870e+2));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.1480719281585086512e-07, -0.1308775976326352012e-05, -0.8217975658621754746e+3, -0.2923418863833160586e+2));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.1636584469123399803e-06, +0.2825086540850310103e-04, +0.2268659483507917400e+3, +0.3457461732814383071e+0));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.1646211436588923575e-05, -0.6393913713069986071e-04, -0.4633361260318560682e+2, +0.5489730155952392998e+1));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.1492565035840623511e-04, -0.2566436514695078926e-04, +0.9557380123733945965e+1, +0.1559934132251294134e-2));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.1205533298178967851e-03, +0.5895792375659440364e-03, -0.2958429331939661289e+1, -0.1541741566831520638e+1));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.8548327023450850081e-03, -0.1695715579163588598e-02, +0.1670329508092765480e+0,
+0.2823152230558364186e-5));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.5223977625442187932e-02, +0.2089116434918055149e-03, +0.6096615680115419211e+0, +0.6249999184195342838e+0));
  t = vmla_vd_vd_vd_vd(t, vd2getx_vd_vd2(u), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.2686617064513125222e-01, +0.1912855949584917753e-01, +0.1059212443193543585e-2, +0.1741749416408701288e-8));

  // The last three steps run on vdouble2 values: every constant is given as a
  // pair of doubles, and the multiply uses the full two-component u.
  d = ddmul_vd2_vd2_vd(u, t);
  d = ddadd2_vd2_vd2_vd2(d, vcast_vd2_vd_vd(vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 0.11283791670955126141, -0.10277263343147646779, -0.50005180473999022439, -0.5000000000258444377),
					    vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -4.0175691625932118483e-18, -6.2338714083404900225e-18, 2.6362140569041995803e-17, -4.0074044712386992281e-17)));
  d = ddmul_vd2_vd2_vd2(d, u);
  d = ddadd2_vd2_vd2_vd2(d, vcast_vd2_vd_vd(vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.37612638903183753802, -0.63661976742916359662, 1.601106273924963368e-06, 2.3761973137523364792e-13),
					    vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 1.3391897206042552387e-17, 7.6321019159085724662e-18, 1.1974001857764476775e-23, -1.1670076950531026582e-29)));
  d = ddmul_vd2_vd2_vd2(d, u);
  d = ddadd2_vd2_vd2_vd2(d, vcast_vd2_vd_vd(vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 1.1283791670955125586, -1.1283791674717296161, -0.57236496645145429341, -0.57236494292470108114),
					    vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 1.5335459613165822674e-17, 8.0896847755965377194e-17, 3.0704553245872027258e-17, -2.3984352208056898003e-17)));

  // Range-dependent reconstruction:
  //   o1 lanes multiply d by |a|; other lanes use -|a|*|a| + d.
  //   o0 lanes then take 1 - x; other lanes take expk2(x).
  //   lanes outside o1 are finally multiplied by u (1/|a| there).
  x = ddmul_vd2_vd2_vd(vsel_vd2_vo_vd2_vd2(o1, d, vcast_vd2_vd_vd(vneg_vd_vd(a), vcast_vd_d(0))), a);
  x = vsel_vd2_vo_vd2_vd2(o1, x, ddadd2_vd2_vd2_vd2(x, d));
  x = vsel_vd2_vo_vd2_vd2(o0, ddsub_vd2_vd2_vd2(vcast_vd2_d_d(1, 0), x), expk2(x));
  x = vsel_vd2_vo_vd2_vd2(o1, x, ddmul_vd2_vd2_vd2(x, u));

  // Lanes outside o3 (|a| >= 27.3) return 0.
  r = vsel_vd_vo_vd_vd(o3, vadd_vd_vd_vd(vd2getx_vd_vd2(x), vd2gety_vd_vd2(x)), vcast_vd_d(0));
  // Inputs with the sign bit set return 2 - r.
  r = vsel_vd_vo_vd_vd(vsignbit_vo_vd(s), vsub_vd_vd_vd(vcast_vd_d(2), r), r);
  // NaN in, NaN out (the select continues on the next line).
  r = vsel_vd_vo_vd_vd(visnan_vo_vd(s),
vcast_vd_d(SLEEF_NAN), r); return r; } #endif // #if !defined(DETERMINISTIC) #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) // The normal and deterministic versions of implementations are common // for the functions like sincospi_u05. Aliases are defined by // DALIAS_* macros for such functions. The defined aliases // (e.g. ysincospi_u05) are renamed(e.g. to // Sleef_cinz_sincospid2_u05sse2) by rename*.h. #ifdef ENABLE_ALIAS #define DALIAS_vd_vd(FUNC) EXPORT CONST VECTOR_CC vdouble y ## FUNC(vdouble) __attribute__((alias( stringify(x ## FUNC) ))); #define DALIAS_vd2_vd(FUNC) EXPORT CONST VECTOR_CC vdouble2 y ## FUNC(vdouble) __attribute__((alias( stringify(x ## FUNC) ))); #define DALIAS_vi_vd(FUNC) EXPORT CONST VECTOR_CC vint y ## FUNC(vdouble) __attribute__((alias( stringify(x ## FUNC) ))); #define DALIAS_vd_vd_vd(FUNC) EXPORT CONST VECTOR_CC vdouble y ## FUNC(vdouble, vdouble) __attribute__((alias( stringify(x ## FUNC) ))); #define DALIAS_vd_vd_vd_vd(FUNC) EXPORT CONST VECTOR_CC vdouble y ## FUNC(vdouble, vdouble, vdouble) __attribute__((alias( stringify(x ## FUNC) ))); #else #define DALIAS_vd_vd(FUNC) EXPORT CONST VECTOR_CC vdouble y ## FUNC(vdouble d) { return x ## FUNC (d); } #define DALIAS_vd2_vd(FUNC) EXPORT CONST VECTOR_CC vdouble2 y ## FUNC(vdouble d) { return x ## FUNC (d); } #define DALIAS_vi_vd(FUNC) EXPORT CONST VECTOR_CC vint y ## FUNC(vdouble d) { return x ## FUNC (d); } #define DALIAS_vd_vd_vd(FUNC) EXPORT CONST VECTOR_CC vdouble y ## FUNC(vdouble x, vdouble y) { return x ## FUNC (x, y); } #define DALIAS_vd_vd_vd_vd(FUNC) EXPORT CONST VECTOR_CC vdouble y ## FUNC(vdouble x, vdouble y, vdouble z) { return x ## FUNC (x, y, z); } #endif DALIAS_vd2_vd(sincospi_u05) DALIAS_vd2_vd(sincospi_u35) DALIAS_vd2_vd(modf) DALIAS_vd_vd(log) DALIAS_vd_vd(log_u1) DALIAS_vd_vd_vd(pow) DALIAS_vd_vd(sinh) DALIAS_vd_vd(cosh) DALIAS_vd_vd(tanh) DALIAS_vd_vd(sinh_u35) DALIAS_vd_vd(cosh_u35) DALIAS_vd_vd(tanh_u35) DALIAS_vd_vd(asinh) 
DALIAS_vd_vd(acosh) DALIAS_vd_vd(atanh) DALIAS_vd_vd(cbrt) DALIAS_vd_vd(cbrt_u1) DALIAS_vd_vd(expm1) DALIAS_vd_vd(log10) DALIAS_vd_vd(log2) DALIAS_vd_vd(log2_u35) DALIAS_vd_vd(log1p) DALIAS_vd_vd(fabs) DALIAS_vd_vd_vd(copysign) DALIAS_vd_vd_vd(fmax) DALIAS_vd_vd_vd(fmin) DALIAS_vd_vd_vd(fdim) DALIAS_vd_vd(trunc) DALIAS_vd_vd(floor) DALIAS_vd_vd(ceil) DALIAS_vd_vd(round) DALIAS_vd_vd(rint) DALIAS_vd_vd_vd(nextafter) DALIAS_vd_vd(frfrexp) DALIAS_vi_vd(expfrexp) DALIAS_vd_vd_vd_vd(fma) DALIAS_vd_vd(sqrt_u05) DALIAS_vd_vd(sqrt_u35) DALIAS_vd_vd_vd(hypot_u05) DALIAS_vd_vd_vd(hypot_u35) DALIAS_vd_vd_vd(fmod) DALIAS_vd_vd_vd(remainder) DALIAS_vd_vd(tgamma_u1) DALIAS_vd_vd(lgamma_u1) DALIAS_vd_vd(erf_u1) DALIAS_vd_vd(erfc_u15) #endif // #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) #if !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) EXPORT CONST int xgetInt(int name) { if (1 <= name && name <= 10) return vavailability_i(name); return 0; } EXPORT CONST void *xgetPtr(int name) { if (name == 0) return ISANAME; return (void *)0; } #endif #if defined(ALIAS_NO_EXT_SUFFIX) && !defined(DETERMINISTIC) #include ALIAS_NO_EXT_SUFFIX #endif #ifdef ENABLE_MAIN // gcc -DENABLE_MAIN -Wno-attributes -I../common -I../arch -DENABLE_AVX2 -mavx2 -mfma sleefsimddp.c rempitab.c ../common/common.c -lm #include #include #include int main(int argc, char **argv) { vdouble d1 = vcast_vd_d(atof(argv[1])); vdouble d2 = vcast_vd_d(atof(argv[2])); //vdouble d3 = vcast_vd_d(atof(argv[3])); //vdouble r = xnextafter(d1, d2); //int i; //double fr = frexp(atof(argv[1]), &i); //printf("%.20g\n", xfma(d1, d2, d3)[0]);; //printf("test %.20g\n", xtgamma_u1(d1)[0]); //printf("corr %.20g\n", tgamma(d1[0])); //printf("test %.20g\n", xerf_u1(d1)[0]); //printf("corr %.20g\n", erf(d1[0])); //printf("test %.20g\n", xerfc_u15(d1)[0]); //printf("corr %.20g\n", erfc(d1[0])); //printf("%.20g\n", nextafter(d1[0], d2[0]));; //printf("%.20g\n", vcast_d_vd(xhypot_u05(d1, d2))); 
//printf("%.20g\n", fr); printf("%.20g\n", fmod(atof(argv[1]), atof(argv[2]))); printf("%.20g\n", xfmod(d1, d2)[0]); //vdouble2 r = xsincospi_u35(a); //printf("%g, %g\n", vcast_d_vd(r.x), vcast_d_vd(r.y)); } #endif #ifdef ENABLE_GNUABI /* "finite" aliases for compatibility with GLIBC */ EXPORT CONST VECTOR_CC vdouble __acos_finite (vdouble) __attribute__((weak, alias(str_xacos ))); EXPORT CONST VECTOR_CC vdouble __acosh_finite (vdouble) __attribute__((weak, alias(str_xacosh ))); EXPORT CONST VECTOR_CC vdouble __asin_finite (vdouble) __attribute__((weak, alias(str_xasin_u1 ))); EXPORT CONST VECTOR_CC vdouble __atan2_finite (vdouble, vdouble) __attribute__((weak, alias(str_xatan2_u1 ))); EXPORT CONST VECTOR_CC vdouble __atanh_finite (vdouble) __attribute__((weak, alias(str_xatanh ))); EXPORT CONST VECTOR_CC vdouble __cosh_finite (vdouble) __attribute__((weak, alias(str_xcosh ))); EXPORT CONST VECTOR_CC vdouble __exp10_finite (vdouble) __attribute__((weak, alias(str_xexp10 ))); EXPORT CONST VECTOR_CC vdouble __exp2_finite (vdouble) __attribute__((weak, alias(str_xexp2 ))); EXPORT CONST VECTOR_CC vdouble __exp_finite (vdouble) __attribute__((weak, alias(str_xexp ))); EXPORT CONST VECTOR_CC vdouble __fmod_finite (vdouble, vdouble) __attribute__((weak, alias(str_xfmod ))); EXPORT CONST VECTOR_CC vdouble __remainder_finite(vdouble, vdouble) __attribute__((weak, alias(str_xremainder))); EXPORT CONST VECTOR_CC vdouble __modf_finite (vdouble, vdouble *) __attribute__((weak, alias(str_xmodf ))); EXPORT CONST VECTOR_CC vdouble __hypot_u05_finite(vdouble, vdouble) __attribute__((weak, alias(str_xhypot_u05))); EXPORT CONST VECTOR_CC vdouble __lgamma_u1_finite(vdouble) __attribute__((weak, alias(str_xlgamma_u1))); EXPORT CONST VECTOR_CC vdouble __log10_finite (vdouble) __attribute__((weak, alias(str_xlog10 ))); EXPORT CONST VECTOR_CC vdouble __log_finite (vdouble) __attribute__((weak, alias(str_xlog_u1 ))); EXPORT CONST VECTOR_CC vdouble __pow_finite (vdouble, vdouble) 
__attribute__((weak, alias(str_xpow ))); EXPORT CONST VECTOR_CC vdouble __sinh_finite (vdouble) __attribute__((weak, alias(str_xsinh ))); EXPORT CONST VECTOR_CC vdouble __sqrt_finite (vdouble) __attribute__((weak, alias(str_xsqrt ))); EXPORT CONST VECTOR_CC vdouble __tgamma_u1_finite(vdouble) __attribute__((weak, alias(str_xtgamma_u1))); #ifdef HEADER_MASKED #include HEADER_MASKED #endif #endif /* #ifdef ENABLE_GNUABI */ sleef-3.5.1/src/libm/sleefsimdsp.c000066400000000000000000004425541373003144100170240ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. #if !defined(SLEEF_GENHEADER) #include #include #include #include #endif #include "misc.h" extern const float Sleef_rempitabsp[]; #define __SLEEFSIMDSP_C__ #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif // Intel #ifdef ENABLE_SSE2 #define CONFIG 2 #if !defined(SLEEF_GENHEADER) #include "helpersse2.h" #else #include "macroonlySSE2.h" #endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamesse2_gnuabi.h" #else #include "renamesse2.h" #endif #endif #endif #ifdef ENABLE_SSE4 #define CONFIG 4 #if !defined(SLEEF_GENHEADER) #include "helpersse2.h" #else #include "macroonlySSE4.h" #endif #ifdef DORENAME #include "renamesse4.h" #endif #endif #ifdef ENABLE_AVX #define CONFIG 1 #if !defined(SLEEF_GENHEADER) #include "helperavx.h" #else #include "macroonlyAVX.h" #endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx_gnuabi.h" #else #include "renameavx.h" #endif #endif #endif #ifdef ENABLE_FMA4 #define CONFIG 4 #if !defined(SLEEF_GENHEADER) #include "helperavx.h" #else #include "macroonlyFMA4.h" #endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamefma4_gnuabi.h" #else #include "renamefma4.h" #endif #endif #endif #ifdef ENABLE_AVX2 #define CONFIG 1 #if 
!defined(SLEEF_GENHEADER) #include "helperavx2.h" #else #include "macroonlyAVX2.h" #endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx2_gnuabi.h" #else #include "renameavx2.h" #endif #endif #endif #ifdef ENABLE_AVX2128 #define CONFIG 1 #if !defined(SLEEF_GENHEADER) #include "helperavx2_128.h" #else #include "macroonlyAVX2128.h" #endif #ifdef DORENAME #include "renameavx2128.h" #endif #endif #ifdef ENABLE_AVX512F #define CONFIG 1 #if !defined(SLEEF_GENHEADER) #include "helperavx512f.h" #else #include "macroonlyAVX512F.h" #endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx512f_gnuabi.h" #else #include "renameavx512f.h" #endif #endif #endif #ifdef ENABLE_AVX512FNOFMA #define CONFIG 2 #if !defined(SLEEF_GENHEADER) #include "helperavx512f.h" #else #include "macroonlyAVX512FNOFMA.h" #endif #ifdef DORENAME #include "renameavx512fnofma.h" #endif #endif // Arm #ifdef ENABLE_ADVSIMD #define CONFIG 1 #if !defined(SLEEF_GENHEADER) #include "helperadvsimd.h" #else #include "macroonlyADVSIMD.h" #endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameadvsimd_gnuabi.h" #else #include "renameadvsimd.h" #endif #endif #endif #ifdef ENABLE_ADVSIMDNOFMA #define CONFIG 2 #if !defined(SLEEF_GENHEADER) #include "helperadvsimd.h" #else #include "macroonlyADVSIMDNOFMA.h" #endif #ifdef DORENAME #include "renameadvsimdnofma.h" #endif #endif #ifdef ENABLE_NEON32 #define CONFIG 1 #if !defined(SLEEF_GENHEADER) #include "helperneon32.h" #endif #ifdef DORENAME #include "renameneon32.h" #endif #endif #ifdef ENABLE_NEON32VFPV4 #define CONFIG 4 #if !defined(SLEEF_GENHEADER) #include "helperneon32.h" #endif #ifdef DORENAME #include "renameneon32vfpv4.h" #endif #endif #ifdef ENABLE_SVE #define CONFIG 1 #if !defined(SLEEF_GENHEADER) #include "helpersve.h" #else #include "macroonlySVE.h" #endif #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamesve_gnuabi.h" #else #include "renamesve.h" #endif /* ENABLE_GNUABI */ #endif /* DORENAME */ #endif /* ENABLE_SVE */ #ifdef 
ENABLE_SVENOFMA
// (The identifier above is the operand of the "#ifdef" that ends the
// preceding line.)
//
// Every section below follows the same pattern for one ENABLE_<ISA> switch:
//   * define CONFIG to the value used for that ISA,
//   * include the ISA helper header, or the "macroonly" header instead when
//     SLEEF_GENHEADER is defined (where one is listed),
//   * include the matching rename header when DORENAME is defined.
#define CONFIG 2
#if !defined(SLEEF_GENHEADER)
#include "helpersve.h"
#else
#include "macroonlySVENOFMA.h"
#endif

#ifdef DORENAME
#include "renamesvenofma.h"
#endif /* DORENAME */
#endif /* ENABLE_SVENOFMA */

// IBM

#ifdef ENABLE_VSX
#define CONFIG 1
#if !defined(SLEEF_GENHEADER)
#include "helperpower_128.h"
#else
#include "macroonlyVSX.h"
#endif
#ifdef DORENAME
#include "renamevsx.h"
#endif
#endif

#ifdef ENABLE_VSXNOFMA
#define CONFIG 2
#if !defined(SLEEF_GENHEADER)
#include "helperpower_128.h"
#else
#include "macroonlyVSXNOFMA.h"
#endif
#ifdef DORENAME
#include "renamevsxnofma.h"
#endif
#endif

#ifdef ENABLE_ZVECTOR2
#define CONFIG 140
#if !defined(SLEEF_GENHEADER)
#include "helpers390x_128.h"
#else
#include "macroonlyZVECTOR2.h"
#endif
#ifdef DORENAME
#include "renamezvector2.h"
#endif
#endif

#ifdef ENABLE_ZVECTOR2NOFMA
#define CONFIG 141
#if !defined(SLEEF_GENHEADER)
#include "helpers390x_128.h"
#else
#include "macroonlyZVECTOR2NOFMA.h"
#endif
#ifdef DORENAME
#include "renamezvector2nofma.h"
#endif
#endif

// Generic

// ENABLE_VECEXT and ENABLE_PUREC list no "macroonly" alternative, so no
// helper header is included for them when SLEEF_GENHEADER is defined.
#ifdef ENABLE_VECEXT
#define CONFIG 1
#if !defined(SLEEF_GENHEADER)
#include "helpervecext.h"
#endif
#ifdef DORENAME
#include "renamevecext.h"
#endif
#endif

#ifdef ENABLE_PUREC
#define CONFIG 1
#if !defined(SLEEF_GENHEADER)
#include "helperpurec.h"
#endif
#ifdef DORENAME
#include "renamepurec.h"
#endif
#endif

#ifdef ENABLE_PUREC_SCALAR
#define CONFIG 1
#if !defined(SLEEF_GENHEADER)
#include "helperpurec_scalar.h"
#else
#include "macroonlyPUREC_SCALAR.h"
#endif
#ifdef DORENAME
#include "renamepurec_scalar.h"
#endif
#endif

#ifdef ENABLE_PURECFMA_SCALAR
#define CONFIG 2
#if !defined(SLEEF_GENHEADER)
#include "helperpurec_scalar.h"
#else
#include "macroonlyPURECFMA_SCALAR.h"
#endif
#ifdef DORENAME
#include "renamepurecfma_scalar.h"
#endif
#endif

//

// MLA is the single-precision vector multiply-add and C2V broadcasts a scalar
// constant to a vector. Both are defined immediately before estrin.h is
// included -- presumably that header's polynomial macros expand through them
// (TODO confirm against estrin.h).
#define MLA(x, y, z) vmla_vf_vf_vf_vf((x), (y), (z))
#define C2V(c) vcast_vf_f(c)
#include "estrin.h"

//

#include "df.h"

// Mask of the lanes of d whose bit pattern equals that of -0.0; the return
// expression is completed on the following line.
static INLINE CONST VECTOR_CC vopmask visnegzero_vo_vf(vfloat d) { return
veq_vo_vi2_vi2(vreinterpret_vi2_vf(d), vreinterpret_vi2_vf(vcast_vf_f(-0.0))); } static INLINE VECTOR_CC vopmask vnot_vo32_vo32(vopmask x) { return vxor_vo_vo_vo(x, veq_vo_vi2_vi2(vcast_vi2_i(0), vcast_vi2_i(0))); } static INLINE CONST VECTOR_CC vmask vsignbit_vm_vf(vfloat f) { return vand_vm_vm_vm(vreinterpret_vm_vf(f), vreinterpret_vm_vf(vcast_vf_f(-0.0f))); } static INLINE CONST VECTOR_CC vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y))); } static INLINE CONST VECTOR_CC vfloat vcopysign_vf_vf_vf(vfloat x, vfloat y) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(x)), vand_vm_vm_vm (vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(y)))); } static INLINE CONST VECTOR_CC vfloat vsign_vf_vf(vfloat f) { return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(1.0f)), vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f)))); } static INLINE CONST VECTOR_CC vopmask vsignbit_vo_vf(vfloat d) { return veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vcast_vi2_i(0x80000000)), vcast_vi2_i(0x80000000)); } static INLINE CONST VECTOR_CC vint2 vsel_vi2_vf_vf_vi2_vi2(vfloat f0, vfloat f1, vint2 x, vint2 y) { return vsel_vi2_vo_vi2_vi2(vlt_vo_vf_vf(f0, f1), x, y); } static INLINE CONST VECTOR_CC vint2 vsel_vi2_vf_vi2(vfloat d, vint2 x) { return vand_vi2_vo_vi2(vsignbit_vo_vf(d), x); } static INLINE CONST VECTOR_CC vopmask visint_vo_vf(vfloat y) { return veq_vo_vf_vf(vtruncate_vf_vf(y), y); } static INLINE CONST VECTOR_CC vopmask visnumber_vo_vf(vfloat x) { return vnot_vo32_vo32(vor_vo_vo_vo(visinf_vo_vf(x), visnan_vo_vf(x))); } #if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA) static INLINE CONST VECTOR_CC vint2 vilogbk_vi2_vf(vfloat d) { vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(5.421010862427522E-20f)); d = vsel_vf_vo_vf_vf(o, 
vmul_vf_vf_vf(vcast_vf_f(1.8446744073709552E19f), d), d);
  // (Tail of vilogbk_vi2_vf.) In the lanes selected by o, d was just
  // multiplied by 1.8446744073709552E19f (2^64).
  // Shift right by 23 and mask with 0xff to isolate bits 23..30 of each lane.
  vint2 q = vand_vi2_vi2_vi2(vsrl_vi2_vi2_i(vreinterpret_vi2_vf(d), 23), vcast_vi2_i(0xff));
  // Subtract 0x7f, plus a further 64 in the lanes that were pre-scaled.
  q = vsub_vi2_vi2_vi2(q, vsel_vi2_vo_vi2_vi2(o, vcast_vi2_i(64 + 0x7f), vcast_vi2_i(0x7f)));
  return q;
}

// Same field extraction as above without the pre-scaling step:
// ((bits(d) >> 23) & 0xff) - 0x7f.
static INLINE CONST VECTOR_CC vint2 vilogb2k_vi2_vf(vfloat d) {
  vint2 q = vreinterpret_vi2_vf(d);
  q = vsrl_vi2_vi2_i(q, 23);
  q = vand_vi2_vi2_vi2(q, vcast_vi2_i(0xff));
  q = vsub_vi2_vi2_vi2(q, vcast_vi2_i(0x7f));
  return q;
}
#endif

//

// Vector ilogb for single precision. Starts from vilogbk_vi2_vf(|d|) and then
// overrides special inputs, in this order (later selects win):
//   d == 0   -> SLEEF_FP_ILOGB0
//   d is NaN -> SLEEF_FP_ILOGBNAN
//   d is inf -> INT_MAX
EXPORT CONST VECTOR_CC vint2 xilogbf(vfloat d) {
  vint2 e = vilogbk_vi2_vf(vabs_vf_vf(d));
  e = vsel_vi2_vo_vi2_vi2(veq_vo_vf_vf(d, vcast_vf_f(0.0f)), vcast_vi2_i(SLEEF_FP_ILOGB0), e);
  e = vsel_vi2_vo_vi2_vi2(visnan_vo_vf(d), vcast_vi2_i(SLEEF_FP_ILOGBNAN), e);
  e = vsel_vi2_vo_vi2_vi2(visinf_vo_vf(d), vcast_vi2_i(INT_MAX), e);
  return e;
}

// Builds a float from integer q by placing (q + 0x7f) at bit 23 of each lane.
// No range check is done on q here.
static INLINE CONST VECTOR_CC vfloat vpow2i_vf_vi2(vint2 q) {
  return vreinterpret_vf_vi2(vsll_vi2_vi2_i(vadd_vi2_vi2_vi2(q, vcast_vi2_i(0x7f)), 23));
}

// Scales x by a power of two given by q. q is split into a coarse part m and
// a remainder; x is multiplied four times by the factor built from m and once
// by the factor built from the remainder.
static INLINE CONST VECTOR_CC vfloat vldexp_vf_vf_vi2(vfloat x, vint2 q) {
  vfloat u;
  // Arithmetic shift by 31 replicates the sign bit of q across the lane.
  vint2 m = vsra_vi2_vi2_i(q, 31);
  // Coarse part: (((m + q) >> 6) - m) << 4.
  m = vsll_vi2_vi2_i(vsub_vi2_vi2_vi2(vsra_vi2_vi2_i(vadd_vi2_vi2_vi2(m, q), 6), m), 4);
  // Remainder: q minus four times the coarse part (m << 2).
  q = vsub_vi2_vi2_vi2(q, vsll_vi2_vi2_i(m, 2));
  m = vadd_vi2_vi2_vi2(m, vcast_vi2_i(0x7f));
  // Zero m in the lanes where it is not greater than 0 ...
  m = vand_vi2_vi2_vi2(vgt_vi2_vi2_vi2(m, vcast_vi2_i(0)), m);
  // ... and replace it with 0xff in the lanes where it exceeds 0xff
  // (presumably vandnot(n, m) keeps m where n is false -- TODO confirm
  // operand order in the helper header).
  vint2 n = vgt_vi2_vi2_vi2(m, vcast_vi2_i(0xff));
  m = vor_vi2_vi2_vi2(vandnot_vi2_vi2_vi2(n, m), vand_vi2_vi2_vi2(n, vcast_vi2_i(0xff)));
  u = vreinterpret_vf_vi2(vsll_vi2_vi2_i(m, 23));
  // Apply the coarse factor four times.
  x = vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(x, u), u), u), u);
  // Factor for the remainder, built the same way as in vpow2i_vf_vi2.
  u = vreinterpret_vf_vi2(vsll_vi2_vi2_i(vadd_vi2_vi2_vi2(q, vcast_vi2_i(0x7f)), 23));
  return vmul_vf_vf_vf(x, u);
}

// Two-step scaling: d * vpow2i(e >> 1) * vpow2i(e - (e >> 1)).
static INLINE CONST VECTOR_CC vfloat vldexp2_vf_vf_vi2(vfloat d, vint2 e) {
  return vmul_vf_vf_vf(vmul_vf_vf_vf(d, vpow2i_vf_vi2(vsra_vi2_vi2_i(e, 1))), vpow2i_vf_vi2(vsub_vi2_vi2_vi2(e, vsra_vi2_vi2_i(e, 1))));
}

static INLINE CONST VECTOR_CC vfloat vldexp3_vf_vf_vi2(vfloat d, vint2
q) { return vreinterpret_vf_vi2(vadd_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vsll_vi2_vi2_i(q, 23))); }
// (Tail of vldexp3_vf_vf_vi2: adds q << 23 directly to the integer bit
// pattern of d.)

// Exported ldexp for single-precision vectors; forwards to vldexp_vf_vf_vi2.
EXPORT CONST VECTOR_CC vfloat xldexpf(vfloat x, vint2 q) { return vldexp_vf_vf_vi2(x, q); }

// The pair types and their accessors below are only defined here when neither
// ENABLE_SVE nor ENABLE_SVENOFMA is set -- presumably the SVE helper header
// supplies its own versions (TODO confirm).
#if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA))
// A float vector paired with an integer vector.
typedef struct {
  vfloat d;
  vint2 i;
} fi_t;

// Accessors and constructor for fi_t.
static vfloat figetd_vf_di(fi_t d) { return d.d; }
static vint2 figeti_vi2_di(fi_t d) { return d.i; }
static fi_t fisetdi_fi_vf_vi2(vfloat d, vint2 i) {
  fi_t r = { d, i };
  return r;
}

// A vfloat2 value paired with an integer vector.
typedef struct {
  vfloat2 df;
  vint2 i;
} dfi_t;

// Accessors, constructor, and a setter that returns a copy of dfi with its
// df member replaced.
static vfloat2 dfigetdf_vf2_dfi(dfi_t d) { return d.df; }
static vint2 dfigeti_vi2_dfi(dfi_t d) { return d.i; }
static dfi_t dfisetdfi_dfi_vf2_vi2(vfloat2 v, vint2 i) {
  dfi_t r = { v, i };
  return r;
}
static dfi_t dfisetdf_dfi_dfi_vf2(dfi_t dfi, vfloat2 v) {
  dfi.df = v;
  return dfi;
}
#endif

// ORs the sign bit of y into x: the result has its sign bit set wherever
// either x or y has it set.
static INLINE CONST VECTOR_CC vfloat vorsign_vf_vf_vf(vfloat x, vfloat y) {
  return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y)));
}

// Helper for rempif. Returns a fi_t holding
//   d = x - 0.25 * rint(4x)
//   i = (int)(rint(4x) - 4 * rint(x))
// where rint() is either vrint_vf_vf (FULL_FP_ROUNDING) or the add/subtract
// sequence in the #else branch.
static INLINE CONST fi_t rempisubf(vfloat x) {
#ifdef FULL_FP_ROUNDING
  vfloat y = vrint_vf_vf(vmul_vf_vf_vf(x, vcast_vf_f(4)));
  vint2 vi = vtruncate_vi2_vf(vsub_vf_vf_vf(y, vmul_vf_vf_vf(vrint_vf_vf(x), vcast_vf_f(4))));
  return fisetdi_fi_vf_vi2(vsub_vf_vf_vf(x, vmul_vf_vf_vf(y, vcast_vf_f(0.25))), vi);
#else
  // c is 2^23 carrying the sign of x.
  vfloat c = vmulsign_vf_vf_vf(vcast_vf_f(1 << 23), x);
  // Where |4x| > 2^23 the value 4x is used unchanged; otherwise (4x + c) - c
  // is computed and the sign of x is ORed back in. Presumably the add/subtract
  // pair rounds to an integer under the current rounding mode -- TODO confirm.
  vfloat rint4x = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(vmul_vf_vf_vf(vcast_vf_f(4), x)), vcast_vf_f(1 << 23)), vmul_vf_vf_vf(vcast_vf_f(4), x), vorsign_vf_vf_vf(vsub_vf_vf_vf(vmla_vf_vf_vf_vf(vcast_vf_f(4), x, c), c), x));
  // Same construction applied to x itself.
  vfloat rintx = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(1 << 23)), x, vorsign_vf_vf_vf(vsub_vf_vf_vf(vadd_vf_vf_vf(x, c), c), x));
  return fisetdi_fi_vf_vi2(vmla_vf_vf_vf_vf(vcast_vf_f(-0.25), rint4x, x), vtruncate_vi2_vf(vmla_vf_vf_vf_vf(vcast_vf_f(-4), rintx, rint4x)));
#endif
}

static INLINE CONST dfi_t rempif(vfloat a) {
  vfloat2 x, y, z;
  vint2 ex = vilogb2k_vi2_vf(a);
#if defined(ENABLE_AVX512F) ||
defined(ENABLE_AVX512FNOFMA) ex = vandnot_vi2_vi2_vi2(vsra_vi2_vi2_i(ex, 31), ex); ex = vand_vi2_vi2_vi2(ex, vcast_vi2_i(127)); #endif ex = vsub_vi2_vi2_vi2(ex, vcast_vi2_i(25)); vint2 q = vand_vi2_vo_vi2(vgt_vo_vi2_vi2(ex, vcast_vi2_i(90-25)), vcast_vi2_i(-64)); a = vldexp3_vf_vf_vi2(a, q); ex = vandnot_vi2_vi2_vi2(vsra_vi2_vi2_i(ex, 31), ex); ex = vsll_vi2_vi2_i(ex, 2); x = dfmul_vf2_vf_vf(a, vgather_vf_p_vi2(Sleef_rempitabsp, ex)); fi_t di = rempisubf(vf2getx_vf_vf2(x)); q = figeti_vi2_di(di); x = vf2setx_vf2_vf2_vf(x, figetd_vf_di(di)); x = dfnormalize_vf2_vf2(x); y = dfmul_vf2_vf_vf(a, vgather_vf_p_vi2(Sleef_rempitabsp+1, ex)); x = dfadd2_vf2_vf2_vf2(x, y); di = rempisubf(vf2getx_vf_vf2(x)); q = vadd_vi2_vi2_vi2(q, figeti_vi2_di(di)); x = vf2setx_vf2_vf2_vf(x, figetd_vf_di(di)); x = dfnormalize_vf2_vf2(x); y = vcast_vf2_vf_vf(vgather_vf_p_vi2(Sleef_rempitabsp+2, ex), vgather_vf_p_vi2(Sleef_rempitabsp+3, ex)); y = dfmul_vf2_vf2_vf(y, a); x = dfadd2_vf2_vf2_vf2(x, y); x = dfnormalize_vf2_vf2(x); x = dfmul_vf2_vf2_vf2(x, vcast_vf2_f_f(3.1415927410125732422f*2, -8.7422776573475857731e-08f*2)); x = vsel_vf2_vo_vf2_vf2(vlt_vo_vf_vf(vabs_vf_vf(a), vcast_vf_f(0.7f)), vcast_vf2_vf_vf(a, vcast_vf_f(0)), x); return dfisetdfi_dfi_vf2_vi2(x, q); } EXPORT CONST VECTOR_CC vfloat xsinf(vfloat d) { #if !defined(DETERMINISTIC) vint2 q; vfloat u, s, r = d; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI))); u = vcast_vf_vi2(q); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f), d); } else if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf))))) { q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI))); u = vcast_vf_vi2(q); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf), d); d = vmla_vf_vf_vf_vf(u, 
vcast_vf_f(-PI_Cf), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df), d); } else { dfi_t dfi = rempif(d); q = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(2), vcast_vi2_i(1))); q = vsra_vi2_vi2_i(q, 2); vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(1)); vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi))), vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)))); x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); d = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); d = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(r), visnan_vo_vf(r)), vreinterpret_vm_vf(d))); } s = vmul_vf_vf_vf(d, d); d = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(d))); u = vcast_vf_f(2.6083159809786593541503e-06f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833307858556509017944336f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666597127914428710938f)); u = vadd_vf_vf_vf(vmul_vf_vf_vf(s, vmul_vf_vf_vf(u, d)), d); u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(r), r, u); return u; #else // #if !defined(DETERMINISTIC) vint2 q; vfloat u, s, r = d; q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI))); u = vcast_vf_vi2(q); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f), d); vopmask g = vlt_vo_vf_vf(vabs_vf_vf(r), vcast_vf_f(TRIGRANGEMAX2f)); if 
(!LIKELY(vtestallones_i_vo32(g))) { s = vcast_vf_vi2(q); u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Af), r); u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Bf), u); u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Cf), u); u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Df), u); d = vsel_vf_vo_vf_vf(g, d, u); g = vlt_vo_vf_vf(vabs_vf_vf(r), vcast_vf_f(TRIGRANGEMAXf)); if (!LIKELY(vtestallones_i_vo32(g))) { dfi_t dfi = rempif(r); vint2 q2 = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); q2 = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q2, q2), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(2), vcast_vi2_i(1))); q2 = vsra_vi2_vi2_i(q2, 2); vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(1)); vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi))), vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)))); x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); u = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); u = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(r), visnan_vo_vf(r)), vreinterpret_vm_vf(u))); q = vsel_vi2_vo_vi2_vi2(g, q, q2); d = vsel_vf_vo_vf_vf(g, d, u); } } s = vmul_vf_vf_vf(d, d); d = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(d))); u = vcast_vf_f(2.6083159809786593541503e-06f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833307858556509017944336f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666597127914428710938f)); u = vadd_vf_vf_vf(vmul_vf_vf_vf(s, vmul_vf_vf_vf(u, d)), d); u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(r), r, u); return 
u; #endif // #if !defined(DETERMINISTIC) } EXPORT CONST VECTOR_CC vfloat xcosf(vfloat d) { #if !defined(DETERMINISTIC) vint2 q; vfloat u, s, r = d; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { q = vrint_vi2_vf(vsub_vf_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI)), vcast_vf_f(0.5f))); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vcast_vi2_i(1)); u = vcast_vf_vi2(q); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), d); } else if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf))))) { q = vrint_vi2_vf(vsub_vf_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI)), vcast_vf_f(0.5f))); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vcast_vi2_i(1)); u = vcast_vf_vi2(q); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af*0.5f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf*0.5f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Cf*0.5f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df*0.5f), d); } else { dfi_t dfi = rempif(d); q = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(8), vcast_vi2_i(7))); q = vsra_vi2_vi2_i(q, 1); vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(0)); vfloat y = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vf_f(0), vcast_vf_f(-1)); vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), y), vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), y)); x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); d = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); d = 
vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(r), visnan_vo_vf(r)), vreinterpret_vm_vf(d))); } s = vmul_vf_vf_vf(d, d); d = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(d))); u = vcast_vf_f(2.6083159809786593541503e-06f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833307858556509017944336f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666597127914428710938f)); u = vadd_vf_vf_vf(vmul_vf_vf_vf(s, vmul_vf_vf_vf(u, d)), d); return u; #else // #if !defined(DETERMINISTIC) vint2 q; vfloat u, s, r = d; q = vrint_vi2_vf(vsub_vf_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI)), vcast_vf_f(0.5f))); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vcast_vi2_i(1)); u = vcast_vf_vi2(q); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), d); vopmask g = vlt_vo_vf_vf(vabs_vf_vf(r), vcast_vf_f(TRIGRANGEMAX2f)); if (!LIKELY(vtestallones_i_vo32(g))) { s = vcast_vf_vi2(q); u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Af*0.5f), r); u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Bf*0.5f), u); u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Cf*0.5f), u); u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Df*0.5f), u); d = vsel_vf_vo_vf_vf(g, d, u); g = vlt_vo_vf_vf(vabs_vf_vf(r), vcast_vf_f(TRIGRANGEMAXf)); if (!LIKELY(vtestallones_i_vo32(g))) { dfi_t dfi = rempif(r); vint2 q2 = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); q2 = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q2, q2), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(8), vcast_vi2_i(7))); q2 = vsra_vi2_vi2_i(q2, 1); vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(0)); vfloat y = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), 
vcast_vf_f(0)), vcast_vf_f(0), vcast_vf_f(-1)); vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), y), vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), y)); x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); u = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); u = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(r), visnan_vo_vf(r)), vreinterpret_vm_vf(u))); q = vsel_vi2_vo_vi2_vi2(g, q, q2); d = vsel_vf_vo_vf_vf(g, d, u); } } s = vmul_vf_vf_vf(d, d); d = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(d))); u = vcast_vf_f(2.6083159809786593541503e-06f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833307858556509017944336f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666597127914428710938f)); u = vadd_vf_vf_vf(vmul_vf_vf_vf(s, vmul_vf_vf_vf(u, d)), d); return u; #endif // #if !defined(DETERMINISTIC) } EXPORT CONST VECTOR_CC vfloat xtanf(vfloat d) { #if !defined(DETERMINISTIC) vint2 q; vopmask o; vfloat u, s, x; x = d; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f*0.5f))))) { q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI)))); u = vcast_vf_vi2(q); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), x); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), x); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), x); } else if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf))))) { q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI)))); u = vcast_vf_vi2(q); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af*0.5f), x); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf*0.5f), x); x = vmla_vf_vf_vf_vf(u, 
vcast_vf_f(-PI_Cf*0.5f), x); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df*0.5f), x); } else { dfi_t dfi = rempif(d); q = dfigeti_vi2_dfi(dfi); x = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); x = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(x))); x = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, x); } s = vmul_vf_vf_vf(x, x); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)); x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(x))); #if defined(ENABLE_NEON32) u = vcast_vf_f(0.00927245803177356719970703f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00331984995864331722259521f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0242998078465461730957031f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0534495301544666290283203f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.133383005857467651367188f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.333331853151321411132812f)); #else vfloat s2 = vmul_vf_vf_vf(s, s), s4 = vmul_vf_vf_vf(s2, s2); u = POLY6(s, s2, s4, 0.00927245803177356719970703f, 0.00331984995864331722259521f, 0.0242998078465461730957031f, 0.0534495301544666290283203f, 0.133383005857467651367188f, 0.333331853151321411132812f); #endif u = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(u, x), x); u = vsel_vf_vo_vf_vf(o, vrec_vf_vf(u), u); return u; #else // #if !defined(DETERMINISTIC) vint2 q; vopmask o; vfloat u, s, x; q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI)))); u = vcast_vf_vi2(q); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), x); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), x); vopmask g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f*0.5f)); if (!LIKELY(vtestallones_i_vo32(g))) { vint2 q2 = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI)))); s = vcast_vf_vi2(q); u = vmla_vf_vf_vf_vf(s, 
vcast_vf_f(-PI_Af*0.5f), d); u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Bf*0.5f), u); u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Cf*0.5f), u); u = vmla_vf_vf_vf_vf(s, vcast_vf_f(-PI_Df*0.5f), u); q = vsel_vi2_vo_vi2_vi2(g, q, q2); x = vsel_vf_vo_vf_vf(g, x, u); g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf)); if (!LIKELY(vtestallones_i_vo32(g))) { dfi_t dfi = rempif(d); u = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); u = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(u))); u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, u); q = vsel_vi2_vo_vi2_vi2(g, q, dfigeti_vi2_dfi(dfi)); x = vsel_vf_vo_vf_vf(g, x, u); } } s = vmul_vf_vf_vf(x, x); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)); x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(x))); #if defined(ENABLE_NEON32) u = vcast_vf_f(0.00927245803177356719970703f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00331984995864331722259521f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0242998078465461730957031f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0534495301544666290283203f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.133383005857467651367188f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.333331853151321411132812f)); #else vfloat s2 = vmul_vf_vf_vf(s, s), s4 = vmul_vf_vf_vf(s2, s2); u = POLY6(s, s2, s4, 0.00927245803177356719970703f, 0.00331984995864331722259521f, 0.0242998078465461730957031f, 0.0534495301544666290283203f, 0.133383005857467651367188f, 0.333331853151321411132812f); #endif u = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(u, x), x); u = vsel_vf_vo_vf_vf(o, vrec_vf_vf(u), u); return u; #endif // #if !defined(DETERMINISTIC) } EXPORT CONST VECTOR_CC vfloat xsinf_u1(vfloat d) { #if !defined(DETERMINISTIC) vint2 q; vfloat u, v; vfloat2 s, t, x; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { u = 
vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(M_1_PI))); q = vrint_vi2_vf(u); v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f), d); s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f))); } else { dfi_t dfi = rempif(d); q = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(2), vcast_vi2_i(1))); q = vsra_vi2_vi2_i(q, 2); vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(1)); vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi))), vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)))); x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); s = dfnormalize_vf2_vf2(dfigetdf_vf2_dfi(dfi)); #if !defined(_MSC_VER) s = vf2setx_vf2_vf2_vf(s, vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(vf2getx_vf_vf2(s))))); #else s.x = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(s.x))); #endif } t = s; s = dfsqu_vf2_vf2(s); u = vcast_vf_f(2.6083159809786593541503e-06f); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.0001981069071916863322258f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00833307858556509017944336f)); x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(-0.166666597127914428710938f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(s))), s)); u = dfmul_vf_vf2_vf2(t, x); u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(u))); u = 
vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, u); return u; #else // #if !defined(DETERMINISTIC) vint2 q; vfloat u, v; vfloat2 s, t, x; u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(M_1_PI))); q = vrint_vi2_vf(u); v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f), d); s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f))); vopmask g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f)); if (!LIKELY(vtestallones_i_vo32(g))) { dfi_t dfi = rempif(d); vint2 q2 = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); q2 = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q2, q2), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(2), vcast_vi2_i(1))); q2 = vsra_vi2_vi2_i(q2, 2); vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(1)); vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi))), vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)))); x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); t = dfnormalize_vf2_vf2(dfigetdf_vf2_dfi(dfi)); t = vf2setx_vf2_vf2_vf(t, vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(vf2getx_vf_vf2(t))))); q = vsel_vi2_vo_vi2_vi2(g, q, q2); s = vsel_vf2_vo_vf2_vf2(g, s, t); } t = s; s = dfsqu_vf2_vf2(s); u = vcast_vf_f(2.6083159809786593541503e-06f); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.0001981069071916863322258f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00833307858556509017944336f)); x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(-0.166666597127914428710938f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(s))), s)); u = dfmul_vf_vf2_vf2(t, x); u = 
vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(u))); u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, u); return u; #endif // #if !defined(DETERMINISTIC) } EXPORT CONST VECTOR_CC vfloat xcosf_u1(vfloat d) { #if !defined(DETERMINISTIC) vint2 q; vfloat u; vfloat2 s, t, x; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { vfloat dq = vmla_vf_vf_vf_vf(vrint_vf_vf(vmla_vf_vf_vf_vf(d, vcast_vf_f(M_1_PI), vcast_vf_f(-0.5f))), vcast_vf_f(2), vcast_vf_f(1)); q = vrint_vi2_vf(dq); s = dfadd2_vf2_vf_vf (d, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_A2f*0.5f))); s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_B2f*0.5f))); s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_C2f*0.5f))); } else { dfi_t dfi = rempif(d); q = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(8), vcast_vi2_i(7))); q = vsra_vi2_vi2_i(q, 1); vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(0)); vfloat y = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vf_f(0), vcast_vf_f(-1)); vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), y), vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), y)); x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, dfigetdf_vf2_dfi(dfi))); s = dfnormalize_vf2_vf2(dfigetdf_vf2_dfi(dfi)); #if !defined(_MSC_VER) s = vf2setx_vf2_vf2_vf(s, vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(vf2getx_vf_vf2(s))))); #else s.x = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), 
vreinterpret_vm_vf(s.x))); #endif } t = s; s = dfsqu_vf2_vf2(s); u = vcast_vf_f(2.6083159809786593541503e-06f); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.0001981069071916863322258f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00833307858556509017944336f)); x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(-0.166666597127914428710938f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(s))), s)); u = dfmul_vf_vf2_vf2(t, x); u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)), vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(u))); return u; #else // #if !defined(DETERMINISTIC) vint2 q; vfloat u; vfloat2 s, t, x; vfloat dq = vmla_vf_vf_vf_vf(vrint_vf_vf(vmla_vf_vf_vf_vf(d, vcast_vf_f(M_1_PI), vcast_vf_f(-0.5f))), vcast_vf_f(2), vcast_vf_f(1)); q = vrint_vi2_vf(dq); s = dfadd2_vf2_vf_vf (d, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_A2f*0.5f))); s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_B2f*0.5f))); s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_C2f*0.5f))); vopmask g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f)); if (!LIKELY(vtestallones_i_vo32(g))) { dfi_t dfi = rempif(d); vint2 q2 = vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(3)); q2 = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q2, q2), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vi2_i(8), vcast_vi2_i(7))); q2 = vsra_vi2_vi2_i(q2, 1); vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfigeti_vi2_dfi(dfi), vcast_vi2_i(1)), vcast_vi2_i(0)); vfloat y = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vcast_vf_f(0)), vcast_vf_f(0), vcast_vf_f(-1)); vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), y), vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), y)); x = dfadd2_vf2_vf2_vf2(dfigetdf_vf2_dfi(dfi), x); dfi = dfisetdf_dfi_dfi_vf2(dfi, vsel_vf2_vo_vf2_vf2(o, x, 
dfigetdf_vf2_dfi(dfi))); t = dfnormalize_vf2_vf2(dfigetdf_vf2_dfi(dfi)); t = vf2setx_vf2_vf2_vf(t, vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(vf2getx_vf_vf2(t))))); q = vsel_vi2_vo_vi2_vi2(g, q, q2); s = vsel_vf2_vo_vf2_vf2(g, s, t); } t = s; s = dfsqu_vf2_vf2(s); u = vcast_vf_f(2.6083159809786593541503e-06f); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.0001981069071916863322258f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00833307858556509017944336f)); x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(-0.166666597127914428710938f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(s))), s)); u = dfmul_vf_vf2_vf2(t, x); u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)), vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(u))); return u; #endif // #if !defined(DETERMINISTIC) } EXPORT CONST VECTOR_CC vfloat xfastsinf_u3500(vfloat d) { vint2 q; vfloat u, s, t = d; s = vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI)); u = vrint_vf_vf(s); q = vrint_vi2_vf(s); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-(float)M_PI), d); s = vmul_vf_vf_vf(d, d); u = vcast_vf_f(-0.1881748176e-3); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.8323502727e-2)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.1666651368e+0)); u = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, d), u, d); u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(u))); vopmask g = vlt_vo_vf_vf(vabs_vf_vf(t), vcast_vf_f(30.0f)); if (!LIKELY(vtestallones_i_vo32(g))) return vsel_vf_vo_vf_vf(g, u, xsinf(t)); return u; } EXPORT CONST VECTOR_CC vfloat xfastcosf_u3500(vfloat d) { vint2 q; vfloat u, s, t = d; s = vmla_vf_vf_vf_vf(d, vcast_vf_f((float)M_1_PI), vcast_vf_f(-0.5f)); u = vrint_vf_vf(s); q = vrint_vi2_vf(s); d = vmla_vf_vf_vf_vf(u, 
vcast_vf_f(-(float)M_PI), vsub_vf_vf_vf(d, vcast_vf_f((float)M_PI * 0.5f))); s = vmul_vf_vf_vf(d, d); u = vcast_vf_f(-0.1881748176e-3); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.8323502727e-2)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.1666651368e+0)); u = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, d), u, d); u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(u))); vopmask g = vlt_vo_vf_vf(vabs_vf_vf(t), vcast_vf_f(30.0f)); if (!LIKELY(vtestallones_i_vo32(g))) return vsel_vf_vo_vf_vf(g, u, xcosf(t)); return u; } #ifdef ENABLE_GNUABI #define TYPE2_FUNCATR static INLINE CONST #define TYPE6_FUNCATR static INLINE CONST #define SQRTFU05_FUNCATR static INLINE CONST #define XSINCOSF sincosfk #define XSINCOSF_U1 sincosfk_u1 #define XSINCOSPIF_U05 sincospifk_u05 #define XSINCOSPIF_U35 sincospifk_u35 #define XMODFF modffk #else #define TYPE2_FUNCATR EXPORT CONST #define TYPE6_FUNCATR EXPORT #define SQRTFU05_FUNCATR EXPORT #define XSINCOSF xsincosf #define XSINCOSF_U1 xsincosf_u1 #define XSINCOSPIF_U05 xsincospif_u05 #define XSINCOSPIF_U35 xsincospif_u35 #define XMODFF xmodff #endif TYPE2_FUNCATR VECTOR_CC vfloat2 XSINCOSF(vfloat d) { #if !defined(DETERMINISTIC) vint2 q; vopmask o; vfloat u, s, t, rx, ry; vfloat2 r; s = d; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_2_PI))); u = vcast_vf_vi2(q); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), s); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), s); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), s); } else if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf))))) { q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_2_PI))); u = vcast_vf_vi2(q); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af*0.5f), s); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf*0.5f), s); s = 
vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Cf*0.5f), s); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df*0.5f), s); } else { dfi_t dfi = rempif(d); q = dfigeti_vi2_dfi(dfi); s = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); s = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(s))); } t = s; s = vmul_vf_vf_vf(s, s); u = vcast_vf_f(-0.000195169282960705459117889f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833215750753879547119141f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666537523269653320312f)); rx = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(u, s), t, t); rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx); u = vcast_vf_f(-2.71811842367242206819355e-07f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(2.47990446951007470488548e-05f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.00138888787478208541870117f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416666641831398010253906f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.5)); ry = vmla_vf_vf_vf_vf(s, u, vcast_vf_f(1)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0)); r = vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(o, rx, ry), vsel_vf_vo_vf_vf(o, ry, rx)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)); r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(r))))); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2)); r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(r))))); return r; #else // #if !defined(DETERMINISTIC) vint2 q; vopmask o; vfloat u, s, t, rx, ry; vfloat2 r; q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_2_PI))); u = vcast_vf_vi2(q); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); s = vmla_vf_vf_vf_vf(u, 
vcast_vf_f(-PI_B2f*0.5f), s); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), s); vopmask g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f)); if (!LIKELY(vtestallones_i_vo32(g))) { vint2 q2 = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_2_PI))); u = vcast_vf_vi2(q2); t = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af*0.5f), d); t = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf*0.5f), t); t = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Cf*0.5f), t); t = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df*0.5f), t); q = vsel_vi2_vo_vi2_vi2(g, q, q2); s = vsel_vf_vo_vf_vf(g, s, t); g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf)); if (!LIKELY(vtestallones_i_vo32(g))) { dfi_t dfi = rempif(d); t = vadd_vf_vf_vf(vf2getx_vf_vf2(dfigetdf_vf2_dfi(dfi)), vf2gety_vf_vf2(dfigetdf_vf2_dfi(dfi))); t = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(t))); q = vsel_vi2_vo_vi2_vi2(g, q, dfigeti_vi2_dfi(dfi)); s = vsel_vf_vo_vf_vf(g, s, t); } } t = s; s = vmul_vf_vf_vf(s, s); u = vcast_vf_f(-0.000195169282960705459117889f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833215750753879547119141f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666537523269653320312f)); rx = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(u, s), t, t); rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx); u = vcast_vf_f(-2.71811842367242206819355e-07f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(2.47990446951007470488548e-05f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.00138888787478208541870117f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416666641831398010253906f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.5)); ry = vmla_vf_vf_vf_vf(s, u, vcast_vf_f(1)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0)); r = vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(o, rx, ry), vsel_vf_vo_vf_vf(o, ry, rx)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)); r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, 
vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(r))))); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2)); r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(r))))); return r; #endif // #if !defined(DETERMINISTIC) } TYPE2_FUNCATR VECTOR_CC vfloat2 XSINCOSF_U1(vfloat d) { #if !defined(DETERMINISTIC) vint2 q; vopmask o; vfloat u, v, rx, ry; vfloat2 r, s, t, x; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(2 * M_1_PI))); q = vrint_vi2_vf(u); v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f))); } else { dfi_t dfi = rempif(d); q = dfigeti_vi2_dfi(dfi); s = dfigetdf_vf2_dfi(dfi); o = vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)); s = vf2setx_vf2_vf2_vf(s, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(s))))); } t = s; s = vf2setx_vf2_vf2_vf(s, dfsqu_vf_vf2(s)); u = vcast_vf_f(-0.000195169282960705459117889f); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00833215750753879547119141f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.166666537523269653320312f)); u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(vf2getx_vf_vf2(s), vf2getx_vf_vf2(t))); x = dfadd_vf2_vf2_vf(t, u); rx = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)); rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx); u = vcast_vf_f(-2.71811842367242206819355e-07f); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(2.47990446951007470488548e-05f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.00138888787478208541870117f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0416666641831398010253906f)); u = vmla_vf_vf_vf_vf(u, 
vf2getx_vf_vf2(s), vcast_vf_f(-0.5)); x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf_vf(vf2getx_vf_vf2(s), u)); ry = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0)); r = vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(o, rx, ry), vsel_vf_vo_vf_vf(o, ry, rx)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)); r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(r))))); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2)); r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(r))))); return r; #else // #if !defined(DETERMINISTIC) vint2 q; vopmask o; vfloat u, v, rx, ry; vfloat2 r, s, t, x; u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(2 * M_1_PI))); q = vrint_vi2_vf(u); v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f))); vopmask g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f)); if (!LIKELY(vtestallones_i_vo32(g))) { dfi_t dfi = rempif(d); t = dfigetdf_vf2_dfi(dfi); o = vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)); t = vf2setx_vf2_vf2_vf(t, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(t))))); q = vsel_vi2_vo_vi2_vi2(g, q, dfigeti_vi2_dfi(dfi)); s = vsel_vf2_vo_vf2_vf2(g, s, t); } t = s; s = vf2setx_vf2_vf2_vf(s, dfsqu_vf_vf2(s)); u = vcast_vf_f(-0.000195169282960705459117889f); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00833215750753879547119141f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.166666537523269653320312f)); u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(vf2getx_vf_vf2(s), vf2getx_vf_vf2(t))); x = dfadd_vf2_vf2_vf(t, u); rx = 
vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)); rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx); u = vcast_vf_f(-2.71811842367242206819355e-07f); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(2.47990446951007470488548e-05f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.00138888787478208541870117f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0416666641831398010253906f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-0.5)); x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf_vf(vf2getx_vf_vf2(s), u)); ry = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0)); r = vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(o, rx, ry), vsel_vf_vo_vf_vf(o, ry, rx)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)); r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(r))))); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2)); r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(r))))); return r; #endif // #if !defined(DETERMINISTIC) } #if !defined(DETERMINISTIC) TYPE2_FUNCATR VECTOR_CC vfloat2 XSINCOSPIF_U05(vfloat d) { vopmask o; vfloat u, s, t, rx, ry; vfloat2 r, x, s2; u = vmul_vf_vf_vf(d, vcast_vf_f(4)); vint2 q = vtruncate_vi2_vf(u); q = vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vxor_vi2_vi2_vi2(vsrl_vi2_vi2_i(q, 31), vcast_vi2_i(1))), vcast_vi2_i(~1)); s = vsub_vf_vf_vf(u, vcast_vf_vi2(q)); t = s; s = vmul_vf_vf_vf(s, s); s2 = dfmul_vf2_vf_vf(t, t); // u = vcast_vf_f(+0.3093842054e-6); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.3657307388e-4)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2490393585e-2)); x = dfadd2_vf2_vf_vf2(vmul_vf_vf_vf(u, s), vcast_vf2_f_f(-0.080745510756969451904, 
-1.3373665339076936258e-09)); x = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(s2, x), vcast_vf2_f_f(0.78539818525314331055, -2.1857338617566484855e-08)); x = dfmul_vf2_vf2_vf(x, t); rx = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)); rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx); // u = vcast_vf_f(-0.2430611801e-7); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.3590577080e-5)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.3259917721e-3)); x = dfadd2_vf2_vf_vf2(vmul_vf_vf_vf(u, s), vcast_vf2_f_f(0.015854343771934509277, 4.4940051354032242811e-10)); x = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(s2, x), vcast_vf2_f_f(-0.30842512845993041992, -9.0728339030733922277e-09)); x = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf2(x, s2), vcast_vf_f(1)); ry = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)); // o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)); r = vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(o, rx, ry), vsel_vf_vo_vf_vf(o, ry, rx)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(4)), vcast_vi2_i(4)); r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(r))))); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(4)), vcast_vi2_i(4)); r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(r))))); o = vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1e+7f)); r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vandnot_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(r))))); r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vandnot_vm_vo32_vm(o, vreinterpret_vm_vf(vf2gety_vf_vf2(r))))); o = visinf_vo_vf(d); r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(r))))); r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2gety_vf_vf2(r))))); return r; } TYPE2_FUNCATR VECTOR_CC 
vfloat2 XSINCOSPIF_U35(vfloat d) { vopmask o; vfloat u, s, t, rx, ry; vfloat2 r; u = vmul_vf_vf_vf(d, vcast_vf_f(4)); vint2 q = vtruncate_vi2_vf(u); q = vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vxor_vi2_vi2_vi2(vsrl_vi2_vi2_i(q, 31), vcast_vi2_i(1))), vcast_vi2_i(~1)); s = vsub_vf_vf_vf(u, vcast_vf_vi2(q)); t = s; s = vmul_vf_vf_vf(s, s); // u = vcast_vf_f(-0.3600925265e-4); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2490088111e-2)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.8074551076e-1)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.7853981853e+0)); rx = vmul_vf_vf_vf(u, t); // u = vcast_vf_f(+0.3539815225e-5); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.3259574005e-3)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1585431583e-1)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.3084251285e+0)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(1)); ry = u; // o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)); r = vf2setxy_vf2_vf_vf(vsel_vf_vo_vf_vf(o, rx, ry), vsel_vf_vo_vf_vf(o, ry, rx)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(4)), vcast_vi2_i(4)); r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(r))))); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(4)), vcast_vi2_i(4)); r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(r))))); o = vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1e+7f)); r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vandnot_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(r))))); r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vandnot_vm_vo32_vm(o, vreinterpret_vm_vf(vf2gety_vf_vf2(r))))); o = visinf_vo_vf(d); r = vf2setx_vf2_vf2_vf(r, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(r))))); r = vf2sety_vf2_vf2_vf(r, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2gety_vf_vf2(r))))); return 
r; } TYPE6_FUNCATR VECTOR_CC vfloat2 XMODFF(vfloat x) { vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x))); fr = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(INT64_C(1) << 23)), vcast_vf_f(0), fr); vfloat2 ret; ret = vf2setxy_vf2_vf_vf(vcopysign_vf_vf_vf(fr, x), vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), x)); return ret; } #ifdef ENABLE_GNUABI EXPORT VECTOR_CC void xsincosf(vfloat a, float *ps, float *pc) { vfloat2 r = sincosfk(a); vstoreu_v_p_vf(ps, vf2getx_vf_vf2(r)); vstoreu_v_p_vf(pc, vf2gety_vf_vf2(r)); } EXPORT VECTOR_CC void xsincosf_u1(vfloat a, float *ps, float *pc) { vfloat2 r = sincosfk_u1(a); vstoreu_v_p_vf(ps, vf2getx_vf_vf2(r)); vstoreu_v_p_vf(pc, vf2gety_vf_vf2(r)); } EXPORT VECTOR_CC void xsincospif_u05(vfloat a, float *ps, float *pc) { vfloat2 r = sincospifk_u05(a); vstoreu_v_p_vf(ps, vf2getx_vf_vf2(r)); vstoreu_v_p_vf(pc, vf2gety_vf_vf2(r)); } EXPORT VECTOR_CC void xsincospif_u35(vfloat a, float *ps, float *pc) { vfloat2 r = sincospifk_u35(a); vstoreu_v_p_vf(ps, vf2getx_vf_vf2(r)); vstoreu_v_p_vf(pc, vf2gety_vf_vf2(r)); } EXPORT CONST VECTOR_CC vfloat xmodff(vfloat a, float *iptr) { vfloat2 r = modffk(a); vstoreu_v_p_vf(iptr, vf2gety_vf_vf2(r)); return vf2getx_vf_vf2(r); } #endif // #ifdef ENABLE_GNUABI #endif // #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vfloat xtanf_u1(vfloat d) { #if !defined(DETERMINISTIC) vint2 q; vfloat u, v; vfloat2 s, t, x; vopmask o; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(2 * M_1_PI))); q = vrint_vi2_vf(u); v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f))); } else { dfi_t dfi = rempif(d); q = dfigeti_vi2_dfi(dfi); s = dfigetdf_vf2_dfi(dfi); o = vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)); s = vf2setx_vf2_vf2_vf(s, vreinterpret_vf_vm(vor_vm_vo32_vm(o, 
vreinterpret_vm_vf(vf2getx_vf_vf2(s))))); s = vf2sety_vf2_vf2_vf(s, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2gety_vf_vf2(s))))); } o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)); vmask n = vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))); #if !defined(_MSC_VER) s = vf2setx_vf2_vf2_vf(s, vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2getx_vf_vf2(s)), n))); s = vf2sety_vf2_vf2_vf(s, vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2gety_vf_vf2(s)), n))); #else s.x = vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(s.x), n)); s.y = vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(s.y), n)); #endif t = s; s = dfsqu_vf2_vf2(s); s = dfnormalize_vf2_vf2(s); u = vcast_vf_f(0.00446636462584137916564941f); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-8.3920182078145444393158e-05f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0109639242291450500488281f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0212360303848981857299805f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0540687143802642822265625f)); x = dfadd_vf2_vf_vf(vcast_vf_f(0.133325666189193725585938f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(s))); x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf2(vcast_vf_f(0.33333361148834228515625f), dfmul_vf2_vf2_vf2(s, x)), s)); x = dfmul_vf2_vf2_vf2(t, x); x = vsel_vf2_vo_vf2_vf2(o, dfrec_vf2_vf2(x), x); u = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)); u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, u); return u; #else // #if !defined(DETERMINISTIC) vint2 q; vfloat u, v; vfloat2 s, t, x; vopmask o; u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(2 * M_1_PI))); q = vrint_vi2_vf(u); v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f))); vopmask g = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f)); if 
(!LIKELY(vtestallones_i_vo32(g))) { dfi_t dfi = rempif(d); t = dfigetdf_vf2_dfi(dfi); o = vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)); t = vf2setx_vf2_vf2_vf(t, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2getx_vf_vf2(t))))); t = vf2sety_vf2_vf2_vf(t, vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(vf2gety_vf_vf2(t))))); q = vsel_vi2_vo_vi2_vi2(g, q, dfigeti_vi2_dfi(dfi)); s = vsel_vf2_vo_vf2_vf2(g, s, t); } o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)); vmask n = vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))); s = vf2setx_vf2_vf2_vf(s, vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2getx_vf_vf2(s)), n))); s = vf2sety_vf2_vf2_vf(s, vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2gety_vf_vf2(s)), n))); t = s; s = dfsqu_vf2_vf2(s); s = dfnormalize_vf2_vf2(s); u = vcast_vf_f(0.00446636462584137916564941f); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(-8.3920182078145444393158e-05f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0109639242291450500488281f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0212360303848981857299805f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0540687143802642822265625f)); x = dfadd_vf2_vf_vf(vcast_vf_f(0.133325666189193725585938f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(s))); x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf2(vcast_vf_f(0.33333361148834228515625f), dfmul_vf2_vf2_vf2(s, x)), s)); x = dfmul_vf2_vf2_vf2(t, x); x = vsel_vf2_vo_vf2_vf2(o, dfrec_vf2_vf2(x), x); u = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)); u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, u); return u; #endif // #if !defined(DETERMINISTIC) } #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vfloat xatanf(vfloat d) { vfloat s, t, u; vint2 q; q = vsel_vi2_vf_vi2(d, vcast_vi2_i(2)); s = vabs_vf_vf(d); q = vsel_vi2_vf_vf_vi2_vi2(vcast_vf_f(1.0f), s, vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), q); s = 
vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vcast_vf_f(1.0f), s), vrec_vf_vf(s), s); t = vmul_vf_vf_vf(s, s); vfloat t2 = vmul_vf_vf_vf(t, t), t4 = vmul_vf_vf_vf(t2, t2); u = POLY8(t, t2, t4, 0.00282363896258175373077393f, -0.0159569028764963150024414f, 0.0425049886107444763183594f, -0.0748900920152664184570312f, 0.106347933411598205566406f, -0.142027363181114196777344f, 0.199926957488059997558594f, -0.333331018686294555664062f); t = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(t, u), s); t = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vsub_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), t), t); t = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(t))); #if defined(ENABLE_NEON32) || defined(ENABLE_NEON32VFPV4) t = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vmulsign_vf_vf_vf(vcast_vf_f(1.5874010519681994747517056f), d), t); #endif return t; } #endif // #if !defined(DETERMINISTIC) static INLINE CONST VECTOR_CC vfloat atan2kf(vfloat y, vfloat x) { vfloat s, t, u; vint2 q; vopmask p; q = vsel_vi2_vf_vi2(x, vcast_vi2_i(-2)); x = vabs_vf_vf(x); q = vsel_vi2_vf_vf_vi2_vi2(x, y, vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), q); p = vlt_vo_vf_vf(x, y); s = vsel_vf_vo_vf_vf(p, vneg_vf_vf(x), y); t = vmax_vf_vf_vf(x, y); s = vdiv_vf_vf_vf(s, t); t = vmul_vf_vf_vf(s, s); vfloat t2 = vmul_vf_vf_vf(t, t), t4 = vmul_vf_vf_vf(t2, t2); u = POLY8(t, t2, t4, 0.00282363896258175373077393f, -0.0159569028764963150024414f, 0.0425049886107444763183594f, -0.0748900920152664184570312f, 0.106347933411598205566406f, -0.142027363181114196777344f, 0.199926957488059997558594f, -0.333331018686294555664062f); t = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(t, u), s); t = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f((float)(M_PI/2)), t); return t; } static INLINE CONST VECTOR_CC vfloat visinf2_vf_vf_vf(vfloat d, vfloat m) { return vreinterpret_vf_vm(vand_vm_vo32_vm(visinf_vo_vf(d), 
vor_vm_vm_vm(vsignbit_vm_vf(d), vreinterpret_vm_vf(m)))); } #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vfloat xatan2f(vfloat y, vfloat x) { vfloat r = atan2kf(vabs_vf_vf(y), x); r = vmulsign_vf_vf_vf(r, x); r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), veq_vo_vf_vf(x, vcast_vf_f(0.0f))), vsub_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), visinf2_vf_vf_vf(x, vmulsign_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), x))), r); r = vsel_vf_vo_vf_vf(visinf_vo_vf(y), vsub_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), visinf2_vf_vf_vf(x, vmulsign_vf_vf_vf(vcast_vf_f((float)(M_PI/4)), x))), r); r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(y, vcast_vf_f(0.0f)), vreinterpret_vf_vm(vand_vm_vo32_vm(vsignbit_vo_vf(x), vreinterpret_vm_vf(vcast_vf_f((float)M_PI)))), r); r = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vreinterpret_vm_vf(vmulsign_vf_vf_vf(r, y)))); return r; } EXPORT CONST VECTOR_CC vfloat xasinf(vfloat d) { vopmask o = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(0.5f)); vfloat x2 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, d), vmul_vf_vf_vf(vsub_vf_vf_vf(vcast_vf_f(1), vabs_vf_vf(d)), vcast_vf_f(0.5f))); vfloat x = vsel_vf_vo_vf_vf(o, vabs_vf_vf(d), vsqrt_vf_vf(x2)), u; u = vcast_vf_f(+0.4197454825e-1); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.2424046025e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.4547423869e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.7495029271e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.1666677296e+0)); u = vmla_vf_vf_vf_vf(u, vmul_vf_vf_vf(x, x2), x); vfloat r = vsel_vf_vo_vf_vf(o, u, vmla_vf_vf_vf_vf(u, vcast_vf_f(-2), vcast_vf_f(M_PIf/2))); return vmulsign_vf_vf_vf(r, d); } EXPORT CONST VECTOR_CC vfloat xacosf(vfloat d) { vopmask o = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(0.5f)); vfloat x2 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, d), vmul_vf_vf_vf(vsub_vf_vf_vf(vcast_vf_f(1), vabs_vf_vf(d)), vcast_vf_f(0.5f))), u; vfloat x = vsel_vf_vo_vf_vf(o, vabs_vf_vf(d), vsqrt_vf_vf(x2)); x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(vabs_vf_vf(d), 
vcast_vf_f(1.0f)), vcast_vf_f(0), x); u = vcast_vf_f(+0.4197454825e-1); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.2424046025e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.4547423869e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.7495029271e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.1666677296e+0)); u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(x2, x)); vfloat y = vsub_vf_vf_vf(vcast_vf_f(3.1415926535897932f/2), vadd_vf_vf_vf(vmulsign_vf_vf_vf(x, d), vmulsign_vf_vf_vf(u, d))); x = vadd_vf_vf_vf(x, u); vfloat r = vsel_vf_vo_vf_vf(o, y, vmul_vf_vf_vf(x, vcast_vf_f(2))); return vsel_vf_vo_vf_vf(vandnot_vo_vo_vo(o, vlt_vo_vf_vf(d, vcast_vf_f(0))), vf2getx_vf_vf2(dfadd_vf2_vf2_vf(vcast_vf2_f_f(3.1415927410125732422f,-8.7422776573475857731e-08f), vneg_vf_vf(r))), r); } #endif // #if !defined(DETERMINISTIC) // static INLINE CONST VECTOR_CC vfloat2 atan2kf_u1(vfloat2 y, vfloat2 x) { vfloat u; vfloat2 s, t; vint2 q; vopmask p; vmask r; q = vsel_vi2_vf_vf_vi2_vi2(vf2getx_vf_vf2(x), vcast_vf_f(0), vcast_vi2_i(-2), vcast_vi2_i(0)); p = vlt_vo_vf_vf(vf2getx_vf_vf2(x), vcast_vf_f(0)); r = vand_vm_vo32_vm(p, vreinterpret_vm_vf(vcast_vf_f(-0.0))); x = vf2setx_vf2_vf2_vf(x, vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2getx_vf_vf2(x)), r))); x = vf2sety_vf2_vf2_vf(x, vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vf2gety_vf_vf2(x)), r))); q = vsel_vi2_vf_vf_vi2_vi2(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y), vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), q); p = vlt_vo_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(y)); s = vsel_vf2_vo_vf2_vf2(p, dfneg_vf2_vf2(x), y); t = vsel_vf2_vo_vf2_vf2(p, y, x); s = dfdiv_vf2_vf2_vf2(s, t); t = dfsqu_vf2_vf2(s); t = dfnormalize_vf2_vf2(t); u = vcast_vf_f(-0.00176397908944636583328247f); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(0.0107900900766253471374512f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(-0.0309564601629972457885742f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(0.0577365085482597351074219f)); u = 
vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(-0.0838950723409652709960938f));
  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(0.109463557600975036621094f));
  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(-0.142626821994781494140625f));
  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(t), vcast_vf_f(0.199983194470405578613281f));

  t = dfmul_vf2_vf2_vf2(t, dfadd_vf2_vf_vf(vcast_vf_f(-0.333332866430282592773438f), vmul_vf_vf_vf(u, vf2getx_vf_vf2(t))));
  t = dfmul_vf2_vf2_vf2(s, dfadd_vf2_vf_vf2(vcast_vf_f(1), t));
  // Add q times a two-float constant (head 1.5707963705062866211f, i.e. about pi/2).
  t = dfadd_vf2_vf2_vf2(dfmul_vf2_vf2_vf(vcast_vf2_f_f(1.5707963705062866211f, -4.3711388286737928865e-08f), vcast_vf_vi2(q)), t);

  return t;
}

#if !defined(DETERMINISTIC)
// Two-argument arctangent built on the double-float kernel atan2kf_u1.
// Where |x| is below the threshold, both x and y are multiplied by 2^24 before
// the kernel is called. The special-case patching afterwards follows the same
// sequence as xatan2f.
EXPORT CONST VECTOR_CC vfloat xatan2f_u1(vfloat y, vfloat x) {
  vopmask o = vlt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(2.9387372783541830947e-39f)); // nexttowardf((1.0 / FLT_MAX), 1)
  x = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(x, vcast_vf_f(1 << 24)), x);
  y = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(y, vcast_vf_f(1 << 24)), y);

  vfloat2 d = atan2kf_u1(vcast_vf2_vf_vf(vabs_vf_vf(y), vcast_vf_f(0)), vcast_vf2_vf_vf(x, vcast_vf_f(0)));
  // Collapse the two-float result to a single float.
  vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d));

  r = vmulsign_vf_vf_vf(r, x);
  r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), veq_vo_vf_vf(x, vcast_vf_f(0))), vsub_vf_vf_vf(vcast_vf_f(M_PI/2), visinf2_vf_vf_vf(x, vmulsign_vf_vf_vf(vcast_vf_f(M_PI/2), x))), r);
  r = vsel_vf_vo_vf_vf(visinf_vo_vf(y), vsub_vf_vf_vf(vcast_vf_f(M_PI/2), visinf2_vf_vf_vf(x, vmulsign_vf_vf_vf(vcast_vf_f(M_PI/4), x))), r);
  r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(y, vcast_vf_f(0.0f)), vreinterpret_vf_vm(vand_vm_vo32_vm(vsignbit_vo_vf(x), vreinterpret_vm_vf(vcast_vf_f((float)M_PI)))), r);
  r = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vreinterpret_vm_vf(vmulsign_vf_vf_vf(r, y))));

  return r;
}

// Arcsine variant that keeps x as a two-float (vfloat2) value: the square root
// for the |d| >= 0.5 lanes comes from dfsqrt_vf2_vf, and the final subtraction
// from the pi/4 constant is done in double-float arithmetic.
EXPORT CONST VECTOR_CC vfloat xasinf_u1(vfloat d) {
  vopmask o = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(0.5f));
  vfloat x2 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, d),
vmul_vf_vf_vf(vsub_vf_vf_vf(vcast_vf_f(1), vabs_vf_vf(d)), vcast_vf_f(0.5f))), u;
  vfloat2 x = vsel_vf2_vo_vf2_vf2(o, vcast_vf2_vf_vf(vabs_vf_vf(d), vcast_vf_f(0)), dfsqrt_vf2_vf(x2));
  // |d| == 1: force x to exactly (0, 0).
  x = vsel_vf2_vo_vf2_vf2(veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1.0f)), vcast_vf2_f_f(0, 0), x);

  u = vcast_vf_f(+0.4197454825e-1);
  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.2424046025e-1));
  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.4547423869e-1));
  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.7495029271e-1));
  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.1666677296e+0));
  u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)));

  // y = pi/4 - x - u in double-float; doubled below for the |d| >= 0.5 lanes.
  vfloat2 y = dfsub_vf2_vf2_vf(dfsub_vf2_vf2_vf2(vcast_vf2_f_f(3.1415927410125732422f/4,-8.7422776573475857731e-08f/4), x), u);

  vfloat r = vsel_vf_vo_vf_vf(o, vadd_vf_vf_vf(u, vf2getx_vf_vf2(x)), vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(y), vf2gety_vf_vf2(y)), vcast_vf_f(2)));
  return vmulsign_vf_vf_vf(r, d);
}

// Arccosine variant that carries x and the result as two-float (vfloat2)
// values. Same |d| < 0.5 split and polynomial coefficients as xasinf_u1; lanes
// with |d| >= 0.5 and d < 0 are reflected through the two-float pi constant.
EXPORT CONST VECTOR_CC vfloat xacosf_u1(vfloat d) {
  vopmask o = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(0.5f));
  vfloat x2 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, d), vmul_vf_vf_vf(vsub_vf_vf_vf(vcast_vf_f(1), vabs_vf_vf(d)), vcast_vf_f(0.5f))), u;
  vfloat2 x = vsel_vf2_vo_vf2_vf2(o, vcast_vf2_vf_vf(vabs_vf_vf(d), vcast_vf_f(0)), dfsqrt_vf2_vf(x2));
  x = vsel_vf2_vo_vf2_vf2(veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1.0f)), vcast_vf2_f_f(0, 0), x);

  u = vcast_vf_f(+0.4197454825e-1);
  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.2424046025e-1));
  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.4547423869e-1));
  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.7495029271e-1));
  u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.1666677296e+0));
  u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)));

  // |d| < 0.5 lanes: pi/2 - (signed x + signed u), in double-float.
  vfloat2 y = dfsub_vf2_vf2_vf2(vcast_vf2_f_f(3.1415927410125732422f/2, -8.7422776573475857731e-08f/2), dfadd_vf2_vf_vf(vmulsign_vf_vf_vf(vf2getx_vf_vf2(x), d), vmulsign_vf_vf_vf(u, d)));
  x = dfadd_vf2_vf2_vf(x, u);

  // |d| >= 0.5 lanes: 2 * (x + u).
  y = vsel_vf2_vo_vf2_vf2(o, y, dfscale_vf2_vf2_vf(x, vcast_vf_f(2)));

  y =
vsel_vf2_vo_vf2_vf2(vandnot_vo_vo_vo(o, vlt_vo_vf_vf(d, vcast_vf_f(0))), dfsub_vf2_vf2_vf2(vcast_vf2_f_f(3.1415927410125732422f, -8.7422776573475857731e-08f), y), y);

  return vadd_vf_vf_vf(vf2getx_vf_vf2(y), vf2gety_vf_vf2(y));
}

// Arctangent via the double-float kernel: atan2kf_u1(|d|, 1), collapsed to one
// float. Infinite inputs are replaced by the literal 1.5707963... constant, and
// the sign of d is applied last.
EXPORT CONST VECTOR_CC vfloat xatanf_u1(vfloat d) {
  vfloat2 d2 = atan2kf_u1(vcast_vf2_vf_vf(vabs_vf_vf(d), vcast_vf_f(0)), vcast_vf2_f_f(1, 0));
  vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(d2), vf2gety_vf_vf2(d2));
  r = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vcast_vf_f(1.570796326794896557998982), r);
  return vmulsign_vf_vf_vf(r, d);
}
#endif // #if !defined(DETERMINISTIC)

//

#if !defined(DETERMINISTIC)
// Natural logarithm on float vectors.
// The input is split into an exponent e and a mantissa m, and the polynomial
// is evaluated on x = (m - 1) / (m + 1). Non-AVX512 builds pre-scale inputs
// below FLT_MIN by 2^64 and subtract 64 from e afterwards; AVX512 builds use
// vgetexp/vgetmant and a vfixup step for the special cases instead.
EXPORT CONST VECTOR_CC vfloat xlogf(vfloat d) {
  vfloat x, x2, t, m;

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN));
  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d);
  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
  m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e));
  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
#else
  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
  m = vgetmant_vf_vf(d);
#endif

  x = vdiv_vf_vf_vf(vsub_vf_vf_vf(m, vcast_vf_f(1.0f)), vadd_vf_vf_vf(vcast_vf_f(1.0f), m));
  x2 = vmul_vf_vf_vf(x, x);

  t = vcast_vf_f(0.2392828464508056640625f);
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.28518211841583251953125f));
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.400005877017974853515625f));
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.666666686534881591796875f));
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(2.0f));

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  // 0.693147... is ln 2; the exponent contributes e * ln 2.
  x = vmla_vf_vf_vf_vf(x, t, vmul_vf_vf_vf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e)));

  // Special cases: +inf -> +inf, negative or NaN -> NaN, 0 -> -inf.
  x = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), x);
  x = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)),
visnan_vo_vf(d)), vcast_vf_f(SLEEF_NANf), x);
  x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITYf), x);
#else
  x = vmla_vf_vf_vf_vf(x, t, vmul_vf_vf_vf(vcast_vf_f(0.693147180559945286226764f), e));
  // NOTE(review): the fixup table here sets only one entry, whereas the table
  // in xlogf_u1 sets four -- confirm this difference is intentional.
  x = vfixup_vf_vf_vf_vi2_i(x, d, vcast_vi2_i((5 << (5*4))), 0);
#endif

  return x;
}
#endif // #if !defined(DETERMINISTIC)

#if !defined(DETERMINISTIC)
// Exponential on float vectors.
// q = rint(d * R_LN2f); the reduced argument s is d with q*L2Uf and q*L2Lf
// subtracted in two steps. A polynomial in s is evaluated and scaled by 2^q
// with vldexp2. The result is zeroed for d < -104 and set to +inf for d > 100.
EXPORT CONST VECTOR_CC vfloat xexpf(vfloat d) {
  vint2 q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(R_LN2f)));
  vfloat s, u;

  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf), d);
  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf), s);

  u = vcast_vf_f(0.000198527617612853646278381);
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00139304355252534151077271));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833336077630519866943359));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416664853692054748535156));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.166666671633720397949219));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.5));

  u = vadd_vf_vf_vf(vcast_vf_f(1.0f), vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, s), u, s));

  u = vldexp2_vf_vf_vi2(u, q);

  // Clear all result bits (giving +0) where d < -104.
  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-104)), vreinterpret_vm_vf(u)));
  u = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vcast_vf_f(100), d), vcast_vf_f(SLEEF_INFINITYf), u);

  return u;
}
#endif // #if !defined(DETERMINISTIC)

// Internal exp(d) - 1 kernel (used by xsinhf_u35 and xtanhf_u35).
// Same range reduction and coefficients as xexpf, evaluated with POLY6.
// Where q == 0 the polynomial value is returned directly; otherwise the result
// is ldexp2(u + 1, q) - 1.
static INLINE CONST VECTOR_CC vfloat expm1fk(vfloat d) {
  vint2 q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(R_LN2f)));
  vfloat s, u;

  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf), d);
  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf), s);

  vfloat s2 = vmul_vf_vf_vf(s, s), s4 = vmul_vf_vf_vf(s2, s2);
  u = POLY6(s, s2, s4, 0.000198527617612853646278381, 0.00139304355252534151077271, 0.00833336077630519866943359, 0.0416664853692054748535156, 0.166666671633720397949219, 0.5);

  u = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, s), u, s);

  u = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(q, vcast_vi2_i(0)), u, vsub_vf_vf_vf(vldexp2_vf_vf_vi2(vadd_vf_vf_vf(u,
vcast_vf_f(1)), q), vcast_vf_f(1)));

  return u;
}

#if defined(ENABLE_NEON32) || defined(ENABLE_NEON32VFPV4)
// Square root for 32-bit NEON builds, built from the reciprocal-square-root
// estimate intrinsics instead of vsqrt_vf_vf.
// The input bits are split into an exponent-derived scale factor e and a
// mantissa-derived value m; sqrt(m) is refined from vrsqrteq_f32 and then
// multiplied by e. Zero, infinity, NaN and negative inputs are patched afterwards.
EXPORT CONST VECTOR_CC vfloat xsqrtf_u35(vfloat d) {
  vfloat e = vreinterpret_vf_vi2(vadd_vi2_vi2_vi2(vcast_vi2_i(0x20000000), vand_vi2_vi2_vi2(vcast_vi2_i(0x7f000000), vsrl_vi2_vi2_i(vreinterpret_vi2_vf(d), 1))));
  vfloat m = vreinterpret_vf_vi2(vadd_vi2_vi2_vi2(vcast_vi2_i(0x3f000000), vand_vi2_vi2_vi2(vcast_vi2_i(0x01ffffff), vreinterpret_vi2_vf(d))));
  // Initial 1/sqrt(m) estimate plus one vrsqrtsq_f32 refinement step.
  float32x4_t x = vrsqrteq_f32(m);
  x = vmulq_f32(x, vrsqrtsq_f32(m, vmulq_f32(x, x)));
  float32x4_t u = vmulq_f32(x, m);
  // Correction step: u += (m - u*u) * (x * 0.5).
  u = vmlaq_f32(u, vmlsq_f32(m, u, u), vmulq_f32(x, vdupq_n_f32(0.5)));
  // d == 0: clear the scale factor so the product below is zero.
  e = vreinterpret_vf_vm(vandnot_vm_vo32_vm(veq_vo_vf_vf(d, vcast_vf_f(0)), vreinterpret_vm_vf(e)));
  u = vmul_vf_vf_vf(e, u);

  u = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), u);
  // NaN or negative input: OR the mask into the result bits.
  u = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visnan_vo_vf(d), vlt_vo_vf_vf(d, vcast_vf_f(0))), vreinterpret_vm_vf(u)));
  u = vmulsign_vf_vf_vf(u, d);

  return u;
}
#elif defined(ENABLE_VECEXT)
// Square root for the vector-extension build: vsqrt_vf_vf with explicit
// overrides for -0.0 (returns -0.0) and +inf (returns +inf).
EXPORT CONST VECTOR_CC vfloat xsqrtf_u35(vfloat d) {
  vfloat q = vsqrt_vf_vf(d);
  q = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0), q);
  return vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), q);
}
#else
// Square root for all other targets: forwards directly to vsqrt_vf_vf.
EXPORT CONST VECTOR_CC vfloat xsqrtf_u35(vfloat d) { return vsqrt_vf_vf(d); }
#endif

#if !defined(DETERMINISTIC)
// Cube root on float vectors.
// e = ilogb(|d|) + 1 and d is rescaled by 2^-e. qu and re are the quotient and
// remainder of (e + 6144) / 3; re selects the factor q (1, 2^(1/3) or 2^(2/3))
// and qu - 2048 is the power of two applied to q. A polynomial gives an
// initial estimate x on the rescaled |d|, which is refined once.
EXPORT CONST VECTOR_CC vfloat xcbrtf(vfloat d) {
  vfloat x, y, q = vcast_vf_f(1.0), t;
  vint2 e, qu, re;

#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA)
  // Keep the original input for the special-case fixups at the end.
  vfloat s = d;
#endif
  e = vadd_vi2_vi2_vi2(vilogbk_vi2_vf(vabs_vf_vf(d)), vcast_vi2_i(1));
  d = vldexp2_vf_vf_vi2(d, vneg_vi2_vi2(e));

  // The 6144 bias (a multiple of 3) keeps t positive before truncation.
  t = vadd_vf_vf_vf(vcast_vf_vi2(e), vcast_vf_f(6144));
  qu = vtruncate_vi2_vf(vmul_vf_vf_vf(t, vcast_vf_f(1.0f/3.0f)));
  re = vtruncate_vi2_vf(vsub_vf_vf_vf(t, vmul_vf_vf_vf(vcast_vf_vi2(qu), vcast_vf_f(3))));

  q = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(re, vcast_vi2_i(1)), vcast_vf_f(1.2599210498948731647672106f), q);
  q =
vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(re, vcast_vi2_i(2)), vcast_vf_f(1.5874010519681994747517056f), q);
  q = vldexp2_vf_vf_vi2(q, vsub_vi2_vi2_vi2(qu, vcast_vi2_i(2048)));

  q = vmulsign_vf_vf_vf(q, d);
  d = vabs_vf_vf(d);

  // Initial estimate x as a polynomial in the rescaled |d|.
  x = vcast_vf_f(-0.601564466953277587890625f);
  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.8208892345428466796875f));
  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-5.532182216644287109375f));
  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(5.898262500762939453125f));
  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-3.8095417022705078125f));
  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.2241256237030029296875f));

  // y = d*x*x, followed by one correction step and the final scaling by q.
  y = vmul_vf_vf_vf(vmul_vf_vf_vf(d, x), x);
  y = vmul_vf_vf_vf(vsub_vf_vf_vf(y, vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(2.0f / 3.0f), y), vmla_vf_vf_vf_vf(y, x, vcast_vf_f(-1.0f)))), q);

#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA)
  // AVX512 builds patch infinite and zero inputs explicitly, keeping the input sign.
  y = vsel_vf_vo_vf_vf(visinf_vo_vf(s), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), s), y);
  y = vsel_vf_vo_vf_vf(veq_vo_vf_vf(s, vcast_vf_f(0)), vmulsign_vf_vf_vf(vcast_vf_f(0), s), y);
#endif

  return y;
}
#endif // #if !defined(DETERMINISTIC)

#if !defined(DETERMINISTIC)
// Cube root variant that carries the scale factor q2 and the refinement in
// two-float (vfloat2) arithmetic. The exponent handling (e, qu, re) and the
// initial polynomial match xcbrtf.
EXPORT CONST VECTOR_CC vfloat xcbrtf_u1(vfloat d) {
  vfloat x, y, z, t;
  vfloat2 q2 = vcast_vf2_f_f(1, 0), u, v;
  vint2 e, qu, re;

#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA)
  vfloat s = d;
#endif
  e = vadd_vi2_vi2_vi2(vilogbk_vi2_vf(vabs_vf_vf(d)), vcast_vi2_i(1));
  d = vldexp2_vf_vf_vi2(d, vneg_vi2_vi2(e));

  t = vadd_vf_vf_vf(vcast_vf_vi2(e), vcast_vf_f(6144));
  qu = vtruncate_vi2_vf(vmul_vf_vf_vf(t, vcast_vf_f(1.0/3.0)));
  re = vtruncate_vi2_vf(vsub_vf_vf_vf(t, vmul_vf_vf_vf(vcast_vf_vi2(qu), vcast_vf_f(3))));

  // Two-float constants selected by the remainder re (1 or 2).
  q2 = vsel_vf2_vo_vf2_vf2(veq_vo_vi2_vi2(re, vcast_vi2_i(1)), vcast_vf2_f_f(1.2599210739135742188f, -2.4018701694217270415e-08), q2);
  q2 = vsel_vf2_vo_vf2_vf2(veq_vo_vi2_vi2(re, vcast_vi2_i(2)), vcast_vf2_f_f(1.5874010324478149414f, 1.9520385308169352356e-08), q2);

  // Transfer the sign of d onto both halves of q2.
  q2 = vf2setx_vf2_vf2_vf(q2, vmulsign_vf_vf_vf(vf2getx_vf_vf2(q2), d));
  q2 = vf2sety_vf2_vf2_vf(q2,
vmulsign_vf_vf_vf(vf2gety_vf_vf2(q2), d));
  d = vabs_vf_vf(d);

  x = vcast_vf_f(-0.601564466953277587890625f);
  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.8208892345428466796875f));
  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-5.532182216644287109375f));
  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(5.898262500762939453125f));
  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-3.8095417022705078125f));
  x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.2241256237030029296875f));

  // First refinement of x in plain float arithmetic (y = x^4).
  y = vmul_vf_vf_vf(x, x);
  y = vmul_vf_vf_vf(y, y);
  x = vsub_vf_vf_vf(x, vmul_vf_vf_vf(vmlanp_vf_vf_vf_vf(d, y, x), vcast_vf_f(-1.0 / 3.0)));

  // Second refinement in two-float arithmetic: u = x^4 * d - x.
  z = x;

  u = dfmul_vf2_vf_vf(x, x);
  u = dfmul_vf2_vf2_vf2(u, u);
  u = dfmul_vf2_vf2_vf(u, d);
  u = dfadd2_vf2_vf2_vf(u, vneg_vf_vf(x));
  y = vadd_vf_vf_vf(vf2getx_vf_vf2(u), vf2gety_vf_vf2(u));

  y = vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(-2.0 / 3.0), y), z);
  v = dfadd2_vf2_vf2_vf(dfmul_vf2_vf_vf(z, z), y);
  v = dfmul_vf2_vf2_vf(v, d);
  v = dfmul_vf2_vf2_vf2(v, q2);
  // Collapse to one float and apply the power of two (qu - 2048).
  z = vldexp2_vf_vf_vi2(vadd_vf_vf_vf(vf2getx_vf_vf2(v), vf2gety_vf_vf2(v)), vsub_vi2_vi2_vi2(qu, vcast_vi2_i(2048)));

  // Infinite or zero (rescaled) d: return a signed infinity / signed zero, with
  // the sign taken from q2, which had the sign of the input applied earlier.
  z = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), vf2getx_vf_vf2(q2)), z);
  z = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vreinterpret_vf_vm(vsignbit_vm_vf(vf2getx_vf_vf2(q2))), z);

#if defined(ENABLE_AVX512F) || defined(ENABLE_AVX512FNOFMA)
  z = vsel_vf_vo_vf_vf(visinf_vo_vf(s), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), s), z);
  z = vsel_vf_vo_vf_vf(veq_vo_vf_vf(s, vcast_vf_f(0)), vmulsign_vf_vf_vf(vcast_vf_f(0), s), z);
#endif

  return z;
}
#endif // #if !defined(DETERMINISTIC)

// Internal natural-log kernel that returns a two-float (vfloat2) result; used
// by xpowf. Exponent/mantissa extraction matches xlogf, but the quotient
// x = (m - 1) / (m + 1), its square, and the final sum are kept in double-float.
// No special-case patching is done here.
static INLINE CONST VECTOR_CC vfloat2 logkf(vfloat d) {
  vfloat2 x, x2;
  vfloat t, m;

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN));
  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d);
  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
  m = vldexp3_vf_vf_vi2(d,
vneg_vi2_vi2(e));
  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
#else
  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
  m = vgetmant_vf_vf(d);
#endif

  x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m));
  x2 = dfsqu_vf2_vf2(x);

  t = vcast_vf_f(0.240320354700088500976562);
  t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(x2), vcast_vf_f(0.285112679004669189453125));
  t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(x2), vcast_vf_f(0.400007992982864379882812));
  // Leading coefficient (about 2/3) kept as a two-float constant.
  vfloat2 c = vcast_vf2_f_f(0.66666662693023681640625f, 3.69183861259614332084311e-09f);

  // s = e * ln2, with ln2 as a two-float constant.
#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), vcast_vf_vi2(e));
#else
  vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), e);
#endif

  s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2)));
  s = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(x2, x), dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf(x2, t), c)));
  return s;
}

// Internal single-float natural-log kernel used by xfastpowf_u3500.
// Same computation as xlogf (identical coefficients) but without the
// special-case patching for infinity, NaN, negative or zero inputs.
static INLINE CONST VECTOR_CC vfloat logk3f(vfloat d) {
  vfloat x, x2, t, m;

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN));
  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d);
  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
  m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e));
  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
#else
  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
  m = vgetmant_vf_vf(d);
#endif

  x = vdiv_vf_vf_vf(vsub_vf_vf_vf(m, vcast_vf_f(1.0f)), vadd_vf_vf_vf(vcast_vf_f(1.0f), m));
  x2 = vmul_vf_vf_vf(x, x);

  t = vcast_vf_f(0.2392828464508056640625f);
  t = vmla_vf_vf_vf_vf(t, x2,
vcast_vf_f(0.28518211841583251953125f));
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.400005877017974853515625f));
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.666666686534881591796875f));
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(2.0f));

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  x = vmla_vf_vf_vf_vf(x, t, vmul_vf_vf_vf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e)));
#else
  x = vmla_vf_vf_vf_vf(x, t, vmul_vf_vf_vf(vcast_vf_f(0.693147180559945286226764f), e));
#endif

  return x;
}

#if !defined(DETERMINISTIC)
// Natural logarithm variant that computes the quotient x = (m - 1) / (m + 1)
// and the running sum s in two-float (vfloat2) arithmetic, then collapses s to
// a single float. Special cases are patched by selects on non-AVX512 builds
// and by a vfixup table on AVX512 builds.
EXPORT CONST VECTOR_CC vfloat xlogf_u1(vfloat d) {
  vfloat2 x;
  vfloat t, m, x2;

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN));
  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d);
  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
  m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e));
  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
  vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), vcast_vf_vi2(e));
#else
  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f)));
  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
  m = vgetmant_vf_vf(d);
  vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), e);
#endif

  x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m));
  x2 = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x));

  t = vcast_vf_f(+0.3027294874e+0f);
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.3996108174e+0f));
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.6666694880e+0f));

  // s = e*ln2 + 2*x + x^3 * t
  s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2)));
  s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)), t));

  vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(s), vf2gety_vf_vf2(s));

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  r =
vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), r);
  r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), vcast_vf_f(SLEEF_NANf), r);
  r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITYf), r);
#else
  r = vfixup_vf_vf_vf_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0);
#endif

  return r;
}
#endif // #if !defined(DETERMINISTIC)

// Internal exp kernel taking a two-float (vfloat2) argument and returning a
// single float; used by xpowf. q is rint((d.x + d.y) * R_LN2f); the reduction
// and the final polynomial sum are carried in double-float. The result is
// zeroed where the high part of d is below -104. There is no overflow clamp here.
static INLINE CONST VECTOR_CC vfloat expkf(vfloat2 d) {
  vfloat u = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(R_LN2f));
  vint2 q = vrint_vi2_vf(u);
  vfloat2 s, t;

  s = dfadd2_vf2_vf2_vf(d, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf)));
  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf)));

  s = dfnormalize_vf2_vf2(s);

  u = vcast_vf_f(0.00136324646882712841033936f);
  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.00836596917361021041870117f));
  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.0416710823774337768554688f));
  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.166665524244308471679688f));
  u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(0.499999850988388061523438f));

  // t = 1 + s + s^2 * u
  t = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf(dfsqu_vf2_vf2(s), u));

  t = dfadd_vf2_vf_vf2(vcast_vf_f(1), t);
  u = vadd_vf_vf_vf(vf2getx_vf_vf2(t), vf2gety_vf_vf2(t));
  u = vldexp_vf_vf_vi2(u, q);

  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(vf2getx_vf_vf2(d), vcast_vf_f(-104)), vreinterpret_vm_vf(u)));

  return u;
}

// Internal single-float exp kernel used by xfastpowf_u3500. Same reduction and
// coefficients as xexpf, but without the +inf clamp for large inputs.
static INLINE CONST VECTOR_CC vfloat expk3f(vfloat d) {
  vint2 q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(R_LN2f)));
  vfloat s, u;

  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf), d);
  s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf), s);

  u = vcast_vf_f(0.000198527617612853646278381);
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00139304355252534151077271));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833336077630519866943359));
  u = vmla_vf_vf_vf_vf(u, s,
vcast_vf_f(0.0416664853692054748535156)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.166666671633720397949219)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.5)); u = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, s), u, vadd_vf_vf_vf(s, vcast_vf_f(1.0f))); u = vldexp2_vf_vf_vi2(u, q); u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-104)), vreinterpret_vm_vf(u))); return u; } #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vfloat xpowf(vfloat x, vfloat y) { #if 1 vopmask yisint = vor_vo_vo_vo(veq_vo_vf_vf(vtruncate_vf_vf(y), y), vgt_vo_vf_vf(vabs_vf_vf(y), vcast_vf_f(1 << 24))); vopmask yisodd = vand_vo_vo_vo(vand_vo_vo_vo(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vtruncate_vi2_vf(y), vcast_vi2_i(1)), vcast_vi2_i(1)), yisint), vlt_vo_vf_vf(vabs_vf_vf(y), vcast_vf_f(1 << 24))); #if defined(ENABLE_NEON32) || defined(ENABLE_NEON32VFPV4) yisodd = vandnot_vm_vo32_vm(visinf_vo_vf(y), yisodd); #endif vfloat result = expkf(dfmul_vf2_vf2_vf(logkf(vabs_vf_vf(x)), y)); result = vsel_vf_vo_vf_vf(visnan_vo_vf(result), vcast_vf_f(SLEEF_INFINITYf), result); result = vmul_vf_vf_vf(result, vsel_vf_vo_vf_vf(vgt_vo_vf_vf(x, vcast_vf_f(0)), vcast_vf_f(1), vsel_vf_vo_vf_vf(yisint, vsel_vf_vo_vf_vf(yisodd, vcast_vf_f(-1.0f), vcast_vf_f(1)), vcast_vf_f(SLEEF_NANf)))); vfloat efx = vmulsign_vf_vf_vf(vsub_vf_vf_vf(vabs_vf_vf(x), vcast_vf_f(1)), y); result = vsel_vf_vo_vf_vf(visinf_vo_vf(y), vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(efx, vcast_vf_f(0.0f)), vreinterpret_vm_vf(vsel_vf_vo_vf_vf(veq_vo_vf_vf(efx, vcast_vf_f(0.0f)), vcast_vf_f(1.0f), vcast_vf_f(SLEEF_INFINITYf))))), result); result = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), veq_vo_vf_vf(x, vcast_vf_f(0))), vmul_vf_vf_vf(vsel_vf_vo_vf_vf(yisodd, vsign_vf_vf(x), vcast_vf_f(1)), vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), vneg_vf_vf(y), y), vcast_vf_f(0)), vreinterpret_vm_vf(vcast_vf_f(SLEEF_INFINITYf))))), result); result = 
vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vreinterpret_vm_vf(result))); result = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(y, vcast_vf_f(0)), veq_vo_vf_vf(x, vcast_vf_f(1))), vcast_vf_f(1), result); return result; #else return expkf(dfmul_vf2_vf2_vf(logkf(x), y)); #endif } EXPORT CONST VECTOR_CC vfloat xfastpowf_u3500(vfloat x, vfloat y) { vfloat result = expk3f(vmul_vf_vf_vf(logk3f(vabs_vf_vf(x)), y)); vopmask yisint = vor_vo_vo_vo(veq_vo_vf_vf(vtruncate_vf_vf(y), y), vgt_vo_vf_vf(vabs_vf_vf(y), vcast_vf_f(1 << 24))); vopmask yisodd = vand_vo_vo_vo(vand_vo_vo_vo(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vtruncate_vi2_vf(y), vcast_vi2_i(1)), vcast_vi2_i(1)), yisint), vlt_vo_vf_vf(vabs_vf_vf(y), vcast_vf_f(1 << 24))); result = vsel_vf_vo_vf_vf(vand_vo_vo_vo(vsignbit_vo_vf(x), yisodd), vneg_vf_vf(result), result); result = vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), vcast_vf_f(0), result); result = vsel_vf_vo_vf_vf(veq_vo_vf_vf(y, vcast_vf_f(0)), vcast_vf_f(1), result); return result; } #endif // #if !defined(DETERMINISTIC) static INLINE CONST VECTOR_CC vfloat2 expk2f(vfloat2 d) { vfloat u = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(R_LN2f)); vint2 q = vrint_vi2_vf(u); vfloat2 s, t; s = dfadd2_vf2_vf2_vf(d, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf))); s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf))); u = vcast_vf_f(+0.1980960224e-3f); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(+0.1394256484e-2f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(+0.8333456703e-2f)); u = vmla_vf_vf_vf_vf(u, vf2getx_vf_vf2(s), vcast_vf_f(+0.4166637361e-1f)); t = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(s, u), vcast_vf_f(+0.166666659414234244790680580464e+0f)); t = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf2(s, t), vcast_vf_f(0.5)); t = dfadd2_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf2(dfsqu_vf2_vf2(s), t)); t = dfadd_vf2_vf_vf2(vcast_vf_f(1), t); t = vf2setx_vf2_vf2_vf(t, 
vldexp2_vf_vf_vi2(vf2getx_vf_vf2(t), q));
  t = vf2sety_vf2_vf2_vf(t, vldexp2_vf_vf_vi2(vf2gety_vf_vf2(t), q));

  t = vf2setx_vf2_vf2_vf(t, vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(vf2getx_vf_vf2(d), vcast_vf_f(-104)), vreinterpret_vm_vf(vf2getx_vf_vf2(t)))));
  t = vf2sety_vf2_vf2_vf(t, vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(vf2getx_vf_vf2(d), vcast_vf_f(-104)), vreinterpret_vm_vf(vf2gety_vf_vf2(t)))));

  return t;
}

#if !defined(DETERMINISTIC)
// Hyperbolic sine: (e^|x| - 1/e^|x|) * 0.5, with the exponential and its
// reciprocal in two-float arithmetic. The result becomes +inf where |x| > 89
// or the intermediate is NaN; the sign of x is then applied and a NaN input
// propagates.
EXPORT CONST VECTOR_CC vfloat xsinhf(vfloat x) {
  vfloat y = vabs_vf_vf(x);
  vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0)));
  d = dfsub_vf2_vf2_vf2(d, dfrec_vf2_vf2(d));
  y = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(0.5));

  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(89)), visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y);
  y = vmulsign_vf_vf_vf(y, x);
  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));

  return y;
}

// Hyperbolic cosine: (e^|x| + 1/e^|x|) * 0.5, with the same overflow and NaN
// handling as xsinhf but no sign transfer.
EXPORT CONST VECTOR_CC vfloat xcoshf(vfloat x) {
  vfloat y = vabs_vf_vf(x);
  vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0)));
  d = dfadd_vf2_vf2_vf2(d, dfrec_vf2_vf2(d));
  y = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(0.5));

  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(89)), visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y);
  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));

  return y;
}

// Hyperbolic tangent: (d - 1/d) / (d + 1/d) with d = e^|x| in two-float
// arithmetic. The result is 1 where |x| > 8.664339742 or the intermediate is
// NaN; the sign of x is applied afterwards.
EXPORT CONST VECTOR_CC vfloat xtanhf(vfloat x) {
  vfloat y = vabs_vf_vf(x);
  vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0)));
  vfloat2 e = dfrec_vf2_vf2(d);
  d = dfdiv_vf2_vf2_vf2(dfadd_vf2_vf2_vf2(d, dfneg_vf2_vf2(e)), dfadd_vf2_vf2_vf2(d, e));
  y = vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d));

  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(8.664339742f)), visnan_vo_vf(y)), vcast_vf_f(1.0f), y);
  y = vmulsign_vf_vf_vf(y, x);
  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x),
vreinterpret_vm_vf(y)));

  return y;
}

// Hyperbolic sine in plain float arithmetic: with e = expm1fk(|x|), the result
// is ((e + 2) / (e + 1)) * (0.5 * e). Overflow threshold is |x| > 88.
EXPORT CONST VECTOR_CC vfloat xsinhf_u35(vfloat x) {
  vfloat e = expm1fk(vabs_vf_vf(x));
  vfloat y = vdiv_vf_vf_vf(vadd_vf_vf_vf(e, vcast_vf_f(2)), vadd_vf_vf_vf(e, vcast_vf_f(1)));
  y = vmul_vf_vf_vf(y, vmul_vf_vf_vf(vcast_vf_f(0.5f), e));

  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(88)), visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y);
  y = vmulsign_vf_vf_vf(y, x);
  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));

  return y;
}

// Hyperbolic cosine in plain float arithmetic: 0.5 * e + 0.5 / e with
// e = xexpf(|x|). Overflow threshold is |x| > 88.
EXPORT CONST VECTOR_CC vfloat xcoshf_u35(vfloat x) {
  vfloat e = xexpf(vabs_vf_vf(x));
  vfloat y = vmla_vf_vf_vf_vf(vcast_vf_f(0.5f), e, vdiv_vf_vf_vf(vcast_vf_f(0.5), e));

  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(88)), visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y);
  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));

  return y;
}

// Hyperbolic tangent in plain float arithmetic: d / (2 + d) with
// d = expm1fk(2 * |x|). Saturates to 1 beyond |x| > 8.664339742.
EXPORT CONST VECTOR_CC vfloat xtanhf_u35(vfloat x) {
  vfloat d = expm1fk(vmul_vf_vf_vf(vcast_vf_f(2), vabs_vf_vf(x)));
  vfloat y = vdiv_vf_vf_vf(d, vadd_vf_vf_vf(vcast_vf_f(2), d));

  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(8.664339742f)), visnan_vo_vf(y)), vcast_vf_f(1.0f), y);
  y = vmulsign_vf_vf_vf(y, x);
  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));

  return y;
}
#endif // #if !defined(DETERMINISTIC)

// Internal natural-log kernel with two-float (vfloat2) input and output; used
// by the inverse hyperbolic functions. The exponent e is taken from the high
// part of d, and d is rescaled by 2^-e using vpow2i. There is no denormal
// pre-scaling and no special-case patching here.
static INLINE CONST VECTOR_CC vfloat2 logk2f(vfloat2 d) {
  vfloat2 x, x2, m, s;
  vfloat t;
  vint2 e;

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  e = vilogbk_vi2_vf(vmul_vf_vf_vf(vf2getx_vf_vf2(d), vcast_vf_f(1.0f/0.75f)));
#else
  e = vrint_vi2_vf(vgetexp_vf_vf(vmul_vf_vf_vf(vf2getx_vf_vf2(d), vcast_vf_f(1.0f/0.75f))));
#endif
  m = dfscale_vf2_vf2_vf(d, vpow2i_vf_vi2(vneg_vi2_vi2(e)));

  x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(m, vcast_vf_f(-1)), dfadd2_vf2_vf2_vf(m, vcast_vf_f(1)));
  x2 = dfsqu_vf2_vf2(x);

  t = vcast_vf_f(0.2392828464508056640625f);
  t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(x2),
vcast_vf_f(0.28518211841583251953125f));
  t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(x2), vcast_vf_f(0.400005877017974853515625f));
  t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(x2), vcast_vf_f(0.666666686534881591796875f));

  // s = e*ln2 + 2*x + x^3 * t
  s = dfmul_vf2_vf2_vf(vcast_vf2_vf_vf(vcast_vf_f(0.69314718246459960938f), vcast_vf_f(-1.904654323148236017e-09f)), vcast_vf_vi2(e));
  s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2)));
  s = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf(dfmul_vf2_vf2_vf2(x2, x), t));

  return s;
}

#if !defined(DETERMINISTIC)
// Inverse hyperbolic sine: logk2f(sqrt(d^2 + 1) + x).
// For |x| > 1 the square root is evaluated on d = 1/x and multiplied back by
// |x|, so the squared operand stays at or below 1. The result is a signed
// infinity where |x| > SQRT_FLT_MAX or the intermediate is NaN; a NaN input
// propagates and -0.0 is returned unchanged.
EXPORT CONST VECTOR_CC vfloat xasinhf(vfloat x) {
  vfloat y = vabs_vf_vf(x);
  vopmask o = vgt_vo_vf_vf(y, vcast_vf_f(1));
  vfloat2 d;

  d = vsel_vf2_vo_vf2_vf2(o, dfrec_vf2_vf(x), vcast_vf2_vf_vf(y, vcast_vf_f(0)));
  d = dfsqrt_vf2_vf2(dfadd2_vf2_vf2_vf(dfsqu_vf2_vf2(d), vcast_vf_f(1)));
  d = vsel_vf2_vo_vf2_vf2(o, dfmul_vf2_vf2_vf(d, y), d);

  d = logk2f(dfnormalize_vf2_vf2(dfadd2_vf2_vf2_vf(d, x)));
  y = vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d));

  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(SQRT_FLT_MAX)), visnan_vo_vf(y)), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), x), y);
  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));
  y = vsel_vf_vo_vf_vf(visnegzero_vo_vf(x), vcast_vf_f(-0.0), y);

  return y;
}

// Inverse hyperbolic cosine: logk2f(sqrt(x + 1) * sqrt(x - 1) + x) in two-float
// arithmetic. The result is +inf for |x| > SQRT_FLT_MAX or a NaN intermediate,
// all bits cleared (+0) for x == 1, and has the mask OR-ed in for x < 1 and
// for NaN inputs.
EXPORT CONST VECTOR_CC vfloat xacoshf(vfloat x) {
  vfloat2 d = logk2f(dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf2(dfsqrt_vf2_vf2(dfadd2_vf2_vf_vf(x, vcast_vf_f(1))), dfsqrt_vf2_vf2(dfadd2_vf2_vf_vf(x, vcast_vf_f(-1)))), x));
  vfloat y = vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d));

  y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(SQRT_FLT_MAX)), visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y);

  y = vreinterpret_vf_vm(vandnot_vm_vo32_vm(veq_vo_vf_vf(x, vcast_vf_f(1.0f)), vreinterpret_vm_vf(y)));

  y = vreinterpret_vf_vm(vor_vm_vo32_vm(vlt_vo_vf_vf(x, vcast_vf_f(1.0f)), vreinterpret_vm_vf(y)));
  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));
  return y;
}

// Inverse hyperbolic tangent: 0.5 * logk2f((1 + |x|) / (1 - |x|)), with the
// quotient in two-float arithmetic. |x| == 1 yields infinity; |x| > 1,
// infinite x and NaN have the mask OR-ed into the result bits. The sign of x
// is applied afterwards.
EXPORT CONST VECTOR_CC vfloat xatanhf(vfloat x) {
  vfloat y = vabs_vf_vf(x);
  vfloat2 d = logk2f(dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(1), y), dfadd2_vf2_vf_vf(vcast_vf_f(1), vneg_vf_vf(y))));
  y = vreinterpret_vf_vm(vor_vm_vo32_vm(vgt_vo_vf_vf(y, vcast_vf_f(1.0)), vreinterpret_vm_vf(vsel_vf_vo_vf_vf(veq_vo_vf_vf(y, vcast_vf_f(1.0)), vcast_vf_f(SLEEF_INFINITYf), vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(0.5))))));

  y = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(x), visnan_vo_vf(y)), vreinterpret_vm_vf(y)));
  y = vmulsign_vf_vf_vf(y, x);
  y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y)));

  return y;
}
#endif // #if !defined(DETERMINISTIC)

#if !defined(DETERMINISTIC)
// Base-2 exponential. u = rint(d) is the integer part (also converted to q)
// and s = d - u is the fractional part fed to the polynomial. The final
// "* s + 1" step uses a fused multiply-add when ENABLE_FMA_SP is defined and a
// two-float product/sum otherwise. The result is +inf for d >= 128 and zero
// for d < -150.
EXPORT CONST VECTOR_CC vfloat xexp2f(vfloat d) {
  vfloat u = vrint_vf_vf(d), s;
  vint2 q = vrint_vi2_vf(u);

  s = vsub_vf_vf_vf(d, u);

  u = vcast_vf_f(+0.1535920892e-3);
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1339262701e-2));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.9618384764e-2));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.5550347269e-1));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2402264476e+0));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.6931471825e+0));

#ifdef ENABLE_FMA_SP
  u = vfma_vf_vf_vf_vf(u, s, vcast_vf_f(1));
#else
  u = vf2getx_vf_vf2(dfnormalize_vf2_vf2(dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf_vf(u, s))));
#endif

  u = vldexp2_vf_vf_vi2(u, q);

  // NOTE(review): this passes SLEEF_INFINITY to vcast_vf_f where the other
  // float kernels in this file pass SLEEF_INFINITYf -- presumably equivalent
  // after conversion; confirm.
  u = vsel_vf_vo_vf_vf(vge_vo_vf_vf(d, vcast_vf_f(128)), vcast_vf_f(SLEEF_INFINITY), u);
  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-150)), vreinterpret_vm_vf(u)));

  return u;
}

// Base-2 exponential in plain float arithmetic: same reduction and
// coefficients as xexp2f, but the final "* s + 1" step is an ordinary vmla.
EXPORT CONST VECTOR_CC vfloat xexp2f_u35(vfloat d) {
  vfloat u = vrint_vf_vf(d), s;
  vint2 q = vrint_vi2_vf(u);

  s = vsub_vf_vf_vf(d, u);

  u = vcast_vf_f(+0.1535920892e-3);
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1339262701e-2));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.9618384764e-2));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.5550347269e-1));
  u = vmla_vf_vf_vf_vf(u, s,
vcast_vf_f(+0.2402264476e+0));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.6931471825e+0));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1000000000e+1));

  u = vldexp2_vf_vf_vi2(u, q);

  u = vsel_vf_vo_vf_vf(vge_vo_vf_vf(d, vcast_vf_f(128)), vcast_vf_f(SLEEF_INFINITY), u);
  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-150)), vreinterpret_vm_vf(u)));

  return u;
}

// Base-10 exponential. u = rint(d * LOG10_2) is the power of two (converted to
// q); s is d with u*L10Uf and u*L10Lf subtracted in two steps. The last two
// polynomial steps use two-float arithmetic, with the 2.302585... (ln 10)
// coefficient held as a (head, tail) pair. The result is +inf for
// d > 38.5318394... and zero for d < -50.
EXPORT CONST VECTOR_CC vfloat xexp10f(vfloat d) {
  vfloat u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(LOG10_2))), s;
  vint2 q = vrint_vi2_vf(u);

  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-L10Uf), d);
  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-L10Lf), s);

  u = vcast_vf_f(+0.6802555919e-1);
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2078080326e+0));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.5393903852e+0));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1171245337e+1));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2034678698e+1));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2650949001e+1));
  vfloat2 x = dfadd_vf2_vf2_vf(vcast_vf2_f_f(2.3025851249694824219, -3.1705172516493593157e-08), vmul_vf_vf_vf(u, s));
  u = vf2getx_vf_vf2(dfnormalize_vf2_vf2(dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf(x, s))));

  u = vldexp2_vf_vf_vi2(u, q);

  u = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(d, vcast_vf_f(38.5318394191036238941387f)), vcast_vf_f(SLEEF_INFINITYf), u);
  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-50)), vreinterpret_vm_vf(u)));

  return u;
}

// Base-10 exponential in plain float arithmetic: same range reduction and
// clamps as xexp10f, with its own coefficient set evaluated entirely via vmla.
EXPORT CONST VECTOR_CC vfloat xexp10f_u35(vfloat d) {
  vfloat u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(LOG10_2))), s;
  vint2 q = vrint_vi2_vf(u);

  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-L10Uf), d);
  s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-L10Lf), s);

  u = vcast_vf_f(+0.2064004987e+0);
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.5417877436e+0));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1171286821e+1));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2034656048e+1));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2650948763e+1));
  u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2302585125e+1));
  u = vmla_vf_vf_vf_vf(u,
s, vcast_vf_f(+0.1000000000e+1));

  u = vldexp2_vf_vf_vi2(u, q);

  u = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(d, vcast_vf_f(38.5318394191036238941387f)), vcast_vf_f(SLEEF_INFINITYf), u);
  u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-50)), vreinterpret_vm_vf(u)));

  return u;
}

// xexpm1f: per-lane exp(a) - 1.
//
// expk2f is called on a promoted to a double-float (a, 0); -1 is added with
// dfadd2 and the high and low parts are summed into a single vfloat.
// Range overrides applied afterwards:
//   a > 88.72283...   -> +infinity
//   a < -16.63553...  -> -1
//   a is negative zero -> -0.0f
EXPORT CONST VECTOR_CC vfloat xexpm1f(vfloat a) {
  vfloat2 d = dfadd2_vf2_vf2_vf(expk2f(vcast_vf2_vf_vf(a, vcast_vf_f(0))), vcast_vf_f(-1.0));
  vfloat x = vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d));
  x = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(a, vcast_vf_f(88.72283172607421875f)), vcast_vf_f(SLEEF_INFINITYf), x);
  x = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(a, vcast_vf_f(-16.635532333438687426013570f)), vcast_vf_f(-1), x);
  x = vsel_vf_vo_vf_vf(visnegzero_vo_vf(a), vcast_vf_f(-0.0f), x);
  return x;
}
#endif // #if !defined(DETERMINISTIC)

#if !defined(DETERMINISTIC)
// xlog10f: per-lane base-10 logarithm of d.
//
// Generic path (no AVX512F): lanes below FLT_MIN are multiplied by 2^64 and the
// extracted exponent e is reduced by 64 afterwards; m is d scaled by 2^-e.
// AVX512F path: exponent and mantissa come from vgetexp / vgetmant, and an
// exponent of +inf is replaced by 128.
// Both paths then form x = (m - 1) / (m + 1) in double-float arithmetic and
// evaluate a short polynomial in x^2.
EXPORT CONST VECTOR_CC vfloat xlog10f(vfloat d) {
  vfloat2 x;
  vfloat t, m, x2;

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN));
  // Rescale very small lanes by 2^32 * 2^32 before extracting the exponent.
  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d);
  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75)));
  m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e));
  // Undo the 2^64 rescale in the exponent.
  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
#else
  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75)));
  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
  m = vgetmant_vf_vf(d);
#endif

  x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m));
  x2 = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x));

  t = vcast_vf_f(+0.1314289868e+0);
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f( +0.1735493541e+0));
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f( +0.2895309627e+0));

  // s starts as e times the (high, low) constant pair 0.30103001 / -1.432098889e-08.
#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.30103001, -1.432098889e-08), vcast_vf_vi2(e));
#else
  vfloat2 s =
dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.30103001, -1.432098889e-08), e);
#endif

  s = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf2(x, vcast_vf2_f_f(0.868588984, -2.170757285e-08)));
  s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)), t));

  vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(s), vf2gety_vf_vf2(s));

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  // Special cases, applied in this order: +inf -> +inf, negative or NaN -> NaN, zero -> -inf.
  r = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITY), r);
  r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), vcast_vf_f(SLEEF_NAN), r);
  r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITY), r);
#else
  // NOTE(review): the packed 4-bit fields presumably form a fixup table that
  // reproduces the special cases of the generic branch -- confirm against vfixup.
  r = vfixup_vf_vf_vf_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0);
#endif

  return r;
}

// xlog2f: per-lane base-2 logarithm of d.
//
// Uses the same exponent/mantissa split as xlog10f: lanes below FLT_MIN are
// rescaled by 2^64 on the generic path, vgetexp / vgetmant are used on AVX512F.
// The result is e + x * (2.88539..., 3.27e-08) + x^3 * poly(x^2), accumulated
// in double-float arithmetic, where x = (m - 1) / (m + 1).
EXPORT CONST VECTOR_CC vfloat xlog2f(vfloat d) {
  vfloat2 x;
  vfloat t, m, x2;

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN));
  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d);
  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75)));
  m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e));
  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
#else
  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75)));
  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
  m = vgetmant_vf_vf(d);
#endif

  x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m));
  x2 = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x));

  t = vcast_vf_f(+0.4374550283e+0f);
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.5764790177e+0f));
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.9618012905120f));

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vfloat2 s = dfadd2_vf2_vf_vf2(vcast_vf_vi2(e),
				dfmul_vf2_vf2_vf2(x, vcast_vf2_f_f(2.8853900432586669922, 3.2734474483568488616e-08)));
#else
  vfloat2 s = dfadd2_vf2_vf_vf2(e,
				dfmul_vf2_vf2_vf2(x,
vcast_vf2_f_f(2.8853900432586669922, 3.2734474483568488616e-08)));
#endif

  s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)), t));

  vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(s), vf2gety_vf_vf2(s));

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  // Special cases, applied in this order: +inf -> +inf, negative or NaN -> NaN, zero -> -inf.
  r = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITY), r);
  r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), vcast_vf_f(SLEEF_NAN), r);
  r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITY), r);
#else
  r = vfixup_vf_vf_vf_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0);
#endif

  return r;
}

// xlog2f_u35: per-lane base-2 logarithm evaluated entirely in plain vfloat
// arithmetic (no double-float helpers are used in this variant).
//
// x = (m - 1) / (m + 1); result = x^3 * poly(x^2) + (x * 2.885390043 + e),
// followed by the same special-case handling as the other log kernels.
EXPORT CONST VECTOR_CC vfloat xlog2f_u35(vfloat d) {
  vfloat m, t, x, x2;

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN));
  // Rescale very small lanes by 2^64 and compensate in the exponent below.
  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), d);
  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75)));
  m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e));
  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
#else
  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75)));
  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
  m = vgetmant_vf_vf(d);
#endif

  x = vdiv_vf_vf_vf(vsub_vf_vf_vf(m, vcast_vf_f(1)), vadd_vf_vf_vf(m, vcast_vf_f(1)));
  x2 = vmul_vf_vf_vf(x, x);

  t = vcast_vf_f(+0.4374088347e+0);
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.5764843822e+0));
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.9618024230e+0));

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vfloat r = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(x2, x), t,
			      vmla_vf_vf_vf_vf(x, vcast_vf_f(+0.2885390043e+1), vcast_vf_vi2(e)));

  r = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITY), r);
  r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), vcast_vf_f(SLEEF_NAN), r);
  r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d,
vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITY), r);
#else
  vfloat r = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(x2, x), t,
			      vmla_vf_vf_vf_vf(x, vcast_vf_f(+0.2885390043e+1), e));

  r = vfixup_vf_vf_vf_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0);
#endif

  return r;
}

// xlog1pf: per-lane log(1 + d).
//
// The exponent e is taken from dp1 = d + 1, while the reduced argument m is
// rebuilt from d itself as m = d * t + (t - 1) with t = 2^-e (assuming vmla
// is a*b + c), rather than from the already rounded dp1.
// x = m / (2 + m) is formed in double-float arithmetic and the result is
// e * (0.693147..., -1.9e-09) + 2 * x + x^3 * poly(x^2).
// The range overrides that follow the last statement shown here are part of
// the same function.
EXPORT CONST VECTOR_CC vfloat xlog1pf(vfloat d) {
  vfloat2 x;
  vfloat t, m, x2;

  vfloat dp1 = vadd_vf_vf_vf(d, vcast_vf_f(1));

#if !defined(ENABLE_AVX512F) && !defined(ENABLE_AVX512FNOFMA)
  vopmask o = vlt_vo_vf_vf(dp1, vcast_vf_f(FLT_MIN));
  // Rescale very small dp1 by 2^64; e is corrected by 64 further down.
  dp1 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(dp1, vcast_vf_f((float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32))), dp1);
  vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(dp1, vcast_vf_f(1.0f/0.75f)));
  t = vldexp3_vf_vf_vi2(vcast_vf_f(1), vneg_vi2_vi2(e));
  m = vmla_vf_vf_vf_vf(d, t, vsub_vf_vf_vf(t, vcast_vf_f(1)));
  e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e);
  vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), vcast_vf_vi2(e));
#else
  vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(dp1, vcast_vf_f(1.0f/0.75f)));
  e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e);
  t = vldexp3_vf_vf_vi2(vcast_vf_f(1), vneg_vi2_vi2(vrint_vi2_vf(e)));
  m = vmla_vf_vf_vf_vf(d, t, vsub_vf_vf_vf(t, vcast_vf_f(1)));
  vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), e);
#endif

  x = dfdiv_vf2_vf2_vf2(vcast_vf2_vf_vf(m, vcast_vf_f(0)), dfadd_vf2_vf_vf(vcast_vf_f(2), m));
  x2 = vmul_vf_vf_vf(vf2getx_vf_vf2(x), vf2getx_vf_vf2(x));

  t = vcast_vf_f(+0.3027294874e+0f);
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.3996108174e+0f));
  t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.6666694880e+0f));

  s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2)));
  s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)), t));

  vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(s), vf2gety_vf_vf2(s));

  // Very large inputs saturate to +infinity.
  r = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(d, vcast_vf_f(1e+38)), vcast_vf_f(SLEEF_INFINITYf), r);
r = vreinterpret_vf_vm(vor_vm_vo32_vm(vgt_vo_vf_vf(vcast_vf_f(-1), d), vreinterpret_vm_vf(r))); r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(-1)), vcast_vf_f(-SLEEF_INFINITYf), r); r = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), r); return r; } #endif // #if !defined(DETERMINISTIC) // #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vfloat xfabsf(vfloat x) { return vabs_vf_vf(x); } EXPORT CONST VECTOR_CC vfloat xcopysignf(vfloat x, vfloat y) { return vcopysign_vf_vf_vf(x, y); } EXPORT CONST VECTOR_CC vfloat xfmaxf(vfloat x, vfloat y) { #if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC) return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, vmax_vf_vf_vf(x, y)); #else return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, vsel_vf_vo_vf_vf(vgt_vo_vf_vf(x, y), x, y)); #endif } EXPORT CONST VECTOR_CC vfloat xfminf(vfloat x, vfloat y) { #if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC) return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, vmin_vf_vf_vf(x, y)); #else return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, vsel_vf_vo_vf_vf(vgt_vo_vf_vf(y, x), x, y)); #endif } EXPORT CONST VECTOR_CC vfloat xfdimf(vfloat x, vfloat y) { vfloat ret = vsub_vf_vf_vf(x, y); ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(ret, vcast_vf_f(0)), veq_vo_vf_vf(x, y)), vcast_vf_f(0), ret); return ret; } EXPORT CONST VECTOR_CC vfloat xtruncf(vfloat x) { #ifdef FULL_FP_ROUNDING return vtruncate_vf_vf(x); #else vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x))); return vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), vge_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(INT64_C(1) << 23))), x, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), x)); #endif } EXPORT CONST VECTOR_CC vfloat xfloorf(vfloat x) { vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x))); fr = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(fr, vcast_vf_f(0)), vadd_vf_vf_vf(fr, vcast_vf_f(1.0f)), fr); return vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), 
vge_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(INT64_C(1) << 23))), x, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), x));
}

// xceilf: lane-wise ceiling.
// fr is the fractional part from an integer round trip; when it is positive
// it is lowered by 1 so that x - fr rounds up. Infinities and lanes with
// |x| >= 2^23 pass through unchanged; the sign of x is copied onto the result.
EXPORT CONST VECTOR_CC vfloat xceilf(vfloat x) {
  vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x)));
  fr = vsel_vf_vo_vf_vf(vle_vo_vf_vf(fr, vcast_vf_f(0)), fr, vsub_vf_vf_vf(fr, vcast_vf_f(1.0f)));
  return vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), vge_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(INT64_C(1) << 23))), x, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), x));
}

// xroundf: lane-wise rounding built on x = d + 0.5 followed by a floor-like
// removal of the fractional part.
// The order of the selects matters: x is first corrected using the
// unadjusted fr, then fr is adjusted, then the d == 0.49999997 special case
// overrides x.
EXPORT CONST VECTOR_CC vfloat xroundf(vfloat d) {
  vfloat x = vadd_vf_vf_vf(d, vcast_vf_f(0.5f));
  vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x)));
  // Non-positive exact integers after the +0.5 shift are moved down by one.
  x = vsel_vf_vo_vf_vf(vand_vo_vo_vo(vle_vo_vf_vf(x, vcast_vf_f(0)), veq_vo_vf_vf(fr, vcast_vf_f(0))), vsub_vf_vf_vf(x, vcast_vf_f(1.0f)), x);
  // Make the fractional part non-negative.
  fr = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(fr, vcast_vf_f(0)), vadd_vf_vf_vf(fr, vcast_vf_f(1.0f)), fr);
  // NOTE(review): 0.49999997 is presumably the largest float below 0.5, for
  // which d + 0.5 would round up to 1 -- confirm.
  x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0.4999999701976776123f)), vcast_vf_f(0), x);
  return vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(d), vge_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(INT64_C(1) << 23))), d, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), d));
}

// xrintf: lane-wise rounding to an integral value.
// Without FULL_FP_ROUNDING, a constant c of magnitude 2^23 carrying the sign
// of d is added and subtracted again; lanes with |d| > 2^23 are returned
// unchanged, and vorsign applies the sign of d to the result.
EXPORT CONST VECTOR_CC vfloat xrintf(vfloat d) {
#ifdef FULL_FP_ROUNDING
  return vrint_vf_vf(d);
#else
  vfloat c = vmulsign_vf_vf_vf(vcast_vf_f(1 << 23), d);
  return vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1 << 23)),
			  d, vorsign_vf_vf_vf(vsub_vf_vf_vf(vadd_vf_vf_vf(d, c), c), d));
#endif
}

// xfmaf: lane-wise fused multiply-add x * y + z.
// With ENABLE_FMA_SP the hardware FMA is used directly. Otherwise the product
// is formed in double-float arithmetic; the operands are rescaled first when
// the naive result h2 = x * y + z is tiny (< 1e-38) or huge (> 1e+38), and q
// holds the factor that undoes the rescale at the end.
EXPORT CONST VECTOR_CC vfloat xfmaf(vfloat x, vfloat y, vfloat z) {
#ifdef ENABLE_FMA_SP
  return vfma_vf_vf_vf_vf(x, y, z);
#else
  vfloat h2 = vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z), q = vcast_vf_f(1);
  vopmask o = vlt_vo_vf_vf(vabs_vf_vf(h2), vcast_vf_f(1e-38f));
  {
    // c0 = 2^25, c1 = 2^50, c2 = 2^100: x and y are scaled by c1 each, z by c2.
    const float c0 = UINT64_C(1) << 25, c1 = c0 * c0, c2 = c1 * c1;
    x = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(x, vcast_vf_f(c1)), x);
    y = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(y, vcast_vf_f(c1)), y);
    z = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(z, vcast_vf_f(c2)), z);
    q = vsel_vf_vo_vf_vf(o,
vcast_vf_f(1.0f / c2), q);
  }
  o = vgt_vo_vf_vf(vabs_vf_vf(h2), vcast_vf_f(1e+38f));
  {
    // Huge results: scale the operands down instead (x, y by 1/c1, z by 1/c2).
    const float c0 = UINT64_C(1) << 25, c1 = c0 * c0, c2 = c1 * c1;
    x = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(x, vcast_vf_f(1.0f / c1)), x);
    y = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(y, vcast_vf_f(1.0f / c1)), y);
    z = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(z, vcast_vf_f(1.0f / c2)), z);
    q = vsel_vf_vo_vf_vf(o, vcast_vf_f(c2), q);
  }
  // Double-float product plus z; when x or y is zero the result is z itself.
  vfloat2 d = dfmul_vf2_vf_vf(x, y);
  d = dfadd2_vf2_vf2_vf(d, z);
  vfloat ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(0)), veq_vo_vf_vf(y, vcast_vf_f(0))), z, vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)));
  // o: z is infinite while x and y are both finite and not NaN -> result is z.
  o = visinf_vo_vf(z);
  o = vandnot_vo_vo_vo(visinf_vo_vf(x), o);
  o = vandnot_vo_vo_vo(visnan_vo_vf(x), o);
  o = vandnot_vo_vo_vo(visinf_vo_vf(y), o);
  o = vandnot_vo_vo_vo(visnan_vo_vf(y), o);
  h2 = vsel_vf_vo_vf_vf(o, z, h2);

  // If the naive result is inf or NaN it is returned as is; otherwise the
  // double-float result is rescaled by q.
  o = vor_vo_vo_vo(visinf_vo_vf(h2), visnan_vo_vf(h2));

  return vsel_vf_vo_vf_vf(o, h2, vmul_vf_vf_vf(ret, q));
#endif
}
#endif // #if !defined(DETERMINISTIC)

#if !defined(SLEEF_GENHEADER)
// Builds a vint2 from two ints by reinterpreting the mask produced by vcast_vm_i_i.
static INLINE CONST VECTOR_CC vint2 vcast_vi2_i_i(int i0, int i1) { return vcast_vi2_vm(vcast_vm_i_i(i0, i1)); }
#endif

// xsqrtf_u05: lane-wise square root computed without relying on vsqrt.
//
// Negative lanes are turned into NaN up front. Lanes below 5.29e-23 are
// multiplied by 1.89e+22 and the matching correction factor is kept in q.
// FMA path: an initial reciprocal-square-root estimate is produced by the
// integer bit trick (0x5f3759df - (bits >> 1)) and refined with fused
// multiply-add steps.
// NOTE(review): vfmanp / vfmapn are presumably the negated FMA variants
// (z - x*y and x*y - z) -- confirm in the helper header.
SQRTFU05_FUNCATR VECTOR_CC vfloat xsqrtf_u05(vfloat d) {
#if defined(ENABLE_FMA_SP)
  vfloat q, w, x, y, z;

  d = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(SLEEF_NANf), d);

  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(5.2939559203393770e-23f));
  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(1.8889465931478580e+22f)), d);
  q = vsel_vf_vo_vf_vf(o, vcast_vf_f(7.2759576141834260e-12f), vcast_vf_f(1.0f));

  // Initial estimate from the bit pattern of d.
  y = vreinterpret_vf_vi2(vsub_vi2_vi2_vi2(vcast_vi2_i(0x5f3759df), vsrl_vi2_vi2_i(vreinterpret_vi2_vf(d), 1)));

  x = vmul_vf_vf_vf(d, y);         w = vmul_vf_vf_vf(vcast_vf_f(0.5), y);
  y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(0.5));
  x = vfma_vf_vf_vf_vf(x, y, x);   w = vfma_vf_vf_vf_vf(w, y, w);
  y = vfmanp_vf_vf_vf_vf(x, w, vcast_vf_f(0.5));
  x = vfma_vf_vf_vf_vf(x, y, x);   w = vfma_vf_vf_vf_vf(w, y, w);

  y = vfmanp_vf_vf_vf_vf(x, w,
vcast_vf_f(1.5));  w = vadd_vf_vf_vf(w, w);
  w = vmul_vf_vf_vf(w, y);
  x = vmul_vf_vf_vf(w, d);
  y = vfmapn_vf_vf_vf_vf(w, d, x); z = vfmanp_vf_vf_vf_vf(w, x, vcast_vf_f(1));

  z = vfmanp_vf_vf_vf_vf(w, y, z); w = vmul_vf_vf_vf(vcast_vf_f(0.5), x);
  w = vfma_vf_vf_vf_vf(w, z, y);
  w = vadd_vf_vf_vf(w, x);

  // Undo the small-input rescale.
  w = vmul_vf_vf_vf(w, q);

  // Zero and +infinity are returned unchanged.
  w = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(d, vcast_vf_f(0)),
				    veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf))), d, w);

  w = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(SLEEF_NANf), w);

  return w;
#else
  // Non-FMA path: both very small and very large lanes are rescaled, with the
  // compensating factor (already multiplied by 0.5) held in q.
  vfloat q;
  vopmask o;

  d = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(SLEEF_NANf), d);

  o = vlt_vo_vf_vf(d, vcast_vf_f(5.2939559203393770e-23f));
  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(1.8889465931478580e+22f)), d);
  q = vsel_vf_vo_vf_vf(o, vcast_vf_f(7.2759576141834260e-12f*0.5f), vcast_vf_f(0.5f));

  o = vgt_vo_vf_vf(d, vcast_vf_f(1.8446744073709552e+19f));
  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(5.4210108624275220e-20f)), d);
  q = vsel_vf_vo_vf_vf(o, vcast_vf_f(4294967296.0f * 0.5f), q);

  // Initial reciprocal-square-root estimate from the bit pattern of d + 1e-45.
  vfloat x = vreinterpret_vf_vi2(vsub_vi2_vi2_vi2(vcast_vi2_i(0x5f375a86), vsrl_vi2_vi2_i(vreinterpret_vi2_vf(vadd_vf_vf_vf(d, vcast_vf_f(1e-45f))), 1)));

  // Three identical refinement steps: x = x * (1.5 - 0.5 * d * x * x).
  x = vmul_vf_vf_vf(x, vsub_vf_vf_vf(vcast_vf_f(1.5f), vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(0.5f), d), x), x)));
  x = vmul_vf_vf_vf(x, vsub_vf_vf_vf(vcast_vf_f(1.5f), vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(0.5f), d), x), x)));
  x = vmul_vf_vf_vf(x, vsub_vf_vf_vf(vcast_vf_f(1.5f), vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(0.5f), d), x), x)));
  x = vmul_vf_vf_vf(x, d);

  // Final correction in double-float arithmetic: (d + x*x) * (1/x), scaled by q.
  vfloat2 d2 = dfmul_vf2_vf2_vf2(dfadd2_vf2_vf_vf2(d, dfmul_vf2_vf_vf(x, x)), dfrec_vf2_vf(x));

  x = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(d2), vf2gety_vf_vf2(d2)), q);

  x = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), x);
  x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), d, x);

  return x;
#endif
}

// xsqrtf: lane-wise square root; uses vsqrt_vf_vf when ACCURATE_SQRT is
// defined and otherwise forwards to xsqrtf_u05.
EXPORT CONST VECTOR_CC vfloat xsqrtf(vfloat d)
{ #ifdef ACCURATE_SQRT return vsqrt_vf_vf(d); #else // fall back to approximation if ACCURATE_SQRT is undefined return xsqrtf_u05(d); #endif } #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vfloat xhypotf_u05(vfloat x, vfloat y) { x = vabs_vf_vf(x); y = vabs_vf_vf(y); vfloat min = vmin_vf_vf_vf(x, y), n = min; vfloat max = vmax_vf_vf_vf(x, y), d = max; vopmask o = vlt_vo_vf_vf(max, vcast_vf_f(FLT_MIN)); n = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(n, vcast_vf_f(UINT64_C(1) << 24)), n); d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(UINT64_C(1) << 24)), d); vfloat2 t = dfdiv_vf2_vf2_vf2(vcast_vf2_vf_vf(n, vcast_vf_f(0)), vcast_vf2_vf_vf(d, vcast_vf_f(0))); t = dfmul_vf2_vf2_vf(dfsqrt_vf2_vf2(dfadd2_vf2_vf2_vf(dfsqu_vf2_vf2(t), vcast_vf_f(1))), max); vfloat ret = vadd_vf_vf_vf(vf2getx_vf_vf2(t), vf2gety_vf_vf2(t)); ret = vsel_vf_vo_vf_vf(visnan_vo_vf(ret), vcast_vf_f(SLEEF_INFINITYf), ret); ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(min, vcast_vf_f(0)), max, ret); ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vcast_vf_f(SLEEF_NANf), ret); ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(SLEEF_INFINITYf)), veq_vo_vf_vf(y, vcast_vf_f(SLEEF_INFINITYf))), vcast_vf_f(SLEEF_INFINITYf), ret); return ret; } EXPORT CONST VECTOR_CC vfloat xhypotf_u35(vfloat x, vfloat y) { x = vabs_vf_vf(x); y = vabs_vf_vf(y); vfloat min = vmin_vf_vf_vf(x, y), n = min; vfloat max = vmax_vf_vf_vf(x, y), d = max; vfloat t = vdiv_vf_vf_vf(min, max); vfloat ret = vmul_vf_vf_vf(max, vsqrt_vf_vf(vmla_vf_vf_vf_vf(t, t, vcast_vf_f(1)))); ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(min, vcast_vf_f(0)), max, ret); ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vcast_vf_f(SLEEF_NANf), ret); ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(SLEEF_INFINITYf)), veq_vo_vf_vf(y, vcast_vf_f(SLEEF_INFINITYf))), vcast_vf_f(SLEEF_INFINITYf), ret); return ret; } EXPORT CONST VECTOR_CC vfloat xnextafterf(vfloat x, vfloat y) { x = 
vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), vmulsign_vf_vf_vf(vcast_vf_f(0), y), x); vint2 t, xi2 = vreinterpret_vi2_vf(x); vopmask c = vxor_vo_vo_vo(vsignbit_vo_vf(x), vge_vo_vf_vf(y, x)); xi2 = vsel_vi2_vo_vi2_vi2(c, vsub_vi2_vi2_vi2(vcast_vi2_i(0), vxor_vi2_vi2_vi2(xi2, vcast_vi2_i(1 << 31))), xi2); xi2 = vsel_vi2_vo_vi2_vi2(vneq_vo_vf_vf(x, y), vsub_vi2_vi2_vi2(xi2, vcast_vi2_i(1)), xi2); xi2 = vsel_vi2_vo_vi2_vi2(c, vsub_vi2_vi2_vi2(vcast_vi2_i(0), vxor_vi2_vi2_vi2(xi2, vcast_vi2_i(1 << 31))), xi2); vfloat ret = vreinterpret_vf_vi2(xi2); ret = vsel_vf_vo_vf_vf(vand_vo_vo_vo(veq_vo_vf_vf(ret, vcast_vf_f(0)), vneq_vo_vf_vf(x, vcast_vf_f(0))), vmulsign_vf_vf_vf(vcast_vf_f(0), x), ret); ret = vsel_vf_vo_vf_vf(vand_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(0)), veq_vo_vf_vf(y, vcast_vf_f(0))), y, ret); ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vcast_vf_f(SLEEF_NANf), ret); return ret; } EXPORT CONST VECTOR_CC vfloat xfrfrexpf(vfloat x) { x = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(FLT_MIN)), vmul_vf_vf_vf(x, vcast_vf_f(UINT64_C(1) << 30)), x); vmask xm = vreinterpret_vm_vf(x); xm = vand_vm_vm_vm(xm, vcast_vm_i_i(~0x7f800000U, ~0x7f800000U)); xm = vor_vm_vm_vm (xm, vcast_vm_i_i( 0x3f000000U, 0x3f000000U)); vfloat ret = vreinterpret_vf_vm(xm); ret = vsel_vf_vo_vf_vf(visinf_vo_vf(x), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), x), ret); ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), x, ret); return ret; } #endif // #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vint2 xexpfrexpf(vfloat x) { /* x = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(FLT_MIN)), vmul_vf_vf_vf(x, vcast_vf_f(UINT64_C(1) << 63)), x); vint ret = vcastu_vi_vi2(vreinterpret_vi2_vf(x)); ret = vsub_vi_vi_vi(vand_vi_vi_vi(vsrl_vi_vi_i(ret, 20), vcast_vi_i(0x7ff)), vcast_vi_i(0x3fe)); ret = vsel_vi_vo_vi_vi(vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(0)), visnan_vo_vf(x)), visinf_vo_vf(x)), vcast_vi_i(0), ret); return ret; */ 
return vcast_vi2_i(0);
}

// vtoward0f: moves each lane one step toward zero by subtracting 1 from its
// integer bit pattern; lanes equal to zero yield 0.
static INLINE CONST VECTOR_CC vfloat vtoward0f(vfloat x) {
  vfloat t = vreinterpret_vf_vi2(vsub_vi2_vi2_vi2(vreinterpret_vi2_vf(x), vcast_vi2_i(1)));
  return vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), vcast_vf_f(0), t);
}

// vptruncf: truncation toward zero. Unlike xtruncf it has no special handling
// for infinities and does not copy the sign onto the result; lanes with
// |x| >= 2^23 are returned unchanged.
static INLINE CONST VECTOR_CC vfloat vptruncf(vfloat x) {
#ifdef FULL_FP_ROUNDING
  return vtruncate_vf_vf(x);
#else
  vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x)));
  return vsel_vf_vo_vf_vf(vge_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(INT64_C(1) << 23)), x, vsub_vf_vf_vf(x, fr));
#endif
}

#if !defined(DETERMINISTIC)
// xfmodf: lane-wise floating-point remainder of x / y with the sign of x.
//
// Works on magnitudes nu = |x| and de = |y|. When de is below FLT_MIN both are
// scaled by 2^25 and s holds the inverse factor. The remainder r is kept as a
// double-float and reduced iteratively: each pass estimates a quotient q from
// r * rde (rde is 1/de nudged toward zero so q is not over-estimated),
// subtracts q * de, and stops early once every lane satisfies r < de.
EXPORT CONST VECTOR_CC vfloat xfmodf(vfloat x, vfloat y) {
  vfloat nu = vabs_vf_vf(x), de = vabs_vf_vf(y), s = vcast_vf_f(1), q;
  vopmask o = vlt_vo_vf_vf(de, vcast_vf_f(FLT_MIN));
  nu = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(nu, vcast_vf_f(UINT64_C(1) << 25)), nu);
  de = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(de, vcast_vf_f(UINT64_C(1) << 25)), de);
  s  = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(s , vcast_vf_f(1.0f / (UINT64_C(1) << 25))), s);
  vfloat rde = vtoward0f(vrec_vf_vf(de));
#if defined(ENABLE_NEON32) || defined(ENABLE_NEON32VFPV4)
  // NOTE(review): the extra step is presumably needed because the reciprocal
  // is less accurate on these targets -- confirm.
  rde = vtoward0f(rde);
#endif
  vfloat2 r = vcast_vf2_vf_vf(nu, vcast_vf_f(0));

  for(int i=0;i<8;i++) { // ceil(log2(FLT_MAX) / 22)+1
    q = vptruncf(vmul_vf_vf_vf(vtoward0f(vf2getx_vf_vf2(r)), rde));
    // Force q to 2 when de <= r < 3*de, then to 1 when de <= r < 2*de.
    q = vsel_vf_vo_vf_vf(vand_vo_vo_vo(vgt_vo_vf_vf(vmul_vf_vf_vf(vcast_vf_f(3), de), vf2getx_vf_vf2(r)),
				       vge_vo_vf_vf(vf2getx_vf_vf2(r), de)),
			 vcast_vf_f(2), q);
    q = vsel_vf_vo_vf_vf(vand_vo_vo_vo(vgt_vo_vf_vf(vmul_vf_vf_vf(vcast_vf_f(2), de), vf2getx_vf_vf2(r)),
				       vge_vo_vf_vf(vf2getx_vf_vf2(r), de)),
			 vcast_vf_f(1), q);
    r = dfnormalize_vf2_vf2(dfadd2_vf2_vf2_vf2(r, dfmul_vf2_vf_vf(vptruncf(q), vneg_vf_vf(de))));
    if (vtestallones_i_vo32(vlt_vo_vf_vf(vf2getx_vf_vf2(r), de))) break;
  }

  vfloat ret = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(r), vf2gety_vf_vf2(r)), s);
  // A remainder that sums to exactly de is reported as 0.
  ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(r), vf2gety_vf_vf2(r)), de), vcast_vf_f(0), ret);

  ret =
vmulsign_vf_vf_vf(ret, x);

  // |x| < |y| -> x itself; y == 0 -> NaN.
  ret = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(nu, de), x, ret);
  ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(de, vcast_vf_f(0)), vcast_vf_f(SLEEF_NANf), ret);

  return ret;
}

// vrintfk2_vf_vf: rounding to an integral value, implemented exactly like
// xrintf (add and subtract a signed 2^23 constant unless FULL_FP_ROUNDING
// provides a native vrint).
static INLINE CONST VECTOR_CC vfloat vrintfk2_vf_vf(vfloat d) {
#ifdef FULL_FP_ROUNDING
  return vrint_vf_vf(d);
#else
  vfloat c = vmulsign_vf_vf_vf(vcast_vf_f(1 << 23), d);
  return vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1 << 23)),
			  d, vorsign_vf_vf_vf(vsub_vf_vf_vf(vadd_vf_vf_vf(d, c), c), d));
#endif
}

// xremainderf: lane-wise remainder of x / y with the quotient rounded to
// nearest (ties resolved using the parity tracked in qisodd).
//
// n = |x|, d = |y|; when d < 2*FLT_MIN both are scaled by 2^25 and s holds the
// inverse factor. r is the running double-float remainder. Each pass rounds
// r / d to an integer q, clamps q for small ratios, subtracts q * d, and
// updates qisodd with the parity of q. The loop ends early when q is zero in
// every lane.
EXPORT CONST VECTOR_CC vfloat xremainderf(vfloat x, vfloat y) {
  vfloat n = vabs_vf_vf(x), d = vabs_vf_vf(y), s = vcast_vf_f(1), q;
  vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN*2));
  n = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(n, vcast_vf_f(UINT64_C(1) << 25)), n);
  d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(UINT64_C(1) << 25)), d);
  s  = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(s , vcast_vf_f(1.0f / (UINT64_C(1) << 25))), s);
  vfloat2 r = vcast_vf2_vf_vf(n, vcast_vf_f(0));
  vfloat rd = vrec_vf_vf(d);
  // 0 != 0 is false in every lane: starts qisodd as an all-false mask.
  vopmask qisodd = vneq_vo_vf_vf(vcast_vf_f(0), vcast_vf_f(0));

  for(int i=0;i<8;i++) { // ceil(log2(FLT_MAX) / 22)+1
    q = vrintfk2_vf_vf(vmul_vf_vf_vf(vf2getx_vf_vf2(r), rd));
    // |r| < 1.5*d -> q = +-1 (sign of r).
    q = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vabs_vf_vf(vf2getx_vf_vf2(r)), vmul_vf_vf_vf(d, vcast_vf_f(1.5f))),
			 vmulsign_vf_vf_vf(vcast_vf_f(1.0f), vf2getx_vf_vf2(r)), q);
    // |r| < 0.5*d, or |r| == 0.5*d with an even accumulated quotient -> q = 0.
    q = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(vabs_vf_vf(vf2getx_vf_vf2(r)), vmul_vf_vf_vf(d, vcast_vf_f(0.5f))),
				      vandnot_vo_vo_vo(qisodd, veq_vo_vf_vf(vabs_vf_vf(vf2getx_vf_vf2(r)), vmul_vf_vf_vf(d, vcast_vf_f(0.5f))))),
			 vcast_vf_f(0.0), q);
    if (vtestallones_i_vo32(veq_vo_vf_vf(q, vcast_vf_f(0)))) break;
    // If q * d would overflow to infinity, move q one unit toward zero.
    q = vsel_vf_vo_vf_vf(visinf_vo_vf(vmul_vf_vf_vf(q, vneg_vf_vf(d))),
			 vadd_vf_vf_vf(q, vmulsign_vf_vf_vf(vcast_vf_f(-1), vf2getx_vf_vf2(r))),
			 q);
    // Parity is only tracked while |q| < 2^24.
    qisodd = vxor_vo_vo_vo(qisodd, vand_vo_vo_vo(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vtruncate_vi2_vf(q), vcast_vi2_i(1)), vcast_vi2_i(1)),
						 vlt_vo_vf_vf(vabs_vf_vf(q), vcast_vf_f(1 << 24))));
    r =
dfnormalize_vf2_vf2(dfadd2_vf2_vf2_vf2(r, dfmul_vf2_vf_vf(q, vneg_vf_vf(d))));
  }

  vfloat ret = vmul_vf_vf_vf(vadd_vf_vf_vf(vf2getx_vf_vf2(r), vf2gety_vf_vf2(r)), s);
  ret = vmulsign_vf_vf_vf(ret, x);
  // Infinite y: result is x, unless x is infinite too (NaN). Zero y: NaN.
  ret = vsel_vf_vo_vf_vf(visinf_vo_vf(y), vsel_vf_vo_vf_vf(visinf_vo_vf(x), vcast_vf_f(SLEEF_NANf), x), ret);
  ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(SLEEF_NANf), ret);
  return ret;
}
#endif // #if !defined(DETERMINISTIC)

//

// sinpifk: kernel returning sin(pi * d) as a double-float (vfloat2).
//
// u = 4*d is split into an even integer q and a remainder s = u - q. Bit 1 of
// q (mask o) selects between two coefficient sets inside the vsel_* calls;
// the o branch additionally multiplies by s2 and adds 1. Bit 2 of q selects
// lanes whose result is negated by XOR-ing the sign bit (-0.0) into both halves.
// NOTE(review): the two coefficient sets are presumably the cosine-type and
// sine-type series on the reduced argument -- confirm.
static INLINE CONST VECTOR_CC vfloat2 sinpifk(vfloat d) {
  vopmask o;
  vfloat u, s, t;
  vfloat2 x, s2;

  u = vmul_vf_vf_vf(d, vcast_vf_f(4.0));
  vint2 q = vtruncate_vi2_vf(u);
  // Round q to an even integer: add 1 for non-negative q (0 for negative), then clear bit 0.
  q = vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vxor_vi2_vi2_vi2(vsrl_vi2_vi2_i(q, 31), vcast_vi2_i(1))), vcast_vi2_i(~1));
  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2));

  s = vsub_vf_vf_vf(u, vcast_vf_vi2(q));
  t = s;
  s = vmul_vf_vf_vf(s, s);
  s2 = dfmul_vf2_vf_vf(t, t);

  //

  u = vsel_vf_vo_f_f(o, -0.2430611801e-7f, +0.3093842054e-6f);
  u = vmla_vf_vf_vf_vf(u, s, vsel_vf_vo_f_f(o, +0.3590577080e-5f, -0.3657307388e-4f));
  u = vmla_vf_vf_vf_vf(u, s, vsel_vf_vo_f_f(o, -0.3259917721e-3f, +0.2490393585e-2f));
  x = dfadd2_vf2_vf_vf2(vmul_vf_vf_vf(u, s),
			vsel_vf2_vo_f_f_f_f(o, 0.015854343771934509277, 4.4940051354032242811e-10,
					    -0.080745510756969451904, -1.3373665339076936258e-09));
  x = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(s2, x),
			 vsel_vf2_vo_f_f_f_f(o, -0.30842512845993041992, -9.0728339030733922277e-09,
					     0.78539818525314331055, -2.1857338617566484855e-08));

  x = dfmul_vf2_vf2_vf2(x, vsel_vf2_vo_vf2_vf2(o, s2, vcast_vf2_vf_vf(t, vcast_vf_f(0))));
  x = vsel_vf2_vo_vf2_vf2(o, dfadd2_vf2_vf2_vf(x, vcast_vf_f(1)), x);

  // Negate lanes in which bit 2 of q is set.
  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(4)), vcast_vi2_i(4));
  x = vf2setx_vf2_vf2_vf(x, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2getx_vf_vf2(x)))));
  x = vf2sety_vf2_vf2_vf(x, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))),
vreinterpret_vm_vf(vf2gety_vf_vf2(x)))));

  return x;
}

#if !defined(DETERMINISTIC)
// xsinpif_u05: lane-wise sin(pi * d) built on sinpifk.
// The double-float result is summed into one vfloat. Overrides, in order:
// d is -0 -> -0; |d| > TRIGRANGEMAX4f -> bits cleared (0); d infinite -> all
// bits set via the OR mask (a NaN bit pattern).
EXPORT CONST VECTOR_CC vfloat xsinpif_u05(vfloat d) {
  vfloat2 x = sinpifk(d);
  vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x));

  r = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0), r);
  r = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX4f)), vreinterpret_vm_vf(r)));
  r = vreinterpret_vf_vm(vor_vm_vo32_vm(visinf_vo_vf(d), vreinterpret_vm_vf(r)));

  return r;
}
#endif // #if !defined(DETERMINISTIC)

// cospifk: kernel returning cos(pi * d) as a double-float (vfloat2).
//
// Same structure and coefficients as sinpifk, with two differences visible in
// the code: the branch mask o is set when bit 1 of q is CLEAR (== 0), and the
// sign flip is taken from bit 2 of (q + 2) instead of q.
static INLINE CONST VECTOR_CC vfloat2 cospifk(vfloat d) {
  vopmask o;
  vfloat u, s, t;
  vfloat2 x, s2;

  u = vmul_vf_vf_vf(d, vcast_vf_f(4.0));
  vint2 q = vtruncate_vi2_vf(u);
  q = vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vxor_vi2_vi2_vi2(vsrl_vi2_vi2_i(q, 31), vcast_vi2_i(1))), vcast_vi2_i(~1));
  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0));

  s = vsub_vf_vf_vf(u, vcast_vf_vi2(q));
  t = s;
  s = vmul_vf_vf_vf(s, s);
  s2 = dfmul_vf2_vf_vf(t, t);

  //

  u = vsel_vf_vo_f_f(o, -0.2430611801e-7f, +0.3093842054e-6f);
  u = vmla_vf_vf_vf_vf(u, s, vsel_vf_vo_f_f(o, +0.3590577080e-5f, -0.3657307388e-4f));
  u = vmla_vf_vf_vf_vf(u, s, vsel_vf_vo_f_f(o, -0.3259917721e-3f, +0.2490393585e-2f));
  x = dfadd2_vf2_vf_vf2(vmul_vf_vf_vf(u, s),
			vsel_vf2_vo_f_f_f_f(o, 0.015854343771934509277, 4.4940051354032242811e-10,
					    -0.080745510756969451904, -1.3373665339076936258e-09));
  x = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(s2, x),
			 vsel_vf2_vo_f_f_f_f(o, -0.30842512845993041992, -9.0728339030733922277e-09,
					     0.78539818525314331055, -2.1857338617566484855e-08));

  x = dfmul_vf2_vf2_vf2(x, vsel_vf2_vo_vf2_vf2(o, s2, vcast_vf2_vf_vf(t, vcast_vf_f(0))));
  x = vsel_vf2_vo_vf2_vf2(o, dfadd2_vf2_vf2_vf(x, vcast_vf_f(1)), x);

  // Negate lanes in which bit 2 of (q + 2) is set.
  o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(4)), vcast_vi2_i(4));
  x = vf2setx_vf2_vf2_vf(x, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))),
vreinterpret_vm_vf(vf2getx_vf_vf2(x))))); x = vf2sety_vf2_vf2_vf(x, vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(vf2gety_vf_vf2(x))))); return x; } #if !defined(DETERMINISTIC) EXPORT CONST VECTOR_CC vfloat xcospif_u05(vfloat d) { vfloat2 x = cospifk(d); vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)); r = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX4f)), vcast_vf_f(1), r); r = vreinterpret_vf_vm(vor_vm_vo32_vm(visinf_vo_vf(d), vreinterpret_vm_vf(r))); return r; } #endif // #if !defined(DETERMINISTIC) #if !(defined(ENABLE_SVE) || defined(ENABLE_SVENOFMA)) typedef struct { vfloat2 a, b; } df2; static df2 df2setab_df2_vf2_vf2(vfloat2 a, vfloat2 b) { df2 r = { a, b }; return r; } static vfloat2 df2geta_vf2_df2(df2 d) { return d.a; } static vfloat2 df2getb_vf2_df2(df2 d) { return d.b; } #endif /* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ static CONST df2 gammafk(vfloat a) { vfloat2 clc = vcast_vf2_f_f(0, 0), clln = vcast_vf2_f_f(1, 0), clld = vcast_vf2_f_f(1, 0); vfloat2 v = vcast_vf2_f_f(1, 0), x, y, z; vfloat t, u; vopmask otiny = vlt_vo_vf_vf(vabs_vf_vf(a), vcast_vf_f(1e-30f)), oref = vlt_vo_vf_vf(a, vcast_vf_f(0.5)); x = vsel_vf2_vo_vf2_vf2(otiny, vcast_vf2_f_f(0, 0), vsel_vf2_vo_vf2_vf2(oref, dfadd2_vf2_vf_vf(vcast_vf_f(1), vneg_vf_vf(a)), vcast_vf2_vf_vf(a, vcast_vf_f(0)))); vopmask o0 = vand_vo_vo_vo(vle_vo_vf_vf(vcast_vf_f(0.5), vf2getx_vf_vf2(x)), vle_vo_vf_vf(vf2getx_vf_vf2(x), vcast_vf_f(1.2))); vopmask o2 = vle_vo_vf_vf(vcast_vf_f(2.3), vf2getx_vf_vf2(x)); y = dfnormalize_vf2_vf2(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(x, vcast_vf_f(1)), x)); y = dfnormalize_vf2_vf2(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(x, vcast_vf_f(2)), y)); vopmask o = vand_vo_vo_vo(o2, vle_vo_vf_vf(vf2getx_vf_vf2(x), vcast_vf_f(7))); clln = vsel_vf2_vo_vf2_vf2(o, y, clln); x = vsel_vf2_vo_vf2_vf2(o, dfadd2_vf2_vf2_vf(x, vcast_vf_f(3)), x); t = vsel_vf_vo_vf_vf(o2, 
vrec_vf_vf(vf2getx_vf_vf2(x)), vf2getx_vf_vf2(dfnormalize_vf2_vf2(dfadd2_vf2_vf2_vf(x, vsel_vf_vo_f_f(o0, -1, -2)))));
  // (statement above: t = 1/x for o2 lanes, otherwise x - 1 when o0 is set or x - 2)

  // Polynomial in t. Each vsel_vf_vo_vo_f_f_f picks one of three coefficients
  // according to o2 and o0 (argument order: o2 set, o0 set, neither).
  u = vsel_vf_vo_vo_f_f_f(o2, o0, +0.000839498720672087279971000786, +0.9435157776e+0f, +0.1102489550e-3f);
  u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, -5.17179090826059219329394422e-05, +0.8670063615e+0f, +0.8160019934e-4f));
  u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, -0.000592166437353693882857342347, +0.4826702476e+0f, +0.1528468856e-3f));
  u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, +6.97281375836585777403743539e-05, -0.8855129778e-1f, -0.2355068718e-3f));
  u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, +0.000784039221720066627493314301, +0.1013825238e+0f, +0.4962242092e-3f));
  u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, -0.000229472093621399176949318732, -0.1493408978e+0f, -0.1193488017e-2f));
  u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, -0.002681327160493827160473958490, +0.1697509140e+0f, +0.2891599433e-2f));
  u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, +0.003472222222222222222175164840, -0.2072454542e+0f, -0.7385451812e-2f));
  u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, +0.083333333333333333335592087900, +0.2705872357e+0f, +0.2058077045e-1f));

  // Large-argument branch (used where o2 is set): (x - 0.5) * log(x) - x + 0.5*log(2*pi).
  y = dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(x, vcast_vf_f(-0.5)), logk2f(x));
  y = dfadd2_vf2_vf2_vf2(y, dfneg_vf2_vf2(x));
  y = dfadd2_vf2_vf2_vf2(y, vcast_vf2_d(0.91893853320467278056)); // 0.5*log(2*M_PI)

  // Small-argument branch: continue the polynomial in double-float arithmetic.
  z = dfadd2_vf2_vf2_vf(dfmul_vf2_vf_vf (u, t), vsel_vf_vo_f_f(o0, -0.400686534596170958447352690395e+0f, -0.673523028297382446749257758235e-1f));
  z = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(z, t), vsel_vf_vo_f_f(o0, +0.822466960142643054450325495997e+0f, +0.322467033928981157743538726901e+0f));
  z = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(z, t), vsel_vf_vo_f_f(o0, -0.577215665946766039837398973297e+0f, +0.422784335087484338986941629852e+0f));
  z = dfmul_vf2_vf2_vf(z, t);

  clc = vsel_vf2_vo_vf2_vf2(o2, y, z);

  clld = vsel_vf2_vo_vf2_vf2(o2,
dfadd2_vf2_vf2_vf(dfmul_vf2_vf_vf(u, t), vcast_vf_f(1)), clld);

  y = clln;

  // Tiny inputs use log(2^60) as the log term; reflected inputs (oref) use
  // log(pi) - clc.
  clc = vsel_vf2_vo_vf2_vf2(otiny, vcast_vf2_d(41.58883083359671856503), // log(2^60)
			    vsel_vf2_vo_vf2_vf2(oref, dfadd2_vf2_vf2_vf2(vcast_vf2_d(1.1447298858494001639), dfneg_vf2_vf2(clc)), clc)); // log(M_PI)
  clln = vsel_vf2_vo_vf2_vf2(otiny, vcast_vf2_f_f(1, 0), vsel_vf2_vo_vf2_vf2(oref, clln, clld));

  // Skipped only when oref is false in every lane: multiply clld by sin(pi * t),
  // where t is a with multiples of 2^12 removed.
  if (!vtestallones_i_vo32(vnot_vo32_vo32(oref))) {
    t = vsub_vf_vf_vf(a, vmul_vf_vf_vf(vcast_vf_f(INT64_C(1) << 12), vcast_vf_vi2(vtruncate_vi2_vf(vmul_vf_vf_vf(a, vcast_vf_f(1.0 / (INT64_C(1) << 12)))))));
    x = dfmul_vf2_vf2_vf2(clld, sinpifk(t));
  }

  // Tiny inputs: the denominator is a * 2^60, matching the log(2^60) term above.
  clld = vsel_vf2_vo_vf2_vf2(otiny, vcast_vf2_vf_vf(vmul_vf_vf_vf(a, vcast_vf_f((INT64_C(1) << 30)*(float)(INT64_C(1) << 30))), vcast_vf_f(0)),
			     vsel_vf2_vo_vf2_vf2(oref, x, y));

  return df2setab_df2_vf2_vf2(clc, dfdiv_vf2_vf2_vf2(clln, clld));
}

#if !defined(DETERMINISTIC)
// xtgammaf_u1: lane-wise gamma function, computed as exp(d.a) * d.b from the
// gammafk result d.
// First override (-> NaN): a is -inf, a is a negative integer, or a is a
// negative number and the computed r is NaN.
// Second override (-> infinity with the sign of a): a is +inf or a number,
// a >= -FLT_MIN, and a == 0, a > 36, or r is NaN.
EXPORT CONST VECTOR_CC vfloat xtgammaf_u1(vfloat a) {
  df2 d = gammafk(a);
  vfloat2 y = dfmul_vf2_vf2_vf2(expk2f(df2geta_vf2_df2(d)), df2getb_vf2_df2(d));
  vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(y), vf2gety_vf_vf2(y));
  vopmask o;

  o = vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vf_vf(a, vcast_vf_f(-SLEEF_INFINITYf)),
				vand_vo_vo_vo(vlt_vo_vf_vf(a, vcast_vf_f(0)), visint_vo_vf(a))),
		   vand_vo_vo_vo(vand_vo_vo_vo(visnumber_vo_vf(a), vlt_vo_vf_vf(a, vcast_vf_f(0))), visnan_vo_vf(r)));
  r = vsel_vf_vo_vf_vf(o, vcast_vf_f(SLEEF_NANf), r);

  o = vand_vo_vo_vo(vand_vo_vo_vo(vor_vo_vo_vo(veq_vo_vf_vf(a, vcast_vf_f(SLEEF_INFINITYf)), visnumber_vo_vf(a)),
				  vge_vo_vf_vf(a, vcast_vf_f(-FLT_MIN))),
		    vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vf_vf(a, vcast_vf_f(0)), vgt_vo_vf_vf(a, vcast_vf_f(36))), visnan_vo_vf(r)));
  r = vsel_vf_vo_vf_vf(o, vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), a), r);

  return r;
}

// xlgammaf_u1: lane-wise log-gamma, computed as d.a + log(|d.b|) from the
// gammafk result d.
EXPORT CONST VECTOR_CC vfloat xlgammaf_u1(vfloat a) {
  df2 d = gammafk(a);
  vfloat2 y = dfadd2_vf2_vf2_vf2(df2geta_vf2_df2(d), logk2f(dfabs_vf2_vf2(df2getb_vf2_df2(d))));
  vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(y),
vf2gety_vf_vf2(y)); vopmask o; o = vor_vo_vo_vo(visinf_vo_vf(a), vor_vo_vo_vo(vand_vo_vo_vo(vle_vo_vf_vf(a, vcast_vf_f(0)), visint_vo_vf(a)), vand_vo_vo_vo(visnumber_vo_vf(a), visnan_vo_vf(r)))); r = vsel_vf_vo_vf_vf(o, vcast_vf_f(SLEEF_INFINITYf), r); return r; } /* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ EXPORT CONST VECTOR_CC vfloat xerff_u1(vfloat a) { vfloat s = a, t, u; vfloat2 d; a = vabs_vf_vf(a); vopmask o0 = vlt_vo_vf_vf(a, vcast_vf_f(1.1)); vopmask o1 = vlt_vo_vf_vf(a, vcast_vf_f(2.4)); vopmask o2 = vlt_vo_vf_vf(a, vcast_vf_f(4.0)); u = vsel_vf_vo_vf_vf(o0, vmul_vf_vf_vf(a, a), a); t = vsel_vf_vo_vo_f_f_f(o0, o1, +0.7089292194e-4f, -0.1792667899e-4f, -0.9495757695e-5f); t = vmla_vf_vf_vf_vf(t, u, vsel_vf_vo_vo_f_f_f(o0, o1, -0.7768311189e-3f, +0.3937633010e-3f, +0.2481465926e-3f)); t = vmla_vf_vf_vf_vf(t, u, vsel_vf_vo_vo_f_f_f(o0, o1, +0.5159463733e-2f, -0.3949181177e-2f, -0.2918176819e-2f)); t = vmla_vf_vf_vf_vf(t, u, vsel_vf_vo_vo_f_f_f(o0, o1, -0.2683781274e-1f, +0.2445474640e-1f, +0.2059706673e-1f)); t = vmla_vf_vf_vf_vf(t, u, vsel_vf_vo_vo_f_f_f(o0, o1, +0.1128318012e+0f, -0.1070996150e+0f, -0.9901899844e-1f)); d = dfmul_vf2_vf_vf(t, u); d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_d_d_d(o0, o1, -0.376125876000657465175213237214e+0, -0.634588905908410389971210809210e+0, -0.643598050547891613081201721633e+0)); d = dfmul_vf2_vf2_vf(d, u); d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_d_d_d(o0, o1, +0.112837916021059138255978217023e+1, -0.112879855826694507209862753992e+1, -0.112461487742845562801052956293e+1)); d = dfmul_vf2_vf2_vf(d, a); d = vsel_vf2_vo_vf2_vf2(o0, d, dfadd_vf2_vf_vf2(vcast_vf_f(1.0), dfneg_vf2_vf2(expk2f(d)))); u = vmulsign_vf_vf_vf(vsel_vf_vo_vf_vf(o2, vadd_vf_vf_vf(vf2getx_vf_vf2(d), vf2gety_vf_vf2(d)), vcast_vf_f(1)), s); u = vsel_vf_vo_vf_vf(visnan_vo_vf(a), vcast_vf_f(SLEEF_NANf), u); return u; } /* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ EXPORT CONST VECTOR_CC vfloat 
xerfcf_u15(vfloat a) { vfloat s = a, r = vcast_vf_f(0), t; vfloat2 u, d, x; a = vabs_vf_vf(a); vopmask o0 = vlt_vo_vf_vf(a, vcast_vf_f(1.0)); vopmask o1 = vlt_vo_vf_vf(a, vcast_vf_f(2.2)); vopmask o2 = vlt_vo_vf_vf(a, vcast_vf_f(4.3)); vopmask o3 = vlt_vo_vf_vf(a, vcast_vf_f(10.1)); u = vsel_vf2_vo_vf2_vf2(o1, vcast_vf2_vf_vf(a, vcast_vf_f(0)), dfdiv_vf2_vf2_vf2(vcast_vf2_f_f(1, 0), vcast_vf2_vf_vf(a, vcast_vf_f(0)))); t = vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, -0.8638041618e-4f, -0.6236977242e-5f, -0.3869504035e+0f, +0.1115344167e+1f); t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(u), vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, +0.6000166177e-3f, +0.5749821503e-4f, +0.1288077235e+1f, -0.9454904199e+0f)); t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(u), vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, -0.1665703603e-2f, +0.6002851478e-5f, -0.1816803217e+1f, -0.3667259514e+0f)); t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(u), vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, +0.1795156277e-3f, -0.2851036377e-2f, +0.1249150872e+1f, +0.7155663371e+0f)); t = vmla_vf_vf_vf_vf(t, vf2getx_vf_vf2(u), vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, +0.1914106123e-1f, +0.2260518074e-1f, -0.1328857988e+0f, -0.1262947265e-1f)); d = dfmul_vf2_vf2_vf(u, t); d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.102775359343930288081655368891e+0, -0.105247583459338632253369014063e+0, -0.482365310333045318680618892669e+0, -0.498961546254537647970305302739e+0)); d = dfmul_vf2_vf2_vf2(d, u); d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.636619483208481931303752546439e+0, -0.635609463574589034216723775292e+0, -0.134450203224533979217859332703e-2, -0.471199543422848492080722832666e-4)); d = dfmul_vf2_vf2_vf2(d, u); d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.112837917790537404939545770596e+1, -0.112855987376668622084547028949e+1, -0.572319781150472949561786101080e+0, -0.572364030327966044425932623525e+0)); x = dfmul_vf2_vf2_vf(vsel_vf2_vo_vf2_vf2(o1, d, vcast_vf2_vf_vf(vneg_vf_vf(a), 
vcast_vf_f(0))), a); x = vsel_vf2_vo_vf2_vf2(o1, x, dfadd2_vf2_vf2_vf2(x, d)); x = expk2f(x); x = vsel_vf2_vo_vf2_vf2(o1, x, dfmul_vf2_vf2_vf2(x, u)); r = vsel_vf_vo_vf_vf(o3, vadd_vf_vf_vf(vf2getx_vf_vf2(x), vf2gety_vf_vf2(x)), vcast_vf_f(0)); r = vsel_vf_vo_vf_vf(vsignbit_vo_vf(s), vsub_vf_vf_vf(vcast_vf_f(2), r), r); r = vsel_vf_vo_vf_vf(visnan_vo_vf(s), vcast_vf_f(SLEEF_NANf), r); return r; } #endif // #if !defined(DETERMINISTIC) #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) // See sleefsimddp.c for explanation of these macros #ifdef ENABLE_ALIAS #define DALIAS_vf_vf(FUNC) EXPORT CONST VECTOR_CC vfloat y ## FUNC(vfloat) __attribute__((alias( stringify(x ## FUNC) ))); #define DALIAS_vf2_vf(FUNC) EXPORT CONST VECTOR_CC vfloat2 y ## FUNC(vfloat) __attribute__((alias( stringify(x ## FUNC) ))); #define DALIAS_vf_vf_vf(FUNC) EXPORT CONST VECTOR_CC vfloat y ## FUNC(vfloat, vfloat) __attribute__((alias( stringify(x ## FUNC) ))); #define DALIAS_vf_vf_vf_vf(FUNC) EXPORT CONST VECTOR_CC vfloat y ## FUNC(vfloat, vfloat, vfloat) __attribute__((alias( stringify(x ## FUNC) ))); #else #define DALIAS_vf_vf(FUNC) EXPORT CONST VECTOR_CC vfloat y ## FUNC(vfloat d) { return x ## FUNC (d); } #define DALIAS_vf2_vf(FUNC) EXPORT CONST VECTOR_CC vfloat2 y ## FUNC(vfloat d) { return x ## FUNC (d); } #define DALIAS_vf_vf_vf(FUNC) EXPORT CONST VECTOR_CC vfloat y ## FUNC(vfloat x, vfloat y) { return x ## FUNC (x, y); } #define DALIAS_vf_vf_vf_vf(FUNC) EXPORT CONST VECTOR_CC vfloat y ## FUNC(vfloat x, vfloat y, vfloat z) { return x ## FUNC (x, y, z); } #endif DALIAS_vf2_vf(sincospif_u05) DALIAS_vf2_vf(sincospif_u35) DALIAS_vf2_vf(modff) DALIAS_vf_vf(atanf) DALIAS_vf_vf_vf(atan2f) DALIAS_vf_vf(asinf) DALIAS_vf_vf(acosf) DALIAS_vf_vf_vf(atan2f_u1) DALIAS_vf_vf(asinf_u1) DALIAS_vf_vf(acosf_u1) DALIAS_vf_vf(atanf_u1) DALIAS_vf_vf(logf) DALIAS_vf_vf(expf) DALIAS_vf_vf(cbrtf) DALIAS_vf_vf(cbrtf_u1) DALIAS_vf_vf(logf_u1) DALIAS_vf_vf_vf(powf) 
DALIAS_vf_vf(sinhf) DALIAS_vf_vf(coshf) DALIAS_vf_vf(tanhf) DALIAS_vf_vf(sinhf_u35) DALIAS_vf_vf(coshf_u35) DALIAS_vf_vf(tanhf_u35) DALIAS_vf_vf(asinhf) DALIAS_vf_vf(acoshf) DALIAS_vf_vf(atanhf) DALIAS_vf_vf(exp2f) DALIAS_vf_vf(exp2f_u35) DALIAS_vf_vf(exp10f) DALIAS_vf_vf(exp10f_u35) DALIAS_vf_vf(expm1f) DALIAS_vf_vf(log10f) DALIAS_vf_vf(log2f) DALIAS_vf_vf(log2f_u35) DALIAS_vf_vf(log1pf) DALIAS_vf_vf(fabsf) DALIAS_vf_vf_vf(copysignf) DALIAS_vf_vf_vf(fmaxf) DALIAS_vf_vf_vf(fminf) DALIAS_vf_vf_vf(fdimf) DALIAS_vf_vf(truncf) DALIAS_vf_vf(floorf) DALIAS_vf_vf(ceilf) DALIAS_vf_vf(roundf) DALIAS_vf_vf(rintf) DALIAS_vf_vf_vf_vf(fmaf) DALIAS_vf_vf_vf(hypotf_u05) DALIAS_vf_vf_vf(hypotf_u35) DALIAS_vf_vf_vf(nextafterf) DALIAS_vf_vf(frfrexpf) DALIAS_vf_vf_vf(fmodf) DALIAS_vf_vf_vf(remainderf) DALIAS_vf_vf(sinpif_u05) DALIAS_vf_vf(cospif_u05) DALIAS_vf_vf(tgammaf_u1) DALIAS_vf_vf(lgammaf_u1) DALIAS_vf_vf(erff_u1) DALIAS_vf_vf(erfcf_u15) DALIAS_vf_vf_vf(fastpowf_u3500) #endif // #if !defined(DETERMINISTIC) && !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) #if !defined(ENABLE_GNUABI) && !defined(SLEEF_GENHEADER) EXPORT CONST int xgetIntf(int name) { if (1 <= name && name <= 10) return vavailability_i(name); return 0; } EXPORT CONST void *xgetPtrf(int name) { if (name == 0) return ISANAME; return (void *)0; } #endif #if defined(ALIAS_NO_EXT_SUFFIX) && !defined(DETERMINISTIC) #include ALIAS_NO_EXT_SUFFIX #endif #ifdef ENABLE_GNUABI EXPORT CONST VECTOR_CC vfloat __acosf_finite (vfloat) __attribute__((weak, alias(str_xacosf_u1 ))); EXPORT CONST VECTOR_CC vfloat __acoshf_finite (vfloat) __attribute__((weak, alias(str_xacoshf ))); EXPORT CONST VECTOR_CC vfloat __asinf_finite (vfloat) __attribute__((weak, alias(str_xasinf_u1 ))); EXPORT CONST VECTOR_CC vfloat __atan2f_finite (vfloat, vfloat) __attribute__((weak, alias(str_xatan2f_u1 ))); EXPORT CONST VECTOR_CC vfloat __atanhf_finite (vfloat) __attribute__((weak, alias(str_xatanhf ))); EXPORT CONST VECTOR_CC vfloat __coshf_finite 
(vfloat) __attribute__((weak, alias(str_xcoshf ))); EXPORT CONST VECTOR_CC vfloat __exp10f_finite (vfloat) __attribute__((weak, alias(str_xexp10f ))); EXPORT CONST VECTOR_CC vfloat __exp2f_finite (vfloat) __attribute__((weak, alias(str_xexp2f ))); EXPORT CONST VECTOR_CC vfloat __expf_finite (vfloat) __attribute__((weak, alias(str_xexpf ))); EXPORT CONST VECTOR_CC vfloat __fmodf_finite (vfloat, vfloat) __attribute__((weak, alias(str_xfmodf ))); EXPORT CONST VECTOR_CC vfloat __remainderf_finite(vfloat, vfloat) __attribute__((weak, alias(str_xremainderf))); EXPORT CONST VECTOR_CC vfloat __modff_finite (vfloat, vfloat *) __attribute__((weak, alias(str_xmodff ))); EXPORT CONST VECTOR_CC vfloat __hypotf_u05_finite(vfloat, vfloat) __attribute__((weak, alias(str_xhypotf_u05))); EXPORT CONST VECTOR_CC vfloat __lgammaf_u1_finite(vfloat) __attribute__((weak, alias(str_xlgammaf_u1))); EXPORT CONST VECTOR_CC vfloat __log10f_finite (vfloat) __attribute__((weak, alias(str_xlog10f ))); EXPORT CONST VECTOR_CC vfloat __logf_finite (vfloat) __attribute__((weak, alias(str_xlogf_u1 ))); EXPORT CONST VECTOR_CC vfloat __powf_finite (vfloat, vfloat) __attribute__((weak, alias(str_xpowf ))); EXPORT CONST VECTOR_CC vfloat __sinhf_finite (vfloat) __attribute__((weak, alias(str_xsinhf ))); EXPORT CONST VECTOR_CC vfloat __sqrtf_finite (vfloat) __attribute__((weak, alias(str_xsqrtf ))); EXPORT CONST VECTOR_CC vfloat __tgammaf_u1_finite(vfloat) __attribute__((weak, alias(str_xtgammaf_u1))); #ifdef HEADER_MASKED #include HEADER_MASKED #endif #endif /* #ifdef ENABLE_GNUABI */ #ifdef ENABLE_MAIN // gcc -DENABLE_MAIN -Wno-attributes -I../common -I../arch -DENABLE_AVX2 -mavx2 -mfma sleefsimdsp.c rempitab.c ../common/common.c -lm #include #include #include int main(int argc, char **argv) { vfloat vf1 = vcast_vf_f(atof(argv[1])); //vfloat vf2 = vcast_vf_f(atof(argv[2])); //vfloat r = xpowf(vf1, vf2); //vfloat r = xsqrtf_u05(vf1); //printf("%g\n", xnextafterf(vf1, vf2)[0]); //printf("%g\n", 
nextafterf(atof(argv[1]), atof(argv[2]))); printf("t = %.20g\n", xlogf_u1(vf1)[0]); printf("c = %.20g\n", logf(atof(argv[1]))); } #endif sleef-3.5.1/src/libm/sleefsp.c000066400000000000000000001750001373003144100161340ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. #include #include #include #include #include #ifndef ENABLE_BUILTIN_MATH #include #define SQRTF sqrtf #else #define SQRTF __builtin_sqrtf #endif #include "misc.h" extern const float Sleef_rempitabsp[]; #ifdef DORENAME #include "rename.h" #endif #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif #define MLA mlaf #define C2V(x) (x) #include "estrin.h" static INLINE CONST int32_t floatToRawIntBits(float d) { union { float f; int32_t i; } tmp; tmp.f = d; return tmp.i; } static INLINE CONST float intBitsToFloat(int32_t i) { union { float f; int32_t i; } tmp; tmp.i = i; return tmp.f; } static INLINE CONST float fabsfk(float x) { return intBitsToFloat(0x7fffffffL & floatToRawIntBits(x)); } static INLINE CONST float mulsignf(float x, float y) { return intBitsToFloat(floatToRawIntBits(x) ^ (floatToRawIntBits(y) & (1 << 31))); } static INLINE CONST float copysignfk(float x, float y) { return intBitsToFloat((floatToRawIntBits(x) & ~(1 << 31)) ^ (floatToRawIntBits(y) & (1 << 31))); } static INLINE CONST float signf(float d) { return mulsignf(1, d); } static INLINE CONST float mlaf(float x, float y, float z) { return x * y + z; } static INLINE CONST float rintfk(float x) { return x < 0 ? (int)(x - 0.5f) : (int)(x + 0.5f); } static INLINE CONST int ceilfk(float x) { return (int)x + (x < 0 ? 0 : 1); } static INLINE CONST float fminfk(float x, float y) { return x < y ? x : y; } static INLINE CONST float fmaxfk(float x, float y) { return x > y ? 
x : y; } static INLINE CONST int xisintf(float x) { return (x == (int)x); } static INLINE CONST int xisnanf(float x) { return x != x; } static INLINE CONST int xisinff(float x) { return x == SLEEF_INFINITYf || x == -SLEEF_INFINITYf; } static INLINE CONST int xisminff(float x) { return x == -SLEEF_INFINITYf; } static INLINE CONST int xispinff(float x) { return x == SLEEF_INFINITYf; } static INLINE CONST int xisnegzerof(float x) { return floatToRawIntBits(x) == floatToRawIntBits(-0.0); } static INLINE CONST int xisnumberf(float x) { return !xisinff(x) && !xisnanf(x); } static INLINE CONST int ilogbkf(float d) { int m = d < 5.421010862427522E-20f; d = m ? 1.8446744073709552E19f * d : d; int q = (floatToRawIntBits(d) >> 23) & 0xff; q = m ? q - (64 + 0x7f) : q - 0x7f; return q; } // vilogb2kf is similar to ilogbkf, but the argument has to be a // normalized FP value. static INLINE CONST int ilogb2kf(float d) { return ((floatToRawIntBits(d) >> 23) & 0xff) - 0x7f; } EXPORT CONST int xilogbf(float d) { int e = ilogbkf(fabsfk(d)); e = d == 0.0f ? SLEEF_FP_ILOGB0 : e; e = xisnanf(d) ? SLEEF_FP_ILOGBNAN : e; e = xisinff(d) ? INT_MAX : e; return e; } static INLINE CONST float pow2if(int q) { return intBitsToFloat(((int32_t)(q + 0x7f)) << 23); } static INLINE CONST float ldexpkf(float x, int q) { float u; int m; m = q >> 31; m = (((m + q) >> 6) - m) << 4; q = q - (m << 2); m += 127; m = m < 0 ? 0 : m; m = m > 255 ? 
255 : m; u = intBitsToFloat(((int32_t)m) << 23); x = x * u * u * u * u; u = intBitsToFloat(((int32_t)(q + 0x7f)) << 23); return x * u; } static INLINE CONST float ldexp2kf(float d, int e) { // faster than ldexpkf, short reach return d * pow2if(e >> 1) * pow2if(e - (e >> 1)); } static INLINE CONST float ldexp3kf(float d, int e) { // very fast, no denormal return intBitsToFloat(floatToRawIntBits(d) + (e << 23)); } // #ifndef NDEBUG static int checkfp(float x) { if (xisinff(x) || xisnanf(x)) return 1; return 0; } #endif static INLINE CONST float upperf(float d) { return intBitsToFloat(floatToRawIntBits(d) & 0xfffff000); } static INLINE CONST Sleef_float2 df(float h, float l) { Sleef_float2 ret; ret.x = h; ret.y = l; return ret; } static INLINE CONST Sleef_float2 dfx(double d) { Sleef_float2 ret; ret.x = d; ret.y = d - ret.x; return ret; } static INLINE CONST Sleef_float2 dfnormalize_f2_f2(Sleef_float2 t) { Sleef_float2 s; s.x = t.x + t.y; s.y = t.x - s.x + t.y; return s; } static INLINE CONST Sleef_float2 dfscale_f2_f2_f(Sleef_float2 d, float s) { Sleef_float2 r; r.x = d.x * s; r.y = d.y * s; return r; } static INLINE CONST Sleef_float2 dfneg_f2_f2(Sleef_float2 d) { Sleef_float2 r; r.x = -d.x; r.y = -d.y; return r; } static INLINE CONST Sleef_float2 dfabs_f2_f2(Sleef_float2 x) { return df(x.x < 0 ? -x.x : x.x, x.x < 0 ? 
-x.y : x.y); } static INLINE CONST Sleef_float2 dfadd_f2_f_f(float x, float y) { // |x| >= |y| Sleef_float2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y) || fabsfk(x) >= fabsfk(y))) fprintf(stderr, "[dfadd_f2_f_f : %g, %g]", x, y); #endif r.x = x + y; r.y = x - r.x + y; return r; } static INLINE CONST Sleef_float2 dfadd2_f2_f_f(float x, float y) { Sleef_float2 r; r.x = x + y; float v = r.x - x; r.y = (x - (r.x - v)) + (y - v); return r; } static INLINE CONST Sleef_float2 dfadd_f2_f2_f(Sleef_float2 x, float y) { // |x| >= |y| Sleef_float2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y) || fabsfk(x.x) >= fabsfk(y))) fprintf(stderr, "[dfadd_f2_f2_f : %g %g]", x.x, y); #endif r.x = x.x + y; r.y = x.x - r.x + y + x.y; return r; } static INLINE CONST Sleef_float2 dfadd_f2_f_f2(float x, Sleef_float2 y) { // |x| >= |y| Sleef_float2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y.x) || fabsfk(x) >= fabsfk(y.x))) { fprintf(stderr, "[dfadd_f2_f_f2 : %g %g]\n", x, y.x); fflush(stderr); } #endif r.x = x + y.x; r.y = x - r.x + y.x + y.y; return r; } static INLINE CONST Sleef_float2 dfadd2_f2_f2_f(Sleef_float2 x, float y) { // |x| >= |y| Sleef_float2 r; r.x = x.x + y; float v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y - v); r.y += x.y; return r; } static INLINE CONST Sleef_float2 dfadd2_f2_f_f2(float x, Sleef_float2 y) { Sleef_float2 r; r.x = x + y.x; float v = r.x - x; r.y = (x - (r.x - v)) + (y.x - v) + y.y; return r; } static INLINE CONST Sleef_float2 dfadd_f2_f2_f2(Sleef_float2 x, Sleef_float2 y) { // |x| >= |y| Sleef_float2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y.x) || fabsfk(x.x) >= fabsfk(y.x))) fprintf(stderr, "[dfadd_f2_f2_f2 : %g %g]", x.x, y.x); #endif r.x = x.x + y.x; r.y = x.x - r.x + y.x + x.y + y.y; return r; } static INLINE CONST Sleef_float2 dfadd2_f2_f2_f2(Sleef_float2 x, Sleef_float2 y) { Sleef_float2 r; r.x = x.x + y.x; float v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y.x - v); r.y += x.y + y.y; return r; } static INLINE CONST Sleef_float2 
dfsub_f2_f2_f2(Sleef_float2 x, Sleef_float2 y) { // |x| >= |y| Sleef_float2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y.x) || fabsfk(x.x) >= fabsfk(y.x))) fprintf(stderr, "[dfsub_f2_f2_f2 : %g %g]", x.x, y.x); #endif r.x = x.x - y.x; r.y = x.x - r.x - y.x + x.y - y.y; return r; } static INLINE CONST Sleef_float2 dfdiv_f2_f2_f2(Sleef_float2 n, Sleef_float2 d) { float t = 1.0f / d.x; float dh = upperf(d.x), dl = d.x - dh; float th = upperf(t ), tl = t - th; float nhh = upperf(n.x), nhl = n.x - nhh; Sleef_float2 q; q.x = n.x * t; float u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); q.y = t * (n.y - q.x * d.y) + u; return q; } static INLINE CONST Sleef_float2 dfmul_f2_f_f(float x, float y) { float xh = upperf(x), xl = x - xh; float yh = upperf(y), yl = y - yh; Sleef_float2 r; r.x = x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; return r; } static INLINE CONST Sleef_float2 dfmul_f2_f2_f(Sleef_float2 x, float y) { float xh = upperf(x.x), xl = x.x - xh; float yh = upperf(y ), yl = y - yh; Sleef_float2 r; r.x = x.x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; return r; } static INLINE CONST Sleef_float2 dfmul_f2_f2_f2(Sleef_float2 x, Sleef_float2 y) { float xh = upperf(x.x), xl = x.x - xh; float yh = upperf(y.x), yl = y.x - yh; Sleef_float2 r; r.x = x.x * y.x; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; return r; } static INLINE CONST float dfmul_f_f2_f2(Sleef_float2 x, Sleef_float2 y) { float xh = upperf(x.x), xl = x.x - xh; float yh = upperf(y.x), yl = y.x - yh; return x.y * yh + xh * y.y + xl * yl + xh * yl + xl * yh + xh * yh; } static INLINE CONST Sleef_float2 dfsqu_f2_f2(Sleef_float2 x) { float xh = upperf(x.x), xl = x.x - xh; Sleef_float2 r; r.x = x.x * x.x; r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); return r; } static INLINE CONST float dfsqu_f_f2(Sleef_float2 x) { float xh = upperf(x.x), xl = x.x - xh; return xh * 
x.y + xh * x.y + xl * xl + (xh * xl + xh * xl) + xh * xh; } static INLINE CONST Sleef_float2 dfrec_f2_f(float d) { float t = 1.0f / d; float dh = upperf(d), dl = d - dh; float th = upperf(t), tl = t - th; Sleef_float2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); return q; } static INLINE CONST Sleef_float2 dfrec_f2_f2(Sleef_float2 d) { float t = 1.0f / d.x; float dh = upperf(d.x), dl = d.x - dh; float th = upperf(t ), tl = t - th; Sleef_float2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); return q; } static INLINE CONST Sleef_float2 dfsqrt_f2_f2(Sleef_float2 d) { float t = SQRTF(d.x + d.y); return dfscale_f2_f2_f(dfmul_f2_f2_f2(dfadd2_f2_f2_f2(d, dfmul_f2_f_f(t, t)), dfrec_f2_f(t)), 0.5f); } static INLINE CONST Sleef_float2 dfsqrt_f2_f(float d) { float t = SQRTF(d); return dfscale_f2_f2_f(dfmul_f2_f2_f2(dfadd2_f2_f_f2(d, dfmul_f2_f_f(t, t)), dfrec_f2_f(t)), 0.5); } // typedef struct { float d; int32_t i; } fi_t; typedef struct { Sleef_float2 df; int32_t i; } dfi_t; static CONST fi_t rempisubf(float x) { fi_t ret; float fr = x - (float)(INT64_C(1) << 10) * (int32_t)(x * (1.0f / (INT64_C(1) << 10))); ret.i = ((7 & ((x > 0 ? 4 : 3) + (int32_t)(fr * 8))) - 3) >> 1; fr = fr - 0.25f * (int32_t)(fr * 4 + mulsignf(0.5f, x)); fr = fabsfk(fr) > 0.125f ? (fr - mulsignf(0.5f, x)) : fr; fr = fabsfk(fr) > 1e+10f ? 0 : fr; if (fabsfk(x) == 0.12499999254941940308f) { fr = x; ret.i = 0; } ret.d = fr; return ret; } static CONST dfi_t rempif(float a) { Sleef_float2 x, y, z; fi_t di; float t; int ex = ilogb2kf(a) - 25, q = ex > (90 - 25) ? 
-64 : 0; a = ldexp3kf(a, q); if (ex < 0) ex = 0; ex *= 4; x = dfmul_f2_f_f(a, Sleef_rempitabsp[ex]); di = rempisubf(x.x); q = di.i; x.x = di.d; x = dfnormalize_f2_f2(x); y = dfmul_f2_f_f(a, Sleef_rempitabsp[ex+1]); x = dfadd2_f2_f2_f2(x, y); di = rempisubf(x.x); q += di.i; x.x = di.d; x = dfnormalize_f2_f2(x); y = dfmul_f2_f2_f(df(Sleef_rempitabsp[ex+2], Sleef_rempitabsp[ex+3]), a); x = dfadd2_f2_f2_f2(x, y); x = dfnormalize_f2_f2(x); x = dfmul_f2_f2_f2(x, df(3.1415927410125732422f*2, -8.7422776573475857731e-08f*2)); dfi_t ret = { fabsfk(a) < 0.7f ? df(a, 0) : x, q }; return ret; } EXPORT CONST float xsinf(float d) { int q; float u, s, t = d; if (fabsfk(d) < TRIGRANGEMAX2f) { q = (int)rintfk(d * (float)M_1_PI); d = mlaf(q, -PI_A2f, d); d = mlaf(q, -PI_B2f, d); d = mlaf(q, -PI_C2f, d); } else if (fabsfk(d) < TRIGRANGEMAXf) { q = (int)rintfk(d * (float)M_1_PI); d = mlaf(q, -PI_Af, d); d = mlaf(q, -PI_Bf, d); d = mlaf(q, -PI_Cf, d); d = mlaf(q, -PI_Df, d); } else { dfi_t dfi = rempif(t); q = ((dfi.i & 3) * 2 + (dfi.df.x > 0) + 1) >> 2; if ((dfi.i & 1) != 0) { dfi.df = dfadd2_f2_f2_f2(dfi.df, df(mulsignf(3.1415927410125732422f*-0.5, dfi.df.x), mulsignf(-8.7422776573475857731e-08f*-0.5, dfi.df.x))); } d = dfi.df.x + dfi.df.y; if (xisinff(t) || xisnanf(t)) d = SLEEF_NANf; } s = d * d; if ((q & 1) != 0) d = -d; u = 2.6083159809786593541503e-06f; u = mlaf(u, s, -0.0001981069071916863322258f); u = mlaf(u, s, 0.00833307858556509017944336f); u = mlaf(u, s, -0.166666597127914428710938f); u = mlaf(s, u * d, d); if (xisnegzerof(t)) u = -0.0f; return u; } EXPORT CONST float xsinf_u1(float d) { int q; float u; Sleef_float2 s, t, x; if (fabsfk(d) < TRIGRANGEMAX2f) { q = (int)rintfk(d * (float)M_1_PI); u = mlaf(q, -PI_A2f, d); s = dfadd2_f2_f_f(u, q * (-PI_B2f)); s = dfadd_f2_f2_f(s, q * (-PI_C2f)); } else { dfi_t dfi = rempif(d); q = ((dfi.i & 3) * 2 + (dfi.df.x > 0) + 1) >> 2; if ((dfi.i & 1) != 0) { dfi.df = dfadd2_f2_f2_f2(dfi.df, df(mulsignf(3.1415927410125732422f*-0.5, 
dfi.df.x), mulsignf(-8.7422776573475857731e-08f*-0.5, dfi.df.x))); } s = dfnormalize_f2_f2(dfi.df); if (xisinff(d) || xisnanf(d)) s.x = SLEEF_NANf; } t = s; s = dfsqu_f2_f2(s); u = 2.6083159809786593541503e-06f; u = mlaf(u, s.x, -0.0001981069071916863322258f); u = mlaf(u, s.x, 0.00833307858556509017944336f); x = dfadd_f2_f_f2(1, dfmul_f2_f2_f2(dfadd_f2_f_f(-0.166666597127914428710938f, u * s.x), s)); u = dfmul_f_f2_f2(t, x); if ((q & 1) != 0) u = -u; if (xisnegzerof(d)) u = d; return u; } EXPORT CONST float xcosf(float d) { int q; float u, s, t = d; if (fabsfk(d) < TRIGRANGEMAX2f) { q = 1 + 2*(int)rintfk(d * (float)M_1_PI - 0.5f); d = mlaf(q, -PI_A2f*0.5f, d); d = mlaf(q, -PI_B2f*0.5f, d); d = mlaf(q, -PI_C2f*0.5f, d); } else if (fabsfk(d) < TRIGRANGEMAXf) { q = 1 + 2*(int)rintfk(d * (float)M_1_PI - 0.5f); d = mlaf(q, -PI_Af*0.5f, d); d = mlaf(q, -PI_Bf*0.5f, d); d = mlaf(q, -PI_Cf*0.5f, d); d = mlaf(q, -PI_Df*0.5f, d); } else { dfi_t dfi = rempif(t); q = ((dfi.i & 3) * 2 + (dfi.df.x > 0) + 7) >> 1; if ((dfi.i & 1) == 0) { dfi.df = dfadd2_f2_f2_f2(dfi.df, df(mulsignf(3.1415927410125732422f*-0.5, dfi.df.x > 0 ? 1 : -1), mulsignf(-8.7422776573475857731e-08f*-0.5, dfi.df.x > 0 ? 
1 : -1))); } d = dfi.df.x + dfi.df.y; if (xisinff(t) || xisnanf(t)) d = SLEEF_NANf; } s = d * d; if ((q & 2) == 0) d = -d; u = 2.6083159809786593541503e-06f; u = mlaf(u, s, -0.0001981069071916863322258f); u = mlaf(u, s, 0.00833307858556509017944336f); u = mlaf(u, s, -0.166666597127914428710938f); u = mlaf(s, u * d, d); return u; } EXPORT CONST float xcosf_u1(float d) { float u; Sleef_float2 s, t, x; int q; if (fabsfk(d) < TRIGRANGEMAX2f) { d = fabsfk(d); float dq = mlaf(rintfk(d * (float)M_1_PI - 0.5f), 2, 1); q = (int)dq; s = dfadd2_f2_f_f (d, dq * (-PI_A2f*0.5f)); s = dfadd2_f2_f2_f(s, dq * (-PI_B2f*0.5f)); s = dfadd2_f2_f2_f(s, dq * (-PI_C2f*0.5f)); } else { dfi_t dfi = rempif(d); q = ((dfi.i & 3) * 2 + (dfi.df.x > 0) + 7) >> 1; if ((dfi.i & 1) == 0) { dfi.df = dfadd2_f2_f2_f2(dfi.df, df(mulsignf(3.1415927410125732422f*-0.5, dfi.df.x > 0 ? 1 : -1), mulsignf(-8.7422776573475857731e-08f*-0.5, dfi.df.x > 0 ? 1 : -1))); } s = dfnormalize_f2_f2(dfi.df); if (xisinff(d) || xisnanf(d)) s.x = SLEEF_NANf; } t = s; s = dfsqu_f2_f2(s); u = 2.6083159809786593541503e-06f; u = mlaf(u, s.x, -0.0001981069071916863322258f); u = mlaf(u, s.x, 0.00833307858556509017944336f); x = dfadd_f2_f_f2(1, dfmul_f2_f2_f2(dfadd_f2_f_f(-0.166666597127914428710938f, u * s.x), s)); u = dfmul_f_f2_f2(t, x); if ((((int)q) & 2) == 0) u = -u; return u; } EXPORT CONST float xfastsinf_u3500(float d) { int q; float u, s, t = d; q = rintfk(d * (float)M_1_PI); d = mlaf(q, -(float)M_PI, d); s = d * d; u = -0.1881748176e-3; u = mlaf(u, s, +0.8323502727e-2); u = mlaf(u, s, -0.1666651368e+0); u = mlaf(s * d, u, d); if ((q & 1) != 0) u = -u; if (UNLIKELY(fabsfk(t) > 30.0f)) return xsinf(t); return u; } EXPORT CONST float xfastcosf_u3500(float d) { int q; float u, s, t = d; q = rintfk(mlaf(d, (float)M_1_PI, -0.5f)); d = mlaf(q, -(float)M_PI, d - (float)M_PI*0.5f); s = d * d; u = -0.1881748176e-3; u = mlaf(u, s, +0.8323502727e-2); u = mlaf(u, s, -0.1666651368e+0); u = mlaf(s * d, u, d); if ((q & 1) == 0) u = -u; 
if (UNLIKELY(fabsfk(t) > 30.0f)) return xcosf(t); return u; } EXPORT CONST Sleef_float2 xsincosf(float d) { int q; float u, s, t; Sleef_float2 r; s = d; if (fabsfk(d) < TRIGRANGEMAX2f) { q = (int)rintfk(d * ((float)(2 * M_1_PI))); s = mlaf(q, -PI_A2f*0.5f, s); s = mlaf(q, -PI_B2f*0.5f, s); s = mlaf(q, -PI_C2f*0.5f, s); } else if (fabsfk(d) < TRIGRANGEMAXf) { q = (int)rintfk(d * ((float)(2 * M_1_PI))); s = mlaf(q, -PI_Af*0.5f, s); s = mlaf(q, -PI_Bf*0.5f, s); s = mlaf(q, -PI_Cf*0.5f, s); s = mlaf(q, -PI_Df*0.5f, s); } else { dfi_t dfi = rempif(d); q = dfi.i; s = dfi.df.x + dfi.df.y; if (xisinff(d) || xisnanf(d)) s = SLEEF_NANf; } t = s; s = s * s; u = -0.000195169282960705459117889f; u = mlaf(u, s, 0.00833215750753879547119141f); u = mlaf(u, s, -0.166666537523269653320312f); u = u * s * t; r.x = t + u; if (xisnegzerof(d)) r.x = -0.0f; u = -2.71811842367242206819355e-07f; u = mlaf(u, s, 2.47990446951007470488548e-05f); u = mlaf(u, s, -0.00138888787478208541870117f); u = mlaf(u, s, 0.0416666641831398010253906f); u = mlaf(u, s, -0.5f); r.y = u * s + 1; if ((q & 1) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 2) != 0) { r.x = -r.x; } if (((q+1) & 2) != 0) { r.y = -r.y; } return r; } EXPORT CONST Sleef_float2 xsincosf_u1(float d) { int q; float u; Sleef_float2 r, s, t, x; if (fabsfk(d) < TRIGRANGEMAX2f) { q = (int)rintfk(d * (float)(2 * M_1_PI)); u = mlaf(q, -PI_A2f*0.5f, d); s = dfadd2_f2_f_f(u, q * (-PI_B2f*0.5f)); s = dfadd_f2_f2_f(s, q * (-PI_C2f*0.5f)); } else { dfi_t dfi = rempif(d); q = dfi.i; s = dfi.df; if (xisinff(d) || xisnanf(d)) s.x = SLEEF_NANf; } t = s; s.x = dfsqu_f_f2(s); u = -0.000195169282960705459117889f; u = mlaf(u, s.x, 0.00833215750753879547119141f); u = mlaf(u, s.x, -0.166666537523269653320312f); u *= s.x * t.x; x = dfadd_f2_f2_f(t, u); r.x = x.x + x.y; if (xisnegzerof(d)) r.x = -0.0f; u = -2.71811842367242206819355e-07f; u = mlaf(u, s.x, 2.47990446951007470488548e-05f); u = mlaf(u, s.x, -0.00138888787478208541870117f); u = mlaf(u, s.x, 
0.0416666641831398010253906f); u = mlaf(u, s.x, -0.5f); x = dfadd_f2_f_f2(1, dfmul_f2_f_f(s.x, u)); r.y = x.x + x.y; if ((q & 1) != 0) { u = r.y; r.y = r.x; r.x = u; } if ((q & 2) != 0) { r.x = -r.x; } if (((q+1) & 2) != 0) { r.y = -r.y; } return r; } EXPORT CONST Sleef_float2 xsincospif_u05(float d) { float u, s, t; Sleef_float2 r, x, s2; u = d * 4; int q = ceilfk(u) & ~(int)1; s = u - (float)q; t = s; s = s * s; s2 = dfmul_f2_f_f(t, t); // u = +0.3093842054e-6; u = mlaf(u, s, -0.3657307388e-4); u = mlaf(u, s, +0.2490393585e-2); x = dfadd2_f2_f_f2(u * s, df(-0.080745510756969451904, -1.3373665339076936258e-09)); x = dfadd2_f2_f2_f2(dfmul_f2_f2_f2(s2, x), df(0.78539818525314331055, -2.1857338617566484855e-08)); x = dfmul_f2_f2_f(x, t); r.x = x.x + x.y; if (xisnegzerof(d)) r.x = -0.0f; u = -0.2430611801e-7; u = mlaf(u, s, +0.3590577080e-5); u = mlaf(u, s, -0.3259917721e-3); x = dfadd2_f2_f_f2(u * s, df(0.015854343771934509277, 4.4940051354032242811e-10)); x = dfadd2_f2_f2_f2(dfmul_f2_f2_f2(s2, x), df(-0.30842512845993041992, -9.0728339030733922277e-09)); x = dfadd2_f2_f2_f(dfmul_f2_f2_f2(x, s2), 1); r.y = x.x + x.y; if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (fabsfk(d) > 1e+7f) { r.x = 0; r.y = 1; } if (xisinff(d)) { r.x = r.y = SLEEF_NANf; } return r; } EXPORT CONST Sleef_float2 xsincospif_u35(float d) { float u, s, t; Sleef_float2 r; u = d * 4; int q = ceilfk(u) & ~(int)1; s = u - (float)q; t = s; s = s * s; // u = -0.3600925265e-4; u = mlaf(u, s, +0.2490088111e-2); u = mlaf(u, s, -0.8074551076e-1); u = mlaf(u, s, +0.7853981853e+0); r.x = u * t; u = +0.3539815225e-5; u = mlaf(u, s, -0.3259574005e-3); u = mlaf(u, s, +0.1585431583e-1); u = mlaf(u, s, -0.3084251285e+0); u = mlaf(u, s, 1); r.y = u; if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (fabsfk(d) > 1e+7f) { r.x = 0; r.y = 1; } if (xisinff(d)) { r.x = 
r.y = SLEEF_NANf; } return r; } EXPORT CONST float xtanf(float d) { int q; float u, s, x; x = d; if (fabsfk(d) < TRIGRANGEMAX2f*0.5f) { q = (int)rintfk(d * (float)(2 * M_1_PI)); x = mlaf(q, -PI_A2f*0.5f, x); x = mlaf(q, -PI_B2f*0.5f, x); x = mlaf(q, -PI_C2f*0.5f, x); } else if (fabsfk(d) < TRIGRANGEMAXf) { q = (int)rintfk(d * (float)(2 * M_1_PI)); x = mlaf(q, -PI_Af*0.5f, x); x = mlaf(q, -PI_Bf*0.5f, x); x = mlaf(q, -PI_Cf*0.5f, x); x = mlaf(q, -PI_Df*0.5f, x); } else { dfi_t dfi = rempif(d); q = dfi.i; x = dfi.df.x + dfi.df.y; if (xisinff(d) || xisnanf(d)) x = SLEEF_NANf; } s = x * x; if ((q & 1) != 0) x = -x; float s2 = s * s, s4 = s2 * s2; u = POLY6(s, s2, s4, 0.00927245803177356719970703f, 0.00331984995864331722259521f, 0.0242998078465461730957031f, 0.0534495301544666290283203f, 0.133383005857467651367188f, 0.333331853151321411132812f); u = mlaf(s, u * x, x); if ((q & 1) != 0) u = 1.0f / u; return u; } EXPORT CONST float xtanf_u1(float d) { int q; float u; Sleef_float2 s, t, x; if (fabsfk(d) < TRIGRANGEMAX2f) { q = (int)rintfk(d * (float)(2 * M_1_PI)); u = mlaf(q, -PI_A2f*0.5f, d); s = dfadd2_f2_f_f(u, q * (-PI_B2f*0.5f)); s = dfadd_f2_f2_f(s, q * (-PI_C2f*0.5f)); } else { dfi_t dfi = rempif(d); q = dfi.i; s = dfi.df; if (xisinff(d) || xisnanf(d)) s.x = SLEEF_NANf; } if ((q & 1) != 0) s = dfneg_f2_f2(s); t = s; s = dfsqu_f2_f2(s); s = dfnormalize_f2_f2(s); u = 0.00446636462584137916564941f; u = mlaf(u, s.x, -8.3920182078145444393158e-05f); u = mlaf(u, s.x, 0.0109639242291450500488281f); u = mlaf(u, s.x, 0.0212360303848981857299805f); u = mlaf(u, s.x, 0.0540687143802642822265625f); x = dfadd_f2_f_f(0.133325666189193725585938f, u * s.x); x = dfadd_f2_f_f2(1, dfmul_f2_f2_f2(dfadd_f2_f_f2(0.33333361148834228515625f, dfmul_f2_f2_f2(s, x)), s)); x = dfmul_f2_f2_f2(t, x); if ((q & 1) != 0) x = dfrec_f2_f2(x); u = x.x + x.y; if (xisnegzerof(d)) u = -0.0f; return u; } EXPORT CONST float xatanf(float s) { float t, u; int q = 0; if (signf(s) == -1) { s = -s; q = 2; } if 
(s > 1) { s = 1.0f / s; q |= 1; } t = s * s; float t2 = t * t, t4 = t2 * t2; u = POLY8(t, t2, t4, 0.00282363896258175373077393f, -0.0159569028764963150024414f, 0.0425049886107444763183594f, -0.0748900920152664184570312f, 0.106347933411598205566406f, -0.142027363181114196777344f, 0.199926957488059997558594f, -0.333331018686294555664062f); t = s + s * (t * u); if ((q & 1) != 0) t = 1.570796326794896557998982f - t; if ((q & 2) != 0) t = -t; return t; } static INLINE CONST float atan2kf(float y, float x) { float s, t, u; int q = 0; if (x < 0) { x = -x; q = -2; } if (y > x) { t = x; x = y; y = -t; q += 1; } s = y / x; t = s * s; float t2 = t * t, t4 = t2 * t2; u = POLY8(t, t2, t4, 0.00282363896258175373077393f, -0.0159569028764963150024414f, 0.0425049886107444763183594f, -0.0748900920152664184570312f, 0.106347933411598205566406f, -0.142027363181114196777344f, 0.199926957488059997558594f, -0.333331018686294555664062f); t = u * t * s + s; t = q * (float)(M_PI/2) + t; return t; } EXPORT CONST float xatan2f(float y, float x) { float r = atan2kf(fabsfk(y), x); r = mulsignf(r, x); if (xisinff(x) || x == 0) r = M_PIf/2 - (xisinff(x) ? (signf(x) * (float)(M_PI /2)) : 0); if (xisinff(y) ) r = M_PIf/2 - (xisinff(x) ? (signf(x) * (float)(M_PI*1/4)) : 0); if ( y == 0) r = (signf(x) == -1 ? M_PIf : 0); return xisnanf(x) || xisnanf(y) ? SLEEF_NANf : mulsignf(r, y); } EXPORT CONST float xasinf(float d) { int o = fabsfk(d) < 0.5f; float x2 = o ? (d*d) : ((1-fabsfk(d))*0.5f), x = o ? fabsfk(d) : SQRTF(x2), u; u = +0.4197454825e-1; u = mlaf(u, x2, +0.2424046025e-1); u = mlaf(u, x2, +0.4547423869e-1); u = mlaf(u, x2, +0.7495029271e-1); u = mlaf(u, x2, +0.1666677296e+0); u = mlaf(u, x * x2, x); float r = o ? u : (M_PIf/2 - 2*u); r = mulsignf(r, d); return r; } EXPORT CONST float xacosf(float d) { int o = fabsfk(d) < 0.5f; float x2 = o ? (d*d) : ((1-fabsfk(d))*0.5f), u; float x = o ? fabsfk(d) : SQRTF(x2); x = fabsfk(d) == 1.0 ? 
// Arcsine, single precision, higher-accuracy variant (uses Sleef_float2
// two-float values for the square root and the final subtraction).
//
// Two regimes, selected by o = (|d| < 0.5):
//   o != 0 : x = |d|,              x2 = d*d;         result = x + x*x2*poly(x2)
//   o == 0 : x = sqrt((1-|d|)/2),  x2 = (1-|d|)/2;   result = 2*(pi/4 - x - x*x2*poly(x2))
// The sign of d is applied to the result at the end.
EXPORT CONST float xasinf_u1(float d) {
  int o = fabsfk(d) < 0.5f;
  float x2 = o ? (d*d) : ((1-fabsfk(d))*0.5f), u;
  Sleef_float2 x = o ? df(fabsfk(d), 0) : dfsqrt_f2_f(x2);
  // |d| == 1 makes x2 == 0; x is forced to an exact (0, 0) pair in that case.
  x = fabsfk(d) == 1.0f ? df(0, 0) : x;

  // Polynomial in x2, evaluated with mlaf() steps.
  u = +0.4197454825e-1;
  u = mlaf(u, x2, +0.2424046025e-1);
  u = mlaf(u, x2, +0.4547423869e-1);
  u = mlaf(u, x2, +0.7495029271e-1);
  u = mlaf(u, x2, +0.1666677296e+0);
  u *= x2 * x.x;

  // y = (pi/4 as a two-float constant) - x - u; only used when o == 0.
  Sleef_float2 y = dfadd_f2_f2_f(dfsub_f2_f2_f2(df(3.1415927410125732422f/4,-8.7422776573475857731e-08f/4), x), -u);

  float r = o ? (u + x.x) : ((y.x + y.y)*2);
  r = mulsignf(r, d);

  return r;
}
y : dfscale_f2_f2_f(x, 2); if (!o && d < 0) y = dfsub_f2_f2_f2(df(3.1415927410125732422f,-8.7422776573475857731e-08f), y); return y.x + y.y; } EXPORT CONST float xatanf_u1(float d) { Sleef_float2 d2 = atan2kf_u1(df(fabsfk(d), 0.0f), df(1.0f, 0.0f)); float r = d2.x + d2.y; if (xisinff(d)) r = 1.570796326794896557998982f; return mulsignf(r, d); } EXPORT CONST float xlogf(float d) { float x, x2, t, m; int e; int o = d < FLT_MIN; if (o) d *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32); e = ilogb2kf(d * (1.0f/0.75f)); m = ldexp3kf(d, -e); if (o) e -= 64; x = (m-1.0f) / (m+1.0f); x2 = x * x; t = 0.2392828464508056640625f; t = mlaf(t, x2, 0.28518211841583251953125f); t = mlaf(t, x2, 0.400005877017974853515625f); t = mlaf(t, x2, 0.666666686534881591796875f); t = mlaf(t, x2, 2.0f); x = x * t + 0.693147180559945286226764f * e; if (xisinff(d)) x = SLEEF_INFINITYf; if (d < 0 || xisnanf(d)) x = SLEEF_NANf; if (d == 0) x = -SLEEF_INFINITYf; return x; } EXPORT CONST float xexpf(float d) { int q = (int)rintfk(d * R_LN2f); float s, u; s = mlaf(q, -L2Uf, d); s = mlaf(q, -L2Lf, s); u = 0.000198527617612853646278381; u = mlaf(u, s, 0.00139304355252534151077271); u = mlaf(u, s, 0.00833336077630519866943359); u = mlaf(u, s, 0.0416664853692054748535156); u = mlaf(u, s, 0.166666671633720397949219); u = mlaf(u, s, 0.5); u = s * s * u + s + 1.0f; u = ldexp2kf(u, q); if (d < -104) u = 0; if (d > 104) u = SLEEF_INFINITYf; return u; } static INLINE CONST float expkf(Sleef_float2 d) { int q = (int)rintfk((d.x + d.y) * R_LN2f); Sleef_float2 s, t; float u; s = dfadd2_f2_f2_f(d, q * -L2Uf); s = dfadd2_f2_f2_f(s, q * -L2Lf); s = dfnormalize_f2_f2(s); u = 0.00136324646882712841033936f; u = mlaf(u, s.x, 0.00836596917361021041870117f); u = mlaf(u, s.x, 0.0416710823774337768554688f); u = mlaf(u, s.x, 0.166665524244308471679688f); u = mlaf(u, s.x, 0.499999850988388061523438f); t = dfadd_f2_f2_f2(s, dfmul_f2_f2_f(dfsqu_f2_f2(s), u)); t = dfadd_f2_f_f2(1, t); u = ldexpkf(t.x + t.y, q); if (d.x < 
// Internal kernel: exp(d) - 1 in plain float arithmetic.
//
// d is split as d = q*ln2 + s with q = rint(d * R_LN2f); the two mlaf() steps
// subtract q*L2Uf and q*L2Lf.
// NOTE(review): L2Uf/L2Lf are presumably the upper and lower parts of ln 2 --
// confirm against their definitions.
// A 6-coefficient polynomial then approximates (exp(s) - 1 - s) / s^2.
static INLINE CONST float expm1kf(float d) {
  int q = (int)rintfk(d * R_LN2f);
  float s, u;

  s = mlaf(q, -L2Uf, d);
  s = mlaf(q, -L2Lf, s);

  float s2 = s * s, s4 = s2 * s2;
  u = POLY6(s, s2, s4,
            0.000198527617612853646278381,
            0.00139304355252534151077271,
            0.00833336077630519866943359,
            0.0416664853692054748535156,
            0.166666671633720397949219,
            0.5);

  // u = exp(s) - 1 (approximately)
  u = s * s * u + s;

  // Only when a power-of-two scale is needed: rebuild exp(s), scale by 2^q,
  // and subtract 1 again.  For q == 0 the small result is returned as is.
  if (q != 0) u = ldexp2kf(u + 1, q) - 1;

  return u;
}
// Power function x^y, single precision.
//
// The core value is exp(y * log|x|) evaluated with the two-float kernels
// logkf()/expkf(); the sign and all special cases are patched in afterwards.
// The patches are applied in sequence, so a later one overrides an earlier one.
EXPORT CONST float xpowf(float x, float y) {
  // y counts as an integer if it round-trips through int, or if |y| >= 2^24.
  // NOTE(review): (int)y is evaluated before the magnitude test; for NaN or
  // |y| >= 2^31 that conversion is undefined in ISO C -- confirm the targets
  // this is built for tolerate it.
  int yisint = (y == (int)y) || (fabsfk(y) >= (float)(INT64_C(1) << 24));
  int yisodd = (1 & (int)y) != 0 && yisint && fabsfk(y) < (float)(INT64_C(1) << 24);

  float result = expkf(dfmul_f2_f2_f(logkf(fabsfk(x)), y));

  // A NaN coming out of the kernel is treated as overflow.
  result = xisnanf(result) ? SLEEF_INFINITYf : result;
  // Negative base: NaN for non-integer y, otherwise the sign follows y's parity.
  result *=  (x >= 0 ? 1 : (!yisint ? SLEEF_NANf : (yisodd ? -1 : 1)));

  // efx carries the sign of (|x| - 1) combined with the sign of y; it decides
  // between 0, 1 and +inf when y is infinite.
  float efx = mulsignf(fabsfk(x) - 1, y);
  if (xisinff(y)) result = efx < 0 ? 0.0f : (efx == 0 ? 1.0f : SLEEF_INFINITYf);
  // x is +-inf or +-0: result is 0 or inf, signed like x only for odd integer y.
  if (xisinff(x) || x == 0) result = (yisodd ? signf(x) : 1) * ((x == 0 ? -y : y) < 0 ? 0 : SLEEF_INFINITYf);
  if (xisnanf(x) || xisnanf(y)) result = SLEEF_NANf;
  // Checked last, so pow(x, 0) and pow(1, y) are 1 even for NaN operands.
  if (y == 0 || x == 1) result = 1;

  return result;
}
SLEEF_INFINITYf : y; y = xisnanf(x) ? SLEEF_NANf : y; return y; } EXPORT CONST float xtanhf(float x) { float y = fabsfk(x); Sleef_float2 d = expk2f(df(y, 0)); Sleef_float2 e = dfrec_f2_f2(d); d = dfdiv_f2_f2_f2(dfsub_f2_f2_f2(d, e), dfadd_f2_f2_f2(d, e)); y = d.x + d.y; y = fabsfk(x) > 18.714973875f ? 1.0f : y; y = xisnanf(y) ? 1.0f : y; y = mulsignf(y, x); y = xisnanf(x) ? SLEEF_NANf : y; return y; } EXPORT CONST float xsinhf_u35(float x) { float e = expm1kf(fabsfk(x)); float y = (e + 2) / (e + 1) * (0.5f * e); y = fabsfk(x) > 88 ? SLEEF_INFINITYf : y; y = xisnanf(y) ? SLEEF_INFINITYf : y; y = mulsignf(y, x); y = xisnanf(x) ? SLEEF_NANf : y; return y; } EXPORT CONST float xcoshf_u35(float x) { float e = xexpf(fabsfk(x)); float y = 0.5f * e + 0.5f / e; y = fabsfk(x) > 88 ? SLEEF_INFINITYf : y; y = xisnanf(y) ? SLEEF_INFINITYf : y; y = xisnanf(x) ? SLEEF_NANf : y; return y; } EXPORT CONST float xtanhf_u35(float x) { float y = fabsfk(x); float d = expm1kf(2*y); y = d / (d + 2); y = fabsfk(x) > 18.714973875f ? 1.0f : y; y = xisnanf(y) ? 1.0f : y; y = mulsignf(y, x); y = xisnanf(x) ? SLEEF_NANf : y; return y; } static INLINE CONST Sleef_float2 logk2f(Sleef_float2 d) { Sleef_float2 x, x2, m, s; float t; int e; e = ilogbkf(d.x * (1.0f/0.75f)); m = dfscale_f2_f2_f(d, pow2if(-e)); x = dfdiv_f2_f2_f2(dfadd2_f2_f2_f(m, -1), dfadd2_f2_f2_f(m, 1)); x2 = dfsqu_f2_f2(x); t = 0.2392828464508056640625f; t = mlaf(t, x2.x, 0.28518211841583251953125f); t = mlaf(t, x2.x, 0.400005877017974853515625f); t = mlaf(t, x2.x, 0.666666686534881591796875f); s = dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), e); s = dfadd_f2_f2_f2(s, dfscale_f2_f2_f(x, 2)); s = dfadd_f2_f2_f2(s, dfmul_f2_f2_f(dfmul_f2_f2_f2(x2, x), t)); return s; } EXPORT CONST float xasinhf(float x) { float y = fabsfk(x); Sleef_float2 d; d = y > 1 ? dfrec_f2_f(x) : df(y, 0); d = dfsqrt_f2_f2(dfadd2_f2_f2_f(dfsqu_f2_f2(d), 1)); d = y > 1 ? 
dfmul_f2_f2_f(d, y) : d; d = logk2f(dfnormalize_f2_f2(dfadd_f2_f2_f(d, x))); y = d.x + d.y; y = (fabsfk(x) > SQRT_FLT_MAX || xisnanf(y)) ? mulsignf(SLEEF_INFINITYf, x) : y; y = xisnanf(x) ? SLEEF_NANf : y; y = xisnegzerof(x) ? -0.0f : y; return y; } EXPORT CONST float xacoshf(float x) { Sleef_float2 d = logk2f(dfadd2_f2_f2_f(dfmul_f2_f2_f2(dfsqrt_f2_f2(dfadd2_f2_f_f(x, 1)), dfsqrt_f2_f2(dfadd2_f2_f_f(x, -1))), x)); float y = d.x + d.y; y = (x > SQRT_FLT_MAX || xisnanf(y)) ? SLEEF_INFINITYf : y; y = x == 1.0f ? 0.0f : y; y = x < 1.0f ? SLEEF_NANf : y; y = xisnanf(x) ? SLEEF_NANf : y; return y; } EXPORT CONST float xatanhf(float x) { float y = fabsfk(x); Sleef_float2 d = logk2f(dfdiv_f2_f2_f2(dfadd2_f2_f_f(1, y), dfadd2_f2_f_f(1, -y))); y = y > 1.0f ? SLEEF_NANf : (y == 1.0f ? SLEEF_INFINITYf : (d.x + d.y) * 0.5f); y = xisinff(x) || xisnanf(y) ? SLEEF_NANf : y; y = mulsignf(y, x); y = xisnanf(x) ? SLEEF_NANf : y; return y; } EXPORT CONST float xexp2f(float d) { int q = (int)rintfk(d); float s, u; s = d - q; u = +0.1535920892e-3; u = mlaf(u, s, +0.1339262701e-2); u = mlaf(u, s, +0.9618384764e-2); u = mlaf(u, s, +0.5550347269e-1); u = mlaf(u, s, +0.2402264476e+0); u = mlaf(u, s, +0.6931471825e+0); u = dfnormalize_f2_f2(dfadd_f2_f_f2(1, dfmul_f2_f_f(u, s))).x; u = ldexp2kf(u, q); if (d >= 128) u = SLEEF_INFINITYf; if (d < -150) u = 0; return u; } EXPORT CONST float xexp2f_u35(float d) { int q = (int)rintfk(d); float s, u; s = d - q; u = +0.1535920892e-3; u = mlaf(u, s, +0.1339262701e-2); u = mlaf(u, s, +0.9618384764e-2); u = mlaf(u, s, +0.5550347269e-1); u = mlaf(u, s, +0.2402264476e+0); u = mlaf(u, s, +0.6931471825e+0); u = mlaf(u, s, +0.1000000000e+1); u = ldexp2kf(u, q); if (d >= 128) u = SLEEF_INFINITYf; if (d < -150) u = 0; return u; } EXPORT CONST float xexp10f(float d) { int q = (int)rintfk(d * (float)LOG10_2); float s, u; s = mlaf(q, -L10Uf, d); s = mlaf(q, -L10Lf, s); u = +0.6802555919e-1; u = mlaf(u, s, +0.2078080326e+0); u = mlaf(u, s, +0.5393903852e+0); u = 
mlaf(u, s, +0.1171245337e+1); u = mlaf(u, s, +0.2034678698e+1); u = mlaf(u, s, +0.2650949001e+1); Sleef_float2 x = dfadd_f2_f2_f(df(2.3025851249694824219, -3.1705172516493593157e-08), u * s); u = dfnormalize_f2_f2(dfadd_f2_f_f2(1, dfmul_f2_f2_f(x, s))).x; u = ldexp2kf(u, q); if (d > 38.5318394191036238941387f) u = SLEEF_INFINITYf; // log10(FLT_MAX) if (d < -50) u = 0; return u; } EXPORT CONST float xexp10f_u35(float d) { int q = (int)rintfk(d * (float)LOG10_2); float s, u; s = mlaf(q, -L10Uf, d); s = mlaf(q, -L10Lf, s); u = +0.2064004987e+0; u = mlaf(u, s, +0.5417877436e+0); u = mlaf(u, s, +0.1171286821e+1); u = mlaf(u, s, +0.2034656048e+1); u = mlaf(u, s, +0.2650948763e+1); u = mlaf(u, s, +0.2302585125e+1); u = mlaf(u, s, +0.1000000000e+1); u = ldexp2kf(u, q); if (d > 38.5318394191036238941387f) u = SLEEF_INFINITYf; // log10(FLT_MAX) if (d < -50) u = 0; return u; } EXPORT CONST float xexpm1f(float a) { Sleef_float2 d = dfadd2_f2_f2_f(expk2f(df(a, 0)), -1.0f); float x = d.x + d.y; if (a > 88.72283172607421875f) x = SLEEF_INFINITYf; if (a < -16.635532333438687426013570f) x = -1; if (xisnegzerof(a)) x = -0.0f; return x; } EXPORT CONST float xlog10f(float d) { Sleef_float2 x, s; float m, t, x2; int e; int o = d < FLT_MIN; if (o) d *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32); e = ilogb2kf(d * (1.0f/0.75f)); m = ldexp3kf(d, -e); if (o) e -= 64; x = dfdiv_f2_f2_f2(dfadd2_f2_f_f(-1, m), dfadd2_f2_f_f(1, m)); x2 = x.x * x.x; t = +0.1314289868e+0; t = mlaf(t, x2, +0.1735493541e+0); t = mlaf(t, x2, +0.2895309627e+0); s = dfmul_f2_f2_f(df(0.30103001, -1.432098889e-08), (float)e); s = dfadd_f2_f2_f2(s, dfmul_f2_f2_f2(x, df(0.868588984, -2.170757285e-08))); s = dfadd_f2_f2_f(s, x2 * x.x * t); float r = s.x + s.y; if (xisinff(d)) r = SLEEF_INFINITYf; if (d < 0 || xisnanf(d)) r = SLEEF_NANf; if (d == 0) r = -SLEEF_INFINITYf; return r; } EXPORT CONST float xlog2f(float d) { Sleef_float2 x, s; float m, t, x2; int e; int o = d < FLT_MIN; if (o) d *= (float)(INT64_C(1) 
// Base-2 logarithm, single precision, lower-accuracy variant (plain float
// arithmetic throughout; compare xlog2f, which uses two-float values).
//
// d is decomposed as m * 2^e, then log2(m) is approximated through
// x = (m - 1) / (m + 1) and a short polynomial in x^2.
// Special cases: +inf -> +inf, negative or NaN -> NaN, zero -> -inf.
EXPORT CONST float xlog2f_u35(float d) {
  float m, t, x, x2;
  int e;

  // Inputs below FLT_MIN are pre-scaled by 2^32 * 2^32 = 2^64; the exponent is
  // corrected by 64 further down.
  int o = d < FLT_MIN;
  if (o) d *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32);

  // Exponent is taken from d * (1/0.75), then m = d * 2^-e.
  e = ilogb2kf(d * (1.0f/0.75f));
  m = ldexp3kf(d, -e);

  if (o) e -= 64;

  x = (m - 1) / (m + 1);
  x2 = x * x;

  t = +0.4374088347e+0;
  t = mlaf(t, x2, +0.5764843822e+0);
  t = mlaf(t, x2, +0.9618024230e+0);

  // r = e + x * 2.885390043 + x^3 * t
  float r = mlaf(x2 * x, t, mlaf(x, +0.2885390043e+1, e));

  if (xisinff(d)) r = SLEEF_INFINITYf;
  if (d < 0 || xisnanf(d)) r = SLEEF_NANf;
  if (d == 0) r = -SLEEF_INFINITYf;

  return r;
}
q = (r == 1) ? 1.2599210498948731647672106f : q; q = (r == 2) ? 1.5874010519681994747517056f : q; q = ldexp2kf(q, (e + 6144) / 3 - 2048); q = mulsignf(q, d); d = fabsfk(d); x = -0.601564466953277587890625f; x = mlaf(x, d, 2.8208892345428466796875f); x = mlaf(x, d, -5.532182216644287109375f); x = mlaf(x, d, 5.898262500762939453125f); x = mlaf(x, d, -3.8095417022705078125f); x = mlaf(x, d, 2.2241256237030029296875f); y = d * x * x; y = (y - (2.0f / 3.0f) * y * (y * x - 1.0f)) * q; return y; } EXPORT CONST float xcbrtf_u1(float d) { float x, y, z; Sleef_float2 q2 = df(1, 0), u, v; int e, r; e = ilogbkf(fabsfk(d))+1; d = ldexp2kf(d, -e); r = (e + 6144) % 3; q2 = (r == 1) ? df(1.2599210739135742188, -2.4018701694217270415e-08) : q2; q2 = (r == 2) ? df(1.5874010324478149414, 1.9520385308169352356e-08) : q2; q2.x = mulsignf(q2.x, d); q2.y = mulsignf(q2.y, d); d = fabsfk(d); x = -0.601564466953277587890625f; x = mlaf(x, d, 2.8208892345428466796875f); x = mlaf(x, d, -5.532182216644287109375f); x = mlaf(x, d, 5.898262500762939453125f); x = mlaf(x, d, -3.8095417022705078125f); x = mlaf(x, d, 2.2241256237030029296875f); y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0f); z = x; u = dfmul_f2_f_f(x, x); u = dfmul_f2_f2_f2(u, u); u = dfmul_f2_f2_f(u, d); u = dfadd2_f2_f2_f(u, -x); y = u.x + u.y; y = -2.0 / 3.0 * y * z; v = dfadd2_f2_f2_f(dfmul_f2_f_f(z, z), y); v = dfmul_f2_f2_f(v, d); v = dfmul_f2_f2_f2(v, q2); z = ldexp2kf(v.x + v.y, (e + 6144) / 3 - 2048); if (xisinff(d)) { z = mulsignf(SLEEF_INFINITYf, q2.x); } if (d == 0) { z = mulsignf(0, q2.x); } return z; } // EXPORT CONST float xfabsf(float x) { return fabsfk(x); } EXPORT CONST float xcopysignf(float x, float y) { return copysignfk(x, y); } EXPORT CONST float xfmaxf(float x, float y) { return y != y ? x : (x > y ? x : y); } EXPORT CONST float xfminf(float x, float y) { return y != y ? x : (x < y ? 
// Round toward zero, single precision.
//
// Values with |x| >= 2^23 have no fractional bits and are returned unchanged,
// as are +-infinity and NaN.  For everything else the fractional part is
// obtained through an int32_t round trip and subtracted; copysignfk() keeps
// the sign of x so that inputs in (-1, 0) give -0.0.
//
// The range check is done BEFORE the float -> int32_t conversion: converting
// NaN, infinity or a value outside the int32_t range is undefined behaviour
// in C, so the cast must only be reached for |x| < 2^23.
EXPORT CONST float xtruncf(float x) {
  if (xisinff(x) || xisnanf(x) || fabsfk(x) >= (float)(INT64_C(1) << 23)) {
    return x;
  }

  float fr = x - (int32_t)x;

  return copysignfk(x - fr, x);
}
mulsignf(0, y) : x; int c = (cx.i < 0) == (y < x); if (c) cx.i = -(cx.i ^ (1 << 31)); if (x != y) cx.i--; if (c) cx.i = -(cx.i ^ (1 << 31)); if (cx.f == 0 && x != 0) cx.f = mulsignf(0, x); if (x == 0 && y == 0) cx.f = y; if (xisnanf(x) || xisnanf(y)) cx.f = SLEEF_NANf; return cx.f; } EXPORT CONST float xfrfrexpf(float x) { union { float f; int32_t u; } cx; if (fabsfk(x) < FLT_MIN) x *= (1 << 30); cx.f = x; cx.u &= ~0x7f800000U; cx.u |= 0x3f000000U; if (xisinff(x)) cx.f = mulsignf(SLEEF_INFINITYf, x); if (x == 0) cx.f = x; return cx.f; } EXPORT CONST int xexpfrexpf(float x) { union { float f; uint32_t u; } cx; int ret = 0; if (fabsfk(x) < FLT_MIN) { x *= (1 << 30); ret = -30; } cx.f = x; ret += (int32_t)(((cx.u >> 23) & 0xff)) - 0x7e; if (x == 0 || xisnanf(x) || xisinff(x)) ret = 0; return ret; } EXPORT CONST float xhypotf_u05(float x, float y) { x = fabsfk(x); y = fabsfk(y); float min = fminfk(x, y), n = min; float max = fmaxfk(x, y), d = max; if (max < FLT_MIN) { n *= UINT64_C(1) << 24; d *= UINT64_C(1) << 24; } Sleef_float2 t = dfdiv_f2_f2_f2(df(n, 0), df(d, 0)); t = dfmul_f2_f2_f(dfsqrt_f2_f2(dfadd2_f2_f2_f(dfsqu_f2_f2(t), 1)), max); float ret = t.x + t.y; if (xisnanf(ret)) ret = SLEEF_INFINITYf; if (min == 0) ret = max; if (xisnanf(x) || xisnanf(y)) ret = SLEEF_NANf; if (x == SLEEF_INFINITYf || y == SLEEF_INFINITYf) ret = SLEEF_INFINITYf; return ret; } EXPORT CONST float xhypotf_u35(float x, float y) { x = fabsfk(x); y = fabsfk(y); float min = fminfk(x, y); float max = fmaxfk(x, y); float t = min / max; float ret = max * SQRTF(1 + t*t); if (min == 0) ret = max; if (xisnanf(x) || xisnanf(y)) ret = SLEEF_NANf; if (x == SLEEF_INFINITYf || y == SLEEF_INFINITYf) ret = SLEEF_INFINITYf; return ret; } static INLINE CONST float toward0f(float d) { return d == 0 ? 0 : intBitsToFloat(floatToRawIntBits(d)-1); } static INLINE CONST float ptruncf(float x) { return fabsfk(x) >= (float)(INT64_C(1) << 23) ? 
x : (x - (x - (int32_t)x)); } EXPORT CONST float xfmodf(float x, float y) { float nu = fabsfk(x), de = fabsfk(y), s = 1, q; if (de < FLT_MIN) { nu *= UINT64_C(1) << 25; de *= UINT64_C(1) << 25; s = 1.0f / (UINT64_C(1) << 25); } Sleef_float2 r = df(nu, 0); float rde = toward0f(1.0f / de); for(int i=0;i<8;i++) { // ceil(log2(FLT_MAX) / 22)+1 q = ptruncf(toward0f(r.x) * rde); q = (3*de > r.x && r.x >= de) ? 2 : q; q = (2*de > r.x && r.x >= de) ? 1 : q; r = dfnormalize_f2_f2(dfadd2_f2_f2_f2(r, dfmul_f2_f_f(q, -de))); if (r.x < de) break; } float ret = (r.x + r.y) * s; if (r.x + r.y == de) ret = 0; ret = mulsignf(ret, x); if (nu < de) ret = x; if (de == 0) ret = SLEEF_NANf; return ret; } static INLINE CONST float rintfk2(float d) { float x = d + 0.5f; int32_t isodd = (1 & (int32_t)x) != 0; float fr = x - (int32_t)x; fr = (fr < 0 || (fr == 0 && isodd)) ? fr+1.0f : fr; return (fabsfk(d) >= (float)(INT64_C(1) << 23)) ? d : copysignfk(x - fr, d); } EXPORT CONST float xremainderf(float x, float y) { float n = fabsfk(x), d = fabsfk(y), s = 1, q; if (d < FLT_MIN*2) { n *= UINT64_C(1) << 25; d *= UINT64_C(1) << 25; s = 1.0f / (UINT64_C(1) << 25); } float rd = 1.0f / d; Sleef_float2 r = df(n, 0); int qisodd = 0; for(int i=0;i<8;i++) { // ceil(log2(FLT_MAX) / 22)+1 q = rintfk2(r.x * rd); if (fabsfk(r.x) < 1.5f * d) q = r.x < 0 ? -1 : 1; if (fabsfk(r.x) < 0.5f * d || (fabsfk(r.x) == 0.5f * d && !qisodd)) q = 0; if (q == 0) break; if (xisinff(q * -d)) q = q + mulsignf(-1, r.x); qisodd ^= (1 & (int)q) != 0 && fabsfk(q) < (float)(INT64_C(1) << 24); r = dfnormalize_f2_f2(dfadd2_f2_f2_f2(r, dfmul_f2_f_f(q, -d))); } float ret = r.x * s; ret = mulsignf(ret, x); if (xisinff(y)) ret = xisinff(x) ? SLEEF_NANf : x; if (d == 0) ret = SLEEF_NANf; return ret; } EXPORT CONST float xsqrtf_u05(float d) { float q = 0.5f; d = d < 0 ? 
SLEEF_NANf : d; if (d < 5.2939559203393770e-23f) { d *= 1.8889465931478580e+22f; q = 7.2759576141834260e-12f * 0.5f; } if (d > 1.8446744073709552e+19f) { d *= 5.4210108624275220e-20f; q = 4294967296.0f * 0.5f; } // http://en.wikipedia.org/wiki/Fast_inverse_square_root float x = intBitsToFloat(0x5f375a86 - (floatToRawIntBits(d + 1e-45f) >> 1)); x = x * (1.5f - 0.5f * d * x * x); x = x * (1.5f - 0.5f * d * x * x); x = x * (1.5f - 0.5f * d * x * x) * d; Sleef_float2 d2 = dfmul_f2_f2_f2(dfadd2_f2_f_f2(d, dfmul_f2_f_f(x, x)), dfrec_f2_f(x)); float ret = (d2.x + d2.y) * q; ret = d == SLEEF_INFINITYf ? SLEEF_INFINITYf : ret; ret = d == 0 ? d : ret; return ret; } EXPORT CONST float xsqrtf_u35(float d) { float q = 1.0f; d = d < 0 ? SLEEF_NANf : d; if (d < 5.2939559203393770e-23f) { d *= 1.8889465931478580e+22f; q = 7.2759576141834260e-12f; } if (d > 1.8446744073709552e+19f) { d *= 5.4210108624275220e-20f; q = 4294967296.0f; } // http://en.wikipedia.org/wiki/Fast_inverse_square_root float x = intBitsToFloat(0x5f375a86 - (floatToRawIntBits(d + 1e-45) >> 1)); x = x * (1.5f - 0.5f * d * x * x); x = x * (1.5f - 0.5f * d * x * x); x = x * (1.5f - 0.5f * d * x * x); x = x * (1.5f - 0.5f * d * x * x); return d == SLEEF_INFINITYf ? SLEEF_INFINITYf : (x * d * q); } EXPORT CONST float xsqrtf(float d) { return SQRTF(d); } EXPORT CONST float xfmaf(float x, float y, float z) { float h2 = x * y + z, q = 1; if (fabsfk(h2) < 1e-38f) { const float c0 = 1 << 25, c1 = c0 * c0, c2 = c1 * c1; x *= c1; y *= c1; z *= c2; q = 1.0f / c2; } if (fabsfk(h2) > 1e+38f) { const float c0 = 1 << 25, c1 = c0 * c0, c2 = c1 * c1; x *= 1.0 / c1; y *= 1.0 / c1; z *= 1.0 / c2; q = c2; } Sleef_float2 d = dfmul_f2_f_f(x, y); d = dfadd2_f2_f2_f(d, z); float ret = (x == 0 || y == 0) ? z : (d.x + d.y); if (xisinff(z) && !xisinff(x) && !xisnanf(x) && !xisinff(y) && !xisnanf(y)) h2 = z; return (xisinff(h2) || xisnanf(h2)) ? 
h2 : ret*q; } // static INLINE CONST Sleef_float2 sinpifk(float d) { float u, s, t; Sleef_float2 x, s2; u = d * 4; int q = ceilfk(u) & ~1; int o = (q & 2) != 0; s = u - (float)q; t = s; s = s * s; s2 = dfmul_f2_f_f(t, t); // u = o ? -0.2430611801e-7f : +0.3093842054e-6f; u = mlaf(u, s, o ? +0.3590577080e-5f : -0.3657307388e-4f); u = mlaf(u, s, o ? -0.3259917721e-3f : +0.2490393585e-2f); x = dfadd2_f2_f_f2(u * s, o ? df(0.015854343771934509277, 4.4940051354032242811e-10) : df(-0.080745510756969451904, -1.3373665339076936258e-09)); x = dfadd2_f2_f2_f2(dfmul_f2_f2_f2(s2, x), o ? df(-0.30842512845993041992, -9.0728339030733922277e-09) : df(0.78539818525314331055, -2.1857338617566484855e-08)); x = dfmul_f2_f2_f2(x, o ? s2 : df(t, 0)); x = o ? dfadd2_f2_f2_f(x, 1) : x; // if ((q & 4) != 0) { x.x = -x.x; x.y = -x.y; } return x; } EXPORT CONST float xsinpif_u05(float d) { Sleef_float2 x = sinpifk(d); float r = x.x + x.y; if (xisnegzerof(d)) r = -0.0; if (fabsfk(d) > TRIGRANGEMAX4f) r = 0; if (xisinff(d)) r = SLEEF_NANf; return r; } static INLINE CONST Sleef_float2 cospifk(float d) { float u, s, t; Sleef_float2 x, s2; u = d * 4; int q = ceilfk(u) & ~1; int o = (q & 2) == 0; s = u - (float)q; t = s; s = s * s; s2 = dfmul_f2_f_f(t, t); // u = o ? -0.2430611801e-7f : +0.3093842054e-6f; u = mlaf(u, s, o ? +0.3590577080e-5f : -0.3657307388e-4f); u = mlaf(u, s, o ? -0.3259917721e-3f : +0.2490393585e-2f); x = dfadd2_f2_f_f2(u * s, o ? df(0.015854343771934509277, 4.4940051354032242811e-10) : df(-0.080745510756969451904, -1.3373665339076936258e-09)); x = dfadd2_f2_f2_f2(dfmul_f2_f2_f2(s2, x), o ? df(-0.30842512845993041992, -9.0728339030733922277e-09) : df(0.78539818525314331055, -2.1857338617566484855e-08)); x = dfmul_f2_f2_f2(x, o ? s2 : df(t, 0)); x = o ? 
dfadd2_f2_f2_f(x, 1) : x; // if (((q+2) & 4) != 0) { x.x = -x.x; x.y = -x.y; } return x; } EXPORT CONST float xcospif_u05(float d) { Sleef_float2 x = cospifk(d); float r = x.x + x.y; if (fabsfk(d) > TRIGRANGEMAX4f) r = 1; if (xisinff(d)) r = SLEEF_NANf; return r; } typedef struct { Sleef_float2 a, b; } df2; static CONST df2 gammafk(float a) { Sleef_float2 clc = df(0, 0), clln = df(1, 0), clld = df(1, 0), v = df(1, 0), x, y, z; float t, u; int otiny = fabsfk(a) < 1e-30f, oref = a < 0.5f; x = otiny ? df(0, 0) : (oref ? dfadd2_f2_f_f(1, -a) : df(a, 0)); int o0 = (0.5f <= x.x && x.x <= 1.2), o2 = 2.3 < x.x; y = dfnormalize_f2_f2(dfmul_f2_f2_f2(dfadd2_f2_f2_f(x, 1), x)); y = dfnormalize_f2_f2(dfmul_f2_f2_f2(dfadd2_f2_f2_f(x, 2), y)); clln = (o2 && x.x <= 7) ? y : clln; x = (o2 && x.x <= 7) ? dfadd2_f2_f2_f(x, 3) : x; t = o2 ? (1.0 / x.x) : dfnormalize_f2_f2(dfadd2_f2_f2_f(x, o0 ? -1 : -2)).x; u = o2 ? +0.000839498720672087279971000786 : (o0 ? +0.9435157776e+0f : +0.1102489550e-3f); u = mlaf(u, t, o2 ? -5.17179090826059219329394422e-05 : (o0 ? +0.8670063615e+0f : +0.8160019934e-4f)); u = mlaf(u, t, o2 ? -0.000592166437353693882857342347 : (o0 ? +0.4826702476e+0f : +0.1528468856e-3f)); u = mlaf(u, t, o2 ? +6.97281375836585777403743539e-05 : (o0 ? -0.8855129778e-1f : -0.2355068718e-3f)); u = mlaf(u, t, o2 ? +0.000784039221720066627493314301 : (o0 ? +0.1013825238e+0f : +0.4962242092e-3f)); u = mlaf(u, t, o2 ? -0.000229472093621399176949318732 : (o0 ? -0.1493408978e+0f : -0.1193488017e-2f)); u = mlaf(u, t, o2 ? -0.002681327160493827160473958490 : (o0 ? +0.1697509140e+0f : +0.2891599433e-2f)); u = mlaf(u, t, o2 ? +0.003472222222222222222175164840 : (o0 ? -0.2072454542e+0f : -0.7385451812e-2f)); u = mlaf(u, t, o2 ? +0.083333333333333333335592087900 : (o0 ? 
// Error function erf(a), single precision.
//
// Works on |a| and re-applies the sign of the original argument at the end.
// Three coefficient sets are selected by the magnitude of |a|:
//   o0 : |a| < 1.1  -- polynomial in a*a, result is poly * |a|
//   o1 : |a| < 2.4  -- polynomial in |a|, result is 1 - expk2f(poly * |a|)
//   otherwise       -- third coefficient set, same 1 - expk2f(...) form
// For |a| >= 4.0 (o2 false) the magnitude of the result is fixed at 1.
// NaN input yields NaN.
EXPORT CONST float xerff_u1(float a) {
  float s = a, t, u;   // s keeps the signed input for the final mulsignf()
  Sleef_float2 d;

  a = fabsfk(a);
  int o0 = a < 1.1f, o1 = a < 2.4f, o2 = a < 4.0f;
  u = o0 ? (a*a) : a;

  // Low-order part of the polynomial in plain float, one mlaf() step per
  // coefficient, with the coefficient chosen per range.
  t = o0 ? +0.7089292194e-4f : o1 ? -0.1792667899e-4f : -0.9495757695e-5f;
  t = mlaf(t, u, o0 ? -0.7768311189e-3f : o1 ? +0.3937633010e-3f : +0.2481465926e-3f);
  t = mlaf(t, u, o0 ? +0.5159463733e-2f : o1 ? -0.3949181177e-2f : -0.2918176819e-2f);
  t = mlaf(t, u, o0 ? -0.2683781274e-1f : o1 ? +0.2445474640e-1f : +0.2059706673e-1f);
  t = mlaf(t, u, o0 ? +0.1128318012e+0f : o1 ? -0.1070996150e+0f : -0.9901899844e-1f);
  // The last two coefficients are applied in two-float arithmetic.
  d = dfmul_f2_f_f(t, u);
  d = dfadd2_f2_f2_f2(d, o0 ? dfx(-0.376125876000657465175213237214e+0) : o1 ? dfx(-0.634588905908410389971210809210e+0) : dfx(-0.643598050547891613081201721633e+0));
  d = dfmul_f2_f2_f(d, u);
  d = dfadd2_f2_f2_f2(d, o0 ? dfx(+0.112837916021059138255978217023e+1) : o1 ? dfx(-0.112879855826694507209862753992e+1) : dfx(-0.112461487742845562801052956293e+1));
  d = dfmul_f2_f2_f(d, a);
  // Outside the smallest range the polynomial is an exponent: erf = 1 - exp(d).
  d = o0 ? d : dfadd_f2_f_f2(1.0, dfneg_f2_f2(expk2f(d)));
  u = mulsignf(o2 ? (d.x + d.y) : 1, s);
  u = xisnanf(a) ? SLEEF_NANf : u;
  return u;
}
dfx(-0.482365310333045318680618892669e+0) : dfx(-0.498961546254537647970305302739e+0)); d = dfmul_f2_f2_f2(d, u); d = dfadd2_f2_f2_f2(d, o0 ? dfx(-0.636619483208481931303752546439e+0) : o1 ? dfx(-0.635609463574589034216723775292e+0) : o2 ? dfx(-0.134450203224533979217859332703e-2) : dfx(-0.471199543422848492080722832666e-4)); d = dfmul_f2_f2_f2(d, u); d = dfadd2_f2_f2_f2(d, o0 ? dfx(-0.112837917790537404939545770596e+1) : o1 ? dfx(-0.112855987376668622084547028949e+1) : o2 ? dfx(-0.572319781150472949561786101080e+0) : dfx(-0.572364030327966044425932623525e+0)); x = dfmul_f2_f2_f(o1 ? d : df(-a, 0), a); x = o1 ? x : dfadd2_f2_f2_f2(x, d); x = expk2f(x); x = o1 ? x : dfmul_f2_f2_f2(x, u); r = o3 ? (x.x + x.y) : 0; if (s < 0) r = 2 - r; r = xisnanf(s) ? SLEEF_NANf : r; return r; } // #ifdef ENABLE_MAIN // gcc -w -DENABLE_MAIN -I../common sleefsp.c rempitab.c -lm #include int main(int argc, char **argv) { float d1 = atof(argv[1]); //float d2 = atof(argv[2]); //float d3 = atof(argv[3]); //printf("%.20g, %.20g\n", (double)d1, (double)d2); //float i2 = atoi(argv[2]); //float c = xatan2f_u1(d1, d2); //printf("round %.20g\n", (double)d1); printf("test = %.20g\n", (double)xsqrtf_u05(d1)); //printf("correct = %.20g\n", (double)roundf(d1)); //printf("rint %.20g\n", (double)d1); //printf("test = %.20g\n", (double)xrintf(d1)); //printf("correct = %.20g\n", (double)rintf(d1)); //Sleef_float2 r = xsincospif_u35(d); //printf("%g, %g\n", (double)r.x, (double)r.y); } #endif sleef-3.5.1/src/quad-tester/000077500000000000000000000000001373003144100156375ustar00rootroot00000000000000sleef-3.5.1/src/quad-tester/CMakeLists.txt000066400000000000000000000100231373003144100203730ustar00rootroot00000000000000link_directories(${sleef_BINARY_DIR}/lib) # libsleefquad link_directories(${sleef_BINARY_DIR}/src/common) # common.a include_directories(${sleef_BINARY_DIR}/include) # sleef.h, sleefquad.h include_directories(${sleef_SOURCE_DIR}/src/quad) # qrename.h 
include_directories(${sleef_BINARY_DIR}/src/quad/include)  # rename headers

# Without MPFR the 'qtester' driver is not built in this directory (see the
# bottom of this file), so look for a pre-built one on the host instead.
if(NOT LIB_MPFR)
  find_program(QTESTER_COMMAND qtester)
endif()

# librt is optional; link against nothing when it is not found.
find_library(LIBRT rt)
if(NOT LIBRT)
  set(LIBRT "")
endif()

set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${SLEEF_C_FLAGS}")

if(COMPILER_SUPPORTS_FLOAT128)
  list(APPEND COMMON_TARGET_DEFINITIONS ENABLEFLOAT128=1)
endif()

#

# add_test_iut(<IUT>)
#
# Registers a CTest test named <IUT> that runs the qtester driver against the
# executable ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/<IUT>.
#
# The driver is chosen in this order of precedence:
#   1. ${NATIVE_BUILD_DIR}/bin/qtester when cross-compiling with a native build,
#   2. the 'qtester' target when LIB_MPFR is set,
#   3. the program found through QTESTER_COMMAND.
# If none of these applies (and QTESTER is not already set by the caller's
# scope), no test is registered.
#
# When EMULATOR is set it is placed in front of the IUT path; otherwise the
# optional "--sde <SDE_COMMAND>" and "<ARMIE_COMMAND> -msve-vector-bits=<N>"
# arguments are placed there.
function(add_test_iut IUT)
  if(LIB_MPFR)
    set(QTESTER qtester)
  elseif(QTESTER_COMMAND)
    set(QTESTER ${QTESTER_COMMAND})
  endif()

  # When we are crosscompiling using the mkrename* tools from a native
  # build, we use the tester executable from the native build.
  if(CMAKE_CROSSCOMPILING AND NATIVE_BUILD_DIR)
    set(QTESTER ${NATIVE_BUILD_DIR}/bin/qtester)
  endif()

  if(NOT QTESTER)
    return()
  endif()

  # Arguments inserted between the driver and the IUT path.
  set(iut_launcher "")
  if(EMULATOR)
    list(APPEND iut_launcher ${EMULATOR})
  else()
    if(SDE_COMMAND)
      list(APPEND iut_launcher "--sde" ${SDE_COMMAND})
    endif()
    if(ARMIE_COMMAND)
      list(APPEND iut_launcher ${ARMIE_COMMAND} -msve-vector-bits=${SVE_VECTOR_BITS})
    endif()
  endif()

  add_test(NAME ${IUT}
    COMMAND ${QTESTER} ${iut_launcher} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${IUT}
    WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
endfunction()

# Add vector extension `iut`s
set(IUT_SRC qiutsimd.c qiutsimdmain.c qtesterutil.c)

# test_extension(<SIMD>)
#
# When COMPILER_SUPPORTS_<SIMD> is true:
#   * builds the implementation-under-test executable qiut<simd> from
#     ${IUT_SRC}, registers it with add_test_iut() and appends it to IUT_LIST;
#   * if MPFR is available (and the toolchain is not MinGW) also builds
#     tester2<simd>qp from tester2simdqp.c.
#
# This stays a macro on purpose: TARGET_IUT<SIMD> and IUT_LIST are set in the
# scope of the caller.
macro(test_extension SIMD)
  if(COMPILER_SUPPORTS_${SIMD})
    string(TOLOWER ${SIMD} LCSIMD)
    string(CONCAT TARGET_IUT${SIMD} "qiut" ${LCSIMD})

    add_executable(${TARGET_IUT${SIMD}} ${IUT_SRC})
    target_compile_options(${TARGET_IUT${SIMD}}
      PRIVATE ${FLAGS_ENABLE_${SIMD}})
    target_compile_definitions(${TARGET_IUT${SIMD}}
      PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS})
    target_link_libraries(${TARGET_IUT${SIMD}}
      PRIVATE sleefquad ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
    # The generated headers must exist before the sources are compiled.
    add_dependencies(${TARGET_IUT${SIMD}}
      sleefquad_headers ${TARGET_HEADERS}
      sleefquad ${TARGET_LIBSLEEF})
    set_target_properties(${TARGET_IUT${SIMD}} PROPERTIES C_STANDARD 99)

    add_test_iut(${TARGET_IUT${SIMD}})
    list(APPEND IUT_LIST ${TARGET_IUT${SIMD}})

    if(LIB_MPFR AND NOT MINGW)
      # Build qtester2 SIMD
      set(T "tester2${LCSIMD}qp")
      add_executable(${T} tester2simdqp.c qtesterutil.c)
      target_compile_options(${T} PRIVATE ${FLAGS_ENABLE_${SIMD}})
      target_compile_definitions(${T}
        PRIVATE ENABLE_${SIMD}=1 USEMPFR=1 ${COMMON_TARGET_DEFINITIONS})
      set_target_properties(${T} PROPERTIES C_STANDARD 99)
      target_link_libraries(${T}
        PRIVATE sleefquad ${TARGET_LIBSLEEF} ${LIB_MPFR} ${LIBM} ${LIBGMP})
      add_dependencies(${T}
        sleefquad sleefquad_headers ${TARGET_LIBSLEEF} ${TARGET_HEADERS})
      if(MPFR_INCLUDE_DIR)
        target_include_directories(${T} PRIVATE ${MPFR_INCLUDE_DIR})
      endif()
    endif()
  endif()
endmacro()

foreach(SIMD ${SLEEFQUAD_SUPPORTED_EXT})
  test_extension(${SIMD})
endforeach()

if(LIB_MPFR AND NOT MINGW)
  # Compile executable 'qtester'
  add_host_executable(qtester qtester.c qtesterutil.c)
  # Target properties are only configured for a native build.
  if(NOT CMAKE_CROSSCOMPILING)
    target_link_libraries(qtester
      PRIVATE sleefquad ${TARGET_LIBSLEEF} ${LIBM} ${LIB_MPFR} ${LIBGMP})
    target_compile_definitions(qtester
      PRIVATE USEMPFR=1 ${COMMON_TARGET_DEFINITIONS})
    target_compile_options(qtester PRIVATE -Wno-unused-result)
    set_target_properties(qtester PROPERTIES C_STANDARD 99)
    if(MPFR_INCLUDE_DIR)
      target_include_directories(qtester PRIVATE ${MPFR_INCLUDE_DIR})
    endif()
  endif()
endif()
sleef-3.5.1/src/quad-tester/qiutsimd.c000066400000000000000000000173501373003144100176500ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #if defined(_MSC_VER) #define STDIN_FILENO 0 #else #include #include #include #endif #include "misc.h" #include "sleef.h" #include "sleefquad.h" #include "qtesterutil.h" // #ifdef ENABLE_PUREC_SCALAR #define CONFIG 1 #include "helperpurec_scalar.h" #include "qrenamepurec_scalar.h" #endif #ifdef ENABLE_PURECFMA_SCALAR #define CONFIG 2 #include "helperpurec_scalar.h" #include "qrenamepurecfma_scalar.h" #endif #ifdef ENABLE_SSE2 #define CONFIG 2 #include "helpersse2.h" #include "qrenamesse2.h" #endif #ifdef ENABLE_AVX2128 #define CONFIG 1 #include "helperavx2_128.h" #include "qrenameavx2128.h" #endif #ifdef ENABLE_AVX #define CONFIG 1 #include "helperavx.h" #include "qrenameavx.h" #endif #ifdef ENABLE_FMA4 #define CONFIG 4 #include "helperavx.h" #include "qrenamefma4.h" #endif #ifdef ENABLE_AVX2 #define CONFIG 1 #include "helperavx2.h" #include "qrenameavx2.h" #endif #ifdef ENABLE_AVX512F #define CONFIG 1 #include "helperavx512f.h" #include "qrenameavx512f.h" #endif #ifdef ENABLE_ADVSIMD #define CONFIG 1 #include "helperadvsimd.h" #include "qrenameadvsimd.h" #endif #ifdef ENABLE_SVE #define CONFIG 1 #include "helpersve.h" #include "qrenamesve.h" #endif #ifdef ENABLE_VSX #define CONFIG 1 #include "helperpower_128.h" #include "qrenamevsx.h" #endif #ifdef ENABLE_DSP128 #define CONFIG 2 #include "helpersse2.h" #include "qrenamedsp128.h" #endif #ifdef ENABLE_DSP256 #define CONFIG 1 #include "helperavx.h" #include "qrenamedsp256.h" #endif // int check_featureQP() { if (vavailability_i(1) == 0) return 0; vargquad a; memrand(&a, sizeof(vargquad)); a = xsqrtq_u05(a); return 1; } // typedef union { Sleef_quad q; struct { uint64_t l, h; }; } cnv128; #define BUFSIZE 1024 #define func_q_q(funcStr, funcName) { \ while (startsWith(buf, funcStr " ")) { \ sentinel = 0; \ int lane = xrand() % VECTLENDP; \ cnv128 c0; \ 
sscanf(buf, funcStr " %" PRIx64 ":%" PRIx64, &c0.h, &c0.l); \ vargquad a0; \ memrand(&a0, sizeof(vargquad)); \ a0.s[lane] = c0.q; \ a0 = funcName(a0); \ c0.q = a0.s[lane]; \ printf("%" PRIx64 ":%" PRIx64 "\n", c0.h, c0.l); \ fflush(stdout); \ if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \ } \ } #define func_q_q_q(funcStr, funcName) { \ while (startsWith(buf, funcStr " ")) { \ sentinel = 0; \ int lane = xrand() % VECTLENDP; \ cnv128 c0, c1; \ sscanf(buf, funcStr " %" PRIx64 ":%" PRIx64 " %" PRIx64 ":%" PRIx64, &c0.h, &c0.l, &c1.h, &c1.l); \ vargquad a0, a1; \ memrand(&a0, sizeof(vargquad)); \ memrand(&a1, sizeof(vargquad)); \ a0.s[lane] = c0.q; \ a1.s[lane] = c1.q; \ a0 = funcName(a0, a1); \ c0.q = a0.s[lane]; \ printf("%" PRIx64 ":%" PRIx64 "\n", c0.h, c0.l); \ fflush(stdout); \ if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \ } \ } #define func_i_q_q(funcStr, funcName) { \ while (startsWith(buf, funcStr " ")) { \ sentinel = 0; \ int lane = xrand() % VECTLENDP; \ cnv128 c0, c1; \ sscanf(buf, funcStr " %" PRIx64 ":%" PRIx64 " %" PRIx64 ":%" PRIx64, &c0.h, &c0.l, &c1.h, &c1.l); \ vargquad a0, a1; \ memrand(&a0, sizeof(vargquad)); \ memrand(&a1, sizeof(vargquad)); \ a0.s[lane] = c0.q; \ a1.s[lane] = c1.q; \ vint vi = funcName(a0, a1); \ int t[VECTLENDP*2]; \ vstoreu_v_p_vi(t, vi); \ printf("%d\n", t[lane]); \ fflush(stdout); \ if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \ } \ } #define func_d_q(funcStr, funcName) { \ while (startsWith(buf, funcStr " ")) { \ sentinel = 0; \ int lane = xrand() % VECTLENDP; \ cnv128 c0; \ sscanf(buf, funcStr " %" PRIx64 ":%" PRIx64, &c0.h, &c0.l); \ vargquad a0; \ memrand(&a0, sizeof(vargquad)); \ a0.s[lane] = c0.q; \ double d[VECTLENDP]; \ vstoreu_v_p_vd(d, funcName(a0)); \ printf("%" PRIx64 "\n", d2u(d[lane])); \ fflush(stdout); \ if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \ } \ } #define func_q_d(funcStr, funcName) { \ while (startsWith(buf, funcStr " ")) { \ sentinel = 0; \ int lane = xrand() % VECTLENDP; \ uint64_t 
u; \ sscanf(buf, funcStr " %" PRIx64, &u); \ double s[VECTLENDP]; \ memrand(s, sizeof(s)); \ s[lane] = u2d(u); \ vargquad a0 = funcName(vloadu_vd_p(s)); \ cnv128 c0; \ c0.q = a0.s[lane]; \ printf("%" PRIx64 ":%" PRIx64 "\n", c0.h, c0.l); \ fflush(stdout); \ if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \ } \ } #define func_strtoq(funcStr) { \ while (startsWith(buf, funcStr " ")) { \ sentinel = 0; \ char s[64]; \ sscanf(buf, funcStr " %63s", s); \ Sleef_quad1 a0; \ a0 = Sleef_strtoq(s, NULL, 10); \ cnv128 c0; \ c0.q = a0.s[0]; \ printf("%" PRIx64 ":%" PRIx64 "\n", c0.h, c0.l); \ fflush(stdout); \ if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \ } \ } #define func_qtostr(funcStr) { \ while (startsWith(buf, funcStr " ")) { \ sentinel = 0; \ cnv128 c0; \ sscanf(buf, funcStr " %" PRIx64 ":%" PRIx64, &c0.h, &c0.l); \ Sleef_quad1 a0; \ a0.s[0] = c0.q; \ char s[64]; \ Sleef_qtostr(s, 63, a0, 10); \ printf("%s\n", s); \ fflush(stdout); \ if (fgets(buf, BUFSIZE-1, stdin) == NULL) break; \ } \ } int do_test(int argc, char **argv) { xsrand(time(NULL)); { int k = 0; k += 1; #ifdef ENABLE_PUREC_SCALAR k += 2; // Enable string testing #endif printf("%d\n", k); fflush(stdout); } char buf[BUFSIZE]; fgets(buf, BUFSIZE-1, stdin); int sentinel = 0; while(!feof(stdin) && sentinel < 2) { func_q_q_q("addq_u05", xaddq_u05); func_q_q_q("subq_u05", xsubq_u05); func_q_q_q("mulq_u05", xmulq_u05); func_q_q_q("divq_u05", xdivq_u05); func_q_q("sqrtq_u05", xsqrtq_u05); func_q_q("sinq_u10", xsinq_u10); func_q_q("cosq_u10", xcosq_u10); func_q_q("tanq_u10", xtanq_u10); func_q_q("asinq_u10", xasinq_u10); func_q_q("acosq_u10", xacosq_u10); func_q_q("atanq_u10", xatanq_u10); func_q_q("expq_u10", xexpq_u10); func_q_q("exp2q_u10", xexp2q_u10); func_q_q("exp10q_u10", xexp10q_u10); func_q_q("expm1q_u10", xexpm1q_u10); func_q_q("logq_u10", xlogq_u10); func_q_q("log2q_u10", xlog2q_u10); func_q_q("log10q_u10", xlog10q_u10); func_q_q("log1pq_u10", xlog1pq_u10); func_q_q("negq", xnegq); 
func_q_d("cast_from_doubleq", xcast_from_doubleq); func_d_q("cast_to_doubleq", xcast_to_doubleq); func_i_q_q("cmpltq", xcmpltq); func_i_q_q("cmpgtq", xcmpgtq); func_i_q_q("cmpleq", xcmpleq); func_i_q_q("cmpgeq", xcmpgeq); func_i_q_q("cmpeqq", xcmpeqq); func_i_q_q("cmpneqq", xcmpneqq); func_i_q_q("unordq", xunordq); func_strtoq("strtoq"); func_qtostr("qtostr"); sentinel++; } return 0; } sleef-3.5.1/src/quad-tester/qiutsimdmain.c000066400000000000000000000017051373003144100205120ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include static jmp_buf sigjmp; int do_test(int argc, char **argv); int check_featureQP(); static void sighandler(int signum) { longjmp(sigjmp, 1); } int detectFeatureQP() { signal(SIGILL, sighandler); if (setjmp(sigjmp) == 0) { int r = check_featureQP(); signal(SIGILL, SIG_DFL); return r; } else { signal(SIGILL, SIG_DFL); return 0; } } int main(int argc, char **argv) { if (!detectFeatureQP()) { fprintf(stderr, "\n\n***** This host does not support the necessary CPU features to execute this program *****\n\n\n"); printf("0\n"); fclose(stdout); exit(-1); } return do_test(argc, argv); } sleef-3.5.1/src/quad-tester/qtester.c000066400000000000000000000617231373003144100175030ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // This define is needed to prevent the `execvpe` function to raise a // warning at compile time. For more information, see // https://linux.die.net/man/3/execvp. 
#define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "misc.h" #include "qtesterutil.h" void stop(char *mes) { fprintf(stderr, "%s\n", mes); exit(-1); } int ptoc[2], ctop[2]; int pid; FILE *fpctop; extern char **environ; void startChild(const char *path, char *const argv[]) { pipe(ptoc); pipe(ctop); pid = fork(); assert(pid != -1); if (pid == 0) { // child process char buf0[1], buf1[1]; int i; close(ptoc[1]); close(ctop[0]); fflush(stdin); fflush(stdout); i = dup2(ptoc[0], fileno(stdin)); assert(i != -1); i = dup2(ctop[1], fileno(stdout)); assert(i != -1); setvbuf(stdin, buf0, _IONBF,0); setvbuf(stdout, buf1, _IONBF,0); fflush(stdin); fflush(stdout); #if !defined(__APPLE__) && !defined(__FreeBSD__) execvpe(path, argv, environ); #else execvp(path, argv); #endif fprintf(stderr, "execvp in startChild : %s\n", strerror(errno)); exit(-1); } // parent process close(ptoc[0]); close(ctop[1]); } // typedef union { Sleef_quad q; struct { uint64_t l, h; }; } cnv128; #define child_q_q(funcStr, arg) do { \ char str[256]; \ cnv128 c; \ c.q = arg; \ sprintf(str, funcStr " %" PRIx64 ":%" PRIx64 "\n", c.h, c.l); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ sscanf(str, "%" PRIx64 ":%" PRIx64, &c.h, &c.l); \ return c.q; \ } while(0) #define child_q2_q(funcStr, arg) do { \ char str[256]; \ cnv128 c0, c1; \ c0.q = arg; \ sprintf(str, funcStr " %" PRIx64 ":%" PRIx64 "\n", c0.h, c0.l); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ sscanf(str, "%" PRIx64 ":%" PRIx64 " %" PRIx64 ":%" PRIx64 , &c0.h, &c0.l, &c1.h, &c1.l); \ Sleef_quad2 ret = { c0.q, c1.q }; \ return ret; \ } while(0) #define child_q_q_q(funcStr, arg0, arg1) do { \ char str[256]; \ cnv128 c0, c1; \ c0.q = arg0; \ c1.q = arg1; \ sprintf(str, funcStr " %" PRIx64 ":%" PRIx64 " %" PRIx64 ":%" PRIx64 "\n", c0.h, 
c0.l, c1.h, c1.l); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ sscanf(str, "%" PRIx64 ":%" PRIx64, &c0.h, &c0.l); \ return c0.q; \ } while(0) #define child_i_q_q(funcStr, arg0, arg1) do { \ char str[256]; \ cnv128 c0, c1; \ c0.q = arg0; \ c1.q = arg1; \ sprintf(str, funcStr " %" PRIx64 ":%" PRIx64 " %" PRIx64 ":%" PRIx64 "\n", c0.h, c0.l, c1.h, c1.l); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ int i; \ sscanf(str, "%d", &i); \ return i; \ } while(0) #define child_d_q(funcStr, arg) do { \ char str[256]; \ cnv128 c; \ c.q = arg; \ sprintf(str, funcStr " %" PRIx64 ":%" PRIx64 "\n", c.h, c.l); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ uint64_t u; \ sscanf(str, "%" PRIx64, &u); \ return u2d(u); \ } while(0) #define child_q_d(funcStr, arg) do { \ char str[256]; \ uint64_t u; \ sprintf(str, funcStr " %" PRIx64 "\n", d2u(arg)); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ cnv128 c; \ sscanf(str, "%" PRIx64 ":%" PRIx64, &c.h, &c.l); \ return c.q; \ } while(0) #define child_q_str(funcStr, arg) do { \ char str[256]; \ sprintf(str, funcStr " %s\n", arg); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ cnv128 c; \ sscanf(str, "%" PRIx64 ":%" PRIx64, &c.h, &c.l); \ return c.q; \ } while(0) #define child_str_q(funcStr, ret, arg) do { \ char str[256]; \ cnv128 c; \ c.q = arg; \ sprintf(str, funcStr " %" PRIx64 ":%" PRIx64 "\n", c.h, c.l); \ write(ptoc[1], str, strlen(str)); \ if (fgets(str, 255, fpctop) == NULL) stop("child " funcStr); \ sscanf(str, "%63s", ret); \ } while(0) Sleef_quad child_addq_u05(Sleef_quad x, Sleef_quad y) { child_q_q_q("addq_u05", x, y); } Sleef_quad child_subq_u05(Sleef_quad x, Sleef_quad y) { child_q_q_q("subq_u05", x, y); } Sleef_quad child_mulq_u05(Sleef_quad x, Sleef_quad y) 
{ child_q_q_q("mulq_u05", x, y); } Sleef_quad child_divq_u05(Sleef_quad x, Sleef_quad y) { child_q_q_q("divq_u05", x, y); } Sleef_quad child_negq(Sleef_quad x) { child_q_q("negq", x); } int child_cmpltq(Sleef_quad x, Sleef_quad y) { child_i_q_q("cmpltq", x, y); } int child_cmpgtq(Sleef_quad x, Sleef_quad y) { child_i_q_q("cmpgtq", x, y); } int child_cmpleq(Sleef_quad x, Sleef_quad y) { child_i_q_q("cmpleq", x, y); } int child_cmpgeq(Sleef_quad x, Sleef_quad y) { child_i_q_q("cmpgeq", x, y); } int child_cmpeqq(Sleef_quad x, Sleef_quad y) { child_i_q_q("cmpeqq", x, y); } int child_cmpneqq(Sleef_quad x, Sleef_quad y) { child_i_q_q("cmpneqq", x, y); } int child_unordq(Sleef_quad x, Sleef_quad y) { child_i_q_q("unordq", x, y); } Sleef_quad child_cast_from_doubleq(double x) { child_q_d("cast_from_doubleq", x); } double child_cast_to_doubleq(Sleef_quad x) { child_d_q("cast_to_doubleq", x); } Sleef_quad child_strtoq(const char *s) { child_q_str("strtoq", s); } void child_qtostr(char *ret, Sleef_quad x) { child_str_q("qtostr", ret, x); } Sleef_quad child_sqrtq_u05(Sleef_quad x) { child_q_q("sqrtq_u05", x); } Sleef_quad child_sinq_u10(Sleef_quad x) { child_q_q("sinq_u10", x); } Sleef_quad child_cosq_u10(Sleef_quad x) { child_q_q("cosq_u10", x); } Sleef_quad child_tanq_u10(Sleef_quad x) { child_q_q("tanq_u10", x); } Sleef_quad child_asinq_u10(Sleef_quad x) { child_q_q("asinq_u10", x); } Sleef_quad child_acosq_u10(Sleef_quad x) { child_q_q("acosq_u10", x); } Sleef_quad child_atanq_u10(Sleef_quad x) { child_q_q("atanq_u10", x); } Sleef_quad child_expq_u10(Sleef_quad x) { child_q_q("expq_u10", x); } Sleef_quad child_exp2q_u10(Sleef_quad x) { child_q_q("exp2q_u10", x); } Sleef_quad child_exp10q_u10(Sleef_quad x) { child_q_q("exp10q_u10", x); } Sleef_quad child_expm1q_u10(Sleef_quad x) { child_q_q("expm1q_u10", x); } Sleef_quad child_logq_u10(Sleef_quad x) { child_q_q("logq_u10", x); } Sleef_quad child_log2q_u10(Sleef_quad x) { child_q_q("log2q_u10", x); } Sleef_quad 
child_log10q_u10(Sleef_quad x) { child_q_q("log10q_u10", x); } Sleef_quad child_log1pq_u10(Sleef_quad x) { child_q_q("log1pq_u10", x); } Sleef_quad child_copysignq(Sleef_quad x, Sleef_quad y) { child_q_q_q("copysignq", x, y); } Sleef_quad child_fabsq(Sleef_quad x) { child_q_q("fabsq", x); } Sleef_quad child_fmaxq(Sleef_quad x, Sleef_quad y) { child_q_q_q("fmaxq", x, y); } Sleef_quad child_fminq(Sleef_quad x, Sleef_quad y) { child_q_q_q("fminq", x, y); } // #define cmpDenorm_q(mpfrFunc, childFunc, argx) do { \ mpfr_set_f128(frx, argx, GMP_RNDN); \ mpfrFunc(frz, frx, GMP_RNDN); \ Sleef_quad t = childFunc(argx); \ double u = countULPf128(t, frz, 1); \ if (u >= 10) { \ fprintf(stderr, "\narg = %s\ntest = %s\ncorrect = %s\nulp = %g\n", \ sprintf128(argx), sprintf128(t), sprintfr(frz), u); \ success = 0; \ break; \ } \ } while(0) #define cmpDenorm_q_q(mpfrFunc, childFunc, argx, argy) do { \ mpfr_set_f128(frx, argx, GMP_RNDN); \ mpfr_set_f128(fry, argy, GMP_RNDN); \ mpfrFunc(frz, frx, fry, GMP_RNDN); \ Sleef_quad t = childFunc(argx, argy); \ double u = countULPf128(t, frz, 1); \ if (u >= 10) { \ Sleef_quad qz = mpfr_get_f128(frz, GMP_RNDN); \ fprintf(stderr, "\narg = %s,\n %s\ntest = %s\ncorrect = %s\nulp = %g\n", \ sprintf128(argx), sprintf128(argy), sprintf128(t), sprintf128(qz), u); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracy_q(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_f128(frx, argx, GMP_RNDN); \ mpfrFunc(frz, frx, GMP_RNDN); \ Sleef_quad t = childFunc(argx); \ double e = countULPf128(t, frz, 0); \ maxError = fmax(maxError, e); \ if (e > bound) { \ fprintf(stderr, "\narg = %s, test = %s, correct = %s, ULP = %lf\n", \ sprintf128(argx), sprintf128(childFunc(argx)), sprintfr(frz), countULPf128(t, frz, 0)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracy_q_q(mpfrFunc, childFunc, argx, argy, bound) do { \ mpfr_set_f128(frx, argx, GMP_RNDN); \ mpfr_set_f128(fry, argy, GMP_RNDN); \ mpfrFunc(frz, frx, fry, GMP_RNDN); \ Sleef_quad t = 
childFunc(argx, argy); \ double e = countULPf128(t, frz, 0); \ maxError = fmax(maxError, e); \ if (e > bound) { \ fprintf(stderr, "\narg = %s, %s, test = %s, correct = %s, ULP = %lf\n", \ sprintf128(argx), sprintf128(argy), sprintf128(childFunc(argx, argy)), sprintfr(frz), countULPf128(t, frz, 0)); \ success = 0; \ break; \ } \ } while(0) #define testComparison(mpfrFunc, childFunc, argx, argy) do { \ mpfr_set_f128(frx, argx, GMP_RNDN); \ mpfr_set_f128(fry, argy, GMP_RNDN); \ int c = mpfrFunc(frx, fry); \ int t = childFunc(argx, argy); \ if ((c != 0) != (t != 0)) { \ fprintf(stderr, "\narg = %s, %s, test = %d, correct = %d\n", \ sprintf128(argx), sprintf128(argy), t, c); \ success = 0; \ break; \ } \ } while(0) // #define cmpDenormOuterLoop_q_q(mpfrFunc, childFunc, checkVals) do { \ for(int i=0;i #include #include #include #include #include #include #include #include #ifdef USEMPFR #include #endif #if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER) #define STDIN_FILENO 0 #else #include #include #include #endif #if defined(__MINGW32__) || defined(__MINGW64__) #include #endif #if defined(_MSC_VER) #include #endif #include "misc.h" #include "qtesterutil.h" // int readln(int fd, char *buf, int cnt) { int i, rcnt = 0; if (cnt < 1) return -1; while(cnt >= 2) { i = read(fd, buf, 1); if (i != 1) return i; if (*buf == '\n') break; rcnt++; buf++; cnt--; } *++buf = '\0'; rcnt++; return rcnt; } int startsWith(char *str, char *prefix) { return strncmp(str, prefix, strlen(prefix)) == 0; } // xuint128 xu(uint64_t h, uint64_t l) { xuint128 r = { l, h }; return r; } xuint128 sll128(uint64_t u, int c) { if (c < 64) { xuint128 r = { u << c, u >> (64 - c) }; return r; } xuint128 r = { 0, u << (c - 64) }; return r; } xuint128 add128(xuint128 x, xuint128 y) { xuint128 r = { x.l + y.l, x.h + y.h }; if (r.l < x.l) r.h++; return r; } int lt128(xuint128 x, xuint128 y) { if (x.h < y.h) return 1; if (x.h == y.h && x.l < y.l) return 1; return 0; } // typedef union { Sleef_quad 
q; xuint128 x; struct { uint64_t l, h; }; } cnv_t; int iszerof128(Sleef_quad a) { cnv_t c128 = { .q = a }; return (((c128.h & UINT64_C(0x7fffffffffffffff)) == 0) && c128.l == 0); } int isnegf128(Sleef_quad a) { cnv_t c128 = { .q = a }; return c128.h >> 63; } int isinff128(Sleef_quad a) { cnv_t c128 = { .q = a }; return (((c128.h & UINT64_C(0x7fffffffffffffff)) == UINT64_C(0x7fff000000000000)) && c128.l == 0); } int isnonnumberf128(Sleef_quad a) { cnv_t c128 = { .q = a }; return (c128.h & UINT64_C(0x7fff000000000000)) == UINT64_C(0x7fff000000000000); } int isnanf128(Sleef_quad a) { return isnonnumberf128(a) && !isinff128(a); } // static uint64_t xseed; uint64_t xrand() { uint64_t u = xseed; xseed = xseed * UINT64_C(6364136223846793005) + 1; u = (u & ((~UINT64_C(0)) << 32)) | (xseed >> 32); xseed = xseed * UINT64_C(6364136223846793005) + 1; return u; } void xsrand(uint64_t s) { xseed = s; xrand(); xrand(); xrand(); } void memrand(void *p, int size) { uint64_t *q = (uint64_t *)p; int i; for(i=0;i 0, disinf = isinff128(d); if (ciszero && !diszero) { ret = 10000; } else if (ciszero && diszero) { ret = 0; if (checkNegZero && csign != dsign) ret = 10003; } else if (cisnan && disnan) { ret = 0; } else if (cisnan || disnan) { ret = 10001; } else if (cisinf && disinf) { ret = csign == dsign ? 0 : 10002; } else { mpfr_set_f128(frd, d, GMP_RNDN); int e = mpfr_get_exp(frd); mpfr_set_d(frb, 1, GMP_RNDN); assert(!mpfr_zero_p(frb)); mpfr_set_exp(frb, e-113+1); mpfr_max(frb, frb, fr_denorm_min, GMP_RNDN); mpfr_sub(fra, frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); ret = fabs(mpfr_get_d(fra, GMP_RNDN)); } mpfr_clears(fra, frb, frc, frd, NULL); return ret; } // char *sprintfr(mpfr_t fr) { int digits = 51; mpfr_t t; mpfr_inits(t, NULL); int sign = mpfr_signbit(fr) ? -1 : 1; char *s = malloc(digits + 10); if (mpfr_inf_p(fr)) { sprintf(s, "%cinf", sign < 0 ? '-' : '+'); } else if (mpfr_nan_p(fr)) { sprintf(s, "nan"); } else { mpfr_exp_t e; s[0] = sign < 0 ? 
'-' : '+'; s[1] = '0'; s[2] = '.'; mpfr_abs(t, fr, GMP_RNDN); mpfr_get_str(s+3, &e, 10, digits, t, GMP_RNDN); int ie = e; char es[32]; snprintf(es, 30, "e%c%d", ie >= 0 ? '+' : '-', ie >= 0 ? ie : -ie); strncat(s, es, digits+10); } mpfr_clears(t, NULL); return s; } // #if MPFR_VERSION_MAJOR >= 4 && defined(ENABLEFLOAT128) && !defined(__APPLE__) void mpfr_set_f128(mpfr_t frx, Sleef_quad q, mpfr_rnd_t rnd) { int mpfr_set_float128(mpfr_t rop, __float128 op, mpfr_rnd_t rnd); union { Sleef_quad q; __float128 f; } c; c.q = q; mpfr_set_float128(frx, c.f, rnd); } Sleef_quad mpfr_get_f128(mpfr_t m, mpfr_rnd_t rnd) { __float128 mpfr_get_float128(mpfr_t op, mpfr_rnd_t rnd); union { Sleef_quad q; __float128 f; } c; c.f = mpfr_get_float128(m, rnd); return c.q; } #else #pragma message ( "Internal MPFR<->float128 conversion is used" ) void mpfr_set_f128(mpfr_t frx, Sleef_quad a, mpfr_rnd_t rnd) { union { Sleef_quad u; struct { uint64_t l, h; }; } c128 = { .u = a }; int sign = (int)(c128.h >> 63); int exp = ((int)(c128.h >> 48)) & 0x7fff; if (isnanf128(a)) { mpfr_set_nan(frx); } else if (isinff128(a)) { mpfr_set_inf(frx, sign ? 
-1 : 1); } else if (exp == 0) { c128.h &= UINT64_C(0xffffffffffff); mpfr_set_d(frx, ldexp((double)c128.h, 64), GMP_RNDN); mpfr_add_d(frx, frx, (double)(c128.l & UINT64_C(0xffffffff00000000)), GMP_RNDN); mpfr_add_d(frx, frx, (double)(c128.l & UINT64_C(0xffffffff)), GMP_RNDN); mpfr_set_exp(frx, mpfr_get_exp(frx) - 16382 - 112); mpfr_setsign(frx, frx, sign, GMP_RNDN); } else { c128.h &= UINT64_C(0xffffffffffff); mpfr_set_d(frx, ldexp(1, 112), GMP_RNDN); mpfr_add_d(frx, frx, ldexp((double)c128.h, 64), GMP_RNDN); mpfr_add_d(frx, frx, (double)(c128.l & UINT64_C(0xffffffff00000000)), GMP_RNDN); mpfr_add_d(frx, frx, (double)(c128.l & UINT64_C(0xffffffff)), GMP_RNDN); mpfr_set_exp(frx, exp - 16382); mpfr_setsign(frx, frx, sign, GMP_RNDN); } } static double3 mpfr_get_d3(mpfr_t fr, mpfr_rnd_t rnd) { double3 ret; mpfr_t t; mpfr_inits(t, NULL); ret.x = mpfr_get_d(fr, GMP_RNDN); mpfr_sub_d(t, fr, ret.x, GMP_RNDN); ret.y = mpfr_get_d(t, GMP_RNDN); mpfr_sub_d(t, t, ret.y, GMP_RNDN); ret.z = mpfr_get_d(t, GMP_RNDN); mpfr_clears(t, NULL); return ret; } static TDX_t mpfr_get_tdx(mpfr_t fr, mpfr_rnd_t rnd) { TDX_t td; if (mpfr_nan_p(fr)) { td.dd.x = NAN; td.dd.y = 0; td.dd.z = 0; td.e = 0; return td; } if (mpfr_inf_p(fr)) { td.dd.x = copysign(INFINITY, mpfr_cmp_d(fr, 0)); td.dd.y = 0; td.dd.z = 0; td.e = 0; return td; } if (mpfr_zero_p(fr)) { td.dd.x = copysign(0, mpfr_signbit(fr) ? 
-1 : 1); td.dd.y = 0; td.dd.z = 0; td.e = 0; return td; } mpfr_t t; mpfr_inits(t, NULL); mpfr_set(t, fr, GMP_RNDN); td.e = mpfr_get_exp(fr) + 16382; assert(!mpfr_zero_p(t)); mpfr_set_exp(t, 1); mpfr_setsign(t, t, mpfr_signbit(fr), GMP_RNDN); td.dd = mpfr_get_d3(t, GMP_RNDN); if (fabs(td.dd.x) == 2.0) { td.dd.x *= 0.5; td.dd.y *= 0.5; td.dd.z *= 0.5; td.e++; } mpfr_clears(t, NULL); return td; } #define HBX 1.0 #define LOGXSCALE 1 #define XSCALE (1 << LOGXSCALE) #define SX 61 #define HBY (1.0 / (UINT64_C(1) << 53)) #define LOGYSCALE 4 #define YSCALE (1 << LOGYSCALE) #define SY 11 #define HBZ (1.0 / ((UINT64_C(1) << 53) * (double)(UINT64_C(1) << 53))) #define LOGZSCALE 10 #define ZSCALE (1 << LOGZSCALE) #define SZ 36 #define HBR (1.0 / (UINT64_C(1) << 60)) static int64_t doubleToRawLongBits(double d) { union { double f; int64_t i; } tmp; tmp.f = d; return tmp.i; } static double longBitsToDouble(int64_t i) { union { double f; int64_t i; } tmp; tmp.i = i; return tmp.f; } static int xisnonnumber(double x) { return (doubleToRawLongBits(x) & UINT64_C(0x7ff0000000000000)) == UINT64_C(0x7ff0000000000000); } static double xordu(double x, uint64_t y) { union { double d; uint64_t u; } cx; cx.d = x; cx.u ^= y; return cx.d; } static double pow2i(int q) { return longBitsToDouble(((int64_t)(q + 0x3ff)) << 52); } static double ldexp2k(double d, int e) { // faster than ldexpk, short reach return d * pow2i(e >> 1) * pow2i(e - (e >> 1)); } Sleef_quad mpfr_get_f128(mpfr_t a, mpfr_rnd_t rnd) { TDX_t f = mpfr_get_tdx(a, rnd); cnv_t c128; union { double d; uint64_t u; } c64; c64.d = f.dd.x; uint64_t signbit = c64.u & UINT64_C(0x8000000000000000); int isZero = (f.dd.x == 0.0), denorm = 0; f.dd.x = xordu(f.dd.x, signbit); f.dd.y = xordu(f.dd.y, signbit); f.dd.z = xordu(f.dd.z, signbit); double t = 1; if (f.e <= 0) { t = ldexp2k(0.5, f.e); if (f.e < -120) t = 0; f.e = 1; denorm = 1; } if ((fabs(f.dd.x) == 1.0 && f.dd.y <= -pow(2, -114)) && f.e != 1) { t = 2; f.e--; } f.dd.x *= t; f.dd.y *= t; 
f.dd.z *= t; c64.d = f.dd.y + HBY * YSCALE; c64.u &= UINT64_C(0xffffffffffffffff) << LOGYSCALE; f.dd.z += f.dd.y - (c64.d - (HBZ * ZSCALE + HBY * YSCALE)); f.dd.y = c64.d; double c = denorm ? (HBX * XSCALE + HBX) : (HBX * XSCALE); c64.d = f.dd.x + c; c64.u &= UINT64_C(0xffffffffffffffff) << LOGXSCALE; t = f.dd.y + (f.dd.x - (c64.d - c)); f.dd.z += f.dd.y - t + (f.dd.x - (c64.d - c)); f.dd.x = c64.d; c64.d = t; c64.u &= UINT64_C(0xffffffffffffffff) << LOGYSCALE; f.dd.z += t - c64.d; f.dd.y = c64.d; t = f.dd.z - HBZ * ZSCALE < 0 ? HBZ * (ZSCALE/2) : 0; f.dd.y -= t; f.dd.z += t; t = f.dd.y - HBY * YSCALE < 0 ? HBY * (YSCALE/2) : 0; f.dd.x -= t; f.dd.y += t; f.dd.z = f.dd.z + HBR - HBR; // c64.d = f.dd.x; c64.u &= UINT64_C(0xfffffffffffff); c128.x = sll128(c64.u, SX); c64.d = f.dd.z; c64.u &= UINT64_C(0xfffffffffffff); c128.l |= c64.u >> SZ; c64.d = f.dd.y; c64.u &= UINT64_C(0xfffffffffffff); c128.x = add128(c128.x, sll128(c64.u, SY)); c128.h &= denorm ? UINT64_C(0xffffffffffff) : UINT64_C(0x3ffffffffffff); c128.h += ((f.e-1) & ~((uint64_t)-1UL << 15)) << 48; if (isZero) { c128.l = c128.h = 0; } if (f.e >= 32767 || f.dd.x == INFINITY) { c128.h = UINT64_C(0x7fff000000000000); c128.l = 0; } if (xisnonnumber(f.dd.x) && f.dd.x != INFINITY) c128.h = c128.l = UINT64_C(0xffffffffffffffff); c128.h |= signbit; return c128.q; } #endif // #if MPFR_VERSION_MAJOR >= 4 char *sprintf128(Sleef_quad q) { mpfr_t fr; mpfr_inits(fr, NULL); mpfr_set_f128(fr, q, GMP_RNDN); char *f = sprintfr(fr); mpfr_clears(fr, NULL); cnv_t c128 = { .q = q }; char *ret = malloc(128); sprintf(ret, "%016llx%016llx (%s)", (unsigned long long)c128.h, (unsigned long long)c128.l, f); free(f); return ret; } double cast_d_q(Sleef_quad q) { mpfr_t fr; mpfr_inits(fr, NULL); mpfr_set_f128(fr, q, GMP_RNDN); double ret = mpfr_get_d(fr, GMP_RNDN); mpfr_clears(fr, NULL); return ret; } Sleef_quad add_q_d(Sleef_quad q, double d) { mpfr_t fr; mpfr_inits(fr, NULL); mpfr_set_f128(fr, q, GMP_RNDN); mpfr_add_d(fr, fr, d, 
GMP_RNDN); q = mpfr_get_f128(fr, GMP_RNDN); mpfr_clears(fr, NULL); return q; } Sleef_quad cast_q_str(const char *s) { mpfr_t fr; mpfr_inits(fr, NULL); mpfr_set_str(fr, s, 10, GMP_RNDN); Sleef_quad q = mpfr_get_f128(fr, GMP_RNDN); mpfr_clears(fr, NULL); return q; } Sleef_quad add_q_q(Sleef_quad q, Sleef_quad r) { mpfr_t fr0, fr1; mpfr_inits(fr0, fr1, NULL); mpfr_set_f128(fr0, q, GMP_RNDN); mpfr_set_f128(fr1, r, GMP_RNDN); mpfr_add(fr0, fr0, fr1, GMP_RNDN); q = mpfr_get_f128(fr0, GMP_RNDN); mpfr_clears(fr0, fr1, NULL); return q; } #else // #ifdef USEMPFR char *sprintf128(Sleef_quad x) { cnv_t c128 = { .q = x }; char *s = malloc(128); sprintf(s, "%016llx%016llx", (unsigned long long)c128.h, (unsigned long long)c128.l); return s; } #endif // #ifdef USEMPFR sleef-3.5.1/src/quad-tester/qtesterutil.h000066400000000000000000000025771373003144100204100ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) typedef struct { uint64_t l, h; } xuint128; xuint128 xu(uint64_t h, uint64_t l); xuint128 sll128(uint64_t u, int c); xuint128 add128(xuint128 x, xuint128 y); int lt128(xuint128 x, xuint128 y); void xsrand(uint64_t s); uint64_t xrand(); void memrand(void *p, int size); Sleef_quad rndf128(Sleef_quad min, Sleef_quad max); Sleef_quad rndf128x(); int readln(int fd, char *buf, int cnt); int startsWith(char *str, char *prefix); int iszerof128(Sleef_quad a); int isnegf128(Sleef_quad a); int isinff128(Sleef_quad a); int isnonnumberf128(Sleef_quad a); int isnanf128(Sleef_quad a); static double u2d(uint64_t u) { union { double f; uint64_t i; } tmp; tmp.i = u; return tmp.f; } static uint64_t d2u(double d) { union { double f; uint64_t i; } tmp; tmp.f = d; return tmp.i; } #ifdef USEMPFR void mpfr_set_f128(mpfr_t frx, Sleef_quad a, mpfr_rnd_t rnd); Sleef_quad mpfr_get_f128(mpfr_t m, mpfr_rnd_t rnd); double countULPf128(Sleef_quad d, mpfr_t c, int checkNegZero); char *sprintfr(mpfr_t fr); char *sprintf128(Sleef_quad x); double cast_d_q(Sleef_quad q); Sleef_quad cast_q_str(const char *s); Sleef_quad add_q_d(Sleef_quad q, double d); #endif sleef-3.5.1/src/quad-tester/tester2simdqp.c000066400000000000000000000411431373003144100206140ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #include #include #include "sleef.h" #include "sleefquad.h" #include "misc.h" #include "qtesterutil.h" // #ifdef ENABLE_PUREC_SCALAR #define CONFIG 1 #include "helperpurec_scalar.h" #include "qrenamepurec_scalar.h" #endif #ifdef ENABLE_PURECFMA_SCALAR #define CONFIG 2 #include "helperpurec_scalar.h" #include "qrenamepurecfma_scalar.h" #endif #ifdef ENABLE_SSE2 #define CONFIG 2 #include "helpersse2.h" #include "qrenamesse2.h" #endif #ifdef ENABLE_AVX2128 #define CONFIG 1 #include "helperavx2_128.h" #include "qrenameavx2128.h" #endif #ifdef ENABLE_AVX #define CONFIG 1 #include "helperavx.h" #include "qrenameavx.h" #endif #ifdef ENABLE_FMA4 #define CONFIG 4 #include "helperavx.h" #include "qrenamefma4.h" #endif #ifdef ENABLE_AVX2 #define CONFIG 1 #include "helperavx2.h" #include "qrenameavx2.h" #endif #ifdef ENABLE_AVX512F #define CONFIG 1 #include "helperavx512f.h" #include "qrenameavx512f.h" #endif #ifdef ENABLE_ADVSIMD #define CONFIG 1 #include "helperadvsimd.h" #include "qrenameadvsimd.h" #endif #ifdef ENABLE_SVE #define CONFIG 1 #include "helpersve.h" #include "qrenamesve.h" #endif #ifdef ENABLE_VSX #define CONFIG 1 #include "helperpower_128.h" #include "qrenamevsx.h" #endif #ifdef ENABLE_DSP128 #define CONFIG 2 #include "helpersse2.h" #include "qrenamedsp128.h" #endif #ifdef ENABLE_DSP256 #define CONFIG 1 #include "helperavx.h" #include "qrenamedsp256.h" #endif // #define DENORMAL_DBL_MIN (4.9406564584124654418e-324) #define POSITIVE_INFINITY INFINITY #define NEGATIVE_INFINITY (-INFINITY) typedef union { Sleef_quad q; xuint128 x; struct { uint64_t l, h; }; } cnv_t; Sleef_quad nexttoward0q(Sleef_quad x, int n) { cnv_t cx; cx.q = x; cx.x = add128(cx.x, xu(n < 0 ? 
0 : -1, -(int64_t)n)); return cx.q; } static vargquad vset(vargquad v, int idx, Sleef_quad d) { v.s[idx] = d; return v; } static Sleef_quad vget(vargquad v, int idx) { return v.s[idx]; } vdouble vsetd(vdouble v, int idx, double d) { double a[VECTLENDP]; vstoreu_v_p_vd(a, v); a[idx] = d; return vloadu_vd_p(a); } double vgetd(vdouble v, int idx) { double a[VECTLENDP]; vstoreu_v_p_vd(a, v); return a[idx]; } static int vgeti(vint v, int idx) { int a[VECTLENDP*2]; vstoreu_v_p_vi(a, v); return a[idx]; } int main(int argc,char **argv) { mpfr_set_default_prec(1024); xsrand(time(NULL) + (((int)getpid()) << 12)); srandom(time(NULL) + (((int)getpid()) << 12)); // const Sleef_quad oneQ = cast_q_str("1"); const Sleef_quad oneEMinus300Q = cast_q_str("1e-300"); const Sleef_quad oneEMinus10Q = cast_q_str("1e-10"); const Sleef_quad oneEPlus10Q = cast_q_str("1e+10"); const Sleef_quad oneEMinus100Q = cast_q_str("1e-100"); const Sleef_quad oneEPlus100Q = cast_q_str("1e+100"); const Sleef_quad oneEMinus1000Q = cast_q_str("1e-1000"); const Sleef_quad oneEPlus1000Q = cast_q_str("1e+1000"); const Sleef_quad quadMin = cast_q_str("3.36210314311209350626267781732175260e-4932"); const Sleef_quad quadMax = cast_q_str("1.18973149535723176508575932662800702e+4932"); const Sleef_quad quadDenormMin = cast_q_str("6.475175119438025110924438958227646552e-4966"); const Sleef_quad M_PI_2Q = cast_q_str("1.5707963267948966192313216916397514"); // int cnt, ecnt = 0; vargquad a0, a1, a2, a3; vdouble vd0 = vcast_vd_d(0), vd1, vd2, vd3; Sleef_quad q0, q1, q2, q3, t; mpfr_t frw, frx, fry, frz; mpfr_inits(frw, frx, fry, frz, NULL); for(cnt = 0;ecnt < 1000;cnt++) { int e = cnt % VECTLENDP; // In the following switch-case statement, I am trying to test // with numbers that tends to trigger bugs. Each case is executed // once in 128 times of loop execution. 
switch(cnt & 127) { case 127: q0 = nexttoward0q(quadMin, (xrand() & 63) - 31); q1 = rndf128x(); break; case 126: q0 = nexttoward0q(quadMax, (xrand() & 31)); q1 = rndf128x(); break; case 125: q0 = nexttoward0q(quadDenormMin, -(int)(xrand() & 31)); q1 = rndf128x(); break; #if defined(ENABLEFLOAT128) #define SLEEF_QUAD_MIN 3.36210314311209350626267781732175260e-4932Q #define SLEEF_QUAD_MAX 1.18973149535723176508575932662800702e+4932Q case 124: q0 = rndf128x(); q1 = rndf128x(); q1 += q0; break; case 123: q0 = rndf128x(); q1 = rndf128x(); q1 -= q0; break; case 122: q0 = rndf128x(); q1 = rndf128x(); q1 += 1; break; case 121: q0 = rndf128x(); q1 = rndf128x(); q0 += 1; q1 -= 1; break; case 120: q0 = rndf128x(); q1 = rndf128x(); q1 += copysign(1, q1) * SLEEF_QUAD_MIN; break; case 119: q0 = rndf128x(); q1 = rndf128x(); q1 = copysign(1, q1) * SLEEF_QUAD_MIN; break; case 118: q0 = rndf128x(); q1 = rndf128x(); q0 += copysign(1, q0); q1 = copysign(1, q1) * SLEEF_QUAD_MIN; break; case 117: q0 = rndf128x(); q1 = rndf128x(); q1 = copysign(1, q1) * SLEEF_QUAD_MIN; break; case 116: q0 = rndf128x(); q1 = rndf128x(); q0 += copysign(1, q0); q1 = copysign(1, q1) * SLEEF_QUAD_MIN; break; case 115: q0 = rndf128x(); q1 = rndf128x(); q1 += copysign(1, q1) * SLEEF_QUAD_MAX; break; #endif default: // Each case in the following switch-case statement is executed // once in 8 loops. 
switch(cnt & 7) { case 0: q0 = rndf128(oneEMinus10Q, oneEPlus10Q); q1 = rndf128(oneEMinus10Q, oneEPlus10Q); break; case 1: q0 = rndf128(oneEMinus100Q, oneEPlus100Q); q1 = rndf128(oneEMinus100Q, oneEPlus100Q); break; case 2: q0 = rndf128(oneEMinus1000Q, oneEPlus1000Q); q1 = rndf128(oneEMinus1000Q, oneEPlus1000Q); break; default: q0 = rndf128x(); q1 = rndf128x(); break; } break; } a0 = vset(a0, e, q0); a1 = vset(a1, e, q1); mpfr_set_f128(frx, q0, GMP_RNDN); mpfr_set_f128(fry, q1, GMP_RNDN); { mpfr_add(frz, frx, fry, GMP_RNDN); double u0 = countULPf128(t = vget(xaddq_u05(a0, a1), e), frz, 0); if (u0 > 0.5000000001) { printf(ISANAME " add arg=%s %s ulp=%.20g\n", sprintf128(q0), sprintf128(q1), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_sub(frz, frx, fry, GMP_RNDN); double u0 = countULPf128(t = vget(xsubq_u05(a0, a1), e), frz, 0); if (u0 > 0.5000000001) { printf(ISANAME " sub arg=%s %s ulp=%.20g\n", sprintf128(q0), sprintf128(q1), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_mul(frz, frx, fry, GMP_RNDN); double u0 = countULPf128(t = vget(xmulq_u05(a0, a1), e), frz, 0); if (u0 > 0.5000000001) { printf(ISANAME " mul arg=%s %s ulp=%.20g\n", sprintf128(q0), sprintf128(q1), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_div(frz, frx, fry, GMP_RNDN); double u0 = countULPf128(t = vget(xdivq_u05(a0, a1), e), frz, 0); if (u0 > 0.5000000001) { printf(ISANAME " div arg=%s %s ulp=%.20g\n", sprintf128(q0), sprintf128(q1), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_sqrt(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xsqrtq_u05(a0), e), frz, 0); if (u0 > 0.5000000001) { 
printf(ISANAME " sqrt arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { double d = mpfr_get_d(frx, GMP_RNDN); vd0 = vsetd(vd0, e, d); t = vget(xcast_from_doubleq(vd0), e); mpfr_set_d(frz, d, GMP_RNDN); Sleef_quad q2 = mpfr_get_f128(frz, GMP_RNDN); if (memcmp(&t, &q2, sizeof(Sleef_quad)) != 0 && !(isnanf128(t) && isnanf128(q2))) { printf(ISANAME " cast_from_double arg=%.20g\n", d); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(q2)); fflush(stdout); ecnt++; } } { double td = vgetd(xcast_to_doubleq(a0), e); double cd = mpfr_get_d(frx, GMP_RNDN); if (fabs(cd) >= DBL_MIN && cd != td && !(isnan(td) && isnan(cd))) { printf(ISANAME " cast_to_double arg=%s\n", sprintf128(q0)); printf("test = %.20g\n", td); printf("corr = %.20g\n", cd); fflush(stdout); ecnt++; } } { int ci = mpfr_less_p(frx, fry); int ti = vgeti(xcmpltq(a0, a1), e); if (ci != ti) { printf(ISANAME " cmpltq arg=%s, %s, test = %d, corr = %d \n", sprintf128(q0), sprintf128(q1), ti, ci); fflush(stdout); ecnt++; } } { int ci = mpfr_greater_p(frx, fry); int ti = vgeti(xcmpgtq(a0, a1), e); if (ci != ti) { printf(ISANAME " cmpgtq arg=%s, %s, test = %d, corr = %d \n", sprintf128(q0), sprintf128(q1), ti, ci); fflush(stdout); ecnt++; } } { int ci = mpfr_lessequal_p(frx, fry); int ti = vgeti(xcmpleq(a0, a1), e); if (ci != ti) { printf(ISANAME " cmpleq arg=%s, %s, test = %d, corr = %d \n", sprintf128(q0), sprintf128(q1), ti, ci); fflush(stdout); ecnt++; } } { int ci = mpfr_greaterequal_p(frx, fry); int ti = vgeti(xcmpgeq(a0, a1), e); if (ci != ti) { printf(ISANAME " cmpgeq arg=%s, %s, test = %d, corr = %d \n", sprintf128(q0), sprintf128(q1), ti, ci); fflush(stdout); ecnt++; } } { int ci = mpfr_equal_p(frx, fry); int ti = vgeti(xcmpeqq(a0, a1), e); if (ci != ti) { printf(ISANAME " cmpeq arg=%s, %s, test = %d, corr = %d \n", sprintf128(q0), sprintf128(q1), ti, ci); 
fflush(stdout); ecnt++; } } { int ci = mpfr_lessgreater_p(frx, fry); int ti = vgeti(xcmpneqq(a0, a1), e); if (ci != ti) { printf(ISANAME " cmpneq arg=%s, %s, test = %d, corr = %d \n", sprintf128(q0), sprintf128(q1), ti, ci); fflush(stdout); ecnt++; } } { int ci = mpfr_unordered_p(frx, fry); int ti = vgeti(xunordq(a0, a1), e); if (ci != ti) { printf(ISANAME " unord arg=%s, %s, test = %d, corr = %d \n", sprintf128(q0), sprintf128(q1), ti, ci); fflush(stdout); ecnt++; } } { int ci = mpfr_unordered_p(frx, fry); int ti = vgeti(xunordq(a0, a1), e); if (ci != ti) { printf(ISANAME " unord arg=%s, %s, test = %d, corr = %d \n", sprintf128(q0), sprintf128(q1), ti, ci); fflush(stdout); ecnt++; } } #ifdef ENABLE_PUREC_SCALAR if ((cnt & 15) == 1) { char s[64]; Sleef_qtostr(s, 63, a0, 10); Sleef_quad q1 = vget(Sleef_strtoq(s, NULL, 10), e); if (memcmp(&q0, &q1, sizeof(Sleef_quad)) != 0 && !(isnanf128(q0) && isnanf128(q1))) { printf("qtostr/strtoq arg=%s\n", sprintf128(q0)); fflush(stdout); ecnt++; } } #endif { mpfr_exp(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xexpq_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " exp arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_exp2(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xexp2q_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " exp2 arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_exp10(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xexp10q_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " exp10 arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_expm1(frz, frx, GMP_RNDN); double u0 = countULPf128(t = 
vget(xexpm1q_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " expm1 arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_log(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xlogq_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " log arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_log2(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xlog2q_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " log2 arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_log10(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xlog10q_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " log10 arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_log1p(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xlog1pq_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " log1p arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_atan(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xatanq_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " atan arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } q0 = rndf128(oneEMinus300Q, oneQ); a0 = vset(a0, e, q0); mpfr_set_f128(frx, q0, GMP_RNDN); { mpfr_asin(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xasinq_u10(a0), e), frz, 0); if (u0 > 0.8) { 
printf(ISANAME " asin arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_acos(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xacosq_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " acos arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } #if defined(ENABLEFLOAT128) switch(cnt & 31) { case 0: { memrand(&q0, sizeof(__float128)); q0 = q0 * M_PI_2Q; } break; case 1: { int t; memrand(&t, sizeof(int)); t &= ~((~0UL) << (xrand() & 31)); q0 = t * M_PI_2Q; } break; case 2: q0 = rndf128x(); break; default: q0 = rndf128(1e-20, 1e+20); break; } a0 = vset(a0, e, q0); mpfr_set_f128(frx, q0, GMP_RNDN); #endif { mpfr_sin(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xsinq_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " sin arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_cos(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xcosq_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " cos arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_tan(frz, frx, GMP_RNDN); double u0 = countULPf128(t = vget(xtanq_u10(a0), e), frz, 0); if (u0 > 0.8) { printf(ISANAME " tan arg=%s ulp=%.20g\n", sprintf128(q0), u0); printf("test = %s\n", sprintf128(t)); printf("corr = %s\n\n", sprintf128(mpfr_get_f128(frz, GMP_RNDN))); fflush(stdout); ecnt++; } } } } 
sleef-3.5.1/src/quad/000077500000000000000000000000001373003144100143335ustar00rootroot00000000000000sleef-3.5.1/src/quad/CMakeLists.txt000066400000000000000000000151021373003144100170720ustar00rootroot00000000000000file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/)

# Generated headers land in <binary dir>/include; sources also include
# headers from this directory and from src/libm.
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include/)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${sleef_SOURCE_DIR}/src/libm) # dd.h

# Properties and definitions applied to the targets defined further down.
set(COMMON_TARGET_PROPERTIES
  C_STANDARD 99)

if(COMPILER_SUPPORTS_FLOAT128)
  list(APPEND COMMON_TARGET_DEFINITIONS ENABLEFLOAT128=1)
endif()

if(COMPILER_SUPPORTS_BUILTIN_MATH)
  list(APPEND COMMON_TARGET_DEFINITIONS ENABLE_BUILTIN_MATH=1)
endif()

if (BUILD_SHARED_LIBS)
  list(APPEND COMMON_TARGET_PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()

#

# Command-line tuples handed to qmkrename when it emits the per-ISA part of
# sleefquad.h. Positional order, matching qmkrename's argv[1..9]:
#   width tag, vargquad type, vargquad2 type, vdouble type, vfloat type,
#   vmask type, vint type, preprocessor macro guarding the section, ISA name
set(QUAD_HEADER_PARAMS_PUREC_SCALAR    1 Sleef_quad1 Sleef_quad2 double float uint64_t int32_t __STDC__ purec)
set(QUAD_HEADER_PARAMS_PURECFMA_SCALAR 1 Sleef_quad1 Sleef_quad2 double float uint64_t int32_t FP_FAST_FMA purecfma)
set(QUAD_HEADER_PARAMS_SSE2            2 Sleef_quad2 Sleef_quad4 __m128d __m128 __m128i __m128i __SSE2__ sse2)
set(QUAD_HEADER_PARAMS_AVX             4 Sleef_quad4 Sleef_quad8 __m256d __m256 __m256i __m128i __AVX__ avx)
set(QUAD_HEADER_PARAMS_FMA4            4 Sleef_quad4 Sleef_quad8 __m256d __m256 __m256i __m128i __AVX__ fma4)
set(QUAD_HEADER_PARAMS_AVX2            4 Sleef_quad4 Sleef_quad8 __m256d __m256 __m256i __m128i __AVX__ avx2)
set(QUAD_HEADER_PARAMS_AVX512F         8 Sleef_quad8 Sleef_quad16 __m512d __m512 __m512i __m256i __AVX512F__ avx512f)
set(QUAD_HEADER_PARAMS_ADVSIMD         2 Sleef_quad2 Sleef_quad4 float64x2_t float32x4_t uint32x4_t int32x2_t __ARM_NEON advsimd)
set(QUAD_HEADER_PARAMS_SVE             x Sleef_quadx Sleef_quadx svfloat64_t svfloat32_t svint32_t svint32_t __ARM_FEATURE_SVE sve)

# Two-argument tuples (width tag, ISA name) used when qmkrename emits a
# qrename<isa>.h header of #define aliases.
set(QUAD_RENAME_PARAMS_PUREC_SCALAR    1 purec)
set(QUAD_RENAME_PARAMS_PURECFMA_SCALAR 1 purecfma)
set(QUAD_RENAME_PARAMS_SSE2            2 sse2)
set(QUAD_RENAME_PARAMS_AVX             4 avx)
set(QUAD_RENAME_PARAMS_FMA4            4 fma4)
set(QUAD_RENAME_PARAMS_AVX2            4 avx2)
set(QUAD_RENAME_PARAMS_AVX512F         8 avx512f)
set(QUAD_RENAME_PARAMS_ADVSIMD         2 advsimd)
set(QUAD_RENAME_PARAMS_SVE             x sve)

#

set(CMAKE_C_FLAGS "${ORG_CMAKE_C_FLAGS} ${SLEEF_C_FLAGS}")

# --------------------------------------------------------------------
# sleefquad.h
# --------------------------------------------------------------------
# The public header is assembled at build time:
#   fixed header  +  one qmkrename-generated section per supported ISA  +  fixed footer

set(SLEEFQUAD_ORG_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/sleefquad_header.h.org)
set(SLEEFQUAD_ORG_FOOTER ${CMAKE_CURRENT_SOURCE_DIR}/sleefquad_footer.h.org)
set(SLEEFQUAD_INCLUDE_HEADER ${sleef_BINARY_DIR}/include/sleefquad.h)

set(SLEEF_HEADER_COMMANDS "")
list(APPEND SLEEF_HEADER_COMMANDS
  COMMAND ${CMAKE_COMMAND} -E copy ${SLEEFQUAD_ORG_HEADER} ${SLEEFQUAD_INCLUDE_HEADER})

foreach(SIMD ${SLEEFQUAD_SUPPORTED_EXT})
  if(COMPILER_SUPPORTS_${SIMD})
    list(APPEND SLEEF_HEADER_COMMANDS
      COMMAND echo Generating sleefquad.h: qmkrename ${QUAD_HEADER_PARAMS_${SIMD}})
    # NOTE(review): the generator expression was reduced to a bare "$";
    # restored as the path of the qmkrename host executable.
    list(APPEND SLEEF_HEADER_COMMANDS
      COMMAND $<TARGET_FILE:qmkrename> ${QUAD_HEADER_PARAMS_${SIMD}} >> ${SLEEFQUAD_INCLUDE_HEADER})
  endif()
endforeach()

# Append the footer with the shell's native concatenation tool; "type"
# needs backslash-separated paths.
if((MSVC OR MINGW AND WIN32) OR SLEEF_CLANG_ON_WINDOWS)
  string(REPLACE "/" "\\" sleef_footer_input_file "${SLEEFQUAD_ORG_FOOTER}")
  list(APPEND SLEEF_HEADER_COMMANDS
    COMMAND type ${sleef_footer_input_file} >> ${SLEEFQUAD_INCLUDE_HEADER})
else()
  list(APPEND SLEEF_HEADER_COMMANDS
    COMMAND cat ${SLEEFQUAD_ORG_FOOTER} >> ${SLEEFQUAD_INCLUDE_HEADER})
endif()

add_custom_command(OUTPUT ${SLEEFQUAD_INCLUDE_HEADER}
  ${SLEEF_HEADER_COMMANDS}
  DEPENDS
    ${SLEEFQUAD_ORG_HEADER}
    ${SLEEFQUAD_ORG_FOOTER}
    qmkrename
)

# --------------------------------------------------------------------
# qmkrename
# qrenameXXX.h for each vector extension
# --------------------------------------------------------------------
# Helper executable: generates parts of the sleef header file
add_host_executable(qmkrename qmkrename.c)
set_target_properties(qmkrename PROPERTIES ${COMMON_TARGET_PROPERTIES})

set(HEADER_FILES_GENERATED "")
foreach(SIMD ${SLEEFQUAD_SUPPORTED_EXT})
  if(COMPILER_SUPPORTS_${SIMD})
    string(TOLOWER ${SIMD} SIMDLC)
    set(HEADER_${SIMD} ${CMAKE_CURRENT_BINARY_DIR}/include/qrename${SIMDLC}.h)
    list(APPEND HEADER_FILES_GENERATED ${HEADER_${SIMD}})

    # Generate qmkrename commands.
    # The progress message previously expanded ${vecarch}, which is never set
    # in this file, so it printed "qrename.h"; use the lower-case ISA name.
    add_custom_command(OUTPUT ${HEADER_${SIMD}}
      COMMAND echo Generating qrename${SIMDLC}.h: qmkrename ${QUAD_RENAME_PARAMS_${SIMD}}
      COMMAND $<TARGET_FILE:qmkrename> ${QUAD_RENAME_PARAMS_${SIMD}} > ${HEADER_${SIMD}}
      DEPENDS qmkrename
    )
    add_custom_target(qrename${SIMD}.h_generated DEPENDS ${HEADER_${SIMD}})
  endif()
endforeach()

# --------------------------------------------------------------------
# sleefquad_headers
# --------------------------------------------------------------------
add_custom_target(sleefquad_headers ALL
  DEPENDS
    ${SLEEFQUAD_INCLUDE_HEADER}
    ${HEADER_FILES_GENERATED}
)

# --------------------------------------------------------------------
# libsleefquad
# --------------------------------------------------------------------
# sleefsimdqp.c is compiled once per ISA into an object library; the object
# libraries are then combined into the sleefquad library.
foreach(SIMD ${SLEEFQUAD_SUPPORTED_EXT})
  if(COMPILER_SUPPORTS_${SIMD})
    string(TOLOWER ${SIMD} SIMDLC)
    set(OBJECT "sleefquad${SIMDLC}_obj")
    add_library(${OBJECT} OBJECT sleefsimdqp.c ${HEADER_${SIMD}})
    # ENABLE_BUILTIN_MATH=1, when supported, is already part of
    # COMMON_TARGET_DEFINITIONS, so it is not added a second time here.
    target_compile_definitions(${OBJECT} PRIVATE
      ENABLE_${SIMD}=1
      DORENAME=1
      ${COMMON_TARGET_DEFINITIONS})
    set_target_properties(${OBJECT} PROPERTIES ${COMMON_TARGET_PROPERTIES})
    # The object library must not compile before its rename header exists.
    add_dependencies(${OBJECT} qrename${SIMD}.h_generated)
    target_compile_options(${OBJECT} PRIVATE ${FLAGS_ENABLE_${SIMD}})
    # NOTE(review): restored from a bare "$".
    list(APPEND SLEEFQUAD_OBJECTS $<TARGET_OBJECTS:${OBJECT}>)
  endif()
endforeach()

add_library(sleefquad rempitabqp.c ${SLEEFQUAD_OBJECTS})

# COMMON_TARGET_PROPERTIES used to be applied by a second, identical
# set_target_properties() call as well; once is enough.
set_target_properties(sleefquad PROPERTIES
  VERSION ${SLEEF_VERSION}
  SOVERSION ${SLEEF_SOVERSION}
  PUBLIC_HEADER ${SLEEFQUAD_INCLUDE_HEADER}
  ${COMMON_TARGET_PROPERTIES}
)

if(LIBM AND NOT COMPILER_SUPPORTS_BUILTIN_MATH)
  target_link_libraries(sleefquad ${LIBM})
endif()

# 
-------------------------------------------------------------------- # Install # -------------------------------------------------------------------- # Install libsleef and sleef.h install(TARGETS sleefquad PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") sleef-3.5.1/src/quad/qfuncproto.h000066400000000000000000000035121373003144100167050ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) typedef struct { char *name; int ulp; int ulpSuffix; int funcType; int flags; } funcSpec; /* ulp : (error bound in ulp) * 10 ulpSuffix: 0 : "" 1 : "_u10" 2 : "_u05" funcType: 0 : vargquad func(vargquad); 1 : vargquad func(vargquad, vargquad); 2 : vargquad2 func(vargquad); 3 : vargquad func(vargquad, vint); 4 : vint func(vargquad); 5 : vargquad func(vargquad, vargquad, vargquad); 6 : vargquad2 func(vargquad); 7 : int func(int); 8 : void *func(int); 9 : vint func(vargquad, vargquad); 10 : vdouble func(vargquad); 11 : vargquad func(vdouble); 12 : vmask func(vargquad); 13 : vargquad func(vmask); */ funcSpec funcList[] = { { "add", 5, 2, 1, 0 }, { "sub", 5, 2, 1, 0 }, { "mul", 5, 2, 1, 0 }, { "div", 5, 2, 1, 0 }, { "neg", -1, 0, 0, 0 }, { "sqrt", 5, 2, 0, 0 }, { "cmplt", -1, 0, 9, 0 }, { "cmple", -1, 0, 9, 0 }, { "cmpgt", -1, 0, 9, 0 }, { "cmpge", -1, 0, 9, 0 }, { "cmpeq", -1, 0, 9, 0 }, { "cmpneq", -1, 0, 9, 0 }, { "unord", -1, 0, 9, 0 }, { "cast_to_double", -1, 0, 10, 0 }, { "cast_from_double", -1, 0, 11, 0 }, { "sin", 10, 1, 0, 0 }, { "cos", 10, 1, 0, 0 }, { "tan", 10, 1, 0, 0 }, { "asin", 10, 1, 0, 0 }, { "acos", 10, 1, 0, 0 }, { "atan", 10, 1, 0, 0 }, { "exp", 10, 1, 0, 0 }, { "exp2", 10, 1, 0, 0 }, { "exp10", 10, 1, 0, 0 }, { "expm1", 10, 1, 0, 0 }, { "log", 10, 1, 0, 0 }, { "log2", 10, 1, 0, 0 }, { 
"log10", 10, 1, 0, 0 }, { "log1p", 10, 1, 0, 0 }, //{ "sincos", 10, 1, 2, 0 }, //{ "ldexp", -1, 0, 3, 0 }, //{ "ilogb", -1, 0, 4, 0 }, //{ "fma", -1, 0, 5, 0 }, { NULL, -1, 0, 0, 0 }, }; sleef-3.5.1/src/quad/qmkrename.c000066400000000000000000000145401373003144100164630ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include "qfuncproto.h" int main(int argc, char **argv) { if (argc < 3) { fprintf(stderr, "Generate a header for renaming functions\n"); fprintf(stderr, "Usage : %s []\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "Generate a part of header for library functions\n"); fprintf(stderr, "Usage : %s []\n", argv[0]); fprintf(stderr, "\n"); exit(-1); } static char *ulpSuffixStr[] = { "", "_u10", "_u05" }; if (argc == 2 || argc == 3) { char *wqp = argv[1]; char *isaname = argc == 2 ? "" : argv[2]; char *isaub = argc == 3 ? "_" : ""; if (strcmp(isaname, "sve") == 0) wqp = "x"; for(int i=0;funcList[i].name != NULL;i++) { if (funcList[i].ulp >= 0) { printf("#define x%sq%s Sleef_%sq%s_u%02d%s\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], funcList[i].name, wqp, funcList[i].ulp, isaname); } else { printf("#define x%sq Sleef_%sq%s%s%s\n", funcList[i].name, funcList[i].name, wqp, isaub, isaname); } } } else { char *wqp = argv[1]; char *vargquadname = argv[2]; char *vargquad2name = argv[3]; char *vdoublename = argv[4]; char *vfloatname = argv[5]; char *vmaskname = argv[6]; char *vintname = argv[7]; char *architecture = argv[8]; char *isaname = argc == 10 ? argv[9] : ""; char *isaub = argc == 10 ? 
"_" : ""; if (strcmp(isaname, "sve") == 0) wqp = "x"; printf("#ifdef %s\n", architecture); if (strcmp(vargquadname, "-") != 0) { for(int i=0;funcList[i].name != NULL;i++) { switch(funcList[i].funcType) { case 0: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sq%s_u%02d%s(%s);\n", vargquadname, funcList[i].name, wqp, funcList[i].ulp, isaname, vargquadname); } else { printf("IMPORT CONST %s Sleef_%sq%s%s%s(%s);\n", vargquadname, funcList[i].name, wqp, isaub, isaname, vargquadname); } break; case 1: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sq%s_u%02d%s(%s, %s);\n", vargquadname, funcList[i].name, wqp, funcList[i].ulp, isaname, vargquadname, vargquadname); } else { printf("IMPORT CONST %s Sleef_%sq%s%s%s(%s, %s);\n", vargquadname, funcList[i].name, wqp, isaub, isaname, vargquadname, vargquadname); } break; case 2: case 6: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sq%s_u%02d%s(%s);\n", vargquad2name, funcList[i].name, wqp, funcList[i].ulp, isaname, vargquadname); } else { printf("IMPORT CONST %s Sleef_%sq%s%s%s(%s);\n", vargquad2name, funcList[i].name, wqp, isaub, isaname, vargquadname); } break; case 3: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sq%s_u%02d%s(%s, %s);\n", vargquadname, funcList[i].name, wqp, funcList[i].ulp, isaname, vargquadname, vintname); } else { printf("IMPORT CONST %s Sleef_%sq%s%s%s(%s, %s);\n", vargquadname, funcList[i].name, wqp, isaub, isaname, vargquadname, vintname); } break; case 4: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sq%s_u%02d%s(%s);\n", vintname, funcList[i].name, wqp, funcList[i].ulp, isaname, vargquadname); } else { printf("IMPORT CONST %s Sleef_%sq%s%s%s(%s);\n", vintname, funcList[i].name, wqp, isaub, isaname, vargquadname); } break; case 5: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sq%s_u%02d%s(%s, %s, %s);\n", vargquadname, funcList[i].name, wqp, funcList[i].ulp, isaname, vargquadname, vargquadname, vargquadname); } else { 
printf("IMPORT CONST %s Sleef_%sq%s%s%s(%s, %s, %s);\n", vargquadname, funcList[i].name, wqp, isaub, isaname, vargquadname, vargquadname, vargquadname); } break; case 7: printf("IMPORT CONST int Sleef_%sq%s%s%s(int);\n", funcList[i].name, wqp, isaub, isaname); break; case 8: printf("IMPORT CONST void *Sleef_%sq%s%s%s(int);\n", funcList[i].name, wqp, isaub, isaname); break; case 9: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sq%s_u%02d%s(%s, %s);\n", vintname, funcList[i].name, wqp, funcList[i].ulp, isaname, vargquadname, vargquadname); } else { printf("IMPORT CONST %s Sleef_%sq%s%s%s(%s, %s);\n", vintname, funcList[i].name, wqp, isaub, isaname, vargquadname, vargquadname); } break; case 10: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sq%s_u%02d%s(%s);\n", vdoublename, funcList[i].name, wqp, funcList[i].ulp, isaname, vargquadname); } else { printf("IMPORT CONST %s Sleef_%sq%s%s%s(%s);\n", vdoublename, funcList[i].name, wqp, isaub, isaname, vargquadname); } break; case 11: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sq%s_u%02d%s(%s);\n", vargquadname, funcList[i].name, wqp, funcList[i].ulp, isaname, vdoublename); } else { printf("IMPORT CONST %s Sleef_%sq%s%s%s(%s);\n", vargquadname, funcList[i].name, wqp, isaub, isaname, vdoublename); } break; case 12: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sq%s_u%02d%s(%s);\n", vmaskname, funcList[i].name, wqp, funcList[i].ulp, isaname, vargquadname); } else { printf("IMPORT CONST %s Sleef_%sq%s%s%s(%s);\n", vmaskname, funcList[i].name, wqp, isaub, isaname, vargquadname); } break; case 13: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sq%s_u%02d%s(%s);\n", vargquadname, funcList[i].name, wqp, funcList[i].ulp, isaname, vmaskname); } else { printf("IMPORT CONST %s Sleef_%sq%s%s%s(%s);\n", vargquadname, funcList[i].name, wqp, isaub, isaname, vmaskname); } break; } } } printf("#endif\n"); } exit(0); } 
sleef-3.5.1/src/quad/rempitabqp.c000066400000000000000000014011131373003144100166440ustar00rootroot00000000000000#include "misc.h" NOEXPORT const double Sleef_rempitabqp[] = { 0.15915494308865163475, 114.12758349655632628, 87.820147804392036051, 27.423136899138626177, 14.254027919272630243, 85.935026329207175877, 114.27691102886092267, 37.750191829592949944, 2.4546589403216785286, 111.34725244651053799, 62.908236858707823558, 87.408456635555921821, 64.604759095473127672, 28.784837529448850546, 51.622219578821386676, 23.484166172853292665, 126.14843699368429952, 64.497947248710261192, 90.04072758117763442, 79.851150118094665231, 93.238662576877686661, 47.957378064249496674, 44.495092337878304534, 122.05645651105805882, 36.728927543368627084, 8.9860062754451064393, 3.1885831643376150168, 117.4925837228220189, 107.81580297079563024, 72.690177145766938338, 6.3499807085099746473, 64.814470052646356635, 32.07563458668300882, 48.190104861652798718, 40.78663459321251139, 17.552382976566150319, 13.621939875218231464, 21.256544246167322854, 57.381995073275902541, 9.6354705119701975491, 41.499050218579213833, 113.6220945173263317, 79.59452043374767527, 103.64401134191575693, 123.88471844718515058, 101.4736260631980258, 18.097774968078738311, 85.95650738066615304, 76.065655079091811785, 37.675427324622432934, 68.676777671174932038, 32.00599382463042275, 53.77951688189568813, 0.19179906913632294163, 73.478443940475699492, 111.92379273104597814, 40.996545034144219244, 107.14597675384720787, 124.38805678367862129, 98.636395050234568771, 103.347364549201302, 101.568637595839391, 98.20809433281829115, 0.21127150943721062504, 6.4992127331242954824, 115.19938259065020247, 123.67517849772775662, 108.64186459926349926, 2.9984621451549173798, 4.6062990500759042334, 93.491721027461608173, 49.145381707759952405, 52.727074269980221288, 71.068700205698405625, 99.034355036987108178, 24.011549219118023757, 111.50147725937858922, 49.03615573892602697, 53.722402094637800474, 
91.897739203315722989, 18.206447852357086958, 109.32060226575413253, 97.416567328084056498, 19.228490159101056634, 125.86502164847843233, 68.117931140997825423, 37.659347287717537256, 94.778508877261629095, 125.23389264055003878, 9.1960193313898344059, 89.79227686872400227, 44.219955035077873617, 80.594031160762824584, 98.175325535910815233, 9.1060313697525998577, 1.3536325568711617962, 74.266441315219708486, 112.02497297326408443, 78.441242103348486125, 30.240608034280739957, 35.887181287234852789, 31.222180933826166438, 111.20435677495697746, 16.961934521656075958, 75.690011131529900013, 19.857096218624064932, 39.445799316181364702, 89.399067092508630594, 9.8946251570741878822, 75.790836480315192603, 41.449609204857551958, 16.424785501782025676, 110.92636979258531937, 35.2268612109037349, 93.553590155912388582, 60.716867816085141385, 101.8275823641452007, 0.52353978253449895419, 53.558147969339188421, 14.022688405977532966, 47.412589899395243265, 67.466685592477006139, 20.558449846765142865, 42.477512440153077478, 78.376983236481464701, 52.194720245763164712, 80.210266588394006249, 10.32282516378472792, 13.76645060176088009, 65.576818637229735032, 113.30637098982697353, 33.130573191992880311, 85.595008141503058141, 40.008528567348548677, 76.341531842852418777, 14.979836669819633244, 71.290441850942443125, 102.19776376600566437, 98.564603420047205873, 14.271847993266419508, 80.116664932062121807, 19.992192520050593885, 36.609960786536248634, 73.030199955795978894, 104.99763589425128885, 48.433380696202220861, 57.572455457851901883, 118.88634466403527767, 44.860695004797889851, 73.750690998633217532, 13.556655583906831453, 88.593745596503140405, 14.11532725519646192, 43.085424128406884847, 9.7802314795590064023, 30.352614322964655003, 90.858137878454726888, 85.039140199140092591, 23.407077980155008845, 18.207643349760473939, 20.440075362967036199, 109.47096555736425216, 66.428168903472396778, 106.84400033217025339, 91.750582754768402083, 76.596534958600386744, 
17.320324285941751441, 49.535734807919652667, 113.66190455558171379, 61.225422811843600357, 18.669559649624716258, 76.387663934096053708, 74.178235974148265086, 85.815782102505181683, 122.10498128451581579, 24.799236859667871613, 123.2972609065072902, 50.72595180519783753, 121.41778173806596897, 114.39462376141091227, 90.193730781800695695, 3.0683144895410805475, 26.293759879121353151, 46.076575149436393986, 19.660881336145394016, 25.597005338699091226, 101.31112300279710325, 103.8807629969487607, 91.892762314044375671, 57.045969595055794343, 110.15196133002973511, 94.689776863833685638, 49.182032269411138259, 123.69104138236070867, 113.69436034850150463, 117.72063118611185928, 1.4682589776566601358, 23.970066872989264084, 91.772521428280015243, 32.248277749331464292, 74.556784206688462291, 120.01798423568834551, 87.713686773899098625, 80.036939679470378906, 58.663490617163915886, 21.035325459186424268, 79.868647571121982764, 19.982377222022478236, 57.246738529793219641, 1.9401161782407143619, 7.9883917663682950661, 108.92261904602855793, 42.957866646069305716, 55.213983339206606615, 85.734177860773343127, 8.1786861732471152209, 113.8021284014939738, 6.3038078266908996738, 108.74856571221971535, 13.502748231236182619, 29.736439245178189594, 66.181101124901033472, 14.021098141827678774, 50.432291116485430393, 71.692326357551792171, 112.62160509388922947, 46.52251733029333991, 70.328927450136689004, 82.779915324492321815, 114.20617270459479187, 120.00074403454345884, 27.256265592593990732, 72.206675336365151452, 105.47692570983417681, 90.069591432285960764, 59.451044258996262215, 6.5436011348210740834, 11.666369110360392369, 127.00500442843986093, 66.781588143319822848, 101.71752368163652136, 110.83473463953487226, 9.4843589676565898117, 87.965483504598523723, 32.660788242061244091, 94.484722863278875593, 121.36131720729463268, 27.298648878622771008, 105.17309219513481366, 68.000137710059789242, 93.444196628861391218, 44.093294898437306983, 44.109477207573945634, 
104.37636018792545656, 16.936308635202294681, 42.461140061379410326, 48.120956522194319405, 47.03053727519363747, 74.836943859434541082, 76.560959611877478892, 70.982041261755512096, 106.17278879548030091, 17.240951762454642449, 116.54761581097409362, 85.489981931084912503, 19.448874795671144966, 68.055698282205412397, 120.54780957969705923, 79.357897397254419047, 26.784055884931149194, 8.8652342647874320392, 93.554617017987766303, 36.863637874987034593, 107.36296787388710072, 114.24380599117284874, 57.996031665523332777, 107.15169167585190735, 6.8493815877518500201, 70.030736486780369887, 104.34439242449298035, 32.394922227955248673, 72.045712768554949434, 125.99193223337715608, 79.855994894536706852, 102.91479410954343621, 19.828504664699721616, 19.423237169754429488, 49.419210917687451001, 51.794468197291280376, 59.485514724419772392, 78.600481083303748164, 75.802543954887369182, 49.485430251796060475, 125.97828236915302114, 98.782930868979747174, 83.325835802559595322, 59.946491932565550087, 73.477066622643178562, 89.105636388390848879, 73.372970791053376161, 18.817599439345940482, 102.06705368556504254, 45.42352350837609265, 23.440921115026867483, 90.344093902869644808, 17.970589397602452664, 53.956875561776541872, 55.525845976218988653, 49.46730813278190908, 77.778136706394434441, 43.505807501616800437, 67.471431943824427435, 84.265864734097704059, 14.933240975493390579, 24.684664495714969235, 126.41874786355037941, 1.3222032844169007149, 126.01238007772190031, 83.383431563808699138, 113.72324844212562311, 21.304834404720168095, 62.332525882295158226, 51.597882727459364105, 0.67672862408289802261, 30.132787537324475124, 27.410108195865177549, 20.656855709145020228, 49.723609964024944929, 119.05708648653671844, 91.246047016928059747, 84.369696109431970399, 85.591131138298806036, 14.127644950760441134, 4.2155267645284766331, 24.204889203938364517, 19.661099106528126867, 82.477870938018895686, 120.89188850888967863, 122.58114253445455688, 120.63265677435265388, 
27.082481923996965634, 29.761974944867688464, 72.461197358730714768, 84.204483040954073658, 80.890771993039379595, 99.777925805155973649, 91.8974949579933309, 94.518818676227965625, 126.28205386120316689, 88.787575110411125934, 93.769932165232603438, 79.733441483964270446, 38.645666720854933374, 115.27208568365676911, 31.268716353544732556, 112.30475518474486307, 74.891574611563555663, 115.66812160437984858, 18.66350298186080181, 94.633595189065090381, 109.45983699533462641, 108.68776993196661351, 58.76809135159055586, 61.680897811242175521, 38.882956551777169807, 45.576713001821190119, 7.4377005918177019339, 120.60427874347078614, 120.21996894028052338, 47.606037590059713693, 0.31830988618094124831, 100.25516699311629054, 47.640295608784072101, 54.846273798277252354, 28.508055838548898464, 43.870052658417989733, 100.55382205772184534, 75.500383659185899887, 4.9093178806469950359, 94.694504893021075986, 125.81647371741928509, 46.816913271115481621, 1.2095181909462553449, 57.569675058897701092, 103.24443915764277335, 46.968332345710223308, 124.29687398737223702, 0.99589449742416036315, 52.081455162358906819, 31.702300236192968441, 58.477325153755373321, 95.914756128498993348, 88.990184675760247046, 116.11291302211611765, 73.457855086737254169, 17.972012550890212879, 6.3771663286788680125, 106.98516744564767578, 87.631605941594898468, 17.380354291533876676, 12.699961417023587273, 1.6289401052927132696, 64.15126917336601764, 96.380209723305597436, 81.573269186425022781, 35.104765953132300638, 27.243879750436462928, 42.513088492334645707, 114.76399014655180508, 19.270941023940395098, 82.998100437162065646, 99.244189034656301374, 31.189040867498988518, 79.288022683835151838, 119.76943689437393914, 74.947252126396051608, 36.195549936161114601, 43.913014761335944058, 24.131310158183623571, 75.350854649248503847, 9.3535553423498640768, 64.011987649260845501, 107.55903376379137626, 0.38359813827628386207, 18.956887880955036962, 95.847585462091956288, 81.993090068292076467, 
86.291953507698053727, 120.77611356736088055, 69.272790100472775521, 78.694729098406241974, 75.137275191682419972, 68.4161886656365823, 0.42254301887442125008, 12.998425466252228944, 102.39876518130404293, 119.35035699545915122, 89.283729198526998516, 5.9969242903098347597, 9.2125981001554464456, 58.983442054923216347, 98.290763415519904811, 105.45414853996408056, 14.137400411400449229, 70.068710073974216357, 48.023098438239685493, 95.002954518757178448, 98.07231147785205394, 107.44480418927923893, 55.795478406631445978, 36.412895704717811896, 90.641204531511903042, 66.833134656168112997, 38.456980318205751246, 123.73004329696050263, 8.2358622819956508465, 75.318694575438712491, 61.557017754526896169, 122.46778528110007755, 18.392038662783306791, 51.58455373744800454, 88.439910070159385214, 33.188062321529287146, 68.350651071821630467, 18.212062739505199715, 2.7072651137459615711, 20.53288263044305495, 96.04994594653180684, 28.882484206696972251, 60.481216068561479915, 71.774362574469705578, 62.444361867655970855, 94.408713549913954921, 33.923869043315789895, 23.380022263059800025, 39.714192437248129863, 78.891598632366367383, 50.798134185017261188, 19.789250314152013743, 23.581672960630385205, 82.899218409715103917, 32.849571003567689331, 93.852739585170638748, 70.453722421811107779, 59.107180311824777164, 121.43373563217392075, 75.655164728290401399, 1.0470795650689979084, 107.11629593867837684, 28.045376811955065932, 94.825179798794124508, 6.9333711849540122785, 41.116899693530285731, 84.955024880309792934, 28.753966472962929402, 104.3894404915299674, 32.420533176788012497, 20.64565032756945584, 27.532901203525398159, 3.1536372744631080423, 98.612741979653947055, 66.261146383989398601, 43.190016283006116282, 80.017057134700735332, 24.683063685704837553, 29.959673339642904466, 14.580883701888524229, 76.39552753201496671, 69.129206840094411746, 28.543695986536476994, 32.233329864124243613, 39.984385040101187769, 73.219921573076135246, 18.060399911595595768, 
81.995271788502577692, 96.866761392404441722, 115.14491091570744175, 109.77268932807055535, 89.721390009599417681, 19.501381997266435064, 27.113311167817300884, 49.18749119300628081, 28.230654510392923839, 86.170848256813769694, 19.560462959118012805, 60.705228645932947984, 53.716275756913091755, 42.078280398280185182, 46.81415596031001769, 36.415286699520947877, 40.880150725937710376, 90.941931114732142305, 4.8563378069484315347, 85.688000664344144752, 55.501165509540442144, 25.193069917200773489, 34.640648571883502882, 99.071469615842943313, 99.323809111163427588, 122.45084562368720071, 37.339119299253070494, 24.775327868195745395, 20.356471948300168151, 43.631564205014001345, 116.20996256903163157, 49.598473719339381205, 118.5945218130145804, 101.45190361039931304, 114.83556347613557591, 100.78924752282546251, 52.38746156360139139, 6.136628979082161095, 52.587519758242706303, 92.153150298872787971, 39.321762672290788032, 51.194010677401820431, 74.622246005597844487, 79.761525993901159381, 55.785524628088751342, 114.09193919011522667, 92.303922660063108196, 61.379553727667371277, 98.364064538825914497, 119.38208276472505531, 99.388720697006647242, 107.44126237222371856, 2.9365179553133202717, 47.940133745982166147, 55.545042856560030486, 64.496555498666566564, 21.113568413380562561, 112.03596847138032899, 47.42737354780183523, 32.073879358940757811, 117.32698123432783177, 42.070650918376486516, 31.737295142243965529, 39.964754444044956472, 114.49347705958643928, 3.8802323564814287238, 15.976783532740228111, 89.845238092057115864, 85.915733292138611432, 110.42796667841685121, 43.468355721546686254, 16.357372346497868421, 99.604256802987947594, 12.607615653385437327, 89.497131424439430702, 27.005496462472365238, 59.472878490360017167, 4.3622022498020669445, 28.042196283655357547, 100.86458223297449877, 15.384652715107222321, 97.243210187778458931, 93.045034660590317799, 12.657854900277015986, 37.559830648988281609, 100.41234540919322171, 112.00148806908691768, 
54.512531185191619443, 16.413350672733940883, 82.953851419671991607, 52.139182864571921527, 118.90208851799252443, 13.087202269642148167, 23.332738220724422717, 126.01000885688335984, 5.5631762866432836745, 75.435047363276680699, 93.669469279069744516, 18.968717935316817602, 47.930967009197047446, 65.321576484126126161, 60.969445726561389165, 114.72263441458926536, 54.597297757249179995, 82.346184390273265308, 8.0002754201232164633, 58.888393257722782437, 88.186589796874613967, 88.218954415151529247, 80.752720375850913115, 33.872617270404589362, 84.922280122758820653, 96.241913044388638809, 94.061074550390912918, 21.673887718872720143, 25.121919223758595763, 13.964082523514662171, 84.34557759096060181, 34.481903524912922876, 105.09523162195182522, 42.979963862169825006, 38.897749591345927911, 8.1113965644144627731, 113.09561915939775645, 30.715794794508838095, 53.568111769862298388, 17.730468529578502057, 59.109234035975532606, 73.727275749977707164, 86.725935747777839424, 100.48761198234569747, 115.99206333105030353, 86.303383351703814697, 13.698763175507338019, 12.061472973564377753, 80.688784848985960707, 64.789844455914135324, 16.091425537113536848, 123.98386446675795014, 31.711989789077051682, 77.829588219086872414, 39.657009329399443232, 38.846474339508858975, 98.838421835374902003, 103.58893639458256075, 118.97102944884318276, 29.200962166611134307, 23.605087909774738364, 98.970860503595758928, 123.95656473830968025, 69.565861737963132327, 38.651671605119190644, 119.89298386513473815, 18.954133245289995102, 50.211272776785335736, 18.745941582106752321, 37.635198878695518943, 76.134107371130085085, 90.8470470167521853, 46.881842230057372944, 52.688187805739289615, 35.941178795204905327, 107.91375112355308374, 111.05169195243797731, 98.934616265567456139, 27.556273412788868882, 87.011615003237238852, 6.9428638876524928492, 40.531729468195408117, 29.866481950986781158, 49.369328991433576448, 124.83749572710075881, 2.6444065688374394085, 124.02476015544743859, 
38.766863127621036256, 99.446496884251246229, 42.60966880944033619, 124.66505176459031645, 103.19576545491872821, 1.3534572481657960452, 60.265575074648950249, 54.820216391730355099, 41.313711418290040456, 99.447219928053527838, 110.11417297307707486, 54.492094033859757474, 40.739392218867578777, 43.182262276597612072, 28.255289901520882268, 8.4310535290569532663, 48.409778407876729034, 39.322198213059891714, 36.95574187604142935, 113.78377701778299524, 117.16228506891275174, 113.26531354870530777, 54.164963847993931267, 59.523949889739014907, 16.922394717465067515, 40.408966081911785295, 33.781543986082397169, 71.555851610315585276, 55.794989915990299778, 61.037637352459569229, 124.56410772240997176, 49.575150220825889846, 59.539864330468844855, 31.466882967928540893, 77.291333441713504726, 102.54417136731353821, 62.53743270709310309, 96.609510369493364124, 21.783149223130749306, 103.33624320875969715, 37.3270059637252416, 61.26719037813381874, 90.919673990672890795, 89.375539863933227025, 117.53618270318111172, 123.36179562248435104, 77.765913103554339614, 91.153426003642380238, 14.875401183639041847, 113.20855748694521026, 112.43993788056104677, 95.212075180119427387, 0.63661977236552047543, 72.510333986232581083, 95.280591217568144202, 109.69254759655450471, 57.016111677097796928, 87.740105316839617444, 73.107644115447328659, 23.000767318371799774, 9.8186357612976280507, 61.389009786045789951, 123.63294743483857019, 93.633826542230963241, 2.4190363818925106898, 115.13935011779904016, 78.488878315285546705, 93.936664691424084594, 120.59374797474447405, 1.9917889948483207263, 104.16291032471781364, 63.404600472385936882, 116.95465030751438462, 63.829512257001624675, 49.980369351524132071, 104.22582604423587327, 18.915710173474508338, 35.944025101780425757, 12.754332657361374004, 85.970334891298989533, 47.263211883189796936, 34.760708583067753352, 25.399922834047174547, 3.257880210589064518, 0.30253834673567325808, 64.76041944661483285, 35.146538372850045562, 
70.209531906264601275, 54.487759500872925855, 85.026176984672929393, 101.52798029310361017, 38.541882047884428175, 37.99620087432776927, 70.488378069312602747, 62.378081735001615016, 30.576045367673941655, 111.53887378875151626, 21.894504252792103216, 72.391099872322229203, 87.826029522671888117, 48.262620316370885121, 22.701709298497007694, 18.707110684703366132, 0.023975298525328980759, 87.118067527582752518, 0.76719627655256772414, 37.913775761913711904, 63.695170924187550554, 35.986180136587790912, 44.583907015399745433, 113.55222713472539908, 10.54558020094918902, 29.389458196816121927, 22.274550383368477924, 8.8323773312731646001, 0.84508603774884250015, 25.996850932508095866, 76.797530362611723831, 110.70071399092194042, 50.567458397053997032, 11.993848580619669519, 18.425196200310892891, 117.96688410985007067, 68.581526831043447601, 82.908297079928161111, 28.274800822804536438, 12.137420147948432714, 96.046196876483008964, 62.005909037517994875, 68.144622955707745859, 86.889608378558477852, 111.59095681326289196, 72.82579140943926177, 53.282409063027444063, 5.6662693123362259939, 76.91396063641514047, 119.46008659392100526, 16.471724563994939672, 22.637389150877424981, 123.11403550905743032, 116.93557056220015511, 36.784077325566613581, 103.16910747489964706, 48.879820140318770427, 66.376124643062212272, 8.7013021436432609335, 36.424125479010399431, 5.4145302274919231422, 41.065765260889747879, 64.099891893063613679, 57.764968413393944502, 120.96243213712659781, 15.548725148939411156, 124.88872373531557969, 60.817427099827909842, 67.84773808663157979, 46.76004452611960005, 79.428384874499897705, 29.783197264732734766, 101.59626837003452238, 39.578500628304027487, 47.163345921264408389, 37.798436819430207834, 65.699142007139016641, 59.705479170344915474, 12.907444843622215558, 118.21436062365319231, 114.86747126435147948, 23.310329456580802798, 2.0941591301416337956, 86.232591877356753685, 56.090753623913769843, 61.650359597588249017, 13.866742369908024557, 
82.233799387064209441, 41.910049760619585868, 57.507932945929496782, 80.778880983059934806, 64.841066353576024994, 41.291300655138911679, 55.065802407050796319, 6.3072745489298540633, 69.225483959311532089, 4.5222927679787972011, 86.380032566015870543, 32.034114269401470665, 49.366127371409675106, 59.919346679285808932, 29.161767403780686436, 24.791055064033571398, 10.258413680188823491, 57.087391973072953988, 64.466659728248487227, 79.968770080206013517, 18.439843146152270492, 36.120799823194829514, 35.990543577008793363, 65.733522784812521422, 102.28982183141852147, 91.545378656144748675, 51.442780019198835362, 39.002763994532870129, 54.226622335634601768, 98.374982386012561619, 56.461309020789485658, 44.341696513627539389, 39.120925918236025609, 121.41045729186589597, 107.43255151382618351, 84.156560796560370363, 93.62831192062003538, 72.830573399041895755, 81.760301451879058732, 53.88386222946428461, 9.7126756139005010482, 43.376001328688289504, 111.00233101908088429, 50.386139834401546977, 69.281297143770643743, 70.142939231689524604, 70.647618222330493154, 116.90169124737440143, 74.678238598506140988, 49.550655736391490791, 40.712943896600336302, 87.263128410031640669, 104.41992513806326315, 99.196947438682400389, 109.18904362603279878, 74.903807220802264055, 101.6711269522747898, 73.578495045650925022, 104.77492312720278278, 12.27325795816432219, 105.17503951648905058, 56.306300597745575942, 78.643525344581576064, 102.38802135480727884, 21.244492011199326953, 31.523051987802318763, 111.57104925618114066, 100.18387838023409131, 56.607845320126216393, 122.75910745533838053, 68.728129077655466972, 110.7641655294537486, 70.777441394016932463, 86.882524744447437115, 5.8730359106266405433, 95.880267491964332294, 111.09008571312369895, 0.99311099733313312754, 42.2271368267647631, 96.071936942760657985, 94.854747095603670459, 64.147758717885153601, 106.65396246865566354, 84.141301836752973031, 63.474590284487931058, 79.929508888093550922, 100.98695411917287856, 
7.7604647129628574476, 31.953567065484094201, 51.690476184117869707, 43.831466584280860843, 92.855933356833702419, 86.936711443093372509, 32.71474469299937482, 71.208513605975895189, 25.215231306774512632, 50.994262848878861405, 54.010992924944730476, 118.94575698072003433, 8.7244044996041338891, 56.084392567314353073, 73.729164465948997531, 30.76930543021808262, 66.48642037556055584, 58.090069321180635598, 25.315709800554031972, 75.119661297980201198, 72.824690818390081404, 96.002976138173835352, 109.02506237038323889, 32.826701345471519744, 37.907702839343983214, 104.27836572914748103, 109.80417703598504886, 26.174404539284296334, 46.665476441452483414, 124.02001771376671968, 11.126352573290205328, 22.870094726556999376, 59.338938558139489032, 37.937435870633635204, 95.861934018397732871, 2.6431529682522523217, 121.93889145312641631, 101.44526882917853072, 109.19459551450199797, 36.692368780546530616, 16.000550840246432927, 117.77678651544920285, 48.373179593752865912, 48.437908830306696473, 33.50544075170182623, 67.745234540812816704, 41.844560245521279285, 64.483826088780915597, 60.122149100781825837, 43.347775437745440286, 50.243838447517191526, 27.92816504703296232, 40.691155181921203621, 68.963807049829483731, 82.190463243903650437, 85.959927724339650013, 77.795499182691855822, 16.222793128832563525, 98.191238318795512896, 61.43158958901767619, 107.13622353972459678, 35.460937059157004114, 118.21846807195470319, 19.454551499959052308, 45.451871495559316827, 72.975223964695032919, 103.98412666210424504, 44.606766703407629393, 27.397526351014676038, 24.122945947132393485, 33.377569697975559393, 1.5796889118282706477, 32.182851074230711674, 119.96772893351590028, 63.423979578157741344, 27.659176438173744828, 79.314018658798886463, 77.692948679021355929, 69.676843670753441984, 79.177872789168759482, 109.94205889768636553, 58.401924333222268615, 47.210175819553114707, 69.941721007195155835, 119.91312947662299848, 11.131723475926264655, 77.303343210242019268, 
111.78596773026947631, 37.908266490579990204, 100.42254555357067147, 37.491883164213504642, 75.270397757394675864, 24.268214742263808148, 53.6940940335043706, 93.763684460114745889, 105.37637561147857923, 71.882357590413448634, 87.827502247109805467, 94.103383904879592592, 69.869232531134912279, 55.112546825581375742, 46.023230006474477705, 13.885727775304985698, 81.063458936390816234, 59.732963901973562315, 98.738657982870790875, 121.67499145420151763, 5.2888131376785167959, 120.04952031089487718, 77.53372625524571049, 70.892993768502492458, 85.219337618884310359, 121.33010352918427088, 78.39153090983745642, 2.7069144963315920904, 120.5311501492979005, 109.6404327834607102, 82.62742283658371889, 70.894439856110693654, 92.228345946154149715, 108.98418806771951495, 81.478784437735157553, 86.364524553195224144, 56.510579803041764535, 16.862107058113906533, 96.819556815753458068, 78.644396426119783428, 73.911483752086496679, 99.567554035569628468, 106.32457013782914146, 98.530627097410615534, 108.32992769598786253, 119.04789977947802981, 33.84478943493013503, 80.81793216382357059, 67.563087972168432316, 15.111703220631170552, 111.58997983198059956, 122.07527470492277644, 121.12821544481994351, 99.150300441651779693, 119.07972866093768971, 62.933765935860719765, 26.582666883430647431, 77.088342734627076425, 125.07486541418984416, 65.219020738986728247, 43.56629844626513659, 78.672486417519394308, 74.654011927450483199, 122.53438075627127546, 53.83934798134941957, 50.751079727870092029, 107.07236540636586142, 118.72359124497234006, 27.531826207112317206, 54.306852007284760475, 29.750802367281721672, 98.417114973894058494, 96.879875761125731515, 62.424150360242492752, 1.2732395447346789297, 17.020667972468800144, 62.561182435139926383, 91.385095193109009415, 114.03222335419923184, 47.480210633682872867, 18.215288230894657318, 46.001534636743599549, 19.637271522595256101, 122.77801957209521788, 119.26589486968077836, 59.267653084461926483, 4.8380727637886593584, 
102.2787002356017183, 28.977756630574731389, 59.873329382851807168, 113.1874959494889481, 3.9835779897002794314, 80.325820649435627274, 126.80920094477551174, 105.90930061502876924, 127.65902451400324935, 99.960738703051902121, 80.451652088471746538, 37.831420346949016675, 71.888050203560851514, 25.508665314726385986, 43.940669782597979065, 94.526423766379593872, 69.521417166135506704, 50.799845668094349094, 6.515760421178129036, 0.60507669347498449497, 1.5208388932296657003, 70.293076745703729102, 12.419063812529202551, 108.97551900174948969, 42.052353969349496765, 75.055960586207220331, 77.08376409576885635, 75.992401748659176519, 12.976756138625205494, 124.75616347000323003, 61.152090735351521289, 95.077747577503032517, 43.789008505587844411, 16.782199744648096384, 47.652059045343776233, 96.525240632741770241, 45.403418596994015388, 37.414221369406732265, 0.047950597054295940325, 46.236135055165505037, 1.5343925531051354483, 75.827551523827423807, 127.39034184837510111, 71.972360273175581824, 89.167814030803128844, 99.104454269450798165, 21.091160401898378041, 58.778916393632243853, 44.549100766736955848, 17.6647546625463292, 1.6901720754976850003, 51.993701865019829711, 25.595060725227085641, 93.401427981843880843, 101.13491679410799406, 23.987697161239339039, 36.850392400625423761, 107.93376821970377932, 9.1630536620905331802, 37.816594159856322221, 56.549601645609072875, 24.274840295900503406, 64.092393752966017928, 124.01181807503962773, 8.2892459114191296976, 45.779216757120593684, 95.18191362652942189, 17.65158281887852354, 106.56481812605488813, 11.332538624676089967, 25.82792127283391892, 110.92017318784201052, 32.943449127989879344, 45.274778301758487942, 118.22807101811849861, 105.87114112440031022, 73.568154651136865141, 78.338214949799294118, 97.759640280641178833, 4.7522492861244245432, 17.402604287286521867, 72.848250958020798862, 10.829060454983846284, 82.131530521783133736, 0.19978378612722735852, 115.52993682679152698, 113.92486427425319562, 
31.097450297882460291, 121.77744747063115938, 121.63485419965945766, 7.6954761732631595805, 93.520089052242838079, 30.856769748999795411, 59.566394529469107511, 75.192536740069044754, 79.157001256608054973, 94.326691842528816778, 75.596873638864053646, 3.3982840142780332826, 119.41095834068983095, 25.814889687248069094, 108.42872124731002259, 101.73494252870295895, 46.620658913161605597, 4.18831826028690557, 44.465183754713507369, 112.18150724782753969, 123.30071919517649803, 27.733484739819687093, 36.467598774128418881, 83.820099521239171736, 115.01586589186263154, 33.557761966123507591, 1.6821327071520499885, 82.582601310277823359, 110.13160481410159264, 12.614549097863346105, 10.450967918623064179, 9.0445855359612323809, 44.760065132031741086, 64.06822853880294133, 98.732254742819350213, 119.83869335857161786, 58.323534807561372872, 49.582110128067142796, 20.516827360377646983, 114.17478394614954595, 0.93331945650061243214, 31.937540160412027035, 36.879686292304540984, 72.241599646389659029, 71.981087154021224706, 3.4670455696286808234, 76.579643662840680918, 55.090757312289497349, 102.88556003839767072, 78.005527989065740258, 108.45324467127284152, 68.749964772025123239, 112.92261804157897132, 88.683393027255078778, 78.241851836475689197, 114.82091458373542991, 86.865103027656004997, 40.313121593124378705, 59.256623841243708739, 17.661146798087429488, 35.520602903758117463, 107.76772445892856922, 19.425351227801002096, 86.752002657380216988, 94.004662038161768578, 100.77227966880673193, 10.562594287544925464, 12.285878463382687187, 13.295236444664624287, 105.80338249475244083, 21.356477197012281977, 99.101311472782981582, 81.425887793204310583, 46.526256820066919317, 80.839850276130164275, 70.393894877368438756, 90.378087252069235547, 21.807614441608166089, 75.342253904553217581, 19.156990091305488022, 81.54984625440556556, 24.546515916332282359, 82.350079032978101168, 112.61260119549478986, 29.287050689166790107, 76.77604270961455768, 42.488984022398653906, 
63.046103975608275505, 95.142098512365919305, 72.36775676046818262, 113.21569064025607076, 117.51821491068039904, 9.4562581553145719226, 93.528331058911135187, 13.554882788037502905, 45.76504948889487423, 11.746071821256919065, 63.760534983932302566, 94.180171426247397903, 1.9862219946662662551, 84.454273653533164179, 64.143873885524953948, 61.709494191210978897, 0.2955174357739451807, 85.307924937314965064, 40.282603673505946063, 126.94918056897950009, 31.859017776190739824, 73.973908238345757127, 15.520929425925714895, 63.907134130968188401, 103.38095236823573941, 87.662933168565359665, 57.711866713671042817, 45.873422886186745018, 65.429489386002387619, 14.417027211951790377, 50.430462613549025264, 101.98852569775772281, 108.02198584989309893, 109.89151396144006867, 17.448808999208267778, 112.16878513463234412, 19.458328931897995062, 61.538610860439803218, 4.9728407511211116798, 116.1801386423612712, 50.631419601111701922, 22.239322595964040374, 17.649381636783800786, 64.005952276351308683, 90.050124740766477771, 65.653402690943039488, 75.815405678691604408, 80.556731458298600046, 91.608354071970097721, 52.348809078568592668, 93.330952882908604806, 120.04003542753343936, 22.252705146580410656, 45.740189453113998752, 118.67787711627897806, 75.874871741270908387, 63.723868036795465741, 5.2863059365081426222, 115.8777829062564706, 74.890537658360699425, 90.389191029003995936, 73.384737561093061231, 32.001101680496503832, 107.5535730308984057, 96.746359187505731825, 96.875817660613392945, 67.010881503407290438, 7.4904690816256334074, 83.689120491046196548, 0.96765217756183119491, 120.24429820156365167, 86.695550875490880571, 100.48767689503438305, 55.856330094065924641, 81.38231036384604522, 9.9276140996626054402, 36.380926487810938852, 43.919855448682938004, 27.590998365383711644, 32.445586257668765029, 68.382476637591025792, 122.86317917803899036, 86.272447079449193552, 70.921874118317646207, 108.43693614390940638, 38.909102999918104615, 90.903742991122271633, 
17.950447929393703816, 79.968253324208490085, 89.213533406815258786, 54.795052702029352076, 48.24589189426478697, 66.755139395951118786, 3.1593778236565412953, 64.365702148465061327, 111.93545786703180056, 126.84795915631548269, 55.318352876351127634, 30.628037317601410905, 27.385897358046349837, 11.353687341510521946, 30.355745578341156943, 91.884117795372731052, 116.80384866644453723, 94.420351639109867392, 11.883442014393949648, 111.82625895324599696, 22.263446951856167288, 26.606686420487676514, 95.571935460538952611, 75.816532981163618388, 72.845091107141342945, 74.983766328430647263, 22.540795514789351728, 48.536429484527616296, 107.38818806701237918, 59.527368920233129757, 82.75275122296079644, 15.764715180830535246, 47.655004494223248912, 60.206767809762823163, 11.738465062269824557, 110.22509365116275148, 92.046460012948955409, 27.771455550613609375, 34.126917872781632468, 119.46592780395076261, 69.477315965745219728, 115.34998290840303525, 10.577626275360671571, 112.09904062179339235, 27.067452510495058959, 13.785987537008622894, 42.438675237772258697, 114.66020705837217974, 28.78306181967491284, 5.4138289926668221597, 113.06230029859943897, 91.280865566925058374, 37.254845673171075759, 13.788879712225025287, 56.456691892311937409, 89.968376135442667874, 34.957568875473953085, 44.729049106390448287, 113.02115960608352907, 33.724214116231451044, 65.639113631510554114, 29.288792852243204834, 19.822967504176631337, 71.135108071142894914, 84.649140275661920896, 69.061254194824869046, 88.659855391979363048, 110.09579955895605963, 67.689578869863908039, 33.63586432765077916, 7.1261759443368646316, 30.223406441265979083, 95.179959663964837091, 116.15054940984919085, 114.25643088964352501, 70.300600883307197364, 110.15945732187537942, 125.86753187172143953, 53.165333766864932841, 26.176685469257790828, 122.1497308283833263, 2.4380414779734564945, 87.132596892533911159, 29.344972835042426595, 21.308023854904604377, 117.06876151254255092, 107.67869596269883914, 
101.50215945574382204, 86.144730812735360814, 109.44718248994468013, 55.063652414224634413, 108.61370401456952095, 59.501604734567081323, 68.834229947791754967, 65.75975152225146303, 124.8483007204849855, 2.5464790894693578593, 34.041335944937600289, 125.12236487028349075, 54.770190386221656809, 100.06444670839846367, 94.960421267365745734, 36.430576461789314635, 92.003069273487199098, 39.274543045194150181, 117.55603914419407374, 110.53178973936155671, 118.53530616892385297, 9.6761455275809566956, 76.557400471203436609, 57.955513261149462778, 119.74665876570725231, 98.374991898977896199, 7.9671559794041968416, 32.651641298874892527, 125.61840188955466147, 83.818601230061176466, 127.31804902801013668, 71.921477406107442221, 32.903304176943493076, 75.66284069390167133, 15.776100407121703029, 51.017330629452771973, 87.881339565199596109, 61.052847532762825722, 11.042834332271013409, 101.59969133618869819, 13.031520842356258072, 1.2101533869499689899, 3.0416777864629693795, 12.586153491407458205, 24.83812762506204308, 89.951038003498979378, 84.104707938702631509, 22.11192117241807864, 26.167528191541350679, 23.984803497318353038, 25.953512277254048968, 121.51232694000646006, 122.30418147070668056, 62.155495155006065033, 87.578017011175688822, 33.564399489296192769, 95.304118090691190446, 65.050481265483540483, 90.806837193988030776, 74.82844273881346453, 0.095901194108591880649, 92.472270110331010073, 3.0687851062139088754, 23.655103047658485593, 126.7806836967538402, 15.944720546354801627, 50.335628061609895667, 70.20890853890159633, 42.182320803796756081, 117.55783278726448771, 89.098201533473911695, 35.329509325092658401, 3.3803441509953700006, 103.98740373003965942, 51.19012145045780926, 58.802855963691399666, 74.269833588215988129, 47.975394322478678077, 73.700784801254485501, 87.867536439407558646, 18.32610732418106636, 75.633188319712644443, 113.09920329121814575, 48.549680591804644791, 0.18478750593567383476, 120.02363615007925546, 16.578491822838259395, 
91.558433514244825346, 62.36382725305884378, 35.303165637760685058, 85.129636252109776251, 22.665077249352179933, 51.655842545671475818, 93.840346375684021041, 65.886898255979758687, 90.549556603520613862, 108.45614203624063521, 83.742282248804258415, 19.136309302277368261, 28.676429899602226214, 67.519280561282357667, 9.5044985722488490865, 34.805208574576681713, 17.696501916041597724, 21.658120909971330548, 36.263061043566267472, 0.39956757225809269585, 103.05987365358669194, 99.849728548506391235, 62.19490059576855856, 115.55489494126595673, 115.26970839931891533, 15.39095234652995714, 59.040178104485676158, 61.713539497999590822, 119.132789058941853, 22.385073480141727487, 30.314002513219747925, 60.653383685061271535, 23.193747277728107292, 6.796568028559704544, 110.8219166813796619, 51.629779374499776168, 88.85744249462368316, 75.469885057405917905, 93.241317826323211193, 8.3766365205738111399, 88.930367509430652717, 96.363014495658717351, 118.60143839035299607, 55.466969479639374185, 72.935197548260475742, 39.640199042481981451, 102.03173178372526309, 67.115523932247015182, 3.3642654143077379558, 37.165202620559284696, 92.263209628203185275, 25.229098195726692211, 20.901935837246128358, 18.089171071922464762, 89.520130264067120152, 0.13645707760952063836, 69.464509485642338404, 111.67738671714323573, 116.64706961512274574, 99.164220256134285592, 41.033654720758931944, 100.34956789229909191, 1.8666389130012248643, 63.87508032082405407, 73.759372584612719947, 16.483199292782956036, 15.962174308042449411, 6.9340911392609996255, 25.159287325681361835, 110.18151462458263268, 77.771120076798979426, 28.011055978135118494, 88.906489342549321009, 9.4999295440538844559, 97.845236083161580609, 49.366786054513795534, 28.483703672955016373, 101.64182916747449781, 45.730206055312009994, 80.626243186248757411, 118.51324768248741748, 35.322293596174858976, 71.041205807519872906, 87.53544891785713844, 38.850702455605642172, 45.504005314764071954, 60.009324076327175135, 
73.544559337613463867, 21.125188575089850929, 24.571756926765374374, 26.590472889332886552, 83.606764989504881669, 42.712954394028201932, 70.202622945569601143, 34.851775586408621166, 93.052513640137476614, 33.679700552263966529, 12.787789754740515491, 52.756174504138471093, 43.615228883219970157, 22.684507809106435161, 38.313980182614614023, 35.09969250881113112, 49.093031832668202696, 36.700158065959840314, 97.225202390989579726, 58.574101378337218193, 25.552085419229115359, 84.977968044797307812, 126.09220795122018899, 62.284197024735476589, 16.735513520940003218, 98.431381280515779508, 107.03642982136079809, 18.912516310632781824, 59.056662117822270375, 27.109765576075005811, 91.53009897778974846, 23.492143642513838131, 127.52106996786824311, 60.360342852494795807, 3.972443989336170489, 40.908547307069966337, 0.28774777104990789667, 123.41898838242195779, 0.59103487155152834021, 42.615849874629930127, 80.565207347015530104, 125.89836113795900019, 63.718035552385117626, 19.947816476691514254, 31.04185885185142979, 127.81426826194001478, 78.761904736475116806, 47.32586633713071933, 115.42373342734208563, 91.746845772377128014, 2.8589787720047752373, 28.834054423903580755, 100.86092522710168851, 75.977051395519083599, 88.043971699789835839, 91.783027922880137339, 34.897617998420173535, 96.33757026926468825, 38.916657863795990124, 123.07722172087960644, 9.9456815022458613385, 104.36027728472254239, 101.26283920222340384, 44.478645191928080749, 35.298763273571239552, 0.011904552706255344674, 52.100249481536593521, 3.3068053818897169549, 23.630811357386846794, 33.113462916600838071, 55.216708143940195441, 104.69761815714082331, 58.661905765820847591, 112.08007085506687872, 44.505410293160821311, 91.480378906231635483, 109.35575423256159411, 23.749743482541816775, 127.44773607359093148, 10.572611873019923223, 103.75556581251657917, 21.781075316725036828, 52.778382058011629852, 18.769475122186122462, 64.002203360996645642, 87.107146061800449388, 65.492718375015101628, 
65.75163532123042387, 6.0217630068145808764, 14.980938163254904794, 39.378240982092393097, 1.9353043551273003686, 112.48859640313094133, 45.391101750985399121, 72.975353790068766102, 111.71266018813548726, 34.764620727692090441, 19.85522819932521088, 72.761852975621877704, 87.839710897365876008, 55.181996730767423287, 64.891172515341168037, 8.7649532751856895629, 117.72635835608161869, 44.544894158902025083, 13.843748236638930393, 88.873872287818812765, 77.818205999839847209, 53.807485982244543266, 35.900895858791045612, 31.936506648420618149, 50.427066813630517572, 109.59010540405870415, 96.491783788533211919, 5.5102787919022375718, 6.3187556473167205695, 0.73140429693376063369, 95.8709157340672391, 125.69591831263096537, 110.63670575270225527, 61.25607463520282181, 54.771794716092699673, 22.707374683021043893, 60.711491156685951864, 55.768235590749100083, 105.60769733289271244, 60.840703278219734784, 23.766884028791537276, 95.652517906491993926, 44.526893903712334577, 53.213372840978991007, 63.143870921081543202, 23.633065962330874754, 17.69018221428632387, 21.967532656861294527, 45.081591029578703456, 97.072858969058870571, 86.776376134024758358, 119.05473784046989749, 37.50550244592159288, 31.529430361661070492, 95.310008988446497824, 120.41353561952564633, 23.476930124543287093, 92.450187302329140948, 56.092920025897910818, 55.542911101227218751, 68.253835745566902915, 110.9318556079051632, 10.954631931494077435, 102.69996581680607051, 21.15525255072498112, 96.198081243586784694, 54.134905020990117919, 27.571975074017245788, 84.877350475548155373, 101.32041411674435949, 57.56612363934982568, 10.827657985337282298, 98.124600597202515928, 54.561731133850116748, 74.509691346342151519, 27.577759424450050574, 112.9133837846275128, 51.936752270885335747, 69.91513775094790617, 89.458098212784534553, 98.04231921216705814, 67.448428232466540067, 3.2782272630211082287, 58.577585704486409668, 39.645935008356900653, 14.270216142289427808, 41.29828055132747977, 
10.122508389653376071, 49.319710783962364076, 92.191599117915757233, 7.3791577397278160788, 67.271728655301558319, 14.252351888673729263, 60.446812882535596145, 62.359919327933312161, 104.30109881970201968, 100.51286177929068799, 12.601201766618032707, 92.31891464375439682, 123.73506374344287906, 106.33066753372986568, 52.353370938519219635, 116.2994616567666526, 4.8760829559505509678, 46.265193785067822319, 58.689945670084853191, 42.616047709812846733, 106.13752302508873981, 87.357391925401316257, 75.004318911491282051, 44.289461625474359607, 90.894364979889360256, 110.1273048284529068, 89.227408029139041901, 119.00320946913780062, 9.668459895587147912, 3.5195030445065640379, 121.69660144096997101, 5.0929581789387157187, 68.082671889878838556, 122.24472974056698149, 109.5403807724469516, 72.128893416800565319, 61.920842534731491469, 72.861152923582267249, 56.006138546974398196, 78.549086090391938342, 107.11207828839178546, 93.063579478726751404, 109.07061233784770593, 19.35229105516555137, 25.114800942406873219, 115.91102652230256354, 111.49331753141814261, 68.749983797955792397, 15.934311958808393683, 65.303282597753423033, 123.23680377911296091, 39.63720246012599091, 126.63609805602391134, 15.842954812214884441, 65.80660835389062413, 23.325681387803342659, 31.552200814243406057, 102.03466125890918192, 47.762679130399192218, 122.10569506552565144, 22.085668664545664797, 75.199382672377396375, 26.063041684712516144, 2.4203067738999379799, 6.083355572925938759, 25.17230698281491641, 49.676255250127724139, 51.902076007001596736, 40.209415877405263018, 44.223842344836157281, 52.335056383082701359, 47.969606994636706077, 51.907024554511735914, 115.02465388001655811, 116.60836294141336111, 124.31099031001576805, 47.156034022351377644, 67.128798978596023517, 62.60823618138601887, 2.1009625309707189444, 53.613674387979699532, 21.65688547762692906, 0.19180238822082174011, 56.944540220662020147, 6.1375702124278177507, 47.310206095320609165, 125.56136739350768039, 
31.889441092709603254, 100.67125612322342931, 12.41781707780319266, 84.364641607597150141, 107.11566557453261339, 50.19640306694782339, 70.659018650185316801, 6.76068830199437798, 79.974807460079318844, 102.38024290091561852, 117.60571192738643731, 20.539667176431976259, 95.950788644960994134, 19.401569602512608981, 47.735072878815117292, 36.652214648365770699, 23.266376639428926865, 98.198406582436291501, 97.099361183609289583, 0.36957501187498564832, 112.04727230015851092, 33.156983645680156769, 55.116867028489650693, 124.72765450611768756, 70.606331275525008095, 42.259272504219552502, 45.330154498704359867, 103.31168509134658962, 59.68069275137168006, 3.7737965119631553534, 53.099113207044865703, 88.912284072484908393, 39.484564497608516831, 38.272618604554736521, 57.352859799208090408, 7.0385611225647153333, 19.008997144497698173, 69.610417149153363425, 35.393003832083195448, 43.316241819942661095, 72.526122087132534944, 0.7991351445198233705, 78.119747307177021867, 71.699457097012782469, 124.3898011915407551, 103.10978988253555144, 102.53941679863783065, 30.78190469305991428, 118.08035620897135232, 123.42707899600281962, 110.265578117883706, 44.770146960283454973, 60.62800502643949585, 121.30676737012254307, 46.387494555456214584, 13.593136057123047067, 93.643833362759323791, 103.25955874900319031, 49.714884989251004299, 22.939770114815473789, 58.482635652646422386, 16.753273041151260259, 49.860735018864943413, 64.726028991321072681, 109.20287678070599213, 110.93393895928238635, 17.870395096520951483, 79.28039808496760088, 76.063463567454164149, 6.2310478644940303639, 6.7285308286154759116, 74.330405241122207372, 56.52641925640637055, 50.458196391453384422, 41.803871674492256716, 36.178342143848567503, 51.040260528134240303, 0.27291415522267925553, 10.929018971288314788, 95.354773434290109435, 105.29413923024912947, 70.328440512268571183, 82.067309441521501867, 72.699135784598183818, 3.7332778260024497285, 127.75016064165174612, 19.518745169225439895, 
32.966398585565912072, 31.924348616084898822, 13.868182278521999251, 50.31857465136636165, 92.363029249168903334, 27.542240153597958852, 56.022111956273874966, 49.812978685098642018, 18.999859088111406891, 67.690472166323161218, 98.733572109027591068, 56.967407345913670724, 75.283658334948995616, 91.460412110627657967, 33.2524863725011528, 109.02649536497483496, 70.644587192353355931, 14.08241161504338379, 47.07089783571427688, 77.701404911211284343, 91.008010629528143909, 120.01864815265798825, 19.089118675226927735, 42.250377150179701857, 49.143513853530748747, 53.180945778669411084, 39.213529979009763338, 85.425908788060041843, 12.405245891139202286, 69.703551172820880311, 58.105027280274953227, 67.359401104527933057, 25.575579509481030982, 105.51234900827694219, 87.230457766439940315, 45.369015618212870322, 76.627960365229228046, 70.199385017622262239, 98.186063665336405393, 73.400316131923318608, 66.450404781979159452, 117.14820275667443639, 51.104170838461868698, 41.955936089598253602, 124.18441590244037798, 124.56839404947095318, 33.471027041883644415, 68.862762561035196995, 86.072859642721596174, 37.825032621265563648, 118.11332423564454075, 54.2195311521536496, 55.06019795557949692, 46.984287285031314241, 127.04213993573648622, 120.72068570498959161, 7.9448879786723409779, 81.817094614139932673, 0.57549554210345377214, 118.83797676484391559, 1.1820697431030566804, 85.231699749263498234, 33.130414694034698186, 123.79672227591800038, 127.43607110477023525, 39.895632953383028507, 62.083717703706497559, 127.62853652388366754, 29.523809472950233612, 94.651732674265076639, 102.84746685468780925, 55.493691544754256029, 5.7179575440095504746, 57.668108847810799489, 73.721850454207014991, 23.954102791041805176, 48.087943399583309656, 55.566055845760274678, 69.795235996843985049, 64.675140538529376499, 77.833315727595618227, 118.15444344175921287, 19.891363004495360656, 80.720554569448722759, 74.52567840444680769, 88.957290383856161498, 70.597526547142479103, 
0.023809105412510689348, 104.20049896307318704, 6.6136107637794339098, 47.261622714773693588, 66.226925833201676141, 110.43341628788402886, 81.395236314281646628, 117.32381153164533316, 96.160141710133757442, 89.010820586325280601, 54.960757812466908945, 90.711508465123188216, 47.499486965087271528, 126.89547214718186297, 21.145223746043484425, 79.511131625033158343, 43.562150633450073656, 105.5567641160232597, 37.538950244375882903, 0.0044067219969292636961, 46.214292123604536755, 2.9854367500338412356, 3.5032706424608477391, 12.043526013629161753, 29.961876326509809587, 78.756481964184786193, 3.8706087102582387161, 96.977192806261882652, 90.782203501974436222, 17.950707580141170183, 95.425320376270974521, 69.529241455384180881, 39.71045639865405974, 17.523705951247393386, 47.679421794731752016, 110.36399346153848455, 1.7823450306823360734, 17.529906550375017105, 107.45271671216323739, 89.089788317804050166, 27.687496473281498766, 49.747744575641263509, 27.636411999679694418, 107.61497196448908653, 71.801791717582091223, 63.873013296841236297, 100.85413362726467312, 91.180210808121046284, 64.983567577066423837, 11.020557583804475144, 12.637511294633441139, 1.4628085938711592462, 63.741831468138116179, 123.39183662526556873, 93.273411505404510535, 122.51214927040564362, 109.54358943218539935, 45.414749366042087786, 121.42298231337190373, 111.53647118150183815, 83.215394665785424877, 121.68140655643946957, 47.53376805758671253, 63.305035812983987853, 89.053787807424669154, 106.42674568195798201, 126.2877418421630864, 47.266131924661749508, 35.380364428572647739, 43.935065313722589053, 90.16318205916104489, 66.14571793812137912, 45.552752268053154694, 110.10947568093979498, 75.01100489184318576, 63.058860723325778963, 62.620017976896633627, 112.82707123905129265, 46.953860249090212164, 56.900374604658281896, 112.18584005179582164, 111.08582220245807548, 8.5076714911374438088, 93.863711215810326394, 21.909263862991792848, 77.399931633612141013, 42.310505101453600219, 
64.396162487173569389, 108.26981004198023584, 55.143950148038129555, 41.754700951099948725, 74.640828233488718979, 115.13224727869965136, 21.655315970678202575, 68.249201194405031856, 109.12346226770387148, 21.019382692684303038, 55.155518848903739126, 97.826767569255025592, 103.87350454177430947, 11.830275501899450319, 50.916196425572707085, 68.084638424337754259, 6.8968564649330801331, 6.5564545260422164574, 117.15517140897281934, 79.291870016713801306, 28.540432284578855615, 82.596561102654959541, 20.245016779306752142, 98.63942156792836613, 56.383198235831514467, 14.758315479459270136, 6.543457310603116639, 28.504703777347458526, 120.89362576507119229, 124.7198386558702623, 80.602197639407677343, 73.025723558581375983, 25.202403533239703393, 56.63782928751243162, 119.4701274868893961, 84.661335067459731363, 104.70674187704207725, 104.59892331353330519, 9.7521659119011019357, 92.530387570135644637, 117.37989134016970638, 85.232095419629331445, 84.275046050181117607, 46.714783850806270493, 22.008637822982564103, 88.578923250952357193, 53.788729959782358492, 92.254609656909451587, 50.45481605828172178, 110.00641893827560125, 19.336919791174295824, 7.0390060890167660546, 115.39320288194358, 10.185916357881069416, 8.1653437797613150906, 116.48945948113760096, 91.080761544897541171, 16.257786833601130638, 123.84168506946662092, 17.722305847164534498, 112.01227709395243437, 29.098172180787514662, 86.224156576787208905, 58.127158957457140787, 90.141224675695411861, 38.70458211033110274, 50.229601884817384416, 103.82205304460876505, 94.986635062839923194, 9.4999675959115847945, 31.868623917620425345, 2.6065651955104840454, 118.47360755822592182, 79.2744049202556198, 125.27219611204782268, 31.685909624433406861, 3.6132167077812482603, 46.651362775606685318, 63.104401628490450094, 76.069322517818363849, 95.525358260802022414, 116.21139013105130289, 44.171337329094967572, 22.39876534475479275, 52.126083369425032288, 4.8406135477998759598, 12.166711145851877518, 
50.344613965629832819, 99.352510500255448278, 103.80415201400319347, 80.418831754810526036, 88.447684689672314562, 104.67011276616540272, 95.939213989273412153, 103.81404910902710981, 102.04930776003675419, 105.21672588283036021, 120.62198062003153609, 94.312068044706393266, 6.2575979571920470335, 125.21647236277203774, 4.2019250619414378889, 107.22734877595939906, 43.313770955253858119, 0.38360477644164348021, 113.88908044132404029, 12.275140424855635501, 94.62041219064485631, 123.12273478701536078, 63.778882185422844486, 73.342512246446858626, 24.835634155610023299, 40.729283215197938262, 86.231331149065226782, 100.39280613389928476, 13.318037300370633602, 13.521376603992393939, 31.949614920162275666, 76.760485801834875019, 107.21142385477287462, 41.079334352867590496, 63.901577289921988267, 38.803139205025217962, 95.470145757630234584, 73.304429296731541399, 46.532753278861491708, 68.396813164876220981, 66.198722367218579166, 0.73915002375360927545, 96.094544600317021832, 66.313967291360313538, 110.23373405698293936, 121.4553090122390131, 13.212662551050016191, 84.518545008439105004, 90.660308997412357712, 78.623370182693179231, 119.36138550274336012, 7.5475930239263107069, 106.19822641409336939, 49.824568144969816785, 78.969128995217033662, 76.545237209109473042, 114.70571959841618082, 14.077122245129430667, 38.017994288999034325, 11.220834298306726851, 70.786007664166390896, 86.63248363988896017, 17.052244174265069887, 1.5982702890432847198, 28.239494614357681712, 15.398914194029202918, 120.77960238308514818, 78.219579765074740862, 77.078833597275661305, 61.563809386123466538, 108.16071241794634261, 118.85415799200927722, 92.531156235767411999, 89.540293920566909947, 121.25601005288262968, 114.61353474024508614, 92.774989110912429169, 27.186272114249732113, 59.28766672552228556, 78.519117498006380629, 99.429769978502008598, 45.879540229630947579, 116.96527130529284477, 33.506546082302520517, 99.721470037733524805, 1.4520579826457833406, 90.405753561415622244, 
93.867877918564772699, 35.740790193045540946, 30.560796169938839739, 24.126927134908328298, 12.462095728988060728, 13.457061657234589802, 20.660810482244414743, 113.0528385128127411, 100.91639278290676884, 83.607743348984513432, 72.356684287697135005, 102.08052105626848061, 0.54582831044535851106, 21.858037942580267554, 62.709546868583856849, 82.588278460501896916, 12.656881024540780345, 36.134618883046641713, 17.398271569196367636, 7.4665556520085374359, 127.50032128330349224, 39.03749033845087979, 65.932797171131824143, 63.848697232169797644, 27.736364557043998502, 100.63714930273636128, 56.726058498337806668, 55.084480307195917703, 112.04422391254774993, 99.625957370197284035, 37.99971817622645176, 7.3809443326499604154, 69.467144218055182137, 113.93481469183097943, 22.567316669901629211, 54.920824221255315933, 66.50497274500594358, 90.052990729953307891, 13.289174384706711862, 28.16482323008676758, 94.141795671432191739, 27.402809822426206665, 54.016021259059925796, 112.0372963053159765, 38.17823735045385547, 84.500754300359403715, 98.287027707061497495, 106.36189155733882217, 78.427059958023164654, 42.851817576120083686, 24.81049178228204255, 11.407102345641760621, 116.21005456055354443, 6.7188022090558661148, 51.151159018965699943, 83.024698016557522351, 46.460915532879880629, 90.738031236429378623, 25.25592073046209407, 12.398770035248162458, 68.372127330676448764, 18.800632263850275194, 4.9008095639619568828, 106.29640551334887277, 102.2083416769237374, 83.911872179200145183, 120.36883180488439393, 121.13678809894190636, 66.942054083770926809, 9.7255251220740319695, 44.145719285443192348, 75.650065242534765275, 108.2266484712890815, 108.4390623043072992, 110.12039591115899384, 93.96857457006626646, 126.08427987147661042, 113.44137140997918323, 15.889775957348319935, 35.634189228279865347, 1.1509910842105455231, 109.67595352968783118, 2.3641394862097513396, 42.463399498526996467, 66.260829388073034352, 119.59344455183963873, 126.8721422095404705, 
79.791265906766057014, 124.1674354074166331, 127.25707304776733508, 59.047618945904105203, 61.303465348533791257, 77.694933709375618491, 110.98738308950851206, 11.435915088019100949, 115.33621769562523696, 19.443700908414029982, 47.908205582083610352, 96.175886799166619312, 111.13211169152418734, 11.590471993691608077, 1.3502810770623909775, 27.666631455194874434, 108.30888688351842575, 39.78272600899435929, 33.441109138901083497, 21.051356808897253359, 49.914580767715960974, 13.195053094284958206, 0.047618210828659357503, 80.400997926146374084, 13.22722152755886782, 94.523245429551025154, 4.4538516664069902617, 92.866832575771695701, 34.790472628566931235, 106.6476230632943043, 64.320283420271152863, 50.021641172650561202, 109.92151562493745587, 53.423016930246376432, 94.998973930178181035, 125.79094429436372593, 42.290447492090606829, 31.022263250066316687, 87.12430126690378529, 83.113528232046519406, 75.077900488751765806, 0.0088134439938585273921, 92.42858424720907351, 5.9708735000676824711, 7.0065412849216954783, 24.087052027258323506, 59.923752653023257153, 29.512963928369572386, 7.7412174205201154109, 65.954385612527403282, 53.564407003948872443, 35.901415160285978345, 62.85064075254558702, 11.058482910768361762, 79.42091279730811948, 35.047411902494786773, 95.35884358946714201, 92.727986923076969106, 3.5646900613646721467, 35.059813100753672188, 86.905433424330112757, 50.179576635608100332, 55.374992946562997531, 99.495489151282527018, 55.272823999363026815, 87.229943928981811041, 15.603583435164182447, 127.74602659368611057, 73.708267254532984225, 54.360421616245730547, 1.9671351541328476742, 22.041115167608950287, 25.275022589266882278, 2.9256171877423184924, 127.48366293627987034, 118.78367325053477543, 58.546823010812659049, 117.02429854081492522, 91.087178864370798692, 90.829498732087813551, 114.84596462674744544, 95.07294236300731427, 38.430789331574487733, 115.36281311287893914, 95.06753611517342506, 126.61007162597161368, 50.107575614852976287, 
84.853491363919602009, 124.57548368432617281, 94.532263849323499016, 70.760728857145295478, 87.870130627448816085, 52.326364118325727759, 4.2914358762427582406, 91.105504536109947367, 92.218951361883227946, 22.02200978368637152, 126.11772144665155793, 125.24003595379690523, 97.654142478102585301, 93.907720498180424329, 113.80074920932020177, 96.371680103595281253, 94.171644404916150961, 17.015342982278525596, 59.727422431620652787, 43.818527725987223675, 26.799863267224282026, 84.621010202910838416, 0.79232497435077675618, 88.539620083960471675, 110.28790029607625911, 83.509401902203535428, 21.281656466981075937, 102.26449455739930272, 43.310631941360043129, 8.4984023888137016911, 90.246924535407742951, 42.038765385368606076, 110.31103769781111623, 67.653535138513689162, 79.747009083548618946, 23.660551003798900638, 101.83239285114905215, 8.1692768486755085178, 13.793712929866160266, 13.112909052088070894, 106.31034281794927665, 30.583740033427602611, 57.080864569161349209, 37.193122205309919082, 40.490033558617142262, 69.27884313585673226, 112.76639647166302893, 29.516630958918540273, 13.086914621206233278, 57.009407554698555032, 113.78725153014602256, 121.43967731174416258, 33.204395278818992665, 18.051447117162751965, 50.404807066479406785, 113.27565857502850122, 110.94025497378243017, 41.322670134923100704, 81.413483754087792477, 81.197846627066610381, 19.50433182380584185, 57.060775140274927253, 106.75978268034305074, 42.464190839262300869, 40.550092100362235215, 93.429567701612540986, 44.017275645968766185, 49.157846501904714387, 107.57745991956835496, 56.509219313818903174, 100.90963211656708154, 92.012837876551202498, 38.673839582348591648, 14.078012178037170088, 102.78640576389079797, 20.371832715762138832, 16.33068755952626816, 104.9789189622788399, 54.161523089795082342, 32.515573667205899255, 119.68337013893324183, 35.444611694332706975, 96.024554187904868741, 58.196344361578667304, 44.448313153574417811, 116.25431791491791955, 52.282449351390823722, 
77.40916422066220548, 100.45920376963840681, 79.644106089221168077, 61.973270125679846387, 18.999935191823169589, 63.737247835240850691, 5.2131303910246060695, 108.94721511645548162, 30.548809840514877578, 122.54439222409564536, 63.371819248866813723, 7.2264334155624965206, 93.302725551213370636, 126.20880325698453817, 24.138645035640365677, 63.050716521607682807, 104.42278026210260577, 88.342674658189935144, 44.7975306895095855, 104.25216673885006458, 9.6812270955997519195, 24.333422291703755036, 100.68922793126330362, 70.705021000514534535, 79.608304028010024922, 32.83766350962469005, 48.895369379348267103, 81.340225532334443415, 63.878427978550462285, 79.628098218057857594, 76.098615520077146357, 82.43345176566435839, 113.24396124006671016, 60.624136089412786532, 12.515195914387732046, 122.43294472554407548, 8.4038501238865137566, 86.454697551918798126, 86.627541910507716238, 0.76720955288692493923, 99.778160882648080587, 24.550280849714908982, 61.240824381293350598, 118.24546957403072156, 127.55776437084932695, 18.685024492893717252, 49.671268311220046598, 81.458566430399514502, 44.462662298134091543, 72.785612267798569519, 26.636074600741267204, 27.042753207984787878, 63.899229840328189312, 25.520971603673388017, 86.42284770954574924, 82.158668705735180993, 127.80315457984397653, 77.606278410054073902, 62.940291515264107147, 18.608858593463082798, 93.065506557726621395, 8.7936263297560799401, 4.3974447344371583313, 1.4783000475108565297, 64.189089200637681643, 4.6279345827242650557, 92.467468113969516708, 114.9106180244780262, 26.425325102103670361, 41.037090016881847987, 53.320617994828353403, 29.246740365389996441, 110.72277100548672024, 15.095186047856259393, 84.39645282818673877, 99.649136289943271549, 29.938257990437705303, 25.090474418222584063, 101.41143919683236163, 28.154244490258861333, 76.035988577998068649, 22.44166859661709168, 13.57201532833641977, 45.264967279777920339, 34.104488348530139774, 3.1965405780865694396, 56.478989228715363424, 
30.797828388062043814, 113.55920476617393433, 28.439159530153119704, 26.157667194551322609, 123.12761877225057106, 88.321424835896323202, 109.70831598402219242, 57.062312471538461978, 51.080587841137457872, 114.51202010576889734, 101.22706948049381026, 57.549978221824858338, 54.372544228499464225, 118.5753334510482091, 29.038234996016399236, 70.859539957004017197, 91.759080459265533136, 105.93054261058568954, 67.013092164608679013, 71.44294007546704961, 2.9041159652952046599, 52.811507122834882466, 59.735755837129545398, 71.481580386091081891, 61.121592339877679478, 48.253854269816656597, 24.924191457976121455, 26.914123314469179604, 41.321620964492467465, 98.10567702562912018, 73.832785565817175666, 39.215486697972664842, 16.713368575397907989, 76.161042112536961213, 1.0916566208907170221, 43.716075885160535108, 125.41909373717135168, 37.176556921003793832, 25.31376204908156069, 72.269237766093283426, 34.796543138392735273, 14.933111304020712851, 127.00064256660698447, 78.074980676905397559, 3.8655943422636482865, 127.69739446433959529, 55.472729114091634983, 73.274298605472722556, 113.45211699667561334, 110.16896061439547339, 96.088447825099137845, 71.251914740394568071, 75.99943635245290352, 14.76188866530355881, 10.934288436114002252, 99.869629383661958855, 45.134633339803258423, 109.84164844251426985, 5.009945490015525138, 52.105981459906615783, 26.578348769413423724, 56.32964646017717314, 60.283591342864383478, 54.80561964485241333, 108.03204251812348957, 96.074592610631952994, 76.356474700911348918, 41.001508600722445408, 68.574055414126632968, 84.723783114681282314, 28.854119916046329308, 85.703635152240167372, 49.6209835645640851, 22.814204691287159221, 104.42010912110708887, 13.43760441811173223, 102.30231803793503786, 38.049396033115044702, 92.921831065763399238, 53.476062472858757246, 50.511841460924188141, 24.797540070499962894, 8.7442546613528975286, 37.601264527700550389, 9.8016191279275517445, 84.592811026701383526, 76.41668335385111277, 
39.823744358403928345, 112.73766360977242584, 114.27357619788745069, 5.8841081675418536179, 19.451050244148063939, 88.291438570890022675, 23.300130485073168529, 88.453296942581800977, 88.87812460861823638, 92.240791822321625659, 59.937149140136170899, 124.16855974295685883, 98.882742819958366454, 31.779551914696639869, 71.268378456559730694, 2.301982168424729025, 91.351907059375662357, 4.7282789724195026793, 84.926798997057630913, 4.5216587761497066822, 111.18688910368291545, 125.74428441908457899, 31.582531813535752008, 120.33487081483690417, 126.51414609553467017, 118.09523789180821041, 122.60693069707122049, 27.389867418754874961, 93.974766179017024115, 22.871830176041839877, 102.67243539125047391, 38.887401816828059964, 95.816411164170858683, 64.351773598336876603, 94.264223383048374671, 23.180943987383216154, 2.700562154124781955, 55.333262910393386846, 88.617773767036851496, 79.56545201798871858, 66.882218277802166995, 42.102713617794506717, 99.829161535431921948, 26.390106188569916412, 0.095236421660956693813, 32.801995852292748168, 26.454443055121373618, 61.046490859102050308, 8.9077033328176185023, 57.733665151543391403, 69.580945257137500448, 85.295246126592246583, 0.64056684054230572656, 100.04328234530476038, 91.843031249874911737, 106.84603386049639084, 61.99794786036000005, 123.58188858872745186, 84.580894984181213658, 62.044526500136271352, 46.248602533811208559, 38.227056464096676791, 22.155800977503531612, 0.017626887991355033591, 56.85716849441814702, 11.941747000135364942, 14.013082569843390957, 48.174104054516647011, 119.84750530604651431, 59.025927856739144772, 15.482434841043868801, 3.908771225054806564, 107.12881400789774489, 71.80283032057195669, 125.70128150509117404, 22.116965821540361503, 30.841825594616238959, 70.094823804993211525, 62.717687178937921999, 57.455973846153938212, 7.1293801227293442935, 70.119626201510982355, 45.810866848660225514, 100.35915327121620066, 110.74998589312963304, 70.990978302565054037, 110.54564799872969161, 
46.459887857963622082, 31.207166870332002873, 127.49205318737585912, 19.41653450906596845, 108.72084323249146109, 3.9342703082656953484, 44.082230335217900574, 50.550045178533764556, 5.8512343754882749636, 126.96732587256337865, 109.56734650106955087, 117.09364602162895608, 106.04859708163348841, 54.174357728745235363, 53.65899746417926508, 101.69192925349852885, 62.145884726014628541, 76.861578663152613444, 102.72562622576151625, 62.135072230350488098, 125.22014325194322737, 100.21515122970959055, 41.706982727842841996, 121.15096736865598359, 61.064527698650636012, 13.521457714294228936, 47.74026125489763217, 104.65272823665145552, 8.5828717524891544599, 54.211009072223532712, 56.437902723766455892, 44.044019567376381019, 124.23544289330675383, 122.48007190759744844, 67.30828495620880858, 59.815440996364486637, 99.601498418644041521, 64.743360207194200484, 60.343288809832301922, 34.030685964557051193, 119.45484486324130557, 87.637055451974447351, 53.599726534452202031, 41.242020405821676832, 1.5846499487051914912, 49.079240167924581328, 92.575800592156156199, 39.018803804407070857, 42.563312933965789853, 76.528989114798605442, 86.621263882720086258, 16.996804777631041361, 52.493849070815485902, 84.07753077074085013, 92.622075395625870442, 7.3070702770310163032, 31.494018167097237892, 47.321102007601439254, 75.664785702298104297, 16.338553697351017036, 27.587425859732320532, 26.225818104179779766, 84.620685635898553301, 61.167480066855205223, 114.16172913832269842, 74.386244410619838163, 80.980067117237922503, 10.5576862717171025, 97.532792943326057866, 59.033261917837080546, 26.173829242412466556, 114.01881510940074804, 99.574503060295683099, 114.87935462348832516, 66.40879055763798533, 36.10289423432550393, 100.80961413296245155, 98.551317150060640415, 93.88050994756486034, 82.645340269849839387, 34.826967508179222932, 34.395693254133220762, 39.0086636476116837, 114.12155028055349248, 85.519565360686101485, 84.928381678524601739, 81.100184200728108408, 
58.859135403225081973, 88.034551291937532369, 98.315693003813066753, 87.154919839136709925, 113.01843862764144433, 73.819264233137801057, 56.025675753102404997, 77.347679164697183296, 28.156024356077978155, 77.57281152778523392, 40.743665431524277665, 32.661375119056174299, 81.957837924557679798, 108.32304617959016468, 65.03114733441543649, 111.36674027786648367, 70.889223388669051928, 64.049108375809737481, 116.39268872315733461, 88.896626307152473601, 104.5086358298358391, 104.56489870278528542, 26.818328441328048939, 72.918407539280451601, 31.288212178442336153, 123.94654025135969277, 37.999870383646339178, 127.47449567048170138, 10.426260782052850118, 89.894430232910963241, 61.097619681033393135, 117.08878444819129072, 126.74363849773362745, 14.45286683112863102, 58.605451102430379251, 124.41760651396907633, 48.277290071280731354, 126.10143304321900359, 80.845560524208849529, 48.685349316379870288, 89.595061379022808978, 80.504333477700129151, 19.362454191199503839, 48.666844583411148051, 73.378455862530245213, 13.410042001032707049, 31.216608056020049844, 65.675327019249380101, 97.790738758700172184, 34.680451064668886829, 127.75685595710456255, 31.256196436119353166, 24.197231040157930693, 36.86690353133235476, 98.487922480133420322, 121.24827217882557306, 25.03039182877910207, 116.86588945108815096, 16.807700247776665492, 44.909395103841234231, 45.255083821015432477, 1.5344191057774878573, 71.556321765296161175, 49.100561699429817963, 122.48164876259033917, 108.4909391480650811, 127.1155287416986539, 37.370048985787434503, 99.342536622443731176, 34.917132860799029004, 88.925324596271821065, 17.571224535597139038, 53.272149201482534409, 54.085506415969575755, 127.79845968065637862, 51.041943207350414013, 44.845695419095136458, 36.317337411473999964, 127.60630915969159105, 27.212556820108147804, 125.88058303052821429, 37.217717186929803574, 58.13101311545324279, 17.58725265951215988, 8.7948894688743166625, 2.9566000950253510382, 0.37817840127536328509, 
9.2558691654521680903, 56.934936227942671394, 101.8212360489560524, 52.850650204207340721, 82.074180033763695974, 106.64123598965670681, 58.493480730783630861, 93.445542010973440483, 30.190372095716156764, 40.79290565637711552, 71.298272579886543099, 59.876515980875410605, 50.180948836448806105, 74.822878393664723262, 56.308488980521360645, 24.071977155996137299, 44.88333719323418336, 27.144030656672839541, 90.529934559555840679, 68.208976697060279548, 6.3930811561731388792, 112.95797845743072685, 61.595656776127725607, 99.118409532347868662, 56.878319060306239408, 52.315334389106283197, 118.25523754450478009, 48.642849671796284383, 91.416631968044384848, 114.12462494307692396, 102.16117568227855372, 101.02404021154143265, 74.454138960987620521, 115.09995644364971668, 108.74508845700256643, 109.1506669020964182, 58.076469992036436452, 13.719079914011672372, 55.518160918534704251, 83.861085221175017068, 6.0261843292209960055, 14.88588015093409922, 5.8082319305904093198, 105.62301424566976493, 119.47151167426272877, 14.963160772182163782, 122.24318467975535896, 96.507708539633313194, 49.848382915952242911, 53.828246628938359208, 82.643241928988572909, 68.21135405126187834, 19.665571131634351332, 78.430973395945329685, 33.426737150799453957, 24.322084225073922426, 2.1833132417814340442, 87.432151770324708195, 122.83818747434270335, 74.353113842007587664, 50.627524098166759359, 16.538475532186566852, 69.593086276785470545, 29.86622260804506368, 126.00128513321760693, 28.149961353810795117, 7.7311886845309345517, 127.39478892867919058, 110.94545822818690795, 18.548597210949083092, 98.904233993354864651, 92.337921228794584749, 64.176895650201913668, 14.50382948079277412, 23.998872704905807041, 29.523777330607117619, 21.868576872231642483, 71.739258767323917709, 90.269266679610154824, 91.683296885028539691, 10.019890980031050276, 104.21196291981323157, 53.156697538826847449, 112.65929292035434628, 120.56718268572876696, 109.61123928970846464, 88.064085036250617122, 
64.149185221267543966, 24.712949401822697837, 82.003017201448528795, 9.1481108282569039147, 41.447566229362564627, 57.708239832096296595, 43.407270304480334744, 99.2419671291281702, 45.628409382577956421, 80.840218242214177735, 26.875208836227102438, 76.604636075870075729, 76.098792066233727382, 57.843662131526798476, 106.95212494571751449, 101.02368292184837628, 49.595080140999925788, 17.488509322705795057, 75.202529055401100777, 19.603238255858741468, 41.18562205340640503, 24.83336670770222554, 79.647488716811494669, 97.475327219548489666, 100.54715239577490138, 11.768216335083707236, 38.902100488299765857, 48.582877141780045349, 46.600260970149975037, 48.906593885167239932, 49.756249217240110738, 56.481583644643251318, 119.87429828027597978, 120.33711948591735563, 69.765485639916732907, 63.559103829396917718, 14.536756913119461387, 4.6039643368530960288, 54.703814118751324713, 9.4565579448426433373, 41.853597994115261827, 9.0433175523030513432, 94.373778207369468873, 123.48856883816915797, 63.165063627075141994, 112.66974162967744633, 125.02829219107297831, 108.19047578362005879, 117.21386139414244099, 54.779734837513387902, 59.949532358034048229, 45.743660352087317733, 77.344870782500947826, 77.774803633659757907, 63.632822328345355345, 0.70354719667739118449, 60.528446766096749343, 46.361887974766432308, 5.4011243082495639101, 110.66652582079041167, 49.23554753407734097, 31.13090403598107514, 5.7644365556043339893, 84.205427235592651414, 71.658323070863843895, 52.780212377139832824, 0.19047284332191338763, 65.603991704585496336, 52.908886110242747236, 122.09298171820410062, 17.815406665635237005, 115.46733030309042078, 11.161890514278638875, 42.590492253184493165, 1.2811336810882494319, 72.086564690613158746, 55.686062499753461452, 85.692067720992781688, 123.99589572072363808, 119.1637771774585417, 41.161789968362427317, 124.0890530002725427, 92.497205067622417118, 76.454112928193353582, 44.311601955007063225, 0.035253775982710067183, 113.71433698883629404, 
23.883494000270729885, 28.026165139686781913, 96.348208109036932001, 111.69501061209302861, 118.05185571347828954, 30.964869682087737601, 7.8175424501132511068, 86.257628015799127752, 15.60566064114755136, 123.40256301018234808, 44.233931643080723006, 61.683651189236115897, 12.189647609986423049, 125.435374357875844, 114.91194769230787642, 14.258760245462326566, 12.23925240302196471, 91.621733697324089007, 72.718306542436039308, 93.499971786262904061, 13.981956605133746052, 93.091295997459383216, 92.919775715927244164, 62.414333740667643724, 126.98410637475171825, 38.833069018135574879, 89.441686464982922189, 7.8685406165313906968, 88.164460670435801148, 101.10009035706752911, 11.702468750980187906, 125.93465174513039528, 91.134693002142739715, 106.18729204326155013, 84.09719416326697683, 108.34871545749047073, 107.31799492836216814, 75.383858506997057702, 124.29176945203289506, 25.723157326308864867, 77.451252451523032505, 124.27014446070461418, 122.44028650389009272, 72.430302459419181105, 83.413965455689321971, 114.30193473731196718, 122.12905539730127202, 27.042915428588457871, 95.48052250979890232, 81.305456473302911036, 17.16574350497830892, 108.42201814444706542, 112.87580544753291178, 88.088039134756400017, 120.47088578661714564, 116.96014381519853487, 6.6165699124176171608, 119.63088199273261125, 71.202996837291721022, 1.4867204143884009682, 120.68657761966460384, 68.061371929117740365, 110.90968972648624913, 47.274110903948894702, 107.19945306890440406, 82.484040811643353663, 3.1692998974103829823, 98.158480335852800636, 57.151601184312312398, 78.037607608814141713, 85.126625867935217684, 25.057978229600848863, 45.242527765440172516, 33.993609555262082722, 104.98769814163460978, 40.155061541485338239, 57.244150791251740884, 14.614140554062032606, 62.988036334194475785, 94.642204015206516488, 23.329571404596208595, 32.677107394702034071, 55.174851719464641064, 52.451636208363197511, 41.241371271797106601, 122.33496013371404842, 100.32345827664903481, 
20.772488821243314305, 33.960134234475845005, 21.115372543437842978, 67.065585886652115732, 118.06652383567416109, 52.34765848482857109, 100.03763021880513406, 71.149006120595004177, 101.7587092469802883, 4.8175811152759706602, 72.20578846865464584, 73.619228265928541077, 69.102634300124918809, 59.761019895133358659, 37.290680539699678775, 69.653935016358445864, 68.791386508266441524, 78.017327295227005379, 100.24310056111062295, 43.039130721375840949, 41.856763357052841457, 34.200368401456216816, 117.71827080645380192, 48.069102583878702717, 68.631386007629771484, 46.309839678277057828, 98.036877255286526633, 19.638528466275602113, 112.05135150620844797, 26.695358329394366592, 56.312048712159594288, 27.145623055570467841, 81.487330863048555329, 65.322750238115986576, 35.915675849118997576, 88.646092359183967346, 2.0622946688345109578, 94.733480555736605311, 13.778446777341741836, 0.098216751623112941161, 104.78537744631830719, 49.79325261430858518, 81.017271659675316187, 81.129797405570570845, 53.636656882659735857, 17.836815078560903203, 62.576424356888310285, 119.89308050271938555, 75.999740767296316335, 126.94899134096340276, 20.852521564109338215, 51.788860465821926482, 122.19523936207042425, 106.17756889638621942, 125.48727699546725489, 28.90573366225726204, 117.21090220486439648, 120.83521302793815266, 96.554580142565100687, 124.20286608644164517, 33.691121048417699058, 97.370698632763378555, 51.190122758049255935, 33.008666955400258303, 38.724908382399007678, 97.33368916682593408, 18.756911725060490426, 26.820084002065414097, 62.433216112043737667, 3.3506540385023981798, 67.581477517400344368, 69.360902129341411637, 127.5137119142091251, 62.512392872238706332, 48.394462080315861385, 73.733807062668347498, 68.975844960270478623, 114.49654435765114613, 50.060783657561842119, 105.73177890217630193, 33.615400495553330984, 89.818790207682468463, 90.510167642030864954, 3.0688382115586136933, 15.11264353059232235, 98.201123398863273906, 116.96329752518431633, 
88.981878296133800177, 126.23105748339730781, 74.740097971578506986, 70.685073244887462351, 69.834265721601695986, 49.85064919254364213, 35.142449071194278076, 106.54429840296506882, 108.17101283194278949, 127.59691936131275725, 102.08388641470082803, 89.691390838190272916, 72.634674822951637907, 127.2126183193831821, 54.425113640219933586, 123.76116606105642859, 74.435434373859607149, 116.26202623090648558, 35.174505319024319761, 17.589778937748633325, 5.9132001900507020764, 0.75635680255072657019, 18.511738330904336181, 113.86987245588898077, 75.6424720979121048, 105.70130040841831942, 36.148360067531029927, 85.282471979313413613, 116.9869614615708997, 58.891084021946880966, 60.380744191432313528, 81.585811312757869018, 14.596545159773086198, 119.75303196175082121, 100.36189767290125019, 21.645756787329446524, 112.61697796104272129, 48.143954311992274597, 89.76667438646836672, 54.288061313349317061, 53.059869119115319336, 8.4179533941205590963, 12.786162312349915737, 97.915956914861453697, 123.19131355225908919, 70.236819064695737325, 113.75663812061247882, 104.63066877821620437, 108.51047508900956018, 97.285699343596206745, 54.833263936092407675, 100.24924988615748589, 76.322351364560745424, 74.048080423086503288, 20.908277921978879021, 102.19991288730307133, 89.490176914008770837, 90.301333804192836396, 116.1529399840728729, 27.438159828023344744, 111.03632183707304648, 39.722170442350034136, 12.052368658441992011, 29.771760301868198439, 11.616463861184456619, 83.246028491343167843, 110.94302334852545755, 29.926321544367965544, 116.48636935951435589, 65.015417079266626388, 99.696765831904485822, 107.6564932578803564, 37.286483857980783796, 8.4227081025237566791, 39.331142263272340642, 28.86194679189065937, 66.853474301598907914, 48.644168450147844851, 4.3666264835665060673, 46.86430354065305437, 117.67637494868904469, 20.706227684015175328, 101.25504819633351872, 33.076951064376771683, 11.186172553570941091, 59.732445216093765339, 124.00257026643521385, 
56.299922707621590234, 15.462377369065507082, 126.78957785736201913, 93.89091645637381589, 37.097194421901804162, 69.808467986713367281, 56.675842457592807477, 0.35379130040382733569, 29.007658961585548241, 47.997745409811614081, 59.047554661214235239, 43.737153744466922944, 15.478517534651473397, 52.538533359223947627, 55.366593770057079382, 20.039781960065738531, 80.42392583962646313, 106.31339507765733288, 97.318585840712330537, 113.13436537146117189, 91.222478579420567257, 48.128170072504872223, 0.2983704425350879319, 49.425898803649033653, 36.006034402897057589, 18.296221656513807829, 82.895132458725129254, 115.41647966419259319, 86.814540608964307467, 70.483934258256340399, 91.256818765159550821, 33.68043648442835547, 53.750417672457842855, 25.209272151743789436, 24.197584132467454765, 115.68732426305723493, 85.904249891438666964, 74.047365843696752563, 99.190160281999851577, 34.977018645415228093, 22.405058110802201554, 39.206476511717482936, 82.37124410681281006, 49.666733415408089058, 31.294977433626627317, 66.95065443910061731, 73.094304791549802758, 23.536432670167414472, 77.804200976599531714, 97.165754283560090698, 93.200521940299950074, 97.813187770334479865, 99.512498434480221476, 112.96316728929014062, 111.74859656055559753, 112.67423897183834924, 11.530971279833465815, 127.11820765879383544, 29.073513826238922775, 9.2079286737061920576, 109.40762823750264943, 18.913115889685286675, 83.707195988230523653, 18.086635104609740665, 60.747556414742575726, 118.97713767633831594, 126.33012725415392197, 97.339483259358530631, 122.0565843821495946, 88.380951567243755562, 106.42772278828488197, 109.5594696750267758, 119.89906471606809646, 91.487320704178273445, 26.68974156500553363, 27.549607267319515813, 127.26564465669071069, 1.407094393354782369, 121.05689353219349869, 92.723775949532864615, 10.802248616502765799, 93.333051641580823343, 98.47109506815468194, 62.261808071962150279, 11.528873111212305957, 40.410854471188940806, 15.316646141727687791, 
105.56042475427966565, 0.38094568664746475406, 3.2079834091709926724, 105.81777222048913245, 116.18596343640820123, 35.630813331274111988, 102.93466060618084157, 22.32378102855727775, 85.180984506368986331, 2.5622673621801368427, 16.173129381226317491, 111.37212499951056088, 43.384135441989201354, 119.99179144145091414, 110.3275543549170834, 82.323579936728492612, 120.17810600054872339, 56.994410135248472216, 24.908225856386707164, 88.62320391001412645, 0.070507551969058113173, 99.42867397767258808, 47.766988000541459769, 56.052330279377201805, 64.696416218077501981, 95.390021224189695204, 108.10371142695657909, 61.929739364175475203, 15.635084900230140192, 44.515256031598255504, 31.211321282298740698, 118.80512602036469616, 88.467863286161446013, 123.36730237847223179, 24.379295219976484077, 122.87074871575532597, 101.82389538461575285, 28.517520490924653132, 24.478504806047567399, 55.243467394651815994, 17.436613084875716595, 58.999943572525808122, 27.963913210271130083, 58.18259199492240441, 57.839551431854488328, 124.82866748133892543, 125.9682127495034365, 77.666138036274787737, 50.883372929965844378, 15.737081233066419372, 48.328921340875240276, 74.200180714135058224, 23.404937501964013791, 123.86930349026442855, 54.269386004289117409, 84.374584086526738247, 40.194388326537591638, 88.697430914984579431, 86.635989856727974257, 22.767717013997753384, 120.58353890406579012, 51.446314652621367713, 26.902504903049702989, 120.54028892141286633, 116.88057300778382341, 16.860604918842000188, 38.82793091138228192, 100.60386947462757234, 116.25811079460254405, 54.085830857180553721, 62.961045019601442618, 34.610912946605822071, 34.331487009960255818, 88.844036288897768827, 97.751610895069461549, 48.176078269516438013, 112.94177157323792926, 105.92028763039706973, 13.2331398248388723, 111.26176398546886048, 14.405993674583442044, 2.9734408287804399151, 113.37315523933284567, 8.1227438582391187083, 93.819379452972498257, 94.548221807901427383, 86.398906137812446104, 
36.968081623286707327, 6.3385997948244039435, 68.316960671705601271, 114.30320236862826278, 28.075215217631921405, 42.253251735870435368, 50.115956459201697726, 90.485055530880345032, 67.987219110527803423, 81.975396283269219566, 80.310123082970676478, 114.48830158250348177, 29.228281108124065213, 125.97607266839258955, 61.284408030413032975, 46.659142809192417189, 65.354214789404068142, 110.34970343892928213, 104.90327241672639502, 82.482742543597851181, 116.66992026743173483, 72.64691655329806963, 41.544977642486628611, 67.920268468951690011, 42.230745086879323935, 6.1311717733078694437, 108.13304767134832218, 104.69531696966078016, 72.075260437613906106, 14.298012241193646332, 75.517418493960576598, 9.6351622305555792991, 16.411576937312929658, 19.238456531860720133, 10.205268600249837618, 119.52203979026671732, 74.581361079402995529, 11.307870032720529707, 9.5827730165365210269, 28.034654590457648737, 72.486201122221245896, 86.078261442751681898, 83.713526714105682913, 68.400736802916071611, 107.43654161290760385, 96.138205167761043413, 9.2627720152595429681, 92.619679356557753636, 68.073754510573053267, 39.277056932554842206, 96.102703012416895945, 53.390716658788733184, 112.62409742431918858, 54.291246111144573661, 34.974661726100748638, 2.6455004762319731526, 71.83135169824163313, 49.292184718367934693, 4.1245893376726598945, 61.466961111473210622, 27.556893554683483671, 0.19643350324986386113, 81.570754892636614386, 99.586505228620808339, 34.034543319354270352, 34.25959481114114169, 107.27331376531947171, 35.673630157121806405, 125.15284871378025855, 111.78616100544240908, 23.999481534596270649, 125.89798268192680553, 41.705043128218676429, 103.57772093164749094, 116.39047872414448648, 84.355137792776076822, 122.97455399093450978, 57.811467324518162059, 106.42180440973243094, 113.67042605587994331, 65.109160285133839352, 120.40573217288329033, 67.382242096839036094, 66.74139726552675711, 102.38024551609851187, 66.017333910800516605, 77.449816764801653335, 
66.667378333651868161, 37.513823450120980851, 53.640168004130828194, 124.86643222408747533, 6.7013080770084343385, 7.1629550348043267149, 10.721804258686461253, 127.0274238284182502, 125.02478574447741266, 96.788924160635360749, 19.467614125340332976, 9.9516899205445952248, 100.99308871530229226, 100.12156731512732222, 83.463557804352603853, 67.230800991110299947, 51.637580415368574904, 53.020335284061729908, 6.1376764231172273867, 30.225287061188282678, 68.40224679773018579, 105.92659505037227063, 49.963756592271238333, 124.46211496679825359, 21.48019594316065195, 13.370146489778562682, 11.668531443203391973, 99.701298385087284259, 70.284898142392194131, 85.088596805933775613, 88.342025663889216958, 127.19383872262915247, 76.167772829405294033, 51.382781676384183811, 17.269349645906913793, 126.42523663876636419, 108.85022728044350515, 119.52233212211649516, 20.870868747722852277, 104.52405246181297116, 70.349010638048639521, 35.17955787549726665, 11.826400380101404153, 1.5127136051014531404, 37.02347666181231034, 99.739744911781599512, 23.284944195827847579, 83.402600816836638842, 72.296720135065697832, 42.564943958630465204, 105.9739229231417994, 117.78216804389376193, 120.76148838286826503, 35.171622625515738036, 29.193090319549810374, 111.5060639235052804, 72.723795345802500378, 43.291513574662531028, 97.23395592208544258, 96.287908623988187173, 51.53334877293673344, 108.57612262669863412, 106.11973823823063867, 16.835906788241118193, 25.572324624703469453, 67.831913829726545373, 118.38262710452181636, 12.473638129395112628, 99.513276241228595609, 81.261337556436046725, 89.020950178022758337, 66.57139868719241349, 109.66652787218845333, 72.498499772318609757, 24.644702729125128826, 20.096160846173006576, 41.816555843961396022, 76.399825774609780638, 50.980353828021179652, 52.602667608389310772, 104.30587996814574581, 54.876319656050327467, 94.072643674146092962, 79.444340884700068273, 24.104737316883984022, 59.543520603736396879, 23.232927722372551216, 
38.492056982689973665, 93.8860466970509151, 59.852643088739569066, 104.97273871903234976, 2.0308341585368907545, 71.393531663812609622, 87.312986515760712791, 74.572967715961567592, 16.845416205047513358, 78.662284526544681285, 57.723893583784956718, 5.706948603197815828, 97.288336900295689702, 8.7332529671330121346, 93.728607081309746718, 107.35274989737808937, 41.412455368033988634, 74.510096392667037435, 66.153902128753543366, 22.372345107141882181, 119.46489043219116866, 120.00514053287042771, 112.59984541524318047, 30.924754738134652143, 125.57915571472767624, 59.781832912747631781, 74.194388843807246303, 11.616935973426734563, 113.35168491518561495, 0.70758260080765467137, 58.015317923171096481, 95.995490819623228163, 118.09510932242847048, 87.474307488933845889, 30.957035069302946795, 105.07706671844789525, 110.73318754011415876, 40.07956392013511504, 32.847851679256564239, 84.626790155318303732, 66.637171681428299053, 98.268730742925981758, 54.444957158841134515, 96.256340145009744447, 0.5967408850701758638, 98.851797607298067305, 72.012068805794115178, 36.592443313031253638, 37.790264917453896487, 102.83295932838882436, 45.629081217932252912, 12.967868516516318778, 54.513637530319101643, 67.360872968856710941, 107.50083534491568571, 50.418544303487578873, 48.395168264938547509, 103.37464852611810784, 43.808499782880971907, 20.094731687397143105, 70.380320563999703154, 69.954037290830456186, 44.810116221604403108, 78.41295302343860385, 36.742488213625620119, 99.333466830816178117, 62.589954867256892612, 5.9013088782048725989, 18.188609583099605516, 47.072865340338466922, 27.608401953202701407, 66.331508567123819375, 58.401043880603538128, 67.626375540672597708, 71.024996868964080932, 97.92633457858391921, 95.497193121114833048, 97.348477943676698487, 23.061942559670569608, 126.23641531758767087, 58.14702765247784555, 18.415857347416022094, 90.815256475005298853, 37.826231779374211328, 39.414391976461047307, 36.17327020921948133, 121.49511282948878943, 
109.95427535268026986, 124.66025450831148191, 66.678966518720699241, 116.11316876430282719, 48.761903134491149103, 84.855445576573401922, 91.118939350057189586, 111.7981294321398309, 54.97464140835654689, 53.37948313001106726, 55.099214534642669605, 126.53128931338142138, 2.8141887867132027168, 114.11378706439063535, 57.447551899065729231, 21.604497233009169577, 58.666103283165284665, 68.942190136309363879, 124.52361614392430056, 23.057746222424611915, 80.821708942377881613, 30.633292283459013561, 83.120849508559331298, 0.76189137329492950812, 6.4159668183456233237, 83.635544440981902881, 104.37192687281640247, 71.261626662551861955, 77.869321212361683138, 44.647562057118193479, 42.361969012737972662, 5.1245347243602736853, 32.346258762456272962, 94.744249999021121766, 86.768270883982040687, 111.98358288290546625, 92.655108709837804781, 36.647159873460623203, 112.35621200109744677, 113.98882027049694443, 49.816451712777052308, 49.2464078200282529, 0.14101510394175420515, 70.85734795534517616, 95.533976001082919538, 112.10466055875804159, 1.39283243615864194, 62.780042448383028386, 88.207422853913158178, 123.85947872835095041, 31.270169800460280385, 89.030512063196511008, 62.422642564601119375, 109.6102520407330303, 48.935726572326530004, 118.73460475694446359, 48.758590439956606133, 117.74149743151428993, 75.647790769231505692, 57.035040981849306263, 48.957009612095134798, 110.48693478930363199, 34.87322616975143319, 117.99988714505161624, 55.927826420542260166, 116.36518398984480882, 115.67910286371261464, 121.65733496268148883, 123.93642549901051098, 27.332276072549575474, 101.76674585993168876, 31.474162466132838745, 96.65784268175411853, 20.400361428270116448, 46.80987500393166556, 119.73860698052885709, 108.5387720085818728, 40.749168173053476494, 80.388776653078821255, 49.394861829972796841, 45.271979713455948513, 45.535434027999144746, 113.16707780813158024, 102.89262930524273543, 53.805009806103043957, 113.08057784282937064, 105.76114601556764683, 
33.721209837684000377, 77.655861822768201819, 73.207738949258782668, 104.51622158920508809, 108.17166171436110744, 125.92209003920288524, 69.221825893211644143, 68.662974019924149616, 49.688072577799175633, 67.503221790138923097, 96.352156539036514005, 97.8835431464794965, 83.840575260794139467, 26.46627964968138258, 94.523527970937720966, 28.811987349166884087, 5.9468816575645178091, 98.746310478665691335, 16.245487716481875395, 59.638758905948634492, 61.096443615806492744, 44.797812275624892209, 73.936163246573414654, 12.677199589652445866, 8.6339213434148405213, 100.60640473725652555, 56.15043043526384281, 84.506503471740870737, 100.23191291840703343, 52.970111061764328042, 7.9744382210592448246, 35.950792566542077111, 32.620246165944990935, 100.97660316500696354, 58.456562216251768405, 123.9521453367851791, 122.56881606082606595, 93.318285618388472358, 2.7084295788117742632, 92.699406877862202236, 81.806544833456428023, 36.965485087199340342, 105.33984053486710764, 17.29383310659613926, 83.0899552849768952, 7.8405369379033800215, 84.46149017375864787, 12.262343546619376866, 88.266095342700282345, 81.390633939325198298, 16.150520875227812212, 28.596024482390930643, 23.034836987921153195, 19.270324461111158598, 32.823153874625859316, 38.476913063721440267, 20.410537200503313215, 111.04407958053707262, 21.162722158805991057, 22.615740065441059414, 19.165546033073042054, 56.069309180918935454, 16.972402244446129771, 44.156522885507001774, 39.427053428215003805, 8.8014736058357811999, 86.873083225818845676, 64.276410335522086825, 18.525544030522723915, 57.23935871311914525, 8.1475090211461065337, 78.55411386511332239, 64.205406024833791889, 106.78143331758110435, 97.248194848638377152, 108.58249222228914732, 69.949323452201497275, 5.291000952467584284, 15.66270339648326626, 98.584369436735869385, 8.2491786753453197889, 122.93392222294642124, 55.113787109366967343, 0.39286700650336570106, 35.141509785276866751, 71.173010457241616677, 68.069086638708540704, 
68.519189622285921359, 86.546627530638943426, 71.34726031424725079, 122.30569742756415508, 95.572322010884818155, 47.998963069196179276, 123.79596536385361105, 83.410086256440990837, 79.155441863298619865, 104.78095744829261093, 40.710275585555791622, 117.94910798186901957, 115.6229346490399621, 84.84360881946849986, 99.34085211176352459, 2.2183205702713166829, 112.81146434577021864, 6.7644841936817101669, 5.4827945310571521986, 76.760491032200661721, 4.0346678216046711896, 26.899633529606944649, 5.3347566673037363216, 75.027646900241961703, 107.28033600826529437, 121.73286444817495067, 13.402616154016868677, 14.32591006960865343, 21.443608517376560485, 126.05484765684013837, 122.04957148895846331, 65.577848321270721499, 38.935228250680665951, 19.903379841092828428, 73.986177430608222494, 72.243134630258282414, 38.927115608708845684, 6.4616019822205998935, 103.27516083073714981, 106.04067056812345982, 12.275352846234454773, 60.450574122380203335, 8.8044935954640095588, 83.853190100744541269, 99.927513184546114644, 120.92422993359650718, 42.9603918863213039, 26.740292979557125363, 23.337062886410421925, 71.402596770178206498, 12.569796284788026242, 42.177193611871189205, 48.684051327782071894, 126.38767744526194292, 24.335545658810588066, 102.76556335276836762, 34.538699291817465564, 124.85047327753636637, 89.700454560890648281, 111.04466424423299031, 41.741737495449342532, 81.048104923629580298, 12.698021276097279042, 70.3591157509945333, 23.652800760202808306, 3.0254272102029062808, 74.046953323628258659, 71.479489823563199025, 46.569888391659333138, 38.805201633676915662, 16.593440270131395664, 85.129887917264568387, 83.947845846287236782, 107.56433608779116184, 113.52297676573653007, 70.343245251031476073, 58.386180639103258727, 95.012127847014198778, 17.447590691605000757, 86.583027149328700034, 66.467911844174523139, 64.575817247976374347, 103.06669754587710486, 89.152245253400906222, 84.239476476461277343, 33.671813576482236385, 51.144649249410576886, 
7.6638276594567287248, 108.76525420904363273, 24.947276258793863235, 71.026552482460829196, 34.522675112875731429, 50.041900356049154652, 5.1427973743884649593, 91.333055744380544638, 16.996999544637219515, 49.289405458250257652, 40.192321692346013151, 83.633111687926430022, 24.799651549219561275, 101.9607076560423593, 105.20533521678225952, 80.611759936291491613, 109.75263931210429291, 60.145287348295823904, 30.888681769400136545, 48.209474633767968044, 119.08704120747279376, 46.465855444745102432, 76.98411396538358531, 59.772093394101830199, 119.70528617748277611, 81.945477438064699527, 4.0616683170774194878, 14.787063327628857223, 46.625973031525063561, 21.145935431923135184, 33.690832410098664695, 29.324569053089362569, 115.44778716756991344, 11.413897206399269635, 66.576673800591379404, 17.466505934269662248, 59.457214162623131415, 86.705499794756178744, 82.824910736071615247, 21.020192785337712849, 4.3078042575070867315, 44.744690214287402341, 110.92978086438597529, 112.01028106574449339, 97.199690830486360937, 61.849509476272942265, 123.15831142945535248, 119.56366582549890154, 20.388777687614492606, 23.233871946857107105, 98.703369830371229909, 1.4151652016153093427, 116.03063584634583094, 63.990981639250094304, 108.19021864486057893, 46.948614977867691778, 61.914070138609531568, 82.154133436899428489, 93.466375080228317529, 80.159127840270230081, 65.695703358516766457, 41.253580310640245443, 5.2743433628602360841, 68.537461485851963516, 108.88991431768590701, 64.512680290019488893, 1.1934817701439897064, 69.703595214599772589, 16.024137611588230357, 73.184886626062507275, 75.580529834911430953, 77.665918656777648721, 91.258162435864505824, 25.935737033032637555, 109.02727506064184126, 6.72174593771705986, 87.001670689831371419, 100.83708860697515775, 96.790336529880732996, 78.749297052236215677, 87.616999565761943813, 40.189463374797924189, 12.760641128003044287, 11.908074581660912372, 89.620232443212444196, 28.8259060468772077, 73.484976427254878217, 
70.666933661632356234, 125.17990973451378522, 11.802617756409745198, 36.377219166199211031, 94.145730680676933844, 55.216803906409040792, 4.6630171342476387508, 116.80208776121071423, 7.2527510813488333952, 14.049993737931799842, 67.8526691571714764, 62.994386242233304074, 66.696955887353396975, 46.123885119344777195, 124.47283063517534174, 116.2940553049556911, 36.831714694835682167, 53.630512950010597706, 75.652463558748422656, 78.828783952922094613, 72.34654041844260064, 114.99022565898121684, 91.908550705364177702, 121.32050901662660181, 5.3579330374450364616, 104.22633752860565437, 97.523806268985936185, 41.710891153150441824, 54.23787870011801715, 95.596258864279661793, 109.94928281671309378, 106.75896626002213452, 110.19842906928897719, 125.06257862676284276, 5.6283775734300434124, 100.2275741287812707, 114.89510379813145846, 43.208994466018339153, 117.33220656633420731, 9.8843802726223657373, 121.04723228784860112, 46.115492444852861809, 33.643417884755763225, 61.2665845669216651, 38.241699017118662596, 1.5237827465898590162, 12.831933636694884626, 39.27108888196744374, 80.743853745636442909, 14.523253325107361889, 27.738642424723366275, 89.295124114236386959, 84.723938025475945324, 10.249069448720547371, 64.692517524916183902, 61.488499998045881512, 45.536541767967719352, 95.967165765814570477, 57.310217419675609563, 73.294319746924884384, 96.712424002198531525, 99.977640540993888862, 99.632903425554104615, 98.492815640056505799, 0.28203020788714638911, 13.714695910693990299, 63.067952002169477055, 96.209321117516083177, 2.78566487231728388, 125.56008489676969475, 48.414845707829954335, 119.71895745670190081, 62.540339600924198749, 50.061024126393022016, 124.84528512920587673, 91.220504081466060597, 97.871453144656697987, 109.46920951388892718, 97.517180879916850245, 107.48299486303221784, 23.295581538463011384, 114.07008196369861253, 97.914019224193907576, 92.973869578607263975, 69.746452339506504359, 107.99977429010323249, 111.85565284108815831, 
104.73036797969325562, 103.35820572742886725, 115.31466992536661564, 119.87285099802102195, 54.664552145102788927, 75.533491719863377512, 62.948324932269315468, 65.31568536350823706, 40.800722856540232897, 93.619750007866969099, 111.47721396106135217, 89.077544017163745593, 81.498336346110590966, 32.77755330615764251, 98.789723659945593681, 90.543959426911897026, 91.070868056001927471, 98.334155616266798461, 77.785258610485470854, 107.61001961220972589, 98.161155685662379256, 83.522292031135293655, 67.442419675371638732, 27.311723645540041616, 18.415477898521203315, 81.032443178413814167, 88.343323428725852864, 123.84418007840940845, 10.443651786426926265, 9.3259480398482992314, 99.376145155601989245, 7.0064435802814841736, 64.704313078076665988, 67.767086292962630978, 39.681150521588278934, 52.932559299366403138, 61.047055941875441931, 57.623974698333768174, 11.893763315132673597, 69.49262095733138267, 32.490975432963750791, 119.27751781189726898, 122.19288723161298549, 89.595624551253422396, 19.872326493146829307, 25.354399179304891732, 17.267842686833319021, 73.2128094745130511, 112.3008608705313236, 41.013006943485379452, 72.463825836814066861, 105.94022212352865608, 15.948876442118489649, 71.901585133084154222, 65.240492331893619848, 73.953206330013927072, 116.91312443250717479, 119.90429067357399617, 117.13763212165576988, 58.636571236776944716, 5.4168591576271865051, 57.398813755728042452, 35.613089666912856046, 73.930970174402318662, 82.679681069734215271, 34.587666213195916498, 38.1799105699537904, 15.681073875810398022, 40.922980347517295741, 24.524687093242391711, 48.532190685404202668, 34.781267878650396597, 32.301041750455624424, 57.192048964781861287, 46.06967397584230639, 38.540648922222317196, 65.646307749251718633, 76.953826127442880534, 40.82107440101026441, 94.088159161074145231, 42.325444317611982115, 45.231480130882118829, 38.331092066146084107, 112.13861836183787091, 33.944804488892259542, 88.313045771014003549, 78.85410685643000761, 
17.602947211675200379, 45.746166451641329331, 0.55282067104417365044, 37.05108806104544783, 114.4787174262382905, 16.295018042295851046, 29.108227730230282759, 0.41081204967122175731, 85.562866635165846674, 66.496389697280392284, 89.164984444578294642, 11.898646904402994551, 10.582001904935168568, 31.3254067929701705, 69.168738873471738771, 16.498357350694277557, 117.86784444589284249, 110.22757421873393469, 0.78573401300673140213, 70.28301957055737148, 14.346020914486871334, 8.138173277420719387, 9.0383792445754806977, 45.093255061281524831, 14.694520628498139558, 116.61139485513194813, 63.144644021769636311, 95.997926138395996531, 119.59193072771086008, 38.820172512885619653, 30.310883726600877708, 81.561914896585221868, 81.420551171115221223, 107.89821596374167711, 103.24586929808356217, 41.687217638940637698, 70.68170422352704918, 4.4366411405462713446, 97.622928691540437285, 13.528968387363420334, 10.965589062117942376, 25.520982064401323441, 8.0693356432093423791, 53.799267059213889297, 10.669513334611110622, 22.055293800487561384, 86.560672016534226714, 115.46572889634990133, 26.805232308033737354, 28.65182013921730686, 42.887217034756758949, 124.10969531368391472, 116.09914297792056459, 3.1556966425414429978, 77.870456501361331902, 39.806759682189294836, 19.972354861220082967, 16.486269260516564827, 77.854231217417691369, 12.923203964444837766, 78.550321661477937596, 84.081341136246919632, 24.550705692468909547, 120.90114824476040667, 17.608987190931657096, 39.706380201492720516, 71.855026369095867267, 113.84845986719301436, 85.920783772642607801, 53.480585959114250727, 46.674125772824481828, 14.805193540356412996, 25.139592569576052483, 84.35438722374237841, 97.368102655567781767, 124.77535489052388584, 48.67109131762481411, 77.531126705536735244, 69.077398583638569107, 121.70094655507637071, 51.400909121784934541, 94.089328488465980627, 83.483474990902323043, 34.096209847259160597, 25.396042552198196063, 12.718231501989066601, 47.305601520405616611, 
6.0508544204094505403, 20.093906647260155296, 14.958979647126398049, 93.139776783318666276, 77.610403267353831325, 33.186880540266429307, 42.259775834532774752, 39.895691692578111542, 87.128672175585961668, 99.045953531476698117, 12.686490502062952146, 116.77236127821015543, 62.024255694028397556, 34.895181383213639492, 45.166054298661038047, 4.9358236883526842576, 1.1516344959563866723, 78.133395091757847695, 50.304490506805450423, 40.478952952922554687, 67.343627152964472771, 102.28929849882115377, 15.327655318917095428, 89.530508418087265454, 49.894552517591364449, 14.053104964921658393, 69.045350225751462858, 100.0838007120983093, 10.285594748780567897, 54.666111488761089277, 33.993999089274439029, 98.578810916500515305, 80.384643384695664281, 39.266223375852860045, 49.599303098442760529, 75.921415312088356586, 82.410670433568157023, 33.223519872586621204, 91.505278624208585825, 120.29057469659164781, 61.777363538800273091, 96.418949267539574066, 110.17408241494558752, 92.931710889493842842, 25.968227930767170619, 119.54418678820729838, 111.4105723549691902, 35.890954876129399054, 8.1233366341548389755, 29.574126655257714447, 93.251946063050127123, 42.291870863846270368, 67.381664820197329391, 58.649138106182363117, 102.89557433513982687, 22.827794412802177249, 5.1533476011827588081, 34.933011868539324496, 118.91442832524990081, 45.410999589515995467, 37.649821472143230494, 42.040385570675425697, 8.615608515014173463, 89.489380428578442661, 93.859561728775588563, 96.020562131492624758, 66.399381660972721875, 123.69901895254952251, 118.31662285891434294, 111.12733165099780308, 40.777555375228985213, 46.467743893717852188, 69.406739660742459819, 2.8303304032342566643, 104.06127169269166188, 127.98196327850382659, 88.380437289721157867, 93.897229955735383555, 123.82814027722270112, 36.308266873802494956, 58.932750160460273037, 32.318255680544098141, 3.3914067170335329138, 82.507160621280490886, 10.548686725724110147, 9.0749229717075650115, 89.779828635375451995, 
1.0253605800389777869, 2.3869635402916173916, 11.407190429199545179, 32.048275223180098692, 18.36977325212865253, 23.161059669826499885, 27.331837313558935421, 54.516324871729011647, 51.871474066068913089, 90.054550121283682529, 13.443491875437757699, 46.003341379666380817, 73.674177213953953469, 65.580673059765103972, 29.498594104476069333, 47.233999131523887627, 80.378926749595848378, 25.521282256006088573, 23.816149163325462723, 51.240464886424888391, 57.651812093758053379, 18.969952854513394414, 13.333867323268350447, 122.35981946902757045, 23.605235512819490395, 72.754438332402060041, 60.291461361353867687, 110.43360781281808158, 9.3260342684989154804, 105.60417552242142847, 14.50550216269766679, 28.099987475867237663, 7.7053383143429527991, 125.98877248447024613, 5.393911774706793949, 92.247770238693192368, 120.94566127035432146, 104.5881106099113822, 73.663429389671364333, 107.26102590002483339, 23.304927117500483291, 29.657567905847827205, 16.693080836888839258, 101.98045131796607166, 55.817101410731993383, 114.64101803325320361, 10.715866074893710902, 80.452675057214946719, 67.047612537971872371, 83.421782306300883647, 108.47575740023967228, 63.192517728562961565, 91.898565633429825539, 85.517932520047907019, 92.396858138581592357, 122.12515725352568552, 11.256755146863724804, 72.455148257566179382, 101.79020759626291692, 86.417988932040316286, 106.66441313266841462, 19.768760545248369453, 114.09446457569720224, 92.230984889705723617, 67.286835769511526451, 122.5331691338433302, 76.483398034237325191, 3.0475654931797180325, 25.663867273389769252, 78.542177763938525459, 33.487707491272885818, 29.046506650214723777, 55.47728484945037053, 50.590248228476411896, 41.447876050951890647, 20.49813889744473272, 1.3850350498323678039, 122.97699999609176302, 91.073083535939076683, 63.934331531629140954, 114.6204348393548571, 18.588639493853406748, 65.424848004400701029, 71.955281081991415704, 71.265806851111847209, 68.985631280113011599, 0.56406041577429277822, 
27.429391821387980599, 126.13590400434259209, 64.418642235032166354, 5.5713297446382057387, 123.1201697935393895, 96.82969141566354665, 111.43791491340380162, 125.0806792018483975, 100.12204825278968201, 121.69057025841539144, 54.441008162935759174, 67.742906289317033952, 90.938419027781492332, 67.034361759837338468, 86.965989726064435672, 46.591163076929660747, 100.14016392740086303, 67.82803844839145313, 57.947739157218165928, 11.492904679016646696, 87.999548580210102955, 95.711305682179954601, 81.460735959390149219, 78.716411454861372476, 102.62933985073686927, 111.7457019960420439, 109.32910429020921583, 23.066983439726755023, 125.89664986454226892, 2.6313707270164741203, 81.601445713084103772, 59.239500015737576177, 94.95442792212634231, 50.155088034331129165, 34.996672692221181933, 65.555106612318922998, 69.579447319894825341, 53.087918853827432031, 54.14173611200749292, 68.668311232537234901, 27.570517220974579686, 87.220039224423089763, 68.32231137132839649, 39.044584062274225289, 6.8848393507432774641, 54.623447291080083232, 36.830955797046044609, 34.064886356831266312, 48.686646857455343707, 119.6883601568188169, 20.887303572853852529, 18.651896079700236442, 70.75229031120397849, 14.012887160566606326, 1.4086261561569699552, 7.5341725859252619557, 79.362301043176557869, 105.86511859873280628, 122.09411188375088386, 115.24794939666753635, 23.787526630268985173, 10.985241914662765339, 64.98195086593113956, 110.55503562379817595, 116.38577446322960895, 51.191249102506844793, 39.744652986293658614, 50.708798358609783463, 34.535685373670276022, 18.425618949029740179, 96.601721741062647197, 82.026013886974396883, 16.9276516736317717, 83.880444247057312168, 31.897752884240617277, 15.803170266171946423, 2.4809846637908776756, 19.906412660031492123, 105.82624886501798755, 111.80858134715163033, 106.27526424331153976, 117.27314247355388943, 10.83371831525437301, 114.7976275114560849, 71.226179333829350071, 19.861940348808275303, 37.359362139468430541, 
69.175332426391832996, 76.3598211399075808, 31.362147751620796043, 81.845960695034591481, 49.049374186484783422, 97.064381370808405336, 69.562535757300793193, 64.602083500911248848, 114.38409792956372257, 92.139347951684612781, 77.081297844448272372, 3.2926154985070752446, 25.907652254885761067, 81.642148802024166798, 60.176318322148290463, 84.650888635223964229, 90.462960261764237657, 76.662184132295806194, 96.277236723675741814, 67.889608977788157063, 48.626091542031645076, 29.708213712860015221, 35.205894423350400757, 91.492332903282658663, 1.1056413420883473009, 74.102176122094533639, 100.957434852476581, 32.590036084591702092, 58.216455460460565519, 0.82162409934608149342, 43.125733270335331326, 4.992779394564422546, 50.329968889160227263, 23.797293808805989102, 21.164003809870337136, 62.650813585943978978, 10.337477746943477541, 32.996714701392193092, 107.73568889178932295, 92.455148437467869371, 1.5714680260171007831, 12.566039141114742961, 28.692041828973742668, 16.276346554841438774, 18.076758489150961395, 90.186510122563049663, 29.389041256999917096, 105.22278971026389627, 126.2892880435429106, 63.995852276795631042, 111.18386145542172017, 77.640345025771239307, 60.621767453205393394, 35.123829793174081715, 34.841102342234080425, 87.796431927486992208, 78.491738596167124342, 83.374435277884913376, 13.36340844705409836, 8.8732822810961806681, 67.24585738308087457, 27.057936774726840667, 21.931178124235884752, 51.041964128802646883, 16.138671286418684758, 107.59853411842777859, 21.339026669222221244, 44.110587600978760747, 45.121344033072091406, 102.93145779269980267, 53.610464616067474708, 57.30364027843461372, 85.774434069517155876, 120.21939062737146742, 104.19828595584112918, 6.3113932850865239743, 27.740913002722663805, 79.613519364378589671, 39.944709722440165933, 32.972538521036767634, 27.708462434835382737, 25.84640792889331351, 29.100643322959513171, 40.162682272493839264, 49.101411384941457072, 113.80229648952081334, 35.217974381863314193, 
79.412760402989079012, 15.710052738195372513, 99.696919734389666701, 43.841567545285215601, 106.96117191822850145, 93.348251545648963656, 29.610387080712825991, 50.279185139155742945, 40.708774447488394799, 66.736205311139201513, 121.55070978104777168, 97.342182635253266199, 27.062253411077108467, 10.154797167280776193, 115.40189311015274143, 102.80181824357350706, 60.178656976935599232, 38.966949981804646086, 68.192419694518321194, 50.792085104396392126, 25.436463003978133202, 94.611203040811233222, 12.101708840818901081, 40.187813294520310592, 29.917959294256434077, 58.27955356664097053, 27.220806534707662649, 66.373761080532858614, 84.519551669065549504, 79.791383385159861064, 46.257344351175561314, 70.091907062953396235, 25.372981004129542271, 105.54472255642031087, 124.04851138805679511, 69.790362766427278984, 90.332108597322076093, 9.8716473767053685151, 2.3032689919164113235, 28.266790183515695389, 100.60898101361090085, 80.957905905848747352, 6.6872543059325835202, 76.578596997642307542, 30.655310637837828835, 51.061016836174530908, 99.789105035182728898, 28.106209929846954765, 10.090700451506563695, 72.167601424196618609, 20.571189497561135795, 109.33222297752217855, 67.987998178552516038, 69.157621833004668588, 32.769286769391328562, 78.532446751705720089, 99.198606196889159037, 23.842830624180351151, 36.821340867136314046, 66.447039745176880388, 55.010557248420809628, 112.58114939318329562, 123.55472707760418416, 64.837898535082786111, 92.34816482989481301, 57.863421778987685684, 51.936455861537979217, 111.08837357641823473, 94.821144709938380402, 71.781909752258798108, 16.246673268309677951, 59.148253310519066872, 58.503892126100254245, 84.583741727696178714, 6.7633296403946587816, 117.29827621236836421, 77.791148670279653743, 45.655588825604354497, 10.306695202365517616, 69.866023737082286971, 109.82885665049980162, 90.821999179031990934, 75.299642944286460988, 84.080771141350851394, 17.231217030031984905, 50.978760857160523301, 59.719123457554815104, 
64.041124262988887494, 4.7987633219490817282, 119.398037905102683, 108.63324571783232386, 94.254663301995606162, 81.555110750457970425, 92.935487787439342355, 10.813479321484919637, 5.6606608064721513074, 80.122543385386961745, 127.96392655701129115, 48.760874579445953714, 59.794459911474405089, 119.65628055444540223, 72.616533747604989912, 117.86550032092054607, 64.636511361088196281, 6.7828134340707038064, 37.014321242560981773, 21.097373451448220294, 18.149845943418768002, 51.55965727075090399, 2.0507211600779555738, 4.7739270805832347833, 22.814380858399090357, 64.096550446360197384, 36.73954650425730506, 46.32211933965299977, 54.663674627117870841, 109.03264974345802329, 103.74294813214146416, 52.109100242567365058, 26.886983750875515398, 92.006682759336399613, 19.348354427911544917, 3.1613461195302079432, 58.997188208952138666, 94.467998263051413232, 32.757853499191696756, 51.042564512012177147, 47.632298326650925446, 102.48092977284977678, 115.30362418751610676, 37.939905709026788827, 26.667734646540338872, 116.7196389380551409, 47.21047102564261877, 17.508876664804120082, 120.58292272271137335, 92.867215625636163168, 18.65206853700146894, 83.208351044842856936, 29.011004325395333581, 56.199974951734475326, 15.410676628689543577, 123.97754496894049225, 10.787823549417225877, 56.495540477390022716, 113.8913225407122809, 81.176221219826402375, 19.326858779346366646, 86.522051800049666781, 46.609854235000966582, 59.315135811695654411, 33.386161673781316495, 75.960902635932143312, 111.63420282146762474, 101.28203606650640722, 21.431732149791059783, 32.905350114433531417, 6.0952250759473827202, 38.843564612605405273, 88.951514800479344558, 126.38503545712956111, 55.797131266863289056, 43.035865040099452017, 56.793716277166822692, 116.25031450705137104, 22.513510293731087586, 16.910296515135996742, 75.580415192529471824, 44.83597786408427055, 85.328826265336829238, 39.537521090500376886, 100.18892915139804245, 56.461969779415085213, 6.5736715390266908798, 
117.06633826769029838, 24.966796068474650383, 6.095130986359436065, 51.327734546783176484, 29.084355527877050918, 66.975414982545771636, 58.093013300429447554, 110.95456969890074106, 101.18049645695282379, 82.895752101903781295, 40.99627779488946544, 2.7700700996683735866, 117.95399999218716403, 54.146167071878153365, 127.86866306326191989, 101.24086967870971421, 37.177278987710451474, 2.8496960088050400373, 15.910562163986469386, 14.531613702227332396, 9.9712625602260231972, 1.1281208315485855564, 54.858783642779599177, 124.27180800868882216, 0.83728447006433270872, 11.142659489280049456, 118.24033958708241698, 65.659382831330731278, 94.875829826811241219, 122.16135840370043297, 72.244096505583001999, 115.38114051683078287, 108.88201632587515633, 7.485812578637705883, 53.876838055566622643, 6.0687235196783149149, 45.931979452128871344, 93.182326153862959472, 72.280327854805364041, 7.6560768967829062603, 115.89547831443633186, 22.985809358036931371, 47.999097160423843889, 63.422611364363547182, 34.921471918783936417, 29.432822909726382932, 77.258679701477376511, 95.491403992087725783, 90.658208580418431666, 46.133966879457148025, 123.79329972908453783, 5.2627414540365862194, 35.202891426168207545, 118.47900003147879033, 61.908855844252684619, 100.31017606866225833, 69.993345384446001844, 3.1102132246414839756, 11.158894639789650682, 106.17583770765486406, 108.28347222401862382, 9.336622465074469801, 55.141034441952797351, 46.440078448849817505, 8.6446227426567929797, 78.089168124548450578, 13.769678701486554928, 109.24689458216016646, 73.661911594092089217, 68.129772713662532624, 97.373293714914325392, 111.37672031363763381, 41.774607145707705058, 37.303792159404110862, 13.50458062240795698, 28.025774321133212652, 2.8172523123139399104, 15.06834517185416189, 30.724602086356753716, 83.730237197469250532, 116.1882237675054057, 102.4958987933350727, 47.575053260537970345, 21.970483829329168657, 1.9639017318659170996, 93.110071247599989874, 104.77154892645921791, 
102.38249820501368959, 79.489305972587317228, 101.41759671721956693, 69.071370747340552043, 36.851237898063118337, 65.203443482128932374, 36.052027773948793765, 33.85530334726718138, 39.760888494114624336, 63.795505768481234554, 31.606340532343892846, 4.9619693275817553513, 39.812825320066622226, 83.652497730039613089, 95.617162694306898629, 84.550528486626717495, 106.54628494710777886, 21.667436630512383999, 101.59525502291580779, 14.452358667658700142, 39.723880697616550606, 74.718724278940499062, 10.350664852787303971, 24.719642279815161601, 62.724295503245230066, 35.691921390069182962, 98.098748372973204823, 66.128762741620448651, 11.125071514605224365, 1.2041670018261356745, 100.76819585913108313, 56.27869590337286354, 26.162595688896544743, 6.5852309970141504891, 51.815304509775160113, 35.284297604048333596, 120.3526366443002189, 41.301777270451566437, 52.925920523532113293, 25.324368264595250366, 64.554473447355121607, 7.7792179555763141252, 97.252183084063290153, 59.416427425720030442, 70.411788846704439493, 54.984665806565317325, 2.2112826841803325806, 20.204352244192705257, 73.914869704953162, 65.180072169183404185, 116.43291092092113104, 1.6432481986921629868, 86.251466540670662653, 9.9855587891288450919, 100.65993777832045453, 47.594587617611978203, 42.328007619740674272, 125.30162717188795796, 20.674955493886955082, 65.993429402788024163, 87.471377783582283882, 56.910296874935738742, 3.1429360520342015661, 25.132078282229485922, 57.384083657947485335, 32.552693109682877548, 36.15351697830556077, 52.373020245126099326, 58.77808251400347217, 82.445579420531430515, 124.5785760870858212, 127.99170455359490006, 94.36772291084707831, 27.280690051542478614, 121.24353490641078679, 70.24765958634816343, 69.682204684471798828, 47.592863854977622395, 28.983477192337886663, 38.748870555769826751, 26.72681689410819672, 17.746564562195999315, 6.4917147661617491394, 54.115873549453681335, 43.862356248471769504, 102.08392825760529377, 32.277342572841007495, 
87.197068236855557188, 42.678053338444442488, 88.221175201957521494, 90.242688066147820791, 77.862915585399605334, 107.22092923213494942, 114.60728055687286542, 43.548868139037949732, 112.43878125474657281, 80.39657191168225836, 12.622786570173047949, 55.481826005448965589, 31.227038728757179342, 79.889419444880331866, 65.945077042073535267, 55.416924869670765474, 51.692815857786627021, 58.201286645919026341, 80.325364544987678528, 98.202822769886552123, 99.604592979041626677, 70.435948763730266364, 30.825520805978158023, 31.420105476394383004, 71.393839468779333401, 87.683135090574069181, 85.922343836460640887, 58.696503091297927313, 59.220774161425651982, 100.55837027831148589, 81.417548894980427576, 5.4724106222820410039, 115.10141956209918135, 66.684365270506532397, 54.124506822154216934, 20.309594334565190366, 102.80378622030912084, 77.603636487147014122, 120.35731395387119846, 77.933899963612930151, 8.3848393890366423875, 101.58417020879278425, 50.872926007959904382, 61.222406081622466445, 24.203417681637802161, 80.375626589040621184, 59.835918588512868155, 116.55910713328194106, 54.441613069418963278, 4.7475221610693552066, 41.039103338134736987, 31.582766770319722127, 92.514688702354760608, 12.183814125906792469, 50.74596200826272252, 83.089445112844259711, 120.0970227761172282, 11.580725532858195947, 52.664217194644152187, 19.74329475341073703, 4.6065379838328226469, 56.533580367035028758, 73.21796202722543967, 33.915811811697494704, 13.374508611868805019, 25.157193995284615085, 61.310621275675657671, 102.1220336723526998, 71.578210070365457796, 56.21241985969390953, 20.181400903016765369, 16.335202848393237218, 41.142378995125909569, 90.664445955047995085, 7.975996357108670054, 10.315243666009337176, 65.538573538786295103, 29.064893503415078158, 70.397212393778318074, 47.685661248360702302, 73.64268173427626607, 4.8940794903537607752, 110.02111449684525724, 97.162298786370229209, 119.1094541552120063, 1.6757970701692102011, 56.696329659789626021, 
115.72684355797537137, 103.87291172307959641, 94.176747152840107447, 61.642289419880398782, 15.563819504517596215, 32.493346536619355902, 118.29650662103813374, 117.00778425220414647, 41.167483455392357428, 13.526659280792955542, 106.5965524247403664, 27.582297340559307486, 91.311177651208708994, 20.613390404734673211, 11.73204747416821192, 91.657713301003241213, 53.643998358067619847, 22.599285888576559955, 40.161542282701702788, 34.46243406006396981, 101.9575217143210466, 119.43824691511326819, 0.08224852597777498886, 9.5975266439018014353, 110.79607581020900398, 89.266491435668285703, 60.509326603994850302, 35.11022150091957883, 57.870975574878684711, 21.626958642969839275, 11.321321612947940594, 32.245086770777561469, 127.9278531140225823, 97.521749158891907427, 119.58891982295244816, 111.31256110889444244, 17.233067495213617804, 107.73100064184473013, 1.2730227221763925627, 13.565626868141407613, 74.028642485121963546, 42.194746902896440588, 36.299691886837536003, 103.11931454150180798, 4.1014423201559111476, 9.5478541611664695665, 45.628761716801818693, 0.19310089272039476782, 73.479093008514610119, 92.64423867930599954, 109.32734925423937966, 90.065299486919684568, 79.485896264282928314, 104.21820048513473012, 53.773967501754668774, 56.013365518672799226, 38.696708855823089834, 6.3226922390604158863, 117.99437641790791531, 60.935996526102826465, 65.515706998383393511, 102.08512902402435429, 95.264596653305488871, 76.961859545703191543, 102.60724837503221352, 75.879811418053577654, 53.335469293084315723, 105.4392778761102818, 94.42094205128523754, 35.017753329611878144, 113.16584544542638469, 57.734431251272326335, 37.304137074006575858, 38.416702089685713872, 58.022008650790667161, 112.39994990346895065, 30.821353257382725133, 119.95508993788098451, 21.575647098834451754, 112.99108095478368341, 99.782645081428199774, 34.352442439652804751, 38.65371755869637127, 45.044103600099333562, 93.219708470001933165, 118.63027162339130882, 66.772323347566270968, 
23.921805271867924603, 95.268405642938887468, 74.564072133012814447, 42.863464299582119565, 65.810700228867062833, 12.19045015189476544, 77.687129225214448525, 49.903029600962327095, 124.77007091425912222, 111.59426253372657811, 86.071730080198904034, 113.58743255433728336, 104.50062901410274208, 45.027020587462175172, 33.820593030275631463, 23.160830385058943648, 89.671955728172179079, 42.657652530673658475, 79.07504218100439175, 72.377858302796084899, 112.92393955883017043, 13.147343078057019738, 106.13267653538059676, 49.933592136949300766, 12.19026197271887213, 102.65546909356635297, 58.168711055757739814, 5.9508299650915432721, 116.18602660086253309, 93.909139397805120097, 74.360992913909285562, 37.79150420380756259, 81.99255558977893088, 5.540140199340385152, 107.90799998437432805, 108.29233414375994471, 127.73732612652747775, 74.481739357419428416, 74.354557975420902949, 5.6993920176100800745, 31.821124327972938772, 29.063227404454664793, 19.942525120452046394, 2.2562416630971711129, 109.71756728556283633, 120.54361601737764431, 1.6745689401286654174, 22.285318978563736891, 108.48067917416847195, 3.3187656626651005354, 61.751659653626120416, 116.32271680740450392, 16.488193011169641977, 102.76228103366520372, 89.764032651750312652, 14.971625157275411766, 107.75367611113324529, 12.13744703935662983, 91.863958904261380667, 58.364652307729556924, 16.560655709610728081, 15.312153793569450499, 103.79095662887266371, 45.971618716073862743, 95.998194320847687777, 126.84522272872709436, 69.842943837567872833, 58.865645819456403842, 26.517359402958391001, 62.982807984179089544, 53.316417160836863331, 92.267933758917934028, 119.58659945816907566, 10.525482908073172439, 70.405782852340053068, 108.95800006295758067, 123.81771168850900722, 72.62035213732815464, 11.986690768892003689, 6.2204264492829679511, 22.317789279582939344, 84.351675415313366102, 88.566944448037247639, 18.673244930148939602, 110.2820688839055947, 92.88015689769963501, 17.289245485317223938, 
28.178336249096901156, 27.539357402976747835, 90.493789164323970908, 19.323823188187816413, 8.2595454273287032265, 66.746587429832288763, 94.753440627275267616, 83.549214291415410116, 74.607584318808221724, 27.00916124481591396, 56.051548642266425304, 5.6345046246278798208, 30.13669034370832378, 61.449204172717145411, 39.460474394942139043, 104.37644753501444939, 76.991797586670145392, 95.150106521075940691, 43.940967658658337314, 3.9278034637354721781, 58.220142495203617727, 81.543097852922073798, 76.764996410031017149, 30.978611945178272435, 74.835193434442771832, 10.142741494681104086, 73.702475796129874652, 2.4068869642578647472, 72.10405554789758753, 67.710606694534362759, 79.521776988229248673, 127.59101153696246911, 63.212681064687785693, 9.9239386551635107026, 79.62565064013688243, 39.304995460082864156, 63.234325388617435237, 41.101056973257072968, 85.092569894215557724, 43.334873261028405977, 75.190510045831615571, 28.904717335317400284, 79.44776139523673919, 21.437448557880998123, 20.701329705574607942, 49.439284559630323201, 125.44859100649046013, 71.383842780142003903, 68.197496745950047625, 4.2575254832408973016, 22.250143029210448731, 2.4083340036559093278, 73.53639171826216625, 112.55739180674572708, 52.325191377793089487, 13.170461994028300978, 103.63060901955032023, 70.568595208100305172, 112.70527328860043781, 82.603554540903132875, 105.85184104706422659, 50.648736529194138711, 1.1089468947102432139, 15.558435911156266229, 66.504366168126580305, 118.83285485144369886, 12.823577693408878986, 109.96933161313063465, 4.4225653683606651612, 40.408704488389048493, 19.829739409909961978, 2.3601443383704463486, 104.86582184184226207, 3.2864963973879639525, 44.502933081341325305, 19.971117578261328163, 73.319875556644547032, 95.189175235223956406, 84.656015239484986523, 122.60325434377591591, 41.349910987777548144, 3.9868588055796863046, 46.942755567164567765, 113.82059374987147748, 6.2858721040684031323, 50.264156564458971843, 114.76816731589497067, 
65.105386219365755096, 72.307033956611121539, 104.74604049025219865, 117.55616502801058232, 36.891158841066499008, 121.15715217417528038, 127.9834091071934381, 60.735445821694156621, 54.561380103088595206, 114.48706981282521156, 12.495319172699964838, 11.364409368943597656, 95.185727709955244791, 57.966954384675773326, 77.497741111539653502, 53.453633788216393441, 35.49312912439199863, 12.983429532323498279, 108.23174709890736267, 87.724712496947176987, 76.167856515210587531, 64.554685145685652969, 46.394136473711114377, 85.356106676892522955, 48.442350403918680968, 52.485376132299279561, 27.725831170802848646, 86.441858464273536811, 101.21456111374573084, 87.097736278079537442, 96.877562509496783605, 32.79314382336451672, 25.245573140346095897, 110.96365201089793118, 62.454077457517996663, 31.778838889764301712, 3.8901540841470705345, 110.83384973934153095, 103.38563171557325404, 116.40257329184169066, 32.650729089978995034, 68.405645539776742226, 71.209185958086891333, 12.871897527460532729, 61.651041611956316046, 62.840210952792403987, 14.787678937562304782, 47.366270181151776342, 43.844687672921281774, 117.39300618259585463, 118.44154832285494194, 73.116740556626609759, 34.835097789960855152, 10.944821244567719987, 102.20283912420200068, 5.3687305410130647942, 108.24901364430843387, 40.61918866913401871, 77.60757244062187965, 27.207272974297666224, 112.71462790774603491, 27.867799927225860301, 16.769678778076922754, 75.168340417585568503, 101.74585201591980876, 122.44481216324493289, 48.406835363279242301, 32.751253178081242368, 119.67183717702937429, 105.11821426656388212, 108.88322613883792656, 9.4950443221387104131, 82.078206676269473974, 63.165533540643082233, 57.029377404709521215, 24.367628251813584939, 101.49192401652908302, 38.178890225692157401, 112.19404555223445641, 23.161451065716391895, 105.32843438928830437, 39.48658950682147406, 9.2130759676656452939, 113.06716073407369549, 18.435924054450879339, 67.831623623394989409, 26.749017223737610038, 
50.314387990569230169, 122.62124255135495332, 76.24406734470903757, 15.156420140730915591, 112.42483971938781906, 40.362801806033530738, 32.670405696786474437, 82.284757990255457116, 53.32889191009599017, 15.951992714217340108, 20.63048733202231233, 3.0771470775725902058, 58.129787006833794294, 12.794424787556636147, 95.371322496725042583, 19.28536346855253214, 9.7881589807111595292, 92.04222899369415245, 66.324597572744096396, 110.2189083104240126, 3.3515941403384204023, 113.39265931958289002, 103.45368711595438072, 79.745823446162830805, 60.353494305680214893, 123.28457883976079756, 31.12763900903519243, 64.986693073242349783, 108.59301324207990547, 106.01556850440829294, 82.334966910784714855, 27.053318561589549063, 85.19310484948073281, 55.16459468112225295, 54.622355302417417988, 41.226780809469346423, 23.464094948340061819, 55.315426602006482426, 107.28799671613523969, 45.198571777153119911, 80.323084565403405577, 68.924868120131577598, 75.915043428645731183, 110.87649383022653637, 0.16449705195554997772, 19.195053287807240849, 93.59215162042164593, 50.532982871336571407, 121.0186532079897006, 70.22044300183915766, 115.74195114975736942, 43.25391728593967855, 22.642643225895881187, 64.490173541558760917, 127.85570622804880259, 67.043498317787452834, 111.17783964590853429, 94.625122217788884882, 34.466134990430873586, 87.462001283689460251, 2.5460454443527851254, 27.131253736286453204, 20.057284970243927091, 84.389493805792881176, 72.599383773678709986, 78.23862908300361596, 8.2028846403118222952, 19.095708322332939133, 91.257523433603637386, 0.38620178544442751445, 18.958186017032858217, 57.288477358615637058, 90.654698508482397301, 52.130598973843007116, 30.971792528569494607, 80.436400970269460231, 107.54793500350933755, 112.02673103734559845, 77.393417711646179669, 12.645384478124469751, 107.98875283581583062, 121.87199305220929091, 3.0314139967704250012, 76.170258048052346567, 62.529193306614615722, 25.923719091410021065, 77.214496750068065012, 
23.759622836107155308, 106.67093858617226942, 82.878555752224201569, 60.841884102570475079, 70.035506659227394266, 98.331690890852769371, 115.46886250254465267, 74.608274148013151716, 76.833404179371427745, 116.0440173015849723, 96.799899806937901303, 61.642706514769088244, 111.91017987576196902, 43.151294197672541486, 97.982161909571004799, 71.565290162856399547, 68.704884879305609502, 77.307435117392742541, 90.088207200202305103, 58.439416940007504309, 109.26054324678625562, 5.5446466951325419359, 47.843610543739487184, 62.536811285881412914, 21.128144266025628895, 85.72692859916787711, 3.6214004577341256663, 24.38090030379316886, 27.374258450428897049, 99.806059201928292168, 121.54014182852188242, 95.188525067456794204, 44.143460160401446046, 99.174865108678204706, 81.001258028205484152, 90.054041174924350344, 67.641186060551262926, 46.321660770121525275, 51.343911456344358157, 85.315305061350954929, 30.150084362012421479, 16.755716605595807778, 97.847879117660340853, 26.294686156117677456, 84.265353070761193521, 99.867184273898601532, 24.380523945441382239, 77.310938187132705934, 116.33742211151547963, 11.901659930186724523, 104.37205320172870415, 59.818278795613878174, 20.721985827818571124, 75.583008407618763158, 35.98511117955786176, 11.080280398684408283, 87.81599996875229408, 88.584668287523527397, 127.47465225305859349, 20.96347871484249481, 20.709115950841805898, 11.398784035220160149, 63.642248655945877545, 58.126454808909329586, 39.885050240904092789, 4.5124833261979802046, 91.435134571129310643, 113.08723203475528862, 3.3491378802573308349, 44.570637957131111762, 88.961358348336943891, 6.6375313253302010708, 123.50331930725587881, 104.64543361480900785, 32.976386022342921933, 77.524562067334045423, 51.528065303500625305, 29.943250314554461511, 87.507352222266490571, 24.274894078716897639, 55.727917808522761334, 116.72930461545911385, 33.121311419221456163, 30.624307587142538978, 79.581913257745327428, 91.943237432151363464, 63.996388641699013533, 
125.69044545745782671, 11.685887675135745667, 117.73129163891280768, 53.034718805916782003, 125.96561596835817909, 106.63283432167736464, 56.535867517839506036, 111.17319891633815132, 21.050965816149982857, 12.811565704683744116, 89.916000125918799313, 119.63542337702165241, 17.240704274656309281, 23.973381537784007378, 12.440852898565935902, 44.635578559169516666, 40.703350830630370183, 49.133888896074495278, 37.346489860301517183, 92.564137767814827384, 57.76031379539927002, 34.578490970634447876, 56.356672498193802312, 55.078714805957133649, 52.987578328647941817, 38.647646376375632826, 16.519090854661044432, 5.4931748596682155039, 61.506881254554173211, 39.098428582834458211, 21.215168637616443448, 54.018322489631827921, 112.10309728453285061, 11.269009249255759642, 60.273380687416647561, 122.89840834543429082, 78.920948789887916064, 80.752895070032536751, 25.983595173343928764, 62.300213042151881382, 87.881935317316674627, 7.8556069274709443562, 116.44028499041087343, 35.086195705847785575, 25.529992820062034298, 61.95722389036018285, 21.670386868885543663, 20.285482989365846151, 19.404951592259749305, 4.8137739285193674732, 16.208111095798813039, 7.4212133890723634977, 31.043553976462135324, 127.18202307392493822, 126.42536212937557139, 19.847877310330659384, 31.25130128027376486, 78.609990920165728312, 126.46865077723487047, 82.202113946517783916, 42.185139788431115448, 86.669746522060449934, 22.381020091663231142, 57.809434670638438547, 30.895522790473478381, 42.874897115761996247, 41.402659411149215885, 98.878569119264284382, 122.89718201298455824, 14.767685560287645785, 8.3949934919000952505, 8.5150509664817946032, 44.500286058420897461, 4.8166680073154566344, 19.07278343652797048, 97.114783613491454162, 104.65038275558617897, 26.340923988060239935, 79.261218039104278432, 13.137190416204248322, 97.410546577204513596, 37.207109081809903728, 83.703682094128453173, 101.29747305838827742, 2.2178937894204864278, 31.116871822316170437, 5.0087323362567985896, 
109.66570970288739773, 25.64715538682139595, 91.938663226261269301, 8.8451307367213303223, 80.817408976778096985, 39.659478819819923956, 4.7202886767445306759, 81.731643683684524149, 6.572992794775927905, 89.005866162682650611, 39.942235156526294304, 18.639751113292732043, 62.378350470451550791, 41.312030478973611025, 117.20650868755183183, 82.699821975555096287, 7.9737176111593726091, 93.885511134332773509, 99.641187499742954969, 12.571744208136806265, 100.52831312892158167, 101.53633463179357932, 2.210772438735148171, 16.614067913225881057, 81.492080980508035282, 107.11233005602116464, 73.782317682136635995, 114.31430434835419874, 127.96681821438687621, 121.47089164338831324, 109.12276020618082839, 100.97413962565042311, 24.990638345399929676, 22.72881873789083329, 62.371455419910489582, 115.93390876935518463, 26.995482223079307005, 106.90726757643278688, 70.98625824878399726, 25.966859064650634537, 88.463494197818363318, 47.449424993897991953, 24.335713030424813041, 1.1093702913713059388, 92.788272947425866732, 42.712213353785045911, 96.884700807837361936, 104.97075226459855912, 55.451662341609335272, 44.8837169285507116, 74.429122227495099651, 46.195472556162712863, 65.75512501899356721, 65.58628764672903344, 50.491146280695829773, 93.927304021795862354, 124.90815491503599333, 63.557677779528603423, 7.7803081682977790479, 93.667699478686699877, 78.771263431150146062, 104.80514658368338132, 65.301458179961628048, 8.8112910795571224298, 14.418371916173782665, 25.743795054921065457, 123.30208322391263209, 125.68042190558480797, 29.575357875124609563, 94.732540362303552683, 87.689375345846201526, 106.78601236519534723, 108.88309664571352187, 18.233481113253219519, 69.670195579921710305, 21.889642489139077952, 76.405678248404001351, 10.737461082029767567, 88.498027288616867736, 81.238377338271675399, 27.215144881243759301, 54.414545948598970426, 97.429255815492069814, 55.735599854451720603, 33.539357556157483486, 22.336680835174774984, 75.491704031843255507, 
116.88962432648986578, 96.813670726558484603, 65.502506356166122714, 111.34367435406238656, 82.236428533131402219, 89.766452277675853111, 18.990088644281058805, 36.156413352538947947, 126.33106708128616447, 114.05875480941904243, 48.735256503630807856, 74.983848033058166038, 76.35778045138795278, 96.388091104468912818, 46.322902131436421769, 82.656868778576608747, 78.973179013642948121, 18.426151935334928567, 98.134321468147390988, 36.871848108905396657, 7.6632472467899788171, 53.498034447475220077, 100.62877598114209832, 117.24248510271354462, 24.488134689418075141, 30.312840281465469161, 96.849679438775638118, 80.725603612067061476, 65.340811393576586852, 36.569515980510914233, 106.65778382019198034, 31.903985428434680216, 41.26097466404462466, 6.1542941551451804116, 116.25957401366758859, 25.588849575116910273, 62.742644993450085167, 38.57072693710506428, 19.576317961425957037, 56.084457987391942879, 4.6491951454918307718, 92.437816620848025195, 6.7031882806804787833, 98.78531863916941802, 78.907374231912399409, 31.491646892325661611, 120.70698861136406776, 118.56915767952159513, 62.255278018074022839, 1.9733861464883375447, 89.186026484163448913, 84.031137008820223855, 36.669933821569429711, 54.106637123182736104, 42.38620969896146562, 110.3291893622445059, 109.24471060483483598, 82.453561618938692845, 46.928189896680123638, 110.63085320401660283, 86.575993432270479389, 90.3971435543098778, 32.646169130810449133, 9.8497362402667931747, 23.830086857295100344, 93.752987660453072749, 0.32899410391109995544, 38.390106575618119678, 59.18430324084329186, 101.06596574267678079, 114.03730641598303919, 12.440886003681953298, 103.48390229951473884, 86.5078345718793571, 45.285286451795400353, 0.98034708312115981244, 127.71141245610124315, 6.0869966355785436463, 94.355679291820706567, 61.250244435577769764, 68.932269980865385151, 46.924002567378920503, 5.0920908887055702507, 54.262507472572906408, 40.114569940491492162, 40.778987611589400331, 17.19876754736105795, 
28.477258166007231921, 16.40576928062364459, 38.191416644669516245, 54.515046867207274772, 0.7724035708888550289, 37.916372034065716434, 114.5769547172349121, 53.309397016964794602, 104.26119794768601423, 61.943585057142627193, 32.872801940542558441, 87.095870007022313075, 96.053462074694834882, 26.786835423292359337, 25.290768956252577482, 87.977505671635299223, 115.74398610441858182, 6.0628279935444879811, 24.340516096104693133, 125.05838661322923144, 51.847438182823680108, 26.428993500136130024, 47.519245672217948595, 85.341877172348176828, 37.757111504448403139, 121.68376820514458814, 12.071013318458426511, 68.663381781709176721, 102.93772500509294332, 21.216548296029941412, 25.666808358746493468, 104.08803460317358258, 65.599799613875802606, 123.28541302954181447, 95.82035975152393803, 86.302588395348720951, 67.964323819145647576, 15.130580325716437073, 9.4097697586148569826, 26.61487023478912306, 52.176414400404610205, 116.8788338800186466, 90.521086493572511245, 11.089293390265083872, 95.687221087478974368, 125.07362257176282583, 42.256288532054895768, 43.453857198335754219, 7.2428009154682513326, 48.761800607586337719, 54.748516900861432077, 71.612118403860222315, 115.08028365704740281, 62.377050134913588408, 88.286920320806530071, 70.349730217360047391, 34.002516056414606282, 52.108082349852338666, 7.2823721211025258526, 92.643321540243050549, 102.68782291269235429, 42.630610122701909859, 60.300168724024842959, 33.511433211195253534, 67.695758235320681706, 52.58937231223899289, 40.530706141526025021, 71.734368547797203064, 48.761047890886402456, 26.621876374269049847, 104.67484422303095926, 23.803319860377087025, 80.744106403457408305, 119.63655759122775635, 41.443971655640780227, 23.166016815237526316, 71.970222359115723521, 22.160560797372454545, 47.631999937508226139, 49.169336575050692772, 126.94930450611718697, 41.92695742968498962, 41.418231901683611795, 22.797568070440320298, 127.28449731189175509, 116.25290961781865917, 79.770100481808185577, 
9.024966652399598388, 54.870269142262259265, 98.174464069510577247, 6.6982757605146616697, 89.141275914265861502, 49.922716696673887782, 13.27506265066404012, 119.0066386145153956, 81.290867229618015699, 65.952772044689481845, 27.049124134668090846, 103.05613060700125061, 59.886500629112561001, 47.014704444532981142, 48.549788157433795277, 111.45583561704916065, 105.45860923091822769, 66.242622838442912325, 61.248615174288715934, 31.163826515494292835, 55.886474864302726928, 127.99277728340166504, 123.38089091491565341, 23.371775350275129313, 107.46258327782561537, 106.06943761183720198, 123.93123193671999616, 85.265668643354729284, 113.07173503568265005, 94.346397832676302642, 42.101931632299965713, 25.62313140937112621, 51.832000251841236604, 111.27084675404330483, 34.481408549312618561, 47.946763075568014756, 24.881705797131871805, 89.271157118339033332, 81.406701661260740366, 98.267777792148990557, 74.692979720606672345, 57.128275535629654769, 115.52062759079854004, 69.156981941268895753, 112.71334499638760462, 110.1574296119142673, 105.97515665729588363, 77.295292752751265652, 33.038181709322088864, 10.986349719336431008, 123.01376250910834642, 78.196857165668916423, 42.430337275232886896, 108.03664497926729382, 96.206194569065701216, 22.538018498511519283, 120.5467613748369331, 117.79681669087221962, 29.841897579779470107, 33.505790140065073501, 51.967190346687857527, 124.60042608430740074, 47.763870634633349255, 15.711213854945526691, 104.88056998082174687, 70.172391411695571151, 51.059985640124068595, 123.9144477807203657, 43.340773737771087326, 40.570965978731692303, 38.809903184519498609, 9.6275478570387349464, 32.416222191597626079, 14.842426778144726995, 62.087107952927908627, 126.36404614785351441, 124.85072425875114277, 39.695754620661318768, 62.502602560551167699, 29.219981840335094603, 124.93730155447337893, 36.404227893035567831, 84.370279576865868876, 45.339493044120899867, 44.762040183326462284, 115.61886934127687709, 61.791045580946956761, 
85.749794231523992494, 82.805318822302069748, 69.757138238532206742, 117.79436402596911648, 29.53537112057529157, 16.789986983800190501, 17.030101932963589206, 89.000572116841794923, 9.6333360146309132688, 38.145566873059578938, 66.229567226986546302, 81.300765511172357947, 52.681847976124117849, 30.522436078208556864, 26.274380832412134623, 66.821093154409027193, 74.414218163623445434, 39.407364188260544324, 74.594946116776554845, 4.4357875788409728557, 62.233743644632340875, 10.017464672517235158, 91.33141940577479545, 51.29431077364642988, 55.877326452522538602, 17.690261473446298623, 33.63481795355619397, 79.318957639639847912, 9.4405773534926993307, 35.463287367369048297, 13.145985589555493789, 50.011732325365301222, 79.884470313052588608, 37.279502226585464086, 124.75670094090310158, 82.624060957950860029, 106.41301737510730163, 37.399643951110192575, 15.947435222322383197, 59.771022268669184996, 71.282374999485909939, 25.143488416277250508, 73.056626257846801309, 75.072669263587158639, 4.4215448774702963419, 33.228135826455400093, 34.984161961019708542, 86.224660112045967253, 19.564635364276909968, 100.62860869671203545, 127.93363642877739039, 114.94178328678026446, 90.24552041236529476, 73.948279251300846227, 49.981276690799859352, 45.457637475781666581, 124.74291083982461714, 103.86781753871036926, 53.990964446162251988, 85.814535152869211743, 13.97251649756799452, 51.933718129304907052, 48.926988395636726636, 94.898849987795983907, 48.671426060849626083, 2.2187405827462498564, 57.576545894851733465, 85.4244267075737298, 65.76940161567836185, 81.941504529197118245, 110.90332468321867054, 89.767433857105061179, 20.858244454990199301, 92.390945112329063704, 3.5102500379907723982, 3.1725752934580668807, 100.98229256139529753, 59.854608043595362687, 121.81630983007198665, 127.11535555905720685, 15.560616336599196075, 59.335398957377037732, 29.542526862303930102, 81.610293167370400624, 2.6029163599232560955, 17.62258215911424486, 28.836743832347565331, 
51.487590109845768893, 118.60416644782890216, 123.36084381116961595, 59.150715750252857106, 61.465080724610743346, 47.378750691696041031, 85.572024730394332437, 89.766193291427043732, 36.466962226510077016, 11.34039115984342061, 43.779284978281793883, 24.811356496808002703, 21.474922164063173113, 48.996054577237373451, 34.476754676543350797, 54.430289762487518601, 108.82909189720157883, 66.858511630984139629, 111.47119970890344121, 67.078715112314966973, 44.673361670353187947, 22.983408063690148992, 105.77924865298336954, 65.627341453120607184, 3.0050127123358834069, 94.68734870812841109, 36.472857066266442416, 51.532904555351706222, 37.98017728856211761, 72.312826705081533873, 124.66213416257596691, 100.11750961883808486, 97.470513007265253691, 21.967696066119970055, 24.71556090277954354, 64.776182208937825635, 92.645804262876481516, 37.313737557156855473, 29.946358027285896242, 36.852303870673495112, 68.268642936294781975, 73.743696217810793314, 15.326494493579957634, 106.99606889495407813, 73.257551962287834613, 106.48497020542708924, 48.976269378836150281, 60.625680562934576301, 65.699358877554914216, 33.451207224137760932, 2.6816227871531737037, 73.139031961025466444, 85.315567640383960679, 63.807970856869360432, 82.52194932808924932, 12.308588310293998802, 104.51914802733517718, 51.177699150233820546, 125.48528998690017033, 77.141453874210128561, 39.152635922851914074, 112.16891597478388576, 9.2983902909872995224, 56.875633241696050391, 13.406376561364595545, 69.570637278342474019, 29.814748463824798819, 62.9832937846549612, 113.41397722273177351, 109.13831535904319026, 124.51055603614804568, 3.9467722929803130683, 50.372052968330535805, 40.06227401764044771, 73.339867643138859421, 108.21327424636547221, 84.772419397926569218, 92.658378724492649781, 90.489421209673309932, 36.907123237877385691, 93.856379793363885256, 93.261706408036843641, 45.151986864544596756, 52.7942871086197556, 65.292338261620898265, 19.699472480533586349, 47.660173714593838667, 
59.505975320906145498, 0.65798820782219991088, 76.780213151236239355, 118.3686064816902217, 74.131931485357199563, 100.07461283196607837, 24.881772007367544575, 78.967804599033115664, 45.015669143758714199, 90.570572903590800706, 1.9606941662459576037, 127.4228249122024863, 12.173993271160725271, 60.711358583641413134, 122.50048887115917751, 9.8645399617307703011, 93.848005134757841006, 10.184181777411140501, 108.52501494514581282, 80.229139880982984323, 81.557975223178800661, 34.3975350947221159, 56.954516332014463842, 32.811538561247289181, 76.38283328933903249, 109.03009373441454954, 1.5448071417777100578, 75.832744068135070847, 101.15390943446982419, 106.61879403393322718, 80.522395895372028463, 123.88717011428525439, 65.745603881088754861, 46.191740014048264129, 64.106924149389669765, 53.573670846584718674, 50.581537912508792942, 47.955011343274236424, 103.48797220883716363, 12.125655987088975962, 48.681032192213024246, 122.11677322645846289, 103.69487636564736022, 52.857987000272260047, 95.038491344439535169, 42.683754344696353655, 75.514223008900444256, 115.36753641028917627, 24.142026636920491001, 9.3267635634219914209, 77.87545001018588664, 42.433096592059882823, 51.333616717496624915, 80.176069206350803142, 3.1995992277552431915, 118.57082605908726691, 63.640719503051514039, 44.605176790701079881, 7.9286476382912951522, 30.261160651432874147, 18.819539517229713965, 53.22974046957824612, 104.35282880081285839, 105.75766776004093117, 53.042172987145022489, 22.178586780533805722, 63.374442174961586716, 122.14724514352565166, 84.512577064109791536, 86.907714396671508439, 14.485601830936502665, 97.523601215172675438, 109.49703380172650213, 15.22423680772408261, 102.16056731409480562, 124.7541002698308148, 48.573840641616698122, 12.699460434720094781, 68.005032112829212565, 104.21616469970467733, 14.564744242208689684, 57.286643080489739077, 77.375645825384708587, 85.261220245403819717, 120.60033744804968592, 67.022866422394145047, 7.3915164706413634121, 
105.17874462447798578, 81.061412283055688022, 15.468737095594406128, 97.522095781772804912, 53.243752748541737674, 81.349688446061918512, 47.606639720757812029, 33.488212806918454589, 111.2731151824555127, 82.887943311281560455, 46.332033630478690611, 15.940444718231447041, 44.321121594744909089, 95.263999875016452279, 98.338673150105023524, 125.89860901223437395, 83.85391485936997924, 82.836463803367223591, 45.595136140884278575, 126.56899462378714816, 104.50581923564095632, 31.540200963616371155, 18.049933304799196776, 109.74053828452815651, 68.348928139021154493, 13.396551521032961318, 50.282551828531723004, 99.845433393347775564, 26.550125301331718219, 110.01327722903442918, 34.581734459239669377, 3.9055440893789636903, 54.098248269339819672, 78.11226121400250122, 119.773001258225122, 94.029408889065962285, 97.099576314871228533, 94.911671234101959271, 82.917218461840093369, 4.4852456768858246505, 122.49723034857743187, 62.32765303098858567, 111.77294972860909184, 127.98555456680696807, 118.76178182983130682, 46.743550700553896604, 86.925166555654868716, 84.138875223678041948, 119.86246387344363029, 42.531337286713096546, 98.143470071368938079, 60.692795665352605283, 84.203863264599931426, 51.246262818742252421, 103.66400050368611119, 94.541693508086609654, 68.962817098625237122, 95.893526151136029512, 49.763411594267381588, 50.542314236681704642, 34.81340332252511871, 68.535555584301619092, 21.385959441213344689, 114.25655107126294752, 103.04125518160071806, 10.313963882541429484, 97.426689992778847227, 92.314859223832172574, 83.950313314595405245, 26.590585505502531305, 66.076363418644177727, 21.972699438676499994, 118.02752501822033082, 28.393714331337832846, 84.86067455046941177, 88.073289958538225619, 64.412389138131402433, 45.076036997026676545, 113.09352274967750418, 107.59363338174443925, 59.683795159558940213, 67.011580280130147003, 103.93438069337935303, 121.20085216861480149, 95.527741269266698509, 31.422427709894691361, 81.761139961647131713, 
12.344782823391142301, 102.11997128025177517, 119.8288955614407314, 86.681547475542174652, 81.141931957463384606, 77.619806369038997218, 19.255095714077469893, 64.832444383195252158, 29.684853556289453991, 124.17421590585945523, 124.7280922957106668, 121.70144851750228554, 79.391509241322637536, 125.0052051211023354, 58.439963680673827184, 121.87460310894675786, 72.808455786074773641, 40.740559153731737752, 90.678986088241799735, 89.524080366656562546, 103.23773868255375419, 123.5820911618975515, 43.499588463051622966, 37.610637644607777474, 11.514276477068051463, 107.58872805193823297, 59.070742241150583141, 33.579973967600381002, 34.060203865930816391, 50.001144233683589846, 19.266672029261826538, 76.291133746122795856, 4.4591344539767305832, 34.601531022344715893, 105.3636959522482357, 61.044872156417113729, 52.548761664827907225, 5.6421863088216923643, 20.828436327246890869, 78.814728376524726627, 21.189892233553109691, 8.8715751576819457114, 124.46748728926468175, 20.034929345038108295, 54.662838811549590901, 102.58862154729285976, 111.7546529050450772, 35.380522946892597247, 67.269635907116025919, 30.637915279279695824, 18.881154706985398661, 70.926574734738096595, 26.291971179110987578, 100.02346465073424042, 31.768940626105177216, 74.559004453170928173, 121.51340188180984114, 37.248121915905358037, 84.826034750214603264, 74.799287902220385149, 31.894870444644766394, 119.54204453734200797, 14.564749998971819878, 50.286976832558138995, 18.113252515697240597, 22.145338527174317278, 8.8430897549405926839, 66.456271652910800185, 69.968323922043055063, 44.449320224091934506, 39.129270728557457915, 73.257217393427708885, 127.86727285755841876, 101.8835665735641669, 52.491040824734227499, 19.896558502601692453, 99.962553381599718705, 90.91527495156697114, 121.48582167965287226, 79.735635077420738526, 107.98192889232814196, 43.629070305738423485, 27.94503299513598904, 103.8674362586098141, 97.853976791277091252, 61.797699975591967814, 97.342852121699252166, 
4.4374811654924997129, 115.15309178970710491, 42.848853415151097579, 3.5388032313603616785, 35.883009058397874469, 93.806649366440979065, 51.534867714210122358, 41.716488909984036582, 56.781890224658127408, 7.0205000759815447964, 6.3451505869197717402, 73.964585122790595051, 119.70921608719436335, 115.63261966014761128, 126.23071111811441369, 31.121232673198392149, 118.67079791475407546, 59.085053724611498183, 35.220586334740801249, 5.205832719846512191, 35.245164318228489719, 57.673487664695130661, 102.97518021969517577, 109.20833289566144231, 118.72168762233923189, 118.30143150050571421, 122.93016144922148669, 94.75750138339572004, 43.144049460792302852, 51.532386582854087465, 72.933924453020154033, 22.68078231968684122, 87.558569956563587766, 49.622712993616005406, 42.949844328126346227, 97.992109154474746902, 68.953509353086701594, 108.86057952497867518, 89.658183794406795641, 5.7170232619719172362, 94.94239941781052039, 6.1574302246299339458, 89.346723340706375893, 45.966816127383935964, 83.558497305970377056, 3.2546829062412143685, 6.0100254246754047927, 61.37469741625682218, 72.945714132532884832, 103.06580911070341244, 75.960354577127873199, 16.625653410166705726, 121.3242683251555718, 72.235019237679807702, 66.941026014530507382, 43.935392132239940111, 49.431121805562725058, 1.552364417879289249, 57.291608525752963033, 74.627475114313710947, 59.892716054571792483, 73.704607741350628203, 8.5372858725932019297, 19.487392435621586628, 30.652988987163553247, 85.992137789911794243, 18.515103924579307204, 84.969940410854178481, 97.952538757672300562, 121.25136112587279058, 3.3987177551098284312, 66.902414448275521863, 5.3632455743099853862, 18.278063922054570867, 42.631135280767921358, 127.61594171374235884, 37.043898656178498641, 24.617176620591635583, 81.038296054670354351, 102.35539830047127907, 122.97057997380397865, 26.282907748420257121, 78.305271845707466127, 96.337831949567771517, 18.596780581974599045, 113.75126648339210078, 26.81275312273282907, 
11.141274556684948038, 59.629496927649597637, 125.96658756931356038, 98.827954445467184996, 90.27663071809001849, 121.02111207229609136, 7.8935445859606261365, 100.74410593666107161, 80.124548035284533398, 18.679735286281356821, 88.426548492734582396, 41.544838795856776414, 57.31675744898893754, 52.978842419346619863, 73.81424647575840936, 59.71275958673140849, 58.523412816073687281, 90.303973729089193512, 105.58857421724314918, 2.5846765232417965308, 39.398944961067172699, 95.320347429187677335, 119.011950641812291, 1.3159764156480378006, 25.560426302476116689, 108.73721296338408138, 20.263862970718037104, 72.149225663932156749, 49.763544014738727128, 29.935609198066231329, 90.031338287521066377, 53.141145807181601413, 3.9213883324955531862, 126.84564982440497261, 24.347986542321450543, 121.42271716728646425, 117.00097774231835501, 19.729079923465178581, 59.696010269515682012, 20.368363554825918982, 89.050029890295263613, 32.458279761969606625, 35.115950446357601322, 68.795070189444231801, 113.90903266402892768, 65.62307712249821634, 24.765666578681702958, 90.060187468829099089, 3.0896142835590580944, 23.665488136273779674, 74.307818868943286361, 85.237588067870092345, 33.044791790747694904, 119.77434022857414675, 3.4912077621775097214, 92.383480028100166237, 0.21384829877933952957, 107.14734169316943735, 101.16307582501758588, 95.910022686552110827, 78.975944417674327269, 24.251311974177951925, 97.36206438442968647, 116.23354645292056375, 79.389752731294720434, 105.71597400054815807, 62.076982688879070338, 85.367508689396345289, 23.028446017804526491, 102.73507282057835255, 48.284053273840982001, 18.653527126847620821, 27.75090002037177328, 84.866193184119765647, 102.66723343499688781, 32.352138412701606285, 6.3991984555141243618, 109.14165211817453383, 127.28143900610302808, 89.210353581402159762, 15.857295276582590304, 60.522321302865748294, 37.63907903445942793, 106.45948093916013022, 80.705657601629354758, 83.515335520081862342, 106.08434597429004498, 
44.357173561067611445, 126.74888434992681141, 116.29449028705494129, 41.025154128223221051, 45.815428793343016878, 28.971203661876643309, 67.047202430348988855, 90.994067603453004267, 30.448473615451803198, 76.321134628193249227, 121.50820053966162959, 97.147681283233396243, 25.398920869443827542, 8.0100642256620631088, 80.432329399409354664, 29.129488484417379368, 114.57328616098311613, 26.751291650773055153, 42.522440490811277414, 113.20067489610300981, 6.0457328447882900946, 14.783032941286364803, 82.35748924895960954, 34.122824566111376043, 30.937474191192450235, 67.044191563545609824, 106.48750549708711333, 34.699376892123837024, 95.213279441515624058, 66.976425613840547157, 94.546230364914663369, 37.775886622563120909, 92.664067260957381222, 31.880889436462894082, 88.642243189493456157, 62.527999750036542537, 68.677346300213685026, 123.79721802447238588, 39.707829718743596459, 37.672927606734447181, 91.190272281772195129, 125.13798924757793429, 81.011638471281912643, 63.08040192723274231, 36.099866609602031531, 91.481076569059950998, 8.6978562780423089862, 26.793103042065922637, 100.56510365706708399, 71.690866786695551127, 53.100250602667074418, 92.026554458068858366, 69.163468918479338754, 7.8110881787579273805, 108.19649653868327732, 28.224522428008640418, 111.54600251645388198, 60.058817778135562548, 66.199152629746095045, 61.823342468207556522, 37.834436923680186737, 8.9704913537752872799, 116.99446069715486374, 124.65530606198080932, 95.545899457221821649, 127.97110913361757412, 109.52356365966261365, 93.487101401111431187, 45.850333111313375412, 40.277750447359721875, 111.72492774688726058, 85.062674573429831071, 68.286940142737876158, 121.38559133070884855, 40.407726529199862853, 102.49252563748814282, 79.328001007375860354, 61.083387016176857287, 9.9256341972541122232, 63.787052302272059023, 99.526823188534763176, 101.08462847336340928, 69.6268066450538754, 9.071111168603238184, 42.771918882430327358, 100.51310214252953301, 78.082510363201436121, 
20.627927765086496947, 66.853379985561332433, 56.629718447667983128, 39.90062662919081049, 53.181171011008700589, 4.1527268372883554548, 43.945398877356637968, 108.05505003644066164, 56.78742866267930367, 41.721349100942461519, 48.146579917080089217, 0.82477827626280486584, 90.152073994056991069, 98.187045499358646339, 87.187266763488878496, 119.36759031912151841, 6.0231605602639319841, 79.868761386762344046, 114.40170433723324095, 63.055482538533397019, 62.844855419793020701, 35.522279923294263426, 24.689565646785922581, 76.239942560507188318, 111.65779112288510078, 45.363094951087987283, 34.283863914930407191, 27.239612738077994436, 38.510191428158577764, 1.6648887663905043155, 59.36970711258254596, 120.34843181172254845, 121.45618459142497159, 115.40289703500820906, 30.78301848264891305, 122.01041024220467079, 116.87992736135129235, 115.74920621789715369, 17.616911572149547283, 81.481118307467113482, 53.357972176487237448, 51.048160733316763071, 78.475477365111146355, 119.164182323795103, 86.999176926103245933, 75.221275289215554949, 23.028552954139740905, 87.177456103876465932, 118.14148448230116628, 67.159947935200762004, 68.120407731861632783, 100.00228846736717969, 38.533344058527291054, 24.582267492245591711, 8.9182689079534611665, 69.203062044693069765, 82.727391904496471398, 122.08974431283422746, 105.09752332965581445, 11.284372617643384729, 41.656872654497419717, 29.629456753049453255, 42.379784467106219381, 17.743150315367529402, 120.9349745785293635, 40.06985869007621659, 109.32567762310281978, 77.177243094589357497, 95.509305810090154409, 70.761045893788832473, 6.5392718142320518382, 61.275830558559391648, 37.762309413974435301, 13.85314946947619319, 52.583942358225613134, 72.046929301468480844, 63.537881252213992411, 21.118008906345494324, 115.02680376361968229, 74.496243831814354053, 41.652069500432844507, 21.598575804440770298, 63.789740889293170767, 111.08408907468401594, 29.129499997943639755, 100.57395366511627799, 36.226505031394481193, 
44.290677054348634556, 17.686179509884823347, 4.9125433058252383489, 11.936647844086110126, 88.898640448183869012, 78.258541457118553808, 18.514434786859055748, 127.73454571512047551, 75.767133147128333803, 104.982081649468455, 39.793117005207022885, 71.925106763203075388, 53.830549903137580259, 114.9716433593093825, 31.47127015484511503, 87.963857784656283911, 87.258140611476846971, 55.890065990275616059, 79.734872517223266186, 67.707953582554182503, 123.59539995118757361, 66.685704243398504332, 8.8749623309886374045, 102.30618357941420982, 85.697706830302195158, 7.0776064627207233571, 71.766018116799386917, 59.613298732881958131, 103.06973542842024472, 83.432977819968073163, 113.56378044931625482, 14.041000151963089593, 12.690301173843181459, 19.929170245584828081, 111.41843217439236469, 103.26523932029886055, 124.46142223622882739, 62.242465346400422277, 109.34159582950815093, 118.17010744922299637, 70.441172669481602497, 10.411665439693024382, 70.490328636456979439, 115.3469753293938993, 77.950360439393989509, 90.416665791326522594, 109.44337524468210177, 108.6028630010150664, 117.86032289844661136, 61.515002766791440081, 86.288098921584605705, 103.06477316571181291, 17.867848906040308066, 45.361564639377320418, 47.117139913127175532, 99.245425987232010812, 85.899688656256330432, 67.984218308953131782, 9.9070187061770411674, 89.721159049960988341, 51.316367588813591283, 11.434046523947472451, 61.88479883562104078, 12.31486044926350587, 50.693446681412751786, 91.933632254771509906, 39.116994611940754112, 6.509365812482428737, 12.020050849350809585, 122.74939483251728234, 17.891428265069407644, 78.131618221410462866, 23.920709154255746398, 33.25130682033704943, 114.64853665031478158, 16.470038475363253383, 5.882052029061014764, 87.870784264479880221, 98.862243611125450116, 3.1047288357585784979, 114.58321705150956404, 21.254950228627421893, 119.78543210914722295, 19.409215482701256406, 17.074571745186403859, 38.974784871243173257, 61.305977974330744473, 
43.984275579823588487, 37.030207849162252387, 41.93988082171199494, 67.905077515348239103, 114.50272225174558116, 6.7974355102232948411, 5.8048288965510437265, 10.726491148619970772, 36.556127844109141733, 85.262270561539480696, 127.23188342748471769, 74.087797312356997281, 49.234353241186909145, 34.076592109344346682, 76.710796600946196122, 117.94115994760795729, 52.565815496844152221, 28.610543691418570234, 64.675663899135543033, 37.193561163952836068, 99.502532966784201562, 53.625506245465658139, 22.282549113369896077, 119.25899385530283325, 123.93317513863075874, 69.655908890938007971, 52.553261436183674959, 114.04222414459218271, 15.787089171924890252, 73.488211873325781198, 32.249096070569066796, 37.35947057256635162, 48.853096985469164792, 83.089677591713552829, 114.63351489797787508, 105.9576848386968777, 19.628492951516818721, 119.42551917346281698, 117.04682563215101254, 52.607947458182025002, 83.177148434486298356, 5.1693530464835930616, 78.797889922137983376, 62.640694858378992649, 110.02390128362458199, 2.6319528312960756011, 51.120852604955871357, 89.474425926768162753, 40.527725941439712187, 16.298451327864313498, 99.527088029477454256, 59.871218396136100637, 52.062676575042132754, 106.28229161436320283, 7.8427766649947443511, 125.69129964880994521, 48.695973084646539064, 114.84543433457656647, 106.00195548463671003, 39.458159846930357162, 119.39202053903136402, 40.736727109655475942, 50.100059780590527225, 64.91655952393921325, 70.231900892718840623, 9.5901403788921015803, 99.818065328061493346, 3.2461542449964326806, 49.531333157367043896, 52.120374937658198178, 6.1792285671181161888, 47.330976272551197326, 20.615637737890210701, 42.475176135740184691, 66.089583581499027787, 111.5486804571482935, 6.9824155243586574215, 56.766960056200332474, 0.42769659756231703795, 86.294683386342512676, 74.326151650038809748, 63.820045373107859632, 29.951888835348654538, 48.502623948359541828, 66.724128768863010919, 104.46709290584476548, 30.779505462593078846, 
83.431948001096316148, 124.15396537776177865, 42.735017378792690579, 46.056892035612690961, 77.470145641156705096, 96.568106547681964003, 37.307054253695241641, 55.501800040747184539, 41.732386368243169272, 77.334466869993775617, 64.70427682540321257, 12.798396911031886702, 90.28330423635270563, 126.56287801220969413, 50.420707162804319523, 31.714590553165180609, 121.04464260573149659, 75.27815806892249384, 84.918961878323898418, 33.411315203262347495, 39.030671040167362662, 84.168691948580089957, 88.714347122138860868, 125.4977686998572608, 104.58898057410988258, 82.050308256446442101, 91.630857586686033756, 57.942407323756924598, 6.0944048607016156893, 53.988135206906008534, 60.896947230907244375, 24.642269256390136434, 115.01640107932689716, 66.295362566466792487, 50.797841738887655083, 16.020128451327764196, 32.864658798818709329, 58.258976968838396715, 101.14657232196623227, 53.502583301546110306, 85.044880981626192806, 98.401349792206019629, 12.091465689576580189, 29.566065882576367585, 36.71497849791921908, 68.245649132222752087, 61.874948382388538448, 6.0883831270948576275, 84.975010994174226653, 69.398753784251312027, 62.426558883034886094, 5.9528512276847322937, 61.092460729829326738, 75.551773245129879797, 57.328134521914762445, 63.761778872929426143, 49.284486378986912314, 125.05599950007672305, 9.3546926004310080316, 119.59443604894477176, 79.415659437487192918, 75.345855213472532341, 54.380544563548028236, 122.27597849515950656, 34.023276942563825287, 126.16080385446548462, 72.19973321920770104, 54.962153138119901996, 17.395712556084617972, 53.586206084135483252, 73.130207314137805952, 15.381733573391102254, 106.20050120533778681, 56.053108916141354712, 10.326937836958677508, 15.62217635751949274, 88.392993077366554644, 56.449044856020918814, 95.092005032907763962, 120.11763555627476308, 4.3983052594921900891, 123.64668493641511304, 75.668873847360373475, 17.940982707554212539, 105.98892139431336545, 121.31061212396525661, 63.091798914447281277, 
127.94221826723878621, 91.047127319328865269, 58.974202802222862374, 91.700666222626750823, 80.555500894723081728, 95.449855493778159143, 42.12534914686330012, 8.5738802854793902952, 114.77118266141769709, 80.815453058403363684, 76.985051274979923619, 30.656002014751720708, 122.16677403235371457, 19.851268394508224446, 127.57410460454775603, 71.053646377073164331, 74.169256946730456548, 11.2536132901077508, 18.142222337206476368, 85.543837764864292694, 73.026204285062704002, 28.165020726402872242, 41.255855530176631873, 5.7067599711226648651, 113.25943689533596626, 79.801253258381620981, 106.36234202201740118, 8.3054536745767109096, 87.890797754716913914, 88.110100072881323285, 113.57485732535860734, 83.442698201884923037, 96.293159834160178434, 1.6495565525292477105, 52.304147988117620116, 68.374090998720930656, 46.374533526981394971, 110.73518063824303681, 12.046321120531501947, 31.73752277352832607, 100.8034086744664819, 126.11096507706679404, 125.6897108395860414, 71.044559846588526852, 49.379131293575483141, 24.479885121018014615, 95.315582245770201553, 90.726189902175974566, 68.567727829860814381, 54.479225476155988872, 77.020382856317155529, 3.329777532781008631, 118.7394142251687299, 112.69686362344873487, 114.91236918285358115, 102.80579407001641812, 61.566036965301464079, 116.02082048441297957, 105.75985472270622267, 103.49841243579430738, 35.233823144302732544, 34.962236614934226964, 106.7159443529744749, 102.09632146663716412, 28.950954730225930689, 110.32836464759384398, 45.998353852210129844, 22.442550578431109898, 46.057105908283119788, 46.354912207756569842, 108.28296896460597054, 6.3198958704051619861, 8.2408154637269035447, 72.004576934734359384, 77.066688117054582108, 49.164534984491183423, 17.836537815910560312, 10.406124089389777509, 37.454783808996580774, 116.17948862567209289, 82.1950466593116289, 22.568745235286769457, 83.313745308994839434, 59.25891350609890651, 84.759568934212438762, 35.486300630738696782, 113.869949157058727, 
80.139717380156071158, 90.651355246209277539, 26.354486189182352973, 63.018611620183946798, 13.522091787577664945, 13.078543628464103676, 122.5516611171187833, 75.524618827948870603, 27.706298938952386379, 105.16788471645486425, 16.093858602936961688, 127.07576250442798482, 42.236017812694626627, 102.05360752724300255, 20.992487663628708106, 83.304139000865689013, 43.197151608881540596, 127.57948177858997951, 94.168178149368031882, 58.258999995890917489, 73.147907330232555978, 72.453010062788962387, 88.581354108697269112, 35.372359019769646693, 9.8250866116504766978, 23.873295688175858231, 49.797280896371376002, 28.517082914237107616, 37.028869573721749475, 127.46909143024458899, 23.534266294260305585, 81.964163298936909996, 79.586234010417683749, 15.850213526406150777, 107.6610998062787985, 101.94328671861876501, 62.942540309693868039, 47.927715569316205801, 46.516281222953693941, 111.7801319805548701, 31.469745034450170351, 7.4159071651120029856, 119.19079990237878519, 5.3714084867970086634, 17.749924661980912788, 76.612367158832057612, 43.395413660604390316, 14.155212925445084693, 15.532036233598773833, 119.22659746576755424, 78.139470856844127411, 38.865955639939784305, 99.127560898632509634, 28.082000303926179186, 25.380602347686362918, 39.85834049117329414, 94.836864348788367352, 78.530478640601359075, 120.92284447245765477, 124.48493069280448253, 90.683191659019939834, 108.34021489844963071, 12.882345338963204995, 20.823330879389686743, 12.980657272917596856, 102.69395065879143658, 27.900720878791616997, 52.833331582656683167, 90.88675048936784151, 89.205726002033770783, 107.72064579689322272, 123.03000553358651814, 44.576197843172849389, 78.129546331423625816, 35.735697812080616131, 90.723129278754640836, 94.234279826257989043, 70.490851974467659602, 43.799377312516298844, 7.9684366179062635638, 19.814037412357720314, 51.442318099921976682, 102.63273517762718257, 22.868093047894944903, 123.76959767124208156, 24.629720898527011741, 101.38689336282914155, 
55.867264509543019813, 78.233989223881508224, 13.018731624964857474, 24.040101698705257149, 117.49878966503456468, 35.782856530142453266, 28.263236442820925731, 47.841418308511492796, 66.502613640677736839, 101.29707330062956316, 32.940076950726506766, 11.764104058125667507, 47.741568528963398421, 69.724487222250900231, 6.2094576715207949746, 101.16643410301912809, 42.509900457258481765, 111.57086421829444589, 38.818430965402512811, 34.149143490372807719, 77.949569742486346513, 122.61195594866148895, 87.968551159647176974, 74.060415698324504774, 83.87976164342762786, 7.8101550307001161855, 101.00544450349116232, 13.594871020446589682, 11.609657793102087453, 21.452982297239941545, 73.112255688221921446, 42.52454112308259937, 126.46376685497307335, 20.175594624713994563, 98.468706482373818289, 68.153184218692331342, 25.421593201896030223, 107.88231989521591458, 105.13163099368830444, 57.221087382840778446, 1.351327798271086067, 74.387122327909310116, 71.005065933568403125, 107.25101249093131628, 44.565098226743430132, 110.51798771060930449, 119.86635027726515546, 11.311817781876015943, 105.1065228723709879, 100.08444828918436542, 31.574178343853418482, 18.976423746651562396, 64.498192141138133593, 74.718941145132703241, 97.706193970941967564, 38.179355183430743637, 101.26702979595938814, 83.91536967739375541, 39.256985903037275421, 110.85103834692927194, 106.09365126430202508, 105.21589491636768798, 38.354296868972596712, 10.338706092970824102, 29.595779844275966752, 125.28138971676162328, 92.047802567249163985, 5.2639056625921512023, 102.24170520991538069, 50.948851853536325507, 81.055451882879424375, 32.596902655732264975, 71.054176058954908513, 119.74243679227220127, 104.12535315008790349, 84.56458322872640565, 15.685553329993126681, 123.38259929761989042, 97.391946169296716107, 101.69086866915677092, 84.003910969273420051, 78.916319693864352303, 110.78404107806272805, 81.473454219310951885, 100.20011956118469243, 1.8331190478820644785, 12.463801785437681247, 
19.180280757787841139, 71.636130656122986693, 6.4923084899928653613, 99.062666314734087791, 104.24074987531639636, 12.358457134236232378, 94.661952545102394652, 41.231275475780421402, 84.95035227148400736, 4.1791671630016935524, 95.097360914296586998, 13.964831048717314843, 113.53392011240066495, 0.85539319512827205472, 44.589366772688663332, 20.652303300077619497, 127.64009074621571926, 59.903777670697309077, 97.005247896722721634, 5.4482575377296598163, 80.934185811689530965, 61.559010925189795671, 38.863896002196270274, 120.30793075552355731, 85.470034757585381158, 92.113784071229019901, 26.940291282317048172, 65.136213095363928005, 74.614108507390483283, 111.00360008149436908, 83.464772736489976523, 26.668933739991189213, 1.4085536508064251393, 25.596793822067411384, 52.566608472709049238, 125.12575602441938827, 100.84141432560863905, 63.429181106333999196, 114.08928521146663115, 22.556316137848625658, 41.837923756647796836, 66.82263040652469499, 78.061342080338363303, 40.337383897163817892, 49.428694244281359715, 122.99553739971815958, 81.177961148223403143, 36.100616512896522181, 55.261715173372067511, 115.8848146475138492, 12.188809721403231379, 107.97627041381201707, 121.79389446181448875, 49.284538512783910846, 102.0328021586574323, 4.5907251329335849732, 101.59568347777531017, 32.040256902655528393, 65.729317597637418658, 116.51795393768043141, 74.293144643932464533, 107.00516660309585859, 42.089761963256023591, 68.802699584412039258, 24.182931379153160378, 59.13213176515273517, 73.42995699584207614, 8.4912982644455041736, 123.7498967647770769, 12.176766254193353234, 41.950021988352091284, 10.797507568502624054, 124.85311776606977219, 11.905702455369464587, 122.18492145966229145, 23.103546490259759594, 114.65626904382952489, 127.52355774585885229, 98.568972757977462607, 122.1119990001534461, 18.709385200865654042, 111.18887209789318149, 30.831318874978023814, 22.691710426945064683, 108.76108912709969445, 116.55195699031901313, 68.046553885131288553, 
124.32160770893460722, 16.399466438415402081, 109.92430627623980399, 34.791425112169235945, 107.17241216827460448, 18.260414628275611904, 30.763467146785842488, 84.401002410675573628, 112.10621783228270942, 20.653875673917355016, 31.244352715042623458, 48.785986154733109288, 112.89808971204547561, 62.184010065819165902, 112.23527111254952615, 8.796610518988018157, 119.29336987283386406, 23.337747694720746949, 35.881965415112063056, 83.977842788630368887, 114.62122424793051323, 126.18359782889820053, 127.8844365344812104, 54.094254638657730538, 117.94840560444936273, 55.401332445257139625, 33.111001789449801436, 62.899710987556318287, 84.250698293726600241, 17.147760570962418569, 101.54236532283903216, 33.630906116810365347, 25.970102549959847238, 61.312004029507079395, 116.33354806470742915, 39.702536789020086871, 127.14820920909915003, 14.10729275414996664, 20.338513893460913096, 22.507226580215501599, 36.284444674416590715, 43.087675529732223367, 18.052408570125408005, 56.330041452805744484, 82.511711060353263747, 11.413519942248967709, 98.51887379067557049, 31.60250651676687994, 84.724684044034802355, 16.610907349157059798, 47.781595509437465807, 48.22020014576628455, 99.14971465072085266, 38.885396403773484053, 64.586319668320356868, 3.299113105058495421, 104.60829597623887821, 8.7481819974418613128, 92.74906705396642792, 93.470361276486073621, 24.092642241063003894, 63.475045547060290119, 73.606817348936601775, 124.22193015413722605, 123.37942167917572078, 14.089119693177053705, 98.758262587150966283, 48.95977024203966721, 62.631164491544041084, 53.452379804355587112, 9.1354556597216287628, 108.95845095231561572, 26.040765712634311058, 6.6595550655656552408, 109.47882845034109778, 97.393727246901107719, 101.82473836571080028, 77.611588140032836236, 123.13207393060656614, 104.04164096882959711, 83.519709445416083327, 78.996824871588614769, 70.467646288605465088, 69.924473229872091906, 85.43188870595258777, 76.192642933274328243, 57.901909460455499357, 
92.656729295187687967, 91.996707704420259688, 44.885101156862219796, 92.114211816566239577, 92.709824415516777663, 88.565937929211941082, 12.639791740810323972, 16.481630927457445068, 16.009153869472356746, 26.133376234109164216, 98.329069968982366845, 35.673075631821120623, 20.812248178779555019, 74.909567617996799527, 104.35897725134782377, 36.390093318623257801, 45.137490470577176893, 38.627490617989678867, 118.517827012201451, 41.519137868424877524, 70.972601261481031543, 99.739898314121091971, 32.279434760315780295, 53.302710492418555077, 52.708972378364705946, 126.0372232403678936, 27.044183575155329891, 26.157087256931845332, 117.10332223424120457, 23.049237655897741206, 55.412597877908410737, 82.335769432909728494, 32.187717205877561355, 126.15152500885596965, 84.472035625392891234, 76.107215054486005101, 41.98497532726105419, 38.608278001731378026, 86.394303217766719172, 127.158963557183597, 60.336356298736063763, 116.51799999178547296, 18.295814660468749935, 16.906020125581562752, 49.162708217394538224, 70.744718039539293386, 19.650173223300953396, 47.746591376351716463, 99.594561792742752004, 57.034165828474215232, 74.057739147447136929, 126.93818286048917798, 47.068532588524249149, 35.928326597877457971, 31.172468020835367497, 31.700427052815939533, 87.322199612561234972, 75.886573437237530015, 125.88508061938773608, 95.855431138632411603, 93.032562445911025861, 95.560263961109740194, 62.939490068900340702, 14.83181433022764395, 110.38159980475757038, 10.742816973594017327, 35.499849323965463554, 25.224734317664115224, 86.790827321208780631, 28.310425850890169386, 31.064072467201185646, 110.45319493153874646, 28.278941713688254822, 77.731911279883206589, 70.255121797265019268, 56.164000607852358371, 50.761204695372725837, 79.716680982350226259, 61.673728697580372682, 29.060957281206356129, 113.84568894491894753, 120.96986138561260304, 53.366383318043517647, 88.680429796899261419, 25.76469067792640999, 41.646661758779373486, 25.961314545838831691, 
77.387901317582873162, 55.801441757583233994, 105.66666316531700431, 53.773500978739320999, 50.411452004071179545, 87.441291593790083425, 118.06001106717303628, 89.152395686349336756, 28.259092662847251631, 71.471395624164870242, 53.446258557512919651, 60.468559652519616066, 12.981703948935319204, 87.598754625032597687, 15.936873235812527128, 39.628074824715440627, 102.88463619984759134, 77.265470355254365131, 45.736186095793527784, 119.53919534248416312, 49.259441797057661461, 74.773786725658283103, 111.7345290190896776, 28.467978447763016447, 26.037463249929714948, 48.080203397414152278, 106.99757933006912936, 71.565713060284906533, 56.526472885641851462, 95.682836617026623571, 5.0052272813591116574, 74.594146601259126328, 65.880153901453013532, 23.528208116251335014, 95.483137057930434821, 11.448974444501800463, 12.418915343045227928, 74.332868206038256176, 85.019800914520601509, 95.141728436588891782, 77.636861930805025622, 68.298286980749253416, 27.899139484976331005, 117.22391189732661587, 47.937102319297991926, 20.120831396652647527, 39.759523286855255719, 15.62031006140387035, 74.010889006982324645, 27.189742040893179365, 23.219315586204174906, 42.905964594483521068, 18.224511376443842892, 85.049082246168836718, 124.9275337099461467, 40.351189249431627104, 68.937412964751274558, 8.3063684373846626841, 50.843186403795698425, 87.764639790435467148, 82.263261987376608886, 114.44217476568155689, 2.7026555965458101127, 20.77424465582225821, 14.010131867140444228, 86.502024981862632558, 89.130196453490498243, 93.03597542122224695, 111.73270055453031091, 22.623635563755669864, 82.213045744745613774, 72.168896578368730843, 63.148356687706836965, 37.952847493306762772, 0.99638428227990516461, 21.437882290269044461, 67.412387941883935127, 76.358710366865125252, 74.534059591922414256, 39.83073935478751082, 78.51397180607818882, 93.702076693862181855, 84.187302528607688146, 82.431789832735375967, 76.708593737945193425, 20.677412185941648204, 59.191559688555571483, 
122.56277943352688453, 56.095605134498327971, 10.527811325187940383, 76.483410419830761384, 101.89770370707628899, 34.110903765758848749, 65.193805311468167929, 14.108352117913455004, 111.48487358454804053, 80.250706300179444952, 41.129166457452811301, 31.371106659989891341, 118.76519859524341882, 66.783892338597070193, 75.38173733831717982, 40.00782193855047808, 29.832639387732342584, 93.568082156129094074, 34.946908438625541748, 72.400239122369384859, 3.6662380957641289569, 24.927603570875362493, 38.360561515579320258, 15.272261312245973386, 12.984616979989368701, 70.125332629471813561, 80.481499750632792711, 24.716914268476102734, 61.323905090204789303, 82.462550951564480783, 41.900704542968014721, 8.3583343260070250835, 62.194721828593173996, 27.929662097438267665, 99.067840224801329896, 1.7107863902565441094, 89.178733545377326664, 41.304606600158876972, 127.28018149243143853, 119.80755534139825613, 66.010495793445443269, 10.896515075462957611, 33.86837162337906193, 123.11802185037959134, 77.727792004396178527, 112.6158615110507526, 42.940069515174400294, 56.227568142458039802, 53.880582564637734322, 2.272426190731493989, 21.228217014784604544, 94.007200162992376136, 38.929545472979953047, 53.337867479982378427, 2.8171073016128502786, 51.193587644134822767, 105.13321694542173645, 122.25151204884241452, 73.682828651217278093, 126.85836221267163637, 100.17857042293326231, 45.112632275697251316, 83.67584751329923165, 5.6452608130530279595, 28.122684160676726606, 80.674767794327635784, 98.85738848856635741, 117.99107479943995713, 34.355922296446806286, 72.201233025793044362, 110.523430346747773, 103.76962929502769839, 24.377619442810100736, 87.952540827627672115, 115.5877889236289775, 98.569077025571459671, 76.065604317314864602, 9.1814502658708079252, 75.191366955550620332, 64.080513805314694764, 3.4586351952784752939, 105.0359078753645008, 20.586289287868567044, 86.010333206191717181, 84.179523926515685162, 9.6053991688240785152, 48.365862758306320757, 
118.26426353030910832, 18.859913991684152279, 16.982596528894646326, 119.49979352955415379, 24.353532508386706468, 83.900043976704182569, 21.595015137008886086, 121.70623553213954438, 23.811404910742567154, 116.36984291932458291, 46.207092980523157166, 101.31253808766268776, 127.04711549172134255, 69.137945515958563192, 116.22399800030689221, 37.418770401734946063, 94.37774419578636298, 61.662637749956047628, 45.383420853893767344, 89.522178254203026881, 105.10391398064166424, 8.0931077702662150841, 120.64321541786921443, 32.798932876834442141, 91.848612552479607984, 69.582850224342109868, 86.344824336549208965, 36.520829256551223807, 61.526934293575322954, 40.802004821354785236, 96.212435664565418847, 41.307751347834710032, 62.488705430085246917, 97.571972309469856555, 97.796179424090951215, 124.36802013164196978, 96.470542225099052303, 17.593221037979674293, 110.58673974566772813, 46.675495389441493899, 71.763930830227764091, 39.955685577264375752, 101.24244849586466444, 124.36719565780003904, 127.7688730689624208, 108.18850927731909906, 107.89681120889872545, 110.80266489051427925, 66.222003578899602871, 125.79942197511627455, 40.501396587453200482, 34.295521141928475117, 75.08473064567806432, 67.261812233620730694, 51.940205099919694476, 122.62400805901779677, 104.6670961294148583, 79.405073578043811722, 126.29641841819830006, 28.214585508299933281, 40.677027786921826191, 45.014453160431003198, 72.56888934883318143, 86.175351059464446735, 36.10481714025081601, 112.66008290561512695, 37.023422120706527494, 22.827039884501573397, 69.03774758135114098, 63.20501303353739786, 41.44936808806960471, 33.221814698314119596, 95.563191018874931615, 96.440400291536207078, 70.29942930144170532, 77.770792807550606085, 1.1726393366407137364, 6.5982262101206288207, 81.216591952477756422, 17.496363994887360604, 57.49813410793649382, 58.940722552972147241, 48.185284482126007788, 126.95009109412421822, 19.213634697876841528, 120.44386030827809009, 118.75884335835144157, 
28.178239386357745389, 69.516525174301932566, 97.919540484079334419, 125.26232898308808217, 106.90475960871117422, 18.270911319446895504, 89.916901904631231446, 52.081531425268622115, 13.31911013113494846, 90.957656900685833534, 66.787454493805853417, 75.64947673142523854, 27.223176280069310451, 118.26414786121677025, 80.083281937662832206, 39.039418890835804632, 29.993649743180867517, 12.935292577214568155, 11.848946459747821791, 42.863777411908813519, 24.385285866548656486, 115.80381892091463669, 57.313458590379013913, 55.993415408840519376, 89.77020231372807757, 56.228423633136117132, 57.419648831037193304, 49.131875858423882164, 25.279583481620647945, 32.963261854914890137, 32.018307738944713492, 52.26675246822196641, 68.65813993796473369, 71.346151263642241247, 41.624496357562748017, 21.819135235997237032, 80.717954502695647534, 72.780186637246515602, 90.274980941154353786, 77.254981235982995713, 109.035654024402902, 83.038275736853393028, 13.945202522965701064, 71.479796628242183942, 64.55886952063156059, 106.60542098483711015, 105.41794475673304987, 124.07444648073578719, 54.088367150310659781, 52.314174513867328642, 106.20664446848240914, 46.098475311795482412, 110.82519575582045945, 36.671538865819456987, 64.375434411758760689, 124.30305001771557727, 40.944071250785782468, 24.214430108972010203, 83.96995065452210838, 77.216556003466394031, 44.788606435537076322, 126.31792711436719401, 120.67271259747576551, 105.03599998357094591, 36.59162932093749987, 33.812040251163125504, 98.325416434792714426, 13.489436079078586772, 39.300346446601906791, 95.493182752707070904, 71.189123585485504009, 114.06833165695206844, 20.115478294897911837, 125.87636572097835597, 94.137065177048498299, 71.856653195754915942, 62.344936041670734994, 63.400854105635517044, 46.644399225126107922, 23.77314687447869801, 123.77016123877911014, 63.710862277268461185, 58.065124891825689701, 63.120527922219480388, 125.8789801378006814, 29.663628660458925879, 92.763199609515140764, 
21.485633947188034654, 70.999698647930927109, 50.449468635331868427, 45.581654642417561263, 56.620851701780338772, 62.12814493440600927, 92.906389863077492919, 56.557883427380147623, 27.463822559770051157, 12.510243594530038536, 112.32800121570471674, 101.52240939074908965, 31.433361964700452518, 123.34745739516074536, 58.121914562416350236, 99.691377889841533033, 113.93972277122520609, 106.73276663609067327, 49.360859593798522837, 51.529381355852819979, 83.293323517558746971, 51.922629091681301361, 26.775802635165746324, 111.60288351517010597, 83.333326330634008627, 107.547001957478642, 100.82290400814599707, 46.882583187583804829, 108.12002213434971054, 50.304791372698673513, 56.518185325698141241, 14.942791248329740483, 106.8925171150258393, 120.93711930503923213, 25.963407897874276387, 47.197509250065195374, 31.873746471625054255, 79.256149649434519233, 77.769272399698820664, 26.530940710508730263, 91.472372191590693546, 111.07839068496832624, 98.5188835941189609, 21.547573451320204185, 95.469058038179355208, 56.935956895526032895, 52.074926499859429896, 96.160406794831942534, 85.995158660141896689, 15.131426120569813065, 113.0529457712873409, 63.365673234053247143, 10.010454562721861294, 21.188293202521890635, 3.7603078029060270637, 47.056416232506308006, 62.966274115860869642, 22.897948889003600925, 24.837830686094093835, 20.66573641208015033, 42.039601829044840997, 62.283456873181421543, 27.273723861613689223, 8.5965739614985068329, 55.798278969956299989, 106.44782379465323174, 95.874204638595983852, 40.241662793305295054, 79.519046573710511439, 31.240620122811378678, 20.02177801396464929, 54.379484081786358729, 46.438631172408349812, 85.811929188967042137, 36.449022752891323762, 42.098164492341311416, 121.8550674198922934, 80.702378498866892187, 9.8748259295025491156, 16.612736874769325368, 101.68637280759503483, 47.529279580874572275, 36.52652397475685575, 100.88434953136311378, 5.4053111930916202255, 41.54848931164451642, 28.020263734284526436, 
45.004049963728903094, 50.260392906984634465, 58.07195084244813188, 95.46540110906062182, 45.247271127514977707, 36.426091489494865527, 16.337793156737461686, 126.29671337541367393, 75.905694986613525543, 1.9927685645598103292, 42.8757645805417269, 6.8247758837715082336, 24.717420733733888483, 21.068119183844828513, 79.661478709578659618, 29.027943612160015618, 59.40415338772800169, 40.37460505721901427, 36.863579665474389913, 25.41718747589038685, 41.354824371883296408, 118.38311937711478095, 117.12555886705376906, 112.19121026899665594, 21.055622650379518745, 24.966820839665160747, 75.795407414152577985, 68.221807531521335477, 2.387610622936335858, 28.216704235830547987, 94.969747169099719031, 32.501412600358889904, 82.258332914905622602, 62.742213319983420661, 109.53039719049047562, 5.5677846771977783646, 22.763474676634359639, 80.015643877100956161, 59.665278775468323147, 59.136164312258188147, 69.893816877254721476, 16.800478244738769718, 7.3324761915282579139, 49.855207141750724986, 76.721123031158640515, 30.544522624491946772, 25.969233959982375382, 12.250665258947265102, 32.962999501265585423, 49.433828536952205468, 122.64781018041321659, 36.925101903128961567, 83.801409085936029442, 16.716668652014050167, 124.38944365718634799, 55.859324194880173309, 70.135680449602659792, 3.4215727805167261977, 50.357467090754653327, 82.609213200321391923, 126.56036298486651503, 111.61511068280015024, 4.0209915868908865377, 21.793030150925915223, 67.736743246761761839, 118.23604370076282066, 27.455584008795995032, 97.231723022101505194, 85.880139030348800588, 112.4551362849160796, 107.76116512927546864, 4.544852381462987978, 42.456434029572847066, 60.014400325984752271, 77.859090945959906094, 106.67573495996475685, 5.6342146032257005572, 102.38717528827328351, 82.266433890847110888, 116.50302409768846701, 19.365657302438194165, 125.71672442534691072, 72.357140845866524614, 90.225264551398140611, 39.351695026598463301, 11.290521626106055919, 56.24536832135709119, 
33.349535588658909546, 69.714776977136352798, 107.98214959887991427, 68.71184459289725055, 16.402466051589726703, 93.046860693499183981, 79.53925859005539678, 48.755238885623839451, 47.905081655258982209, 103.17557784726159298, 69.138154051146557322, 24.131208634629729204, 18.362900531745253829, 22.382733911104878644, 0.16102761062938952819, 6.9172703905605885666, 82.071815750729001593, 41.172578575740772067, 44.020666412387072342, 40.359047853031370323, 19.21079833764815703, 96.731725516616279492, 108.52852706061821664, 37.719827983368304558, 33.965193057792930631, 110.99958705910830759, 48.707065016777050914, 39.800087953408365138, 43.19003027402141015, 115.41247106427908875, 47.622809821488772286, 104.73968583864916582, 92.414185961049952311, 74.625076175329013495, 126.09423098344632308, 10.275891031920764362, 104.44799600061378442, 74.837540803473530104, 60.755488391572725959, 123.32527549991209526, 90.766841707791172666, 51.044356508409691742, 82.207827961283328477, 16.186215540536068147, 113.28643083573842887, 65.59786575367252226, 55.697225104962853948, 11.165700448687857715, 44.68964867309841793, 73.041658513102447614, 123.05386858715064591, 81.60400964271320845, 64.424871329130837694, 82.615502695669420063, 124.97741086017413181, 67.143944618943351088, 67.59235884818554041, 120.73604026328757755, 64.941084450198104605, 35.186442075962986564, 93.17347949133545626, 93.350990778886625776, 15.527861660455528181, 79.911371154532389482, 74.484896991732966853, 120.73439131560371607, 127.53774613792847958, 88.37701855464183609, 87.793622417801088886, 93.605329781032196479, 4.4440071578028437216, 123.5988439502325491, 81.002793174906400964, 68.591042283860588213, 22.169461291359766619, 6.5236244672414613888, 103.88041019984302693, 117.24801611803923151, 81.334192258833354572, 30.810147156091261422, 124.59283683639660012, 56.429171016599866562, 81.354055573843652382, 90.028906320865644375, 17.137778697670000838, 44.350702118928893469, 72.209634280505269999, 
97.320165811230253894, 74.046844241413054988, 45.654079769006784773, 10.075495162702281959, 126.41002606707479572, 82.898736176139209419, 66.443629396631877171, 63.126382037753501208, 64.880800583076052135, 12.598858602887048619, 27.54158561510121217, 2.3452786732814274728, 13.19645242024489562, 34.433183904955512844, 34.992727989774721209, 114.99626821587298764, 117.88144510594429448, 96.370568964255653555, 125.90018218824843643, 38.427269395757321035, 112.88772061655981815, 109.51768671670288313, 56.356478772719128756, 11.033050348607503111, 67.839080968162306817, 122.52465796617980232, 85.809519217422348447, 36.541822638897428988, 51.833803809262462892, 104.16306285053724423, 26.6382202622735349, 53.915313801375305047, 5.5749089876153448131, 23.298953462850477081, 54.446352560142258881, 108.52829572243717848, 32.166563875325664412, 78.078837781671609264, 59.987299486361735035, 25.87058515442913631, 23.697892919495643582, 85.727554823817627039, 48.770571733100950951, 103.60763784182927338, 114.62691718075802783, 111.98683081768467673, 51.54040462745615514, 112.45684726627223426, 114.83929766207438661, 98.263751716847764328, 50.559166963241295889, 65.926523709829780273, 64.036615477889426984, 104.5335049364475708, 9.3162798759331053589, 14.692302527284482494, 83.248992715125496034, 43.638270471998112043, 33.435909005394933047, 17.560373274496669183, 52.549961882312345551, 26.509962471969629405, 90.071308048809441971, 38.076551473706786055, 27.890405045935040107, 14.959593256488005864, 1.117739041266759159, 85.210841969677858287, 82.835889513469737722, 120.14889296147157438, 108.17673430062131956, 104.62834902773829526, 84.413288936964818276, 92.196950623594602803, 93.650391511640918907, 73.343077731642551953, 0.75086882352115935646, 120.60610003543115454, 81.888142501571564935, 48.428860217947658384, 39.939901309047854738, 26.433112006932788063, 89.577212871077790624, 124.63585422873802599, 113.34542519495516899, 82.07199996714189183, 73.183258641874999739, 
67.624080502329888986, 68.650832869585428853, 26.978872158157173544, 78.600692893207451561, 62.986365505417779787, 14.378247170974645996, 100.13666331390413688, 40.230956589799461653, 123.75273144196034991, 60.274130354100634577, 15.713306391509831883, 124.68987208334146999, 126.80170821127103409, 93.288798450252215844, 47.546293748961033998, 119.54032247755822027, 127.42172455453692237, 116.1302497836513794, 126.24105584444259875, 123.75796027560136281, 59.327257320921489736, 57.526399219030281529, 42.971267894379707286, 13.999397295861854218, 100.89893727066373685, 91.163309284835122526, 113.24170340356067754, 124.25628986881565652, 57.812779726154985838, 113.11576685476029525, 54.927645119540102314, 25.02048718906371505, 96.656002431413071463, 75.044818781498179305, 62.866723929404543014, 118.69491479032149073, 116.24382912483633845, 71.382755779686704045, 99.879445542454050155, 85.465533272181346547, 98.721719187597045675, 103.05876271170927794, 38.586647035117493942, 103.84525818336260272, 53.551605270335130626, 95.20576703034384991, 38.666652661271655234, 87.094003914960921975, 73.645808016291994136, 93.765166375171247637, 88.240044268699421082, 100.60958274539734703, 113.03637065139628248, 29.885582496663118945, 85.785034230055316584, 113.87423861007846426, 51.926815795748552773, 94.395018500130390748, 63.747492943253746489, 30.512299298872676445, 27.538544799397641327, 53.061881421021098504, 54.944744383185025072, 94.15678136994029046, 69.0377671882379218, 43.095146902644046349, 62.938116076358710416, 113.87191379105206579, 104.14985299972249777, 64.320813589667523047, 43.990317320283793379, 30.262852241143264109, 98.105891542574681807, 126.73134646810649429, 20.020909125443722587, 42.376586405043781269, 7.5206156058120541275, 94.112832465012616012, 125.93254823172173928, 45.79589777800720185, 49.67566137218818767, 41.331472824160300661, 84.079203658089681994, 124.56691374636284309, 54.547447723227378447, 17.193147922997013666, 111.59655793991623796, 
84.895647589310101466, 63.748409277191967703, 80.483325586614228087, 31.038093147421022877, 62.481240245622757357, 40.04355602792929858, 108.75896816357271746, 92.877262344820337603, 43.623858377934084274, 72.898045505782647524, 84.19632898468626081, 115.71013483978822478, 33.404756997733784374, 19.749651859005098231, 33.225473749542288715, 75.372745615190069657, 95.05855916174914455, 73.05304794951734948, 73.768699062726227567, 10.810622386183240451, 83.096978623289032839, 56.040527468569052871, 90.008099927457806189, 100.52078581396926893, 116.14390168489990174, 62.930802218121243641, 90.494542255029955413, 72.852182978989731055, 32.675586313478561351, 124.59342675083098584, 23.811389973227051087, 3.9855371291196206585, 85.751529161083453801, 13.649551767543016467, 49.434841467467776965, 42.136238367693295004, 31.322957419157319237, 58.055887224320031237, 118.80830677545600338, 80.74921011443802854, 73.727159330948779825, 50.834374951780773699, 82.709648743770230794, 108.76623875423319987, 106.25111773411117611, 96.382420537993311882, 42.111245300759037491, 49.933641679333959473, 23.590814828308793949, 8.443615063042670954, 4.7752212458726717159, 56.433408471664733952, 61.939494338199438062, 65.002825200721417787, 36.516665829811245203, 125.4844266399704793, 91.060794380980951246, 11.135569354395556729, 45.526949353272357257, 32.031287754201912321, 119.33055755093664629, 118.27232862452001427, 11.787633754509442952, 33.600956489477539435, 14.664952383056515828, 99.710414283505087951, 25.44224606231728103, 61.089045248983893543, 51.938467919964750763, 24.501330517894530203, 65.925999002531170845, 98.867657073908048915, 117.29562036082643317, 73.850203806261561112, 39.602818171872058883, 33.433337304031738313, 120.77888731437269598, 111.7186483897639846, 12.271360899205319583, 6.8431455610334523953, 100.71493418151294463, 37.218426400646421826, 125.12072596973666805, 95.230221365600300487, 8.0419831737817730755, 43.586060301855468424, 7.4734864935271616559, 
108.47208740152564133, 54.911168017595628044, 66.463446044206648367, 43.760278060701239156, 96.910272569835797185, 87.522330258550937288, 9.089704762925975956, 84.912868059145694133, 120.02880065197314252, 27.718181891923450166, 85.351469919929513708, 11.268429206455039093, 76.774350576550205005, 36.532867781697859755, 105.00604819537693402, 38.731314604880026309, 123.43344885069745942, 16.714281691736687208, 52.450529102796281222, 78.703390053196926601, 22.581043252212111838, 112.49073664271418238, 66.699071177321457071, 11.429553954276343575, 87.964299197763466509, 9.4236891857945010997, 32.804932103183091385, 58.09372138700200594, 31.078517180110793561, 97.510477771247678902, 95.810163310521602398, 78.351155694526823936, 10.276308102293114644, 48.262417269259458408, 36.725801063490507659, 44.765467822209757287, 0.32205522125877905637, 13.834540781124815112, 36.143631501458003186, 82.345157151481544133, 88.041332824774144683, 80.718095706062740646, 38.421596675299952039, 65.463451033236196963, 89.057054121236433275, 75.439655966736609116, 67.930386115589499241, 93.99917411821661517, 97.414130033554101828, 79.600175906816730276, 86.38006054804645828, 102.8249421285581775, 95.245619642981182551, 81.479371677301969612, 56.828371922103542602, 21.250152350661664968, 124.18846196689264616, 20.551782063845166704, 80.895992001231206814, 21.675081606947060209, 121.5109767831490899, 118.65055099982782849, 53.533683415582345333, 102.08871301682302146, 36.415655922566656955, 32.372431081075774273, 98.572861671480495716, 3.1957315073450445198, 111.3944502099257079, 22.331400897375715431, 89.379297346200473839, 18.083317026208533207, 118.10773717430492979, 35.208019285430054879, 0.84974265826531336643, 37.231005391342478106, 121.9548217203519016, 6.287889237890340155, 7.1847176963747187983, 113.47208052657879307, 1.8821689003962092102, 70.372884151929611107, 58.346958982674550498, 58.701981557773251552, 31.055723320914694341, 31.822742309068416944, 20.969793983469571685, 
113.46878263121107011, 127.07549227586059715, 48.754037109287310159, 47.587244835605815751, 59.210659562064392958, 8.888014315609325422, 119.19768790046873619, 34.005586349816439906, 9.1820845677211764269, 44.338922582719533239, 13.047248934486560756, 79.76082039968969184, 106.49603223608210101, 34.668384517666709144, 61.620294312186160823, 121.18567367279320024, 112.8583420332033711, 34.708111147690942744, 52.05781264173128875, 34.275557395340001676, 88.701404237861424917, 16.419268561014177976, 66.640331622464145767, 20.093688482826109976, 91.308159538013569545, 20.150990325408201898, 124.82005213415322942, 37.797472352282056818, 4.8872587932637543418, 126.2527640755106404, 1.7616011661521042697, 25.197717205774097238, 55.08317123020242434, 4.6905573465628549457, 26.392904840489791241, 68.866367809911025688, 69.985455979553080397, 101.99253643174961326, 107.76289021189222694, 64.741137928514945088, 123.80036437649687286, 76.85453879151828005, 97.775441233123274287, 91.035373433405766264, 112.71295754543825751, 22.0661006972186442, 7.6781619363282516133, 117.04931593236324261, 43.619038434844696894, 73.083645277794857975, 103.66760761852856376, 80.326125701074488461, 53.276440524547069799, 107.83062760275061009, 11.149817975230689626, 46.597906925700954162, 108.89270512028815574, 89.056591444874356966, 64.333127750654966803, 28.157675563343218528, 119.97459897272347007, 51.74117030885827262, 47.395785838994925143, 43.455109647635254078, 97.54114346620553988, 79.215275683662184747, 101.25383436151969363, 95.973661635369353462, 103.08080925491594826, 96.913694532548106508, 101.67859532415241119, 68.527503433695528656, 101.11833392648622976, 3.8530474196631985251, 0.073230955782491946593, 81.067009872895141598, 18.632559751866210718, 29.384605054572602967, 38.497985430250992067, 87.276540943996224087, 66.871818010789866094, 35.120746548993338365, 105.0999237646246911, 53.019924943942896789, 52.142616097618883941, 76.15310294741357211, 55.780810091870080214, 
29.919186512976011727, 2.2354780825371562969, 42.421683939359354554, 37.671779026943113422, 112.29778592294678674, 88.353468601246277103, 81.256698055480228504, 40.826577873933274532, 56.393901247192843584, 59.300783023285475792, 18.686155463285103906, 1.5017376470459566917, 113.21220007086594705, 35.77628500314312987, 96.857720435895316768, 79.879802618095709477, 52.866224013869214104, 51.154425742159219226, 121.27170845747968997, 98.69085038991397596, 36.143999934287421638, 18.366517283753637457, 7.2481610046634159517, 9.3016657391708577052, 53.957744316317985067, 29.201385786414903123, 125.97273101083555957, 28.756494341952929972, 72.27332662780827377, 80.461913179602561286, 119.50546288392069982, 120.54826070820126915, 31.426612783023301745, 121.37974416668657796, 125.60341642254570615, 58.577596900504431687, 95.092587497925705975, 111.08064495512007852, 126.84344910907748272, 104.26049956730639678, 124.48211168888883549, 119.51592055120272562, 118.65451464184297947, 115.05279843806056306, 85.942535788759414572, 27.998794591727346415, 73.797874541331111686, 54.32661856967388303, 98.483406807124993065, 120.51257973763131304, 115.62555945231360965, 98.231533709520590492, 109.85529023908020463, 50.040974378131068079, 65.312004862829780905, 22.089637562996358611, 125.73344785881272401, 109.38982958064661943, 104.48765824967631488, 14.765511559377046069, 71.75889108491173829, 42.931066544366331073, 69.443438375197729329, 78.117525423418555874, 77.173294070238625864, 79.690516366725205444, 107.10321054067389923, 62.411534060687699821, 77.333305322546948446, 46.188007829925481929, 19.29161603258762625, 59.530332750346133253, 48.480088537402480142, 73.21916549079833203, 98.072741302792564966, 59.77116499332987587, 43.570068460114271147, 99.748477220156928524, 103.85363159150074353, 60.790037000260781497, 127.49498588650749298, 61.02459859774535289, 55.077089598795282654, 106.12376284204219701, 109.88948876637368812, 60.3135627398842189, 10.075534376475843601, 
86.190293805288092699, 125.87623215272105881, 99.743827582107769558, 80.29970599944863352, 0.64162717933504609391, 87.980634640567586757, 60.525704482290166197, 68.211783085153001593, 125.46269293621298857, 40.041818250887445174, 84.753172810087562539, 15.041231211627746234, 60.225664930028870003, 123.86509646344347857, 91.591795556014403701, 99.35132274437637534, 82.662945648324239301, 40.158407316183001967, 121.13382749272568617, 109.09489544645475689, 34.38629584599766531, 95.193115879836113891, 41.791295178620202933, 127.49681855438757339, 32.966651173228456173, 62.076186294842045754, 124.96248049124551471, 80.087112055862235138, 89.517936327149072895, 57.754524689640675206, 87.247716755871806527, 17.796091011568933027, 40.392657969376159599, 103.42026967957644956, 66.809513995467568748, 39.499303718010196462, 66.450947499088215409, 22.745491230383777292, 62.11711832350192708, 18.106095899038336938, 19.537398125452455133, 21.621244772370118881, 38.193957246578065678, 112.08105493714174372, 52.016199854919250356, 73.041571627942175837, 104.28780336979980348, 125.86160443624612526, 52.989084510063548805, 17.70436595797946211, 65.351172626960760681, 121.18685350166197168, 47.622779946454102173, 7.9710742582428792957, 43.503058322166907601, 27.299103535086032934, 98.869682934935553931, 84.272476735386590008, 62.645914838314638473, 116.11177444864370045, 109.61661355091564474, 33.49842022887969506, 19.45431866189755965, 101.66874990356518538, 37.419297487544099567, 89.532477508470037719, 84.502235468225990189, 64.764841075986623764, 84.222490601518074982, 99.867283358667918947, 47.181629656617587898, 16.887230126085341908, 9.5504424917453434318, 112.8668169433294679, 123.8789886764025141, 2.0056504014428355731, 73.033331659626128385, 122.96885327994459658, 54.121588761961902492, 22.271138708791113459, 91.053898706544714514, 64.062575508407462621, 110.66111510187693057, 108.54465724904002855, 23.575267509018885903, 67.20191297895507887, 29.329904766116669634, 
71.420828567010175902, 50.88449212463456206, 122.17809049796778709, 103.87693583992950153, 49.002661035792698385, 3.8519980050659796689, 69.735314147819735808, 106.59124072165650432, 19.700407612523122225, 79.205636343744117767, 66.866674608067114605, 113.55777462874902994, 95.437296779527969193, 24.542721798410639167, 13.68629112207054277, 73.429868363025889266, 74.43685280129648163, 122.24145193947697408, 62.460442731200600974, 16.083966347563546151, 87.172120603710936848, 14.946972987057961291, 88.944174803051282652, 109.82233603519489407, 4.9268920884132967331, 87.52055612140611629, 65.820545139675232349, 47.044660517101874575, 18.179409525855589891, 41.825736118295026245, 112.05760130394628504, 55.436363783850538312, 42.702939839859027416, 22.536858412913716165, 25.548701153100410011, 73.065735563399357488, 82.012096390753868036, 77.462629209763690596, 118.86689770139491884, 33.428563383473374415, 104.90105820559620042, 29.406780106393853202, 45.162086504427861655, 96.98147328543200274, 5.3981423546429141425, 22.859107908556325128, 47.928598395526933018, 18.847378371589002199, 65.609864206366182771, 116.18744277400401188, 62.1570343602252251, 67.020955542498995783, 63.620326621046842774, 28.702311389053647872, 20.552616204586229287, 96.524834538518916816, 73.451602126981015317, 89.530935644419514574, 0.64411044251755811274, 27.669081562249630224, 72.28726300291964435, 36.690314302966726245, 48.082665649551927345, 33.436191412125481293, 76.843193350603542058, 2.926902066476031905, 50.114108242476504529, 22.879311933476856211, 7.8607722311826364603, 59.998348236436868319, 66.828260067111841636, 31.20035181363709853, 44.760121096096554538, 77.649884257119992981, 62.491239285966003081, 34.958743354603939224, 113.6567438442070852, 42.500304701326967916, 120.37692393378529232, 41.103564127693971386, 33.791984002462413628, 43.350163213897758396, 115.02195356630181777, 109.30110199965565698, 107.06736683116832864, 76.177426033646042924, 72.831311845133313909, 
64.744862162155186525, 69.145723342960991431, 6.3914630146937270183, 94.788900419851415791, 44.66280179475506884, 50.758594692400947679, 36.166634052420704393, 108.21547434860985959, 70.416038570860109758, 1.6994853165306267329, 74.46201078268859419, 115.90964344070380321, 12.57577847578068031, 14.369435392753075575, 98.944161053157586139, 3.7643378007960563991, 12.745768303859222215, 116.693917965349101, 117.4039631155465031, 62.111446641829388682, 63.645484618136833888, 41.93958796694278135, 98.937565262425778201, 126.15098455172119429, 97.508074218574620318, 95.174489671211631503, 118.42131912412878592, 17.776028631222288823, 110.39537580093747238, 68.011172699632879812, 18.364169135442352854, 88.677845165439066477, 26.094497868976759491, 31.521640799383021658, 84.992064472164202016, 69.336769035333418287, 123.24058862437595963, 114.37134734559003846, 97.716684066406742204, 69.416222295381885488, 104.1156252834625775, 68.551114790683641331, 49.402808475722849835, 32.838537122031993931, 5.2806632449282915331, 40.18737696565585793, 54.61631907602713909, 40.301980650820041774, 121.64010426830645883, 75.594944704564113636, 9.7745175865311466623, 124.50552815102128079, 3.5232023323042085394, 50.395434411548194475, 110.16634246040484868, 9.3811146931257098913, 52.78580968098322046, 9.7327356198256893549, 11.970911959109798772, 75.985072863502864493, 87.525780423788091866, 1.4822758570335281547, 119.60072875299738371, 25.709077583040198078, 67.550882466250186553, 54.070746866815170506, 97.425915090876515023, 44.1322013944372884, 15.356323872660141205, 106.09863186472648522, 87.238076869693031767, 18.167290555593353929, 79.335215237060765503, 32.652251402148976922, 106.55288104909777758, 87.661255205501220189, 22.299635950461379252, 93.195813851405546302, 89.785410240579949459, 50.113182889752351912, 0.66625550130993360654, 56.315351126690075034, 111.94919794544694014, 103.48234061771654524, 94.791571677989850286, 86.910219295274146134, 67.082286932414717739, 
30.430551367328007473, 74.507668723043025238, 63.947323270742344903, 78.161618509835534496, 65.827389065099850995, 75.357190648308460368, 9.0550068673946952913, 74.236667852972459514, 7.7060948393263970502, 0.14646191156862187199, 34.134019745790283196, 37.265119503732421435, 58.769210109145205934, 76.995970860505622113, 46.553081887996086152, 5.7436360215797321871, 70.241493097990314709, 82.199847529249382205, 106.03984988788579358, 104.28523219524140586, 24.30620589482714422, 111.56162018374016043, 59.838373025952023454, 4.4709561650743125938, 84.843367878718709107, 75.343558053889864823, 96.59557184589721146, 48.706937202496192185, 34.513396110960457008, 81.653155747866549063, 112.78780249438568717, 118.60156604657095158, 37.372310926570207812, 3.0034752940955513623, 98.424400141731894109, 71.552570006289897719, 65.715440871794271516, 31.759605236191418953, 105.73244802773842821, 102.30885148432207643, 114.54341691496301792, 69.381700779827951919, 72.287999868574843276, 36.733034567507274915, 14.496322009326831903, 18.60333147834171541, 107.91548863263597013, 58.402771572833444225, 123.94546202167111915, 57.512988683909497922, 16.546653255620185519, 32.92382635920876055, 111.01092576784503763, 113.09652141640253831, 62.853225566050241468, 114.75948833337679389, 123.20683284509141231, 117.15519380101250135, 62.185174995855049929, 94.161289910243795021, 125.68689821815860341, 80.520999134616431547, 120.96422337778130895, 111.03184110240908922, 109.30902928368959692, 102.10559687612476409, 43.885071577518829145, 55.997589183458330808, 19.595749082662223373, 108.65323713935140404, 68.96681361425362411, 113.02515947526626405, 103.25111890463085729, 68.463067419044818962, 91.710580478160409257, 100.08194875626577414, 2.6240097256595618092, 44.1792751259963552, 123.46689571762908599, 90.779659161296876846, 80.975316499352629762, 29.531023118757730117, 15.517782169823476579, 85.862133088736300124, 10.886876750399096636, 28.235050846840749728, 26.346588140480889706, 
31.381032733454048866, 86.206421081347798463, 124.82306812137903762, 26.666610645093896892, 92.376015659850963857, 38.583232065175252501, 119.06066550069226651, 96.960177074808598263, 18.438330981600302039, 68.145482605585129932, 119.54232998665975174, 87.140136920232180273, 71.496954440317495028, 79.70726318300148705, 121.58007400052520097, 126.98997177301498596, 122.04919719549070578, 110.15417919759420329, 84.247525684088031994, 91.778977532747376245, 120.6271254797684378, 20.151068752955325181, 44.380587610579823377, 123.7524643054457556, 71.487655164219177095, 32.59941199889726704, 1.2832543586737301666, 47.961269281135173514, 121.05140896458397037, 8.4235661703096411657, 122.92538587242597714, 80.083636501778528327, 41.506345620175125077, 30.082462423255492467, 120.45132986006137799, 119.73019292689059512, 55.18359111203244538, 70.702645488756388659, 37.325891296648478601, 80.316814632369641913, 114.26765498545501032, 90.189790892909513786, 68.772591691998968599, 62.386231759672227781, 83.582590357244043844, 126.99363710877514677, 65.933302346456912346, 124.15237258968772949, 121.92496098249466741, 32.174224111728108255, 51.03587265429814579, 115.50904937928498839, 46.495433511743613053, 35.592182023137866054, 80.785315938755957177, 78.840539359156537103, 5.6190279909351374954, 78.998607436024030903, 4.9018949981764308177, 45.490982460767554585, 124.23423664700385416, 36.212191798076673876, 39.074796250904910266, 43.242489544740237761, 76.387914493159769336, 96.162109874283487443, 104.03239970984213869, 18.083143255887989653, 80.575606739603244932, 123.72320887249225052, 105.97816902012709761, 35.408731915962562198, 2.7023452539251593407, 114.37370700332394335, 95.245559892908204347, 15.942148516485758591, 87.006116644333815202, 54.598207070175703848, 69.73936586987474584, 40.544953470773180015, 125.29182967663291492, 104.22354889729103888, 91.233227101831289474, 66.996840457759390119, 38.908637323798757279, 75.337499807130370755, 74.838594975091837114, 
51.064955016943713417, 41.004470936455618357, 1.5296821519768855069, 40.444981203039787943, 71.734566717335837893, 94.363259313235175796, 33.774460252170683816, 19.100884983494324842, 97.733633886662573786, 119.7579773528050282, 4.011300802889309125, 18.066663319255894748, 117.93770655988919316, 108.24317752392380498, 44.542277417585864896, 54.107797413093067007, 0.12515101681856322102, 93.322230203757499112, 89.089314498080057092, 47.150535018041409785, 6.4038259579101577401, 58.659809532236977248, 14.841657134020351805, 101.7689842492727621, 116.35618099593921215, 79.753871679859003052, 98.00532207158539677, 7.7039960101355973165, 11.470628295643109595, 85.18248144331300864, 39.400815225049882429, 30.411272687491873512, 5.7333492161378671881, 99.115549257501697866, 62.874593559055938385, 49.085443596821278334, 27.372582244144723518, 18.859736726055416511, 20.873705602596601238, 116.48290387895394815, 124.92088546240120195, 32.167932695130730281, 46.344241207421873696, 29.89394597411956056, 49.888349606106203282, 91.644672070389788132, 9.8537841768302314449, 47.041112242815870559, 3.6410902793504646979, 94.08932103420374915, 36.358819051711179782, 83.651472236593690468, 96.115202607892570086, 110.87272756770107662, 85.405879679718054831, 45.073716825827432331, 51.097402306204458, 18.131471126802352956, 36.024192781511374051, 26.925258419531019172, 109.73379540278983768, 66.857126766950386809, 81.802116411192400847, 58.813560212787706405, 90.324173008859361289, 65.96294657086400548, 10.796284709285828285, 45.718215817112650257, 95.857196791053866036, 37.694756743181642378, 3.21972841273600352, 104.37488554800802376, 124.31406872045408818, 6.0419110849979915656, 127.24065324209368555, 57.404622778107295744, 41.105232409176096553, 65.04966907704147161, 18.903204253965668613, 51.061871288839029148, 1.2882208850351162255, 55.338163124502898427, 16.5745260058392887, 73.380628605933452491, 96.16533129910385469, 66.872382824254600564, 25.686386701207084116, 
5.8538041329520638101, 100.22821648495300906, 45.758623866953712422, 15.721544462365272921, 119.99669647287737462, 5.6565201342236832716, 62.400703627274197061, 89.520242192196747055, 27.299768514239985961, 124.98247857193200616, 69.917486709211516427, 99.313487688414170407, 85.00060940265757381, 112.75384786757058464, 82.207128255387942772, 67.583968004924827255, 86.700326427799154771, 102.04390713260727352, 90.602203999314951943, 86.134733662340295268, 24.354852067295723828, 17.662623690270265797, 1.489724324314011028, 10.291446685921982862, 12.782926029391092015, 61.577800839702831581, 89.325603589510137681, 101.51718938480189536, 72.333268104845046764, 88.430948697223357158, 12.832077141720219515, 3.3989706330648914445, 20.92402156538082636, 103.81928688140760642, 25.15155695156136062, 28.73887078550978913, 69.888322106315172277, 7.5286756015921127982, 25.491536607722082408, 105.38783593070183997, 106.80792623109300621, 124.22289328365877736, 127.29096923627366778, 83.879175933889200678, 69.875130524855194381, 124.30196910344602657, 67.016148437152878614, 62.348979342426900985, 108.84263824825757183, 35.552057262448215624, 92.790751601878582733, 8.0223453992657596245, 36.728338270888343686, 49.355690330878132954, 52.188995737953518983, 63.043281598769681295, 41.984128944332042011, 10.673538070670474553, 118.48117724875555723, 100.74269469118371489, 67.433368132817122387, 10.832444590767408954, 80.231250566928792978, 9.1022295813672826625, 98.805616951445699669, 65.677074244063987862, 10.561326489856583066, 80.37475393131171586, 109.23263815205427818, 80.603961301643721526, 115.28020853661655565, 23.189889409128227271, 19.549035173065931303, 121.01105630204256158, 7.0464046646084170789, 100.79086882310002693, 92.33268492080969736, 18.762229386251419783, 105.57161936196644092, 19.46547123965137871, 23.941823918223235523, 23.970145727009366965, 47.051560847576183733, 2.9645517140706942882, 111.20145750599476742, 51.418155166084034136, 7.1017649325003731064, 
108.14149373363397899, 66.851830181753030047, 88.2644027888745768, 30.71264774532392039, 84.197263729456608417, 46.476153739386063535, 36.334581111190345837, 30.670430474121531006, 65.304502804301591823, 85.105762098199193133, 47.322510411002440378, 44.599271900926396484, 58.391627702814730583, 51.570820481159898918, 100.22636577950470382, 1.3325110026198672131, 112.63070225338378805, 95.898395890897518257, 78.964681235436728457, 61.583143355983338552, 45.820438590551930247, 6.1645738648294354789, 60.861102734659652924, 21.015337446086050477, 127.89464654148832778, 28.323237019674706971, 3.6547781302033399697, 22.714381296616920736, 18.110013734793028561, 20.473335705944919027, 15.4121896786527941, 0.29292382313724374399, 68.268039491580566391, 74.530239007464842871, 117.53842021829404985, 25.991941721011244226, 93.106163775992172305, 11.487272043163102353, 12.482986195984267397, 36.399695058502402389, 84.079699775775225135, 80.570464390482811723, 48.61241178965792642, 95.123240367480320856, 119.67674605190404691, 8.9419123301522631664, 41.686735757441056194, 22.687116107783367625, 65.191143691794422921, 97.41387440499238437, 69.026792221924551995, 35.306311495736736106, 97.575604988775012316, 109.20313209314190317, 74.744621853140415624, 6.0069505881947407033, 68.848800283467426198, 15.105140012583433418, 3.4308817435885430314, 63.519210472386475885, 83.464896055480494397, 76.61770296864779084, 101.08683382992967381, 10.763401559659541817, 16.575999737149686553, 73.466069135014549829, 28.992644018653663807, 37.2066629566870688, 87.830977265271940269, 116.80554314567052643, 119.89092404334223829, 115.02597736781899584, 33.093306511240371037, 65.847652718421159079, 94.021851535693713231, 98.193042832805076614, 125.70645113210412092, 101.51897666675722576, 118.4136656901864626, 106.31038760202500271, 124.37034999171373784, 60.32257982049122802, 123.3737964363208448, 33.041998269236501073, 113.92844675556625589, 94.063682204821816413, 90.618058567382831825, 
76.211193752249528188, 87.770143155037658289, 111.99517836691666162, 39.191498165328084724, 89.306474278706446057, 9.9336272285108861979, 98.050318950536166085, 78.502237809265352553, 8.9261348380932759028, 55.421160956324456492, 72.163897512531548273, 5.2480194513191236183, 88.358550251996348379, 118.93379143526180997, 53.559318322597391671, 33.950632998705259524, 59.062046237515460234, 31.035564339650591137, 43.724266177472600248, 21.773753500798193272, 56.470101693681499455, 52.693176280961779412, 62.762065466911735712, 44.412842162699234905, 121.64613624275807524, 53.333221290191431763, 56.752031319701927714, 77.166464130354142981, 110.12133100138817099, 65.920354149617196526, 36.876661963204242056, 8.2909652111738978419, 111.08465997332314146, 46.280273840467998525, 14.993908880638628034, 31.414526366006612079, 115.16014800105403992, 125.97994354603360989, 116.09839439098504954, 92.308358395192044554, 40.495051368179701967, 55.557955065498390468, 113.2542509595368756, 40.302137505910650361, 88.761175221163284732, 119.50492861089514918, 14.97531032843835419, 65.19882399779453408, 2.5665087173474603333, 95.922538562273985008, 114.10281792916794075, 16.84713234062292031, 117.85077174485559226, 32.167273003557056654, 83.012691240350250155, 60.164924846514622914, 112.90265972012639395, 111.46038585378119024, 110.36718222406852874, 13.405290977512777317, 74.651782593300595181, 32.633629264742921805, 100.53530997091365862, 52.379581785822665552, 9.5451833839979371987, 124.77246351934809354, 39.165180714491725666, 125.98727421755393152, 3.8666046929174626712, 120.30474517937909695, 115.84992196498933481, 64.34844822345621651, 102.07174530859992956, 103.01809875856997678, 92.990867023487226106, 71.184364046279370086, 33.570631877515552333, 29.681078718313074205, 11.23805598187391297, 29.997214872048061807, 9.8037899963528616354, 90.981964921538747149, 120.46847329400770832, 72.424383596153347753, 78.149592501809820533, 86.484979089484113501, 24.77582898632317665, 
64.324219748570612865, 80.064799419684277382, 36.166286511779617285, 33.151213479210127844, 119.44641774498813902, 83.956338040254195221, 70.817463831925124396, 5.4046905078539566603, 100.74741400665152469, 62.491119785816408694, 31.884297032975155162, 46.012233288667630404, 109.19641414035504567, 11.47873173974949168, 81.08990694154999801, 122.58365935326946783, 80.447097794585715747, 54.466454203666216927, 5.9936809155187802389, 77.817274647601152537, 22.674999614264379488, 21.677189950183674227, 102.12991003388742683, 82.008941872911236715, 3.0593643039537710138, 80.889962406083213864, 15.469133434675313765, 60.726518626470351592, 67.548920504341367632, 38.201769966992287664, 67.467267773328785552, 111.51595470561005641, 8.02260160577861825, 36.133326638515427476, 107.8754131197820243, 88.486355047847609967, 89.084554835171729792, 108.21559482618613401, 0.25030203364076442085, 58.644460407518636202, 50.178628996160114184, 94.30107003608281957, 12.80765191582031548, 117.3196190644739545, 29.683314268044341588, 75.537968498549162177, 104.71236199188206228, 31.507743359721644083, 68.010644143170793541, 15.407992020271194633, 22.94125659128985717, 42.36496288662601728, 78.801630450099764857, 60.822545374983747024, 11.466698432279372355, 70.231098515003395732, 125.74918711811187677, 98.170887193642556667, 54.745164488289447036, 37.719473452110833023, 41.747411205196840456, 104.96580775791153428, 121.8417709248024039, 64.335865390261460561, 92.688482414843747392, 59.78789194823912112, 99.776699212216044543, 55.289344140779576264, 19.70756835366046289, 94.082224485631741118, 7.2821805587045673747, 60.1786420684074983, 72.717638103425997542, 39.302944473191018915, 64.23040521578877815, 93.745455135405791225, 42.811759359436109662, 90.147433651654864661, 102.194804612408916, 36.262942253604705911, 72.048385563022748102, 53.850516839065676322, 91.46759080558331334, 5.7142535339044115972, 35.604232822384801693, 117.62712042557905079, 52.648346017722360557, 
3.9258931417280109599, 21.59256941857165657, 91.436431634228938492, 63.714393582107732072, 75.389513486363284755, 6.4394568254756450187, 80.749771096019685501, 120.62813744090817636, 12.08382216999962111, 126.4813064841873711, 114.80924555621459149, 82.210464818355831085, 2.0993381540829432197, 37.806408507931337226, 102.1237425776780583, 2.576441770070232451, 110.67632624900579685, 33.14905201168221538, 18.76125721187054296, 64.330662598211347358, 5.744765648509201128, 51.372773402414168231, 11.707608265907765599, 72.456432969906018116, 91.517247733907424845, 31.44308892473418382, 111.99339294575474923, 11.313040268447366543, 124.8014072545520321, 51.040484384393494111, 54.599537028483609902, 121.9649571438676503, 11.834973418426670833, 70.626975376831978792, 42.001218805318785599, 97.507695735144807259, 36.414256510779523524, 7.1679360098532924894, 45.400652855601947522, 76.087814265218185028, 53.204407998633541865, 44.269467324680590536, 48.709704134591447655, 35.325247380540531594, 2.9794486486280220561, 20.582893371843965724, 25.565852058782184031, 123.15560167940930114, 50.651207179023913341, 75.034378769607428694, 16.666536209693731507, 48.861897394446714316, 25.664154283440439031, 6.797941266129782889, 41.848043130765290698, 79.638573762815212831, 50.30311390312272124, 57.477741571023216238, 11.776644212630344555, 15.057351203184225596, 50.983073215447802795, 82.77567186140731792, 85.615852462189650396, 120.44578656732119271, 126.58193847255097353, 39.758351867782039335, 11.75026104971402674, 120.60393820689569111, 6.0322968743057572283, 124.69795868485743995, 89.685276496515143663, 71.104114524900069227, 57.581503203757165466, 16.044690798531519249, 73.456676541776687372, 98.711380661756265908, 104.37799147590703797, 126.08656319753936259, 83.968257888664084021, 21.347076141344587086, 108.96235449751475244, 73.485389382371067768, 6.8667362656342447735, 21.664889181538455887, 32.462501133857585955, 18.204459162738203304, 69.611233902895037318, 
3.3541484881279757246, 21.122652979716804111, 32.749507862627069699, 90.465276304112194339, 33.207922603291081032, 102.5604170732331113, 46.379778818256454542, 39.098070346135500586, 114.02211260408512317, 14.092809329220472137, 73.581737646203691838, 56.66536984161939472, 37.524458772506477544, 83.14323872393288184, 38.93094247930275742, 47.883647836446471047, 47.94029145401873393, 94.103121695152367465, 5.9291034281450265553, 94.402915011989534833, 102.83631033216806827, 14.203529865004384192, 88.28298746727159596, 5.7036603635096980724, 48.528805577749153599, 61.425295490651478758, 40.394527458913216833, 92.95230747877212707, 72.669162222380691674, 61.340860948246699991, 2.6090056086068216246, 42.211524196398386266, 94.645020822004880756, 89.198543801852792967, 116.78325540562946117, 103.14164096232343582, 72.452731559009407647, 2.6650220052433724049, 97.261404506771214074, 63.796791781798674492, 29.929362470873456914, 123.1662867119666771, 91.640877181103860494, 12.329147729658870958, 121.72220546931930585, 42.030674892175738933, 127.78929308297665557, 56.646474039349413943, 7.3095562604066799395, 45.428762593233841471, 36.220027469586057123, 40.946671411889838055, 30.824379357305588201, 0.58584764627812546678, 8.5360789831647707615, 21.06047801493332372, 107.07684043658809969, 51.983883442026126431, 58.212327551984344609, 22.974544086326204706, 24.965972391968534794, 72.799390117008442758, 40.159399551554088248, 33.140928780965623446, 97.224823579319490818, 62.246480734964279691, 111.35349210380809382, 17.883824660304526333, 83.373471514882112388, 45.374232215570373228, 2.3822873835924838204, 66.82774880998840672, 10.053584443849103991, 70.61262299147711019, 67.151209977553662611, 90.406264186287444318, 21.489243706280831248, 12.013901176393119385, 9.6976005669348523952, 30.210280025170504814, 6.8617634871770860627, 127.03842094477658975, 38.929792110964626772, 25.235405937299219659, 74.173667659859347623, 21.526803119319083635, 33.151999474303011084, 
18.932138270029099658, 57.985288037307327613, 74.413325913377775578, 47.661954530547518516, 105.61108629134469084, 111.78184808668811456, 102.05195473563799169, 66.186613022484380053, 3.6953054368459561374, 60.043703071391064441, 68.386085665613791207, 123.41290226421187981, 75.037953333518089494, 108.82733138037656317, 84.620775204053643392, 120.74069998342747567, 120.64515964098609402, 118.74759287264168961, 66.083996538476640126, 99.856893511136149755, 60.127364409647270804, 53.236117134769301629, 24.422387504502694355, 47.540286310078954557, 95.990356733833323233, 78.382996330659807427, 50.612948557412892114, 19.867254457025410375, 68.100637901075970149, 29.004475618530705106, 17.852269676186551806, 110.84232191265255096, 16.327795025063096546, 10.496038902641885215, 48.717100503996334737, 109.86758287052361993, 107.11863664519478334, 67.901265997410519049, 118.12409247503092047, 62.071128679301182274, 87.448532354945200495, 43.547507001596386544, 112.94020338736299891, 105.38635256192355882, 125.52413093382347142, 88.825684325402107788, 115.29227248551615048, 106.66644258038286353, 113.50406263940749341, 26.33292826071192394, 92.242662002779979957, 3.8407082992343930528, 73.753323926408484112, 16.581930422351433663, 94.169319946646282915, 92.56054768093599705, 29.987817761277256068, 62.829052732016862137, 102.32029600211171783, 123.95988709207085776, 104.19678878197373706, 56.616716790384089109, 80.990102736359403934, 111.11591013100041891, 98.508501919073751196, 80.604275011824938701, 49.522350442330207443, 111.00985722179029835, 29.95062065688034636, 2.3976479955890681595, 5.1330174346985586453, 63.845077124551607994, 100.20563585833588149, 33.694264681249478599, 107.70154348971118452, 64.334546007117751287, 38.025382480700500309, 120.32984969303288381, 97.805319440256425878, 94.920771707566018449, 92.734364448137057479, 26.810581955029192613, 21.303565186601190362, 65.267258529489481589, 73.070619941827317234, 104.7591635716453311, 19.090366767999512376, 
121.54492703869618708, 78.330361428987089312, 123.97454843510786304, 7.7332093858385633212, 112.60949035876183189, 103.6998439299823076, 0.69689644691607099958, 76.143490617203497095, 78.03619751714359154, 57.981734046978090191, 14.368728092558740173, 67.141263755031104665, 59.36215743662614841, 22.476111963747825939, 59.994429744096123613, 19.60757999270936125, 53.963929843081132276, 112.93694658801905462, 16.848767192310333485, 28.299185003623279044, 44.969958178968227003, 49.551657972649991279, 0.64843949714486370794, 32.129598839368554763, 72.332573023559234571, 66.302426958423893666, 110.89283548997991602, 39.91267608051202842, 13.634927663850248791, 10.809381015711551299, 73.494828013303049374, 124.98223957163281739, 63.768594065950310323, 92.024466577338898787, 90.392828280710091349, 22.95746347950262134, 34.179813883103633998, 117.16731870654257364, 32.894195589171431493, 108.93290840733243385, 11.987361831041198457, 27.634549295202305075, 45.349999228528758977, 43.354379900370986434, 76.259820067778491648, 36.017883745822473429, 6.1187286079111800063, 33.779924812166427728, 30.93826686935062753, 121.45303725294434116, 7.0978410086827352643, 76.403539933988213306, 6.9345355466612090822, 95.031909411220112815, 16.0452032115572365, 72.26665327703449293, 87.750826239567686571, 48.972710095698857913, 50.169109670347097563, 88.431189652372268029, 0.5006040672815288417, 117.2889208150372724, 100.35725799232386635, 60.60214007216563914, 25.615303831644268939, 106.63923812894790899, 59.366628536092321156, 23.075936997101962334, 81.424723983764124569, 63.015486719446926145, 8.0212882863415870816, 30.815984040542389266, 45.882513182579714339, 84.729925773255672539, 29.603260900199529715, 121.64509074996749405, 22.933396864562382689, 12.462197030010429444, 123.49837423622739152, 68.341774387285113335, 109.49032897657889407, 75.438946904221666045, 83.49482241039731889, 81.931615515826706542, 115.68354184960844577, 0.67173078052655910142, 57.376964829687494785, 
119.57578389648188022, 71.553398424432089087, 110.57868828155915253, 39.415136707324563758, 60.164448971263482235, 14.564361117409134749, 120.35728413681863458, 17.435276206851995084, 78.605888946385675808, 0.46081043158119427972, 59.49091027081158245, 85.623518718875857303, 52.294867303313367302, 76.389609224817832001, 72.525884507213049801, 16.096771126045496203, 107.70103367813499062, 54.935181611166626681, 11.428507067808823194, 71.208465644773241365, 107.25424085115810158, 105.29669203544472111, 7.8517862834560219198, 43.185138837146951118, 54.872863268457876984, 127.42878716421910212, 22.779026972726569511, 12.878913650954928016, 33.499542192043008981, 113.25627488181635272, 24.167644340002880199, 124.96261296837838017, 101.61849111243282096, 36.42092963671166217, 4.1986763081658864394, 75.612817015866312431, 76.247485155356116593, 5.1528835401441028807, 93.352652498011593707, 66.298104023364430759, 37.522514423744723899, 0.6613251964226947166, 11.489531297018402256, 102.74554680482833646, 23.415216531819169177, 16.91286593981567421, 55.03449546781484969, 62.886177849472005619, 95.98678589150949847, 22.626080536898371065, 121.6028145091040642, 102.08096876878698822, 109.19907405697085778, 115.9299142877353006, 23.669946836856979644, 13.253950753663957585, 84.002437610641209176, 67.015391470289614517, 72.828513021559047047, 14.335872019706584979, 90.801305711207533022, 24.175628530436370056, 106.40881599726708373, 88.538934649361181073, 97.41940826918289531, 70.650494761081063189, 5.9588972972560441121, 41.165786743687931448, 51.131704117568006041, 118.31120335881860228, 101.30241435805146466, 22.068757539214857388, 33.333072419387463015, 97.723794788893428631, 51.328308566880878061, 13.595882532259565778, 83.696086261534219375, 31.277147525630425662, 100.60622780624544248, 114.95548314204643248, 23.55328842526068911, 30.114702406368451193, 101.96614643089924357, 37.551343722818273818, 43.231704924382938771, 112.89157313464602339, 125.16387694510194706, 
79.51670373556407867, 23.500522099431691458, 113.20787641379138222, 12.064593748615152435, 121.39591736971851788, 51.370552993033925304, 14.208229049800138455, 115.16300640751433093, 32.089381597066676477, 18.913353083557012724, 69.422761323516169796, 80.755982951817713911, 124.17312639508236316, 39.936515777328168042, 42.69415228269281215, 89.924708995033142855, 18.970778764742135536, 13.733472531268489547, 43.329778363076911774, 64.925002267715171911, 36.408918325480044587, 11.222467805790074635, 6.7082969762559514493, 42.245305959433608223, 65.499015725257777376, 52.930552608224388678, 66.415845206585800042, 77.120834146466222592, 92.759557636512909085, 78.19614069227463915, 100.04422520817024633, 28.185618658444582252, 19.163475292407383677, 113.33073968323878944, 75.048917545016593067, 38.286477447865763679, 77.861884958605514839, 95.767295672892942093, 95.880582908041105838, 60.20624339030473493, 11.858206856293691089, 60.805830023982707644, 77.672620664336136542, 28.407059730012406362, 48.565974934543191921, 11.407320727019396145, 97.057611155498307198, 122.85059098130295752, 80.789054917830071645, 57.904614957547892118, 17.338324444761383347, 122.68172189649339998, 5.2180112172136432491, 84.423048392800410511, 61.290041644013399491, 50.397087603709223913, 105.56651081126256031, 78.283281924650509609, 16.905463118018815294, 5.3300440104903827887, 66.522809013542428147, 127.59358356359734898, 59.858724941750551807, 118.33257342393699219, 55.281754362207720987, 24.658295459321379894, 115.4444109386386117, 84.061349784355115844, 127.57858616595331114, 113.29294807869882789, 14.619112520813359879, 90.857525186467682943, 72.440054939172114246, 81.89334282377967611, 61.648758714611176401, 1.1716952925562509336, 17.072157966329541523, 42.12095602987028542, 86.153680873176199384, 103.96776688405225286, 116.42465510396868922, 45.949088172652409412, 49.931944783940707566, 17.598780234016885515, 80.318799103108176496, 66.28185756193488487, 66.449647158638981637, 
124.49296146993219736, 94.706984207616187632, 35.767649320612690644, 38.746943029764224775, 90.748464431140746456, 4.7645747671886056196, 5.6554976199768134393, 20.107168887701845961, 13.225245982957858359, 6.3024199551109632012, 52.812528372574888635, 42.978487412561662495, 24.027802352786238771, 19.39520113386970479, 60.420560050344647607, 13.723526974357810104, 126.0768418895531795, 77.859584221929253545, 50.470811874602077296, 20.347335319718695246, 43.05360623863816727, 66.303998948606022168, 37.864276540058199316, 115.97057607461829321, 20.826651826755551156, 95.323909061098675011, 83.222172582693019649, 95.563696173379867105, 76.103909471279621357, 4.3732260449687601067, 7.3906108736919122748, 120.08740614278576686, 8.7721713312312203925, 118.8258045284273976, 22.075906667039816966, 89.654662760756764328, 41.241550408110924764, 113.48139996685858932, 113.29031928197582602, 109.4951857452870172, 4.1679930769569182303, 71.713787022272299509, 120.25472881929454161, 106.47223426953860326, 48.844775009005388711, 95.080572620161547093, 63.980713467670284444, 28.765992661319614854, 101.22589711482578423, 39.734508914054458728, 8.2012758021519402973, 58.008951237061410211, 35.70453935237674159, 93.684643825305101927, 32.655590050126193091, 20.992077805283770431, 97.434201007996307453, 91.735165741050877841, 86.237273290393204661, 7.802531994824676076, 108.24818495006184094, 124.14225735860600253, 46.897064709890400991, 87.095014003196411068, 97.880406774729635799, 82.772705123850755626, 123.04826186765058083, 49.651368650807853555, 102.58454497103593894, 85.332885160769365029, 99.008125278814986814, 52.665856521427485859, 56.485324005559959915, 7.6814165984724240843, 19.506647852816968225, 33.163860844706505304, 60.338639893296203809, 57.121095361871994101, 59.975635522554512136, 125.65810546403736225, 76.64059200422707363, 119.9197741841453535, 80.393577563947474118, 113.2334335807718162, 33.980205472722445847, 94.231820262004475808, 69.017003838151140371, 
33.208550023649877403, 99.044700884664052865, 94.019714443580596708, 59.901241313760692719, 4.7952959911781363189, 10.266034869397117291, 127.69015424910685397, 72.411271716671762988, 67.388529362502595177, 87.403086979426007019, 0.66909201423550257459, 76.050764961404638598, 112.65969938606940559, 67.610638880516489735, 61.841543415135674877, 57.468728896274114959, 53.621163910058385227, 42.607130373206018703, 2.5345170589826011565, 18.141239883658272447, 81.518327143290662207, 38.180733536002662731, 115.08985407739601214, 28.660722857977816602, 119.94909687021572609, 15.466418771680764621, 97.218980717527301749, 79.3996878599646152, 1.393792893835779978, 24.286981234410632169, 28.07239503428718308, 115.96346809395618038, 28.737456185121118324, 6.2825275100622093305, 118.72431487325229682, 44.952223927495651878, 119.98885948819224723, 39.215159985418722499, 107.92785968616590253, 97.873893176038109232, 33.69753438462066697, 56.598370007246558089, 89.939916357936454006, 99.103315945299982559, 1.2968789942897274159, 64.259197678740747506, 16.66514604712210712, 4.6048539168514253106, 93.785670979959832039, 79.825352161024056841, 27.269855327700497583, 21.618762031426740577, 18.989656026609736728, 121.96447914326563478, 127.53718813190425863, 56.048933154681435553, 52.785656561420182697, 45.914926959005242679, 68.359627766210905975, 106.33463741308514727, 65.788391178346500965, 89.865816814664867707, 23.974723662082396913, 55.26909859040461015, 90.699998457057517953, 86.708759800745610846, 24.519640135556983296, 72.035767491644946858, 12.237457215822360013, 67.559849624332855456, 61.87653373870489304, 114.90607450588868232, 14.195682017369108507, 24.807079867976426613, 13.869071093326056143, 62.063818822440225631, 32.090406423118110979, 16.53330655407262384, 47.501652479135373142, 97.945420191397715826, 100.3382193406978331, 48.862379304744536057, 1.0012081345666956622, 106.57784163007818279, 72.714515984647732694, 121.20428014433127828, 51.230607663292175857, 
85.27847625789581798, 118.73325707218464231, 46.151873994207562646, 34.849447967528249137, 126.03097343889385229, 16.042576572683174163, 61.631968081084778532, 91.765026365163066657, 41.459851546511345077, 59.206521800402697409, 115.2901814999349881, 45.866793729124765377, 24.924394060024496866, 118.99674847245842102, 8.6835487745738646481, 90.980657953161426121, 22.87789380844697007, 38.989644820798275759, 35.863231031657051062, 103.36708369921689155, 1.3434615610531182028, 114.75392965937862755, 111.15156779296739842, 15.106796848867816152, 93.157376563118305057, 78.830273414649127517, 120.32889794252696447, 29.128722234821907477, 112.71456827363726916, 34.870552413707628148, 29.211777892771351617, 0.92162086316238855943, 118.98182054162680288, 43.247037437751714606, 104.58973460663037258, 24.77921844963930198, 17.051769014426099602, 32.193542252094630385, 87.402067356269981246, 109.87036322233325336, 22.857014135621284368, 14.416931289550120709, 86.508481702319841133, 82.593384070889442228, 15.70357256691204384, 86.370277674293902237, 109.74572653691939195, 126.85757432843820425, 45.558053945453139022, 25.757827301909856033, 66.999084384089655941, 98.512549763632705435, 48.335288680009398377, 121.92522593676039833, 75.236982224865641911, 72.841859273423324339, 8.3973526163354108576, 23.225634031736262841, 24.494970310712233186, 10.30576708029184374, 58.705304996026825393, 4.5962080467288615182, 75.045028847489447799, 1.3226503928453894332, 22.979062594040442491, 77.491093609656672925, 46.830433063638338353, 33.82573187963134842, 110.06899093562969938, 125.77235569894764922, 63.973571783018996939, 45.252161073800380109, 115.20562901821176638, 76.161937537577614421, 90.398148113945353543, 103.85982857547060121, 47.339893673713959288, 26.507901507331553148, 40.00487522128605633, 6.0307829405828670133, 17.657026043118094094, 28.671744039416807937, 53.602611422415066045, 48.351257060876378091, 84.81763199453780544, 49.077869298726000125, 66.838816538369428599, 
13.300989522162126377, 11.917794594512088224, 82.331573487375862896, 102.26340823513965006, 108.62240671764084254, 74.60482871610292932, 44.137515078429714777, 66.666144838778564008, 67.447589577786857262, 102.65661713376175612, 27.191765064522769535, 39.39217252306843875, 62.554295051264489302, 73.212455612494522939, 101.91096628409286495, 47.106576850521378219, 60.229404812740540365, 75.932292861798487138, 75.102687445636547636, 86.46340984876951552, 97.78314626929568476, 122.32775389020753209, 31.03340747112815734, 47.001044198867020896, 98.415752827582764439, 24.12918749723394285, 114.79183473943703575, 102.74110598606785061, 28.416458099603914889, 102.32601281502866186, 64.178763194133352954, 37.826706167117663426, 10.84552264703597757, 33.5119659036390658, 120.34625279016472632, 79.873031554656336084, 85.388304565389262279, 51.849417990066285711, 37.941557529484271072, 27.466945062536979094, 86.659556726157461526, 1.8500045354303438216, 72.817836650960089173, 22.444935611580149271, 13.416593952511902899, 84.490611918870854424, 2.9980314505155547522, 105.86110521645241533, 4.8316904131752380636, 26.241668292936083162, 57.519115273029456148, 28.392281384552916279, 72.088450416340492666, 56.371237316889164504, 38.326950584818405332, 98.661479366481216857, 22.097835090033186134, 76.572954895735165337, 27.723769917214667657, 63.534591345789522165, 63.761165816082211677, 120.41248678060946986, 23.716413712587382179, 121.61166004796905327, 27.345241328672273085, 56.814119460024812724, 97.131949869086383842, 22.814641454042430269, 66.115222311000252375, 117.70118196260955301, 33.57810983566014329, 115.80922991509578424, 34.676648889526404673, 117.36344379298679996, 10.436022434430924477, 40.846096785600821022, 122.58008328802679898, 100.7941752074220858, 83.133021622528758598, 28.566563849301019218, 33.810926236037630588, 10.660088020984403556, 5.0456180270884942729, 127.18716712719469797, 119.71744988350474159, 108.66514684787398437, 110.56350872441544197, 
49.316590918646397768, 102.88882187728086137, 40.122699568713869667, 127.15717233191026025, 98.58589615739765577, 29.238225041630357737, 53.715050372939003864, 16.88010987834786647, 35.786685647559352219, 123.2975174292223528, 2.3433905851125018671, 34.144315932659083046, 84.241912059744208818, 44.307361746356036747, 79.935533768108143704, 104.84931020793737844, 91.898176345304818824, 99.863889567881415132, 35.197560468037409009, 32.637598206219990971, 4.5637151238734077197, 4.899294317281601252, 120.9859229398680327, 61.413968415232375264, 71.535298641225381289, 77.493886059532087529, 53.496928862281492911, 9.5291495343772112392, 11.310995239953626879, 40.214337775403691921, 26.450491965915716719, 12.604839910221926402, 105.62505674514977727, 85.95697482512332499, 48.055604705572477542, 38.790402267739409581, 120.84112010069293319, 27.447053948719258187, 124.15368377910999698, 27.719168443858507089, 100.94162374920415459, 40.694670639437390491, 86.107212477279972518, 4.6079978972120443359, 75.728553080120036611, 103.94115214923658641, 41.65330365351474029, 62.647818122200988, 38.444345165389677277, 63.127392346763372188, 24.207818942559242714, 8.7464520899375202134, 14.781221747387462528, 112.17481228557153372, 17.544342662466078764, 109.6516090568547952, 44.151813334083271911, 51.309325521513528656, 82.483100816225487506, 98.962799933720816625, 98.58063856395529001, 90.990371490574034397, 8.3359861539174744394, 15.427574044548236998, 112.5094576385927212, 84.944468539077206515, 97.6895500180144154, 62.161145240323094185, 127.96142693534056889, 57.531985322642867686, 74.451794229651568457, 79.469017828108917456, 16.402551604303880595, 116.01790247412282042, 71.409078704757121159, 59.369287650610203855, 65.311180100252386183, 41.98415561057117884, 66.868402015996252885, 55.470331482105393661, 44.474546580786409322, 15.605063989652990131, 88.496369900127319852, 120.28451471721200505, 93.79412941978443996, 46.190028006396460114, 67.760813549462909577, 
37.545410247705149231, 118.09652373530116165, 99.302737301619345089, 77.169089942075515864, 42.665770321542368038, 70.016250557629973628, 105.33171304285497172, 112.97064801111991983, 15.362833196944848169, 39.01329570563393645, 66.327721689413010608, 120.67727978659240762, 114.2421907237439882, 119.95127104511266225, 123.31621092807836249, 25.281184008457785239, 111.83954836829434498, 32.787155127898586215, 98.466867161543632392, 67.960410945448529674, 60.463640524012589594, 10.034007676302280743, 66.417100047303392785, 70.089401769331743708, 60.039428887161193416, 119.80248262752138544, 9.5905919823562726378, 20.53206973879787256, 127.38030849821734591, 16.822543433347163955, 6.7770587250088283326, 46.806173958852014039, 1.338184028474643128, 24.101529922812915174, 97.319398772142449161, 7.2212777610329794697, 123.68308683027134975, 114.93745779254822992, 107.24232782011677045, 85.214260746412037406, 5.0690341179652023129, 36.282479767320182873, 35.036654286581324413, 76.361467072008963441, 102.17970815479202429, 57.321445715959271183, 111.89819374043145217, 30.932837543365167221, 66.437961435058241477, 30.799375719929230399, 2.7875857876715599559, 48.573962468821264338, 56.144790068578004139, 103.92693618791236077, 57.474912370242236648, 12.56505502012805664, 109.44862974650459364, 89.904447854994941736, 111.97771897638449445, 78.430319970841082977, 87.85571937233544304, 67.747786352076218463, 67.395068769241333939, 113.19674001449675416, 51.87983271587654599, 70.206631890599965118, 2.5937579885830928106, 0.5183953574814950116, 33.33029209424421424, 9.2097078337064886, 59.571341959923302056, 31.650704322048113681, 54.539710655400995165, 43.237524062853481155, 37.979312053223111434, 115.92895828653490753, 127.07437626380851725, 112.09786630936287111, 105.57131312284036539, 91.829853918014123337, 8.7192555324254499283, 84.669274826173932524, 3.5767823566966399085, 51.731633629329735413, 47.949447324164793827, 110.53819718081285828, 53.399996914118673885, 
45.417519601491221692, 49.03928027111760457, 16.071534983289893717, 24.474914431648358004, 7.1196992486657109112, 123.75306747741342406, 101.81214901177736465, 28.391364034738217015, 49.614159735952853225, 27.738142186652112287, 124.12763764488045126, 64.180812846236221958, 33.066613108145247679, 95.003304958274384262, 67.89084038279906963, 72.676438681395666208, 97.724758609489072114, 2.0024162691370293032, 85.155683260160003556, 17.429031969299103366, 114.40856028866255656, 102.46121532658798969, 42.556952515795273939, 109.46651414436928462, 92.303747988415125292, 69.698895935060136253, 124.06194687778770458, 32.085153145366348326, 123.26393616217319504, 55.530052730326133315, 82.919703093022690155, 118.4130436008090328, 102.58036299986997619, 91.733587458249530755, 49.848788120052631712, 109.99349694491684204, 17.367097549151367275, 53.961315906322852243, 45.75578761689394014, 77.979289641596551519, 71.726462063317740103, 78.734167398433783092, 2.6869231221098743845, 101.50785931876089307, 94.303135585934796836, 30.213593697739270283, 58.314753126240248093, 29.660546829301893013, 112.65779588505392894, 58.257444469647452934, 97.429136547274538316, 69.741104827415256295, 58.423555785542703234, 1.8432417263284150977, 109.96364108325360576, 86.49407487550706719, 81.179469213260745164, 49.55843689927860396, 34.103538028852199204, 64.387084504192898748, 46.804134712543600472, 91.740726444666506723, 45.714028271242568735, 28.833862579103879398, 45.016963404643320246, 37.186768141778884456, 31.407145133827725658, 44.740555348591442453, 91.491453073842421873, 125.7151486568764085, 91.116107890906278044, 51.515654603823350044, 5.9981687681829498615, 69.02509952726541087, 96.670577360022434732, 115.85045187352443463, 22.4739644497349218, 17.683718546846648678, 16.794705232670821715, 46.451268063472525682, 48.989940621424466372, 20.611534160583687481, 117.41060999205365079, 9.1924160934613610152, 22.090057694978895597, 2.6453007856907788664, 45.958125188084522961, 
26.98218721931334585, 93.660866127276676707, 67.651463759266334819, 92.137981871263036737, 123.54471139789529843, 127.94714356603799388, 90.504322147604398197, 102.41125803642717074, 24.323875075158866821, 52.796296227894345066, 79.719657150941202417, 94.679787347427918576, 53.015803014663106296, 80.009750442572112661, 12.061565881165734027, 35.314052086236188188, 57.343488078833615873, 107.20522284483013209, 96.702514121756394161, 41.63526398907561088, 98.155738597455638228, 5.6776330767388571985, 26.601979044324252754, 23.835589189027814427, 36.663146974755363772, 76.526816470282938099, 89.244813435285323067, 21.20965743220585864, 88.275030156863067532, 5.3322896775607659947, 6.8951791555773525033, 77.313234267523512244, 54.38353012904553907, 78.7843450461368775, 125.10859010253261658, 18.424911224992683856, 75.821932568185729906, 94.213153701042756438, 120.45880962548471871, 23.864585723600612255, 22.205374891276733251, 44.926819697542669019, 67.5662925385950075, 116.65550778041506419, 62.06681494225631468, 94.00208839773767977, 68.831505655165528879, 48.258374994471523678, 101.58366947887770948, 77.482211972135701217, 56.832916199211467756, 76.652025630060961703, 0.35752638826670590788, 75.653412334235326853, 21.691045294071955141, 67.023931807281769579, 112.69250558033309062, 31.746063109316310147, 42.776609130778524559, 103.69883598013257142, 75.883115058968542144, 54.933890125077596167, 45.319113452314923052, 3.7000090708643256221, 17.635673301920178346, 44.889871223160298541, 26.833187905027443776, 40.981223837741708849, 5.9960629010347474832, 83.722210432904830668, 9.6633808263504761271, 52.483336585872166324, 115.0382305460589123, 56.784562769109470537, 16.176900832680985332, 112.74247463377832901, 76.653901169640448643, 69.322958732962433714, 44.195670180070010247, 25.145909791470330674, 55.447539834429335315, 127.06918269157904433, 127.52233163216806133, 112.82497356121893972, 47.432827425178402336, 115.22332009593810653, 54.690482657344546169, 
113.62823892005326343, 66.263899738172767684, 45.629282908088498516, 4.2304446220041427296, 107.40236392521910602, 67.156219671323924558, 103.61845983019156847, 69.353297779056447325, 106.72688758597359993, 20.872044868861848954, 81.692193571205280023, 117.16016657605723594, 73.588350414847809589, 38.266043245057517197, 57.133127698602038436, 67.621852472075261176, 21.320176041968807112, 10.091236054180626525, 126.37433425439303392, 111.43489976701312116, 89.33029369575160672, 93.12701744883088395, 98.633181837296433514, 77.777643754561722744, 80.245399137431377312, 126.31434466382415849, 69.17179231479531154, 58.476450083260715473, 107.43010074587800773, 33.76021975669573294, 71.573371295122342417, 118.59503485844470561, 4.6867811702250037342, 68.288631865321804071, 40.483824119488417637, 88.614723492715711473, 31.871067536219925387, 81.698620415878394851, 55.796352690613275627, 71.727779135762830265, 70.395120936074818019, 65.275196412439981941, 9.1274302477468154393, 9.7985886345668404829, 113.9718458797360654, 122.82793683046838851, 15.070597282454400556, 26.987772119064175058, 106.99385772456298582, 19.058299068754422478, 22.621990479907253757, 80.428675550807383843, 52.900983931831433438, 25.209679820447490783, 83.250113490303192521, 43.913949650246649981, 96.111209411144955084, 77.58080453548245714, 113.68224020138950436, 54.894107897442154353, 120.30736755821999395, 55.438336887720652157, 73.883247498408309184, 81.389341278878418962, 44.214424954559945036, 9.2159957944277266506, 23.457106160243711201, 79.882304298473172821, 83.30660730702948058, 125.295636244401976, 76.888690330779354554, 126.25478469352674438, 48.415637885118485428, 17.492904179875040427, 29.562443494778563036, 96.349624571143067442, 35.088685324935795506, 91.303218113709590398, 88.303626668166543823, 102.61865104303069529, 36.966201632454612991, 69.925599867445271229, 69.161277127914217999, 53.980742981148068793, 16.671972307834948879, 30.855148089100111974, 97.018915277189080371, 
41.888937078158051008, 67.3791000360288308, 124.32229048064982635, 127.92285387068113778, 115.06397064528937335, 20.903588459306774894, 30.938035656217834912, 32.805103208611399168, 104.03580494824927882, 14.818157409514242318, 118.73857530122404569, 2.6223602005047723651, 83.96831122114599566, 5.7368040319925057702, 110.94066296421078732, 88.949093161572818644, 31.21012797930961824, 48.992739800258277683, 112.56902943442764808, 59.588258839568879921, 92.380056012796558207, 7.5216270989258191548, 75.090820495413936442, 108.19304747060596128, 70.605474603242328158, 26.338179884151031729, 85.331540643088374054, 12.032501115263585234, 82.663426085713581415, 97.941296022239839658, 30.725666393889696337, 78.026591411271510879, 4.6554433788296591956, 113.35455957318845321, 100.48438144749161438, 111.90254209022896248, 118.63242185615672497, 50.562368016919208458, 95.679096736588689964, 65.57431025580081041, 68.933734323090902762, 7.9208218908970593475, 120.92728104802517919, 20.068015352608199464, 4.8342000946104235481, 12.178803538663487416, 120.07885777432602481, 111.60496525504277088, 19.181183964712545276, 41.064139477599383099, 126.76061699643469183, 33.645086866694327909, 13.554117450017656665, 93.612347917704028077, 2.676368056949286256, 48.203059845629468327, 66.638797544284898322, 14.442555522069596918, 119.36617366054633749, 101.87491558510009781, 86.484655640237178886, 42.428521492824074812, 10.138068235930404626, 72.564959534640365746, 70.073308573166286806, 24.722934144021564862, 76.359416309584048577, 114.64289143192218035, 95.79638748086290434, 61.865675086733972421, 4.8759228701164829545, 61.598751439858460799, 5.5751715753431199118, 97.147924937642528675, 112.28958013715964626, 79.85387237582472153, 114.9498247404844733, 25.130110040259751258, 90.89725949301282526, 51.80889570999352145, 95.955437952772626886, 28.860639941685803933, 47.71143874467452406, 7.4955727041560749058, 6.7901375384863058571, 98.393480028993508313, 103.75966543175672996, 
12.413263781199930236, 5.1875159771661856212, 1.0367907149629900232, 66.660584188488428481, 18.4194156674129772, 119.14268391984660411, 63.301408644096227363, 109.07942131080199033, 86.47504812570696231, 75.958624106449860847, 103.85791657307345304, 126.14875252762067248, 96.195732618729380192, 83.142626245684368769, 55.659707836028246675, 17.438511064854537835, 41.338549652347865049, 7.153564713393279817, 103.46326725865947083, 95.898894648333225632, 93.076394361625716556, 106.79999382823734777, 90.835039202982443385, 98.078560542235209141, 32.143069966579787433, 48.949828863296716008, 14.239398497335059801, 119.50613495483048609, 75.624298023554729298, 56.78272806947643403, 99.22831947190570645, 55.476284373307862552, 120.2552752897645405, 0.3616256924724439159, 66.133226216294133337, 62.006609916552406503, 7.7816807656017772388, 17.352877362794970395, 67.449517218978144228, 4.0048325382776965853, 42.311366520320007112, 34.858063938601844711, 100.8171205773287511, 76.922430653175979387, 85.113905031594185857, 90.933028288742207224, 56.607495976833888562, 11.397791870123910485, 120.12389375557540916, 64.170306290736334631, 118.52787232434639009, 111.06010546065590461, 37.839406186049018288, 108.82608720162170357, 77.160725999743590364, 55.46717491649906151, 99.697576240108901402, 91.986993889833684079, 34.73419509830273455, 107.92263181264570449, 91.511575233791518258, 27.958579283196741017, 15.452924126639118185, 29.468334796867566183, 5.3738462442233867478, 75.015718637525424128, 60.606271171869593672, 60.427187395478540566, 116.62950625248049619, 59.321093658607424004, 97.315591770107857883, 116.51488893929854385, 66.85827309455271461, 11.482209654830512591, 116.84711157108540647, 3.6864834526604681741, 91.927282166510849493, 44.98814975101777236, 34.358938426521490328, 99.11687379855720792, 68.207076057708036387, 0.77416900838579749689, 93.608269425090838922, 55.481452889333013445, 91.42805654248513747, 57.667725158207758795, 90.033926809286640491, 
74.373536283557768911, 62.814290267655451316, 89.481110697186522884, 54.982906147688481724, 123.43029731375645497, 54.232215781812556088, 103.03130920764670009, 11.996337536369537702, 10.050199054534459719, 65.341154720048507443, 103.70090374704886926, 44.9479288994698436, 35.367437093693297356, 33.58941046534164343, 92.902536126945051365, 97.979881242848932743, 41.22306832117101294, 106.82121998410730157, 18.38483218692272203, 44.180115389957791194, 5.2906015713815577328, 91.916250376169045921, 53.964374438630329678, 59.321732254556991393, 7.3029275185363076162, 56.275963742529711453, 119.08942279579423484, 127.89428713207962574, 53.008644295212434372, 76.822516072854341473, 48.647750150317733642, 105.59259245579232811, 31.439314301886042813, 61.359574694855837151, 106.03160602932985057, 32.019500885144225322, 24.123131762331468053, 70.628104172472376376, 114.68697615767086972, 86.410445689663902158, 65.405028243512788322, 83.270527978154859738, 68.311477194911276456, 11.355266153477714397, 53.203958088648505509, 47.671178378055628855, 73.326293949514365522, 25.053632940569514176, 50.489626870570646133, 42.41931486441535526, 48.550060313726135064, 10.664579355121531989, 13.790358311158342985, 26.626468535047024488, 108.76706025809471612, 29.568690092277392978, 122.21718020506523317, 36.84982244998900569, 23.643865136375097791, 60.426307402089150855, 112.91761925096943742, 47.72917144720122451, 44.410749782553466503, 89.853639395088976016, 7.1325850771936529782, 105.31101556083012838, 124.13362988451626734, 60.004176795478997519, 9.6630113103310577571, 96.516749988946685335, 75.167338957759056939, 26.964423944271402434, 113.66583239842657349, 25.304051260121923406, 0.71505277653704979457, 23.306824668470653705, 43.38209058814754826, 6.0478636145671771374, 97.38501116066618124, 63.492126218632620294, 85.553218261560687097, 79.397671960268780822, 23.766230117937084287, 109.86778025015519233, 90.638226904629846103, 7.4000181417286512442, 35.271346603840356693, 
89.779742446320597082, 53.666375810054887552, 81.962447675483417697, 11.992125802073132945, 39.444420865809661336, 19.326761652700952254, 104.96667317174797063, 102.07646109211782459, 113.56912553821894107, 32.353801665365608642, 97.484949267560295993, 25.307802339284535265, 10.645917465924867429, 88.391340360140020493, 50.291819582940661348, 110.89507966886230861, 126.13836538316172664, 127.04466326433976064, 97.649947122437879443, 94.865654850360442651, 102.44664019187621307, 109.38096531469273032, 99.256477840110164834, 4.5277994763455353677, 91.258565816176997032, 8.4608892440119234379, 86.804727850441850023, 6.3124393426514870953, 79.236919660386774922, 10.706595558116532629, 85.453775171947199851, 41.744089737727335887, 35.384387142414198024, 106.32033315211810987, 19.176700829695619177, 76.532086490115034394, 114.26625539720771485, 7.2437049441505223513, 42.640352083937614225, 20.182472108364891028, 124.74866850878970581, 94.869799534029880306, 50.660587391506851418, 58.254034897665405879, 69.266363674596505007, 27.555287509127083467, 32.490798274866392603, 124.62868932765195495, 10.343584629590623081, 116.95290016652506893, 86.860201491756015457, 67.520439513395103859, 15.146742590248322813, 109.19006971688941121, 9.3735623404536454473, 8.5772637306436081417, 80.967648238980473252, 49.229446985431422945, 63.742135072443488752, 35.397240831756789703, 111.59270538123018923, 15.455558271529298509, 12.790241872153274016, 2.5503928248799638823, 18.254860495493630879, 19.597177269137318945, 99.943691759475768777, 117.65587366093677701, 30.141194564908801112, 53.975544238131988095, 85.987715449125971645, 38.116598137508844957, 45.243980959818145493, 32.857351101614767686, 105.80196786366286688, 50.419359640898619546, 38.500226980606385041, 87.82789930049693794, 64.222418822293548146, 27.16160907096855226, 99.364480402779008728, 109.78821579488794669, 112.61473511643998791, 110.87667377544494229, 19.766494996820256347, 34.778682557756837923, 88.428849909119890071, 
18.431991588855453301, 46.914212320491060382, 31.764608596949983621, 38.613214614062599139, 122.59127248880758998, 25.777380661562347086, 124.50956938705348875, 96.831275770236970857, 34.985808359750080854, 59.12488698956076405, 64.699249142286134884, 70.177370649875228992, 54.606436227422818774, 48.607253336336725624, 77.237302086061390582, 73.93240326491286396, 11.851199734894180438, 10.322554255828435998, 107.96148596229613759, 33.343944615669897757, 61.710296178203861928, 66.037830554378160741, 83.777874156319739996, 6.7582000720612995792, 120.64458096130329068, 127.84570774136591353, 102.1279412905787467, 41.807176918613549788, 61.876071312435669824, 65.610206417226436315, 80.071609896498557646, 29.636314819032122614, 109.47715060244809138, 5.244720401013182709, 39.93662244229199132, 11.473608063988649519, 93.881325928425212624, 49.898186323145637289, 62.42025595861923648, 97.985479600520193344, 97.138058868855296168, 119.17651767914139782, 56.760112025593116414, 15.043254197855276288, 22.181640990831510862, 88.386094941215560539, 13.210949206484656315, 52.676359768305701436, 42.663081286176748108, 24.065002230530808447, 37.326852171430800809, 67.882592044479679316, 61.451332787783030653, 28.053182822543021757, 9.31088675766295637, 98.709119146380544407, 72.968762894986866741, 95.805084180461562937, 109.26484371231344994, 101.12473603384205489, 63.358193473181017907, 3.1486205116052587982, 9.867468646181805525, 15.841643781797756674, 113.85456209605035838, 40.136030705216398928, 9.6684001892208470963, 24.35760707733061281, 112.1577155486556876, 95.209930510085541755, 38.362367929425090551, 82.128278955202404177, 125.52123399286938366, 67.290173733388655819, 27.108234900038951309, 59.224695835408056155, 5.352736113898572512, 96.406119691262574634, 5.2775950885697966442, 28.885111044142831815, 110.73234732109631295, 75.749831170203833608, 44.969311280474357773, 84.857042985648149624, 20.27613647186444723, 17.129919069284369471, 12.146617146332573611, 
49.445868288046767702, 24.718832619171735132, 101.28578286384799867, 63.59277496172580868, 123.73135017346794484, 9.7518457402329659089, 123.1975028797169216, 11.150343150689877803, 66.29584987528869533, 96.579160274322930491, 31.707744751653081039, 101.89964948096894659, 50.260220080523140496, 53.79451898602565052, 103.61779141999068088, 63.910875905545253772, 57.721279883371607866, 95.422877489349048119, 14.991145408312149812, 13.580275076976249693, 68.786960057990654605, 79.519330863513459917, 24.826527562399860471, 10.375031954332371242, 2.0735814299296180252, 5.3211683769768569618, 36.838831334829592379, 110.28536783969320823, 126.6028172881960927, 90.158842621607618639, 44.950096251417562598, 23.917248212903359672, 79.715833146150544053, 124.29750505524498294, 64.391465237462398363, 38.285252491368737537, 111.31941567205649335, 34.877022129709075671, 82.677099304695730098, 14.307129426790197613, 78.926534517322579632, 63.797789296670089243, 58.152788723255071091, 85.59998765647833352, 53.670078405968524748, 68.157121084470418282, 64.286139933159574866, 97.899657726593432017, 28.478796994673757581, 111.01226990966461017, 23.248596047109458596, 113.56545613895650604, 70.456638943811412901, 110.95256874661936308, 112.51055057952908101, 0.72325138494852581061, 4.2664524325882666744, 124.01321983310481301, 15.563361531203554478, 34.705754725593578769, 6.8990344379562884569, 8.0096650765553931706, 84.622733040640014224, 69.716127877207327401, 73.634241154661140172, 25.844861306355596753, 42.227810063192009693, 53.866056577484414447, 113.21499195366777712, 22.795583740251458948, 112.2477875111544563, 0.34061258147630724125, 109.05574464869641815, 94.120210921311809216, 75.678812372101674555, 89.65217440324704512, 26.321451999487180728, 110.934349833001761, 71.395152480221440783, 55.973987779667368159, 69.468390196609107079, 87.84526362529504695, 55.023150467583036516, 55.917158566393482033, 30.905848253278236371, 58.936669593735132366, 10.747692488450411474, 
22.031437275050848257, 121.21254234373918734, 120.85437479096071911, 105.25901250496099237, 118.64218731721848599, 66.631183540219353745, 105.02977787860072567, 5.7165461891054292209, 22.96441930966466316, 105.69422314217081293, 7.3729669053245743271, 55.854564333021698985, 89.97629950203554472, 68.717876853046618635, 70.233747597118053818, 8.4141521154160727747, 1.5483380167752329726, 59.216538850181677844, 110.96290577866966487, 54.85611308497027494, 115.33545031641915557, 52.067853618576918961, 20.747072567115537822, 125.62858053531454061, 50.962221394373045769, 109.96581229538060143, 118.86059462751290994, 108.46443156362875015, 78.062618415293400176, 23.992675072739075404, 20.100398109072557418, 2.6823094401006528642, 79.401807494097738527, 89.8958577989396872, 70.734874187386594713, 67.178820930686924839, 57.805072253893740708, 67.959762485697865486, 82.446136642345663859, 85.642439968214603141, 36.769664373845444061, 88.360230779915582389, 10.581203142766753444, 55.832500752338091843, 107.92874887726065936, 118.64346450911398279, 14.605855037072615232, 112.55192748506306089, 110.17884559159210767, 127.78857426415925147, 106.01728859042850672, 25.645032145708682947, 97.295500300639105262, 83.18518491158465622, 62.878628603772085626, 122.71914938971531228, 84.063212058659701142, 64.039001770288450643, 48.246263524666574085, 13.256208344948390732, 101.37395231534537743, 44.820891379331442295, 2.8100564870292146225, 38.541055956313357456, 8.6229543898225529119, 22.710532306955428794, 106.40791617729701102, 95.342356756114895688, 18.652587899028731044, 50.107265881139028352, 100.97925374114129227, 84.838629728830710519, 97.100120627452270128, 21.329158710243063979, 27.580716622316685971, 53.252937070097686956, 89.534120516189432237, 59.137380184558423935, 116.43436041013046633, 73.699644899978011381, 47.287730272750195581, 120.85261480418193969, 97.835238501938874833, 95.458342894402449019, 88.821499565110570984, 51.707278790177952033, 14.265170154390943935, 
82.622031121663894737, 120.26725976903253468, 120.00835359095799504, 19.326022620665753493, 65.033499977897008648, 22.334677915518113878, 53.928847888546442846, 99.331664796853146981, 50.608102520243846811, 1.4301055530740995891, 46.613649336941307411, 86.76418117629509652, 12.095727229137992254, 66.77002232133236248, 126.98425243726887857, 43.106436523125012172, 30.795343920537561644, 47.532460235877806554, 91.735560500314022647, 53.276453809259692207, 14.800036283457302488, 70.542693207684351364, 51.559484892641194165, 107.33275162011341308, 35.924895350966835395, 23.98425160414626589, 78.888841731619322672, 38.653523305405542487, 81.933346343499579234, 76.152922184239287162, 99.138251076437882148, 64.707603330734855263, 66.969898535120591987, 50.615604678569070529, 21.291834931853372836, 48.782680720280040987, 100.58363916588496068, 93.790159337728255196, 124.27673076632709126, 126.08932652868315927, 67.299894244879396865, 61.731309700724523282, 76.893280383756064111, 90.761930629389098613, 70.512955680220329668, 9.0555989526947087143, 54.517131632353994064, 16.921778488027484855, 45.609455700883700047, 12.624878685306612169, 30.473839320773549844, 21.413191116236703238, 42.907550343894399703, 83.488179475454671774, 70.768774284832034027, 84.64066630423621973, 38.353401659394876333, 25.064172980233706767, 100.5325107944154297, 14.487409888301044703, 85.28070416787522845, 40.364944216733420035, 121.4973370175830496, 61.739599068059760612, 101.32117478301370284, 116.50806979533444974, 10.532727349193010014, 55.110575018254166935, 64.981596549736423185, 121.25737865530390991, 20.68716925918488414, 105.90580033305377583, 45.720402983515668893, 7.0408790267902077176, 30.293485180500283604, 90.380139433778822422, 18.747124680907290895, 17.154527461290854262, 33.935296477960946504, 98.45889397086284589, 127.4842701448869775, 70.794481663517217385, 95.185410762460378464, 30.911116543058597017, 25.580483744306548033, 5.1007856497599277645, 36.509720990987261757, 
39.194354538278275868, 71.887383518955175532, 107.31174732187355403, 60.282389129817602225, 107.95108847626761417, 43.97543089825194329, 76.233196275017689914, 90.487961919636290986, 65.71470220323317335, 83.603935727325733751, 100.83871928179723909, 77.000453961216408061, 47.655798600997513859, 0.44483764458709629253, 54.323218141940742498, 70.728960805558017455, 91.57643158977953135, 97.229470232883613789, 93.753347550889884587, 39.532989993640512694, 69.557365115517313825, 48.857699818239780143, 36.863983177710906602, 93.828424640982120764, 63.529217193899967242, 77.226429228128836257, 117.18254497761517996, 51.554761323128332151, 121.01913877411061549, 65.662551540473941714, 69.971616719500161707, 118.24977397912516608, 1.3984982845759077463, 12.354741299750457983, 109.21287245484563755, 97.214506672677089227, 26.474604172126419144, 19.864806529825727921, 23.702399469788360875, 20.645108511660509976, 87.922971924592275172, 66.687889231339795515, 123.42059235641136183, 4.0756611087599594612, 39.555748312639479991, 13.516400144126237137, 113.28916192261021934, 127.69141548273546505, 76.255882581157493405, 83.614353837227099575, 123.75214262487497763, 3.220412834456510609, 32.143219792997115292, 59.272629638067883207, 90.954301204896182753, 10.489440802026365418, 79.873244884583982639, 22.947216127980937017, 59.762651856850425247, 99.796372646291274577, 124.84051191724211094, 67.970959201044024667, 66.276117737714230316, 110.35303535828279564, 113.52022405118623283, 30.086508395710552577, 44.363281981666659703, 48.772189882431121077, 26.421898412969312631, 105.35271953661140287, 85.326162572353496216, 48.130004461061616894, 74.653704342865239596, 7.7651840889593586326, 122.90266557556606131, 56.106365645086043514, 18.621773515329550719, 69.418238292764726793, 17.93752578997737146, 63.610168360926763853, 90.529687424630537862, 74.249472067684109788, 126.71638694636203581, 6.2972410232105175965, 19.73493729236361105, 31.683287563599151326, 99.709124192100716755, 
80.272061410432797857, 19.336800378441694193, 48.715214154664863599, 96.315431097315013176, 62.41986102017108351, 76.724735858853819082, 36.256557910408446332, 123.04246798574240529, 6.580347466777311638, 54.216469800077902619, 118.44939167081611231, 10.705472227800783003, 64.812239382525149267, 10.555190177139593288, 57.77022208828930161, 93.464694642196263885, 23.499662340407667216, 89.938622560952353524, 41.714085971296299249, 40.552272943728894461, 34.259838138568738941, 24.293234292665147223, 98.891736576093535405, 49.437665238347108243, 74.571565727695997339, 127.18554992345525534, 119.46270034693588968, 19.503691480469569797, 118.3950057594338432, 22.300686301383393584, 4.5916997505810286384, 65.158320548645860981, 63.415489503309800057, 75.799298961937893182, 100.52044016104628099, 107.58903797205493902, 79.235582839981361758, 127.82175181109050754, 115.44255976674685371, 62.845754978698096238, 29.982290816624299623, 27.160550153956137365, 9.5739201159849471878, 31.038661727026919834, 49.653055124799720943, 20.750063908664742485, 4.1471628598592360504, 10.642336753953713924, 73.677662669662822736, 92.570735679390054429, 125.20563457639582339, 52.317685243215237278, 89.900192502835125197, 47.834496425810357323, 31.431666292304726085, 120.59501011049360386, 0.78293047492479672655, 76.570504982741113054, 94.638831344112986699, 69.75404425942178932, 37.354198609391460195, 28.614258853584033204, 29.853069034645159263, 127.59557859334017849, 116.30557744651378016, 43.19997531295666704, 107.34015681194068748, 8.3142421689444745425, 0.5722798663227877114, 67.799315453186864033, 56.957593989351153141, 94.024539819329220336, 46.49719209422255517, 99.130912277916650055, 12.91327788762646378, 93.905137493242364144, 97.021101159058162011, 1.4465027698970516212, 8.5329048651801713277, 120.02643966620962601, 31.126723062407108955, 69.411509451187157538, 13.798068875912576914, 16.01933015311442432, 41.245466081283666426, 11.432255754418292781, 19.268482309322280344, 
51.689722612711193506, 84.455620126384019386, 107.73211315497246687, 98.42998390733555425, 45.591167480506555876, 96.495575022308912594, 0.6812251629562524613, 90.111489297396474285, 60.24042184262725641, 23.357624744206987089, 51.30434880649409024, 52.642903998977999436, 93.868699666007159976, 14.790304960442881566, 111.9479755593383743, 10.936780393221852137, 47.6905272505900939, 110.04630093516607303, 111.83431713278696407, 61.811696506556472741, 117.87333918747026473, 21.495384976900822949, 44.062874550105334492, 114.42508468748201267, 113.7087495819250762, 82.518025009925622726, 109.28437463444060995, 5.2623670804423454683, 82.059555757201451343, 11.433092378210858442, 45.928838619332964299, 83.388446284341625869, 14.745933810649148654, 111.70912866604703595, 51.952599004074727418, 9.4357537060968752485, 12.467495194236107636, 16.828304230832145549, 3.0966760335504659452, 118.43307770036699367, 93.925811557339329738, 109.71222616994418786, 102.67090063283831114, 104.13570723715383792, 41.494145134234713623, 123.25716107062908122, 101.92444278874972952, 91.931624590764840832, 109.72118925502945785, 88.928863127261138288, 28.125236830586800352, 47.985350145478150807, 40.200796218145114835, 5.3646188802049437072, 30.803614988199115032, 51.791715597883012379, 13.469748374776827404, 6.3576418613738496788, 115.61014450779111939, 7.9195249713993689511, 36.892273284694965696, 43.284879936429206282, 73.539328747694526101, 48.720461559831164777, 21.162406285533506889, 111.66500150467982166, 87.85749775452495669, 109.28692901822796557, 29.211710074148868443, 97.103854970129759749, 92.357691183187853312, 127.57714852832214092, 84.034577180857013445, 51.290064291421003873, 66.591000601281848503, 38.37036982316931244, 125.75725720754780923, 117.43829877943426254, 40.126424117323040264, 0.07800354057690128684, 96.49252704933314817, 26.512416689900419442, 74.747904630690754857, 89.641782758662884589, 5.6201129740584292449, 77.082111912626714911, 17.245908779645105824, 
45.421064613914495567, 84.815832354597660014, 62.684713512229791377, 37.305175798057462089, 100.21453176228169468, 73.958507482286222512, 41.677259457665059017, 66.200241254904540256, 42.658317420486127958, 55.161433244633371942, 106.50587414019901189, 51.068241032378864475, 118.27476036912048585, 104.86872082026457065, 19.399289799956022762, 94.575460545504029142, 113.70522960836751736, 67.670477003881387645, 62.916685788808536017, 49.642999130221141968, 103.41455758035590407, 28.530340308785525849, 37.244062243331427453, 112.53451953806870733, 112.01670718191599008, 38.652045241335144965, 2.0669999557940172963, 44.669355831036227755, 107.85769577709652367, 70.663329593706293963, 101.21620504048769362, 2.8602111061481991783, 93.2272986738862528, 45.528362352590193041, 24.191454458279622486, 5.5400446426683629397, 125.96850487453775713, 86.212873046250024345, 61.590687841075123288, 95.064920471759251086, 55.471121000628045294, 106.55290761851938441, 29.600072566918242956, 13.085386415368702728, 103.11896978528602631, 86.665503240226826165, 71.849790701933670789, 47.96850320829616976, 29.777683463238645345, 77.307046610814722953, 35.866692687002796447, 24.305844368482212303, 70.276502152879402274, 1.4152066614733485039, 5.9397970702411839738, 101.23120935713814106, 42.583669863706745673, 97.565361440563719952, 73.16727833177355933, 59.58031867546014837, 120.5534615326578205, 124.17865305736995651, 6.5997884897587937303, 123.46261940145268454, 25.786560767515766202, 53.523861258781835204, 13.025911360440659337, 18.111197905389417429, 109.03426326470798813, 33.843556976054969709, 91.218911401767400093, 25.249757370613224339, 60.947678641547099687, 42.826382232473406475, 85.815100687792437384, 38.976358950912981527, 13.537548569667706033, 41.28133260847243946, 76.706803318789752666, 50.128345960471051512, 73.0650215888308594, 28.974819776605727384, 42.561408335750456899, 80.729888433470478049, 114.9946740351660992, 123.4791981361231592, 74.642349566031043651, 
105.01613959066889947, 21.065454698386020027, 110.22115003651197185, 1.963193099476484349, 114.51475731060781982, 41.374338518373406259, 83.81160066610755166, 91.440805967031337786, 14.081758053580415435, 60.586970361004205188, 52.760278867557644844, 37.494249361814581789, 34.309054922581708524, 67.870592955925530987, 68.917787941729329759, 126.96854028977759299, 13.588963327038072748, 62.370821524920756929, 61.822233086117194034, 51.160967488613096066, 10.201571299519855529, 73.019441981974523515, 78.388709076560189715, 15.774767037913989043, 86.623494643747108057, 120.56477825963884243, 87.902176952535228338, 87.950861796507524559, 24.466392550039017806, 52.975923839276219951, 3.4294044064699846786, 39.207871454655105481, 73.677438563598116161, 26.000907922432816122, 95.311597201995027717, 0.88967528917419258505, 108.64643628388512298, 13.45792161111603491, 55.15286317956270068, 66.458940465770865558, 59.506695101779769175, 79.065979987284663366, 11.11473023103462765, 97.715399636479560286, 73.727966355425451184, 59.656849281964241527, 127.05843438780357246, 26.452858456261310494, 106.36508995523035992, 103.10952264626030228, 114.03827754822486895, 3.325103080951521406, 11.943233439003961394, 108.49954795825033216, 2.7969965691518154927, 24.709482599504553946, 90.425744909694913076, 66.429013345354178455, 52.949208344252838288, 39.729613059651455842, 47.40479893957672175, 41.29021702332465793, 47.845943849188188324, 5.3757784626832290087, 118.84118471282272367, 8.1513222175199189223, 79.111496625278959982, 27.032800288256112253, 98.578323845224076649, 127.38283096547456807, 24.511765162318624789, 39.228707674457837129, 119.50428524974995526, 6.440825668913021218, 64.286439585994230583, 118.54525927613940439, 53.908602409792365506, 20.978881604056368815, 31.746489769167965278, 45.894432255961874034, 119.52530371370448847, 71.592745292586187134, 121.68102383448785986, 7.9419184020916873123, 4.5522354754320986103, 92.706070716569229262, 99.040448102372465655, 
60.173016791421105154, 88.726563963333319407, 97.544379764862242155, 52.84379682594226324, 82.705439073226443725, 42.652325144706992432, 96.260008922126871767, 21.307408685730479192, 15.530368177922355244, 117.80533115113212261, 112.21273129017208703, 37.243547030662739417, 10.836476585529453587, 35.87505157995474292, 127.22033672185716568, 53.059374849264713703, 20.498944135371857556, 125.43277389272407163, 12.594482046421035193, 39.4698745847272221, 63.366575127201940631, 71.418248384205071488, 32.544122820865595713, 38.673600756883388385, 97.430428309333365178, 64.630862194630026352, 124.839722040345805, 25.449471717707638163, 72.513115820820530644, 118.08493597148481058, 13.160694933554623276, 108.43293960015944322, 108.89878334163222462, 21.410944455605203984, 1.6244787650502985343, 21.110380354279186577, 115.5404441765822412, 58.92938928439252777, 46.999324680818972411, 51.877245121904707048, 83.428171942592598498, 81.104545887457788922, 68.519676277137477882, 48.586468585333932424, 69.783473152187070809, 98.875330476697854465, 21.143131455395632656, 126.37109984691414866, 110.92540069387177937, 39.007382960942777572, 108.79001151886768639, 44.601372602766787168, 9.1833995011656952556, 2.3166410972917219624, 126.83097900662323809, 23.598597923879424343, 73.040880322096199961, 87.178075944113516016, 30.471165679966361495, 127.64350362218465307, 102.88511953349370742, 125.69150995739619248, 59.964581633248599246, 54.321100307912274729, 19.147840231969894376, 62.077323454053839669, 99.306110249599441886, 41.500127817329484969, 8.2943257197184721008, 21.284673507911065826, 19.355325339329283452, 57.141471358783746837, 122.41126915279164677, 104.63537048643411254, 51.800385005670250393, 95.668992851620714646, 62.863332584613090148, 113.19002022098720772, 1.5658609498532314319, 25.141009965485864086, 61.277662688229611376, 11.50808851884357864, 74.70839721878292039, 57.228517707168066408, 59.706138069293956505, 127.19115718668399495, 104.61115489302756032, 
86.399950625916972058, 86.680313623881374951, 16.628484337888949085, 1.1445597326492134016, 7.5986309063737280667, 113.91518797870594426, 60.049079638658440672, 92.994384188448748318, 70.26182455583330011, 25.826555775256565539, 59.810274986488366267, 66.042202318116324022, 2.8930055397941032425, 17.065809730363980634, 112.05287933241925202, 62.253446124817855889, 10.823018902374315076, 27.596137751825153828, 32.03866030622884864, 82.490932162567332853, 22.864511508836585563, 38.536964618644560687, 103.37944522542602499, 40.911240252771676751, 87.464226309948571725, 68.859967814671108499, 91.18233496101674973, 64.991150044617825188, 1.3624503259161429014, 52.22297859479294857, 120.48084368525451282, 46.715249488413974177, 102.60869761298818048, 105.28580799795963685, 59.737399332014319953, 29.58060992088940111, 95.895951118676748592, 21.873560786443704274, 95.381054501183825778, 92.092601870335784042, 95.668634265573928133, 123.62339301311658346, 107.74667837494052947, 42.990769953801645897, 88.125749100214306964, 100.85016937496766332, 99.417499163853790378, 37.036050019854883431, 90.568749268881219905, 10.524734160888328915, 36.119111514402902685, 22.866184756421716884, 91.857677238669566577, 38.776892568683251739, 29.491867621301935287, 95.418257332094071899, 103.90519800814945484, 18.871507412193750497, 24.934990388472215272, 33.656608461664291099, 6.1933520671045698691, 108.86615540073762531, 59.851623114682297455, 91.424452339892013697, 77.341801265680260258, 80.271414474307675846, 82.988290268473065225, 118.51432214126180043, 75.848885577503097011, 55.863249181533319643, 91.442378510062553687, 49.857726254522276577, 56.250473661173600703, 95.970700290956301615, 80.401592436290229671, 10.729237760409887414, 61.607229976398230065, 103.58343119576602476, 26.939496749553654809, 12.715283722751337336, 103.22028901558223879, 15.839049942798737902, 73.784546569389931392, 86.569759872862050543, 19.078657495389052201, 97.440923119662329555, 42.324812571070651757, 
95.330003009363281308, 47.714995509053551359, 90.573858036455931142, 58.423420148301374866, 66.207709940263157478, 56.715382366379344603, 127.15429705664791982, 40.069154361714026891, 102.58012858284564572, 5.1820012025636970066, 76.740739646342262859, 123.51451441509925644, 106.87659755886852508, 80.252848234646080527, 0.15600708115744055249, 64.985054098666296341, 53.024833379804476863, 21.495809261385147693, 51.283565517325769179, 11.240225948120496469, 26.164223825253429823, 34.491817559290211648, 90.842129227832629113, 41.631664709195320029, 125.36942702445958275, 74.610351596118562156, 72.429063524567027343, 19.917014964572445024, 83.354518915333756013, 4.4004825098090805113, 85.316634840972255915, 110.32286648927038186, 85.01174828039802378, 102.13648206476136693, 108.54952073824460967, 81.737441640529141296, 38.798579599915683502, 61.150921091011696262, 99.410459216738672694, 7.3409540077627752908, 125.83337157761707203, 99.285998260445921915, 78.82911516071180813, 57.060680617571051698, 74.488124486666492885, 97.069039076141052647, 96.033414363831980154, 77.30409048267028993, 4.1339999115880345926, 89.338711662076093489, 87.71539155419668532, 13.326659187416225905, 74.432410080975387245, 5.7204222123000363354, 58.4545973477725056, 91.056724705180386081, 48.382908916559244972, 11.080089285340363858, 123.93700974907915224, 44.42574609250004869, 123.18137568215388455, 62.129840943518502172, 110.94224200125972857, 85.105815237038768828, 59.200145133836485911, 26.170772830741043435, 78.237939570575690595, 45.331006480457290309, 15.699581403867341578, 95.937006416592339519, 59.555366926480928669, 26.614093221629445907, 71.733385374005592894, 48.611688736968062585, 12.553004305758804549, 2.8304133229466970079, 11.879594140486005927, 74.462418714276282117, 85.167339727417129325, 67.130722881131077884, 18.33455666354711866, 119.16063735092393472, 113.10692306531927898, 120.35730611473991303, 13.199576979521225439, 118.92523880290536908, 51.573121535031532403, 
107.04772251756367041, 26.051822720881318673, 36.222395810782472836, 90.068526529415976256, 67.687113952113577398, 54.437822803534800187, 50.499514741226448677, 121.89535728309419937, 85.652764464950450929, 43.630201375584874768, 77.952717901825963054, 27.075097139335412066, 82.5626652169485169, 25.413606637579505332, 100.256691920945741, 18.130043177661718801, 57.949639553211454768, 85.122816671504551778, 33.459776866944594076, 101.98934807033583638, 118.95839627224995638, 21.28469913206572528, 82.032279181337798946, 42.130909396775678033, 92.442300073023943696, 3.9263861989566066768, 101.02951462121563964, 82.748677036750450497, 39.6232013322187413, 54.881611934062675573, 28.16351610716083087, 121.17394072200841038, 105.52055773511528969, 74.988498723629163578, 68.618109845167055028, 7.7411859118546999525, 9.8355758834622974973, 125.93708057955518598, 27.177926654076145496, 124.74164304984151386, 123.64446617223438807, 102.32193497722619213, 20.403142599043349037, 18.038883963952685008, 28.777418153120379429, 31.549534075831616065, 45.246989287497854093, 113.12955651928132284, 47.804353905074094655, 47.901723593015049119, 48.932785100078035612, 105.9518476785524399, 6.8588088129399693571, 78.415742909313848941, 19.354877127196232323, 52.001815844869270222, 62.623194403990055434, 1.7793505783520231489, 89.292872567770245951, 26.915843222235707799, 110.30572635912903934, 4.9178809315417311154, 119.01339020356317633, 30.131959974569326732, 22.229460462072893279, 67.43079927296275855, 19.455932710850902367, 119.31369856393212103, 126.11686877560714493, 52.905716912526258966, 84.730179910464357818, 78.219045292524242541, 100.0765550964497379, 6.6502061619030428119, 23.886466878011560766, 88.999095916500664316, 5.5939931383036309853, 49.41896519901274587, 52.851489819393464131, 4.8580266907083569095, 105.89841668850567658, 79.459226119302911684, 94.8095978791534435, 82.58043404664931586, 95.691887698376376648, 10.751556925370095996, 109.68236942564544734, 
16.302644435043475823, 30.222993250557919964, 54.065600576515862485, 69.156647690451791277, 126.76566193094913615, 49.023530324637249578, 78.457415348919312237, 111.00857049949991051, 12.881651337829680415, 0.57287917199209914543, 109.09051855227880878, 107.81720481958473101, 41.95776320811273763, 63.492979538335930556, 91.788864511927386047, 111.05060742741261492, 15.185490585176012246, 115.36204766897571972, 15.883836804183374625, 9.1044709508678351995, 57.412141433142096503, 70.08089620474493131, 120.34603358284584829, 49.453127926670276793, 67.08875952972448431, 105.68759365188816446, 37.410878146452887449, 85.304650289417622844, 64.520017844253743533, 42.614817371460958384, 31.060736355848348467, 107.61066230226788321, 96.425462580344174057, 74.487094061325478833, 21.672953171058907174, 71.750103159913123818, 126.44067344371433137, 106.11874969852942741, 40.99788827074735309, 122.86554778544814326, 25.188964092842070386, 78.9397491694544442, 126.73315025440751924, 14.836496768410142977, 65.088245641731191427, 77.347201513770414749, 66.860856618670368334, 1.2617243892636906821, 121.67944408069161, 50.898943435418914305, 17.026231641644699266, 108.16987194296962116, 26.321389867112884531, 88.865879200322524412, 89.797566683264449239, 42.821888911210407969, 3.2489575301005970687, 42.220760708562011132, 103.0808883531644824, 117.85877856878505554, 93.998649361637944821, 103.75449024381305207, 38.856343885188834975, 34.209091774919215823, 9.0393525542749557644, 97.172937170671502827, 11.566946304377779597, 69.75066095339570893, 42.286262910794903291, 124.74219969383193529, 93.850801387743558735, 78.014765921889193123, 89.580023037735372782, 89.202745205533574335, 18.366799002331390511, 4.6332821945870819036, 125.66195801324647618, 47.197195847762486665, 18.081760644196037902, 46.356151888227032032, 60.942331359936360968, 127.28700724437294411, 77.770239066991052823, 123.38301991479238495, 119.92916326649719849, 108.64220061582454946, 38.295680463939788751, 
124.15464690811131732, 70.612220499198883772, 83.000255634658969939, 16.588651439436944202, 42.569347015822131652, 38.710650678658566903, 114.28294271757113165, 116.82253830558693153, 81.270740972868225072, 103.60077001134413877, 63.337985703241429292, 125.72666516922981828, 98.380040441974415444, 3.1317218997064628638, 50.282019930971728172, 122.55532537645922275, 23.01617703769079526, 21.416794437565840781, 114.45703541433613282, 119.41227613859155099, 126.38231437337162788, 81.222309786058758618, 44.799901251837582095, 45.360627247762749903, 33.25696867577789817, 2.2891194652984268032, 15.197261812751094112, 99.830375957411888521, 120.09815927732051932, 57.988768376901134616, 12.52364911166660022, 51.653111550513131078, 119.62054997298037051, 4.0844046362326480448, 5.7860110795882064849, 34.131619460731599247, 96.105758664838504046, 124.50689224963571178, 21.646037804748630151, 55.192275503650307655, 64.077320612461335259, 36.981864325134665705, 45.729023017673171125, 77.073929237292759353, 78.75889045085204998, 81.822480505546991481, 46.928452619900781428, 9.7199356293458549771, 54.364669922037137439, 1.9823000892356503755, 2.7249006518322858028, 104.44595718958953512, 112.96168737050902564, 93.430498976831586333, 77.217395225979998941, 82.571615995919273701, 119.47479866402863991, 59.161219841782440199, 63.791902237353497185, 43.747121572891046526, 62.762109002371289534, 56.185203740675206063, 63.337268531151494244, 119.2467860262368049, 87.493356749881058931, 85.981539907606929773, 48.251498200432251906, 73.700338749938964611, 70.834998327707580756, 74.07210003971340484, 53.13749853776243981, 21.049468321776657831, 72.23822302880580537, 45.732369512847071746, 55.715354477339133155, 77.553785137366503477, 58.983735242607508553, 62.836514664191781776, 79.810396016298909672, 37.743014824387500994, 49.869980776944430545, 67.313216923328582197, 12.386704134212777717, 89.732310801475250628, 119.70324622936823289, 54.848904679787665373, 26.683602531364158494, 
32.542828948618989671, 37.97658053694976843, 109.02864428252723883, 23.697771155006194022, 111.72649836307027726, 54.884757020125107374, 99.715452509044553153, 112.50094732235083939, 63.941400581916241208, 32.803184872580459341, 21.458475520819774829, 123.21445995280009811, 79.166862391532049514, 53.878993499107309617, 25.430567445506312652, 78.440578031164477579, 31.678099885601113783, 19.569093138783500763, 45.139519745724101085, 38.157314990781742381, 66.881846239324659109, 84.649625142144941492, 62.660006018726562615, 95.429991018110740697, 53.147716072911862284, 116.84684029660638771, 4.4154198805263149552, 113.43076473276232718, 126.30859411329583963, 80.13830872343169176, 77.160257165691291448, 10.364002405131031992, 25.481479292688163696, 119.02902883020215086, 85.753195117740688147, 32.505696469292161055, 0.31201416231851908378, 1.9701081973325926811, 106.04966675960895373, 42.991618522770295385, 102.56713103465153836, 22.480451896240992937, 52.328447650506859645, 68.983635118584061274, 53.684258455665258225, 83.263329418394278036, 122.73885404892280349, 21.220703192240762291, 16.858127049134054687, 39.834029929148528026, 38.709037830667512026, 8.8009650196217990015, 42.63326968194814981, 92.645732978544401703, 42.02349656079968554, 76.272964129526371835, 89.099041476492857328, 35.474883281058282591, 77.597159199831367005, 122.3018421820270305, 70.820918433477345388, 14.68190801552918856, 123.66674315523778205, 70.571996520895481808, 29.65823032142361626, 114.12136123514574138, 20.976248973336623749, 66.138078152285743272, 64.066828727667598287, 26.60818096534057986, 8.2679998231797071639, 50.677423324155824957, 47.43078310839337064, 26.653318374836089788, 20.864820161950774491, 11.440844424600072671, 116.90919469554864918, 54.113449410360772163, 96.765817833118489943, 22.160178570684365695, 119.87401949815830449, 88.851492185003735358, 118.36275136430776911, 124.25968188704064232, 93.884484002523095114, 42.211630474077537656, 118.40029026767297182, 
52.34154566148572485, 28.475879141151381191, 90.662012960914580617, 31.399162807738321135, 63.874012833184679039, 119.11073385296185734, 53.228186443262529792, 15.466770748011185788, 97.22337747393612517, 25.106008611517609097, 5.6608266458933940157, 23.759188280975649832, 20.924837428556202212, 42.334679454837896628, 6.2614457622621557675, 36.669113327097875299, 110.32127470185150742, 98.213846130642195931, 112.71461222947982606, 26.399153959046088858, 109.85047760581073817, 103.14624307006670278, 86.095445035127340816, 52.103645441762637347, 72.444791621568583651, 52.137053058835590491, 7.3742279042271547951, 108.87564560706960037, 100.99902948245653533, 115.79071456619203673, 43.305528929900901858, 87.260402751173387514, 27.905435803651926108, 54.150194278674462112, 37.125330433897033799, 50.827213275162648642, 72.513383841891482007, 36.260086355323437601, 115.89927910642654751, 42.245633343009103555, 66.919553733892826131, 75.978696140675310744, 109.91679254449991276, 42.569398264135088539, 36.064558362675597891, 84.261818793554994045, 56.884600146047887392, 7.8527723979168513324, 74.059029242434917251, 37.497354073500900995, 79.246402664437482599, 109.76322386812535115, 56.327032214325299719, 114.34788144402045873, 83.041115470234217355, 21.976997447261965135, 9.2362196903341100551, 15.482371823709399905, 19.671151766928232973, 123.87416115911037195, 54.355853308155928971, 121.48328609968666569, 119.28893234447241412, 76.643869954452384263, 40.806285198086698074, 36.077767927905370016, 57.554836306240758859, 63.099068151663232129, 90.493978574999346165, 98.25911303856264567, 95.60870781014818931, 95.803447186030098237, 97.865570200159709202, 83.903695357104879804, 13.717617625883576693, 28.831485818627697881, 38.709754254392464645, 104.00363168973854044, 125.24638880798011087, 3.5587011567076842766, 50.585745135540491901, 0, 92.611452718258078676, 9.8357618630871002097, 110.02678040712635266, 60.263919949138653465, 44.458920924149424536, 6.8615985459255171008, 
38.911865421705442714, 110.62739712786788004, 124.23373755121428985, 105.81143382505615591, 41.460359820932353614, 28.43809058505212306, 72.153110192899475805, 13.300412323806085624, 47.772933756026759511, 49.998191833001328632, 11.187986276607261971, 98.837930398025491741, 105.70297963878692826, 9.7160533814203517977, 83.796833377014991129, 30.918452238609461347, 61.61919575831052498, 37.160868093302269699, 63.383775396752753295, 21.503113850743829971, 91.364738851290894672, 32.605288870086951647, 60.445986501115839928, 108.13120115303536295, 10.313295380907220533, 125.5313238618982723, 98.047060649278137134, 28.914830697842262452, 94.017140998999821022, 25.76330267565936083, 1.1457583439878362697, 90.181037104561255546, 87.634409639169462025, 83.915526416225475259, 126.98595907667549909, 55.577729023858410073, 94.10121485482522985, 30.370981170355662471, 102.72409533795143943, 31.767673608366749249, 18.208941901735670399, 114.82428286628783098, 12.1617924094935006, 112.69206716569169657, 98.906255853344191564, 6.1775190594526065979, 83.375187303776328918, 74.821756292909412878, 42.609300578838883666, 1.0400356885074870661, 85.229634742921916768, 62.121472711700334912, 87.22132460453940439, 64.850925160691986093, 20.974188122650957666, 43.345906342121452326, 15.500206319829885615, 124.88134688743230072, 84.237499397058854811, 81.995776541498344159, 117.73109557089628652, 50.37792818568777875, 29.879498338912526378, 125.46630050881503848, 29.672993536823923932, 2.1764912834660208318, 26.694403027544467477, 5.7217132373407366686, 2.5234487785310193431, 115.35888816138321999, 101.79788687083782861, 34.052463283293036511, 88.339743885939242318, 52.64277973422940704, 49.731758400648686802, 51.595133366528898478, 85.643777822420815937, 6.4979150602011941373, 84.441521417127660243, 78.161776706332602771, 107.71755713757374906, 59.997298723279527621, 79.50898048762610415, 77.712687770377669949, 68.418183549838431645, 18.078705108553549508, 66.345874341343005653, 
23.133892608759197174, 11.501321906791417859, 84.572525821593444562, 121.48439938766750856, 59.701602775490755448, 28.029531843782024225, 51.160046075474383542, 50.405490411067148671, 36.733598004662781022, 9.266564389177801786, 123.32391602649295237, 94.394391695524973329, 36.163521288392075803, 92.712303776454064064, 121.88466271987635992, 126.57401448874952621, 27.540478133985743625, 118.76603982958840788, 111.85832653299803496, 89.284401231649098918, 76.591360927883215481, 120.30929381622627261, 13.224440998401405523, 38.000511269317939878, 33.177302878873888403, 85.138694031644263305, 77.421301357320771785, 100.56588543514590128, 105.64507661117750104, 34.541481945740088122, 79.201540022688277531, 126.67597140648649656, 123.45333033846327453, 68.760080883948830888, 6.2634437994129257277, 100.56403986194709432, 117.11065075291844551, 46.032354075381590519, 42.83358887513531954, 100.91407082867590361, 110.82455227718673996, 124.76462874674689374, 34.444619572117517237, 89.599802503675164189, 90.721254495525499806, 66.51393735155579634, 4.5782389305968536064, 30.394523625505826203, 71.660751914827415021, 112.19631855464103865, 115.97753675380226923, 25.04729822333320044, 103.30622310102990014, 111.24109994596074102, 8.1688092724652960896, 11.57202215917641297, 68.263238921466836473, 64.211517329680646071, 121.01378449927142356, 43.292075609497260302, 110.38455100730425329, 0.15464122492267051712, 73.963728650269331411, 91.45804603534998023, 26.147858474589156685, 29.517780901707737939, 35.644961011093982961, 93.856905239801562857, 19.439871258691709954, 108.72933984407427488, 3.964600178471300751, 5.4498013036682095844, 80.891914379182708217, 97.92337474102168926, 58.860997953666810645, 26.43479045196363586, 37.14323199184218538, 110.94959732805727981, 118.3224396835648804, 127.58380447470699437, 87.494243145782093052, 125.52421800474257907, 112.37040748135041213, 126.67453706230662647, 110.49357205247724778, 46.986713499765755842, 43.963079815213859547, 
96.502996400868141791, 19.400677499881567201, 13.669996655418799492, 20.14420007942680968, 106.27499707552487962, 42.098936643556953641, 16.47644605761161074, 91.464739025694143493, 111.43070895468190429, 27.107570274733006954, 117.96747048521501711, 125.67302932838720153, 31.620792032597819343, 75.486029648775001988, 99.739961553892499069, 6.6264338466571643949, 24.773408268429193413, 51.464621602954139234, 111.40649245873646578, 109.69780935957533075, 53.367205062728316989, 65.085657897237979341, 75.953161073903174838, 90.057288565054477658, 47.395542310016026022, 95.452996726140554529, 109.76951404025385273, 71.430905018092744285, 97.001894644701678772, 127.88280116383248242, 65.606369745160918683, 42.916951041643187637, 118.4289199056038342, 30.333724783064099029, 107.75798699821461923, 50.861134891016263282, 28.881156062328955159, 63.356199771202227566, 39.138186277567001525, 90.27903949144820217, 76.314629981567122741, 5.763692478652956197, 41.299250284289882984, 125.32001203745676321, 62.859982036221481394, 106.29543214582736255, 105.69368059321277542, 8.8308397610562678892, 98.861529465528292349, 124.61718822659531725, 32.276617446867021499, 26.320514331382582895, 20.728004810262063984, 50.962958585379965371, 110.05805766040793969, 43.506390235481376294, 65.01139293858432211, 0.62402832463703816757, 3.9402163946688233409, 84.099333519217907451, 85.983237045544228749, 77.134262069303076714, 44.960903792481985874, 104.65689530101735727, 9.9672702371681225486, 107.36851691133415443, 38.526658836792194052, 117.47770809784560697, 42.441406384481524583, 33.716254098268109374, 79.668059858297056053, 77.418075661335024051, 17.601930039243598003, 85.266539363899937598, 57.291465957088803407, 84.046993121603009058, 24.545928259056381648, 50.198082952985714655, 70.949766562120203162, 27.194318399666371988, 116.60368436405769899, 13.641836866954690777, 29.363816031062015099, 119.33348631047920207, 13.143993041790963616, 59.316460642850870499, 100.24272247029148275, 
41.952497946676885476, 4.2761563045751245227, 0.13365745533519657329, 53.21636193068115972, 16.535999646359414328, 101.35484664831164991, 94.861566216786741279, 53.306636749672179576, 41.729640323901548982, 22.88168884920378332, 105.81838939109729836, 108.2268988207251823, 65.531635666236979887, 44.320357141372369369, 111.74803899632024695, 49.702984370011108695, 108.7255027286191762, 120.51936377408492262, 59.768968005046190228, 84.42326094815871329, 108.80058053534594364, 104.6830913229714497, 56.951758282306400361, 53.324025921829161234, 62.79832561547664227, 127.74802566637299606, 110.22146770592371468, 106.45637288652505958, 30.933541496026009554, 66.446754947872250341, 50.212017223035218194, 11.321653291786788031, 47.518376561951299664, 41.849674857112404425, 84.669358909675793257, 12.522891524524311535, 73.338226654199388577, 92.642549403706652811, 68.42769226128802984, 97.429224458959652111, 52.798307918095815694, 91.700955211625114316, 78.292486140137043549, 44.190890070254681632, 104.20729088352891267, 16.889583243137167301, 104.27410611767118098, 14.748455808457947569, 89.751291214142838726, 73.998058964916708646, 103.58142913238407345, 86.611057859805441694, 46.520805502346775029, 55.810871607303852215, 108.30038855734892422, 74.250660867794067599, 101.65442655032893526, 17.026767683782964014, 72.520172710650513181, 103.79855821285309503, 84.491266686021845089, 5.83910746778929024, 23.957392281354259467, 91.83358508899982553, 85.138796528270177078, 72.129116725354833761, 40.52363758710998809, 113.76920029209577478, 15.705544795837340644, 20.118058484869834501, 74.994708147005439969, 30.492805328878603177, 91.526447736250702292, 112.65406442865423742, 100.69576288804455544, 38.08223094046843471, 43.95399489452393027, 18.47243938066822011, 30.96474364741879981, 39.342303533860103926, 119.74832231822074391, 108.71170661631549592, 114.96657219937696937, 110.57786468894846621, 25.287739908904768527, 81.612570396173396148, 72.155535855810740031, 
115.10967261248151772, 126.19813630333010224, 52.987957150002330309, 68.518226077128929319, 63.217415620300016599, 63.606894372063834453, 67.731140400323056383, 39.807390714209759608, 27.435235251767153386, 57.662971637255395763, 77.419508508788567269, 80.007263379480718868, 122.49277761596022174, 7.1174023134153685533, 101.1714902710809838, 0, 57.222905436516157351, 19.671523726177838398, 92.053560814252705313, 120.52783989827730693, 88.917841848298849072, 13.723197091851034202, 77.823730843414523406, 93.254794255739398068, 120.46747510243221768, 83.622867650112311821, 82.920719641864707228, 56.8761811701078841, 16.306220385798951611, 26.600824647612171248, 95.545867512053519022, 99.996383666002657264, 22.37597255321816192, 69.67586079605462146, 83.405959277573856525, 19.432106762844341574, 39.593666754029982258, 61.836904477218922693, 123.23839151662104996, 74.321736186604539398, 126.76755079350550659, 43.006227701491297921, 54.729477702581789345, 65.210577740173903294, 120.89197300223531784, 88.262402306070725899, 20.626590761818079045, 123.06264772380018258, 68.094121298556274269, 57.829661395688162884, 60.034281997999642044, 51.526605351318721659, 2.2915166879793105181, 52.362074209126149071, 47.268819278342562029, 39.831052832454588497, 125.97191815335099818, 111.15545804772045813, 60.202429709650459699, 60.74196234071496292, 77.448190675902878866, 63.535347216733498499, 36.417883803474978777, 101.64856573257566197, 24.323584818990639178, 97.384134331387031125, 69.812511706688383128, 12.355038118908851175, 38.750374607556295814, 21.643512585818825755, 85.218601157677767333, 2.080071377018612111, 42.459269485843833536, 124.2429454234043078, 46.44264920908244676, 1.7018503213839721866, 41.948376245301915333, 86.691812684242904652, 31.000412639663409209, 121.76269377486823942, 40.474998794121347601, 35.991553083000326296, 107.46219114179257303, 100.7558563713755575, 59.758996677828690736, 122.93260101763007697, 59.345987073647847865, 4.3529825669320416637, 
53.388806055088934954, 11.443426474681473337, 5.0468975570656766649, 102.71777632277007797, 75.59577374167565722, 68.104926566589711001, 48.679487771878484637, 105.28555946845881408, 99.463516801297373604, 103.19026673306143493, 43.287555644841631874, 12.995830120406026253, 40.883042834258958464, 28.323553412668843521, 87.43511427514749812, 119.99459744656269322, 31.017960975255846279, 27.425375540758977877, 8.8363670996768632904, 36.157410217110736994, 4.6917486826860113069, 46.267785217518394347, 23.002643813586473698, 41.145051643190527102, 114.96879877533501713, 119.4032055509815109, 56.05906368756404845, 102.32009215094876708, 100.81098082213429734, 73.467196009325562045, 18.533128778359241551, 118.64783205298954272, 60.788783391049946658, 72.327042576787789585, 57.424607552911766106, 115.76932543975635781, 125.14802897749905242, 55.080956267975125229, 109.53207965918045375, 95.716653065999707906, 50.568802463301835814, 25.182721855770068942, 112.61858763245254522, 26.448881996802811045, 76.001022538635879755, 66.354605757751414785, 42.277388063292164588, 26.84260271464518155, 73.131770870295440545, 83.290153222355002072, 69.082963891483814223, 30.403080045380193042, 125.3519428129766311, 118.90666067693018704, 9.5201617678976617754, 12.526887598829489434, 73.128079723897826625, 106.22130150583689101, 92.064708150763181038, 85.667177750274277059, 73.828141657355445204, 93.649104554377117893, 121.52925749349378748, 68.889239144238672452, 51.199605007353966357, 53.44250899105463759, 5.0278747031115926802, 9.1564778611937072128, 60.789047251015290385, 15.321503829658468021, 96.392637109285715269, 103.95507350760453846, 50.094596446670038858, 78.61244620206343825, 94.482199891921482049, 16.337618544930592179, 23.144044318356463918, 8.5264778429373109248, 0.42303465936493012123, 114.02756899854284711, 86.584151218998158583, 92.769102014608506579, 0.30928244984897901304, 19.9274573005423008, 54.916092070699960459, 52.295716949178313371, 59.035561803415475879, 
71.289922022191603901, 59.713810479603125714, 38.879742517387057887, 89.458679688148549758, 7.9292003569426015019, 10.899602607340057148, 33.783828758369054412, 67.846749482043378521, 117.72199590733362129, 52.86958090392727172, 74.286463983688008739, 93.899194656118197599, 108.64487936713339877, 127.16760894941762672, 46.988486291567824082, 123.04843600948879612, 96.740814962704462232, 125.34907412461689091, 92.98714410495449556, 93.973426999531511683, 87.926159630431357073, 65.005992801736283582, 38.801354999763134401, 27.339993310837598983, 40.288400158857257338, 84.549994151049759239, 84.197873287113907281, 32.95289211522685946, 54.929478051391924964, 94.861417909363808576, 54.215140549469651887, 107.93494097043367219, 123.34605865677440306, 63.241584065199276665, 22.972059297553641954, 71.479923107784998138, 13.25286769331432879, 49.546816536858386826, 102.92924320591191645, 94.812984917476569535, 91.395618719150661491, 106.73441012546027196, 2.1713157944795966614, 23.906322147809987655, 52.114577130108955316, 94.791084620035690023, 62.905993452284747036, 91.539028080511343433, 14.861810036189126549, 66.003789289406995522, 127.76560232766860281, 3.2127394903218373656, 85.833902083290013252, 108.85783981120766839, 60.667449566131836036, 87.515973996429238468, 101.72226978203252656, 57.762312124657910317, 126.71239954240445513, 78.276372555137641029, 52.55807898290004232, 24.629259963134245481, 11.527384957305912394, 82.598500568583403947, 122.64002407491352642, 125.71996407244660077, 84.590864291658363072, 83.387361186425550841, 17.661679522116173757, 69.723058931060222676, 121.23437645319063449, 64.553234893734042998, 52.641028662765165791, 41.456009620524127968, 101.92591717076356872, 92.116115320815879386, 87.012780470966390567, 2.0227858771686442196, 1.2480566492740763351, 7.8804327893412846606, 40.198667038439452881, 43.966474091092095478, 26.268524138606153429, 89.921807584967609728, 81.313790602034714539, 19.934540474339883076, 86.737033822668308858, 
77.053317673588026082, 106.95541619569121394, 84.882812768963049166, 67.432508196539856726, 31.336119716597750084, 26.836151322670048103, 35.203860078490833985, 42.533078727799875196, 114.58293191418124479, 40.093986243206018116, 49.091856518112763297, 100.39616590597506729, 13.899533124240406323, 54.388636799336381955, 105.20736872811539797, 27.283673733909381554, 58.727632062127668178, 110.66697262095840415, 26.287986083581927232, 118.63292128570537898, 72.485444940582965501, 83.904995893353770953, 8.5523126091502490453, 0.26731491067039314657, 106.43272386136231944, 33.071999292722466635, 74.709693296626937808, 61.723132433573482558, 106.61327349934435915, 83.459280647803097963, 45.763377698411204619, 83.636778782198234694, 88.453797641454002587, 3.0632713324739597738, 88.640714282748376718, 95.496077992640493903, 99.405968740025855368, 89.451005457241990371, 113.03872754816984525, 119.53793601009601844, 40.84652189632106456, 89.601161070695525268, 81.366182645942899399, 113.90351656461280072, 106.64805184365832247, 125.59665123095692252, 127.49605133274963009, 92.442935411851067329, 84.912745773050119169, 61.867082992055657087, 4.8935098957445006818, 100.42403444607043639, 22.643306583573576063, 95.036753123902599327, 83.699349714228446828, 41.338717819351586513, 25.045783049052261049, 18.676453308402415132, 57.2850988074169436, 8.8553845225796976592, 66.858448917919304222, 105.59661583619526937, 55.401910423253866611, 28.584972280274087097, 88.381780140513001243, 80.414581767057825346, 33.779166486277972581, 80.548212235342361964, 29.496911616919533117, 51.50258242828931543, 19.996117929837055271, 79.162858264771784889, 45.222115719610883389, 93.041611004693550058, 111.62174321461134241, 88.600777114701486425, 20.501321735591773177, 75.308853100657870527, 34.053535367569566006, 17.040345421304664342, 79.597116425709828036, 40.982533372043690179, 11.67821493557858048, 47.914784562712156912, 55.667170178003289038, 42.277593056543992134, 16.258233450709667522, 
81.047275174223614158, 99.538400584191549569, 31.411089591674681287, 40.236116969743306981, 21.989416294010879938, 60.985610657760844333, 55.052895472505042562, 97.308128857312112814, 73.391525776089110877, 76.16446188093686942, 87.90798978904786054, 36.94487876133644022, 61.92948729483759962, 78.68460706772384583, 111.4966446364451258, 89.423413232634629821, 101.93314439875757671, 93.155729377896932419, 50.575479817813175032, 35.225140792350430274, 16.311071711625118041, 102.21934522496667341, 124.39627260666020447, 105.9759143000082986, 9.0364521542578586377, 126.4348312406000332, 127.21378874413130688, 7.4622808006461127661, 79.614781428419519216, 54.870470503534306772, 115.3259432745144295, 26.839017017580772517, 32.014526758965075715, 116.98555523192044348, 14.234804626834375085, 74.342980542165605584, 0, 114.4458108730323147, 39.343047452359314775, 56.107121628509048605, 113.05567979655825184, 49.835683696597698145, 27.446394183705706382, 27.647461686832684791, 58.509588511482434114, 112.93495020486807334, 39.245735300228261622, 37.841439283729414456, 113.7523623402157682, 32.612440771597903222, 53.201649295227980474, 63.091735024110676022, 71.992767332005314529, 44.751945106439961819, 11.351721592112880899, 38.81191855514771305, 38.864213525688683148, 79.187333508059964515, 123.67380895444148337, 118.4767830332457379, 20.643472373212716775, 125.53510158701101318, 86.012455402982595842, 109.45895540516721667, 2.4211554803514445666, 113.78394600447427365, 48.524804612141451798, 41.253181523639796069, 118.12529544760400313, 8.1882425971125485376, 115.65932279137632577, 120.06856399599928409, 103.05321070263744332, 4.5830333759586210363, 104.72414841825229814, 94.537638556685124058, 79.662105664912814973, 123.94383630670563434, 94.310916095440916251, 120.4048594193009194, 121.48392468143356382, 26.896381351805757731, 127.070694433466997, 72.835767606953595532, 75.297131465151323937, 48.647169637984916335, 66.768268662777700229, 11.625023413376766257, 
24.710076237817702349, 77.500749215112591628, 43.287025171641289489, 42.437202315355534665, 4.160142754037224222, 84.918538971691305051, 120.48589084680861561, 92.885298418164893519, 3.4037006427679443732, 83.896752490607468644, 45.383625368485809304, 62.000825279330456397, 115.52538754973647883, 80.949997588242695201, 71.983106166004290571, 86.924382283588784048, 73.511712742751115002, 119.51799335566101945, 117.86520203526015393, 118.69197414729569573, 8.7059651338640833274, 106.77761211017786991, 22.886852949362946674, 10.093795114134991309, 77.435552645543793915, 23.191547483354952419, 8.2098531331794220023, 97.358975543760607252, 82.57111893692126614, 70.927033602598385187, 78.380533466122869868, 86.575111289683263749, 25.991660240812052507, 81.766085668517916929, 56.64710682534132502, 46.870228550298634218, 111.98919489312538644, 62.035921950511692557, 54.850751081517955754, 17.672734199353726581, 72.314820434221473988, 9.3834973653720226139, 92.535570435036788695, 46.005287627172947396, 82.290103286384692183, 101.93759755067367223, 110.80641110196302179, 112.11812737513173488, 76.640184301901172148, 73.621961644272232661, 18.93439201865112409, 37.06625755672212108, 109.29566410597908543, 121.5775667821035313, 16.65408515357557917, 114.84921510582717019, 103.5386508795163536, 122.29605795499810483, 110.16191253595388844, 91.06415931836454547, 63.433306131999415811, 101.13760492660367163, 50.365443711543775862, 97.237175264905090444, 52.897763993609260069, 24.002045077275397489, 4.7092115155028295703, 84.554776126584329177, 53.685205429294001078, 18.263541740594519069, 38.580306444713642122, 10.165927782967628445, 60.806160090764024062, 122.70388562595690018, 109.81332135386037407, 19.040323535795323551, 25.053775197662616847, 18.256159447799291229, 84.44260301167742, 56.129416301530000055, 43.334355500552192098, 19.656283314714528387, 59.298209108757873764, 115.05851498699121294, 9.7784782884773449041, 102.39921001470793271, 106.88501798210927518, 
10.05574940622318536, 18.312955722387414426, 121.57809450203058077, 30.643007659320574021, 64.785274218575068517, 79.910147015209076926, 100.1891928933437157, 29.224892404130514478, 60.964399783842964098, 32.675237089861184359, 46.288088636712927837, 17.05295568587462185, 0.84606931873349822126, 100.0551379970893322, 45.168302437999955146, 57.538204029217013158, 0.61856489969795802608, 39.8549146010846016, 109.83218414139992092, 104.59143389835662674, 118.07112360683458974, 14.579844044383207802, 119.42762095920625143, 77.759485034777753754, 50.917359376297099516, 15.858400713885203004, 21.799205214680114295, 67.567657516741746804, 7.6934989640903950203, 107.44399181466724258, 105.73916180785818142, 20.572927967379655456, 59.798389312240033178, 89.289758734270435525, 126.33521789883525344, 93.976972583139286144, 118.09687201898123021, 65.481629925412562443, 122.6981482492374198, 57.9742882099126291, 59.946853999066661345, 47.852319260866352124, 2.0119856034725671634, 77.602709999526268803, 54.679986621675197966, 80.576800317718152655, 41.099988302103156457, 40.395746574227814563, 65.905784230453718919, 109.85895610278748791, 61.722835818727617152, 108.43028109894294175, 87.869881940870982362, 118.69211731354880612, 126.48316813039855333, 45.944118595110921888, 14.959846215569996275, 26.505735386628657579, 99.09363307372041163, 77.858486411827470874, 61.625969834956777049, 54.791237438304960961, 85.468820250920543913, 4.3426315889591933228, 47.81264429561997531, 104.22915426022154861, 61.582169240071380045, 125.81198690457313205, 55.078056161022686865, 29.723620072381891077, 4.0075785788176290225, 127.53120465533720562, 6.4254789806473127101, 43.667804166583664482, 89.71567962241533678, 121.33489913226731005, 47.031947992862114916, 75.444539564065053128, 115.52462424931945861, 125.42479908481254824, 28.552745110275282059, 105.11615796580008464, 49.258519926268490963, 23.054769914615462767, 37.197001137170445872, 117.28004814983069082, 123.43992814489683951, 
41.181728583320364123, 38.774722372851101682, 35.323359044235985493, 11.446117862124083331, 114.46875290638490696, 1.1064697874680859968, 105.28205732553033158, 82.912019241051893914, 75.851834341530775419, 56.232230641635396751, 46.025560941932781134, 4.0455717543372884393, 2.4961132985481526703, 15.760865578682569321, 80.397334076878905762, 87.932948182184190955, 52.537048277215944836, 51.843615169938857434, 34.627581204069429077, 39.869080948683404131, 45.474067645340255694, 26.106635347179690143, 85.910832391386065865, 41.765625537929736311, 6.8650163930797134526, 62.672239433195500169, 53.672302645340096205, 70.407720156981667969, 85.066157455603388371, 101.16586382836248958, 80.187972486412036233, 98.183713036229164572, 72.792331811950134579, 27.799066248480812646, 108.77727359867640189, 82.414737456230795942, 54.567347467818763107, 117.45526412425897433, 93.3339452419168083, 52.575972167167492444, 109.26584257141439593, 16.97088988116956898, 39.809991786707541905, 17.104625218300498091, 0.53462982134442427196, 84.865447722724638879, 66.143998585448571248, 21.419386593253875617, 123.4462648671506031, 85.226546998692356283, 38.918561295606195927, 91.526755396826047217, 39.273557564400107367, 48.907595282908005174, 6.1265426649515575264, 49.281428565500391414, 62.992155985284625785, 70.811937480055348715, 50.90201091448761872, 98.077455096343328478, 111.07587202019203687, 81.693043792645767098, 51.202322141394688515, 34.732365291889436776, 99.807033129229239421, 85.296103687320282916, 123.19330246191748301, 126.99210266550289816, 56.885870823705772636, 41.825491546100238338, 123.73416598411131417, 9.7870197914926393423, 72.848068892140872777, 45.286613167150790105, 62.073506247808836633, 39.398699428456893656, 82.677435638703173026, 50.091566098104522098, 37.352906616804830264, 114.5701976148338872, 17.710769045163033297, 5.7168978358422464225, 83.193231672390538733, 110.80382084650773322, 57.169944560551812174, 48.763560281029640464, 32.829163534115650691, 
67.558332972559583141, 33.096424470684723929, 58.993823233839066233, 103.00516485657863086, 39.992235859677748522, 30.325716529543569777, 90.444231439225404756, 58.083222009390738094, 95.243486429226322798, 49.201554229402972851, 41.002643471187184332, 22.617706201315741055, 68.107070735139132012, 34.080690842612966662, 31.194232851419656072, 81.965066744087380357, 23.35642987115716096, 95.829569125424313825, 111.33434035600657808, 84.555186113087984268, 32.516466901422973024, 34.094550348450866295, 71.076801168383099139, 62.822179183349362575, 80.472233939486613963, 43.978832588021759875, 121.97122131552168867, 110.1057909450137231, 66.616257714627863606, 18.783051552181859734, 24.328923761877376819, 47.815979578095721081, 73.889757522672880441, 123.85897458967883722, 29.369214135451329639, 94.993289272893889574, 50.846826465272897622, 75.866288797518791398, 58.311458755793864839, 101.15095963562635006, 70.450281584700860549, 32.622143423253874062, 76.438690449936984805, 120.79254521332404693, 83.951828600016597193, 18.072904308519355254, 124.86966248120370437, 126.42757748826261377, 14.924561601295863511, 31.229562856839038432, 109.74094100707225152, 102.65188654902885901, 53.678034035161545034, 64.029053517933789408, 105.97111046384088695, 28.469609253672388149, 20.685961084334849147, 0, 100.89162174606462941, 78.68609490471862955, 112.21424325702173519, 98.111359593116503675, 99.671367393195396289, 54.892788367411412764, 55.294923373665369581, 117.01917702296850621, 97.869900409736146685, 78.491470600456523243, 75.68287856746246689, 99.504724680431536399, 65.224881543195806444, 106.40329859045596095, 126.18347004822499002, 15.985534664010629058, 89.503890212879923638, 22.703443184225761797, 77.6238371102954261, 77.728427051381004276, 30.374667016123567009, 119.34761790888660471, 108.95356606649147579, 41.286944746429071529, 123.07020317402566434, 44.024910805968829663, 90.917910810334433336, 4.8423109607065271121, 99.5678920089485473, 97.049609224282903597, 
82.506363047283230117, 108.25059089520800626, 16.376485194228735054, 103.31864558275628951, 112.13712799200220616, 78.106421405274886638, 9.1660667519208800513, 81.448296836508234264, 61.075277113373886095, 31.324211329829267925, 119.88767261341490666, 60.621832190885470482, 112.80971883860547678, 114.96784936286712764, 53.792762703615153441, 126.14138886693763197, 17.671535213910829043, 22.594262930302647874, 97.294339275973470649, 5.5365373255554004572, 23.250046826753532514, 49.420152475639042677, 27.001498430225183256, 86.574050343282578979, 84.874404630711069331, 8.3202855080780864228, 41.83707794338624808, 112.97178169362086919, 57.770596836329787038, 6.8074012855395267252, 39.793504981214937288, 90.767250736971618608, 124.00165055866455077, 103.05077509947659564, 33.899995176489028381, 15.966212332012219122, 45.848764567181206075, 19.023425485505867982, 111.03598671132567688, 107.73040407052394585, 109.38394829459139146, 17.411930267731804634, 85.555224220355739817, 45.773705898725893348, 20.187590228273620596, 26.87110529108758783, 46.383094966709904838, 16.419706266362481983, 66.717951087524852483, 37.14223787384617026, 13.854067205200408353, 28.761066932249377714, 45.150222579366527498, 51.983320481627742993, 35.532171337035833858, 113.29421365068265004, 93.740457100600906415, 95.978389786250772886, 124.07184390102338511, 109.70150216303591151, 35.34546839871109114, 16.629640868442947976, 18.766994730747683207, 57.071140870073577389, 92.01057525434953277, 36.580206572773022344, 75.875195101350982441, 93.612822203929681564, 96.236254750267107738, 25.280368603805982275, 19.243923288544465322, 37.86878403730224818, 74.13251511344788014, 90.591328211961808847, 115.15513356420706259, 33.308170307154796319, 101.69843021165797836, 79.077301759036345175, 116.59211590999984764, 92.323825071911414852, 54.12831863672909094, 126.8666122640024696, 74.275209853207343258, 100.7308874230911897, 66.474350529810180888, 105.79552798721852014, 48.004090154550794978, 
9.4184230310092971195, 41.109552253168658353, 107.37041085858800216, 36.527083481189038139, 77.160612889427284244, 20.331855565935256891, 121.6123201815316861, 117.40777125191743835, 91.626642707720748149, 38.080647071590647101, 50.107550395325233694, 36.512318895602220437, 40.885206023354839999, 112.25883260306000011, 86.668711001104384195, 39.312566629429056775, 118.59641821751938551, 102.11702997398242587, 19.556956576958327787, 76.798420029419503408, 85.770035964218550362, 20.111498812446370721, 36.62591144477846683, 115.15618900406116154, 61.286015318644786021, 1.5705484371537750121, 31.820294030421791831, 72.378385786687431391, 58.449784808261028957, 121.9287995676859282, 65.350474179722368717, 92.576177273425855674, 34.105911371749243699, 1.6921386374706344213, 72.110275994178664405, 90.336604875999910291, 115.07640805843402632, 1.2371297993959160522, 79.709829202172841178, 91.664368282803479815, 81.182867796716891462, 108.14224721366917947, 29.159688088770053582, 110.85524191841614083, 27.518970069555507507, 101.83471875259419903, 31.716801427770406008, 43.598410429363866569, 7.1353150334834936075, 15.386997928184428019, 86.887983629338123137, 83.478323615716362838, 41.145855934759310912, 119.59677862448370433, 50.579517468544509029, 124.67043579767414485, 59.953945166282210266, 108.1937440379660984, 2.9632598508287628647, 117.39629649847483961, 115.9485764198252582, 119.89370799813332269, 95.704638521732704248, 4.0239712069487723056, 27.205419999052537605, 109.35997324335403391, 33.153600635436305311, 82.199976604206312913, 80.791493148459267104, 3.8115684609110758174, 91.717912205574975815, 123.44567163745887228, 88.860562197889521485, 47.739763881745602703, 109.38423462710125023, 124.96633626079710666, 91.888237190221843775, 29.91969243113999255, 53.011470773260953138, 70.18726614744446124, 27.716972823654941749, 123.2519396699135541, 109.5824748766135599, 42.937640501841087826, 8.6852631779183866456, 95.625288591243588598, 80.458308520443097223, 
123.16433848014639807, 123.6239738091462641, 110.15611232204537373, 59.447240144763782155, 8.0151571576388960239, 127.06240931067441124, 12.85095796129462542, 87.335608333170966944, 51.431359244834311539, 114.6697982645346201, 94.06389598572786781, 22.889079128133744234, 103.04924849864255521, 122.84959816962509649, 57.105490220554202097, 82.232315931600169279, 98.517039852536981925, 46.109539829230925534, 74.394002274344529724, 106.56009629966501961, 118.87985628979367903, 82.363457166640728246, 77.549444745702203363, 70.646718088471970987, 22.892235724251804641, 100.93750581276981393, 2.2129395749398099724, 82.564114651064301142, 37.824038482107425807, 23.703668683061550837, 112.4644612832707935, 92.051121883865562268, 8.0911435086745768785, 4.9922265970963053405, 31.521731157368776621, 32.794668153761449503, 47.86589636436838191, 105.07409655443188967, 103.68723033987771487, 69.255162408138858154, 79.738161897366808262, 90.948135290680511389, 52.213270694363018265, 43.82166478277213173, 83.531251075859472621, 13.730032786159426905, 125.34447886639100034, 107.34460529068383039, 12.815440313966973918, 42.132314911210414721, 74.331727656728617148, 32.375944972827710444, 68.367426072461967124, 17.584663623900269158, 55.598132496965263272, 89.554547197356441757, 36.829474912461591884, 109.13469493564116419, 106.91052824852158665, 58.667890483833616599, 105.15194433433862287, 90.531685142828791868, 33.94177976233913796, 79.619983573415083811, 34.209250436600996181, 1.0692596426924865227, 41.730895445452915737, 4.287997170897142496, 42.838773186511389213, 118.89252973430484417, 42.453093997384712566, 77.837122591216029832, 55.053510793652094435, 78.547115128800214734, 97.815190565816010348, 12.253085329903115053, 98.562857131000782829, 125.98431197057288955, 13.62387496011069743, 101.80402182897887542, 68.154910192690294934, 94.151744040387711721, 35.386087585291534197, 102.40464428278937703, 69.46473058378251153, 71.614066258462116821, 42.59220737464420381, 
118.38660492383860401, 125.98420533100579632, 113.77174164741154527, 83.650983092204114655, 119.46833196822262835, 19.574039582988916663, 17.696137784281745553, 90.573226334301580209, 124.14701249561767327, 78.797398856917425292, 37.354871277406346053, 100.1831321962090442, 74.705813233613298507, 101.1403952296677744, 35.421538090326066595, 11.433795671688130824, 38.386463344784715446, 93.607641693015466444, 114.33988912110362435, 97.527120562059280928, 65.658327068234939361, 7.1166659451191662811, 66.192848941369447857, 117.98764646768177045, 78.010329713157261722, 79.984471719355497044, 60.651433059090777533, 52.888462878450809512, 116.16644401878511417, 62.486972858452645596, 98.403108458805945702, 82.005286942374368664, 45.235412402635120088, 8.2141414702782640234, 68.161381685225933325, 62.388465702842950122, 35.930133488174760714, 46.712859742317959899, 63.659138250852265628, 94.668680712016794132, 41.110372226175968535, 65.032933802845946047, 68.189100696905370569, 14.153602336766198277, 125.64435836670236313, 32.944467878973227926, 87.957665176047157729, 115.94244263104701531, 92.211581890031084185, 5.2325154292557272129, 37.566103104363719467, 48.657847523754753638, 95.631959156195080141, 19.779515045349398861, 119.71794917935767444, 58.738428270906297257, 61.986578545787779149, 101.69365293054943322, 23.732577595037582796, 116.62291751159136766, 74.301919271256338106, 12.900563169401721098, 65.244286846511386102, 24.87738089987760759, 113.58509042665173183, 39.903657200033194385, 36.145808617042348487, 121.73932496241104673, 124.85515497652522754, 29.849123202591727022, 62.459125713681714842, 91.481882014148141025, 77.303773098057718016, 107.35606807032672805, 0.058107035871216794476, 83.942220927681773901, 56.939218507344776299, 41.371922168669698294, 0, 73.78324349213289679, 29.372189809440897079, 96.428486514047108358, 68.222719186236645328, 71.342734786390792578, 109.78557673482282553, 110.58984674733437714, 106.03835404594065039, 
67.739800819475931348, 28.982941200916684465, 23.365757134928571759, 71.009449360866710776, 2.4497630863952508662, 84.806597180915559875, 124.36694009644998005, 31.971069328024896095, 51.007780425759847276, 45.406886368455161573, 27.24767422059449018, 27.456854102762008552, 60.749334032250771997, 110.6952358177768474, 89.907132132982951589, 82.573889492861781036, 118.14040634805132868, 88.049821611941297306, 53.835821620668866672, 9.6846219214166922029, 71.135784017900732579, 66.099218448569445172, 37.012726094570098212, 88.501181790416012518, 32.752970388461108087, 78.637291165516217006, 96.274255984008050291, 28.212842810549773276, 18.332133503845398081, 34.896593673016468529, 122.15055422674777219, 62.648422659662173828, 111.77534522682981333, 121.24366438177457894, 97.619437677214591531, 101.93569872573425528, 107.58552540723394486, 124.28277773387526395, 35.343070427821658086, 45.188525860608933726, 66.588678551946941298, 11.073074651110800914, 46.500093653507065028, 98.840304951278085355, 54.002996860454004491, 45.148100686568795936, 41.748809261422138661, 16.640571016156172846, 83.674155886776134139, 97.943563387241738383, 115.54119367265957408, 13.61480257107905345, 79.587009962433512555, 53.534501473946875194, 120.00330111733273952, 78.101550198953191284, 67.799990352978056762, 31.932424664024438243, 91.697529134362412151, 38.046850971015373943, 94.071973422654991737, 87.460808141051529674, 90.767896589182782918, 34.823860535467247246, 43.110448440711479634, 91.547411797451786697, 40.375180456547241192, 53.742210582175175659, 92.766189933419809677, 32.839412532728601946, 5.4359021750497049652, 74.284475747692340519, 27.708134410400816705, 57.522133864498755429, 90.300445158733054996, 103.96664096325548599, 71.064342674075305695, 98.58842730136893806, 59.480914201205450809, 63.956779572505183751, 120.14368780205040821, 91.403004326071823016, 70.690936797422182281, 33.259281736885895953, 37.533989461495366413, 114.14228174015079276, 56.02115050869906554, 
73.160413145549682667, 23.75039020270560286, 59.225644407863001106, 64.472509500534215476, 50.56073720761196455, 38.487846577088930644, 75.737568074608134339, 20.265030226899398258, 53.182656423927255673, 102.31026712841412518, 66.616340614313230617, 75.396860423319594702, 30.154603518076328328, 105.18423182000333327, 56.647650143822829705, 108.25663727346181986, 125.73322452800857718, 20.550419706418324495, 73.461774846186017385, 4.948701059623999754, 83.591055974437040277, 96.008180309101589955, 18.836846062018594239, 82.219104506340954686, 86.740821717176004313, 73.054166962381714256, 26.321225778854568489, 40.663711131874151761, 115.22464036306701018, 106.81554250383851468, 55.253285415441496298, 76.161294143181294203, 100.21510079065046739, 73.024637791204440873, 81.770412046713317977, 96.5176652061236382, 45.33742200220876839, 78.625133258861751528, 109.19283643504240899, 76.234059947964851744, 39.113913153920293553, 25.596840058842644794, 43.540071928437100723, 40.222997624892741442, 73.251822889560571639, 102.31237800812232308, 122.57203063728957204, 3.1410968743075500242, 63.640588060847221641, 16.756771573374862783, 116.89956961652569589, 115.85759913537549437, 2.7009483594483754132, 57.152354546851711348, 68.211822743498487398, 3.3842772749449068215, 16.220551988360966789, 52.673209752003458561, 102.15281611686805263, 2.4742595987954700831, 31.419658404349320335, 55.328736565610597609, 34.365735593437420903, 88.284494427338358946, 58.319376177543745143, 93.710483836832281668, 55.037940139114652993, 75.669437505188398063, 63.433602855540812016, 87.196820858727733139, 14.270630066966987215, 30.773995856372494018, 45.775967258679884253, 38.956647231432725675, 82.291711869522259803, 111.19355724896740867, 101.15903493709265604, 121.3408715953482897, 119.90789033256805851, 88.387488075932196807, 5.9265197016611637082, 106.7925929969533172, 103.89715283965415438, 111.78741599627028336, 63.409277043465408497, 8.0479424138975446112, 54.410839998108713189, 
90.719946486708067823, 66.3072012708762486, 36.399953208416263806, 33.582986296918534208, 7.6231369218257896136, 55.435824411153589608, 118.89134327492138254, 49.72112439577904297, 95.479527763494843384, 90.76846925420613843, 121.9326725215978513, 55.776474380443687551, 59.8393848622799851, 106.02294154652554425, 12.374532294888922479, 55.433945647313521476, 118.50387933983074618, 91.1649497532271198, 85.875281003682175651, 17.37052635584041127, 63.250577182490815176, 32.916617040889832424, 118.32867696029643412, 119.24794761829616618, 92.312224644090747461, 118.89448028952756431, 16.030314315281430027, 126.12481862134882249, 25.701915922592888819, 46.671216666341933887, 102.86271848967226106, 101.33959652907287818, 60.12779197145573562, 45.778158256271126447, 78.098496997288748389, 117.69919633925019298, 114.21098044111204217, 36.464631863203976536, 69.034079705073963851, 92.219079658465489047, 20.788004548692697426, 85.120192599333677208, 109.75971257959099603, 36.726914333285094472, 27.098889491408044705, 13.293436176943941973, 45.78447144850724726, 73.875011625539627858, 4.4258791498832579236, 37.128229302128602285, 75.648076964214851614, 47.407337366126739653, 96.928922566545224981, 56.102243767731124535, 16.182287017349153757, 9.9844531941926106811, 63.043462314737553243, 65.589336307522899006, 95.731792728740401799, 82.148193108867417322, 79.374460679759067716, 10.510324816281354288, 31.476323794737254502, 53.896270581361022778, 104.42654138872603653, 87.643329565547901439, 39.062502151718945242, 27.460065572322491789, 122.68895773278563865, 86.68921058136766078, 25.630880627933947835, 84.26462982242446742, 20.663455313457234297, 64.751889945659058867, 8.7348521449239342473, 35.169327247800538316, 111.19626499393416452, 51.109094394712883513, 73.658949824926821748, 90.269389871285966365, 85.821056497043173295, 117.33578096767087118, 82.303888668680883711, 53.063370285657583736, 67.883559524681913899, 31.239967146830167621, 68.418500873201992363, 
2.1385192853849730454, 83.461790890905831475, 8.5759943417942849919, 85.677546373026416404, 109.78505946860968834, 84.90618799477306311, 27.674245182432059664, 110.10702158730782685, 29.094230257604067447, 67.630381131632020697, 24.506170659809868084, 69.125714262005203636, 123.9686239411457791, 27.247749920225032838, 75.608043657961388817, 8.3098203853842278477, 60.303488080775423441, 70.772175170586706372, 76.809288565582392039, 10.92946116756866104, 15.228132516924233641, 85.184414749292045599, 108.773209847680846, 123.96841066201523063, 99.543483294826728525, 39.301966184411867289, 110.9366639364452567, 39.148079165977833327, 35.392275568567129085, 53.146452668606798397, 120.29402499123898451, 29.594797713834850583, 74.709742554816330085, 72.36626439242172637, 21.411626467230234994, 74.280790459335548803, 70.843076180652133189, 22.867591343376261648, 76.77292668957306887, 59.215283386034570867, 100.67977824221088667, 67.054241124122199835, 3.316654136469878722, 14.233331890241970541, 4.3856978827425336931, 107.97529293536717887, 28.020659426318161422, 31.968943438710994087, 121.30286611818155507, 105.776925756905257, 104.33288803757022833, 124.97394571690892917, 68.806216917615529383, 36.010573884748737328, 90.470824805270240176, 16.428282940560166026, 8.3227633704518666491, 124.77693140568590024, 71.860266976349521428, 93.425719484635919798, 127.31827650170816923, 61.337361424033588264, 82.220744452355575049, 2.0658676056955300737, 8.3782013938107411377, 28.307204673536034534, 123.28871673340472626, 65.88893575795009383, 47.915330352097953437, 103.8848852620976686, 56.423163780062168371, 10.465030858511454426, 75.132206208727438934, 97.315695047513145255, 63.263918312390160281, 39.5590300907024357, 111.43589835871534888, 117.47685654181259451, 123.97315709157919628, 75.387305861098866444, 47.465155190078803571, 105.24583502318637329, 20.603838542512676213, 25.801126338807080174, 2.4885736930227722041, 49.754761799758853158, 99.170180853303463664, 
79.807314400066388771, 72.291617234088334953, 115.47864992482573143, 121.71030995305045508, 59.698246405183454044, 124.91825142736706766, 54.963764028299920028, 26.607546196119074011, 86.712136140653456096, 0.11621407174607156776, 39.884441855363547802, 113.8784370146895526, 82.743844337343034567, 0, 19.566486984265793581, 58.744379618885432137, 64.856973028097854694, 8.4454383724769286346, 14.685469572785223136, 91.571153469649289036, 93.179693494672392262, 84.076708091884938767, 7.4796016389518626966, 57.965882401833368931, 46.731514269860781496, 14.018898721733421553, 4.8995261727941397112, 41.613194361834757729, 120.73388019289996009, 63.942138656049792189, 102.01556085151969455, 90.813772736910323147, 54.49534844118898036, 54.913708205524017103, 121.49866806450518197, 93.390471635557332775, 51.814264265969541157, 37.147778985727200052, 108.28081269610629533, 48.099643223882594611, 107.67164324133773334, 19.369243842837022385, 14.271568035805103136, 4.1984368971388903446, 74.025452189143834403, 49.002363580832025036, 65.505940776925854152, 29.274582331036071992, 64.548511968016100582, 56.425685621099546552, 36.664267007690796163, 69.793187346036575036, 116.30110845349554438, 125.29684531932798564, 95.550690453663264634, 114.48732876355279586, 67.238875354432821041, 75.871397451472148532, 87.171050814471527701, 120.5655554677505279, 70.686140855643316172, 90.377051721221505431, 5.177357103893882595, 22.146149302221601829, 93.000187307017768035, 69.680609902556170709, 108.00599372090800898, 90.296201373137591872, 83.497618522844277322, 33.28114203231598367, 39.348311773555906257, 67.887126774487114744, 103.08238734531914815, 27.22960514216174488, 31.174019924867025111, 107.06900294789738837, 112.00660223466911702, 28.203100397910020547, 7.5999807059561135247, 63.864849328052514466, 55.395058268724824302, 76.093701942034385866, 60.143946845313621452, 46.921616282106697327, 53.535793178365565836, 69.647721070934494492, 86.220896881426597247, 55.094823594903573394, 
80.750360913094482385, 107.4844211643539893, 57.532379866839619353, 65.678825065457203891, 10.871804350103047909, 20.568951495384681039, 55.41626882080163341, 115.04426772900114884, 52.60089031746974797, 79.93328192651460995, 14.128685348154249368, 69.176854602737876121, 118.96182840241090162, 127.91355914501400548, 112.2873756041044544, 54.80600865214728401, 13.381873594844364561, 66.518563473771791905, 75.067978922994370805, 100.28456348030158551, 112.04230101740176906, 18.320826291099365335, 47.50078040541120572, 118.45128881572964019, 0.94501900106843095273, 101.1214744152239291, 76.975693154181499267, 23.475136149216268677, 40.530060453798796516, 106.36531284785814933, 76.620534256831888342, 5.2326812286300992128, 22.793720846639189404, 60.309207036156294635, 82.368463640006666537, 113.29530028764929739, 88.513274546927277697, 123.46644905601715436, 41.100839412840286968, 18.92354969237203477, 9.8974021192516374867, 39.182111948877718532, 64.016360618203179911, 37.673692124040826457, 36.43820901268554735, 45.481643434355646605, 18.108333924763428513, 52.642451557709136978, 81.3274222637519415, 102.44928072613765835, 85.631085007677029353, 110.50657083088663057, 24.322588286366226384, 72.430201581304572755, 18.049275582412519725, 35.540824093430273933, 65.035330412247276399, 90.674844004421174759, 29.250266517727141036, 90.385672870088455966, 24.468119895929703489, 78.227826307840587106, 51.193680117685289588, 87.080143856874201447, 80.445995249789120862, 18.503645779124781257, 76.624756016248284141, 117.14406127457914408, 6.2821937486151000485, 127.28117612169444328, 33.513543146753363544, 105.79913923305502976, 103.71519827075098874, 5.4018967188967508264, 114.30470909370706067, 8.4236454869969747961, 6.7685545498898136429, 32.441103976721933577, 105.3464195040105551, 76.305632233736105263, 4.9485191975909401663, 62.839316808698640671, 110.65747313122119522, 68.731471186878479784, 48.568988854676717892, 116.63875235509112827, 59.420967673664563335, 
110.07588027823294397, 23.338875010376796126, 126.86720571108526201, 46.393641717455466278, 28.54126013393397443, 61.547991712744988035, 91.551934517359768506, 77.91329446286908933, 36.583423739048157586, 94.387114497938455315, 74.318069874188950052, 114.68174319070021738, 111.815780665139755, 48.774976151864393614, 11.853039403325965395, 85.585185993910272373, 79.794305679311946733, 95.574831992540566716, 126.81855408693081699, 16.095884827795089222, 108.82167999622106436, 53.439892973419773625, 4.6144025417524971999, 72.799906416832527611, 67.165972593837068416, 15.246273843651579227, 110.8716488223108172, 109.78268654984276509, 99.44224879156172392, 62.959055526993324747, 53.536938508415914839, 115.8653450431957026, 111.5529487608873751, 119.67876972456360818, 84.045883093051088508, 24.749064589777844958, 110.86789129463068093, 109.00775867966513033, 54.329899506457877578, 43.750562007364351302, 34.74105271168082254, 126.50115436498163035, 65.833234081783302827, 108.65735392059650621, 110.49589523659597035, 56.6244492881851329, 109.78896057905512862, 32.060628630566498032, 124.24963724269764498, 51.403831845185777638, 93.342433332687505754, 77.725436979348160094, 74.679193058145756368, 120.25558394291147124, 91.556316512545890873, 28.196993994581134757, 107.39839267850402393, 100.42196088222408434, 72.929263726411591051, 10.06815941015156568, 56.438159316930978093, 41.576009097389032831, 42.240385198670992395, 91.519425159181992058, 73.453828666570188943, 54.19777898281608941, 26.586872353887883946, 91.568942897018132498, 19.750023251079255715, 8.8517582997665158473, 74.256458604260842549, 23.296153928429703228, 94.814674732257117284, 65.857845133090449963, 112.20448753546224907, 32.364574034698307514, 19.968906388385221362, 126.08692462947874446, 3.1786726150494359899, 63.463585457484441577, 36.296386217738472624, 30.748921359518135432, 21.020649632562708575, 62.952647589474509005, 107.79254116272568353, 80.853082777455711039, 47.286659131095802877, 
78.125004303437890485, 54.920131144648621557, 117.37791546557491529, 45.378421162735321559, 51.261761255871533649, 40.52925964484893484, 41.326910626918106573, 1.503779891318117734, 17.469704289847868495, 70.338654495604714612, 94.392529987868329044, 102.21818878942940501, 19.317899649857281474, 52.538779742575570708, 43.642112994089984568, 106.67156193534538033, 36.607777337361767422, 106.12674057131880545, 7.7671190493638277985, 62.479934293663973222, 8.8370017464039847255, 4.2770385707735840697, 38.92358178181166295, 17.151988683592207963, 43.355092746056470787, 91.570118937223014655, 41.81237598954612622, 55.348490364867757307, 92.214043174615653697, 58.188460515211772872, 7.260762263264041394, 49.012341319623374147, 10.251428524014045252, 119.9372478822915582, 54.495499840453703655, 23.216087315922777634, 16.619640770768455695, 120.60697616155448486, 13.544350341177050723, 25.618577131164784078, 21.858922335137322079, 30.456265033852105262, 42.368829498587729177, 89.546419695365329972, 119.93682132403409923, 71.086966589653457049, 78.603932368827372557, 93.87332787289051339, 78.296158331955666654, 70.78455113713425817, 106.29290533721723477, 112.58804998247796902, 59.189595427673339145, 21.419485109636298148, 16.73252878484345274, 42.823252934464107966, 20.561580918674735585, 13.686152361304266378, 45.735182686756161274, 25.54585337914613774, 118.43056677207277971, 73.359556484425411327, 6.1084822482443996705, 6.633308272939757444, 28.466663780483941082, 8.771395765488705365, 87.950585870734357741, 56.041318852636322845, 63.937886877425626153, 114.60573223636674811, 83.553851513814151986, 80.665776075144094648, 121.94789143382149632, 9.6124338352310587652, 72.021147769501112634, 52.941649610540480353, 32.856565881120332051, 16.645526740907371277, 121.55386281137543847, 15.720533952702680836, 58.851438969275477575, 126.63655300341633847, 122.67472284807081451, 36.441488904711150099, 4.1317352113910601474, 16.756402787621482275, 56.614409347075707046, 
118.57743346681309049, 3.77787151590018766, 95.830660704199544853, 79.769770524195337202, 112.84632756012433674, 20.93006171702654683, 22.264412417458515847, 66.63139009502629051, 126.52783662478032056, 79.118060181408509379, 94.871796717434335733, 106.95371308362882701, 119.94631418316203053, 22.774611722197732888, 94.930310380161245121, 82.491670046372746583, 41.207677085025352426, 51.602252677614160348, 4.9771473860455444083, 99.509523599521344295, 70.340361706610565307, 31.614628800136415521, 16.583234468180307886, 102.95729984965510084, 115.42061990610091016, 119.39649281037054607, 121.83650285473413533, 109.92752805659984006, 53.215092392241786001, 45.424272281306912191, 0.23242814349214313552, 79.768883710730733583, 99.756874029382743174, 37.487688674686069135, 0, 39.132973968531587161, 117.48875923777450225, 1.7139460561957093887, 16.890876744953857269, 29.37093914557408425, 55.142306939302216051, 58.359386989348422503, 40.153416183769877534, 14.959203277903725393, 115.93176480366673786, 93.463028539721562993, 28.037797443466843106, 9.7990523455882794224, 83.226388723673153436, 113.46776038579992019, 127.88427731210322236, 76.031121703043027082, 53.627545473820646293, 108.99069688237796072, 109.82741641105167218, 114.99733612901400193, 58.780943271114665549, 103.62852853193908231, 74.295557971458038082, 88.561625392212590668, 96.199286447768827202, 87.343286482675466686, 38.738487685674044769, 28.543136071610206272, 8.3968737942814186681, 20.050904378287668806, 98.004727161667688051, 3.0118815538517083041, 58.549164662075781962, 1.0970239360322011635, 112.8513712421990931, 73.328534015385230305, 11.586374692076788051, 104.60221690699472674, 122.59369063865960925, 63.101380907330167247, 100.9746575271092297, 6.4777507088692800608, 23.742794902947935043, 46.342101628946693381, 113.13111093550469377, 13.372281711290270323, 52.754103442443010863, 10.35471420778776519, 44.292298604446841637, 58.000374614039174048, 11.361219805115979398, 88.011987441819655942, 
52.592402746278821724, 38.995237045688554645, 66.56228406463196734, 78.696623547115450492, 7.7742535489778674673, 78.164774690638296306, 54.459210284323489759, 62.348039849737688201, 86.138005895798414713, 96.013204469338234048, 56.406200795820041094, 15.199961411912227049, 127.72969865610502893, 110.79011653745328658, 24.187403884068771731, 120.2878936906272429, 93.843232564213394653, 107.07158635673476965, 11.295442141872626962, 44.441793762853194494, 110.18964718981078477, 33.500721826192602748, 86.968842328707978595, 115.06475973368287669, 3.3576501309144077823, 21.743608700206095818, 41.137902990769362077, 110.8325376416069048, 102.08853545800229767, 105.20178063494313392, 31.866563853029219899, 28.257370696312136715, 10.353709205479390221, 109.92365680482544121, 127.82711829003164894, 96.574751208208908793, 109.61201730429456802, 26.763747189692367101, 5.0371269475472217891, 22.135957845992379589, 72.569126960606809007, 96.084602034803538118, 36.64165258219873067, 95.001560810826049419, 108.90257763145928038, 1.8900380021404998843, 74.242948830451496178, 25.951386308362998534, 46.950272298432537355, 81.060120907601231011, 84.73062569571993663, 25.241068513663776685, 10.465362457260198426, 45.587441693278378807, 120.61841407231622725, 36.736927280016971054, 98.590600575302232755, 49.026549093858193373, 118.93289811203430872, 82.201678825680573937, 37.847099384744069539, 19.794804238503274973, 78.364223897759075044, 0.032721236406359821558, 75.347384248081652913, 72.8764180253710947, 90.963286868711293209, 36.216667849526857026, 105.28490311542191193, 34.654844527507520979, 76.898561452278954675, 43.262170015357696684, 93.013141661773261148, 48.645176572736090748, 16.860403162609145511, 36.09855116482503945, 71.081648186864185845, 2.0706608244981907774, 53.349688008842349518, 58.50053303545792005, 52.771345740176911931, 48.936239791863044957, 28.455652615681174211, 102.38736023537421715, 46.160287713752040872, 32.891990499578241725, 37.007291558253200492, 
25.24951203250020626, 106.28812254915828817, 12.564387497233838076, 126.56235224338888656, 67.027086293510365067, 83.598278466113697505, 79.430396541501977481, 10.803793437797139632, 100.60941818741412135, 16.847290973993949592, 13.537109099779627286, 64.882207953447505133, 82.692839008024748182, 24.611264467472210526, 9.8970383951818803325, 125.67863361740091932, 93.314946262446028413, 9.4629423737569595687, 97.137977709357073763, 105.27750471018225653, 118.84193534733276465, 92.15176055646588793, 46.677750020757230232, 125.73441142217052402, 92.787283434910932556, 57.08252026786794886, 123.09598342549361405, 55.103869034723174991, 27.826588925738178659, 73.16684747809995315, 60.774228995880548609, 20.636139748381538084, 101.36348638140407274, 95.631561330279510003, 97.549952303728787228, 23.706078806655568769, 43.170371987824182725, 31.588611358627531445, 63.149663985084771411, 125.63710817386163399, 32.191769655593816424, 89.643359992442128714, 106.87978594683954725, 9.2288050835086323787, 17.599812833668693202, 6.3319451876741368324, 30.492547687306796433, 93.743297644625272369, 91.565373099689168157, 70.884497583123447839, 125.91811105398664949, 107.07387701683546766, 103.73069008639504318, 95.105897521778388182, 111.35753944913085434, 40.091766186102177016, 49.498129179559327895, 93.735782589264999842, 90.01551735933026066, 108.65979901291575516, 87.501124014728702605, 69.482105423365283059, 125.00230872996689868, 3.6664681635702436324, 89.314707841196650406, 92.991790473191940691, 113.24889857637390378, 91.577921158110257238, 64.121257261136634042, 120.49927448539528996, 102.80766369037519326, 58.684866665378649486, 27.450873958699958166, 21.358386116295150714, 112.51116788582658046, 55.112633025091781747, 56.393987989165907493, 86.796785357011685846, 72.843921764451806666, 17.858527452823182102, 20.136318820303131361, 112.87631863386195619, 83.152018194778065663, 84.480770397345622769, 55.038850318367622094, 18.907657333140377887, 108.39555796563217882, 
53.173744707779405871, 55.137885794036264997, 39.50004650215851143, 17.703516599536669673, 20.512917208521685097, 46.592307856863044435, 61.629349464517872548, 3.7156902661845379043, 96.408975070924498141, 64.729148069396615028, 39.937812776774080703, 124.17384925895748893, 6.3573452300988719799, 126.92717091496888315, 72.592772435476945248, 61.497842719036270864, 42.041299265125417151, 125.90529517895265599, 87.585082325455005048, 33.706165554911422078, 94.573318262195243733, 28.25000860687578097, 109.84026228930088109, 106.75583093114983058, 90.756842325470643118, 102.5235225117430673, 81.058519289697869681, 82.653821253836213145, 3.0075597826362354681, 34.939408579699374968, 12.677308991213067202, 60.785059975740296068, 76.436377578858810011, 38.635799299714562949, 105.07755948515114142, 87.284225988183607114, 85.343123870690760668, 73.215554674727172824, 84.253481142637610901, 15.534238098727655597, 124.95986858732794644, 17.674003492807969451, 8.5540771415471681394, 77.847163563623325899, 34.303977367184415925, 86.710185492116579553, 55.140237874446029309, 83.624751979092252441, 110.69698072973915259, 56.428086349231307395, 116.37692103042354574, 14.521524526528082788, 98.024682639246748295, 20.502857048031728482, 111.8744957645831164, 108.99099968090740731, 46.432174631845555268, 33.23928154154054937, 113.21395232310896972, 27.088700682354101446, 51.237154262329568155, 43.717844670274644159, 60.912530067704210524, 84.737658997179096332, 51.092839390734297922, 111.87364264807183645, 14.173933179310552077, 29.207864737658383092, 59.746655745784664759, 28.592316663914971286, 13.569102274272154318, 84.585810674438107526, 97.176099964955938049, 118.37919085534667829, 42.838970219272596296, 33.465057569686905481, 85.646505868928215932, 41.12316183734947117, 27.372304722608532757, 91.470365373512322549, 51.09170675829591346, 108.86113354414919741, 18.719112968850822654, 12.216964496488799341, 13.266616545879514888, 56.933327560967882164, 17.542791530981048709, 
47.901171741468715481, 112.08263770527264569, 127.87577375485489029, 101.2114644727371342, 39.10770302763194195, 33.331552150291827274, 115.89578286764299264, 19.224867670465755509, 16.042295539002225269, 105.88329922108096071, 65.713131762240664102, 33.291053481818380533, 115.10772562275087694, 31.441067905405361671, 117.70287793855459313, 125.27310600683631492, 117.34944569614162901, 72.882977809422300197, 8.2634704227821202949, 33.512805575242964551, 113.22881869415505207, 109.15486693362618098, 7.5557430318040132988, 63.661321408402727684, 31.539541048394312384, 97.692655120248673484, 41.860123434053093661, 44.528824834920669673, 5.2627801900562189985, 125.0556732495642791, 30.236120362820656737, 61.743593434872309444, 85.907426167261291994, 111.89262836632406106, 45.549223444399103755, 61.86062076032612822, 36.983340092745493166, 82.415354170050704852, 103.2045053552283207, 9.9542947720947267953, 71.019047199046326568, 12.680723413221130613, 63.229257600272831041, 33.16646893636425375, 77.914599699313839665, 102.84123981220545829, 110.79298562074473011, 115.67300570947190863, 91.855056113199680112, 106.430184784483572, 90.848544562613824382, 0.46485628698792424984, 31.537767421465105144, 71.513748058765486348, 74.975377349375776248, 0, 78.265947937066812301, 106.97751847554900451, 3.4278921123914187774, 33.781753489907714538, 58.741878291148168501, 110.2846138786044321, 116.71877397869684501, 80.306832367539755069, 29.918406555811088765, 103.8635296073371137, 58.926057079443125986, 56.075594886933686212, 19.598104691180196824, 38.45277744734994485, 98.935520771603478352, 127.76855462421008269, 24.062243406086054165, 107.25509094764493057, 89.981393764759559417, 91.654832822106982348, 101.99467225802800385, 117.56188654223296908, 79.257057063881802605, 20.591115942919714143, 49.123250784428819316, 64.398572895541292382, 46.686572965350933373, 77.476975371348089539, 57.086272143220412545, 16.793747588562837336, 40.10180875657897559, 68.009454323335376102, 
6.0237631077034166083, 117.09832932415156392, 2.1940478720680403057, 97.702742484401824186, 18.657068030770460609, 23.172749384157214081, 81.204433813993091462, 117.1873812773192185, 126.20276181466397247, 73.949315054218459409, 12.955501417738560122, 47.485589805895870086, 92.684203257897024741, 98.262221871009387542, 26.744563422580540646, 105.50820688488602173, 20.70942841557553038, 88.584597208893683273, 116.0007492280783481, 22.722439610235596774, 48.023974883639311884, 105.18480549255764345, 77.990474091380747268, 5.1245681292675726581, 29.393247094230900984, 15.548507097959372913, 28.329549381276592612, 108.91842056864697952, 124.69607969947901438, 44.276011791600467404, 64.026408938676468097, 112.81240159164008219, 30.399922823828092078, 127.45939731221369584, 93.580233074906573165, 48.374807768141181441, 112.57578738125812379, 59.686465128430427285, 86.143172713469539303, 22.590884283745253924, 88.883587525710026966, 92.379294379621569533, 67.001443652388843475, 45.937684657419595169, 102.12951946736575337, 6.7153002618288155645, 43.487217400412191637, 82.275805981542362133, 93.665075283217447577, 76.177070916008233326, 82.403561269886267837, 63.733127706058439799, 56.514741392624273431, 20.70741841096241842, 91.847313609654520405, 127.65423658006693586, 65.149502416421455564, 91.224034608589136042, 53.527494379384734202, 10.074253895094443578, 44.271915691988397157, 17.138253921217255993, 64.169204069607076235, 73.283305164401099319, 62.003121621655736817, 89.805155262918560766, 3.7800760042846377473, 20.485897660902992357, 51.902772616725997068, 93.900544596868712688, 34.1202418152061, 41.46125139143987326, 50.482137027327553369, 20.930724914520396851, 91.174883386560395593, 113.2368281446324545, 73.473854560037580086, 69.181201150604465511, 98.053098187720024725, 109.86579622407225543, 36.403357651361147873, 75.694198769488139078, 39.589608477010187926, 28.728447795518150087, 0.065442472816357621923, 22.694768496166943805, 17.752836050745827379, 
53.926573737422586419, 72.433335699057352031, 82.569806230843823869, 69.309689055018679937, 25.797122904557909351, 86.524340030719031347, 58.026283323546522297, 97.290353145472181495, 33.720806325218291022, 72.197102329653716879, 14.163296373728371691, 4.1413216489963815548, 106.69937601768469904, 117.0010660709158401, 105.54269148035382386, 97.872479583726089913, 56.911305231365986401, 76.774720470748434309, 92.320575427504081745, 65.783980999156483449, 74.014583116506400984, 50.499024065004050499, 84.576245098316576332, 25.12877499447131413, 125.12470448678141111, 6.0541725870243681129, 39.196556932231032988, 30.860793083003954962, 21.607586875597917242, 73.218836374828242697, 33.694581947987899184, 27.074218199562892551, 1.7644159068986482453, 37.385678016049496364, 49.222528934944421053, 19.794076790367398644, 123.35726723480547662, 58.629892524892056827, 18.925884747517557116, 66.275955418717785506, 82.555009420368151041, 109.68387069466916728, 56.30352111293177586, 93.355500041518098442, 123.46882284434468602, 57.574566869821865112, 114.1650405357395357, 118.1919668509872281, 110.20773806944634998, 55.653177851479995297, 18.333694956199906301, 121.54845799176109722, 41.272279496766714146, 74.726972762811783468, 63.263122660562657984, 67.099904607457574457, 47.412157613311137538, 86.340743975648365449, 63.177222717255062889, 126.2993279701731808, 123.27421634772326797, 64.383539311191270826, 51.286719984887895407, 85.759571893679094501, 18.457610167017264757, 35.199625667337386403, 12.663890375348273665, 60.985095374617230846, 59.486595289254182717, 55.130746199381974293, 13.768995166250533657, 123.83622210797693697, 86.147754033674573293, 79.461380172793724341, 62.211795043560414342, 94.715078898261708673, 80.183532372204354033, 98.996258359122293768, 59.471565178533637663, 52.0310347186641593, 89.319598025835148292, 47.002248029461043188, 10.964210846734204097, 122.00461745993379736, 7.3329363271441252436, 50.629415682396938791, 57.983580946387519361, 
98.497797152751445537, 55.155842316224152455, 0.24251452227690606378, 112.99854897079421789, 77.61532738075038651, 117.36973333075729897, 54.901747917399916332, 42.716772232593939407, 97.022335771653160919, 110.22526605018356349, 112.78797597833545296, 45.593570714027009672, 17.687843528903613333, 35.717054905646364205, 40.2726376406099007, 97.752637267727550352, 38.304036389559769304, 40.961540794691245537, 110.07770063673524419, 37.815314666284393752, 88.791115931264357641, 106.34748941555881174, 110.27577158807252999, 79.00009300431702286, 35.407033199073339347, 41.025834417043370195, 93.18461571372608887, 123.2586989290357451, 7.4313805323727137875, 64.81795014185263426, 1.4582961387932300568, 79.875625553551799385, 120.34769851791497786, 12.714690460201381939, 125.85434182994140428, 17.185544870953890495, 122.99568543807254173, 84.08259853025447228, 123.81059035790894995, 47.170164650910010096, 67.412331109826482134, 61.146636524390487466, 56.500017213755199919, 91.680524578605400166, 85.511661862303299131, 53.513684650944924215, 77.047045023489772575, 34.11703857939937734, 37.307642507672426291, 6.0151195652724709362, 69.878817159398749936, 25.354617982426134404, 121.57011995148423011, 24.872755157717620023, 77.271598599432763876, 82.155118970305920811, 46.568451976370852208, 42.686247741385159316, 18.431109349457983626, 40.506962285275221802, 31.068476197458949173, 121.91973717465589289, 35.348006985615938902, 17.108154283097974258, 27.694327127246651798, 68.607954734372469829, 45.420370984233159106, 110.2804757488956966, 39.24950395818814286, 93.393961459478305187, 112.85617269846625277, 104.75384206084709149, 29.043049053059803555, 68.049365278493496589, 41.005714096067094943, 95.748991529169870773, 89.98199936181481462, 92.864349263691110536, 66.478563083084736718, 98.427904646217939444, 54.177401364708202891, 102.47430852465913631, 87.435689340549288318, 121.82506013541205903, 41.475317994358192664, 102.18567878147223382, 95.747285296143672895, 
28.347866358621104155, 58.415729475316766184, 119.49331149156932952, 57.184633327829942573, 27.138204548547946615, 41.171621348879853031, 66.352199929915514076, 108.75838171069335658, 85.677940438545192592, 66.930115139377448941, 43.293011737856431864, 82.246323674698942341, 54.744609445220703492, 54.940730747024645098, 102.1834135165954649, 89.72226708829839481, 37.438225937701645307, 24.433928992977598682, 26.533233091759029776, 113.86665512193576433, 35.085583061962097418, 95.802343482941068942, 96.165275410548929358, 127.75154750971341855, 74.422928945474268403, 78.2154060552638839, 66.663104300587292528, 103.79156573528598528, 38.449735340931511018, 32.084591078008088516, 83.766598442165559391, 3.4262635244813282043, 66.582106963640399044, 102.21545124550175387, 62.882135810814361321, 107.40575587711282424, 122.54621201367626782, 106.69889139228689601, 17.765955618844600394, 16.52694084556424059, 67.025611150489567081, 98.457637388313742122, 90.309733867252361961, 15.111486063608026598, 127.32264281680545537, 63.079082096792262746, 67.385310240497346967, 83.720246868106187321, 89.057649669841339346, 10.525560380116075976, 122.11134649912855821, 60.472240725641313475, 123.48718686974825687, 43.814852334526221966, 95.785256732648122124, 91.09844688879820751, 123.72124152065225644, 73.966680185494624311, 36.830708340105047682, 78.409010710456641391, 19.908589544193091569, 14.038094398092653137, 25.361446826442261226, 126.45851520054566208, 66.332937872732145479, 27.829199398631317308, 77.682479624414554564, 93.585971241493098205, 103.34601141894745524, 55.710112226402998203, 84.860369568970781984, 53.697089125227648765, 0.92971257397584849969, 63.075534842933848267, 15.027496117534610676, 21.950754698755190475, 0, 28.531895874137262581, 85.955036951098009013, 6.8557842247828375548, 67.563506979815429077, 117.48375658229997498, 92.569227757212502183, 105.43754795739732799, 32.613664735079510137, 59.836813111625815509, 79.727059214674227405, 117.85211415888625197, 
112.15118977386737242, 39.196209382360393647, 76.90555489470352768, 69.871041543210594682, 127.53710924842016539, 48.124486812175746309, 86.51018189529349911, 51.962787529522756813, 55.309665644217602676, 75.989344516059645684, 107.12377308446957613, 30.514114127763605211, 41.182231885839428287, 98.24650156886127661, 0.79714579108258476481, 93.373145930705504725, 26.953950742696179077, 114.17254428644082509, 33.587495177125674672, 80.203617513161589159, 8.018908646670752205, 12.047526215410471195, 106.19665864830312785, 4.3880957441397185903, 67.405484968803648371, 37.314136061540921219, 46.345498768318066141, 34.408867627989820903, 106.37476255464207497, 124.40552362933158292, 19.898630108436918817, 25.911002835477120243, 94.97117961179537815, 57.368406515797687462, 68.524443742018775083, 53.489126845164719271, 83.016413769772043452, 41.418856831154698739, 49.169194417791004525, 104.00149845615669619, 45.444879220471193548, 96.047949767282261746, 82.369610985118924873, 27.980948182761494536, 10.249136258535145316, 58.786494188461801969, 31.097014195918745827, 56.659098762556823203, 89.836841137297597015, 121.39215939895802876, 88.552023583204572788, 0.052817877356574172154, 97.624803183280164376, 60.799845647659822134, 126.91879462443102966, 59.16046614981314633, 96.749615536286000861, 97.151574762516247574, 119.37293025686449255, 44.286345426939078607, 45.181768567494145827, 49.767175051423691912, 56.758588759246777045, 6.0028873047776869498, 91.875369314842828317, 76.259038934731506743, 13.430600523657631129, 86.974434800828021253, 36.551611963088362245, 59.330150566438533133, 24.35414183202010463, 36.807122539772535674, 127.4662554121168796, 113.02948278524854686, 41.414836821928474819, 55.69462721931267879, 127.30847316013750969, 2.2990048328465491068, 54.448069217178272083, 107.0549887587694684, 20.148507790188887157, 88.543831383976794314, 34.276507842438149964, 0.33840813921779044904, 18.566610328802198637, 124.00624324331511161, 51.610310525837121531, 
7.5601520085692754947, 40.971795321805984713, 103.80554523345563211, 59.801089193737425376, 68.2404836304122, 82.922502782879746519, 100.96427405465510674, 41.861449829044431681, 54.349766773124429164, 98.473656289268546971, 18.947709120078798151, 10.362402301212569, 68.10619637544368743, 91.731592448144510854, 72.806715302725933725, 23.388397538976278156, 79.179216954020375852, 57.456895591036300175, 0.13088494563271524385, 45.389536992333887611, 35.505672101491654757, 107.85314747484881082, 16.86667139811834204, 37.139612461691285716, 10.619378110037359875, 51.59424580911945668, 45.048680061438062694, 116.05256664709668257, 66.580706290944362991, 67.441612650440220023, 16.394204659307433758, 28.326592747456743382, 8.2826432979964010883, 85.398752035373036051, 106.00213214183531818, 83.085382960711285705, 67.744959167452179827, 113.82261046273561078, 25.549440941500506597, 56.641150855011801468, 3.5679619983166048769, 20.029166233012801968, 100.99804813001173898, 41.152490196633152664, 50.25754998894626624, 122.24940897356282221, 12.108345174048736226, 78.393113864462065976, 61.721586166007909924, 43.215173751199472463, 18.437672749656485394, 67.389163895975798368, 54.148436399125785101, 3.5288318137972964905, 74.771356032098992728, 98.445057869888842106, 39.588153580738435267, 118.71453446961095324, 117.25978504978411365, 37.851769495038752211, 4.55191083743920899, 37.11001884073994006, 91.367741389338334557, 112.6070422258671897, 58.711000083039834863, 118.93764568868937204, 115.1491337396473682, 100.33008107148270938, 108.38393370197809418, 92.415476138892699964, 111.30635570295999059, 36.66738991240345058, 115.09691598352219444, 82.544558993537066272, 21.453945525623566937, 126.52624532112895395, 6.1998092149151489139, 94.824315226625913056, 44.681487951296730898, 126.35444543451376376, 124.59865594034999958, 118.54843269545017392, 0.76707862238254165277, 102.57343996977942879, 43.519143787358189002, 36.915220334034529515, 70.399251334674772806, 
25.327780750696547329, 121.97019074923446169, 118.97319057850836543, 110.26149239876394859, 27.537990332504705293, 119.67244421595751191, 44.295508067352784565, 30.922760345587448683, 124.42359008712446666, 61.430157796527055325, 32.367064744412346045, 69.992516718244587537, 118.94313035706727533, 104.06206943733195658, 50.639196051670296583, 94.004496058922086377, 21.928421693472046172, 116.00923491986759473, 14.665872654288250487, 101.25883136479387758, 115.9671618927786767, 68.995594305502891075, 110.31168463244830491, 0.48502904455745010637, 97.997097941592073767, 27.230654761504410999, 106.73946666151459794, 109.80349583479983266, 85.433544465191516792, 66.044671543309959816, 92.450532100370764965, 97.575951956670905929, 91.187141428054019343, 35.375687057807226665, 71.434109811292728409, 80.545275281223439379, 67.505274535455100704, 76.608072779119538609, 81.923081589386129053, 92.155401273470488377, 75.630629332572425483, 49.582231862532353261, 84.694978831121261464, 92.551543176148697967, 30.00018600863404572, 70.814066398146678694, 82.051668834090378368, 58.369231427455815719, 118.51739785807149019, 14.862761064749065554, 1.6359002837052685209, 2.9165922775900980923, 31.751251107107236749, 112.69539703582995571, 25.429380920406401856, 123.70868365988280857, 34.371089741911418969, 117.99137087614872144, 40.165197060512582539, 119.62118071581789991, 94.34032930182365817, 6.8246622196529642679, 122.29327304878097493, 113.00003442751403782, 55.36104915721443831, 43.023323724606598262, 107.02736930189348641, 26.09409004697954515, 68.234077158798754681, 74.615285015344852582, 12.030239130548579851, 11.757634318797499873, 50.709235964855906786, 115.14023990296846023, 49.745510315438878024, 26.543197198869165732, 36.310237940611841623, 93.136903952741704416, 85.372495482770318631, 36.862218698915967252, 81.013924570550443605, 62.136952394921536325, 115.83947434931178577, 70.696013971231877804, 34.216308566195948515, 55.388654254496941576, 9.2159094687449396588, 
90.840741968469956191, 92.560951497791393194, 78.499007916379923699, 58.787922918960248353, 97.712345396936143516, 81.507684121694182977, 58.086098106123245088, 8.0987305569869931787, 82.011428192137827864, 63.497983058343379525, 51.963998723633267218, 57.728698527385859052, 4.9571261661731114145, 68.855809292435878888, 108.35480272942004376, 76.948617049321910599, 46.871378681102214614, 115.65012027082411805, 82.950635988720023306, 76.371357562948105624, 63.494570592287345789, 56.695732717242208309, 116.83145895063717035, 110.98662298313865904, 114.36926665565988515, 54.27640909709589323, 82.34324269776334404, 4.7043998598346661311, 89.516763421390351141, 43.355880877094023162, 5.8602302787548978813, 86.586023475716501707, 36.492647349397884682, 109.48921889044140698, 109.88146149405292817, 76.366827033194567775, 51.444534176596789621, 74.876451875403290614, 48.867857985955197364, 53.066466183521697531, 99.733310243871528655, 70.171166123927832814, 63.604686965885775862, 64.330550821101496695, 127.50309501943047508, 20.845857890952174785, 28.430812110531405779, 5.3262086011782230344, 79.583131470571970567, 76.899470681863022037, 64.169182156019815011, 39.533196884331118781, 6.8525270489662943874, 5.1642139272844360676, 76.430902491003507748, 125.76427162163236062, 86.811511754229286453, 117.09242402735617361, 85.397782784573792014, 35.531911237689200789, 33.053881691132119158, 6.0512223009827721398, 68.915274776631122222, 52.619467734504723921, 30.222972127219691174, 126.64528563361091074, 126.15816419358816347, 6.7706204809983319137, 39.440493736216012621, 50.115299339682678692, 21.05112076023578993, 116.22269299825711641, 120.94448145128626493, 118.97437373949651374, 87.629704669056081912, 63.570513465299882228, 54.196893777600052999, 119.44248304130815086, 19.933360370989248622, 73.661416680213733343, 28.818021420913282782, 39.817179088386183139, 28.076188796185306273, 50.722893652888160432, 124.91703040109496214, 4.6658757454642909579, 55.658398797266272595, 
27.364959248832747107, 59.171942482989834389, 78.692022837894910481, 111.42022445280963439, 41.720739137941563968, 107.39417825045529753, 1.8594251479516969994, 126.15106968586769653, 30.054992235069221351, 43.901509397514018929, 0, 57.063791748278163141, 43.910073902196018025, 13.711568449569313088, 7.1270139596344961319, 106.96751316460358794, 57.138455514428642346, 82.875095914794655982, 65.227329470159020275, 119.673626223255269, 31.454118429352092789, 107.70422831777614192, 96.302379547734744847, 78.392418764724425273, 25.811109789410693338, 11.742083086424827343, 127.07421849684033077, 96.248973624355130596, 45.020363790586998221, 103.92557505904551363, 110.61933128843884333, 23.978689032122929348, 86.247546168939152267, 61.028228255527210422, 82.364463771682494553, 68.49300313772255322, 1.5942915821688075084, 58.74629186141100945, 53.907901485395996133, 100.34508857288165018, 67.174990354254987324, 32.407235026323178317, 16.03781729334150441, 24.09505243082458037, 84.393317296606255695, 8.7761914882830751594, 6.8109699376072967425, 74.628272123081842437, 92.690997536636132281, 68.817735255983279785, 84.749525109287787927, 120.81104725866316585, 39.797260216873837635, 51.822005670957878465, 61.942359223590756301, 114.7368130315990129, 9.0488874840375501662, 106.97825369033307652, 38.032827539544086903, 82.837713662309397478, 98.33838883558564703, 80.002996912313392386, 90.889758440946025075, 64.095899534568161471, 36.739221970237849746, 55.961896365522989072, 20.498272517070290633, 117.57298837692360394, 62.194028391837491654, 113.31819752511728439, 51.673682274598832009, 114.7843187979196955, 49.104047166409145575, 0.10563575471678632312, 67.249606366560328752, 121.59969129532328225, 125.83758924886205932, 118.32093229962993064, 65.499231072575639701, 66.303149525036133127, 110.7458605137289851, 88.572690853878157213, 90.363537134988291655, 99.534350102847383823, 113.51717751849355409, 12.005774609559011878, 55.750738629689294612, 24.518077869463013485, 
26.861201047318900237, 45.948869601656042505, 73.10322392617672449, 118.66030113287706627, 48.70828366404020926, 73.614245079548709327, 126.93251082423739717, 98.058965570497093722, 82.829673643856949639, 111.38925443862899556, 126.61694632027501939, 4.5980096656930982135, 108.89613843436018215, 86.109977517538936809, 40.297015580381412292, 49.087662767953588627, 68.553015684876299929, 0.67681627843558089808, 37.133220657608035253, 120.01248648663022323, 103.22062105167424306, 15.120304017138550989, 81.943590643615607405, 79.61109046691126423, 119.60217838747848873, 8.4809672608280379791, 37.845005565763131017, 73.928548109310213476, 83.722899658088863362, 108.69953354625249631, 68.947312578537093941, 37.895418240157596301, 20.724804602428775979, 8.2123927508910128381, 55.463184896292659687, 17.61343060545186745, 46.776795077956194291, 30.358433908040751703, 114.91379118207260035, 0.26176989126543048769, 90.779073984667775221, 71.011344202986947494, 87.706294949697621632, 33.73334279623668408, 74.279224923382571433, 21.238756220074719749, 103.18849161823891336, 90.097360122879763367, 104.10513329419700312, 5.1614125818923639599, 6.8832253008804400451, 32.788409318614867516, 56.653185494913486764, 16.565286595996440155, 42.797504070749710081, 84.004264283674274338, 38.170765921426209388, 7.4899183349079976324, 99.645220925471221562, 51.098881883001013193, 113.28230171002360294, 7.1359239966332097538, 40.058332466029241914, 73.996096260023477953, 82.304980393266305327, 100.51509997789617046, 116.49881794712564442, 24.21669034810111043, 28.786227728924131952, 123.44317233201581985, 86.430347502398944926, 36.875345499316608766, 6.7783277919515967369, 108.2968727982515702, 7.0576636275982309598, 21.542712064201623434, 68.890115739777684212, 79.176307161480508512, 109.42906893922554445, 106.51957009956822731, 75.703538990077504423, 9.1038216748784179799, 74.220037681483518099, 54.735482778680307092, 97.2140844517343794, 117.4220001660833077, 109.87529137738238205, 
102.29826747929837438, 72.660162142969056731, 88.767867403959826333, 56.830952277789037907, 94.612711405919981189, 73.334779824810539139, 102.19383196704802685, 37.089117987074132543, 42.907891051250771852, 125.05249064225790789, 12.399618429833935807, 61.648630453251826111, 89.362975902597099775, 124.70889086903116549, 121.19731188070363714, 109.09686539090034785, 1.5341572447687212843, 77.146879939558857586, 87.038287574716378003, 73.830440668072697008, 12.798502669349545613, 50.655561501396732638, 115.94038149847256136, 109.94638115702036885, 92.522984797527897172, 55.075980665013048565, 111.34488843191502383, 88.591016134705569129, 61.845520691178535344, 120.84718017424893333, 122.86031559305411065, 64.734129488828330068, 11.985033436492813053, 109.88626071413818863, 80.124138874663913157, 101.27839210334423115, 60.008992117844172753, 43.856843386947730323, 104.01846983973518945, 29.331745308580138953, 74.517662729591393145, 103.93432378556099138, 9.9911886110094201285, 92.62336926489660982, 0.97005808911853819154, 67.994195883184147533, 54.461309523012459977, 85.478933323032833869, 91.606991669603303308, 42.867088930386671564, 4.0893430866235576104, 56.901064200745167909, 67.151903913345449837, 54.374282856108038686, 70.751374115618091309, 14.868219622589094797, 33.090550562450516736, 7.0105490709138393868, 25.216145558242715197, 35.846163178775896085, 56.310802546944614733, 23.261258665144850966, 99.164463725068344502, 41.389957662246160908, 57.103086352297395933, 60.000372017271729419, 13.628132796296995366, 36.103337668180756737, 116.73846285491526942, 109.03479571614298038, 29.725522129498131108, 3.2718005674105370417, 5.8331845551801961847, 63.502502214218111476, 97.390794071659911424, 50.858761840816441691, 119.41736731976561714, 68.742179483826475916, 107.98274175229744287, 80.330394121028803056, 111.2423614316394378, 60.68065860364731634, 13.649324439309566515, 116.58654609756558784, 98.000068855028075632, 110.72209831442887662, 86.046647449216834502, 
86.054738603786972817, 52.188180093962728279, 8.4681543176011473406, 21.230570030689705163, 24.060478261097159702, 23.515268637594999745, 101.41847192971545155, 102.28047980593692046, 99.491020630877756048, 53.086394397738331463, 72.620475881227321224, 58.27380790548704681, 42.744990965540637262, 73.724437397835572483, 34.027849141100887209, 124.27390478984671063, 103.67894869862720952, 13.392027942463755608, 68.43261713239189703, 110.77730850899388315, 18.431818937493517296, 53.681483936943550361, 57.121902995586424368, 28.998015832759847399, 117.57584583792413468, 67.42469079387592501, 35.015368243388365954, 116.17219621224649018, 16.197461113977624336, 36.022856384275655728, 126.99596611668675905, 103.92799744726653444, 115.4573970547717181, 9.9142523323498608079, 9.7116185848753957544, 88.709605458843725501, 25.897234098643821198, 93.742757362208067207, 103.30024054165187408, 37.901271977443684591, 24.742715125896211248, 126.98914118457469158, 113.3914654344880546, 105.66291790127797867, 93.973245966280956054, 100.73853331131977029, 108.55281819419542444, 36.68648539552668808, 9.4087997196729702409, 51.033526842780702282, 86.711761754188046325, 11.720460557513433741, 45.172046951433003414, 72.985294698799407342, 90.978437780886451947, 91.762922988109494327, 24.733654066389135551, 102.88906835319721722, 21.752903750806581229, 97.735715971910394728, 106.13293236704703304, 71.466620487746695289, 12.342332247855665628, 127.2093739317751897, 0.66110164220663136803, 127.00619003886095015, 41.691715781904349569, 56.861624221062811557, 10.652417202360084048, 31.166262941147579113, 25.798941363729682053, 0.3383643120396300219, 79.066393768662237562, 13.705054097932588775, 10.328427854572510114, 24.861804982010653475, 123.52854324326835922, 45.623023508462210884, 106.1848480547159852, 42.795565569147584029, 71.063822475378401577, 66.107763382264238317, 12.102444601969182258, 9.8305495532622444443, 105.23893546900944784, 60.445944254443020327, 125.29057126722545945, 
124.31632838717632694, 13.541240961996663827, 78.880987472432025243, 100.23059867936535738, 42.10224152047521784, 104.44538599651787081, 113.88896290257616783, 109.94874747899666545, 47.259409338112163823, 127.14102693059976446, 108.39378755520374398, 110.88496608261630172, 39.866720741982135223, 19.322833360427466687, 57.636042841826565564, 79.634358176772366278, 56.152377592370612547, 101.44578730577995884, 121.83406080218992429, 9.3317514909285819158, 111.31679759453254519, 54.729918497665494215, 118.34388496598330676, 29.38404567579345894, 94.840448905619268771, 83.441478275886765914, 86.788356500910595059, 3.7188502959070319775, 124.30213937173539307, 60.109984470138442703, 87.803018795028037857, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; sleef-3.5.1/src/quad/sleefquad_footer.h.org000066400000000000000000000001151373003144100206160ustar00rootroot00000000000000#ifdef __cplusplus } #endif #undef IMPORT #endif // #ifndef __SLEEFQUAD_H__ sleef-3.5.1/src/quad/sleefquad_header.h.org000066400000000000000000000040531373003144100205550ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #ifndef __SLEEFQUAD_H__ #define __SLEEFQUAD_H__ #include "sleef.h" #ifdef __cplusplus extern "C" { #endif #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) #ifdef IMPORT_IS_EXPORT #define IMPORT __declspec(dllexport) #else // #ifdef IMPORT_IS_EXPORT #define IMPORT __declspec(dllimport) #if (defined(_MSC_VER)) #pragma comment(lib,"sleefquad.lib") #endif // #if (defined(_MSC_VER)) #endif // #ifdef IMPORT_IS_EXPORT #else // #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) #define IMPORT #endif // #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) // #if !defined(Sleef_quad_DEFINED) #define Sleef_quad_DEFINED #if defined(ENABLEFLOAT128) typedef __float128 Sleef_quad; #else typedef struct { uint64_t x, y; } Sleef_quad; #endif #endif #if !defined(Sleef_quad1_DEFINED) #define Sleef_quad1_DEFINED typedef union { struct { Sleef_quad x; }; Sleef_quad s[1]; } Sleef_quad1; #endif #if !defined(Sleef_quad2_DEFINED) #define Sleef_quad2_DEFINED typedef union { struct { Sleef_quad x, y; }; Sleef_quad s[2]; } Sleef_quad2; #endif #if !defined(Sleef_quad4_DEFINED) #define Sleef_quad4_DEFINED typedef union { struct { Sleef_quad x, y, z, w; }; Sleef_quad s[4]; } Sleef_quad4; #endif #if !defined(Sleef_quad8_DEFINED) #define Sleef_quad8_DEFINED typedef union { Sleef_quad s[8]; } Sleef_quad8; #endif #if defined(__ARM_FEATURE_SVE) && !defined(Sleef_quadx_DEFINED) #define Sleef_quadx_DEFINED typedef union { Sleef_quad s[32]; } Sleef_quadx; #endif // IMPORT Sleef_quad1 Sleef_strtoq(const char *str, char **endptr, int base); IMPORT void Sleef_qtostr(char *s, int n, Sleef_quad1 a, int base); // 
sleef-3.5.1/src/quad/sleefsimdqp.c000066400000000000000000004357151373003144100170320ustar00rootroot00000000000000// Copyright Naoki Shibata and contributors 2010 - 2020. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. #include #include #include #include #include "misc.h" #define __SLEEFSIMDQP_C__ #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif #ifdef ENABLE_PUREC_SCALAR #define CONFIG 1 #include "helperpurec_scalar.h" #ifdef DORENAME #include "qrenamepurec_scalar.h" #endif #endif #ifdef ENABLE_PURECFMA_SCALAR #define CONFIG 2 #include "helperpurec_scalar.h" #ifdef DORENAME #include "qrenamepurecfma_scalar.h" #endif #endif #ifdef ENABLE_SSE2 #define CONFIG 2 #include "helpersse2.h" #ifdef DORENAME #include "qrenamesse2.h" #endif #endif #ifdef ENABLE_AVX2128 #define CONFIG 1 #include "helperavx2_128.h" #ifdef DORENAME #include "qrenameavx2128.h" #endif #endif #ifdef ENABLE_AVX #define CONFIG 1 #include "helperavx.h" #ifdef DORENAME #include "qrenameavx.h" #endif #endif #ifdef ENABLE_FMA4 #define CONFIG 4 #include "helperavx.h" #ifdef DORENAME #include "qrenamefma4.h" #endif #endif #ifdef ENABLE_AVX2 #define CONFIG 1 #include "helperavx2.h" #ifdef DORENAME #include "qrenameavx2.h" #endif #endif #ifdef ENABLE_AVX512F #define CONFIG 1 #include "helperavx512f.h" #ifdef DORENAME #include "qrenameavx512f.h" #endif #endif #ifdef ENABLE_ADVSIMD #define CONFIG 1 #include "helperadvsimd.h" #ifdef DORENAME #include "qrenameadvsimd.h" #endif #endif #ifdef ENABLE_SVE #define CONFIG 1 #include "helpersve.h" #ifdef DORENAME #include "qrenamesve.h" #endif #endif #ifdef ENABLE_VSX #define CONFIG 1 #include "helperpower_128.h" #ifdef DORENAME #include "qrenamevsx.h" #endif #endif // #include "dd.h" #if !defined(ENABLE_SVE) typedef struct { vdouble x, y, z; } vdouble3; static vdouble vd3getx_vd_vd3(vdouble3 v) { 
return v.x; } static vdouble vd3gety_vd_vd3(vdouble3 v) { return v.y; } static vdouble vd3getz_vd_vd3(vdouble3 v) { return v.z; } static vdouble3 vd3setxyz_vd3_vd_vd_vd(vdouble x, vdouble y, vdouble z) { vdouble3 v = { x, y, z }; return v; } static vdouble3 vd3setx_vd3_vd3_vd(vdouble3 v, vdouble d) { v.x = d; return v; } static vdouble3 vd3sety_vd3_vd3_vd(vdouble3 v, vdouble d) { v.y = d; return v; } static vdouble3 vd3setz_vd3_vd3_vd(vdouble3 v, vdouble d) { v.z = d; return v; } typedef struct { vmask e; vdouble3 d3; } tdx; static vmask tdxgete_vm_tdx(tdx t) { return t.e; } static vdouble3 tdxgetd3_vd3_tdx(tdx t) { return t.d3; } static vdouble tdxgetd3x_vd_tdx(tdx t) { return t.d3.x; } static vdouble tdxgetd3y_vd_tdx(tdx t) { return t.d3.y; } static vdouble tdxgetd3z_vd_tdx(tdx t) { return t.d3.z; } static tdx tdxsete_tdx_tdx_vm(tdx t, vmask e) { t.e = e; return t; } static tdx tdxsetd3_tdx_tdx_vd3(tdx t, vdouble3 d3) { t.d3 = d3; return t; } static tdx tdxsetx_tdx_tdx_vd(tdx t, vdouble x) { t.d3.x = x; return t; } static tdx tdxsety_tdx_tdx_vd(tdx t, vdouble y) { t.d3.y = y; return t; } static tdx tdxsetz_tdx_tdx_vd(tdx t, vdouble z) { t.d3.z = z; return t; } static tdx tdxsetxyz_tdx_tdx_vd_vd_vd(tdx t, vdouble x, vdouble y, vdouble z) { t.d3 = (vdouble3) { x, y, z }; return t; } static tdx tdxseted3_tdx_vm_vd3(vmask e, vdouble3 d3) { return (tdx) { e, d3 }; } static tdx tdxsetexyz_tdx_vm_vd_vd_vd(vmask e, vdouble x, vdouble y, vdouble z) { return (tdx) { e, (vdouble3) { x, y, z } }; } static vmask vm2getx_vm_vm2(vmask2 v) { return v.x; } static vmask vm2gety_vm_vm2(vmask2 v) { return v.y; } static vmask2 vm2setxy_vm2_vm_vm(vmask x, vmask y) { return (vmask2) { x, y }; } static vmask2 vm2setx_vm2_vm2_vm(vmask2 v, vmask x) { v.x = x; return v; } static vmask2 vm2sety_vm2_vm2_vm(vmask2 v, vmask y) { v.y = y; return v; } #endif // #if defined(ENABLE_MAIN) #include static void printvmask(char *mes, vmask g) { uint64_t u[VECTLENDP]; vstoreu_v_p_vd((double *)u, 
vreinterpret_vd_vm(g)); printf("%s ", mes); for(int i=0;i #include #include #include static const tdx pow10tab[14] = { { 16386, { 1.25, 0, 0 } }, // 10 { 16389, { 1.5625, 0, 0 } }, // 100 { 16396, { 1.220703125, 0, 0 } }, // 10000 { 16409, { 1.490116119384765625, 0, 0 } }, // 1e+08 { 16436, { 1.1102230246251565404, 0, 0 } }, // 1e+16 { 16489, { 1.2325951644078310121, -6.6143055845634601483e-17, 0 } }, // 1e+32 { 16595, { 1.519290839321567832, -3.2391917291561477915e-17, -1.8687814275678753633e-33 } }, // 1e+64 { 16808, { 1.1541223272232170594, -8.6760553787903265289e-17, -5.7759618887794337486e-33 } }, // 1e+128 { 17233, { 1.3319983461951343529, -4.0129993161716667573e-17, -4.1720927621797370111e-34 } }, // 1e+256 { 18083, { 1.7742195942665728303, 4.9309343678620668082e-17, 1.3386888736008621608e-34 } }, // 1e+512 { 19784, { 1.5739275843397213528, -1.0848584040002990893e-16, 4.3586291506346591213e-33 } }, // 1e+1024 { 23186, { 1.2386240203727352238, -5.8476062413608067671e-17, -2.0006771920677486581e-33 } }, // 1e+2048 { 29989, { 1.5341894638443178689, -1.0973609479387666102e-17, -6.5816871252891901643e-34 } }, // 1e+4096 { 43596, { 1.1768686554854577153, 3.0579788864750933707e-17, -2.6771867381968692559e-34 } }, // 1e+8192 }; static tdx pow10i(int n) { tdx r = vcast_tdx_vd(1); for(int i=0;i<14;i++) if ((n & (1 << i)) != 0) r = mul2_tdx_tdx_tdx(r, pow10tab[i]); return r; } static int ilog10(tdx t) { int r = 0, p = 1; if ((int)vcmp_vm_tdx_tdx(t, vcast_tdx_vd(1)) < 0) { t = div2_tdx_tdx_tdx(vcast_tdx_vd(1), t); p = -1; } for(int i=12;i>=0;i--) { int c = vcmp_vm_tdx_tdx(t, pow10tab[i]); if ((p > 0 && c >= 0) || (p < 0 && c > 0)) { t = div2_tdx_tdx_tdx(t, pow10tab[i]); r |= (1 << i); } } if (p < 0) r++; return r * p; } EXPORT vargquad Sleef_strtoq(char *str, char **endptr, int base) { while(isspace(*str)) str++; char *p = str; int positive = 1, bp = 0, e = 0, error = 0, mf = 0; tdx n = vcast_tdx_vd(0), d = vcast_tdx_vd(1); if (*p == '-') { positive = 0; p++; } else if 
(*p == '+') p++; if (tolower(p[0]) == 'n' && tolower(p[1]) == 'a' && tolower(p[2]) == 'n') { if (endptr != NULL) *endptr = p+3; vmask2 r = { vcast_vm_i_i(-1, -1), vcast_vm_i_i(-1, -1) }; return vcast_aq_vm2(r); } if (tolower(p[0]) == 'i' && tolower(p[1]) == 'n' && tolower(p[2]) == 'f') { if (endptr != NULL) *endptr = p+3; if (positive) { vmask2 r = { vcast_vm_i_i(0, 0), vcast_vm_i_i(0x7fff0000, 0) }; return vcast_aq_vm2(r); } else { vmask2 r = { vcast_vm_i_i(0, 0), vcast_vm_i_i(0xffff0000, 0) }; return vcast_aq_vm2(r); } } while(*p != '\0' && !error) { if ('0' <= *p && *p <= '9') { n = add2_tdx_tdx_tdx(mul2_tdx_tdx_tdx(n, vcast_tdx_vd(10)), vcast_tdx_vd(*p - '0')); if (bp) d = mul2_tdx_tdx_tdx(d, vcast_tdx_vd(10)); p++; mf = 1; continue; } if (*p == '.') { if (bp) break; bp = 1; p++; continue; } if (*p == 'e' || *p == 'E') { char *q; e = strtol(p+1, &q, 10); if (p+1 == q || isspace(*(p+1))) { e = 0; } else { p = q; } break; } error = 1; break; } if (error || !mf) { if (endptr != NULL) *endptr = str; vmask2 r = { vcast_vm_i_i(0, 0), vcast_vm_i_i(0, 0) }; return vcast_aq_vm2(r); } n = div2_tdx_tdx_tdx(n, d); if (e > 0) n = mul2_tdx_tdx_tdx(n, pow10i(+e)); if (e < 0) n = div2_tdx_tdx_tdx(n, pow10i(-e)); if (!positive) n = vneg_tdx_tdx(n); if (endptr != NULL) *endptr = str; return vcast_aq_vm2(vcast_vf128_tdx(n)); } EXPORT void Sleef_qtostr(char *s, int n, vargquad a, int base) { if (n <= 0) return; if (n > 48) n = 48; if (n < 9) { *s = '\0'; return; } union { vmask2 q; struct { uint64_t l, h; }; } c128 = { .q = vcast_vm2_aq(a) }; char *p = s; if (visnanq_vo_vm2(c128.q)) { sprintf(p, "nan"); return; } if ((c128.h & UINT64_C(0x8000000000000000)) != 0) { *p++ = '-'; c128.h ^= UINT64_C(0x8000000000000000); } else { *p++ = '+'; } if (visinfq_vo_vm2(c128.q)) { sprintf(p, "inf"); return; } tdx t = vcast_tdx_vf128(c128.q); int e = ilog10(t); if (e < 0) t = mul2_tdx_tdx_tdx(t, pow10i(-e-1)); if (e >= 0) t = div2_tdx_tdx_tdx(t, pow10i(+e+1)); t = add2_tdx_tdx_tdx(t, 
div2_tdx_tdx_tdx(vcast_tdx_vd(0.5), pow10i(n-8))); *p++ = '.'; if ((int)vcmp_vm_tdx_tdx(t, vcast_tdx_vd(1)) >= 0) { t = div2_tdx_tdx_tdx(t, vcast_tdx_vd(10)); e++; } for(;n>=9;n--) { t = mul2_tdx_tdx_tdx(t, vcast_tdx_vd(10)); int ia = (int)vcast_vd_tdx(t); if ((int)vcmp_vm_tdx_tdx(t, vcast_tdx_vd(ia)) < 0) ia--; *p++ = ia + '0'; t = add2_tdx_tdx_tdx(t, vcast_tdx_vd(-ia)); } if (viszeroq_vo_vm2(c128.q)) { *p++ = '\0'; return; } *p++ = 'e'; e++; if (e >= 0) *p++ = '+'; if (e < 0) { *p++ = '-'; e = -e; } sprintf(p, "%d", e); } #endif // Functions for debugging ------------------------------------------------------------------------------------------------------------ #ifdef ENABLE_MAIN // gcc -DENABLE_MAIN -DENABLEFLOAT128 -Wno-attributes -I../libm -I../quad-tester -I../common -I../arch -DUSEMPFR -DENABLE_AVX2 -mavx2 -mfma sleefsimdqp.c ../common/common.c ../quad-tester/qtesterutil.c -lm -lmpfr #include #include #include #include #include #include #include "qtesterutil.h" int main(int argc, char **argv) { xsrand(time(NULL) + (int)getpid()); int lane = xrand() % VECTLENDP; printf("lane = %d\n", lane); char s[200]; double ad[32]; mpfr_set_default_prec(18000); mpfr_t fr0, fr1, fr2; mpfr_inits(fr0, fr1, fr2, NULL); mpfr_set_d(fr0, 0, GMP_RNDN); if (argc >= 2) mpfr_set_str(fr0, argv[1], 10, GMP_RNDN); Sleef_quad q0 = mpfr_get_f128(fr0, GMP_RNDN); mpfr_set_f128(fr0, q0, GMP_RNDN); if (argc >= 2) printf("arg0 : %s\n", sprintfr(fr0)); vargquad a0; #if 1 memrand(&a0, sizeof(vargquad)); #else memset(&a0, 0, sizeof(vargquad)); #endif a0.s[lane] = q0; #if 0 memrand(ad, sizeof(ad)); ad[lane] = mpfr_get_d(fr0, GMP_RNDN); a0 = xcast_from_doubleq(vloadu_vd_p(ad)); #endif mpfr_set_d(fr1, 0, GMP_RNDN); if (argc >= 3) mpfr_set_str(fr1, argv[2], 10, GMP_RNDN); Sleef_quad q1 = mpfr_get_f128(fr1, GMP_RNDN); mpfr_set_f128(fr1, q1, GMP_RNDN); if (argc >= 3) printf("arg1 : %s\n", sprintfr(fr1)); vargquad a1; #if 1 memrand(&a1, sizeof(vargquad)); #else memset(&a1, 0, sizeof(vargquad)); #endif 
a1.s[lane] = q1; #if 0 memrand(ad, sizeof(ad)); ad[lane] = mpfr_get_d(fr1, GMP_RNDN); a1 = xcast_from_doubleq(vloadu_vd_p(ad)); #endif // #if 1 vargquad a2 = xaddq_u05(a0, a1); mpfr_add(fr2, fr0, fr1, GMP_RNDN); #endif #if 0 vargquad a2 = xmulq_u05(a0, a1); mpfr_mul(fr2, fr0, fr1, GMP_RNDN); #endif #if 0 vargquad a2 = xdivq_u05(a0, a1); mpfr_div(fr2, fr0, fr1, GMP_RNDN); #endif #if 0 vargquad a2 = xsqrtq_u05(a0); mpfr_sqrt(fr2, fr0, GMP_RNDN); #endif // mpfr_set_f128(fr2, mpfr_get_f128(fr2, GMP_RNDN), GMP_RNDN); printf("corr : %s\n", sprintfr(fr2)); Sleef_quad q2 = a2.s[lane]; mpfr_set_f128(fr2, q2, GMP_RNDN); printf("test : %s\n", sprintfr(fr2)); } #endif sleef-3.5.1/travis/000077500000000000000000000000001373003144100141225ustar00rootroot00000000000000sleef-3.5.1/travis/before_install.aarch64-gcc.sh000066400000000000000000000035021373003144100214270ustar00rootroot00000000000000#!/bin/bash set -ev export PATH=$PATH:/usr/bin dpkg --add-architecture arm64 cat /etc/apt/sources.list | sed -e 's/^deb /deb \[arch=amd64\] /g' -e 's/\[arch=amd64\] \[arch=amd64\]/\[arch=amd64\]/g' > /tmp/sources.list . 
/etc/os-release cat <> /tmp/sources.list deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME main restricted deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates main restricted deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME universe deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates universe deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-backports main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-backports main restricted deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security main restricted deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security universe deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security multiverse deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security multiverse EOF mv /tmp/sources.list /etc/apt/sources.list apt-get -qq update apt-get install -y git cmake gcc-aarch64-linux-gnu libc6-arm64-cross libc6:arm64 libmpfr-dev:arm64 libgomp1:arm64 libmpfr-dev binfmt-support qemu qemu-user-static libfftw3-dev:arm64 libssl-dev libssl-dev:arm64 ninja-build sleef-3.5.1/travis/before_install.arm64-clang-lto.sh000066400000000000000000000002101373003144100222450ustar00rootroot00000000000000#!/bin/bash set -ev sudo apt-get -qq update sudo apt-get install -y cmake ninja-build libmpfr-dev libssl-dev libfftw3-dev clang-8 lld-8 sleef-3.5.1/travis/before_install.arm64-clang.sh000066400000000000000000000001641373003144100214610ustar00rootroot00000000000000#!/bin/bash 
set -ev sudo apt-get -qq update sudo apt-get install -y libmpfr-dev libfftw3-dev libssl-dev ninja-build sleef-3.5.1/travis/before_install.arm64-gcc-lto.sh000066400000000000000000000001721373003144100217240ustar00rootroot00000000000000#!/bin/bash set -ev sudo apt-get -qq update sudo apt-get install -y cmake ninja-build libmpfr-dev libssl-dev libfftw3-dev sleef-3.5.1/travis/before_install.arm64-gcc-sve.sh000066400000000000000000000003631373003144100217250ustar00rootroot00000000000000#!/bin/bash set -ev sudo apt-get -qq update sudo apt-get install -y cmake libmpfr-dev libssl-dev libfftw3-dev ninja-build wget -nv https://shibata.naist.jp/~n-sibata/travis/binutils-2.34-aarch64.tar.xz tar xf binutils-2.34-aarch64.tar.xz -C / sleef-3.5.1/travis/before_install.arm64-gcc.sh000066400000000000000000000001721373003144100211300ustar00rootroot00000000000000#!/bin/bash set -ev sudo apt-get -qq update sudo apt-get install -y cmake libmpfr-dev libssl-dev libfftw3-dev ninja-build sleef-3.5.1/travis/before_install.armhf-gcc.sh000066400000000000000000000034611373003144100213000ustar00rootroot00000000000000#!/bin/bash set -ev export PATH=$PATH:/usr/bin dpkg --add-architecture armhf cat /etc/apt/sources.list | sed -e 's/^deb /deb \[arch=amd64\] /g' -e 's/\[arch=amd64\] \[arch=amd64\]/\[arch=amd64\]/g' > /tmp/sources.list . 
/etc/os-release cat <> /tmp/sources.list deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME main restricted deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates main restricted deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME universe deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates universe deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-backports main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-backports main restricted deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security main restricted deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security universe deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security multiverse deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security multiverse EOF mv /tmp/sources.list /etc/apt/sources.list apt-get -qq update apt-get install -y git cmake gcc-arm-linux-gnueabihf libc6-armhf-cross libc6:armhf libmpfr-dev:armhf libgomp1:armhf libmpfr-dev binfmt-support qemu qemu-user-static libssl-dev libssl-dev:armhf ninja-build sleef-3.5.1/travis/before_install.osx-clang.sh000066400000000000000000000001001373003144100213270ustar00rootroot00000000000000#!/bin/bash set -ev brew update brew install openssl fftw ninja sleef-3.5.1/travis/before_install.osx-gcc.sh000066400000000000000000000001011373003144100210000ustar00rootroot00000000000000#!/bin/bash set -ev brew update brew install gcc@6 openssl ninja 
sleef-3.5.1/travis/before_install.ppc64le-clang.sh000066400000000000000000000001751373003144100220070ustar00rootroot00000000000000#!/bin/bash set -ev uname -a sudo apt-get -qq update sudo apt-get install -y libmpfr-dev libssl-dev libfftw3-dev ninja-build sleef-3.5.1/travis/before_install.ppc64le-gcc.sh000066400000000000000000000001751373003144100214570ustar00rootroot00000000000000#!/bin/bash set -ev uname -a sudo apt-get -qq update sudo apt-get install -y libmpfr-dev libssl-dev libfftw3-dev ninja-build sleef-3.5.1/travis/before_install.s390x-clang.sh000066400000000000000000000001751373003144100214200ustar00rootroot00000000000000#!/bin/bash set -ev uname -a sudo apt-get -qq update sudo apt-get install -y libmpfr-dev libssl-dev libfftw3-dev ninja-build sleef-3.5.1/travis/before_install.s390x-gcc.sh000066400000000000000000000001751373003144100210700ustar00rootroot00000000000000#!/bin/bash set -ev uname -a sudo apt-get -qq update sudo apt-get install -y libmpfr-dev libssl-dev libfftw3-dev ninja-build sleef-3.5.1/travis/before_install.x86_64-clang.sh000066400000000000000000000001641373003144100214660ustar00rootroot00000000000000#!/bin/bash set -ev sudo apt-get -qq update sudo apt-get install -y libmpfr-dev libfftw3-dev libssl-dev ninja-build sleef-3.5.1/travis/before_install.x86_64-gcc.sh000066400000000000000000000001641373003144100211360ustar00rootroot00000000000000#!/bin/bash set -ev sudo apt-get -qq update sudo apt-get install -y libmpfr-dev libfftw3-dev libssl-dev ninja-build sleef-3.5.1/travis/before_script.aarch64-gcc.sh000066400000000000000000000007371373003144100212740ustar00rootroot00000000000000#!/bin/bash set -ev cd /build mkdir build-native cd build-native cmake -G Ninja -DBUILD_QUAD=TRUE .. 
ninja all cd /build mkdir build-cross cd build-cross cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. sleef-3.5.1/travis/before_script.arm64-clang-lto.sh000066400000000000000000000005141373003144100221120ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build export CC=clang-8 export CXX=clang++-8 cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE -DCMAKE_LINKER=lld-8 -DSLEEF_ENABLE_LLVM_BITCODE=TRUE .. sleef-3.5.1/travis/before_script.arm64-clang.sh000066400000000000000000000005121373003144100213140ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build export CC=clang-8 export CXX=clang++-8 cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENABLE_ALTDIV=TRUE -DENABLE_ALTSQRT=TRUE .. sleef-3.5.1/travis/before_script.arm64-gcc-lto.sh000066400000000000000000000004201373003144100215560ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build export CC=gcc-7 export CXX=g++-7 cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_SHARED_LIBS=FALSE -DENABLE_LTO=TRUE .. 
sleef-3.5.1/travis/before_script.arm64-gcc-sve.sh000066400000000000000000000005631373003144100215650ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build export PATH=/opt/local/bin:$PATH export LD_LIBRARY_PATH=/opt/local/lib:$LD_LIBRARY_PATH export CC=gcc-10 cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DEMULATOR=qemu-aarch64 -DENFORCE_TESTER3=TRUE -DBUILD_INLINE_HEADERS=TRUE -DBUILD_QUAD=TRUE -DENFORCE_SVE=TRUE .. sleef-3.5.1/travis/before_script.arm64-gcc.sh000066400000000000000000000004101373003144100207610ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build export CC=gcc-8 cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE .. sleef-3.5.1/travis/before_script.armhf-gcc.sh000066400000000000000000000006741373003144100211410ustar00rootroot00000000000000#!/bin/bash set -ev cd /build mkdir build-native cd build-native cmake -G Ninja -DBUILD_QUAD=TRUE .. ninja all cd /build mkdir build-cross cd build-cross cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE .. sleef-3.5.1/travis/before_script.osx-clang.sh000066400000000000000000000006341373003144100212010ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. 
sleef-3.5.1/travis/before_script.osx-gcc.sh000066400000000000000000000006541373003144100206530ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build export CC=gcc-6 cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. sleef-3.5.1/travis/before_script.ppc64le-clang.sh000066400000000000000000000004131373003144100216400ustar00rootroot00000000000000#!/bin/bash set -ev mkdir build && cd build cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install \ -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_VSX=TRUE .. sleef-3.5.1/travis/before_script.ppc64le-gcc.sh000066400000000000000000000004131373003144100213100ustar00rootroot00000000000000#!/bin/bash set -ev mkdir build && cd build cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install \ -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_VSX=TRUE .. sleef-3.5.1/travis/before_script.s390x-clang.sh000066400000000000000000000004201373003144100212470ustar00rootroot00000000000000#!/bin/bash set -ev mkdir build && cd build cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install \ -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_ZVECTOR2=TRUE .. 
sleef-3.5.1/travis/before_script.s390x-gcc.sh000066400000000000000000000004201373003144100207170ustar00rootroot00000000000000#!/bin/bash set -ev mkdir build && cd build cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install \ -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_ZVECTOR2=TRUE .. sleef-3.5.1/travis/before_script.x86_64-clang.sh000066400000000000000000000006051373003144100213240ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build export CC=clang-7 cmake -G Ninja -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. sleef-3.5.1/travis/before_script.x86_64-gcc.sh000066400000000000000000000006241373003144100207750ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build export CC=gcc-7 export CXX=g++-7 cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DRUNNING_ON_TRAVIS=TRUE -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DENFORCE_TESTER3=TRUE -DBUILD_QUAD=TRUE -DBUILD_INLINE_HEADERS=TRUE -DENFORCE_SSE2=TRUE -DENFORCE_SSE4=TRUE -DENFORCE_AVX=TRUE -DENFORCE_FMA4=TRUE -DENFORCE_AVX2=TRUE -DENFORCE_AVX512F=TRUE .. 
sleef-3.5.1/travis/script.aarch64-gcc.sh000066400000000000000000000002261373003144100177430ustar00rootroot00000000000000#!/bin/bash set -ev cd /build/build-cross ninja all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install sleef-3.5.1/travis/script.arm64-clang-lto.sh000066400000000000000000000002461373003144100205720ustar00rootroot00000000000000#!/bin/bash set -ev cd sleef.build ninja all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE test -f lib/sleefdp.ll ctest -j `nproc` ninja install sleef-3.5.1/travis/script.arm64-clang.sh000066400000000000000000000002171373003144100177740ustar00rootroot00000000000000#!/bin/bash set -ev cd sleef.build ninja all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install sleef-3.5.1/travis/script.arm64-gcc-lto.sh000066400000000000000000000002741373003144100202430ustar00rootroot00000000000000#!/bin/bash set -ev cd sleef.build ninja all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE readelf -a lib/libsleef.a|grep -cq __gnu_lto ctest -j `nproc` ninja install sleef-3.5.1/travis/script.arm64-gcc-sve.sh000066400000000000000000000004131373003144100202350ustar00rootroot00000000000000#!/bin/bash set -ev export PATH=/opt/local/bin:$PATH export LD_LIBRARY_PATH=/opt/local/lib:$LD_LIBRARY_PATH export QEMU_CPU=max,sve-max-vq=1 export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE cd sleef.build ninja all ctest -V -j `nproc` ninja install sleef-3.5.1/travis/script.arm64-gcc.sh000066400000000000000000000002171373003144100174440ustar00rootroot00000000000000#!/bin/bash set -ev cd sleef.build ninja all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install sleef-3.5.1/travis/script.armhf-gcc.sh000066400000000000000000000002261373003144100176100ustar00rootroot00000000000000#!/bin/bash set -ev cd /build/build-cross ninja all export OMP_WAIT_POLICY=passive export 
CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install sleef-3.5.1/travis/script.osx-clang.sh000066400000000000000000000002171373003144100176540ustar00rootroot00000000000000#!/bin/bash set -ev cd sleef.build ninja all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install sleef-3.5.1/travis/script.osx-gcc.sh000066400000000000000000000002171373003144100173240ustar00rootroot00000000000000#!/bin/bash set -ev cd sleef.build ninja all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install sleef-3.5.1/travis/script.ppc64el-clang.sh000066400000000000000000000002551373003144100203220ustar00rootroot00000000000000#!/bin/bash set -ev export QEMU_CPU=POWER8 cd /build/build-cross ninja all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install sleef-3.5.1/travis/script.ppc64le-clang.sh000066400000000000000000000001061373003144100203150ustar00rootroot00000000000000#!/bin/bash set -ev cd build ninja all ctest -j `nproc` ninja install sleef-3.5.1/travis/script.ppc64le-gcc.sh000066400000000000000000000001061373003144100177650ustar00rootroot00000000000000#!/bin/bash set -ev cd build ninja all ctest -j `nproc` ninja install sleef-3.5.1/travis/script.s390x-clang.sh000066400000000000000000000001061373003144100177260ustar00rootroot00000000000000#!/bin/bash set -ev cd build ninja all ctest -j `nproc` ninja install sleef-3.5.1/travis/script.s390x-gcc.sh000066400000000000000000000001061373003144100173760ustar00rootroot00000000000000#!/bin/bash set -ev cd build ninja all ctest -j `nproc` ninja install sleef-3.5.1/travis/script.x86_64-clang.sh000066400000000000000000000002171373003144100200010ustar00rootroot00000000000000#!/bin/bash set -ev cd sleef.build ninja all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install 
sleef-3.5.1/travis/script.x86_64-gcc.sh000066400000000000000000000002131373003144100174450ustar00rootroot00000000000000#!/bin/bash set -ev cd sleef.build ninja export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j `nproc` ninja install sleef-3.5.1/travis/setupdocker.sh000066400000000000000000000004531373003144100170100ustar00rootroot00000000000000#!/bin/bash set -ev docker pull ubuntu:bionic; docker run -d --name bionic -dti ubuntu:bionic bash; tar cfz /tmp/builddir.tgz . docker cp /tmp/builddir.tgz bionic:/tmp/ docker exec bionic mkdir /build docker exec bionic tar xfz /tmp/builddir.tgz -C /build docker exec bionic rm -f /tmp/builddir.tgz sleef-3.5.1/travis/toolchain-aarch64.cmake000066400000000000000000000010161373003144100203300ustar00rootroot00000000000000SET (CMAKE_CROSSCOMPILING TRUE) SET (CMAKE_SYSTEM_NAME "Linux") SET (CMAKE_SYSTEM_PROCESSOR "aarch64") SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu /lib/aarch64-linux-gnu) find_program(CMAKE_C_COMPILER aarch64-linux-gnu-gcc aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) sleef-3.5.1/travis/toolchain-armhf.cmake000066400000000000000000000010051373003144100201730ustar00rootroot00000000000000SET (CMAKE_CROSSCOMPILING TRUE) SET (CMAKE_SYSTEM_NAME "Linux") SET (CMAKE_SYSTEM_PROCESSOR "armhf") SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf) find_program(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) 
sleef-3.5.1/travis/toolchain-ppc64el.cmake000066400000000000000000000007071373003144100203630ustar00rootroot00000000000000SET (CMAKE_CROSSCOMPILING TRUE) SET (CMAKE_SYSTEM_NAME "Linux") SET (CMAKE_SYSTEM_PROCESSOR "ppc64") SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu) find_program(CMAKE_C_COMPILER ppc64el-cc) SET(CMAKE_AR /usr/powerpc64le-linux-gnu/bin/ar) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) sleef-3.5.1/travis/toolchain-s390x.cmake000066400000000000000000000006151373003144100177720ustar00rootroot00000000000000SET (CMAKE_CROSSCOMPILING TRUE) SET (CMAKE_SYSTEM_NAME "Linux") SET (CMAKE_SYSTEM_PROCESSOR "s390x") SET(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu /usr/include/s390x-linux-gnu /usr/lib/s390x-linux-gnu) find_program(CMAKE_C_COMPILER s390x-linux-gnu-gcc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)